In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
sns.set()
%matplotlib inline

In [None]:
# 500 draws from a 2-D standard normal, shown with equal scaling on both axes.
w = np.random.randn(500, 2)
plt.figure(figsize=(6, 6))
plt.scatter(w[:,0], w[:,1])
plt.axis('equal')
plt.show()

# The same experiment in 3-D.
w = np.random.randn(500, 3)
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(w[:,0], w[:,1], w[:,2])
try:
    ax.axis('equal')
except NotImplementedError:
    # Some matplotlib releases reject every aspect other than 'auto' on 3-D
    # axes. Fall back to giving x, y and z the same symmetric data range so
    # the cloud is not stretched along any single axis.
    half_range = np.abs(w).max()
    ax.set_xlim(-half_range, half_range)
    ax.set_ylim(-half_range, half_range)
    ax.set_zlim(-half_range, half_range)
plt.show()

In [None]:
# Dimensions d for which samples of shape (1000, d) are drawn in the loop over `dims`.
dims = [1, 3, 5, 10, 30, 100]

def norms(x):
    """
    Euclidean (L2) norm of each row of `x`, taken along axis 1.

    :param x: Array-like with at least two dimensions, e.g. (n_samples, dim).
    :return: Array of norms with axis 1 reduced, e.g. shape (n_samples,).
    """
    # np.linalg.norm converts integer input to float before squaring (so small
    # integer dtypes cannot overflow) and uses |x|^2 for complex entries.
    return np.linalg.norm(x, axis=1)

# Distribution of ||x||
# One histogram + KDE curve per entry of `dims`, all drawn on the same axes.
plt.figure()
for d in dims:
    x = np.random.randn(1000, d)
    sns.distplot(norms(x), kde_kws={'label' : 'dim={0:d}'.format(d)})
# The axis label does not depend on d, so it is set once after the loop.
plt.xlabel('Norm')
# Request the legend explicitly: the labels above are only attached to the
# KDE lines and are not guaranteed to be rendered without this call.
# NOTE(review): behaviour differs between seaborn versions - confirm locally.
plt.legend()
plt.show()

In [None]:
def gen_gaussian_clusters(mu0, dim, n_samples):
    """
    Generate unit-variance Gaussian clusters centered at:
    (mu0[0], 0, 0, ...)
    (mu0[1], 0, 0, ...)
    (mu0[2], 0, 0, ...)
    ... one cluster per entry of mu0.
    :param mu0: First coordinate of each cluster center. A scalar or
        single-element value is expanded to three identical centers, which
        matches the historical three-cluster behaviour.
    :param dim: Dimension of the Gaussian samples. Must be at least 1 because
        the offset is written into coordinate 0.
    :param n_samples: Number of samples to generate for each cluster.
    :return: Tuple with one array of shape (n_samples, dim) per cluster, in
        the order of mu0.
    :raises ValueError: If mu0 contains no center at all.
    """
    centers = np.asarray(mu0, dtype=float).ravel()
    if centers.size == 0:
        raise ValueError('mu0 must contain at least one cluster center')
    if centers.size == 1:
        # Keep the legacy broadcast: one value -> three clusters at that center.
        centers = np.repeat(centers, 3)

    # Only the first coordinate of each center is non-zero.
    mu = np.zeros((centers.size, dim))
    mu[:, 0] = centers

    # Draw the clusters one after another so the random stream is consumed in
    # the same order as separate per-cluster calls would consume it.
    return tuple(np.random.randn(n_samples, dim) + center for center in mu)

# Experiment configuration: sample dimension, samples per cluster, and the
# first coordinate of each of the three cluster centers.
dim = 500
n_samples = 500
mu0 = np.array([0, 5, 10])

x1, x2, x3 = gen_gaussian_clusters(mu0, dim, n_samples)

# Visualize if possible
# Only the first one or two coordinates can be drawn; each cluster gets a
# legend entry so the groups can be told apart.
plt.figure()
if dim == 1:
    for cluster_label, cluster in zip(('x1', 'x2', 'x3'), (x1, x2, x3)):
        plt.hist(cluster, label=cluster_label)
    plt.legend()
elif dim >= 2:
    for cluster_label, cluster in zip(('x1', 'x2', 'x3'), (x1, x2, x3)):
        plt.scatter(cluster[:,0], cluster[:,1], label=cluster_label)
    plt.axis('equal')
    plt.legend()
plt.show()

In [None]:
# Distance-to-origin distribution of every cluster, one panel per dimension.
dims = [3, 10, 50, 100, 200, 500]

# Distance distribution
plt.figure(1, figsize=(15, 10))
for i, d in enumerate(dims, 1):
    x1, x2, x3 = gen_gaussian_clusters(mu0, d, n_samples)
    plt.subplot(2, 3, i)
    # Same plot call for each of the three clusters, in x1, x2, x3 order.
    for cluster in (x1, x2, x3):
        sns.distplot(norms(cluster))
    plt.legend(['x1', 'x2', 'x3'])
    plt.title('dim = {0:d}'.format(d))
    plt.xlabel('Distance to origin')
    # Pin the left edge at zero while keeping the automatically chosen right edge.
    upper = plt.xlim()[1]
    plt.xlim(0, upper)
plt.show()

from scipy.spatial.distance import pdist, squareform

# Distance matrix
# Heatmap of all pairwise Euclidean distances, one panel per dimension; rows
# and columns are ordered x1, x2, x3.
plt.figure(figsize=(16, 10))
for i, d in enumerate(dims, 1):
    x1, x2, x3 = gen_gaussian_clusters(mu0, d, n_samples)
    stacked = np.vstack((x1, x2, x3))
    # pdist returns the condensed form; squareform expands it to a full matrix.
    D = squareform(pdist(stacked, 'euclidean'))
    plt.subplot(2, 3, i)
    sns.heatmap(D)
    plt.title('dim = {0:d}'.format(d))
plt.show()