Collapsing high-dimensional normal distributions by tophtucker

My friend Colin has been trying to give me a better intuition for higher dimensions. His first fun fact was that the volume of an n-dimensional hypersphere (of radius 1) peaks at n=5 and then approaches 0 as n approaches infinity. Damn! Meanwhile, of course the volume of the hypercube that just encloses it (side length 2) diverges to infinity, as you’d expect. So if you inscribe a hypersphere in a hypercube, as dimension increases, more and more of the volume is in “the corners” — ultimately, almost all of it.

That’s not what I’m trying to show here, it’s just cool. This is somewhat different.

Colin also pointed out that, in a high-dimensional multivariate normal distribution (with identity covariance matrix), almost all the mass ends up in a sort of donut at some distance from the middle. There’s very little mass in the middle. Of course the origin is still the mean/median/mode. The problem is that the middle is just so dang small, and there’s SO MUCH SPACE as you go a little further out. So if you’re just looking at the distribution of distances from the middle, most are a ways out — more so the more dimensions you’re in.

This is a lot like the old joke that the average family has 2.2 kids, but no single family has 2.2 kids, God willing. Now consider lots of other attributes (dimensions) too: their ages, genders, heights, locations, professions, pets, hobbies, politics, faiths, vices, material possessions, favorite books, dreams, crimes, secrets, loves, etc. Of course people are mostly normal. But almost no person is normal.

Unlike the incredible shrinking hypersphere, this is evident even in low human-scale dimensions, like 2 and 3.

INSTRUCTIONS:

Mouse around to admire the lil parallax that shows you that the third blob of points is 3D.
Drag “3D” to “2D” to flatten the third dimension, while keeping every point the same distance from the origin, i.e. the same radius. Imagine every point is on a fixed arm that can rotate around the origin, but can’t extend or shrink. (That’d be a good thing to visualize if I had more time!)
Drag “2D” to “1D” to flatten the second dimension, again keeping every point the same distance from the origin. Like sweeping up every point on its little invisible fixed arm and collecting them so they’re all facing the same way. (This also sweeps up the 1D points with a negative radius around to the positive side.)

When you let go on 1D, the points will “relax” a bit to show you their distribution of distances from the origin.

Notice that on the left (1D), the mode (fattest part of the distribution) is right at the origin. But in the middle (2D), it’s a bit removed. And on the right (3D) it’s farther still.

Hopefully you can kinda imagine how, every time you add a dimension, it contributes some non-negative component to the radius. Like, when you add a third dimension perpendicular to the screen, you can’t initially see the point’s displacement along that axis — but it is probably displaced somewhat, and when you rotate that onto the visible plane, it’ll appear to get a little further away from the origin. Like you add a perpendicular leg to a line and take the hypotenuse and that’s gotta be longer than the original line.

Where does it end??

colin’s python & chart etc

also: THAT GUY’S TALK!!

Here’s a helpful little library https://github.com/tulip/multivariate-normal-js that I didn’t actually need because I didn’t need covariance.
I had a version that I wanted to generalize to arbitrarily many dimensions but got kinda hung up on the hyperdimensional spherical coordinates. Vestiges preserved in index2.html. You know it’s good when there’s an index2!

index.html

<!DOCTYPE html>
<meta charset="utf-8">

<style>
/* Remove default spacing and let the page fill the whole viewport. */
html, body {
  margin: 0;
  padding: 0;
  width: 100%;
  height: 100%;
}

/* The drawing surface stretches to fill the page. */
svg {
  width: 100%;
  height: 100%;
}

/* Faint 1px stroke for the <line> elements the script appends. */
line {
  stroke: #eee;
  stroke-width: 1;
}
</style>

<body>
  <svg></svg>
</body>

<script src="d3.v4.js"></script>
<script>

// Drawing surface and the layout constants shared by the rest of the script.
var svg = d3.select("svg");
var width = 960;
var height = 500;

// Number of side-by-side panels (1D, 2D, 3D).
var n = 3;

// Data units -> pixels: 4 data units map to width/n/2 pixels.
var scale = d3.scaleLinear().domain([0, 4]).range([0, width / n / 2]);

// Most recent pointer position; read when computing the parallax offset.
var mouse = [0, 0];

// One dataset per panel. Panel 0 varies only along the first axis, panel 1
// along the first two, panel 2 along all three; unused axes are pinned to 0 so
// every point is still a 3-vector.
var dimensions = d3.range(n)
  .map(dimIndex => {
    // Build the sampler once per panel instead of once per draw: constructing
    // a generator is loop-invariant work.
    var normal = d3.randomNormal();

    var data = d3.range(1000).map(() => {
      var cartesian = [
        normal(),
        dimIndex > 0 ? normal() : 0,
        dimIndex > 1 ? normal() : 0
      ];
      return {
        cartesian,
        spherical: cartesianToSpherical(cartesian),
        // Currently displayed position; replaced while the points are flattened.
        coords: cartesian
      };
    });

    // Run a stopped force simulation by hand: pull each point toward its
    // scaled radius along x, toward 0 along y, with a collision radius of 1.
    var simulation = d3.forceSimulation(data)
        .force("x", d3.forceX(point => scale(point.spherical.r)).strength(1))
        .force("y", d3.forceY(0))
        .force("collide", d3.forceCollide(1))
        .stop();

    for (var i = 0; i < 120; ++i) simulation.tick();

    return data;
  });

// One <g class="dimension"> per dataset.
var dim = svg
  .selectAll("g.dimension")
  .data(dimensions)
  .enter()
  .append("g")
  .attr("class", "dimension");

// Horizontal guide line through each group's origin, width/n/2 to each side.
dim.append("line")
  .attr("x1", -width / n / 2)
  .attr("x2", width / n / 2)
  .attr("y1", 0)
  .attr("y2", 0);

// Position every group and draw its points.
render(dim);

// Parallax: shift every circle in proportion to its third coordinate and the
// pointer position. Circles whose third coordinate is falsy get no transform.
d3.select("body").on("mousemove", function() {
  mouse = d3.mouse(this);
  svg.selectAll("circle").attr("transform", function(d) {
    var depth = d.coords[2];
    if (!depth) return null;
    var dx = depth * 5 * mouse[0] / width;
    var dy = depth * 5 * mouse[1] / height;
    return `translate(${dx}, ${dy})`;
  });
});

/**
 * Position each dimension group, draw one circle per data point, and queue the
 * three-stage animation that collapses the points while preserving radius.
 *
 * @param {Selection} selection d3 selection of <g> elements; each datum is an
 *   array of points carrying `coords`, `spherical` and (for the last stage) `y`.
 */
function render(selection) {
  // Circle radius grows with the third coordinate, never dropping below .1.
  function radius(d) {
    return Math.max(.1, 1 + d.coords[2]/5);
  }

  // Parallax offset from the third coordinate and the last pointer position,
  // or null when the third coordinate is falsy.
  function parallax(d) {
    return d.coords[2] ?
      "translate(" + d.coords[2]*5*mouse[0]/width +
              ", " + d.coords[2]*5*mouse[1]/height + ")" : null;
  }

  // First coordinate is drawn upward (negative cy), second one to the right.
  function place(node, d) {
    node.setAttribute("cy", -scale(d.coords[0]));
    node.setAttribute("cx", scale(d.coords[1]));
  }

  selection.each(function(data, index) {
    var sel = d3.select(this)
      .attr("transform",
        "translate(" + width * (0.16 + 0.33 * index) + ", " + height / 2 + ")");

    sel.selectAll("circle")
      .data(data)
      .enter()
    .append("circle")
      .attr("r", radius)
      .attr("cy", d => -scale(d.coords[0]))
      .attr("cx", d => scale(d.coords[1]))
    // Stage 1: sweep theta to PI/2 at constant r and phi, which drives the
    // third coordinate (r * cos(theta)) toward 0.
    .transition()
      .delay(3000)
      .duration(3000)
      .tween("cx.flatten2d", function(d) {
        var node = this, i = d3.interpolate(d.spherical.theta, Math.PI/2);
        return function(t) {
          d.coords = sphericalToCartesian({
            r: d.spherical.r,
            theta: i(t),
            phi: d.spherical.phi
          });
          node.setAttribute("r", radius(d));
          place(node, d);
          // setAttribute(name, null) would store the string "null", so clear
          // the attribute explicitly when there is no offset to apply.
          var offset = parallax(d);
          if (offset === null) {
            node.removeAttribute("transform");
          } else {
            node.setAttribute("transform", offset);
          }
        };
      })
    // Stage 2: sweep phi to 0 with theta held at PI/2, rotating every point
    // onto the first axis at its original radius.
    .transition()
      .duration(3000)
      .tween("cx.flatten1d", function(d) {
        var node = this, i = d3.interpolate(d.spherical.phi, 0);
        return function(t) {
          d.coords = sphericalToCartesian({
            r: d.spherical.r,
            theta: Math.PI/2,
            phi: i(t)
          });
          place(node, d);
        };
      })
    // Stage 3: move cx to d.y -- presumably the y position assigned by the
    // force simulation run when the data was built (verify against that code).
    .transition()
      .delay(1000)
      .duration(3000)
      .attr("cx", d => d.y);
  });
}

// https://en.wikipedia.org/wiki/Spherical_coordinate_system
// https://en.wikipedia.org/wiki/N-sphere#Spherical_coordinates
/**
 * Convert a 3-component Cartesian vector to spherical coordinates.
 *
 * @param {number[]} cartesian [c0, c1, c2]
 * @returns {{r: number, theta: number, phi: number}} r is the vector length,
 *   theta the angle measured from the third axis, and phi the angle in the
 *   plane of the first two axes (measured from the first axis).
 */
function cartesianToSpherical(cartesian) {
  var r = distance(cartesian);
  return {
    r: r,
    // The direction of the zero vector is undefined and 0/0 would yield NaN,
    // so theta is pinned to 0 there.
    theta: r === 0 ? 0 : Math.acos(cartesian[2] / r),
    phi: Math.atan2(cartesian[1], cartesian[0]) // atan2 resolves the quadrant
  };
}

/**
 * Convert spherical coordinates {r, theta, phi} back to a 3-component
 * Cartesian vector. theta is measured from the third axis, phi within the
 * plane of the first two.
 */
function sphericalToCartesian(spherical) {
  const { r, theta, phi } = spherical;
  const sinTheta = Math.sin(theta);
  const first = r * sinTheta * Math.cos(phi);
  const second = r * sinTheta * Math.sin(phi);
  const third = r * Math.cos(theta);
  return [first, second, third];
}

// Euclidean length of a vector: square root of the sum of squared components.
function distance(arr) {
  const squares = arr.map(component => component ** 2);
  return Math.sqrt(sum(squares));
}

// Multiply all entries together; an empty array yields 1.
function product(arr) {
  let total = 1;
  arr.forEach(factor => {
    total = total * factor;
  });
  return total;
}

// Add all entries together; an empty array yields 0.
function sum(arr) {
  let total = 0;
  arr.forEach(term => {
    total = total + term;
  });
  return total;
}

</script>

index2.html

<!DOCTYPE html>
<meta charset="utf-8">

<style>
/* Remove default spacing and let the page fill the whole viewport. */
html, body {
  margin: 0;
  padding: 0;
  width: 100%;
  height: 100%;
}

</style>

<body>
  <canvas width="960" height="500" id="canvas"></canvas>
</body>

<script src="https://d3js.org/d3.v4.min.js"></script>
<script src="multivariate-normal.min.js"></script>
<script>

// Canvas, layout constants and 2D drawing context.
// The `scale` line previously lacked a trailing comma, which ended the `var`
// statement early and left `context` as an implicit global.
var canvas = document.getElementById("canvas"),
    width = canvas.width,
    height = canvas.height,
    n = 6,    // number of panels; panel k holds a k-dimensional distribution
    nn = n,   // dimension index currently shown; counted down by the interval
    scale = d3.scaleLinear().domain([0,4]).range([0, width/n]),
    context = canvas.getContext("2d");

// Translucent black dots, so overlapping points read as density.
context.fillStyle = '#000000';
context.globalAlpha = .1;

// Describe each panel: its dimensionality (1..n), a sampler of that
// dimensionality, and the canvas position of its origin. Panels are laid out
// in rows of n/2.
var dimensions = d3.range(1, n + 1).map(function(d) {
  var perRow = n / 2;
  var column = (d - 1) % perRow;
  var row = Math.trunc((d - 1) / perRow);
  return {
    dim: d,
    dist: getDistribution(d),
    origin: [
      width/n + (2*width/n) * column,
      height/4 + height/2 * row
    ]
  };
});

// Attach the pre-computed sample sets to every panel.
dimensions.forEach(getSamples);

// Maps 0..10000 onto n..0. Only referenced by the disabled d3.timer animation.
var timeScale = d3.scaleLinear().domain([0, 10000]).range([n, 0]);

// d3.timer(function(t) {
//   context.clearRect(0, 0, width, height);
//   dimensions.forEach(function(d) {
//     context.save();
//     context.translate(d.origin[0], d.origin[1]);
//     d.samples.forEach(function(dd) {
//       var from = Math.min(dd.length, Math.ceil(timeScale(t))) - 1
//       var to = Math.min(dd.length, Math.floor(timeScale(t))) - 1
//
//       console.log(dd[from].spherical, dd[to].spherical)
//       debugger
//
//       var coords = interpolateSpherical(dd[from].spherical, dd[to].spherical)(t)
//       context.beginPath();
//       context.arc(scale(coords[0]), coords[1] ? scale(coords[1]) : 0, 2, 0, 2 * Math.PI);
//       context.fill();
//     })
//     context.restore();
//   })
// })

/**
 * Build an interpolator between two spherical vectors that keeps the radius of
 * `from` and blends only the angles.
 *
 * @param {{r: number, angular: number[]}} from start vector (its r is kept)
 * @param {{r: number, angular: number[]}} to   end vector (only angles used)
 * @returns {function(number): number[]} maps t to Euclidean coordinates
 */
function interpolateSpherical(from, to) {
  // Construct the angle interpolator once rather than on every call of the
  // returned function.
  var angles = d3.interpolate(from.angular, to.angular);
  return function(t) {
    return getEuclideanFromSpherical({
      r: from.r,
      angular: angles(t)
    });
  };
}

// Draw the initial frame.
dimensions.forEach(function(d) {
  renderDistribution(d, nn);
});

// Once a second, step nn down by one and redraw; stop once nn has reached 0.
var interval = d3.interval(function() {
  if (nn <= 0) {
    interval.stop();
    return;
  }
  nn--;
  context.clearRect(0, 0, width, height);
  dimensions.forEach(function(d) {
    renderDistribution(d, nn);
  });
}, 1000);

/**
 * Draw one panel's samples as dots around the panel origin.
 *
 * @param {{origin: number[], samples: Array}} d panel descriptor; each entry
 *   of `samples` is an array of versions of one point, each with `euclidean`.
 * @param {number} [n=nn] index of the version to draw, clamped to the last
 *   available one. Previously this argument was ignored in favour of the
 *   global `nn`; the default keeps calls without it working.
 */
function renderDistribution(d, n = nn) {
  context.save();
  context.translate(d.origin[0], d.origin[1]);
  d.samples.forEach(function(versions) {
    var coords = versions[Math.min(versions.length - 1, n)].euclidean;
    context.beginPath();
    // One-component points have no second coordinate; draw them at y = 0.
    context.arc(scale(coords[0]), coords[1] ? scale(coords[1]) : 0, 2, 0, 2 * Math.PI);
    context.fill();
  });
  context.restore();
}

/**
 * Draw 1000 points from d.dist and store, for each point, d.dim versions of it
 * on d.samples. Version k keeps only the angles at index >= d.dim - k - 1 and
 * zeroes the rest, always at the original radius; the last version keeps
 * every angle.
 */
function getSamples(d) {
  const drawn = d3.range(1000).map(() => {
    const point = d.dist.sample();
    return {
      euclidean: point,
      spherical: getSphericalFromEuclidean(point)
    };
  });

  d.samples = drawn.map(({ spherical }) =>
    d3.range(d.dim).map(kept => {
      const firstKept = d.dim - kept - 1;
      const truncatedSpherical = {
        r: spherical.r,
        angular: spherical.angular.map((angle, i) => (i >= firstKept ? angle : 0))
      };
      return {
        euclidean: getEuclideanFromSpherical(truncatedSpherical),
        spherical: truncatedSpherical
      };
    })
  );
}

// Euclidean norm: square every component, add them up, take the square root.
function distance(arr) {
  const square = (value) => Math.pow(value, 2);
  const squaredComponents = arr.map(square);
  return Math.sqrt(sum(squaredComponents));
}

// Product of all entries, starting from the multiplicative identity 1.
function product(arr) {
  const multiply = (running, factor) => running * factor;
  return arr.reduce(multiply, 1);
}

// Sum of all entries, starting from the additive identity 0.
function sum(arr) {
  const add = (running, term) => running + term;
  return arr.reduce(add, 0);
}

// https://en.wikipedia.org/wiki/N-sphere#Spherical_coordinates
/**
 * Convert hyperspherical coordinates {r, angular} to Euclidean coordinates.
 * Component i is r * sin(a0) * ... * sin(a(i-1)) * cos(ai); the final
 * component has no cosine factor. The result has one more entry than
 * `angular`.
 */
function getEuclideanFromSpherical(vector) {
  const coords = [];
  // Running product of the sines of all angles consumed so far.
  let sines = 1;
  for (const angle of vector.angular) {
    coords.push(vector.r * sines * Math.cos(angle));
    sines = sines * Math.sin(angle);
  }
  coords.push(vector.r * sines * 1);
  return coords;
}

/**
 * Convert a Euclidean vector to hyperspherical coordinates.
 *
 * Angle i is acos(x_i / |(x_i, ..., x_last)|), which lies in [0, PI]. The
 * final angle covers the full circle: it is reflected to 2*PI - angle when the
 * LAST coordinate is negative, so that converting back restores that
 * coordinate's sign. (The sign of the second-to-last coordinate is already
 * captured by acos and must not drive the reflection.)
 *
 * @param {number[]} vector Euclidean coordinates
 * @returns {{r: number, angular: number[]}} radius and vector.length - 1 angles
 */
function getSphericalFromEuclidean(vector) {
  var last = vector.length - 1;
  return {
    r: distance(vector),
    angular: vector.slice(0, last).map((d, i) => {
      // 0/0 (all remaining coordinates are zero) gives NaN; fall back to a
      // ratio of 0, i.e. an angle of PI/2.
      var angle = Math.acos(d / distance(vector.slice(i)) || 0);
      var isFinalAngle = i === last - 1;
      return isFinalAngle && vector[last] < 0 ? 2 * Math.PI - angle : angle;
    })
  };
}

// Build an n-dimensional distribution from a zero mean vector and an identity
// covariance matrix.
function getDistribution(n) {
  const mean = getZeroVector(n);
  const covariance = getIdentityMatrix(n);
  return MultivariateNormal.default(mean, covariance);
}

// n x n matrix with ones on the diagonal and zeros everywhere else.
function getIdentityMatrix(n) {
  const indices = d3.range(n);
  return indices.map(row =>
    indices.map(column => (column === row ? 1 : 0))
  );
}

// Array of n zeros.
function getZeroVector(n) {
  const zeros = d3.range(n);
  zeros.fill(0);
  return zeros;
}

</script>