block by armollica 0f28622e38434b04907d768df6cec684

Visualizing Distributions

Full Screen

A few ways to visualize 1D distributions.

Swarm and pile plots show every data point, so they become impractical for large datasets: each point needs its own mark. Histograms and boxplots work for any number of data points because they visualize summary statistics rather than individual points. Histograms provide more information than a boxplot. Boxplots fit in small spaces, making them nice for comparing many distributions side-by-side.

Other chart types for 1D distributions: violin plot, kernel density, empirical CDF.

The d3.forceChart() plugin is used for the swarm and pile plots; d3.layout.histogram() for the histogram; d3.scale.quantile() for the quartile summary statistics used in the boxplot.

index.html

<html>
  <head>
    <style>
      body { font-family: monospace; }
      .axis line,
      .axis path { fill: none; }
      .y.axis line { stroke: black; }
      .x.axis { 
        font-size: 16px;
        font-weight: bold; 
      }
    </style>
  </head>
  <body>
     <script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
     <script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.11.1/lodash.min.js"></script>
     <script src="force-chart.js"></script>
     <script>
       // d3 margin convention: `width` and `height` describe the inner drawing
       // area; the margins are added back when the <svg> element is sized.
       var margin = { top: 30, left: 50, bottom: 30, right: 10 },
           width = 960 - margin.left - margin.right,
           height = 500 - margin.top - margin.bottom;
       
       // One horizontal band per chart type; the names double as x-axis labels.
       var chartTypes = ["Swarm", "Pile", "Histogram", "Boxplot"];
       
       var chartScale = d3.scale.ordinal()
            .domain(chartTypes)
            .rangeRoundBands([0, width], .3),
           // Maps a histogram bin count to a bar length that fits inside one
           // band. Its domain is assigned inside histogram().
           barScale = d3.scale.linear()
            .range([0, chartScale.rangeBand()]),
           // Vertical scale shared by all four charts. The domain is assigned
           // and rounded ("niced") further down, once the data exists: calling
           // .nice() here would only round the default domain and be discarded
           // as soon as the real domain is set.
           yScale = d3.scale.linear().range([height, 0]);
       
       var xAxis = d3.svg.axis().scale(chartScale).orient("top"),
           yAxis = d3.svg.axis().scale(yScale).orient("left");
       
       // Swarm: every node targets x = 0 with a weak horizontal pull and a
       // strong vertical pull, so collisions spread nodes sideways while they
       // stay close to their true y value.
       var swarmChart = d3.forceChart()
        .padding(1)
        .x(0)
        .y(function(d) { return yScale(d.yVal); })
        .r(1.5)
        .xGravity(1/5)
        .yGravity(100)
        .draggable(false);
        
       // Pile: same targets as the swarm, but the horizontal pull is very
       // strong (400) for nodes at or left of x = 0 and weak (1/2) for nodes to
       // the right, so nodes stack up on the right-hand side of the baseline.
       var pileChart = d3.forceChart()
        .padding(1)
        .x(0)
        .y(function(d) { return yScale(d.yVal); })
        .r(1.5)
        .xGravity(function(d) { return d.x <= 0 ? 400 : 1/2; })
        .yGravity(100)
        .draggable(false);
       
       var svg = d3.select("body").append("svg")
          .attr("width", width + margin.left + margin.right)
          .attr("height", height + margin.top + margin.bottom)
        .append("g")
          .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
       
       // 700 random samples from a log-normal distribution. The generator is
       // created once and reused for every sample.
       var randomValue = d3.random.logNormal(1, 0.5);
       var data = d3.range(700)
          .map(function() { return { yVal: randomValue() }; });
       
       // Fit the y scale to the data, then extend the domain to round values.
       // nice() only adjusts the domain that is current when it is called, so
       // it has to come after domain().
       yScale.domain(d3.extent(data, function(d) { return d.yVal; })).nice();
       
       // Draw axes
       svg.append("g").call(xAxis)
        .attr("class", "x axis");
       svg.append("g").call(yAxis)
        .attr("class", "y axis");
       
       // Swarm chart, centred inside its band. The chart component gets a deep
       // copy of the data because d3.forceChart writes layout fields
       // (x, y, r, x0, y0, r0) onto every datum it is given.
       var swarmOffset = chartScale("Swarm") + chartScale.rangeBand()/2;
       var swarmGroup = svg.append("g");
       swarmChart(swarmGroup, _.cloneDeep(data));
       swarmGroup
          .attr("class", "swarm")
          .attr("transform", "translate(" + swarmOffset + ",0)");
       swarmGroup.selectAll(".node").append("circle")
          .attr("r", function(d) { return d.r; });
          
       // Step the simulation by hand for a fixed number of ticks, then stop it
       // so the layout stays frozen.
       var swarmForce = swarmChart.force();
       swarmForce.stop().start();
       for (var swarmStep = 0; swarmStep < 200; swarmStep++) {
         swarmForce.tick();
       }
       swarmForce.stop();
       
       // Pile chart, anchored at the left edge of its band (its own copy of the
       // data, for the same reason as above).
       var pileGroup = svg.append("g");
       pileChart(pileGroup, _.cloneDeep(data));
       pileGroup
          .attr("class", "pile")
          .attr("transform", "translate(" + chartScale("Pile") + ",0)");
       pileGroup.selectAll(".node").append("circle")
          .attr("r", function(d) { return d.r; });
          
       // The pile is stepped for more ticks than the swarm before freezing.
       var pileForce = pileChart.force();
       pileForce.stop().start();
       for (var pileStep = 0; pileStep < 500; pileStep++) {
         pileForce.tick();
       }
       pileForce.stop();
       
       // Histogram, anchored at the left edge of its band.
       var histogramGroup = svg.append("g");
       histogram(histogramGroup, data);
       histogramGroup
        .attr("class", "histogram")
        .attr("transform", "translate(" + chartScale("Histogram") + ",0)");
       
       // Boxplot, horizontally centred inside its band.
       var boxWidth = 20;
       var boxplotOffset = chartScale("Boxplot") + chartScale.rangeBand()/2 - boxWidth/2;
       var boxplotGroup = svg.append("g");
       boxplot(boxplotGroup, data, boxWidth);
       boxplotGroup
        .attr("class", "boxplot")
        .attr("transform", "translate(" + boxplotOffset + ",0)");
        
      /**
       * Draws a sideways histogram (one horizontal bar per bin) into `selection`.
       *
       * Bin thresholds are the y-axis tick values, so bars line up with the
       * shared `yScale`. Bar length encodes the bin count through `barScale`,
       * whose domain is reset here to [0, largest bin count].
       *
       * @param {d3.selection} selection - container the `rect.bar` elements are appended to
       * @param {Array<{yVal: number}>} data - observations to bin on `yVal`
       */
      function histogram(selection, data) {
        
        var thresholds = yScale.ticks(25);
        
        var layout = d3.layout.histogram()
          .bins(thresholds)
          .value(function(datum) { return datum.yVal; });
        var binned = layout(data);
        
        var largestCount = d3.max(binned, function(bin) { return bin.y; });
        barScale.domain([0, largestCount]);
        
        // Pixel distance between the first two thresholds is used as the
        // height of every bin; the padding leaves a gap between bars.
        var barPadding = 2;
        var barHeight = yScale(thresholds[0]) - yScale(thresholds[1]);
        
        var bars = selection.selectAll(".bar").data(binned)
          .enter().append("rect");
        
        bars.attr("class", "bar");
        
        // Each bar is translated to the pixel position of its bin's lower
        // bound (bin.x) and then drawn upwards from there via a negative y.
        bars.attr("transform", function(bin) {
          return "translate(0," + yScale(bin.x) + ")"; 
        });
        bars
          .attr("y", barPadding - barHeight)
          .attr("height", barHeight - barPadding)
          .attr("width", function(bin) { return barScale(bin.y); });
      }
      
      /**
       * Draws a vertical boxplot of `data` into `selection`.
       *
       * The box spans the first to third quartile, a line marks the median,
       * and the whiskers run to the minimum and maximum of the data (no
       * outlier rule is applied). Vertical positions come from `yScale`.
       *
       * @param {d3.selection} selection - container the shapes are appended to
       * @param {Array<{yVal: number}>} data - observations summarized on `yVal`
       * @param {number} boxWidth - width of the box and of the horizontal lines, in pixels
       */
      function boxplot(selection, data, boxWidth) {
        
        var values = data.map(function(datum) { return datum.yVal; });
        
        // A quantile scale with a four-value range yields three thresholds:
        // first quartile, median and third quartile.
        var thresholds = d3.scale.quantile()
          .domain(values)
          .range(d3.range(4))
          .quantiles();
        var lowerQuartile = thresholds[0],
            median = thresholds[1],
            upperQuartile = thresholds[2];
        
        var range = d3.extent(values),
            minimum = range[0],
            maximum = range[1];
        
        function toY(value) { return yScale(value); }
        
        // Dashed whisker down the middle, from maximum to minimum. It is
        // appended first so the white box drawn next covers its middle part.
        var centerX = boxWidth/2;
        var whisker = selection.append("path")
          .datum([
            [centerX, toY(maximum)],
            [centerX, toY(minimum)]
          ]);
        whisker
          .attr("d", d3.svg.line())
          .style("stroke", "black")
          .style("stroke-dasharray", "4, 10");
        
        // Box from the third quartile (top edge) down to the first quartile.
        var boxTop = toY(upperQuartile);
        selection.append("rect")
          .attr("y", boxTop)
          .attr("height", toY(lowerQuartile) - boxTop)
          .attr("width", boxWidth)
          .attr("fill", "white")
          .attr("stroke", "black");
        
        // Horizontal lines at the maximum, the median and the minimum.
        var markers = [maximum, median, minimum];
        selection.selectAll("line").data(markers)
          .enter().append("line")
            .attr("x2", boxWidth)
            .attr("y1", toY)
            .attr("y2", toY)
            .attr("stroke", "black");
      }
     </script>
  </body>
</html>

force-chart.js

/**
 * d3.forceChart() - reusable chart component built on d3.layout.force (d3 v3).
 *
 * Each node is pulled toward a target position and radius (the x/y/r
 * accessors) while a collision pass keeps nodes from overlapping. The force
 * layout's own charge and gravity are turned off; both effects are applied by
 * hand on every tick.
 *
 * Usage: selection.call(chart, nodes). The component appends one `g.node`
 * element per datum; drawing the actual shape inside it is left to the caller.
 *
 * All configuration methods are getter/setters: called without arguments they
 * return the current value, otherwise they store the value and return the
 * chart for chaining.
 *
 * @returns {function(selection, nodes)} the configurable chart function
 */
d3.forceChart = function() {
  var width = 400, 
      height = 300, 
      padding = 3,
      x = function(d) { return d[0]; },
      y = function(d) { return d[1]; },
      r = function(d) { return d[2]; },
      // Default start position: the target position shifted by a random
      // offset in [-25, 25). These read `x`/`y` when they are invoked, so
      // they follow accessors installed later through chart.x()/chart.y().
      xStart = function(d) { return x(d) + 50*Math.random() - 25; },
      yStart = function(d) { return y(d) + 50*Math.random() - 25; },
      rStart = function(d) { return r(d); },
      draggable = true,
      xGravity = function(d) { return 1; },
      yGravity = function(d) { return 1; },
      rGravity = function(d) { return 1; },
      shape = "circle",
      tickUpdate = function() {};
  
  // One force layout per chart instance, exposed through chart.force().
  var force = d3.layout.force()
    .charge(0)
    .gravity(0);
  
  // Shared setter logic for the accessor options: a number becomes a constant
  // accessor, a function is used as-is, and any other value leaves the
  // current accessor in place.
  function toAccessor(value, current) {
    if (typeof value === "number") {
      return function() { return value; };
    }
    if (typeof value === "function") {
      return value;
    }
    return current;
  }
  
  /**
   * Binds `nodes` to new `g.node` elements inside `selection` and starts the
   * force layout on them.
   *
   * Every datum is mutated: x/y/r receive the start values and x0/y0/r0 the
   * target values. Pass a copy if the original objects must stay untouched.
   *
   * @param {d3.selection} selection - container for the node groups
   * @param {Array<Object>} nodes - data, one object per node
   */
  function chart(selection, nodes) {
    
    // Kept local on purpose: assigning an undeclared `collide` would create a
    // global shared by every chart instance and throws in strict mode.
    var collide;
    if (shape === "circle") { collide = collideCircle; }
    else if (shape === "square") { collide = collideSquare; }
    else {
      console.error("forceChart.shape must be 'circle' or 'square'");
      // Fall back to the default shape so the layout can still run.
      collide = collideCircle;
    }
    
    nodes = nodes
      .map(function(d) {
        d.x = xStart(d);
        d.y = yStart(d);
        d.r = rStart(d);
        d.x0 = x(d);
        d.y0 = y(d);
        d.r0 = r(d);
        return d;    
      });
      
    var gNodes = selection.selectAll(".node").data(nodes)
      .enter().append("g")
        .attr("class", "node");
    
    if (draggable) { gNodes.call(force.drag); }
        
    force
      .size([width, height])
      .nodes(nodes)
      .on("tick", tick)
      .start();
      
    // Per-tick update: pull toward the targets, resolve overlaps, move the
    // node groups, then hand the selection to the user's tickUpdate hook.
    function tick(e) {
      gNodes
        .each(gravity(e.alpha * .1))
        .each(collide(.5))
        .attr("transform", function(d) {
          return "translate(" + d.x + "," + d.y + ")";
        })
        .call(tickUpdate);
    }

    // Moves x, y and r a fraction of the way toward x0, y0 and r0. The
    // fraction is k scaled by the matching per-node gravity accessor.
    function gravity(k) {
      return function(d) {
        var dx = d.x0 - d.x,
            dy = d.y0 - d.y,
            dr = d.r0 - d.r;
            
        d.x += dx * k * xGravity(d);
        d.y += dy * k * yGravity(d);
        d.r += dr * k * rGravity(d);
      };
    }

    // Circle collisions: overlapping pairs (radii plus padding) are pushed
    // apart along the line joining their centres, each by k times the overlap.
    function collideCircle(k) {
      var q = d3.geom.quadtree(nodes);
      return function(node) {
        var nr = node.r + padding,
            nx1 = node.x - nr,
            nx2 = node.x + nr,
            ny1 = node.y - nr,
            ny2 = node.y + nr;
        q.visit(function(quad, x1, y1, x2, y2) {
          if (quad.point && (quad.point !== node)) {
            var x = node.x - quad.point.x,
                y = node.y - quad.point.y,
                l = x * x + y * y,
                r = nr + quad.point.r;
            // l > 0 skips exactly coincident nodes: a zero distance would
            // divide by zero below and write NaN into both positions.
            if (l < r * r && l > 0) {
              l = ((l = Math.sqrt(l)) - r) / l * k;
              node.x -= x *= l;
              node.y -= y *= l;
              quad.point.x += x;
              quad.point.y += y;
            }
          }
          // Returning true tells the quadtree not to descend into quadrants
          // that cannot overlap this node's bounding box.
          return x1 > nx2 || x2 < nx1 || y1 > ny2 || y2 < ny1;
        });
      };
    }
    
    // Square collisions: overlapping pairs are separated along a single
    // axis, the one on which their centres are further apart.
    function collideSquare(k) {
      var q = d3.geom.quadtree(nodes);
      return function(node) {
        var nr = node.r + padding,
            nx1 = node.x - nr,
            nx2 = node.x + nr,
            ny1 = node.y - nr,
            ny2 = node.y + nr;
        q.visit(function(quad, x1, y1, x2, y2) {
          if (quad.point && (quad.point !== node)) {
            var x = node.x - quad.point.x,
                y = node.y - quad.point.y,
                lx = Math.abs(x),
                ly = Math.abs(y),
                r = nr + quad.point.r;
            if (lx < r && ly < r) {
              if (lx > ly) {
                lx = (lx - r) * (x < 0 ? -k : k);
                node.x -= lx;
                quad.point.x += lx;
              } else {
                ly = (ly - r) * (y < 0 ? -k : k);
                node.y -= ly;
                quad.point.y += ly;
              }
            }
          }
          return x1 > nx2 || x2 < nx1 || y1 > ny2 || y2 < ny1;
        });
      };
    }
  }
  
  // [width, height] handed to the force layout's size().
  chart.size = function(_) {
    if (!arguments.length) return [width, height];
    width = _[0];
    height = _[1];
    return chart;
  };
  
  // Target x position: a number or function(d).
  chart.x = function(_) {
    if (!arguments.length) return x;
    x = toAccessor(_, x);
    return chart;
  };
  
  // Target y position: a number or function(d).
  chart.y = function(_) {
    if (!arguments.length) return y;
    y = toAccessor(_, y);
    return chart;
  };
  
  // Target radius: a number or function(d).
  chart.r = function(_) {
    if (!arguments.length) return r;
    r = toAccessor(_, r);
    return chart;
  };
  
  // Whether the force layout's drag behavior is attached to the nodes.
  chart.draggable = function(_) {
    if (!arguments.length) return draggable;
    draggable = _;
    return chart;
  };
  
  // Extra spacing added to a node's radius during collision detection.
  chart.padding = function(_) {
    if (!arguments.length) return padding;
    padding = _;
    return chart;
  };
  
  // Strength of the pull toward the target x: a number or function(d).
  chart.xGravity = function(_) {
    if (!arguments.length) return xGravity;
    xGravity = toAccessor(_, xGravity);
    return chart;
  };
  
  // Strength of the pull toward the target y: a number or function(d).
  chart.yGravity = function(_) {
    if (!arguments.length) return yGravity;
    yGravity = toAccessor(_, yGravity);
    return chart;
  };
  
  // Strength of the pull toward the target radius: a number or function(d).
  chart.rGravity = function(_) {
    if (!arguments.length) return rGravity;
    rGravity = toAccessor(_, rGravity);
    return chart;
  };
  
  // Initial x position: a number or function(d).
  chart.xStart = function(_) {
    if (!arguments.length) return xStart;
    xStart = toAccessor(_, xStart);
    return chart;
  };
  
  // Initial y position: a number or function(d).
  chart.yStart = function(_) {
    if (!arguments.length) return yStart;
    yStart = toAccessor(_, yStart);
    return chart;
  };
  
  // Initial radius: a number or function(d).
  chart.rStart = function(_) {
    if (!arguments.length) return rStart;
    rStart = toAccessor(_, rStart);
    return chart;
  };
  
  // Collision shape, "circle" or "square". It is validated when the chart
  // is called, not here.
  chart.shape = function(_) {
    if (!arguments.length) return shape;
    shape = _;
    return chart;
  };
  
  // Hook invoked on every tick with the node selection (via selection.call).
  chart.tickUpdate = function(_) {
    if (!arguments.length) return tickUpdate;
    tickUpdate = _;
    return chart;
  };
  
  // Read-only access to the underlying d3.layout.force instance.
  chart.force = function() {
    return force;
  };
  
  return chart;
};