block by armollica 82c136720d31633bc79f1ea121a422e2

K-Nearest Neighbors

Full Screen

K-nearest neighbors search of a 2D set of points. Move the slider or scroll to change k.

The algorithm was adapted from this block

index.html

<html>
  <head>
    <style>
      body {
        font-family: monospace;
      }
      
      .k {
        position: absolute;
        left: 10px;
        top: 10px;
      }
      
      .k span {
        position: relative;
        bottom: 7px;
        padding: 0 10px;
      }
      
      .hull {
        fill: tomato;
        fill-opacity: 0.25;
        stroke: tomato;
        pointer-events: none;
      }
    </style>
  </head>
  <body>
    <div class="k">
     <span>K = 25</span><input type="range" name="k" value="25" min="1">
    </div>
    
     <script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
     <script src="k-nearest-neighbors.js"></script>
     <script>
       var width = 960,
           height = 500,
           k = 25;
       
       var kDiv = d3.select(".k")
        .on("change", function() {
          k = +kDiv.select("input").node().value;
          d3.select(this).select("span").text("K = " + k);
          findNearest.k(k);
        });
       
       var svg = d3.select("body").append("svg")
        .attr("width", width)
        .attr("height", height);
      
       // add invisible rectangle that will pick up mouse movement
       svg.append("rect")
        .attr("width", width)
        .attr("height", height)
        .style("fill-opacity", 0)
        .on("mousemove", mousemove)
        .on("wheel", function() {
          var wheelUp = event.deltaY < 0;
          wheelUp ? k++ : k--;
          k = clamp(k, 1, 100); // limit k to be between 1 and 100
          kDiv.select("input").node().value = k;
          kDiv.select("span").text("K = " + k);
          findNearest.k(k);
          mousemove.call(this);
        });
      
      var data = d3.range(500)
        .map(function(d) {
          return {
            x: d3.random.normal(width/2, width/8)(),
            y: Math.random() * height
          };
        });
      
      var findNearest = d3.kNearestNeighbors()
        .extent([[-1, -1], [width + 1, height + 1]])
        .x(function(d) { return d.x; })
        .y(function(d) { return d.y; })
        .k(k)
        .data(data);
        
      var hull = svg.append("path").attr("class", "hull");
      
      var circles = svg.append("g").attr("class", "circles")
          .selectAll("circle").data(data)
        .enter().append("circle")
          .attr("cx", function(d) { return d.x; })
          .attr("cy", function(d) { return d.y; })
          .attr("r", 2)
          .style("opacity", 0.5);
      
      function mousemove() {
        preventDefault(event);
        
        var nearest = findNearest(d3.mouse(this));
        
        // Draw convex hull around k-nearest points (if k > 1)
        hull.datum(d3.geom.hull(nearest))
          .attr("d", function(d) { 
            return d.length > 1 ? "M" + d.join("L") + "Z" : null; 
          });
       
        // Get corresponding "nearest" data points from original data array 
        nearest = nearest
          .map(function(d) { return data[d.i]; });
        
        // Color nearest points red
        circles
          .style("fill", function(d) {
            return nearest.indexOf(d) !== -1 ? "red" : null;
          });
      }
      
      function clamp(d, min, max) {
        return d < min ? min : d > max ? max : d;
      }
      
      function preventDefault(e) {
        e = e || window.event;
        if (e.preventDefault) e.preventDefault();
        e.returnValue = false;  
      }
     </script>
  </body>
</html>

k-nearest-neighbors.js

// algorithm source: http://bl.ocks.org/llb4ll/8709363

d3.kNearestNeighbors = function() {
  var points = [],
      nodes,
      data,
      extent = null,
      k = 1,
      quadtree = d3.geom.quadtree(),
      x = function(d) { return d[0]; },
      y = function(d) { return d[1]; };
      
  function findNearest(point) {
    
    // TODO: make this more efficient by not recalculating quadtree at
    //       each call of findNearest()
    
    // Extract points from the data array
    points = data.map(function(d) { return [x(d), y(d)]; });
    
    // Add quadtree info to the points
    nodes = quadtreeify(points);

    // Flag k-nearest points by adding `selected` property set to `true`
    kNearest(new Array(nodes), [], point);
    
    // Return nearest points along with indices from origianl `data` array
    return points
      .map(function(d, i) {
        var datum = [d[0], d[1]];
        datum.i = i;
        return d.selected ? datum : null; 
      })
      .filter(function(d) { return d !== null; });
  }
  
  findNearest.extent = function(_) {
    if (!arguments.length) return extent;
    extent = _;
    quadtree.extent(extent);
    return findNearest;
  };
  
  findNearest.data = function(_) {
    if (!arguments.length) return data;
    data = _;
    return findNearest;
  };
  
  findNearest.k = function(_) {
    if (!arguments.length) return k;
    k = _;
    return findNearest;
  };
  
  findNearest.x = function(_) {
    if (!arguments.length) return x;
    x = _;
    return findNearest;
  };
  
  findNearest.y = function(_) {
    if (!arguments.length) return y;
    y = _;
    return findNearest;
  };
  
  return findNearest;
  
  // Add quadtree information to each point (i.e., rectangles, depth, ...)
  function quadtreeify(points) {
    var nodes = quadtree(points);
    nodes.depth = 0;
    nodes.visit(function(node, x1, y1, x2, y2) {
      node.x1 = x1;
      node.y1 = y1;
      node.x2 = x2;
      node.y2 = y2;
      for (var i = 0; i < 4; i++) {
        if (node.nodes[i]) node.nodes[i].depth = node.depth + 1;
      }
    });
    return nodes;
  }
  
  // calculate the euclidean distance of two points with coordinates a(ax, ay) and b(bx, by)
  function euclideanDistance(ax, ay, bx, by) {
    return Math.sqrt(Math.pow(ax - bx, 2) + Math.pow(ay - by, 2));
  }
  
  // calculate minimum distance between search point rectangles
  function minDistance(x, y, x1, y1, x2, y2) {
    var dx1 = x - x1,
        dx2 = x - x2,
        dy1 = y - y1,
        dy2 = y - y2;
    
    // x is between x1 and x2
    if (dx1 * dx2 < 0) {
      // (x, y) is inside the rectangle
      if (dy1 * dy2 < 0) {
        return 0; // return 0 as a point in the rectangle
      }
      return Math.min(Math.abs(dy1), Math.abs(dy2));
    }
    
    // y is between y1 and y2 (and not inside rectangle)
    if (dy1 * dy2 < 0) {
      return Math.min(Math.abs(dx1), Math.abs(dx2));
    }
    return Math.min( 
      Math.min(euclideanDistance(x,y,x1,y1), euclideanDistance(x,y,x2,y2)), 
      Math.min(euclideanDistance(x,y,x1,y2), euclideanDistance(x,y,x2,y1))
    );
  }
  
  // Find the nodes within the specified rectangle (used recursively)
  function kNearest(bestQueue, resultQueue, point) {
    var x = point[0],
        y = point[1];
    
    // sort children according to their minDistance/euclideanDistance to search point
    bestQueue.sort(function(a, b) {
      // add minDistance to nodes if not there already
      [a, b].forEach(function(d) {
        if (d.minDistance === undefined) {
          d.scanned = true;
          if (d.leaf) {
            d.point.scanned = true;
            d.minDistance = euclideanDistance(x, y, d.x, d.y);
          }
          else {
            d.minDistance = minDistance(x, y, d.x1, d.y1, d.x2, d.y2);
          }
        }
      });
      return b.minDistance - a.minDistance;
    });
    
    // add nearest leafs (if any)
    for (var i = bestQueue.length - 1; i >= 0; i--) {
      var elem = bestQueue[i];
      if (elem.leaf) {
        elem.point.selected = true;
        bestQueue.pop();
        resultQueue.push(elem);
      } else { break; }
      if (resultQueue.length >= k) break;
    }
    
    // check if enough points found
    if (resultQueue.length >= k || bestQueue.length == 0) { return; }
    else {
      // ...otherwise add child nodes to bestQueue and recurse
      var visitedNode = bestQueue.pop();
      visitedNode.visited = true;
      visitedNode.nodes.forEach(function(d) {
        bestQueue.push(d);
      });
      kNearest(bestQueue, resultQueue, point);
    }
  }
}