block by armollica 56217d01ddf1370773da

Multivariate linear regression

Full Screen

Multivariate linear regression

Click on the chart to change the model prediction data point (orange line) and see how the independent variables impact one another’s marginal effect on the dependent variable.

Shown are the model predictions (solid line) and 95% confidence regression interval (dashed line) for the following model:

mpg = B0 + B1 wt + B2 hp + B3 (wt x hp) + B4 wt2 + B5 hp2

where

mph = Miles/Gallon

wt = Weight (1000 lb)

hp = Gross horsepower

Because this model is linear in the parameters, Bj, it can be estimated using least-squares. The independent variables, weight and horsepower, enter the model nonlinearly and so the partial derivative of weight and horsepower will vary as weight and horsepower change. In other words, the “marginal effect” on fuel efficiency from an increase in a car’s weight is different depending upon whether the car was intially light or heavy.

The slopes on the above graphs show this marginal effect. The downward sloping functions confirm what would be expected; cars that weigh more and have more horsepower are predicted to be less fuel efficient (have lower miles/gallon).

The (slight) nonlinearity in the regression equation shows that an increase in a car’s horsepower has a varying impact on fuel efficiency depending on whether the car initially had a lot of horsepower or just a little. The slope is steeper at lower horsepower levels and flatter at higher horsepower levels. This implies that an increasing a car’s horsepower by 20 is predicted to decrease fuel efficiency more for a low-horsepower car than it is for a high-horsepower car.

Because of the cross-product term, (wt x hp), the marginal effect of a car’s weight on miles/gallon depends upon the horsepower of the car and vice versa. Clicking on the chart changes the weight and horsepower. Changing the weight changes the marginal effect for horsepower and changing the horsepower changes the marginal effect for weight. Increasing a car’s weight makes the regression equation for horsepower flatter and lower. This implies that for heavier cars, horsepower plays a smaller role in determining fuel efficiency. Note that the orange indicator line cross the model prediction line at the same place.

Here are the equations for the marginal effects (i.e., partial derivatives of mpg):

mpg’wt = B1 + B3 hp + 2 B4 wt

mpg’hp = B2 + B3 wt + 2 B5 hp

Notes: The model and data used here are for demonstration purposes and the model predictions are very weak approximations to a complex system. To see the model results open up the console log in the browser (F12 on Chrome). The data source is mtcars from R’s dataset package. Formulas for regression interval and White’s scaled variance-covariance matrix were found here: http://www.ssc.wisc.edu/~bhansen/econometrics/ Using science.js for linear algebra functions.

index.html

<html>
  <head>
   <style>
    body {
      font: 12px sans-serif;
    }
    
    .axis path,
    .axis line {
      fill: none;
      stroke: black;
      stroke-width: 1px;
    }
    
    .line {
      fill: none;
      stroke: black;
    }
    
    .line.upper,
    .line.lower {
      stroke-dasharray: 5, 5;
    }
    
    .marker {
      stroke: tomato;
      stroke-width: 2px;
    }
    
   </style>
  </head>
  <body>
    <script src="//d3js.org/d3.v3.min.js" charset="utf-8"></script>
    <script src="science.v1.min.js"></script>
    <script src="lm.js"></script>
    <script src="line-chart.js"></script>
    <script>
      var margin = { top: 10, left: 30, bottom: 50, right: 10 },
          width = 480 - margin.left - margin.right,
          height = 300 - margin.top - margin.bottom;
      
      var lineChart = d3.lineChart() // see line-chart.js
        .width(width)
        .height(height)
        .yExtent([0, 32]);

      [ { key: "wt", name: "Weight (1000 lbs)"}, 
        { key: "hp", name: "Gross horsepower" }]
        .forEach(function(d) {
          var container = d3.select("body").append("div")
            .attr("class", "container " + d.key)
            .style("position", "relative")
            .style("float", "left");
            
          var svg = container.append("svg")
            .attr("width", width + margin.left + margin.right)
            .attr("height", height + margin.top + margin.bottom)
          .append("g")
            .attr("class", "canvas")
            .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
          
          svg.append("line")
            .attr("class", "marker");
          
          svg.append("g").attr("class", "x axis")
              .attr("transform", "translate(0," + height + ")")
            .append("text")
              .attr("class", "label")
              .attr("x", width)
              .attr("y", -6)
              .style("text-anchor", "end")
              .text(d.name);
              
          svg.append("g").attr("class", "y axis")
            .append("text")
              .attr("class", "label")
              .attr("y", 6)
              .attr("dx", ".71em")
              .style("text-anchor", "start")
              .text("Miles/Gallon")
            
          svg.append("rect")
            .attr("class", "event-rect")
            .attr("width", width)
            .attr("height", height)
            .style("fill-opacity", 0);
        });
        
      d3.json("cars.json", function(error, cars) {
        if (error) throw error;
        
        var model = lm()  // see lm.js
          .y(function(d) { return d.mpg; })
          .X(function(d) { 
            return [1, d.wt, d.hp, d.wt*d.hp, Math.pow(d.wt, 2), Math.pow(d.hp, 2)]; 
          })
          .data(cars);
          
        var fit = model();
        
        console.log(fit);
        
        var wt = d3.mean(cars, pluck("wt"));
        var wtExtent = d3.extent(cars, pluck("wt"));
        
        var hp = d3.mean(cars, pluck("hp"));
        var hpExtent = d3.extent(cars, pluck("hp"));
          
        d3.select(".wt .event-rect")
          .on("click", function() {
            var x = d3.mouse(this)[0];
            var wtInterval = renderWtInterval(hp);
                wt = wtInterval.xScale.invert(x);
            
            var hpInterval = renderHpInterval(wt);
            
            d3.select(".wt .marker")
              .attr("x1", x)
              .attr("x2", x);
          });
          
        d3.select(".hp .event-rect")
          .on("click", function() {
            var x = d3.mouse(this)[0];
            var hpInterval = renderHpInterval(wt);
                hp = hpInterval.xScale.invert(x);
            
            var wtInterval = renderWtInterval(hp);
            
            d3.select(".hp .marker")
              .attr("x1", x)
              .attr("x2", x);
          });
        
        var wtInterval = renderWtInterval(hp);
        var wtScale = wtInterval.xScale;
            d3.select(".wt .marker")
              .attr("x1", wtScale(wt))
              .attr("x2", wtScale(wt))
              .attr("y1", height)
              .attr("y2", 0);
        
        var hpInterval = renderHpInterval(wt);
        var hpScale = hpInterval.xScale;
            d3.select(".hp .marker")
              .attr("x1", hpScale(hp))
              .attr("x2", hpScale(hp))
              .attr("y1", height)
              .attr("y2", 0);
       
        function renderWtInterval(hp) {
          var wt_data = sequence(cars, pluck("wt"), 40)
            .map(function(wt) {
              var interval = fit.getRegressionInterval({ wt: wt, hp: hp });
              return {
                wt: wt,
                y_pred: interval.y_pred,
                y_upper: interval.y_upper,
                y_lower: interval.y_lower
              };
            });
          
          wtChart = lineChart
            .x(function(d) { return d.wt; });
          
          d3.select(".wt svg").select(".canvas")
            .call(wtChart.y(pluck("y_pred")), "y line", wt_data)
            .call(wtChart.y(pluck("y_lower")), "lower line", wt_data)
            .call(wtChart.y(pluck("y_upper")), "upper line", wt_data);
          
          d3.select(".wt svg").select(".x.axis").call(wtChart.xAxis());
          d3.select(".wt svg").select(".y.axis").call(wtChart.yAxis());
          
          return {
            data: wt_data,
            xScale: wtChart.xScale(),
            yScale: wtChart.yScale()
          };
        }
         
        function renderHpInterval(wt) {
          var wt_mean = d3.mean(cars, pluck("wt"));
          var hp_data = sequence(cars, pluck("hp"), 40)
            .map(function(hp) {
              var interval = fit.getRegressionInterval({ wt: wt, hp: hp });
              return {
                hp: hp,
                y_pred: interval.y_pred,
                y_upper: interval.y_upper,
                y_lower: interval.y_lower
              };
            });
          
          hpChart = lineChart
            .x(function(d) { return d.hp; });
            
          d3.select(".hp svg").select(".canvas")
            .call(hpChart.y(pluck("y_pred")), "y line", hp_data)
            .call(hpChart.y(pluck("y_lower")), "lower line", hp_data)
            .call(hpChart.y(pluck("y_upper")), "upper line", hp_data);
            
          d3.select(".hp svg").select(".x.axis").call(hpChart.xAxis());
          d3.select(".hp svg").select(".y.axis").call(hpChart.yAxis());
          
          return {
            data: hp_data,
            xScale: hpChart.xScale(),
            yScale: hpChart.yScale()
          };
        }

      });
      
      // Returns accessor function for a given key
      // makes accessing "columns" from associative arrays easier
      function pluck(key) { return function(d) { return d[key]; }; }
      
      // Returns a sequence of numbers (of specified length) between the min 
      // and max of an array
      function sequence(data, accessor, length_out) {
        var extent = d3.extent(data, accessor);
        var step = (extent[1] - extent[0])/length_out;
        return d3.range(extent[0], extent[1], step);
      }
    </script>
  </body>
</html>

cars.json

[{"mpg":21,"cyl":6,"disp":160,"hp":110,"drat":3.9,"wt":2.62,"qsec":16.46,"vs":0,"am":1,"gear":4,"carb":4,"_row":"Mazda RX4"},{"mpg":21,"cyl":6,"disp":160,"hp":110,"drat":3.9,"wt":2.875,"qsec":17.02,"vs":0,"am":1,"gear":4,"carb":4,"_row":"Mazda RX4 Wag"},{"mpg":22.8,"cyl":4,"disp":108,"hp":93,"drat":3.85,"wt":2.32,"qsec":18.61,"vs":1,"am":1,"gear":4,"carb":1,"_row":"Datsun 710"},{"mpg":21.4,"cyl":6,"disp":258,"hp":110,"drat":3.08,"wt":3.215,"qsec":19.44,"vs":1,"am":0,"gear":3,"carb":1,"_row":"Hornet 4 Drive"},{"mpg":18.7,"cyl":8,"disp":360,"hp":175,"drat":3.15,"wt":3.44,"qsec":17.02,"vs":0,"am":0,"gear":3,"carb":2,"_row":"Hornet Sportabout"},{"mpg":18.1,"cyl":6,"disp":225,"hp":105,"drat":2.76,"wt":3.46,"qsec":20.22,"vs":1,"am":0,"gear":3,"carb":1,"_row":"Valiant"},{"mpg":14.3,"cyl":8,"disp":360,"hp":245,"drat":3.21,"wt":3.57,"qsec":15.84,"vs":0,"am":0,"gear":3,"carb":4,"_row":"Duster 360"},{"mpg":24.4,"cyl":4,"disp":146.7,"hp":62,"drat":3.69,"wt":3.19,"qsec":20,"vs":1,"am":0,"gear":4,"carb":2,"_row":"Merc 240D"},{"mpg":22.8,"cyl":4,"disp":140.8,"hp":95,"drat":3.92,"wt":3.15,"qsec":22.9,"vs":1,"am":0,"gear":4,"carb":2,"_row":"Merc 230"},{"mpg":19.2,"cyl":6,"disp":167.6,"hp":123,"drat":3.92,"wt":3.44,"qsec":18.3,"vs":1,"am":0,"gear":4,"carb":4,"_row":"Merc 280"},{"mpg":17.8,"cyl":6,"disp":167.6,"hp":123,"drat":3.92,"wt":3.44,"qsec":18.9,"vs":1,"am":0,"gear":4,"carb":4,"_row":"Merc 280C"},{"mpg":16.4,"cyl":8,"disp":275.8,"hp":180,"drat":3.07,"wt":4.07,"qsec":17.4,"vs":0,"am":0,"gear":3,"carb":3,"_row":"Merc 450SE"},{"mpg":17.3,"cyl":8,"disp":275.8,"hp":180,"drat":3.07,"wt":3.73,"qsec":17.6,"vs":0,"am":0,"gear":3,"carb":3,"_row":"Merc 450SL"},{"mpg":15.2,"cyl":8,"disp":275.8,"hp":180,"drat":3.07,"wt":3.78,"qsec":18,"vs":0,"am":0,"gear":3,"carb":3,"_row":"Merc 450SLC"},{"mpg":10.4,"cyl":8,"disp":472,"hp":205,"drat":2.93,"wt":5.25,"qsec":17.98,"vs":0,"am":0,"gear":3,"carb":4,"_row":"Cadillac Fleetwood"},{"mpg":10.4,"cyl":8,"disp":460,"hp":215,"drat":3,"wt":5.424,"qsec":17.82,"vs":0,"am":0,"gear":3,"carb":4,"_row":"Lincoln Continental"},{"mpg":14.7,"cyl":8,"disp":440,"hp":230,"drat":3.23,"wt":5.345,"qsec":17.42,"vs":0,"am":0,"gear":3,"carb":4,"_row":"Chrysler Imperial"},{"mpg":32.4,"cyl":4,"disp":78.7,"hp":66,"drat":4.08,"wt":2.2,"qsec":19.47,"vs":1,"am":1,"gear":4,"carb":1,"_row":"Fiat 128"},{"mpg":30.4,"cyl":4,"disp":75.7,"hp":52,"drat":4.93,"wt":1.615,"qsec":18.52,"vs":1,"am":1,"gear":4,"carb":2,"_row":"Honda Civic"},{"mpg":33.9,"cyl":4,"disp":71.1,"hp":65,"drat":4.22,"wt":1.835,"qsec":19.9,"vs":1,"am":1,"gear":4,"carb":1,"_row":"Toyota Corolla"},{"mpg":21.5,"cyl":4,"disp":120.1,"hp":97,"drat":3.7,"wt":2.465,"qsec":20.01,"vs":1,"am":0,"gear":3,"carb":1,"_row":"Toyota Corona"},{"mpg":15.5,"cyl":8,"disp":318,"hp":150,"drat":2.76,"wt":3.52,"qsec":16.87,"vs":0,"am":0,"gear":3,"carb":2,"_row":"Dodge Challenger"},{"mpg":15.2,"cyl":8,"disp":304,"hp":150,"drat":3.15,"wt":3.435,"qsec":17.3,"vs":0,"am":0,"gear":3,"carb":2,"_row":"AMC Javelin"},{"mpg":13.3,"cyl":8,"disp":350,"hp":245,"drat":3.73,"wt":3.84,"qsec":15.41,"vs":0,"am":0,"gear":3,"carb":4,"_row":"Camaro Z28"},{"mpg":19.2,"cyl":8,"disp":400,"hp":175,"drat":3.08,"wt":3.845,"qsec":17.05,"vs":0,"am":0,"gear":3,"carb":2,"_row":"Pontiac Firebird"},{"mpg":27.3,"cyl":4,"disp":79,"hp":66,"drat":4.08,"wt":1.935,"qsec":18.9,"vs":1,"am":1,"gear":4,"carb":1,"_row":"Fiat X1-9"},{"mpg":26,"cyl":4,"disp":120.3,"hp":91,"drat":4.43,"wt":2.14,"qsec":16.7,"vs":0,"am":1,"gear":5,"carb":2,"_row":"Porsche 914-2"},{"mpg":30.4,"cyl":4,"disp":95.1,"hp":113,"drat":3.77,"wt":1.513,"qsec":16.9,"vs":1,"am":1,"gear":5,"carb":2,"_row":"Lotus Europa"},{"mpg":15.8,"cyl":8,"disp":351,"hp":264,"drat":4.22,"wt":3.17,"qsec":14.5,"vs":0,"am":1,"gear":5,"carb":4,"_row":"Ford Pantera L"},{"mpg":19.7,"cyl":6,"disp":145,"hp":175,"drat":3.62,"wt":2.77,"qsec":15.5,"vs":0,"am":1,"gear":5,"carb":6,"_row":"Ferrari Dino"},{"mpg":15,"cyl":8,"disp":301,"hp":335,"drat":3.54,"wt":3.57,"qsec":14.6,"vs":0,"am":1,"gear":5,"carb":8,"_row":"Maserati Bora"},{"mpg":21.4,"cyl":4,"disp":121,"hp":109,"drat":4.11,"wt":2.78,"qsec":18.6,"vs":1,"am":1,"gear":4,"carb":2,"_row":"Volvo 142E"}]

line-chart.js

d3.lineChart = function() {
  var width = 400,
      height = 300,
      xExtent, yExtent,
      x, y,
      xScale = d3.scale.linear(), 
      yScale = d3.scale.linear(),
      path = d3.svg.line();
  
  function chart(selection, lineClass, data) {
    xScale
      .domain(xExtent || d3.extent(data, x))
      .range([0, width]);
    
    yScale
      .domain(yExtent || d3.extent(data, y))
      .range([height, 0]);
      
    path
      .x(function(d) { return xScale(x(d)); })
      .y(function(d) { return yScale(y(d)); });
    
    var lineSelectorString = "." + (lineClass.replace(" ", "."));
    
    var lines = selection.selectAll(lineSelectorString).data([data]);
    
    lines.enter().append("path")
      .attr("class", lineClass);
    
    lines
      .transition().duration(333)
        .attr("d", path);
      
    lines.exit().remove();
  }
  
  chart.width = function(_) {
    if (!arguments.length) return width;
    width = _;
    return chart;
  };
  
  chart.height = function(_) {
    if (!arguments.length) return height;
    height = _;
    return chart;
  };
  
  chart.x = function(_) {
    if (!arguments.length) return x;
    x = _;
    return chart;
  };
  
  chart.y = function(_) {
    if (!arguments.length) return y;
    y = _;
    return chart;
  };
  
  chart.xExtent = function(_) {
    if (!arguments.length) return xExtent;
    xExtent = _;
    return chart;
  };
  
  chart.yExtent = function(_) {
    if (!arguments.length) return yExtent;
    yExtent = _;
    return chart;
  };
  
  // Getter functions
  chart.xAxis = function() { return d3.svg.axis().scale(xScale).orient("bottom"); };
  chart.yAxis = function() { return d3.svg.axis().scale(yScale).orient("left"); };
  chart.xScale = function() { return xScale; };
  chart.yScale = function() { return yScale; };
  
  return chart;
}

lm.js

// Multivariate linear regression
function lm() {
  
  //make science.lin function easier to reference
  var m = science.lin.multiply,
      t = science.lin.transpose,
      inv = science.lin.inverse;
      
  var _y, _X, _data;
  
  function model() {
    var y = vectorToMatrix(_data.map(_y));  // dependent variable (n x 1 matrix)
    var X = _data.map(_X);                  // independent variables (n x k matrix)
    var n = X.length;                       // n = # of observations
    var k = X[0].length;                    // k = # of model parameters
    var invXX = inv(m(t(X), X));            // (X'X)^1  (k x k matrix)
    var B = m(invXX, m(t(X), y));           // model parameters (k x 1 matrix)
    var y_pred = m(X, B);                   // in-sample model predictions
    var e = d3.range(n).map(function(i) {   // prediction error
      return [y[i][0] - y_pred[i][0]];
    });
    
    // Variance-covariance matrix using scaled White's formula
    // (see chapter 4 from http://www.ssc.wisc.edu/~bhansen/econometrics/)
    var u = X.map(function(x, i) {
      return x.map(function(d) { return d * e[i][0]; });
    });
    var whites_scaler = n/(n-k);
    var V = m(invXX, m(m(t(u), u), invXX))
      .map(function(row) {
        return row.map(function(d) { return whites_scaler * d; });
      });
    
    // parameter standard errors
    var se = diag(V).map(function(d) { return [Math.sqrt(d[0])]; }); 
     
    // t-statistics
    var tstat = B.map(function(b, i) { return [b[0] / se[i][0]]; });
    
    return {
      B: B,
      se: se,
      V: V,
      t: tstat,
      getRegressionInterval: getRegressionInterval
    };
    
    // get asymptotic 95% confidence regression interval 
    // at a given data point
    function getRegressionInterval(d) {
      var x = [_X(d)]; // 1 x k
      var y_pred = m(x, B)[0][0];
      var interval = 1.96 * Math.sqrt(m(m(x, V), t(x))[0][0]);
      return {
        y_lower: y_pred - interval,
        y_pred: y_pred,
        y_upper: y_pred + interval
      };
    }
  }
  
  model.y = function(_) {
    if (!arguments.length) return _y;
    _y = _;
    return model;
  };
  
  model.X = function(_) {
    if (!arguments.length) return _X;
    _X = _;
    return model;
  };
  
  model.data = function(_) {
    if (!arguments.length) return _data;
    _data = _;
    return model;
  };
  
  return model;
  
  // convert to/from "array of numbers" from/to "1D matrix"
  function vectorToMatrix(_) { return _.map(function(d) { return [d]; }); }
  function matrixToVector(_) { return _.map(function(d) { return d[0]; }); }
  
  // create matrix of size r x c with all 1's
  function ones(r, c) {
    return d3.range(r).map(function() {
      return d3.range(c).map(function() { return 1; });
    });
  }
  
  // get diagonal vector from square matrix
  function diag(x) { return x.map(function(d, j) { return [d[j]]; }); }
}