Bayes (WIP) by tophtucker

From a friend: “1. What’s the probability of a HS athlete going pro? 2. Suppose we know a pro athlete. What’s the probability she was a college athlete?”

So I was thinking about my favorite intuitive illustrations and explanations of conditional probability and Bayes’ theorem, e.g.

I asked for help here, there’s some good discussion: https://math.stackexchange.com/questions/2407913/is-it-possible-to-divide-a-square-into-four-parts-of-arbitrary-size-with-two-lin

To-do:

Label areas: P(A ∧ B), P(A ∧ ¬B), P(¬A ∧ B), P(¬A ∧ ¬B)
Smoothly transition from A|B lines to B|A lines (ghosting the past lines and transitioning solid ones to new spots through the diagonal)
Show derivations and definitions on hover
Add a hover-sensitive plain text summary sentence caption (“If B is true, then the probability of A is the probability of A and B divided by the probability of B…”, highlighting the relevant quantities and areas, maybe color-coding)
Add covariance as a lever to fiddle with (closely related but not identical to the diagonal??)

One cool thing about this is that you can feel out which things are linear and which are not. The slope of the diagonal line is independent of P(A), which I would not have intuited. And P(A|B) and P(A|¬B) are nonlinear functions of P(A), P(B|A), and P(B|¬A), which I don’t think I had any intuition about, but feels central to a lot of counterintuitive results of conditional probability questions.

As nice as it is to “feel out”, I want to be able to SEE any of those things I feel — spatialize the state space. Plot everything as a function of everything else, see the steepest slopes, marginal sensitivities, etc. A kind of phase space, idk. Ideally with the same visualization. Explode it along a third axis of all possible values of the current parameter… yessssssss that’d be so good, so doable. Whichever parameter you’re currently holding, explode out all possible values along the z-axis, so you can see the nonlinear effects of dragging by dx.

I still want to make something that captures some feeling I have of weighing prior and posterior confidences, and the updating flowing one way or the other accordingly, almost hydraulically.

index.html

<!DOCTYPE html>
<meta charset="utf-8">

<style>

/* Let the SVG canvas fill the whole viewport. */
html, body, svg {
  margin: 0;
  padding: 0;
  width: 100%;
  height: 100%;
}

/* Outline only, so the lines drawn inside the square stay visible. */
rect {
  fill: none;
  stroke: black;
}

/* Default look for a line: solid black. */
line {
  stroke: black;
}

/* Lines for derived quantities are drawn as faint grey dashes. */
line.derived {
  stroke: #ddd;
  /*visibility: hidden;*/
  stroke-dasharray: 2,2;
}

/* The diagonals are still drawn by the script but are currently hidden. */
line.diagonal {
  stroke-dasharray: 2,2;
  stroke: #ddd;
  visibility: hidden;
}

/*line.diagonal.derived {
  visibility: hidden;
}*/

/* Labels: centred by default; the script overrides text-anchor per label. */
text {
  text-anchor: middle;
  font-family: sans-serif;
}

/* Labels of derived quantities are greyed out to match their lines. */
text.derived {
  fill: #ddd;
}

</style>

<body>

<svg></svg>

</body>

<script src="https://d3js.org/d3.v4.min.js"></script>
<script>

// Side lengths, in pixels, of the square the probabilities are drawn in.
var width = 300
var height = 300

// Linear scales taking a probability to a pixel offset inside the square
// (a d3 linear scale's domain defaults to [0, 1]).
var x = d3.scaleLinear().range([0, width])
var y = d3.scaleLinear().range([0, height])

// The three quantities that start out as user-controlled inputs, each
// initialised to a random value in [0, 1).
var p_a = Math.random()
var p_b_given_a = Math.random()
var p_b_given_not_a = Math.random()

// The three quantities that start out derived; they stay undefined until
// the first render() computes them from the inputs above.
var p_b
var p_a_given_b
var p_a_given_not_b

// One descriptor per probability shown in the diagram.
//
//   name       - caption text
//   axis       - 0: horizontal line positioned along y; 1: vertical line positioned along x
//   level      - 1: marginal, drawn across the whole square (and a little beyond);
//                0: conditional, drawn across one part of the square only
//   side       - for conditionals, which part: 0 = before the split, 1 = after it
//   derived    - true while the value is computed rather than user-controlled
//   derivation - computes the value from the three quantities of the other group
//   value      - getter when called without arguments, setter when given one
//
// Order matters: render() refreshes derived entries in array order, and each
// marginal precedes the conditionals whose derivations read it.
var variables = [
  {
    name: "P(A)",
    axis: 0,
    side: 0,
    level: 1,
    derived: false,
    // Law of total probability over B.
    derivation() { return p_b * p_a_given_b + (1 - p_b) * p_a_given_not_b },
    value(...next) { return next.length ? (p_a = next[0]) : p_a }
  },
  {
    name: "P(B|A)",
    axis: 1,
    side: 0,
    level: 0,
    derived: false,
    // Bayes' rule.
    derivation() { return p_a_given_b * p_b / p_a },
    value(...next) { return next.length ? (p_b_given_a = next[0]) : p_b_given_a }
  },
  {
    name: "P(B|¬A)",
    axis: 1,
    side: 1,
    level: 0,
    derived: false,
    // Bayes' rule with P(¬A|B) = 1 - P(A|B) and P(¬A) = 1 - P(A).
    derivation() { return (1 - p_a_given_b) * p_b / (1 - p_a) },
    value(...next) { return next.length ? (p_b_given_not_a = next[0]) : p_b_given_not_a }
  },
  {
    name: "P(B)",
    axis: 1,
    side: 0,
    level: 1,
    derived: true,
    // Law of total probability over A.
    derivation() { return p_a * p_b_given_a + (1 - p_a) * p_b_given_not_a },
    value(...next) { return next.length ? (p_b = next[0]) : p_b }
  },
  {
    name: "P(A|B)",
    axis: 0,
    side: 0,
    level: 0,
    derived: true,
    // Bayes' rule.
    derivation() { return p_b_given_a * p_a / p_b },
    value(...next) { return next.length ? (p_a_given_b = next[0]) : p_a_given_b }
  },
  {
    name: "P(A|¬B)",
    axis: 0,
    side: 1,
    level: 0,
    derived: true,
    // Bayes' rule with P(¬B|A) = 1 - P(B|A) and P(¬B) = 1 - P(B).
    derivation() { return (1 - p_b_given_a) * p_a / (1 - p_b) },
    value(...next) { return next.length ? (p_a_given_not_b = next[0]) : p_a_given_not_b }
  }
]

// Root group, shifted so the width x height square is centred in the window.
var svg = d3.select("svg").append("g")
svg.attr("transform", `translate(${innerWidth/2 - width/2}, ${innerHeight/2 - height/2})`)

// The two diagonals are appended first, so everything added later is
// painted on top of them.
var p_a_line_diagonal = svg.append("line")
var p_b_line_diagonal = svg.append("line")

// One <text class="label"> per variable.
var label = svg.selectAll("text.label").data(variables).enter()
  .append("text")
  .attr("class", "label")

// One <line class="varline"> per variable.
var line = svg.selectAll("line.varline").data(variables).enter()
  .append("line")
  .attr("class", "varline")

// Outline of the square, appended last.
var rect = svg.append("rect")

/**
 * Positions the line of every variable in `selection`.
 *
 * A line sits at its variable's current value on one axis and spans an
 * extent on the other axis: marginals (level 1) run from -0.1 to 1.1,
 * conditionals (level 0) cover only the part of the square on their side of
 * the other event's marginal (P(B) for axis 0, P(A) for axis 1).
 *
 * @param selection d3 selection of <line> elements bound to variable descriptors
 */
function renderLine(selection) {

  // [start, end] of the line across the square, in probability units.
  function extent(d, split) {
    if(d.level) return [-0.1, 1.1]
    return d.side ? [split, 1] : [0, split]
  }

  selection.each(function(d) {
    var node = d3.select(this)
    node.classed("derived", d.derived)

    if(d.axis===0) {
      // Horizontal line: fixed y, spans x around the P(B) split
      var across = extent(d, p_b)
      var atY = y(d.value())
      node
        .attr("y1", atY)
        .attr("y2", atY)
        .attr("x1", x(across[0]))
        .attr("x2", x(across[1]))

    } else if(d.axis===1) {
      // Vertical line: fixed x, spans y around the P(A) split
      var along = extent(d, p_a)
      var atX = x(d.value())
      node
        .attr("x1", atX)
        .attr("x2", atX)
        .attr("y1", y(along[0]))
        .attr("y2", y(along[1]))

    }
  })
}

/**
 * Captions, positions and wires up interaction for every variable label.
 *
 * Each label reads `name = value` (two decimals) and sits next to its line.
 * Derived labels are clickable: a click flips `derived` on every variable,
 * swapping inputs and derived values, then re-renders. Non-derived labels
 * are draggable along their axis, with the dragged value clamped to [0, 1].
 *
 * This runs on every render, so handlers are reconciled with the current
 * `derived` flag rather than only ever added: a label that stopped being
 * derived loses its click toggle, and a label that became derived loses its
 * drag behaviour. The drag behaviour is attached once per draggable stretch
 * instead of being rebuilt on each render, so a gesture in progress keeps
 * the listeners it started with.
 *
 * @param selection d3 selection of <text> elements bound to variable descriptors
 */
function renderLabel(selection) {
  selection.each(function(d) {
    var sel = d3.select(this).text(`${d.name} = ${d.value().toFixed(2)}`)
      .classed("derived", d.derived)

    // Passing null removes a click toggle left over from a render in which
    // this label was still derived.
    sel.on("click", d.derived ? toggleDerived : null)

    // d3.drag registers its listeners under the ".drag" name, so the
    // mousedown listener tells us whether a behaviour is already attached.
    var draggable = sel.on("mousedown.drag") != null

    if(d.derived) {
      sel.style("cursor", "pointer")
      if(draggable) sel.on(".drag", null)
    }

    if(d.axis===0) {
      // P(A), y-axis (vertical)
      sel
        .style("text-anchor", d.side ? "start" : "end")
        .attr("x", d.side ? x(1) : 0)
        .attr("y", y(d.value()))
        .attr("dx", (d.side ? 1 : -1) * (2 * d.level + 1) + "em")
        .attr("dy", ".25em")

      if(!d.derived) {
        sel.style("cursor", "ns-resize")
        if(!draggable) sel.call(dragAlong(y, "y"))
      }

    } else if(d.axis===1) {
      // P(B), x-axis (horizontal)
      sel
        .style("text-anchor", "middle")
        .attr("x", x(d.value()))
        .attr("y", d.side ? y(1) : 0)
        .attr("dx", 0)
        .attr("dy", .4 + (d.side ? 1 : -1) * (2 * d.level + 1) + "em")

      if(!d.derived) {
        sel.style("cursor", "ew-resize")
        if(!draggable) sel.call(dragAlong(x, "x"))
      }
    }
  })

  // Swaps which three quantities are inputs and which are derived.
  function toggleDerived() {
    variables.forEach(v => v.derived = !v.derived)
    render()
  }

  // Builds a drag behaviour that maps the pointer position along one axis
  // ("x" or "y") back to a probability, clamps it to [0, 1] and stores it.
  function dragAlong(scale, coordinate) {
    return d3.drag().on("drag", function(d) {
      var val = Math.max(Math.min(scale.invert(d3.event[coordinate]),1),0)
      d.value(val)
      d3.select(this).attr(coordinate, scale(d.value()))
      render()
    })
  }
}

render()

/**
 * Recomputes every derived probability, then redraws the square outline,
 * the variable labels and lines, and the two diagonals (which the
 * stylesheet currently hides).
 */
function render() {

  // Derived quantities are refreshed in array order, so a later derivation
  // sees the values written by earlier ones in the same pass.
  label.data().forEach(function(d) {
    if(d.derived) d.value(d.derivation())
  })

  // cov(X, Y) = E[XY] - E[X]E[Y]
  // Computed but not used for anything yet.
  var covariance = (p_a * p_b_given_a) - (p_a * p_b)

  x = x.range([0,width])
  y = y.range([0,height])

  rect.attr("width", width).attr("height", height)

  label.call(renderLabel)
  line.call(renderLine)

  // y as a function of x, through the midpoints of the P(A|B) and P(A|¬B)
  // segments.
  var diagonalA = getLineFunctionFromPoints(
    [p_b / 2, p_a_given_b],
    [(p_b + 1)/2, p_a_given_not_b]
  )

  // x as a function of y, through the midpoints of the P(B|A) and P(B|¬A)
  // segments.
  var diagonalB = getLineFunctionFromPoints(
    [p_a / 2, p_b_given_a],
    [(p_a + 1)/2, p_b_given_not_a]
  )

  p_a_line_diagonal
    .classed("diagonal", true)
    .classed("derived", true)
    .attr("x1", x(0))
    .attr("y1", y(diagonalA(0)))
    .attr("x2", x(1))
    .attr("y2", y(diagonalA(1)))

  p_b_line_diagonal
    .classed("diagonal", true)
    .attr("x1", x(diagonalB(0)))
    .attr("y1", y(0))
    .attr("x2", x(diagonalB(1)))
    .attr("y2", y(1))
}

/**
 * Returns the function of the straight line through two points.
 *
 * @param a first point as [input, output]
 * @param b second point as [input, output]
 * @returns a function mapping an input to the output on the line through a and b
 */
function getLineFunctionFromPoints(a,b) {
  var rise = b[1]-a[1]
  var run = b[0]-a[0]
  var slope = rise/run

  // Point-slope form, anchored at a
  return function(input) {
    return slope*(input - a[0]) + a[1]
  }
}

</script>