block by armollica 7da151558db71ed679b818bbef002c88

Horizon Chart

Full Screen

Clone of Xan Gregg’s chart which he created using JMP (see his tweet). Xan’s chart used the data from another chart that Henrik Lindberg created (see this block for my reproduction of that chart).

This was an excuse to see how to implement a horizon chart using D3.

Here’s Henrik’s description of the chart:

Peak time of day for sports and leisure

Number of participants throughout the day compared to peak popularity. Note the morning-and-evening everyday workouts, the midday hobbies, and the evenings/late nights out.

index.html

<!DOCTYPE html>
<html>
<head>
<style>

/* Center the chart horizontally on the page. */
svg {
    display: block;
    margin: 0 auto;
}

/* Hide the axis baseline drawn by d3-axis on every axis. */
.axis .domain {
    display: none;
}

/* Time (x) axis: muted grey labels and tick marks. */
.axis--x text {
    fill: #999;
}

.axis--x line {
    stroke: #aaa;
}

/* Activity axis: labels only, no tick marks. */
.axis--activity .tick line {
    display: none;
}

.axis--activity text {
    font-size: 12px;
    fill: #777;
}

</style>
</head>
<body>
<script src="https://d3js.org/d3.v4.min.js"></script>
<script>

// Number of horizon bands each activity's series is sliced into.
var numHorizons = 4;

// `width` and `height` are the inner plotting area; the margins are added
// back when sizing the <svg> element. The large left margin leaves room for
// the activity labels.
var margin = { top: 30, right: 10, bottom: 30, left: 300 },
    width = 700 - margin.left - margin.right,
    height = 600 - margin.top - margin.bottom;

// Tick label format for the time axis: 12-hour clock plus AM/PM.
var formatTime = d3.timeFormat('%I %p');

// Root <g>, translated so that (0, 0) is the top-left of the plotting area.
var svg = d3.select('body').append('svg')
        .attr('width', width + margin.left + margin.right)
        .attr('height', height + margin.top + margin.bottom)
    .append('g')
        .attr('transform', 'translate(' + margin.left + ',' + margin.top + ')');

// x: time of day. The scale's domain is set once the data has loaded.
var x = function(d) { return d.time; },
    xScale = d3.scaleTime().range([0, width]),
    xValue = function(d) { return xScale(x(d)); },
    xAxis = d3.axisBottom(xScale).tickFormat(formatTime);

// y: value within one activity row. The scale is clamped so that values
// outside the domain currently assigned to it pin to the edge of the row.
// Both its range and its domain are assigned after the data has loaded.
// NOTE: every declarator below must end with a comma. Without one, automatic
// semicolon insertion ends the `var` statement early and the following name
// becomes an implicit global.
var y = function(d) { return d.value; },
    yScale = d3.scaleLinear().clamp(true),
    y0Value = function(d) { return yScale(0); },
    y1Value = function(d) { return yScale(y(d)); };

// One band per activity, stacked vertically over the plot height.
var activity = function(d) { return d.key; },
    activityScale = d3.scaleBand().range([0, height]).padding(0.05),
    activityValue = function(d) { return activityScale(activity(d)); },
    activityAxis = d3.axisLeft(activityScale);

// Quantize scale whose outputs are the band indices 0 .. numHorizons - 1.
// Its domain is set from the data after loading.
var horizonScale = d3.scaleQuantize()
    .range(d3.range(numHorizons));

// Fill colour is driven by the lower bound of a band's value extent.
// The colour scale's domain is never set here, so it keeps D3's default.
var fill = function(d) { return d.yExtent[0]; },
    fillScale = d3.scaleLinear().range(['lavender', 'purple']).interpolate(d3.interpolateHcl),
    fillValue = function(d) { return fillScale(fill(d)); };

// Area generator shared by every band; it reads the scales above when called.
var area = d3.area()
    .x(xValue)
    .y0(y0Value)
    .y1(y1Value);

// Turn a minute offset into a Date by shifting a fixed reference date
// (1 January 2017, local time) by `offset` minutes.
function parseTime(offset) {
    // The calendar day itself is arbitrary; it only anchors the offsets.
    var referenceDay = new Date(2017, 0, 1);
    var shifted = d3.timeMinute.offset(referenceDay, offset);
    return shifted;
}

// Row accessor for d3.tsv: keeps the activity name, converts the raw `time`
// field with parseTime, and coerces the `p_smooth` column to a number under
// the name `value`. All other columns are dropped.
function row(d) {
    var record = {};
    record.activity = d.activity;
    record.time = parseTime(d.time);
    record.value = +d.p_smooth;
    return record;
}

// Load the pre-processed data, then build the whole chart: axes, one <g> per
// activity, and inside each a stack of horizon-band paths.
d3.tsv('data.tsv', row, function(error, dataFlat) {
    if (error) throw error;

    // Sort by time so each activity's points reach the area generator in x order.
    dataFlat.sort(function(a, b) { return a.time - b.time; });

    // Group rows into [{ key: <activity>, values: [rows...] }, ...].
    var grouped = d3.nest()
        .key(function(d) { return d.activity; })
        .entries(dataFlat);

    // Time at which an activity reaches its highest value.
    function peakTime(d) {
        var i = d3.scan(d.values, function(a, b) { return y(b) - y(a); });
        return d.values[i].time;
    }

    // Sort activities by peak time, latest first. The peak is computed once
    // per activity rather than rescanned inside every comparison.
    var data = grouped
        .map(function(d) { return { peak: peakTime(d), datum: d }; })
        .sort(function(a, b) { return b.peak - a.peak; })
        .map(function(entry) { return entry.datum; });

    xScale.domain(d3.extent(dataFlat, x));

    activityScale.domain(data.map(function(d) { return d.key; }));

    // Each activity's area is drawn inside its own band: bottom of the band
    // is the low end of the y range, top of the band is the high end.
    yScale.range([activityScale.bandwidth(), 0]);

    horizonScale.domain(d3.extent(dataFlat, y));

    svg.append('g').attr('class', 'axis axis--x')
        .attr('transform', 'translate(0,' + height + ')')
        .call(xAxis);

    svg.append('g').attr('class', 'axis axis--activity')
        .call(activityAxis);

    // Activity names can contain spaces and punctuation. Collapse every run
    // of characters that is not a word character or hyphen into one hyphen so
    // the name contributes exactly one class token.
    function classSuffix(key) {
        return String(key).replace(/[^\w-]+/g, '-');
    }

    var gActivity = svg.append('g').attr('class', 'activities')
            .selectAll('.activity').data(data)
        .enter().append('g')
            .attr('class', function(d) { return 'activity activity--' + classSuffix(d.key); })
            .attr('transform', function(d) {
                var ty = activityValue(d);
                return 'translate(0,' + ty + ')';
            });

    // Expand one activity into one datum per horizon band. Every band shares
    // the same values and differs only in the value extent it covers.
    function horizonData(d) {
        return horizonScale.range()
            .map(function(i) {
                return {
                    yExtent: horizonScale.invertExtent(i),
                    key: d.key,
                    values: d.values
                };
            });
    }

    gActivity.selectAll('.horizon').data(horizonData)
        .enter().append('path')
            .attr('class', 'horizon')
            .each(function(d) {
                // The shared yScale is re-pointed at this band's extent just
                // before the path is generated; because the scale is clamped,
                // values outside the band pin to the band's top or bottom.
                // TODO: create separate y-scales using d3.local()
                yScale.domain(d.yExtent);
                d3.select(this)
                    .attr('d', area(d.values));
            })
            .style('fill', fillValue);
});

</script>
</body>
</html>

Makefile

# Build data.tsv from activity.tsv: the R script receives the prerequisite
# ($<) as its first argument and its stdout is redirected to the target ($@).
data.tsv: activity.tsv
	Rscript process-activity.R $< > $@

process-activity.R

#!/usr/bin/env Rscript

# Adapted from https://github.com/halhen/viz-pub/blob/master/sports-time-of-day/2_gen_chart.R

library(tidyverse)

# Input file path is the first command-line argument.
filename <- commandArgs(trailingOnly = TRUE)[1]

df <- read_tsv(filename)

# Filter, normalize and smooth each activity's series, then write the result
# as TSV to stdout. Columns read here: activity, time, p.
# `time` is treated as minutes (see the 24*60 and 3 * 60 constants below).
# NOTE(review): a row with time == 0 is shifted to 24*60 by the ifelse() step,
# and the row appended in do() is also at 24*60, so each activity appears to
# end up with two rows at that time -- confirm this is harmless downstream.
df %>%
  group_by(activity) %>% 
  filter(max(p) > 3e-04, # Keep the most popular ones
         !grepl('n\\.e\\.c', activity)) %>% # Remove n.e.c. (not elsewhere classified)
  arrange(time) %>%
  mutate(p_peak = p / max(p), # Normalize as percentage of peak popularity
         p_smooth = (lag(p_peak) + p_peak + lead(p_peak)) / 3, # Moving average
         p_smooth = coalesce(p_smooth, p_peak)) %>% # When there's no lag or lead, we get NA. Use the pointwise data
  ungroup() %>%
  do({ # 24:00:00 is missing from the source data; add for a complete cycle
    rbind(.,
          filter(., time == 0) %>%
            mutate(time = 24*60))
  }) %>%
  mutate(time = ifelse(time < 3 * 60, time + 24 * 60, time)) %>% # Set start of chart to 03:00; few things overlap this hour  
  mutate(activity = reorder(activity, p_peak, FUN=which.max)) %>% # order by peak time
  format_tsv() %>%
  cat()