Clone of Xan Gregg’s chart, which he created using JMP (see his tweet). Xan’s chart used the data from another chart, created by Henrik Lindberg (see this block for my reproduction of that chart).
This was an excuse to see how to implement a horizon chart using D3.
Here’s Henrik’s description of the chart:
Peak time of day for sports and leisure
Number of participants throughout the day compared to peak popularity. Note the morning-and-evening everyday workouts, the midday hobbies, and the evenings/late nights out.
<!DOCTYPE html>
<html>
<head>
<style>
/* Center the chart horizontally on the page. */
svg {
  margin: 0 auto;
  display: block;
}

/* Hide every axis baseline, and the tick marks on the activity axis. */
.axis .domain,
.axis--activity .tick line {
  display: none;
}

/* Muted greys for the time axis ticks and labels. */
.axis--x line {
  stroke: #aaa;
}

.axis--x text {
  fill: #999;
}

/* Activity names rendered by the left-hand axis. */
.axis--activity text {
  fill: #777;
  font-size: 12px;
}
</style>
</head>
<body>
<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
// Number of bands each activity's value range is sliced into.
var numHorizons = 4;

// Margin convention: width/height describe the inner plotting area.
var margin = { top: 30, right: 10, bottom: 30, left: 300 },
    width = 700 - margin.left - margin.right,
    height = 600 - margin.top - margin.bottom;

// Tick labels: zero-padded 12-hour clock followed by AM/PM.
var formatTime = d3.timeFormat('%I %p');

// Root <g>, already translated by the left/top margins.
var svg = d3.select('body').append('svg')
    .attr('width', width + margin.left + margin.right)
    .attr('height', height + margin.top + margin.bottom)
  .append('g')
    .attr('transform', 'translate(' + margin.left + ',' + margin.top + ')');

// x: time of day. The scale's domain is assigned once the data has loaded.
var x = function(d) { return d.time; },
    xScale = d3.scaleTime().range([0, width]),
    xValue = function(d) { return xScale(x(d)); },
    xAxis = d3.axisBottom(xScale).tickFormat(formatTime);

// y: value inside a single activity band. The scale is clamped, and its
// range and domain are assigned after the data has loaded (the domain is
// re-assigned for every horizon band that gets drawn).
// Every declarator is comma-separated so that y1Value stays a declared
// variable; without the comma after y0Value, automatic semicolon insertion
// ends the `var` statement early and y1Value leaks as an implicit global.
var y = function(d) { return d.value; },
    yScale = d3.scaleLinear().clamp(true),
    y0Value = function(d) { return yScale(0); },
    y1Value = function(d) { return yScale(y(d)); };

// One horizontal band per activity, stacked down the chart.
var activity = function(d) { return d.key; },
    activityScale = d3.scaleBand().range([0, height]).padding(0.05),
    activityValue = function(d) { return activityScale(activity(d)); },
    activityAxis = d3.axisLeft(activityScale);

// Maps a value to the index (0 .. numHorizons - 1) of the band it falls in.
var horizonScale = d3.scaleQuantize()
    .range(d3.range(numHorizons));

// Band color is driven by the lower bound of the band's value slice; the
// fill scale keeps d3's default [0, 1] domain.
var fill = function(d) { return d.yExtent[0]; },
    fillScale = d3.scaleLinear().range(['lavender', 'purple']).interpolate(d3.interpolateHcl),
    fillValue = function(d) { return fillScale(fill(d)); };

// Area generator shared by every band; it reads the current state of yScale.
var area = d3.area()
    .x(xValue)
    .y0(y0Value)
    .y1(y1Value);
// Turn an offset in minutes into a Date on a fixed reference day.
function parseTime(offset) {
  // The calendar day itself is arbitrary.
  return d3.timeMinute.offset(new Date(2017, 0, 1), offset);
}
// Row accessor for d3.tsv: keeps the activity name, converts the time
// column to a Date and the p_smooth column to a number.
function row(d) {
  var parsed = {};
  parsed.activity = d.activity;
  parsed.time = parseTime(d.time);
  parsed.value = +d.p_smooth;
  return parsed;
}
// Load the pre-processed rows and draw one horizon strip per activity.
d3.tsv('data.tsv', row, function(error, dataFlat) {
  if (error) throw error;

  // Sort by time so each activity's points are in drawing order.
  dataFlat.sort(function(a, b) { return a.time - b.time; });

  // Group rows by activity: [{ key: <activity>, values: [<rows>] }, ...]
  var data = d3.nest()
      .key(function(d) { return d.activity; })
      .entries(dataFlat);

  // Time at which an activity reaches its largest value.
  function peakTime(d) {
    // The comparator is descending, so d3.scan yields the index of the maximum.
    var i = d3.scan(d.values, function(a, b) { return y(b) - y(a); });
    return d.values[i].time;
  }

  // Order activities by peak time, latest peak first.
  data.sort(function(a, b) { return peakTime(b) - peakTime(a); });

  // Turn an activity key into a single class token. A raw key containing
  // whitespace would otherwise be split into several unrelated classes.
  function classToken(key) {
    return String(key).replace(/[^\w-]+/g, '-');
  }

  // Scale domains depend on the data, so they are set before anything
  // that reads them (axes, band offsets, paths) is rendered.
  xScale.domain(d3.extent(dataFlat, x));
  activityScale.domain(data.map(function(d) { return d.key; }));
  yScale.range([activityScale.bandwidth(), 0]);
  horizonScale.domain(d3.extent(dataFlat, y));

  svg.append('g').attr('class', 'axis axis--x')
      .attr('transform', 'translate(0,' + height + ')')
      .call(xAxis);

  svg.append('g').attr('class', 'axis axis--activity')
      .call(activityAxis);

  // One <g> per activity, shifted down to that activity's band.
  var gActivity = svg.append('g').attr('class', 'activities')
    .selectAll('.activity').data(data)
    .enter().append('g')
      .attr('class', function(d) { return 'activity activity--' + classToken(d.key); })
      .attr('transform', function(d) {
        var ty = activityValue(d);
        return 'translate(0,' + ty + ')';
      });

  // Expand one activity into numHorizons datums. Each carries the value
  // interval of its band plus the activity's full series.
  function horizonData(d) {
    return horizonScale.range()
      .map(function(i) {
        return {
          yExtent: horizonScale.invertExtent(i),
          key: d.key,
          values: d.values
        };
      });
  }

  // One path per band, all drawn inside the same activity strip.
  gActivity.selectAll('.horizon').data(horizonData)
    .enter().append('path')
      .attr('class', 'horizon')
      .each(function(d) {
        // TODO: create separate y-scales using d3.local()
        // The shared, clamped yScale is re-domained to this band's interval
        // immediately before the path is generated.
        yScale.domain(d.yExtent);
        d3.select(this)
            .attr('d', area(d.values));
      })
      .style('fill', fillValue);
});
</script>
</body>
</html>
# Build data.tsv (the file the chart loads) from activity.tsv by running
# process-activity.R on the prerequisite ($<) and redirecting its standard
# output to the target ($@).
# NOTE(review): make expects the recipe line to begin with a tab; confirm the
# leading tab is present in the real Makefile.
data.tsv: activity.tsv
Rscript process-activity.R $< > $@
#!/usr/bin/env Rscript
# Adapted from https://github.com/halhen/viz-pub/blob/master/sports-time-of-day/2_gen_chart.R
#
# Usage: process-activity.R <activity.tsv>
# Reads the TSV named by the first argument (columns used: activity, time, p)
# and writes the filtered, normalized and smoothed rows to stdout as TSV.
library(tidyverse)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L) {
  stop("Usage: process-activity.R <activity.tsv>", call. = FALSE)
}
filename <- args[1]

df <- read_tsv(filename)

smoothed <- df %>%
  group_by(activity) %>%
  filter(
    max(p) > 3e-04, # Keep the most popular ones
    !grepl("n\\.e\\.c", activity) # Remove n.e.c. (not elsewhere classified)
  ) %>%
  arrange(time) %>%
  mutate(
    p_peak = p / max(p), # Normalize as a fraction of peak popularity
    p_smooth = (lag(p_peak) + p_peak + lead(p_peak)) / 3, # Moving average
    # With no lag or lead the average is NA; fall back to the pointwise value
    p_smooth = coalesce(p_smooth, p_peak)
  ) %>%
  ungroup()

# 24:00:00 is missing from the source data; add for a complete cycle
# NOTE(review): the shift below also moves the original time == 0 rows to
# 24 * 60, so each activity ends up with two rows at that time. Kept as is.
midnight <- smoothed %>%
  filter(time == 0) %>%
  mutate(time = 24 * 60)

bind_rows(smoothed, midnight) %>%
  # Set start of chart to 03:00; few things overlap this hour
  mutate(time = ifelse(time < 3 * 60, time + 24 * 60, time)) %>%
  # Order factor levels by peak time. The TSV text written below does not
  # carry level order; the chart page re-sorts activities by peak time itself.
  mutate(activity = reorder(activity, p_peak, FUN = which.max)) %>%
  format_tsv() %>%
  cat()