Happy Birthday, Thomas Jefferson! While running a little startup called the United States of America, TJ supported some pretty awesome science, built some pretty sweet inventions and generally was a pretty cool guy. But apparently, his real love was gardening.
* Jefferson to Charles W. Peale, August 20, 1811. Lipscomb, Andrew A. and Albert Ellery Bergh, ed. The Writings of Thomas Jefferson, Volume 13. Washington D.C.: Issued under the auspices of the Thomas Jefferson Memorial Association of the United States, 1903-04, p. 79.
Let's see what kind of horticultural data he left behind!
# !conda install lxml cssselect pandas bokeh -y
# !pip install pyquery coffeetools jademagic
# !npm install -g coffee-script
import datetime
import re
import math
import pandas
from bokeh.plotting import output_notebook, show, figure, ColumnDataSource
from bokeh.models import HoverTool, CustomJS
from bokeh.resources import CDN
import IPython
from pyquery import PyQuery
from coffeetools import coffee
output_notebook(resources=CDN)
It is provided by the Massachusetts Historical Society.
IPython.display.IFrame("//www.masshist.org/thomasjeffersonpapers/garden", width="100%", height="800px")
garden_url = "//www.masshist.org/thomasjeffersonpapers/doc?id=garden_{page}"
img_url = "//www.masshist.org/thomasjeffersonpapers/garden/image/lg/garden_{page}_lg.jpg"
we'll refactor this later...
page = 1
pq = PyQuery(garden_url.format(page=page))
head = pq(".head").text()
head
year = re.match(r'\d*', head).group()
year
location = re.match(r'\d+\.\s*(.*)\.$', head).group(1)
location
entries = [(a.text_content().strip(), b.text_content().strip())
for a, b in list(zip(pq(".entrydate"), pq(".entry")))]
entries
month = None
month_pattern = r"Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec"
month_idx = {abbr: i + 1 for i, abbr in enumerate(month_pattern.split("|"))}
day_pattern = r"\d+"
def fix_dates(entries, page=None, month=None, year=None):
day = None
month = None
for date, entry in entries:
day_match = re.findall(day_pattern, date)
month_match = re.findall(month_pattern, date)
day = day_match[0] if day_match else day
month = month_match[0] if month_match else month
try:
date = datetime.date(int(year), month_idx.get(month, 1) or 1, int(day))
yield date, int(year), month_idx.get(month, 1) or 1, page, entry
except:
pass
df = pandas.DataFrame.from_records(
fix_dates(entries, year=year, page=page),
columns=["date", "year", "month", "page", "entry"],
index=["date"])
df
def hover():
return HoverTool(
tooltips=[
("Date", "$y"),
("Entry", "@entry"),
]
)
cds = ColumnDataSource(df)
p = figure(tools=[hover()], x_axis_type="datetime")
p.circle(x="date", y="year", source=cds)
show(p);
# be nice: cache
_pq = {}
year = None
location = None
all_entries = []
for page in range(1, 67):
url = garden_url.format(page=page)
pq = _pq.get(url, None)
if pq is None:
pq = _pq[url] = PyQuery(url)
head = pq(".head").text()
location_match = re.match(r'\d+\.\s*(.*)\.$', head)
if location_match:
location = location_match.group(1)
year_match = re.findall(r'\d{4}', head)
if year_match:
year = year_match[0]
entries = [(a.text_content().strip(), b.text_content().strip())
for a, b in list(zip(pq(".entrydate"), pq(".entry")))]
entries = fix_dates(entries, year=year, page=page)
#print(list(entries))
if entries:
all_entries.extend(entries)
df = pandas.DataFrame.from_records(
all_entries,
columns=["date", "year", "month", "page", "entry"],
index=["date"])
df["length"] = [math.log(len(entry)) * 3 for entry in df["entry"]]
df["julian"] = [int(idx.strftime("%j")) for idx in df.index]
df["month_name"] = df.month.apply(lambda e: datetime.datetime(2001,e,1).strftime("%B"))
pages = pandas.DataFrame.from_records(
[(min(entries), min(entries).strftime("%j"), page, img_url.format(page=page))
for page, entries in df.groupby(by=["page"]).groups.items()],
columns=["date", "julian", "page", "url"],
index="date"
)
pages.head()
p = figure(title="TJ's Garden Notes (1766-1824)",
y_axis_type="datetime", y_axis_label="Date",
x_axis_label="Day of Year")
p.circle(x="julian", y="date", size="length", alpha=0.5,
source=ColumnDataSource(df))
show(p);
pages.url = pages.url.apply(lambda v: v.replace('lg','thumb'))
df['url'] = """"""
for i, v in pages.url.items():
pages.set_value(i,'url',v.replace('lg','thumb'))
p = figure(title="TJ's Garden Book(1766-1824)",
y_axis_type="datetime", y_axis_label="Date",
x_axis_label="Day of Year",
width=600,
height=800)
circle = p.circle(x="julian", y="date", size="length", alpha=0.5,
source=ColumnDataSource(df))
images = p.image_url(x="julian", y="date", url="url", anchor='center', global_alpha=0.2,
source=ColumnDataSource(pages))
p.add_tools(HoverTool(
renderers=[circle],
tooltips=None,
callback=CustomJS(
args={
'circle': circle.data_source,
'images': images.data_source
}, code=r"""
if(window.moment){
update();
}
function update() {
var cdata = circle.get('data'),
indices = cb_data.index['1d'].indices,
info = $('#info').text(''),
page = cdata['page'][indices[0]],
pageIdx = _.indexOf(images.attributes.data.page, page),
url = images.attributes.data.url[pageIdx],
img = url && $('#jefferson-img-lg').css({
"background-image": "url(" + url.replace(/thumb/g, "lg") + ")",
"background-size": "contain",
"background-position": "middle middle",
"background-repeat": "no-repeat",
"position": "absolute",
top: 0,
left: 0,
right: 0,
bottom: 0
});
indices.map(function(index){
$("<div/>")
.css({"padding": "1rem", "line-height": "2rem"})
.append([
$("<h4/>").text(moment(new Date(cdata['date'][index]))
.format("dddd, MMMM D, YYYY")),
$("<ul/>").append(
cdata['entry'][index].split("\n")
.filter(function(line){ return line.trim(); })
.map(function(line){
return $("<li/>").text(line);
})
)
])
.appendTo(info);
});
}
""")))
p.x_range.callback = CustomJS(
args={
'x_axis': p.x_range,
'y_axis': p.y_range,
'images': images.glyph,
},
code=coffee.compile(
"""
dx = x_axis.get('end') - x_axis.get('start')
lower = 1
if dx > lower
images.set 'global_alpha', .2 + (.7)*Math.exp( -1*Math.pow(dx-lower,2)/(2*Math.pow(50,2)))
""",
bare=True))
show(p);
df.to_csv('jefferson.csv')
%%html
<div id="info"></div>
%%html
<div id="jefferson-img-lg"></div>
%%javascript
nbpresent.mode.themes.set({
"default": "jefferson",
"theme": {
jefferson: {
palette: {
"paper": {
id: "paper",
rgb: [252, 248, 237]
},
"ink": {
id: "ink",
rgb: [65, 55, 41]
},
"highlight": {
id: "highlight",
rgb: [162, 137, 100]
}
},
backgrounds: {
"paper": {
"background-color": "paper"
}
},
"text-base": {
"font-family": "Vollkorn",
"font-size": 1.5,
"color": "ink"
},
rules: {
h1: {
"font-size": 7,
"color": "ink"
},
h2: {
"color": "highlight",
"font-size": 5,
},
h4: {
"font-size": 2,
"color": "highlight",
"text-align": "center",
"border-bottom": "solid 1px rgb(162, 137, 100)"
},
blockquote: {
"text-align": "justify",
"border": "none",
"font-style": "italic"
}
}
}
}});
%%html
<script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore.js"></script>
<script>
$(function(){
requirejs({
paths: {moment: "https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.12.0/moment.min"}
},
["moment"],
function(moment){
window.moment = moment;
update();
});
})
</script>