Spread of tweets during the two days of OpenVis Conf.
–
Built with blockbuilder.org
forked from sxywu’s block: openvis tweets #1
forked from sxywu’s block: openvis tweets #2
forked from sxywu’s block: openvis tweets #3
forked from sxywu’s block: openvis tweets #4
<!DOCTYPE html>
<head>
<meta charset="utf-8">
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js"></script>
<script src='https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.11.2/lodash.js'></script>
<link href='https://fonts.googleapis.com/css?family=Lora' rel='stylesheet' type='text/css'>
<style>
/* Page-wide defaults: Lora (serif fallback) in #49438C, with the body margin removed. */
body {
font-family: 'Lora', serif;
margin:0;
color: #49438C;
}
/* Fixed-width wrapper that holds the two columns. */
#main {
width: 1000px;
}
/* The two columns are top-aligned inline blocks, each 45% of #main wide with 15px padding. */
#left, #right {
width: 45%;
display: inline-block;
padding: 15px;
vertical-align: top;
}
/* Divs inside a column flow inline with 5px padding; the script appends one div per list entry. */
#left div, #right div {
display: inline-block;
padding: 5px;
}
</style>
</head>
<body>
<div id='main'>
<div id='left'></div>
<div id='right'></div>
</div>
<script>
// Inclusive analysis window for the two conference days; both bounds carry an explicit -04:00 offset.
var startDate = new Date('2016-04-25T00:00:00-04:00'),
    endDate = new Date('2016-04-27T00:00:00-04:00');
// d3 time formatter built from the pattern '%x %I:%M%p'.
var dateFormat = d3.time.format('%x %I:%M%p');
// 100 most common words, taken from https://gist.github.com/gravitymonkey/2406023
// thank you gravitymonkey you beautiful person.
var commonWords = ["the","of","and","a","to","in","is","you","that","it","he","was","for","on","are","as","with","his","they","I","at","be","this","have","from","or","one","had","by","word","but","not","what","all","were","we","when","your","can","said","there","use","an","each","which","she","do","how","their","if","will","up","other","about","out","many","then","them","these","so","some","her","would","make","like","him","into","time","has","look","two","more","write","go","see","number","no","way","could","people","my","than","first","water","been","call","who","oil","its","now","find","long","down","day","did","get","come","made","may","part"];
// Extra words excluded on top of the generic list above.
var customWords = ["openvisconf", "talk", "me", "here", "im", "very", "just", "too", "really", "much", "our", "us", "most", "another", "off", "should", "cant", "via", "going", "dont", "also", "says", "always", "after", "such", "check", "need", "keep", "say", "any", "hey", "between", "–", "over", "강남풀싸롱", "available", "gt", "got", "still", "lots", "being", "seen", "looks", "free", "am", "users", "take", "tiny", "own", "before", "big", "england", "back", "ive", "everyone", "super", "maybe", "stuff", "even", "lot", "make", "last", "open", "through", "something", "httpstcozc0ps1kc8h"];
// Collapse both lists into a `lowercased word -> 1` lookup so membership is a
// single property read. The lookup has no prototype: on a plain `{}` a token
// such as "constructor" would resolve to an inherited Object.prototype member
// and be mistaken for a stop word.
commonWords = commonWords.concat(customWords).reduce(function (lookup, word) {
  lookup[word.toLowerCase()] = 1;
  return lookup;
}, Object.create(null));
// Lookup from a variant spelling (plural, gerund, alternate name) to the single
// form it is counted under. Built from [variant, canonical] pairs.
var translations = [
  ['datavis', 'dataviz'],
  ['viz', 'vis'],
  ['charts', 'chart'],
  ['tools', 'tool'],
  ['things', 'thing'],
  ['visualizations', 'visualization'],
  ['using', 'use'],
  ['making', 'make'],
  ['slides', 'slide'],
  ['talks', 'talk'],
  ['learning', 'learn'],
  ['visualizing', 'visualize'],
  ['showing', 'show'],
  ['looking', 'look'],
  ['talking', 'talk'],
  ['thanks', 'thank'],
  ['tweets', 'tweet'],
  ['working', 'work'],
  ['maps', 'map'],
  ['thinking', 'think'],
  ['speakers', 'speaker'],
  ['friends', 'friend'],
  ['d3js', 'd3'],
  ['days', 'day'],
  ['folks', 'folk']
].reduce(function (table, pair) {
  table[pair[0]] = pair[1];
  return table;
}, {});
/**
 * Own-property test that works for plain objects and for prototype-less
 * dictionaries alike.
 * @param {Object} dict - dictionary to look in
 * @param {string} key - property name to test
 * @returns {boolean} true when `key` is defined directly on `dict`
 */
function hasOwn(dict, key) {
  return Object.prototype.hasOwnProperty.call(dict, key);
}

/**
 * Turns one whitespace-delimited token of a tweet body into the word it is
 * counted under.
 * @param {string} raw - a single token (contains no whitespace)
 * @returns {?string} the canonical word, or null when the token is skipped
 */
function normalizeWord(raw) {
  const word = raw.toLowerCase()
    .replace(/[.,\/#!$%\^&\*;:{}=\-_`~()'|+]/g, '');
  // Skip empty tokens, @mentions and tokens that parse to a non-zero integer.
  if (!word || word.startsWith('@') || parseInt(word, 10)) return null;
  if (hasOwn(commonWords, word)) return null;
  // Translate to the canonical form BEFORE the final stop-word check, so a
  // variant such as "talks" cannot re-introduce the excluded word "talk".
  const canonical = hasOwn(translations, word) ? translations[word] : word;
  return hasOwn(commonWords, canonical) ? null : canonical;
}

/**
 * Keeps tweets whose body does not start with "RT" and whose date lies within
 * [startDate, endDate], orders them newest first and indexes them by link.
 * Side effect: every inspected tweet gets a `date` (Date) property.
 * @param {Array<Object>} tweets - raw tweets (reads postedTime, body, link)
 * @returns {Object<string, Object>} tweets keyed by their link
 */
function selectTweets(tweets) {
  return _.chain(tweets)
    .filter((tweet) => {
      tweet.date = new Date(tweet.postedTime);
      return !/^RT/.test(tweet.body) &&
        startDate <= tweet.date && tweet.date <= endDate;
    })
    .sortBy((tweet) => -tweet.date)
    .reduce((byLink, tweet) => {
      byLink[tweet.link] = tweet;
      return byLink;
    }, {})
    .value();
}

/**
 * Counts, for every word, the distinct tweets that contain it.
 * @param {Object<string, Object>} tweetsByLink - tweets keyed by link
 * @returns {Object<string, {text: string, count: number, tweets: Object, users: Object}>}
 *   per-word entries; `tweets` and `users` are used as sets (key -> 1)
 */
function countWords(tweetsByLink) {
  // No prototype: tweet text is arbitrary, and a word like "constructor"
  // must not resolve to an inherited member.
  const words = Object.create(null);
  _.each(tweetsByLink, (tweet, link) => {
    const username = tweet.actor.preferredUsername;
    // Split on any whitespace run; splitting on ' ' alone would glue together
    // words that are separated by a newline or tab.
    _.each(tweet.body.split(/\s+/), (raw) => {
      const word = normalizeWord(raw);
      if (word === null) return;
      let entry = words[word];
      if (!entry) {
        entry = words[word] = {
          text: word,
          count: 0,
          tweets: {},
          users: {}
        };
      }
      // Count each tweet at most once per word, however often it repeats it.
      if (!entry.tweets[link]) {
        entry.count += 1;
        entry.tweets[link] = 1;
        entry.users[username] = 1;
      }
    });
  });
  return words;
}

/**
 * Counts how many tweets each unordered pair of words shares.
 * @param {Object<string, Object>} wordsByTweets - per tweet link, the set of
 *   words (key -> 1) found in that tweet
 * @returns {Array<{count: number, source: string, target: string, tweets: Object}>}
 *   pairs shared by more than one tweet, most frequent first
 */
function correlateWords(wordsByTweets) {
  const pairs = {};
  _.each(wordsByTweets, (wordSet, link) => {
    const present = _.keys(wordSet);
    // Visit every unordered pair exactly once (i < j).
    for (let i = 0; i < present.length; i += 1) {
      for (let j = i + 1; j < present.length; j += 1) {
        // Sorted key so (a, b) and (b, a) land in the same bucket.
        const key = [present[i], present[j]].sort().join(',');
        if (!pairs[key]) {
          pairs[key] = {
            count: 0,
            source: present[i],
            target: present[j],
            tweets: {}
          };
        }
        pairs[key].count += 1;
        pairs[key].tweets[link] = 1;
      }
    }
  });
  return _.chain(pairs)
    .filter((pair) => pair.count > 1)
    .sortBy((pair) => -pair.count)
    .value();
}

// Load the tweets, derive the top words and their correlations, render both
// lists and log the word data as JSON. A two-argument callback is used so d3
// reports a load/parse failure instead of handing over null data.
d3.json('tweets.json', (error, tweets) => {
  if (error) {
    console.error('Could not load tweets.json', error);
    return;
  }

  const tweetsByLink = selectTweets(tweets);

  const words = _.chain(countWords(tweetsByLink))
    .sortBy((word) => -word.count)
    .take(100)
    .value();

  // now find the words closely correlated with each other
  const filteredTweets = {};
  const wordsByTweets = {};
  _.each(words, (word) => {
    _.each(word.tweets, (flag, link) => {
      if (!wordsByTweets[link]) {
        wordsByTweets[link] = Object.create(null);
      }
      wordsByTweets[link][word.text] = 1;
      filteredTweets[link] = tweetsByLink[link];
    });
  });

  const correlations = correlateWords(wordsByTweets);

  const left = d3.select('#left');
  const right = d3.select('#right');
  left.append('h1')
    .text('top 100 words');
  right.append('h1')
    .text('commonly correlated words');
  left.selectAll('div')
    .data(words)
    .enter().append('div')
    .text((d) => d.text);
  right.selectAll('div')
    .data(correlations)
    .enter().append('div')
    .text((d) => `${d.source},${d.target}`);

  // Replace the key -> 1 sets with plain arrays of keys before serializing.
  _.each(words, (word) => {
    word.tweets = _.keys(word.tweets);
    word.users = _.keys(word.users);
  });
  _.each(correlations, (correlation) => {
    correlation.tweets = _.keys(correlation.tweets);
  });
  _.each(wordsByTweets, (wordSet, link) => {
    wordsByTweets[link] = _.keys(wordSet);
  });

  // Alternative exports; uncomment the data set you want to log.
  // console.log(JSON.stringify(filteredTweets));
  console.log(JSON.stringify(words));
  // console.log(JSON.stringify(correlations));
  // console.log(JSON.stringify(wordsByTweets));
  // console.log(_.size(filteredTweets));
});
</script>
</body>