block by vlandham 9f22ca7f11789ef90ec66457b12d162f

TextArc in D3 -old

Full Screen

index.html

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">

  <title>Textarc with D3</title>
  <meta name="description" content="">
  <meta name="author" content="Jim Vallandingham">

  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <!--
    vis.js is written against the D3 v3 API (d3.scale.log, d3.map().forEach(key, value),
    d3.nest() rollups read through `.values`) and the stand-alone queue.js `queue()` global,
    so those are the libraries that must be loaded here. With D3 v4 the script fails.
  -->
  <script src="https://d3js.org/d3.v3.min.js"></script>
  <script src="https://d3js.org/queue.v1.min.js"></script>

  <link rel="stylesheet" href="style.css">

</head>

<body>

  <div class="container">
    <div id="main" role="main">
      <h1 id="title">Alice's Adventures In Wonderland</h1>
      <!-- #word shows the word currently hovered; #vis receives the generated SVG. -->
      <div id="word"></div>
      <div id="vis"></div>
      <div id="about">
        <p>This is an attempt at a partial recreation of the amazing <a href="//www.textarc.org/">TextArc</a> text visualization by W. Bradford Paley.</p>
        <p>This is meant as a tribute to the pioneering work, and as an experiment in how one might implement such interactive visualizations using current open web technologies.</p>
        <p>This version is implemented completely in the browser using <a href="//d3js.org/">D3.js</a>. Alice in Wonderland text is derived from <a href="//www.gutenberg.org/ebooks/11">Project Gutenberg</a>.</p>
        <p><a href="https://github.com/vlandham/textarc">Source Code</a></p>
      </div>
    </div>
  </div> <!--! end of .container -->

  <!-- stop_words.js must come first: vis.js reads the global `stop_words` array. -->
  <script src="stop_words.js"></script>
  <script src="vis.js"></script>

</body>
</html>

stop_words.js

// Lowercase English stop words (plus every single letter) that the visualization
// leaves out of the word layer. Entries are unique and grouped by initial letter.
// Lookups against this list should lowercase the candidate word first.
var stop_words = [
  // a
  'a', 'about', 'above', 'across', 'after', 'again', 'against', 'all', 'almost',
  'alone', 'along', 'already', 'also', 'although', 'always', 'among', 'an', 'and',
  'another', 'any', 'anybody', 'anyone', 'anything', 'anywhere', 'are', 'area',
  'areas', 'around', 'as', 'ask', 'asked', 'asking', 'asks', 'at', 'away',
  // b
  'b', 'back', 'backed', 'backing', 'backs', 'be', 'became', 'because', 'become',
  'becomes', 'been', 'before', 'began', 'behind', 'being', 'beings', 'best',
  'better', 'between', 'big', 'both', 'but', 'by',
  // c
  'c', 'came', 'can', 'cannot', 'case', 'cases', 'certain', 'certainly', 'clear',
  'clearly', 'come', 'could',
  // d
  'd', 'did', 'differ', 'different', 'differently', 'do', 'does', 'done', 'down',
  'downed', 'downing', 'downs', 'during',
  // e
  'e', 'each', 'early', 'either', 'end', 'ended', 'ending', 'ends', 'enough',
  'even', 'evenly', 'ever', 'every', 'everybody', 'everyone', 'everything',
  'everywhere',
  // f
  'f', 'face', 'faces', 'fact', 'facts', 'far', 'felt', 'few', 'find', 'finds',
  'first', 'for', 'four', 'from', 'full', 'fully', 'further', 'furthered',
  'furthering', 'furthers',
  // g
  'g', 'gave', 'general', 'generally', 'get', 'gets', 'give', 'given', 'gives',
  'go', 'going', 'good', 'goods', 'got', 'great', 'greater', 'greatest', 'group',
  'grouped', 'grouping', 'groups',
  // h
  'h', 'had', 'has', 'have', 'having', 'he', 'her', 'here', 'high', 'higher',
  'highest', 'him', 'his', 'how', 'however',
  // i
  'i', 'if', 'important', 'in', 'interest', 'interested', 'interesting',
  'interests', 'into', 'is', 'it', 'its', 'itself',
  // j
  'j', 'just',
  // k
  'k', 'keep', 'keeps', 'kind', 'knew', 'know', 'known', 'knows',
  // l
  'l', 'large', 'largely', 'last', 'later', 'latest', 'least', 'less', 'let',
  'lets', 'like', 'likely', 'long', 'longer', 'longest',
  // m
  'm', 'made', 'make', 'making', 'man', 'many', 'may', 'me', 'member', 'members',
  'men', 'might', 'more', 'most', 'mostly', 'mr', 'mrs', 'much', 'must', 'my',
  'myself',
  // n
  'n', 'necessary', 'need', 'needed', 'needing', 'needs', 'never', 'new', 'newer',
  'newest', 'next', 'no', 'nobody', 'non', 'noone', 'not', 'nothing', 'now',
  'nowhere', 'number', 'numbers',
  // o
  'o', 'of', 'off', 'often', 'old', 'older', 'oldest', 'on', 'once', 'one', 'only',
  'open', 'opened', 'opening', 'opens', 'or', 'order', 'ordered', 'ordering',
  'orders', 'other', 'others', 'our', 'out', 'over',
  // p
  'p', 'part', 'parted', 'parting', 'parts', 'per', 'perhaps', 'place', 'places',
  'point', 'pointed', 'pointing', 'points', 'possible', 'present', 'presented',
  'presenting', 'presents', 'problem', 'problems', 'put', 'puts',
  // q
  'q', 'quite',
  // r
  'r', 'rather', 'really', 'right', 'room', 'rooms',
  // s
  's', 'said', 'same', 'saw', 'say', 'says', 'second', 'seconds', 'see', 'seem',
  'seemed', 'seeming', 'seems', 'sees', 'several', 'shall', 'she', 'should',
  'show', 'showed', 'showing', 'shows', 'side', 'sides', 'since', 'small',
  'smaller', 'smallest', 'so', 'some', 'somebody', 'someone', 'something',
  'somewhere', 'state', 'states', 'still', 'such', 'sure',
  // t
  't', 'take', 'taken', 'than', 'that', 'the', 'their', 'them', 'then', 'there',
  'therefore', 'these', 'they', 'thing', 'things', 'think', 'thinks', 'this',
  'those', 'though', 'thought', 'thoughts', 'three', 'through', 'thus', 'to',
  'today', 'together', 'too', 'took', 'toward', 'turn', 'turned', 'turning',
  'turns', 'two',
  // u
  'u', 'under', 'until', 'up', 'upon', 'us', 'use', 'used', 'uses',
  // v
  'v', 'very',
  // w
  'w', 'want', 'wanted', 'wanting', 'wants', 'was', 'way', 'ways', 'we', 'well',
  'wells', 'went', 'were', 'what', 'when', 'where', 'whether', 'which', 'while',
  'who', 'whole', 'whose', 'why', 'will', 'with', 'within', 'without', 'work',
  'worked', 'working', 'works', 'would',
  // x
  'x',
  // y
  'y', 'year', 'years', 'yet', 'you', 'young', 'younger', 'youngest', 'your',
  'yours',
  // z
  'z'
];

style.css


/* Page-wide defaults: white serif text on a black background. */
body, input, textarea {
  font-family: Georgia, "Times New Roman", Serif;
  background-color: black;
  color: white;
}

#main {
  color: white;
}

/* Fixed-width, horizontally centred page wrapper. */
.container {
  /*width: 940px;*/
  /*margin: auto;*/
  width: 980px;
  padding-right: 15px;
  padding-left: 15px;
  margin-right: auto;
  margin-left: auto;
}

/* Read-out for the word currently hovered, pinned to the top-left corner. */
#word {
  font-size:20px;
  /* colour keywords must not be quoted; "white" as a string is an invalid value */
  color: white;
  position: absolute;
  top: 20px;
  left: 30px;
}

/* Title and credits are deliberately dim so the visualization dominates. */
#title, #about {

  font-family: Georgia, "Times New Roman", Serif;
  text-align:center;
  color: #ddd;
  opacity: 0.4;
  font-style: italic;
}

/* SVG text for each line of the book, drawn faintly around the ellipse. */
.sentence {
  opacity: 0.4;
  fill: #ddd;
}

/* Applied to sentences containing the hovered word. */
.highlight {
  fill: #1FC946;
  opacity: 1.0;
}

/* Connector from a hovered word to each of its occurrences; never intercepts the mouse. */
.line {
  stroke:#C5A438;
  stroke-width:1;
  fill:none;
  pointer-events: none;
}

#vis {
  /*font-size: 2px;*/
}

#vis .word {
  /*opacity: 0.4;*/
}

vis.js


// Adds selection.moveToFront(): re-appends every selected node to its own parent,
// which makes it the parent's last child. Returns the selection for chaining.
d3.selection.prototype.moveToFront = function() {
  function reappendToParent() {
    var parent = this.parentNode;
    parent.appendChild(this);
  }

  return this.each(reappendToParent);
};

/**
 * Replaces every ASCII punctuation character with a space, then collapses each
 * run of two or more whitespace characters into a single space.
 *
 * A lone whitespace character (for example a single newline) is left as it is,
 * and the result is not trimmed.
 *
 * @param {string} string - text to clean
 * @returns {string} the text with punctuation blanked out
 */
var removePunctuation = function(string) {
  var punctuation = /['!"#$%&\\'()\*+,\-\.\/:;<=>?@\[\\\]\^_`{|}~']/g;
  var whitespaceRun = /\s{2,}/g;

  var blanked = string.replace(punctuation, " ");
  return blanked.replace(whitespaceRun, " ");
};

// Size of the drawing area in pixels. chart() uses these as its width/height
// (margins are added on top), and getWords() centres the word ellipse on their midpoint.
var visWidth = 960;
var visHeight = 500;

// pulls out all the sentences
// TODO: we don't really need the lengths at all - remove
// TODO: whitespace seems to be removed?
//  - more likely - the interesting spacing is removed in the gutenberg version
/**
 * Splits the text into lines and builds one record per line. Despite the name,
 * a "sentence" here is simply one line of the input.
 *
 * Blank lines are kept, so the number of records equals the number of lines.
 *
 * @param {string} text - full text; lines may end in "\n" or "\r\n"
 * @returns {Array<{sentence: string, lookupSentence: string, length: number}>}
 *   sentence: the line with spaces turned into non-breaking spaces;
 *   lookupSentence: the line with punctuation removed, lowercased;
 *   length: character count of the line
 */
var sentenceLengths = function(text) {
  // Accept Windows line endings too; otherwise every line keeps a trailing "\r".
  var lines = text.split(/\r?\n/);

  return lines.map(function(line) {
    return {
      // non-breaking spaces, presumably so runs of spaces survive in SVG text
      "sentence": line.replace(/ /g, '\u00a0'),
      "lookupSentence": removePunctuation(line).toLowerCase(),
      "length": line.length
    };
  });
};

// TODO: combine with sentences somehow to link sentence data with word data
/**
 * Builds one record per distinct word (case-insensitive) in the text.
 *
 * Every occurrence of every word is first placed along an ellipse by
 * radialPlacement(), in reading order. A distinct word is then positioned at the
 * average of all of its occurrences.
 *
 * @param {string} text - full text of the book
 * @returns {Array<Object>} records sorted by ascending count, each with:
 *   key       - the word exactly as it first appears in the text
 *   visual    - the spelling/casing used most often (first one seen wins ties)
 *   x, y      - mean position of all occurrences
 *   positions - one {word, index, pos, x, y, angle} entry per occurrence
 *   count     - number of occurrences
 */
var getWords = function(text) {
  // Quotes are dropped outright (not replaced by a space) so "don't" stays one token.
  var cleaned = removePunctuation(text.replace(/['\"\‘\’]/gm,""));

  // Split on any whitespace run: removePunctuation leaves single newlines in place,
  // so splitting on " " alone would glue the last word of a line to the first word
  // of the next. Empty tokens from leading/trailing whitespace are discarded.
  var allWords = cleaned.split(/\s+/)
    .filter(function(token) { return token.length > 0; })
    .map(function(token) { return {"word": token}; });

  //TODO: magic knowledge of the size of the ellipse here.
  var wordCenters = radialPlacement().width(460).height(280).center({"x":visWidth / 2, "y":visHeight / 2 });
  // adds x, y and angle to every entry of allWords
  wordCenters(allWords);

  // lowercase word -> list of its occurrences
  var wordsLen = allWords.length;
  var words = d3.map();
  allWords.forEach(function(word, index) {
    var wordKey = word.word.toLowerCase();
    var wordList = words.has(wordKey) ? words.get(wordKey) : [];

    wordList.push({"word":word.word, "index":index, "pos":index / wordsLen, "x":word.x, "y":word.y, "angle":word.angle});
    words.set(wordKey, wordList);
  });

  // get the version of the word used in the most positions
  // this will be the visual representation used
  // TODO: still not quite right. Example - FATHER
  var getMostFrequent = function(positions) {
    var counts = d3.map();
    var variants = [];   // distinct spellings, in order of first appearance

    positions.forEach(function(p) {
      if (!counts.has(p.word)) {
        variants.push(p.word);
        counts.set(p.word, 0);
      }
      counts.set(p.word, counts.get(p.word) + 1);
    });

    // strictly-greater comparison keeps the earliest variant on ties
    return variants.reduce(function(best, candidate) {
      return counts.get(candidate) > counts.get(best) ? candidate : best;
    }, variants[0]);
  };

  var wordMap = [];
  words.forEach(function(wordKey, positions) {
    var w = {"key":positions[0].word};
    w.visual = getMostFrequent(positions);
    w.x = d3.sum(positions.map(function(p) { return p.x; })) / positions.length;
    w.y = d3.sum(positions.map(function(p) { return p.y; })) / positions.length;
    w.positions = positions;
    w.count = positions.length;
    wordMap.push(w);
  });

  // sort to put more frequent words on top (later elements are drawn last)
  return wordMap.sort(function(a,b) { return a.count - b.count; });
};

// sets up the x and y for a radial layout
// TODO: modified to lazily add parameters to the input keys - so everything
// is expected to be an object. Bad for many reasons.
/**
 * Creates a layout function that spreads a list of objects evenly around an
 * ellipse, writing `x`, `y` and `angle` (degrees) onto each object in place.
 *
 * The first objects are pushed outwards by a "tapper" offset that starts at -50
 * and grows by the angular increment per object until it reaches 0, so the
 * beginning of the ring does not land exactly on the end.
 *
 * Each call of the layout restarts from the configured start angle, so layouts
 * are repeatable and start(_) takes effect.
 *
 * Configuration (each is a getter without arguments and a chainable setter with one):
 *   center({x, y}), width(px), height(px), start(degrees), keys(objects)
 *
 * @returns {function(Array<Object>)} layout function carrying the accessors above
 */
var radialPlacement = function() {
  var initialTapper = -50;

  var placed = [];
  var increment = 20;
  var width = 500;
  var height = 300;
  var center = {"x":0, "y":0};
  var start = -90;

  // running state of the layout pass in progress
  var current = start;
  var tapper = initialTapper;

  // point on the ellipse at `angle` degrees, shifted right by -tapper and up by -tapper
  var radialLocation = function(center, angle, width, height, tapper) {
    var radians = angle * Math.PI / 180;
    return {"x": (center.x + (width * Math.cos(radians) - tapper)),
            "y": (center.y + (height * Math.sin(radians) + tapper))};
  };

  var place = function(obj) {
    var value = radialLocation(center, current, width, height, tapper);
    // now it just adds attributes to the object. DANGEROUS
    obj.x = value.x;
    obj.y = value.y;
    obj.angle = current;
    current += increment;
    // the offset shrinks towards zero and then stays there
    tapper = Math.min(tapper + increment, 0);
    return value;
  };

  var placement = function(keys) {
    placed = keys;
    // restart the pass so repeated calls (and start(_)) behave predictably
    current = start;
    tapper = initialTapper;
    increment = 360 / keys.length;

    keys.forEach(function(k) {
      place(k);
    });
  };

  // getter: the objects positioned by the most recent layout pass
  // setter: positions the given objects immediately
  placement.keys = function(_) {
    if (!arguments.length) {
      return placed;
    }
    placement(_);
    return placement;
  };

  placement.center = function(_) {
    if (!arguments.length) {
      return center;
    }
    center = _;
    return placement;
  };

  placement.width = function(_) {
    if (!arguments.length) {
      return width;
    }
    width = _;
    return placement;
  };

  placement.height = function(_) {
    if (!arguments.length) {
      return height;
    }
    height = _;
    return placement;
  };

  placement.start = function(_) {
    if (!arguments.length) {
      return start;
    }
    start = _;
    return placement;
  };

  return placement;
};

/**
 * Builds the TextArc chart (D3 v3 reusable-chart pattern).
 *
 * The returned function is meant for selection.call(). The selection's datum
 * must be {sentences: [...], words: [...]} as produced by sentenceLengths() and
 * getWords(). For each selected element it:
 *   - lays the sentences out around an ellipse and draws them as tiny text,
 *   - draws every non-stop-word at its average position, brighter when frequent,
 *   - on hover, draws lines to each occurrence of the word, shows the word in
 *     #word and highlights the sentences containing it.
 *
 * @returns {function(d3.selection)} the chart function
 */
var chart = function() {
  var width = visWidth;
  var height = visHeight;
  var margin = {top: 20, right: 20, bottom: 20, left: 20};
  var g = null;          // inner <g> that holds all drawn elements
  var sentence = null;   // selection of sentence <text> nodes
  var word = null;       // selection of word <text> nodes

  // NOTE: this mutates the sentence records, adding x, y and angle to each
  var sentenceCenters = radialPlacement().width(520).center({"x":width / 2 - 30, "y":height / 2 });

  var chart = function(selection) {
    selection.each(function(rawData) {

      var sentences = rawData.sentences;
      sentenceCenters(sentences);

      var words = rawData.words;

      // create the <svg><g> skeleton once; in D3 v3 the entered nodes join `svg`
      var svg = d3.select(this).selectAll("svg").data([sentences]);
      svg.enter().append("svg").append("g");

      svg.attr("width", width + margin.left + margin.right );
      svg.attr("height", height + margin.top + margin.bottom );
      g = svg.select("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

      sentence = g.selectAll(".sentence")
        .data(sentences).enter()
        .append("text")
        .attr("class", "sentence")
        .attr("x",  function(d) { return d.x; })
        .attr("y",  function(d) { return d.y; })
        .attr("text-anchor", "start")
        .attr("font-size", "2px")
        .text(function(d) { return d.sentence; });

      // log scale so a handful of very frequent words do not wash out the rest
      var maxCount = d3.max(words, function(w) { return w.count; });
      var color = d3.scale.log()
        .domain([1,maxCount / 2])
        .range(["#333", "#fff"]);

      // stop_words is all lowercase while key keeps the text's casing ("The"),
      // so the comparison has to be made on the lowercased key
      var visibleWords = words.filter(function(w) {
        return stop_words.indexOf(w.key.toLowerCase()) === -1;
      });

      word = g.selectAll(".word")
        .data(visibleWords).enter()
        .append("text")
        .attr("class", "word")
        .attr("x",  function(d) { return d.x; })
        .attr("y",  function(d) { return d.y; })
        // anchor towards the centre line so labels grow inwards
        .attr("text-anchor", function(d) { return d.x > (width / 2) ? "end" : "start"; })
        .attr("font-size", "8px")
        .attr("fill", function(d) { return  color(d.count); })
        .text(function(d) { return d.visual; })
        .on("mouseover", mouseover)
        .on("mouseout", mouseout);
    });
  };

  //TODO: this will match sentences with sub-words in them as well.
  // example "mouse" will match "mouse" but also "dormouse".
  // a fix would be to add spaces around the word - but then we need
  // to ensure that the lookupSentence is removing 's and other punctuation properly
  function getSentencesWith(aWord) {
    var needle = aWord.toLowerCase();
    return sentence.filter(function(s) {
      return s.lookupSentence.indexOf(needle) > -1;
    });
  }

  function mouseover(d) {
    var bbox = this.getBBox();
    // start the connector lines from the middle of the label
    var direction = d.x > (width / 2) ? -1 : 1;
    g.selectAll(".line")
      .data(d.positions)
      .enter()
      .append("line")
      .attr("class", "line")
      .attr("x1", d.x + (direction * (bbox.width / 2)))
      .attr("y1", d.y - (bbox.height / 3))
      .attr("x2", function(p) { return p.x; })
      .attr("y2", function(p) { return p.y; });

    // the word comes from the loaded text, so insert it as text, never as markup
    d3.select("#word").text(d.visual);

    // the matching sentences are looked up once per word and cached on the datum
    if( !d.sentences ) {
      d.sentences = getSentencesWith(d.key);
    }
    d.sentences.classed("highlight", true).moveToFront();
  }

  function mouseout() {
    g.selectAll(".line").remove();
    sentence.classed("highlight", false);
  }

  return chart;
};

/**
 * Binds `data` to the element matched by `selector` and invokes `plot` on it.
 *
 * @param {string} selector - CSS selector of the container element
 * @param {*} data - datum handed to the chart
 * @param {function} plot - chart function that receives the selection
 */
function plotData(selector, data, plot) {
  var target = d3.select(selector).datum(data);
  target.call(plot);
}


// The single chart instance; display() hands it to plotData() to render into #vis.
var plot = chart();

/**
 * Completion callback for the text request: derives sentence and word data from
 * the loaded text and renders the chart into #vis.
 *
 * @param {*} error - truthy when the request failed
 * @param {string} text - contents of the loaded text file
 */
function display(error, text) {
  // Without this guard a failed request surfaces as an unrelated TypeError
  // when the undefined text is split.
  if (error || typeof text !== "string") {
    console.error("Could not load the text for the visualization:", error);
    return;
  }

  var sentences = sentenceLengths(text);
  var words = getWords(text);
  plotData("#vis", {"sentences":sentences, "words": words}, plot);
}

// Load the book text, then pass (error, text) to display().
// NOTE(review): `queue` is not defined in this file; it is expected to be the global
// from the stand-alone queue.js library - confirm the page actually loads it.
queue()
  .defer(d3.text, "data/alice.txt")
  .await(display);