block by nitaku 4e81dac80f36f73be6a4

Text ontology

Full Screen

A visualization of a small text ontology (Bellandi et al. 2014), describing the concepts extracted from a portion of Galileo Galilei’s Sidereus Nuncius. The excerpt focuses on the first time Galileo manages to see three of Jupiter’s satellites with his “telescope” (Perspicillum). At the time, Galileo didn’t see the movement of the bodies, and classified them as fixed stars.

The visualization is created automatically from data with cola.js, using a set of constraints similar to the ones introduced in a previous example about tangled trees.

index.js

// Generated by CoffeeScript 1.4.0
(function() {
  var LABEL_WIDTH, PAD_MULTIPLIER, PROP_R, R, d3cola, defs, enter_label_box, enter_links, enter_nodes, enter_terms, graph, height, i, l, levels, links, n, nn, nodes, svg, topological_order, tspans, vis, width, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _len5, _len6, _len7, _m, _n, _o, _p, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6, _ref7;

  graph = {
    nodes: [
      {
        id: 0,
        type: 'class',
        terms: [],
        label: 'Thing'
      }, {
        id: 1,
        type: 'class',
        terms: [],
        label: 'Physical_Object'
      }, {
        id: 2,
        type: 'class',
        terms: ['Stellula'],
        label: 'Celestial_Body'
      }, {
        id: 3,
        type: 'class',
        terms: [],
        label: 'Wandering_Star'
      }, {
        id: 4,
        type: 'class',
        terms: ['inerrans stella'],
        label: 'Fixed_Star'
      }, {
        id: 5,
        type: 'class',
        label: 'Galilean_Moon',
        terms: []
      }, {
        id: 6,
        type: 'instance',
        terms: [],
        label: 'Moon'
      }, {
        id: 7,
        type: 'instance',
        terms: [],
        label: 'Earth'
      }, {
        id: 8,
        type: 'instance',
        terms: ['Iuppiter'],
        label: 'Jupiter'
      }, {
        id: 9,
        type: 'instance',
        terms: []
      }, {
        id: 10,
        type: 'instance',
        terms: []
      }, {
        id: 11,
        type: 'instance',
        terms: []
      }
    ],
    links: [
      {
        id: 1,
        source: 1,
        target: 0,
        type: 'is_a'
      }, {
        id: 2,
        source: 2,
        target: 1,
        type: 'is_a'
      }, {
        id: 3,
        source: 3,
        target: 2,
        type: 'is_a'
      }, {
        id: 4,
        source: 4,
        target: 2,
        type: 'is_a'
      }, {
        id: 5,
        source: 5,
        target: 4,
        type: 'is_a'
      }, {
        id: 6,
        source: 6,
        target: 3,
        type: 'instance_of'
      }, {
        id: 7,
        source: 7,
        target: 3,
        type: 'instance_of'
      }, {
        id: 8,
        source: 8,
        target: 3,
        type: 'instance_of'
      }, {
        id: 9,
        source: 9,
        target: 5,
        type: 'instance_of'
      }, {
        id: 10,
        source: 10,
        target: 5,
        type: 'instance_of'
      }, {
        id: 11,
        source: 11,
        target: 5,
        type: 'instance_of'
      }, {
        id: 13,
        source: 9,
        target: 8,
        type: 'is_near'
      }, {
        id: 14,
        source: 10,
        target: 8,
        type: 'is_near'
      }, {
        id: 15,
        source: 11,
        target: 8,
        type: 'is_near'
      }
    ]
  };

  /* objectify the graph
  */


  /* resolve node IDs (not optimized at all!)
  */


  _ref = graph.links;
  for (_i = 0, _len = _ref.length; _i < _len; _i++) {
    l = _ref[_i];
    _ref1 = graph.nodes;
    for (_j = 0, _len1 = _ref1.length; _j < _len1; _j++) {
      n = _ref1[_j];
      if (l.source === n.id) {
        l.source = n;
      }
      if (l.target === n.id) {
        l.target = n;
      }
    }
  }

  /* store the node index into the node itself
  */


  _ref2 = graph.nodes;
  for (i = _k = 0, _len2 = _ref2.length; _k < _len2; i = ++_k) {
    n = _ref2[i];
    n.i = i;
  }

  /* store neighbor nodes into each node
  */


  _ref3 = graph.nodes;
  for (i = _l = 0, _len3 = _ref3.length; _l < _len3; i = ++_l) {
    n = _ref3[i];
    n.in_neighbors = [];
    n.out_neighbors = [];
  }

  _ref4 = graph.links;
  for (_m = 0, _len4 = _ref4.length; _m < _len4; _m++) {
    l = _ref4[_m];
    l.source.out_neighbors.push(l.target);
    l.target.in_neighbors.push(l.source);
  }

  /* compute longest distances
  */


  topological_order = tsort(graph.links.filter(function(l) {
    return l.type === 'is_a' || l.type === 'instance_of';
  }).map(function(l) {
    return [l.source.i, l.target.i];
  }));

  _ref5 = graph.nodes;
  for (_n = 0, _len5 = _ref5.length; _n < _len5; _n++) {
    n = _ref5[_n];
    n.longest_dist = -Infinity;
  }

  graph.nodes[0].longest_dist = 0;

  _ref6 = topological_order.reverse();
  for (_o = 0, _len6 = _ref6.length; _o < _len6; _o++) {
    i = _ref6[_o];
    n = graph.nodes[i];
    _ref7 = n.in_neighbors;
    for (_p = 0, _len7 = _ref7.length; _p < _len7; _p++) {
      nn = _ref7[_p];
      if (nn.longest_dist < n.longest_dist + 1) {
        nn.longest_dist = n.longest_dist + 1;
      }
    }
  }

  graph.constraints = [];

  /* create the alignment contraints
  */


  levels = _.uniq(graph.nodes.filter(function(n) {
    return n.type === 'class' || n.type === 'instance';
  }).map(function(n) {
    return n.longest_dist;
  }));

  levels.forEach(function(depth) {
    return graph.constraints.push({
      type: 'alignment',
      axis: 'y',
      offsets: graph.nodes.filter(function(n) {
        return n.longest_dist === depth;
      }).map(function(n) {
        return {
          node: n.i,
          offset: 0
        };
      })
    });
  });

  R = 18;

  PROP_R = 1;

  PAD_MULTIPLIER = 3.5;

  LABEL_WIDTH = 100;

  /* create the position contraints
  */


  levels.forEach(function(depth, i) {
    var n1, n2;
    if (i < levels.length - 1) {
      n1 = _.find(graph.nodes, function(n) {
        return n.longest_dist === depth;
      });
      n2 = _.find(graph.nodes, function(n) {
        return n.longest_dist === depth + 1;
      });
      return graph.constraints.push({
        axis: 'y',
        left: n1.i,
        right: n2.i,
        gap: 2 * R
      });
    }
  });

  svg = d3.select('svg');

  width = svg.node().getBoundingClientRect().width;

  height = svg.node().getBoundingClientRect().height;

  defs = svg.append('defs');

  vis = svg.append('g').attr({
    transform: 'translate(20,80)'
  });

  /* define arrow markers for graph links
  */


  defs.append('marker').attr({
    id: 'end_arrow_is_a',
    viewBox: '-2 -2 10 14',
    refX: 5 + R,
    refY: 5,
    markerWidth: 6.3,
    markerHeight: 4.5,
    orient: 'auto'
  }).append('path').attr({
    d: 'M0,0 L0,10 L9,5 z'
  });

  defs.append('marker').attr({
    id: 'end_arrow_instance_of',
    viewBox: '-2 -2 10 14',
    refX: 5 + R,
    refY: 5,
    markerWidth: 6.3,
    markerHeight: 4.5,
    orient: 'auto'
  }).append('path').attr({
    d: 'M0,0 L0,10 L9,5 z'
  });

  /* create nodes and links
  */


  links = vis.selectAll('.link').data(graph.links, function(d) {
    return d.id;
  });

  enter_links = links.enter().append('line').attr('class', function(d) {
    return "link " + d.type;
  });

  enter_links.append('title').text(function(d) {
    var s, t;
    s = d.source.label != null ? d.source.label.toUpperCase() : "Unlabeled " + d.source.type;
    t = d.target.label != null ? d.target.label.toUpperCase() : "Unlabeled " + d.target.type;
    return "" + s + " " + d.type + " " + t;
  });

  nodes = vis.selectAll('.node').data(graph.nodes, function(d) {
    return d.id;
  });

  enter_nodes = nodes.enter().append('g').attr('class', function(d) {
    return "node " + d.type;
  });

  enter_nodes.append('circle').attr('r', R);

  enter_nodes.append('title').text(function(d) {
    if (d.label != null) {
      return "" + (d.label.toUpperCase()) + " (" + d.type + ")";
    } else {
      return "Unlabeled " + d.type;
    }
  });

  /* draw the label box
  */


  enter_label_box = enter_nodes.append('g');

  enter_label_box.append('text').text(function(d) {
    return d.label;
  }).attr({
    "class": 'label',
    x: R + 4,
    dy: function(d) {
      if ((d.terms != null) && d.terms.length > 0) {
        return '-0.25em';
      } else {
        return '0.35em';
      }
    }
  });

  /* draw the terms
  */


  enter_terms = enter_label_box.append('text').attr({
    "class": 'terms'
  });

  tspans = enter_terms.selectAll('tspan').data(function(d) {
    if (d.terms != null) {
      return d.terms;
    } else {
      return [];
    }
  });

  tspans.enter().append('tspan').text(function(d) {
    return d;
  }).attr({
    x: R + 6,
    y: function(d, i) {
      return "" + i + "em";
    },
    dy: '0.85em'
  });

  /* cola layout
  */


  graph.nodes.forEach(function(v) {
    if (v.type === 'property') {
      v.width = 2 * PROP_R;
      return v.height = 2 * PROP_R;
    } else {
      v.width = PAD_MULTIPLIER * R + ((v.label != null) || (v.terms != null) && v.terms.length > 0 ? LABEL_WIDTH : 0);
      return v.height = PAD_MULTIPLIER * R;
    }
  });

  d3cola = cola.d3adaptor().size([width, height]).linkDistance(function(l) {
    if (l.type === 'instance_of') {
      return 60;
    } else if (l.type === '_has_property') {
      return 20;
    } else {
      return 80;
    }
  }).constraints(graph.constraints).avoidOverlaps(true).nodes(graph.nodes).links(graph.links).on('tick', function() {
    /* update nodes and links
    */
    nodes.attr('transform', function(d) {
      return "translate(" + d.x + "," + d.y + ")";
    });
    return links.attr('x1', function(d) {
      return d.source.x;
    }).attr('y1', function(d) {
      return d.source.y;
    }).attr('x2', function(d) {
      return d.target.x;
    }).attr('y2', function(d) {
      return d.target.y;
    });
  });

  d3cola.start(100, 30, 30);

  svg.append('foreignObject').html('<div>Die itaque septima Ianuarii, instantis anni millesimi sexcentesimi decimi, hora sequentis noctis prima, cum caelestia sydera per Perspicillum spectarem, <b>Iuppiter</b> sese obviam fecit; cumque admodum excellens mihi parassem instrumentum (quod antea ob alterius organi debilitatem minime contingerat), tres illi adstare <b>Stellulas</b>, exiguas quidem, veruntamen clarissimas, cognovi; quae, licet e numero <b>inerrantium</b> a me crederentur, nonnullam tamen intulerunt admirationem, eo quod secundum exactam lineam rectam atque Eclipticae parallelam dispositae videbantur, ac caeteris magnitudine paribus splendidiores.</div>').attr({
    "class": 'text',
    width: 360,
    height: 200,
    x: 30,
    y: 30
  });

}).call(this);

index.html

<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Text ontology</title>
    <link rel="stylesheet" href="index.css">
    <script src="//d3js.org/d3.v3.min.js"></script>
    <script src="//marvl.infotech.monash.edu/webcola/cola.v3.min.js"></script>
    <script src="tsort.js"></script>
    <script src="//cdnjs.cloudflare.com/ajax/libs/lodash.js/3.10.0/lodash.min.js"></script>
  </head>
  <body>
    <svg width="960px" height="500px"></svg>
    <script src="index.js"></script>
  </body>
</html>

index.coffee

graph = {
  nodes: [
    {id:  0, type: 'class', terms: [], label: 'Thing'},
    {id:  1, type: 'class', terms: [], label: 'Physical_Object'},
    {id:  2, type: 'class', terms: ['Stellula'], label: 'Celestial_Body'},
    {id:  3, type: 'class', terms: [], label: 'Wandering_Star'},
    {id:  4, type: 'class', terms: ['inerrans stella'], label: 'Fixed_Star'},
    {id:  5, type: 'class', label: 'Galilean_Moon', terms: []},
    {id:  6, type: 'instance', terms: [], label: 'Moon'},
    {id:  7, type: 'instance', terms: [], label: 'Earth'},
    {id:  8, type: 'instance', terms: ['Iuppiter'], label: 'Jupiter'},
    {id:  9, type: 'instance', terms: []},
    {id: 10, type: 'instance', terms: []},
    {id: 11, type: 'instance', terms: []}
  ],
  links: [
    {id:  1, source:  1, target: 0, type: 'is_a'},
    {id:  2, source:  2, target: 1, type: 'is_a'},
    {id:  3, source:  3, target: 2, type: 'is_a'},
    {id:  4, source:  4, target: 2, type: 'is_a'},
    {id:  5, source:  5, target: 4, type: 'is_a'},
    {id:  6, source:  6, target: 3, type: 'instance_of'},
    {id:  7, source:  7, target: 3, type: 'instance_of'},
    {id:  8, source:  8, target: 3, type: 'instance_of'},
    {id:  9, source:  9, target: 5, type: 'instance_of'},
    {id: 10, source: 10, target: 5, type: 'instance_of'},
    {id: 11, source: 11, target: 5, type: 'instance_of'},
    {id:  13, source:  9, target: 8, type: 'is_near'},
    {id:  14, source: 10, target: 8, type: 'is_near'},
    {id:  15, source: 11, target: 8, type: 'is_near'}
  ]}


### objectify the graph ###
### resolve node IDs (not optimized at all!) ###
for l in graph.links
  for n in graph.nodes
    if l.source is n.id
      l.source = n

    if l.target is n.id
      l.target = n
  
### store the node index into the node itself ###
for n, i in graph.nodes
  n.i = i
  
### store neighbor nodes into each node ###
for n, i in graph.nodes
  n.in_neighbors = []
  n.out_neighbors = []
  
for l in graph.links
  l.source.out_neighbors.push l.target
  l.target.in_neighbors.push l.source
  
### compute longest distances ###
topological_order = tsort(graph.links.filter((l) -> l.type is 'is_a' or l.type is 'instance_of').map (l) -> [l.source.i, l.target.i])
# console.debug 'Topological order: ' + topological_order

for n in graph.nodes
  n.longest_dist = -Infinity
  
# Thing (0) is the root node
graph.nodes[0].longest_dist = 0

for i in topological_order.reverse()
  n = graph.nodes[i]
  for nn in n.in_neighbors
    if nn.longest_dist < n.longest_dist+1
      nn.longest_dist = n.longest_dist+1

# console.debug graph.nodes

graph.constraints = []

### create the alignment contraints ###
levels = _.uniq graph.nodes.filter((n) -> n.type is 'class' or n.type is 'instance').map (n) -> n.longest_dist
# console.log levels
levels.forEach (depth) ->
  graph.constraints.push {
    type: 'alignment',
    axis: 'y',
    offsets: graph.nodes.filter((n) -> n.longest_dist is depth).map (n) -> {
      node: n.i,
      offset: 0
    }
  }

R = 18
PROP_R = 1
PAD_MULTIPLIER = 3.5
LABEL_WIDTH = 100

### create the position contraints ###
levels.forEach (depth, i) ->
  if i < levels.length-1
    n1 = _.find graph.nodes, (n) -> n.longest_dist is depth
    n2 = _.find graph.nodes, (n) -> n.longest_dist is depth+1
    graph.constraints.push {
      axis: 'y',
      left: n1.i,
      right: n2.i,
      gap: 2*R
    }

svg = d3.select('svg')
width = svg.node().getBoundingClientRect().width
height = svg.node().getBoundingClientRect().height

defs = svg.append('defs')

vis = svg.append('g')
  .attr
    transform: 'translate(20,80)'

### define arrow markers for graph links ###
defs.append('marker')
  .attr
    id: 'end_arrow_is_a'
    viewBox: '-2 -2 10 14'
    refX: 5+R
    refY: 5
    markerWidth: 6.3
    markerHeight: 4.5
    orient: 'auto'
.append('path')
  .attr
    d: 'M0,0 L0,10 L9,5 z'
    
defs.append('marker')
  .attr
    id: 'end_arrow_instance_of'
    viewBox: '-2 -2 10 14'
    refX: 5+R
    refY: 5
    markerWidth: 6.3
    markerHeight: 4.5
    orient: 'auto'
.append('path')
  .attr
    d: 'M0,0 L0,10 L9,5 z'

### create nodes and links ###
links = vis.selectAll('.link')
  .data(graph.links, (d) -> d.id)
  
enter_links = links
  .enter().append('line')
    .attr('class', (d) -> "link #{d.type}")
    
enter_links.append('title')
  .text (d) ->
    s = if d.source.label? then d.source.label.toUpperCase() else "Unlabeled #{d.source.type}"
    t = if d.target.label? then d.target.label.toUpperCase() else "Unlabeled #{d.target.type}"
    
    return "#{s} #{d.type} #{t}"

nodes = vis.selectAll('.node')
  .data(graph.nodes, (d) -> d.id)
        
enter_nodes = nodes.enter().append('g')
  .attr('class', (d) -> "node #{d.type}")

enter_nodes.append('circle')
  .attr('r', R)
  
enter_nodes.append('title')
  .text (d) ->
    if d.label?
      return "#{d.label.toUpperCase()} (#{d.type})"
    else
      return "Unlabeled #{d.type}"

### draw the label box ###
enter_label_box = enter_nodes.append('g')

enter_label_box.append('text')
  .text((d) -> d.label)
  .attr
    class: 'label'
    x: R+4
    dy: (d) -> if d.terms? and d.terms.length > 0 then '-0.25em' else '0.35em'
    
### draw the terms ###
enter_terms = enter_label_box.append('text')
  .attr
    class: 'terms'
    
tspans = enter_terms.selectAll('tspan')
  .data((d) -> if d.terms? then d.terms else [])

tspans.enter().append('tspan')
  .text((d) -> d)
  .attr
    x: R+6
    y: (d,i) -> "#{i}em"
    dy: '0.85em'
  
### cola layout ###
graph.nodes.forEach (v) ->
  if v.type is 'property'
    v.width = 2*PROP_R
    v.height = 2*PROP_R
  else
    v.width = PAD_MULTIPLIER*R + if v.label? or v.terms? and v.terms.length > 0 then LABEL_WIDTH else 0
    v.height = PAD_MULTIPLIER*R

d3cola = cola.d3adaptor()
  .size([width, height])
  .linkDistance((l) -> if l.type is 'instance_of' then 60 else if l.type is '_has_property' then 20 else 80)
  .constraints(graph.constraints)
  .avoidOverlaps(true)
  .nodes(graph.nodes)
  .links(graph.links)
  .on 'tick', () ->
    ### update nodes and links  ###
    nodes
      .attr('transform', (d) -> "translate(#{d.x},#{d.y})")

    links
      .attr('x1', (d) -> d.source.x)
      .attr('y1', (d) -> d.source.y)
      .attr('x2', (d) -> d.target.x)
      .attr('y2', (d) -> d.target.y)
      
d3cola.start(100,30,30)

svg.append('foreignObject')
  .html('<div>Die itaque septima Ianuarii, instantis anni millesimi sexcentesimi decimi, hora sequentis noctis prima, cum caelestia sydera per Perspicillum spectarem, <b>Iuppiter</b> sese obviam fecit; cumque admodum excellens mihi parassem instrumentum (quod antea ob alterius organi debilitatem minime contingerat), tres illi adstare <b>Stellulas</b>, exiguas quidem, veruntamen clarissimas, cognovi; quae, licet e numero <b>inerrantium</b> a me crederentur, nonnullam tamen intulerunt admirationem, eo quod secundum exactam lineam rectam atque Eclipticae parallelam dispositae videbantur, ac caeteris magnitudine paribus splendidiores.</div>')
  .attr
    class: 'text'
    width: 360
    height: 200
    x: 30
    y: 30

index.css

.label {
  font-family: monospace;
  text-transform: uppercase;
}
.terms {
  font-style: italic;
  font-size: 11px;
  fill: #333;
}

.node > circle {
  stroke-width: 0;
}
.node.class > circle {
  fill: #fdb462;
  stroke: #e3943c;
}
.node.instance > circle {
  fill: #bebada;
  stroke: #928dbd;
}
.node:hover > circle {
  stroke-width: 2px;
}
.node:hover .label {
  font-weight: bold;
}

.link {
  stroke: #DDD;
  stroke-width: 4px;
  opacity: 0.6;
}
.link:hover {
  opacity: 1;
}

.link.is_a {
  stroke: #fdb462;
  marker-end: url(#end_arrow_is_a);
}
#end_arrow_is_a {
  stroke: #fdb462;
  stroke-width: 2.3px;
  fill: white;
}
.link.instance_of {
  stroke: #bebada;
  marker-end: url(#end_arrow_instance_of);
}
#end_arrow_instance_of {
  stroke: #bebada;
  stroke-width: 2.3px;
  fill: white;
}

.link.is_near {
  stroke: #8dd3c7;
  marker-end: none;
}

.text {
  font-size: 15px;
  text-align: justify;
}
.text > div::first-letter {
  font-size: 3em;
}
.text > div::first-line {
  line-height: 100%;
}

tsort.js

/**
 * general topological sort
 * @author SHIN Suzuki (shinout310@gmail.com)
 * @param Array<Array> edges : list of edges. each edge forms Array<ID,ID> e.g. [12 , 3]
 *
 * @returns Array : topological sorted list of IDs
 **/

function tsort(edges) {
  var nodes   = {}, // hash: stringified id of the node => { id: id, afters: lisf of ids }
      sorted  = [], // sorted list of IDs ( returned value )
      visited = {}; // hash: id of already visited node => true

  var Node = function(id) {
    this.id = id;
    this.afters = [];
  };

  // 1. build data structures
  edges.forEach(function(v) {
    var from = v[0], to = v[1];
    if (!nodes[from]) nodes[from] = new Node(from);
    if (!nodes[to]) nodes[to]     = new Node(to);
    nodes[from].afters.push(to);
  });

  // 2. topological sort
  Object.keys(nodes).forEach(function visit(idstr, ancestors) {
    var node = nodes[idstr],
        id   = node.id;

    // if already exists, do nothing
    if (visited[idstr]) return;

    if (!Array.isArray(ancestors)) ancestors = [];

    ancestors.push(id);

    visited[idstr] = true;

    node.afters.forEach(function(afterID) {
      if (ancestors.indexOf(afterID) >= 0)  // if already in ancestors, a closed chain exists.
        throw new Error('closed chain : ' +  afterID + ' is in ' + id);

      visit(afterID.toString(), ancestors.map(function(v) { return v })); // recursive call
    });

    sorted.unshift(id);
  });

  return sorted;
}

/**
 * TEST
 **/
function tsortTest() {

  // example 1: success
  var edges = [
    [1, 2],
    [1, 3],
    [2, 4],
    [3, 4]
  ];

  var sorted = tsort(edges);
  console.log(sorted);

  // example 2: failure ( A > B > C > A )
  edges = [
    ['A', 'B'],
    ['B', 'C'],
    ['C', 'A']
  ];

  try {
    sorted = tsort(edges);
  }
  catch (e) {
    console.log(e.message);
  }

  // example 3: generate random edges
  var max = 100, iteration = 30;
  function randomInt(max) {
    return Math.floor(Math.random() * max) + 1;
  }

  edges = (function() {
    var ret = [], i = 0;
    while (i++ < iteration) ret.push( [randomInt(max), randomInt(max)] );
    return ret;
  })();

  try {
    sorted = tsort(edges);
    console.log("succeeded", sorted);
  }
  catch (e) {
    console.log("failed", e.message);
  }

}


// for node.js
if (typeof exports == 'object' && exports === this) {
  module.exports = tsort;
  if (process.argv[1] === __filename) tsortTest();
}