block by fil ef49ae2aebcded9b41f414da3cff5c90

Les couleurs du Décodex

Full Screen

Une carte du décodex du Monde.

[Mise à jour: le serveur du Monde n’autorise plus l’accès de sites tiers au fichier de données https://www.lemonde.fr/webservice/decodex/updates, ce qui casse cette visualisation. Ne sachant s’il est autorisé de recopier le fichier ici, je m’abstiens.]

Le Monde vient de publier son “Décodex: nos conseils pour vérifier les informations qui circulent en ligne”.

Il s’agit d’une liste de sites, commentés et notés selon un schéma de couleurs.

Nous les regroupons ici en fonction de certains mots-clés contenus dans les descriptifs, avec l’algorithme t-SNE.

index.html

<!DOCTYPE html>
<head>
    <meta charset="utf-8">
    <script src="https://d3js.org/d3.v4.min.js"></script>
 
    <style>
        body {
            margin: 0;
            position: fixed;
            top: 0;
            right: 0;
            bottom: 0;
            left: 0;
          font-family: monospace;
        }
      #tip {
        position: absolute;
        top: -100;
        left: 0;
        z-index: 2;
        background: white;
        padding: 2px;
        pointer-events: none;
        max-width: 27em;
      }
      
      #legende {
        position: absolute;
        top: 0;
        left: 0;
        z-index: 1;
        background: rgba(255,255,255,0.2);
        padding: 2px;
        pointer-events: none;
        max-width: 27em;
      }
    </style>
</head>

<body>
<script>
    const width = 820,
        height = 820,
        margin = { top: 60, bottom: 60, left: 60, right: 60 },
        scalecountry = d3.scaleLinear().range(['#777', '#60c7ff',  '#fe4823', 'orange','#00e00b',]).domain([0,1,2,3,4]),
        centerx = d3.scaleLinear()
        .range([0, width]),
        centery = d3.scaleLinear()
        .range([0, height]);
    const svg = d3.select("body").append("svg")
            .attr("width", width + margin.left + margin.right)
            .attr("height", height + margin.top + margin.bottom)
    .append('g')
    .attr('transform', `translate(${margin.left}, ${margin.top})`);

const tip = d3.select('body').append('div').attr('id', 'tip')
  
    d3.json('https://www.lemonde.fr/webservice/decodex/updates', function (json) {

      
      tags = [/\bblog/, /bpas une source/, /\b(complot|conspi)/, /\bdroite/, /\b(gauche)/, /\b(extrême[- ]droite|national[ei]|patri|antisé|migran|raciste|homophob|islam|ivg)/, /\bfran[çc][ea]/, /\baméri/, /\b(faux|fausse|erroné|fictif|mauvais|trompeu|condamn)/, /\b(satirique|parodi|blague|caricatur)/, /b(opinion|militant)/, /\brégional/, /\b(journal\b|presse|quotidien|magazine|hebdo)/, /\bspécialis/, /\b(peu fiable|sensation|racole|vérifi)/ ];
      
      
        const data = d3.values(json.sites)
            .map((d, i) => {
              return {
                lon: Math.random(),
                lat: Math.random(), 
                name: d[2],
                link: '//www.lemonde.fr/verification/source/' + d[3] + '/',
                r: 12.5,
                score: +d[0],
                desc: d[1],
                tags: tags.map(tag => !!d[1].toLowerCase().match(tag)),
                color: scalecountry(+d[0])
              };
            });


      
        // pos is the array of positions that will be updated by the tsne worker
        // start with the geographic coordinates as is (plate-carrée)
        // random or [0,0] is fine too
        let pos = data.map(d => [d.lon, -d.lat]);


        const forcetsne = d3.forceSimulation(
          data.map(d => (d.x = width / 2, d.y = height / 2, d))
        )
            .alphaDecay(0.005)
            .alpha(0.2)
            .force('tsne', function (alpha) {

                centerx.domain(d3.extent(pos.map(d => d[0]))); 
                centery.domain(d3.extent(pos.map(d => d[1])));

                data.forEach((d, i) => {
                    d.x += alpha * (centerx(pos[i][0]) - d.x);
                    d.y += alpha * (centery(pos[i][1]) - d.y);
                });
            })
            .force('collide', d3.forceCollide().radius(d => 1 + d.r))
            .on('tick', function () {

              // drawcanvas(canvas, data);
              drawsvg(svg, data);

            });

        function drawsvg(svg, nodes) {
          const g = svg.selectAll('g.city')
          .data(nodes);
          
          var enter = g.enter().append('g').classed('city', true);

          enter.append('circle')
          .attr('r', d => d.r)
          .attr('fill', d => d.color)
          .on('click', d => {
            window.open(d.link,'targetWindow');
          })
          .on('mouseover', function(d) {
            tip
            .style('left', Math.min(width - 200, d.x - 10)+'px')
            .style('top', Math.min(height-100, d.y + margin.top + 10)+'px')
            .html(d.name + '<br><small><em>Commentaire des Décodeurs:</em> ' + d.desc + '</small>')
          }); 
          
          enter
          .filter(d => d.r > 7)
          .append('text')
          .attr('fill', 'white')
          .style('font-size', '10px')
          .attr('text-anchor', 'middle')
          .attr('dominant-baseline', 'middle')
          .attr('pointer-events', 'none')
          .text(d => d.name.replace(/^(Le |La |L')/, '').substring(0,4));
          
          g.attr('transform', d => `translate(${d.x},${d.y})`);

        }

      
d3.queue()
    .defer(d3.text, 'tsne.js')
    .defer(d3.text, 'https://unpkg.com/d3-geo')
    .defer(d3.text, 'worker.js')
    .awaitAll(function (err, scripts) {

    const worker = new Worker(
      window.URL.createObjectURL(
        new Blob(scripts, {
          type: "text/javascript"
        })
      )
    );
  
  worker.postMessage({
      maxIter: 10,
      dim: 2,
      perplexity: 30.0,
      data: data
  });

  worker.onmessage = function (e) {
    if (e.data.log) console.log.apply(this, e.data.log);
    if (e.data.pos) pos = e.data.pos;
    if (e.data.done && e.data.done < 10000  && e.data.cost > 1e-2) {
      worker.postMessage({
            maxIter: e.data.done + 10,
      });
    }
  };
});
      
    });
</script>
</body>

tsne.js

// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };

(function(global) {
  "use strict";

  // utility function
  var assert = function(condition, message) {
    if (!condition) { throw message || "Assertion failed"; }
  }

  // syntax sugar
  var getopt = function(opt, field, defaultval) {
    if(opt.hasOwnProperty(field)) {
      return opt[field];
    } else {
      return defaultval;
    }
  }

  // return 0 mean unit standard deviation random number
  var return_v = false;
  var v_val = 0.0;
  var gaussRandom = function() {
    if(return_v) { 
      return_v = false;
      return v_val; 
    }
    var u = 2*Math.random()-1;
    var v = 2*Math.random()-1;
    var r = u*u + v*v;
    if(r == 0 || r > 1) return gaussRandom();
    var c = Math.sqrt(-2*Math.log(r)/r);
    v_val = v*c; // cache this for next function call for efficiency
    return_v = true;
    return u*c;
  }

  // return random normal number
  var randn = function(mu, std){ return mu+gaussRandom()*std; }

  // utilitity that creates contiguous vector of zeros of size n
  var zeros = function(n) {
    if(typeof(n)==='undefined' || isNaN(n)) { return []; }
    if(typeof ArrayBuffer === 'undefined') {
      // lacking browser support
      var arr = new Array(n);
      for(var i=0;i<n;i++) { arr[i]= 0; }
      return arr;
    } else {
      return new Float64Array(n); // typed arrays are faster
    }
  }

  // utility that returns 2d array filled with random numbers
  // or with value s, if provided
  var randn2d = function(n,d,s) {
    var uses = typeof s !== 'undefined';
    var x = [];
    for(var i=0;i<n;i++) {
      var xhere = [];
      for(var j=0;j<d;j++) { 
        if(uses) {
          xhere.push(s); 
        } else {
          xhere.push(randn(0.0, 1e-4)); 
        }
      }
      x.push(xhere);
    }
    return x;
  }

  // compute L2 distance between two vectors
  var L2 = function(x1, x2) {
    var D = x1.length;
    var d = 0;
    for(var i=0;i<D;i++) { 
      var x1i = x1[i];
      var x2i = x2[i];
      d += (x1i-x2i)*(x1i-x2i);
    }
    return d;
  }

  // compute pairwise distance in all vectors in X
  var xtod = function(X) {
    var N = X.length;
    var dist = zeros(N * N); // allocate contiguous array
    for(var i=0;i<N;i++) {
      for(var j=i+1;j<N;j++) {
        var d = L2(X[i], X[j]);
        dist[i*N+j] = d;
        dist[j*N+i] = d;
      }
    }
    return dist;
  }

  // compute (p_{i|j} + p_{j|i})/(2n)
  var d2p = function(D, perplexity, tol) {
    var Nf = Math.sqrt(D.length); // this better be an integer
    var N = Math.floor(Nf);
    assert(N === Nf, "D should have square number of elements.");
    var Htarget = Math.log(perplexity); // target entropy of distribution
    var P = zeros(N * N); // temporary probability matrix

    var prow = zeros(N); // a temporary storage compartment
    for(var i=0;i<N;i++) {
      var betamin = -Infinity;
      var betamax = Infinity;
      var beta = 1; // initial value of precision
      var done = false;
      var maxtries = 50;

      // perform binary search to find a suitable precision beta
      // so that the entropy of the distribution is appropriate
      var num = 0;
      while(!done) {
        //debugger;

        // compute entropy and kernel row with beta precision
        var psum = 0.0;
        for(var j=0;j<N;j++) {
          var pj = Math.exp(- D[i*N+j] * beta);
          if(i===j) { pj = 0; } // we dont care about diagonals
          prow[j] = pj;
          psum += pj;
        }
        // normalize p and compute entropy
        var Hhere = 0.0;
        for(var j=0;j<N;j++) {
          if(psum == 0) {
             var pj = 0;
          } else {
             var pj = prow[j] / psum;
          }
          prow[j] = pj;
          if(pj > 1e-7) Hhere -= pj * Math.log(pj);
        }

        // adjust beta based on result
        if(Hhere > Htarget) {
          // entropy was too high (distribution too diffuse)
          // so we need to increase the precision for more peaky distribution
          betamin = beta; // move up the bounds
          if(betamax === Infinity) { beta = beta * 2; }
          else { beta = (beta + betamax) / 2; }

        } else {
          // converse case. make distrubtion less peaky
          betamax = beta;
          if(betamin === -Infinity) { beta = beta / 2; }
          else { beta = (beta + betamin) / 2; }
        }

        // stopping conditions: too many tries or got a good precision
        num++;
        if(Math.abs(Hhere - Htarget) < tol) { done = true; }
        if(num >= maxtries) { done = true; }
      }

      // console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
      // copy over the final prow to P at row i
      for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }

    } // end loop over examples i

    // symmetrize P and normalize it to sum to 1 over all ij
    var Pout = zeros(N * N);
    var N2 = N*2;
    for(var i=0;i<N;i++) {
      for(var j=0;j<N;j++) {
        Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
      }
    }

    return Pout;
  }

  // helper function
  function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }

  var tSNE = function(opt) {
    var opt = opt || {};
    this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
    this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
    this.epsilon = getopt(opt, "epsilon", 10); // learning rate

    this.iter = 0;
  }

  tSNE.prototype = {

    // this function takes a set of high-dimensional points
    // and creates matrix P from them using gaussian kernel
    initDataRaw: function(X) {
      var N = X.length;
      var D = X[0].length;
      assert(N > 0, " X is empty? You must have some data!");
      assert(D > 0, " X[0] is empty? Where is the data?");
      var dists = xtod(X); // convert X to distances using gaussian kernel
      this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
      this.N = N; // back up the size of the dataset
      this.initSolution(); // refresh this
    },

    // this function takes a given distance matrix and creates
    // matrix P from them.
    // D is assumed to be provided as a list of lists, and should be symmetric
    initDataDist: function(D) {
      var N = D.length;
      assert(N > 0, " X is empty? You must have some data!");
      // convert D to a (fast) typed array version
      var dists = zeros(N * N); // allocate contiguous array
      for(var i=0;i<N;i++) {
        for(var j=i+1;j<N;j++) {
          var d = D[i][j];
          dists[i*N+j] = d;
          dists[j*N+i] = d;
        }
      }
      this.P = d2p(dists, this.perplexity, 1e-4);
      this.N = N;
      this.initSolution(); // refresh this
    },

    // (re)initializes the solution to random
    initSolution: function() {
      // generate random solution to t-SNE
      this.Y = randn2d(this.N, this.dim); // the solution
      this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
      this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
      this.iter = 0;
    },

    // return pointer to current solution
    getSolution: function() {
      return this.Y;
    },

    // perform a single step of optimization to improve the embedding
    step: function() {
      this.iter += 1;
      var N = this.N;

      var cg = this.costGrad(this.Y); // evaluate gradient
      var cost = cg.cost;
      var grad = cg.grad;

      // perform gradient step
      var ymean = zeros(this.dim);
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          var gid = grad[i][d];
          var sid = this.ystep[i][d];
          var gainid = this.gains[i][d];

          // compute gain update
          var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
          if(newgain < 0.01) newgain = 0.01; // clamp
          this.gains[i][d] = newgain; // store for next turn

          // compute momentum step direction
          var momval = this.iter < 250 ? 0.5 : 0.8;
          var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
          this.ystep[i][d] = newsid; // remember the step we took

          // step!
          this.Y[i][d] += newsid; 

          ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
        }
      }

      // reproject Y to be zero mean
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          this.Y[i][d] -= ymean[d]/N;
        }
      }

      //if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
      return cost; // return current cost
    },

    // for debugging: gradient check
    debugGrad: function() {
      var N = this.N;

      var cg = this.costGrad(this.Y); // evaluate gradient
      var cost = cg.cost;
      var grad = cg.grad;

      var e = 1e-5;
      for(var i=0;i<N;i++) {
        for(var d=0;d<this.dim;d++) {
          var yold = this.Y[i][d];

          this.Y[i][d] = yold + e;
          var cg0 = this.costGrad(this.Y);

          this.Y[i][d] = yold - e;
          var cg1 = this.costGrad(this.Y);
          
          var analytic = grad[i][d];
          var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
          console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);

          this.Y[i][d] = yold;
        }
      }
    },

    // return cost and gradient, given an arrangement
    costGrad: function(Y) {
      var N = this.N;
      var dim = this.dim; // dim of output space
      var P = this.P;

      var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima

      // compute current Q distribution, unnormalized first
      var Qu = zeros(N * N);
      var qsum = 0.0;
      for(var i=0;i<N;i++) {
        for(var j=i+1;j<N;j++) {
          var dsum = 0.0;
          for(var d=0;d<dim;d++) {
            var dhere = Y[i][d] - Y[j][d];
            dsum += dhere * dhere;
          }
          var qu = 1.0 / (1.0 + dsum); // Student t-distribution
          Qu[i*N+j] = qu;
          Qu[j*N+i] = qu;
          qsum += 2 * qu;
        }
      }
      // normalize Q distribution to sum to 1
      var NN = N*N;
      var Q = zeros(NN);
      for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }

      var cost = 0.0;
      var grad = [];
      for(var i=0;i<N;i++) {
        var gsum = new Array(dim); // init grad for point i
        for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
        for(var j=0;j<N;j++) {
          cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
          var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
          for(var d=0;d<dim;d++) {
            gsum[d] += premult * (Y[i][d] - Y[j][d]);
          }
        }
        grad.push(gsum);
      }

      return {cost: cost, grad: grad};
    }
  }

  global.tSNE = tSNE; // export tSNE class
})(tsnejs);


// export the library to window, or to module in nodejs
(function(lib) {
  "use strict";
  if (typeof module === "undefined" || typeof module.exports === "undefined") {
    if (typeof window == "object")
        window.tsnejs = lib; // in ordinary browser attach library to window
  } else {
    module.exports = lib; // in nodejs
  }
})(tsnejs);

worker.js

// in worker.onmessage add : `if (e.data.log) console.log.apply(this, e.data.log);`
console.log = function() {
  self.postMessage({ log: [...arguments] });
}

  function distance(a, b) {
  return a
    .map((_,i) => b[i] != a[i])
    .reduce((c,d) => c+d, 0);
}

let iterations = 0,
    model;

self.onmessage = function(e) {
  const msg = e.data,
        data = msg.data,
        maxIter = msg.maxIter || 500,
        perplexity = msg.perplexity || 30;

  let dists = msg.dist;
  if (data && !dists){
    let now = performance.now();
    dists = data.map(
      a => data.map(
        b => distance(a.tags, b.tags) + Math.abs(a.score - b.score)/10
      )
    );
    console.log('computed', data.length * data.length,'distances in', Math.round(performance.now()-now),'ms');
  }
  if (dists) {
      model = new tsnejs.tSNE({
          dim: msg.dim,
          perplexity: perplexity,
      });
      model.initDataDist(dists);
  }

  let startpos = model.getSolution().map(d=>d.slice()),
      pos;

  while (iterations++ < maxIter) {
       // every time you call this, solution gets better
       model.step();
    
    pos = model.getSolution();

       // Y is an array of 2-D points that you can plot
       self.postMessage({
         iterations: iterations - 1,
         pos: pos,
         //log: [ 'step', iterations-1 ]
       });

   }

  let cost = startpos
       .map((d,i) => sqdist(d, pos[i]))
       .reduce ((a,b) => a + b, 0) / pos.length / Math.max(...pos.map(d => Math.abs(d[0]) + Math.abs(d[1])));
  
   self.postMessage({
     done: iterations - 1,
     cost: cost,
     log: [ 'done', iterations - 1, cost ]
   });
};

function sqdist (a,b) {
  let d = [a[0] - b[0], a[1] - b[1]].map(Math.abs);
  return Math.max(d[0], d[1]); 
}