[Mise à jour: le serveur du Monde n’autorise plus l’accès de sites tiers au fichier de données https://www.lemonde.fr/webservice/decodex/updates, ce qui casse cette visualisation. Ne sachant s’il est autorisé de recopier le fichier ici, je m’abstiens.]
Le Monde vient de publier son “Décodex: nos conseils pour vérifier les informations qui circulent en ligne”.
Il s’agit d’une liste de sites, commentés et notés selon un schéma de couleurs.
Nous les regroupons ici en fonction de certains mots-clés contenus dans les descriptifs, avec l’algorithme t-SNE.
<!DOCTYPE html>
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v4.min.js"></script>
<style>
body {
margin: 0;
position: fixed;
top: 0;
right: 0;
bottom: 0;
left: 0;
font-family: monospace;
}
#tip {
position: absolute;
top: -100;
left: 0;
z-index: 2;
background: white;
padding: 2px;
pointer-events: none;
max-width: 27em;
}
#legende {
position: absolute;
top: 0;
left: 0;
z-index: 1;
background: rgba(255,255,255,0.2);
padding: 2px;
pointer-events: none;
max-width: 27em;
}
</style>
</head>
<body>
<script>
const width = 820,
height = 820,
margin = { top: 60, bottom: 60, left: 60, right: 60 },
scalecountry = d3.scaleLinear().range(['#777', '#60c7ff', '#fe4823', 'orange','#00e00b',]).domain([0,1,2,3,4]),
centerx = d3.scaleLinear()
.range([0, width]),
centery = d3.scaleLinear()
.range([0, height]);
const svg = d3.select("body").append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append('g')
.attr('transform', `translate(${margin.left}, ${margin.top})`);
const tip = d3.select('body').append('div').attr('id', 'tip')
d3.json('https://www.lemonde.fr/webservice/decodex/updates', function (json) {
tags = [/\bblog/, /bpas une source/, /\b(complot|conspi)/, /\bdroite/, /\b(gauche)/, /\b(extrême[- ]droite|national[ei]|patri|antisé|migran|raciste|homophob|islam|ivg)/, /\bfran[çc][ea]/, /\baméri/, /\b(faux|fausse|erroné|fictif|mauvais|trompeu|condamn)/, /\b(satirique|parodi|blague|caricatur)/, /b(opinion|militant)/, /\brégional/, /\b(journal\b|presse|quotidien|magazine|hebdo)/, /\bspécialis/, /\b(peu fiable|sensation|racole|vérifi)/ ];
const data = d3.values(json.sites)
.map((d, i) => {
return {
lon: Math.random(),
lat: Math.random(),
name: d[2],
link: '//www.lemonde.fr/verification/source/' + d[3] + '/',
r: 12.5,
score: +d[0],
desc: d[1],
tags: tags.map(tag => !!d[1].toLowerCase().match(tag)),
color: scalecountry(+d[0])
};
});
// pos is the array of positions that will be updated by the tsne worker
// start with the geographic coordinates as is (plate-carrée)
// random or [0,0] is fine too
let pos = data.map(d => [d.lon, -d.lat]);
const forcetsne = d3.forceSimulation(
data.map(d => (d.x = width / 2, d.y = height / 2, d))
)
.alphaDecay(0.005)
.alpha(0.2)
.force('tsne', function (alpha) {
centerx.domain(d3.extent(pos.map(d => d[0])));
centery.domain(d3.extent(pos.map(d => d[1])));
data.forEach((d, i) => {
d.x += alpha * (centerx(pos[i][0]) - d.x);
d.y += alpha * (centery(pos[i][1]) - d.y);
});
})
.force('collide', d3.forceCollide().radius(d => 1 + d.r))
.on('tick', function () {
// drawcanvas(canvas, data);
drawsvg(svg, data);
});
function drawsvg(svg, nodes) {
const g = svg.selectAll('g.city')
.data(nodes);
var enter = g.enter().append('g').classed('city', true);
enter.append('circle')
.attr('r', d => d.r)
.attr('fill', d => d.color)
.on('click', d => {
window.open(d.link,'targetWindow');
})
.on('mouseover', function(d) {
tip
.style('left', Math.min(width - 200, d.x - 10)+'px')
.style('top', Math.min(height-100, d.y + margin.top + 10)+'px')
.html(d.name + '<br><small><em>Commentaire des Décodeurs:</em> ' + d.desc + '</small>')
});
enter
.filter(d => d.r > 7)
.append('text')
.attr('fill', 'white')
.style('font-size', '10px')
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'middle')
.attr('pointer-events', 'none')
.text(d => d.name.replace(/^(Le |La |L')/, '').substring(0,4));
g.attr('transform', d => `translate(${d.x},${d.y})`);
}
d3.queue()
.defer(d3.text, 'tsne.js')
.defer(d3.text, 'https://unpkg.com/d3-geo')
.defer(d3.text, 'worker.js')
.awaitAll(function (err, scripts) {
const worker = new Worker(
window.URL.createObjectURL(
new Blob(scripts, {
type: "text/javascript"
})
)
);
worker.postMessage({
maxIter: 10,
dim: 2,
perplexity: 30.0,
data: data
});
worker.onmessage = function (e) {
if (e.data.log) console.log.apply(this, e.data.log);
if (e.data.pos) pos = e.data.pos;
if (e.data.done && e.data.done < 10000 && e.data.cost > 1e-2) {
worker.postMessage({
maxIter: e.data.done + 10,
});
}
};
});
});
</script>
</body>
// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };
(function(global) {
"use strict";
// utility function
var assert = function(condition, message) {
if (!condition) { throw message || "Assertion failed"; }
}
// syntax sugar
var getopt = function(opt, field, defaultval) {
if(opt.hasOwnProperty(field)) {
return opt[field];
} else {
return defaultval;
}
}
// return 0 mean unit standard deviation random number
var return_v = false;
var v_val = 0.0;
var gaussRandom = function() {
if(return_v) {
return_v = false;
return v_val;
}
var u = 2*Math.random()-1;
var v = 2*Math.random()-1;
var r = u*u + v*v;
if(r == 0 || r > 1) return gaussRandom();
var c = Math.sqrt(-2*Math.log(r)/r);
v_val = v*c; // cache this for next function call for efficiency
return_v = true;
return u*c;
}
// return random normal number
var randn = function(mu, std){ return mu+gaussRandom()*std; }
// utilitity that creates contiguous vector of zeros of size n
var zeros = function(n) {
if(typeof(n)==='undefined' || isNaN(n)) { return []; }
if(typeof ArrayBuffer === 'undefined') {
// lacking browser support
var arr = new Array(n);
for(var i=0;i<n;i++) { arr[i]= 0; }
return arr;
} else {
return new Float64Array(n); // typed arrays are faster
}
}
// utility that returns 2d array filled with random numbers
// or with value s, if provided
var randn2d = function(n,d,s) {
var uses = typeof s !== 'undefined';
var x = [];
for(var i=0;i<n;i++) {
var xhere = [];
for(var j=0;j<d;j++) {
if(uses) {
xhere.push(s);
} else {
xhere.push(randn(0.0, 1e-4));
}
}
x.push(xhere);
}
return x;
}
// compute L2 distance between two vectors
var L2 = function(x1, x2) {
var D = x1.length;
var d = 0;
for(var i=0;i<D;i++) {
var x1i = x1[i];
var x2i = x2[i];
d += (x1i-x2i)*(x1i-x2i);
}
return d;
}
// compute pairwise distance in all vectors in X
var xtod = function(X) {
var N = X.length;
var dist = zeros(N * N); // allocate contiguous array
for(var i=0;i<N;i++) {
for(var j=i+1;j<N;j++) {
var d = L2(X[i], X[j]);
dist[i*N+j] = d;
dist[j*N+i] = d;
}
}
return dist;
}
// compute (p_{i|j} + p_{j|i})/(2n)
var d2p = function(D, perplexity, tol) {
var Nf = Math.sqrt(D.length); // this better be an integer
var N = Math.floor(Nf);
assert(N === Nf, "D should have square number of elements.");
var Htarget = Math.log(perplexity); // target entropy of distribution
var P = zeros(N * N); // temporary probability matrix
var prow = zeros(N); // a temporary storage compartment
for(var i=0;i<N;i++) {
var betamin = -Infinity;
var betamax = Infinity;
var beta = 1; // initial value of precision
var done = false;
var maxtries = 50;
// perform binary search to find a suitable precision beta
// so that the entropy of the distribution is appropriate
var num = 0;
while(!done) {
//debugger;
// compute entropy and kernel row with beta precision
var psum = 0.0;
for(var j=0;j<N;j++) {
var pj = Math.exp(- D[i*N+j] * beta);
if(i===j) { pj = 0; } // we dont care about diagonals
prow[j] = pj;
psum += pj;
}
// normalize p and compute entropy
var Hhere = 0.0;
for(var j=0;j<N;j++) {
if(psum == 0) {
var pj = 0;
} else {
var pj = prow[j] / psum;
}
prow[j] = pj;
if(pj > 1e-7) Hhere -= pj * Math.log(pj);
}
// adjust beta based on result
if(Hhere > Htarget) {
// entropy was too high (distribution too diffuse)
// so we need to increase the precision for more peaky distribution
betamin = beta; // move up the bounds
if(betamax === Infinity) { beta = beta * 2; }
else { beta = (beta + betamax) / 2; }
} else {
// converse case. make distrubtion less peaky
betamax = beta;
if(betamin === -Infinity) { beta = beta / 2; }
else { beta = (beta + betamin) / 2; }
}
// stopping conditions: too many tries or got a good precision
num++;
if(Math.abs(Hhere - Htarget) < tol) { done = true; }
if(num >= maxtries) { done = true; }
}
// console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
// copy over the final prow to P at row i
for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }
} // end loop over examples i
// symmetrize P and normalize it to sum to 1 over all ij
var Pout = zeros(N * N);
var N2 = N*2;
for(var i=0;i<N;i++) {
for(var j=0;j<N;j++) {
Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
}
}
return Pout;
}
// helper function
function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }
var tSNE = function(opt) {
var opt = opt || {};
this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
this.epsilon = getopt(opt, "epsilon", 10); // learning rate
this.iter = 0;
}
tSNE.prototype = {
// this function takes a set of high-dimensional points
// and creates matrix P from them using gaussian kernel
initDataRaw: function(X) {
var N = X.length;
var D = X[0].length;
assert(N > 0, " X is empty? You must have some data!");
assert(D > 0, " X[0] is empty? Where is the data?");
var dists = xtod(X); // convert X to distances using gaussian kernel
this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
this.N = N; // back up the size of the dataset
this.initSolution(); // refresh this
},
// this function takes a given distance matrix and creates
// matrix P from them.
// D is assumed to be provided as a list of lists, and should be symmetric
initDataDist: function(D) {
var N = D.length;
assert(N > 0, " X is empty? You must have some data!");
// convert D to a (fast) typed array version
var dists = zeros(N * N); // allocate contiguous array
for(var i=0;i<N;i++) {
for(var j=i+1;j<N;j++) {
var d = D[i][j];
dists[i*N+j] = d;
dists[j*N+i] = d;
}
}
this.P = d2p(dists, this.perplexity, 1e-4);
this.N = N;
this.initSolution(); // refresh this
},
// (re)initializes the solution to random
initSolution: function() {
// generate random solution to t-SNE
this.Y = randn2d(this.N, this.dim); // the solution
this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
this.iter = 0;
},
// return pointer to current solution
getSolution: function() {
return this.Y;
},
// perform a single step of optimization to improve the embedding
step: function() {
this.iter += 1;
var N = this.N;
var cg = this.costGrad(this.Y); // evaluate gradient
var cost = cg.cost;
var grad = cg.grad;
// perform gradient step
var ymean = zeros(this.dim);
for(var i=0;i<N;i++) {
for(var d=0;d<this.dim;d++) {
var gid = grad[i][d];
var sid = this.ystep[i][d];
var gainid = this.gains[i][d];
// compute gain update
var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
if(newgain < 0.01) newgain = 0.01; // clamp
this.gains[i][d] = newgain; // store for next turn
// compute momentum step direction
var momval = this.iter < 250 ? 0.5 : 0.8;
var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
this.ystep[i][d] = newsid; // remember the step we took
// step!
this.Y[i][d] += newsid;
ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
}
}
// reproject Y to be zero mean
for(var i=0;i<N;i++) {
for(var d=0;d<this.dim;d++) {
this.Y[i][d] -= ymean[d]/N;
}
}
//if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
return cost; // return current cost
},
// for debugging: gradient check
debugGrad: function() {
var N = this.N;
var cg = this.costGrad(this.Y); // evaluate gradient
var cost = cg.cost;
var grad = cg.grad;
var e = 1e-5;
for(var i=0;i<N;i++) {
for(var d=0;d<this.dim;d++) {
var yold = this.Y[i][d];
this.Y[i][d] = yold + e;
var cg0 = this.costGrad(this.Y);
this.Y[i][d] = yold - e;
var cg1 = this.costGrad(this.Y);
var analytic = grad[i][d];
var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);
this.Y[i][d] = yold;
}
}
},
// return cost and gradient, given an arrangement
costGrad: function(Y) {
var N = this.N;
var dim = this.dim; // dim of output space
var P = this.P;
var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima
// compute current Q distribution, unnormalized first
var Qu = zeros(N * N);
var qsum = 0.0;
for(var i=0;i<N;i++) {
for(var j=i+1;j<N;j++) {
var dsum = 0.0;
for(var d=0;d<dim;d++) {
var dhere = Y[i][d] - Y[j][d];
dsum += dhere * dhere;
}
var qu = 1.0 / (1.0 + dsum); // Student t-distribution
Qu[i*N+j] = qu;
Qu[j*N+i] = qu;
qsum += 2 * qu;
}
}
// normalize Q distribution to sum to 1
var NN = N*N;
var Q = zeros(NN);
for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }
var cost = 0.0;
var grad = [];
for(var i=0;i<N;i++) {
var gsum = new Array(dim); // init grad for point i
for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
for(var j=0;j<N;j++) {
cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
for(var d=0;d<dim;d++) {
gsum[d] += premult * (Y[i][d] - Y[j][d]);
}
}
grad.push(gsum);
}
return {cost: cost, grad: grad};
}
}
global.tSNE = tSNE; // export tSNE class
})(tsnejs);
// export the library to window, or to module in nodejs
(function(lib) {
"use strict";
if (typeof module === "undefined" || typeof module.exports === "undefined") {
if (typeof window == "object")
window.tsnejs = lib; // in ordinary browser attach library to window
} else {
module.exports = lib; // in nodejs
}
})(tsnejs);
// in worker.onmessage add : `if (e.data.log) console.log.apply(this, e.data.log);`
console.log = function() {
self.postMessage({ log: [...arguments] });
}
function distance(a, b) {
return a
.map((_,i) => b[i] != a[i])
.reduce((c,d) => c+d, 0);
}
let iterations = 0,
model;
self.onmessage = function(e) {
const msg = e.data,
data = msg.data,
maxIter = msg.maxIter || 500,
perplexity = msg.perplexity || 30;
let dists = msg.dist;
if (data && !dists){
let now = performance.now();
dists = data.map(
a => data.map(
b => distance(a.tags, b.tags) + Math.abs(a.score - b.score)/10
)
);
console.log('computed', data.length * data.length,'distances in', Math.round(performance.now()-now),'ms');
}
if (dists) {
model = new tsnejs.tSNE({
dim: msg.dim,
perplexity: perplexity,
});
model.initDataDist(dists);
}
let startpos = model.getSolution().map(d=>d.slice()),
pos;
while (iterations++ < maxIter) {
// every time you call this, solution gets better
model.step();
pos = model.getSolution();
// Y is an array of 2-D points that you can plot
self.postMessage({
iterations: iterations - 1,
pos: pos,
//log: [ 'step', iterations-1 ]
});
}
let cost = startpos
.map((d,i) => sqdist(d, pos[i]))
.reduce ((a,b) => a + b, 0) / pos.length / Math.max(...pos.map(d => Math.abs(d[0]) + Math.abs(d[1])));
self.postMessage({
done: iterations - 1,
cost: cost,
log: [ 'done', iterations - 1, cost ]
});
};
function sqdist (a,b) {
let d = [a[0] - b[0], a[1] - b[1]].map(Math.abs);
return Math.max(d[0], d[1]);
}