block by michalskop 5658040

Prague 2010-2013

Full Screen

index.html

<!DOCTYPE html>
<html>
<head>
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Praha 2010-2013</title>
	<link rel="stylesheet" href="//code.jquery.com/mobile/1.3.1/jquery.mobile-1.3.1.min.css" />
	<script src="//code.jquery.com/jquery-1.8.2.min.js"></script>
	<script src="//code.jquery.com/mobile/1.3.1/jquery.mobile-1.3.1.min.js"></script>
<style>


#chart {
  margin-left: -40px;
  height: 506px;
}

text {
  font: 10px sans-serif;
}

.dot {
  stroke: #000;
  opacity: .5;
  stroke-opacity: .75;
}

.axis path, .axis line {
  fill: none;
  stroke: #000;
  shape-rendering: crispEdges;
}

.label {
  fill: #777;
}

.year.label {
  font: 500 100px "Helvetica Neue";
  fill: #ddd;
}

.year.label.active {
  fill: #aaa;
}

.overlay {
  fill: none;
  pointer-events: all;
  cursor: ew-resize;
}

</style>

</head>
<body>
<div data-role="page" class="type-home">
	<div data-role="content">

<header>
  <aside>May 27, 2013, 70th birthday of my dad!</aside>
  <a href="/michalskop/" rel="author">Michal Škop</a>
</header>

<h1>Praha 2010-2013</h1>

<form>
        <label for="slider">Slider:</label>
        <input name="slider" id="slider" min="2010.75" max="2013.25" value="2010.75" step=".01" type="range" />
</form>
<p>
    <a href="#" id="play" data-role="button" data-inline="true" data-icon="refresh"><span id="playText">Play ></span></a>
</p>

 <p id="chart"><svg><defs id="gradients"></defs></svg></p>
 <!-- <p id="slide">XXX</p> -->



<p>Method: weighted PCA; Time intervals: 6 months (projection of 6-month blocks into all data)</p>
<p>There are missing data about Richter (ODS) and Šťastný (ODS) on the <a href="//praha.eu">official website</a></p>

<script src="//d3js.org/d3.v2.js?2.8.1"></script>

<script>

// Various accessors that specify the four dimensions of data to visualize.
function x(d) { return d.d1; }
function y(d) { return d.d2; }
function z(d) { return d.d3; }
function radius(d) { return 1; }
function color(d) { return d.color; }
//function color(d) { return d.group; }
function key(d) { return d.name; }
function display(d) {return d.display;}


// Chart dimensions.
var margin = {top: 19.5, right: 19.5, bottom: 19.5, left: 39.5},
    width = 600 - margin.right,
    height = 400 - margin.top - margin.bottom;

// Various scales. These domains make assumptions of data, naturally.
var xScale = d3.scale.linear().domain([-15, 15]).range([0, width]),
    yScale = d3.scale.linear().domain([-15, 15]).range([height, 0]),
    radiusScale = d3.scale.sqrt().domain([0, 1]).range([0, 10]);
    //colorScale = d3.scale.category10();
    var colorScale = d3.scale.category20c();

// The x & y axes.
var xAxis = d3.svg.axis().orient("bottom").scale(xScale).ticks(12, d3.format(",d")),
    yAxis = d3.svg.axis().scale(yScale).orient("left");

// Create the SVG container and set the origin.
var svg = d3.select("#chart svg") //d3.select("#chart").append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Add the x-axis.
svg.append("g")
    .attr("class", "x axis")
    .attr("transform", "translate(0," + height + ")")
    .call(xAxis);

// Add the y-axis.
svg.append("g")
    .attr("class", "y axis")
    .call(yAxis);

// Add an x-axis label.
svg.append("text")
    .attr("class", "x label")
    .attr("text-anchor", "end")
    .attr("x", width)
    .attr("y", height - 6)
    .text("Dimension 1");

// Add a y-axis label.
svg.append("text")
    .attr("class", "y label")
    .attr("text-anchor", "end")
    .attr("y", 6)
    .attr("dy", ".75em")
    .attr("transform", "rotate(-90)")
    .text("Dimension 2");

// Add the year label; the value is set on transition.
var label = svg.append("text")
    .attr("class", "year label")
    .attr("text-anchor", "end")
    .attr("y", height - 24)
    .attr("x", width)
    .text("2010/9");

// Load the data.
d3.json("cz_praha_2010-2013_a1_1h.json", function(nations) {

  // A bisector since many nation's data is sparsely-defined.
  var bisect = d3.bisector(function(d) { return d[0]; });

  // Add a dot per nation. Initialize the data at 1800, and set the colors.
  var dot = svg.append("g")
      .attr("class", "dots")
    .selectAll(".dot")
      .data(interpolateData(2010.75))
    .enter().append("circle")
      .attr("class", "dot")
      //.style("fill", function(d) { return d.group; })
      //.style("fill", function (d) { return gradient(colorScale(color(d))) })
      .call(position)
      .sort(order)
      .on("mouseover", animateFirstStep)
        .on("mouseout", animateSecondStep)
        .on("mousedown", animateFirstStep);

  // Add a title.
  dot.append("title")
      .text(function(d) { return d.name});


  var i=0;
  var playing = false;
  $("#play").click(function() {
	  if(playing === false) {
		    startPlaying();
		} else {
			stopPlaying();
		}

  });
  
  function startPlaying() {
  	playing = true;
    $("#playText").html("Stop ||");
    $('#slider').slider('disable');
    // Start a transition that interpolates the data based on year.
    svg.transition()
	  .duration(30000)
	  .ease("linear")
	  .tween("year", function() {return tweenYear($('#slider').val()) })
	  .each("end", stopPlaying);
  }
  
  function stopPlaying() {
	playing = false;
    $("#playText").html("Play >");
    svg.transition().duration(0);
    $('#slider').slider('enable');
  }


  // Positions the dots based on data.
  function position(dot) {
    dot .attr("cx", function(d) { return xScale(x(d)); })
        .attr("cy", function(d) { return yScale(y(d)); })
        .attr("r", function(d) { return radiusScale(radius(d)) })
       //.style("fill", function(d) { return color(d); });
        .style("fill", function (d) { return gradient(color(d)) })
		//.attr("style", function (d) { return "display:" + display(d)+";";});
		.attr("display", function (d) { return display(d);});
        //alert(dot.data);
  }

  // Defines a sort order so that the smallest dots are drawn on top.
  function order(a, b) {
    return radius(b) - radius(a);
  }



  // Tweens the entire chart by first tweening the year, and then the data.
  // For the interpolated data, the dots and label are redrawn.
  function tweenYear(start) {
    var year = d3.interpolateNumber(2010.75,2013.25);
    return function(t) { displayYear(year(t)); };
  }
  
  // Updates the display to show the specified year.
  function displayYear(year) {
    dot.data(interpolateData(year), key).call(position).sort(order);
    month = Math.ceil((year - Math.floor(year))*12);
    label.text(Math.floor(year) + '/' + month);
    i++;
    if ((i%25) == 0) {
      $("#slider").val(year);
      $('#slider').slider('refresh');
    }
  }

  // Interpolates the dataset for the given (fractional) year.
  function interpolateData(year) {
    return nations.map(function(d) {
      return {
        name: d.name,
        //group: d.group,
        d1: interpolateValues(d.d1, year),
        d2: interpolateValues(d.d2, year),
        //d3: interpolateValues(d.d3, year),
        color: findColor(d.color, year),
        display: isDisplayed(d.d1, year)
        //category: d.group
        //lifeExpectancy: interpolateValues(d.lifeExpectancy, year)
      };
    });
  }

  // Finds (and possibly interpolates) the value for the specified year.
  function interpolateValues(values, year) {
    var i = bisect.left(values, year, 0, values.length - 1),
        a = values[i];
    if (i > 0) {
      var b = values[i - 1],
          t = (year - a[0]) / (b[0] - a[0]);
      return a[1] * (1 - t) + b[1] * t;
    }
    return a[1];
  }
  
  function findColor(values, year) {
     var i = bisect.left(values, year, 0, values.length - 1);
     return values[i][1];
  }
  
  function isDisplayed(values, year) {
     if ( (year < values[0][0]) || (year > values[values.length - 1][0]))
       return 'none';
     else 
       return 'inherit';
  }
  
  	function animateFirstStep(d){
		d3.select(this)
		  .transition()            
		    .delay(0)            
		    .duration(1000)
		    .attr("r", 10*2)
		    //.attr("cx", 300*d.x+150+50)
		    ;//.each("end", animateSecondStep);
	}
	
		function animateSecondStep(d){
		d3.select(this)
		  .transition()
		    .duration(1000)
		    .attr("r", 10);
		    //.attr("cx", 300*d.x+150);
	}
	
	//slider
	//see //michalskop.tumblr.com/post/37352195911/strange-behaviour-of-jquery-change
	$('#slider').ready(function() {
	  $('#slider').change(function(){
		displayYear($(this).val());
	  });
	});
	
	
	//color gradients
	////dexvis.wordpress.com/2012/12/25/motion-charts-revisited/
	function shadeColor(color, percent) {

		var R = parseInt(color.substring(1,3),16)
		var G = parseInt(color.substring(3,5),16)
		var B = parseInt(color.substring(5,7),16);

		R = parseInt(R * (100 + percent) / 100);
		G = parseInt(G * (100 + percent) / 100);
		B = parseInt(B * (100 + percent) / 100);

		R = (R<255)?R:255;  
		G = (G<255)?G:255;  
		B = (B<255)?B:255;  

		var RR = ((R.toString(16).length==1)?"0"+R.toString(16):R.toString(16));
		var GG = ((G.toString(16).length==1)?"0"+G.toString(16):G.toString(16));
		var BB = ((B.toString(16).length==1)?"0"+B.toString(16):B.toString(16));

		return "#"+RR+GG+BB;
	}

	function gradient(baseColor)
	{
	  var gradientId = "gradient" + baseColor.substring(1)
	  console.log("COLOR: " + gradientId);

	  //var lightColor = shadeColor(baseColor, -10)
	  var darkColor = shadeColor(baseColor, -20) 
	  
	  var grad = d3.select("#gradients").selectAll("#" + gradientId)
		.data([ gradientId ])
		.enter()
		.append("radialGradient")
		.attr("id", gradientId)
		.attr("gradientUnits", "objectBoundingBox")
		.attr("fx", "30%")
		.attr("fy", "30%")

	  grad.append("stop")
		.attr("offset", "0%")
		.attr("style", "stop-color:#FFFFFF")
	  
	  // Middle
	  grad.append("stop")
		.attr("offset", "40%")
		.attr("style", "stop-color:" + baseColor)

	  // Outer Edges
	  grad.append("stop")
		.attr("offset", "100%")
		.attr("style", "stop-color:" + darkColor)
	  
	  console.log("url(#" + gradientId + ")")
	  return "url(#" + gradientId + ")";
	}
	
});

</script>

<script>

</script>
	</div> <!-- /content -->

</div><!-- /page -->
</body>
</html>

rko3.r

#install.packages("reshape2")
library("reshape2")
#install.packages("sqldf")
library("sqldf")

path = "~/project/mpv_motion/dev/"
#dataname = "cz_psp_2006-2010_a1_1"
#analysisname = 'cz_psp_2006-2010_a1_1q'
dataname = "cz_praha_2010-2013_a1_1"
analysisname = 'cz_praha_2010-2013_a1_1h'

#raw data in 3 columns (values in [-1,1])
Graw = read.table(paste(path,dataname,"/",dataname,".csv",sep=""), header=FALSE, sep=",")
#data divisions x persons
G = acast(Graw,V2~V1,value.var='V3')
#scaled divisions x persons (mean=0 and sd=1 for each division)
H=t(scale(t(G),scale=TRUE))

#weights
#weights 1, based on number of persons in division
w1 = apply(abs(G)==1,1,sum,na.rm=TRUE)/max(apply(abs(G)==1,1,sum,na.rm=TRUE))
w1[is.na(w1)] = 0
#weights 2, "100:100" vs. "195:5"
w2 = 1 - abs(apply(G==1,1,sum,na.rm=TRUE) - apply(G==-1,1,sum,na.rm=TRUE))/apply(!is.na(G),1,sum) 
w2[is.na(w2)] = 0

	#analytics
	plot(w1)
	plot(w2)
	plot(w1*w2)

#weighted scaled divisions x persons
Hw = H * w1 * w2

#index of missing data
#index of missing data divisions x persons
HI = H
HI[!is.na(H)]=1
HI[is.na(H)] = 0

#weighted scaled with NA->0 division x persons
Hw0 = Hw
Hw0[is.na(Hw)]=0

#eliminate persons with too few votes (weighted)
#lower limit to eliminate from calculations
lo_limit = .1
#weights for non missing data division x persons
HIw = HI*w1*w2
#sum of weights of divisions for each persons
tmp = apply(HIw,2,sum)
pw = tmp/max(tmp)
#index of persons in calculation
pI = pw > lo_limit
#weighted scaled with NA->0 and cutted persons with too few votes division x persons
Hw0c = Hw0[,pI]
#index of missing data cutted persons with too few votes divisions x persons
HIc = HI[,pI]

#"covariance" matrix adjusted according to missing data
Hcov=t(Hw0c)%*%Hw0c * 1/(t(HIc)%*%HIc) * (dim(Hw0c)[1])
#Hcov=t(Hw0)%*%Hw0 * 1/(t(HI)%*%HI-1) * (dim(H)[1]-1)

#substitution of missing data in "covariance" matrix
Hcov0 = Hcov
Hcov0[is.na(Hcov)] = 0			#********* 

#eigendecomposition
He=eigen(Hcov0)
# V (rotation values of persons)
V = He$vectors
#projected divisions into dimensions
Hy=Hw0c%*%V

	#analytics
	plot(Hy[,1],Hy[,2])
	plot(sqrt(He$values[1:10]))

#sigma matrix 
sigma = diag(sqrt(He$values))
sigma[is.na(sigma)] = 0

#projection of persons into dimensions
Hproj = t(sigma%*%t(V))
	#analytics
	plot(Hproj[,1],Hproj[,2])

#sigma^-1 matrix
sigma_1 = diag(sqrt(1/He$values))
sigma_1[is.na(sigma_1)] = 0

# U (rotation values of divisions)
U = Hw0c%*%V%*%sigma_1

#U%*%sigma%*%t(V) != Hw0c ;because of adjusting of "covariance" matrix


# NEW MP (or partial)
#New persons / partial (like a projection of divisions from a smaller time interval into all divisions)


analdb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",analysisname,"/",analysisname,".sqlite3",sep=""))
datadb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",dataname,".sqlite3",sep=""))

rotation = strsplit(dbGetQuery(analdb,"SELECT orientation FROM analysis_info")[1,1],",")[[1]]
for (j in 2:length(rotation)) {
  if (Hproj[rot_mp_rank,j-1] * as.double(rotation[j]) < 0) {
    U[,j-1] = -1*U[,j-1]
  }
}

aU = abs(U)

mp_names = dbGetQuery(datadb,"SELECT name FROM mp ORDER BY CAST(code as INTEGER)")
attr(G,'dimnames')[2][[1]] = mp_names$name
#attr(Hw0c,'dimnames')[2][[1]] = mp_names$name[pI]

interval_names = dbGetQuery(analdb,"SELECT distinct(interval_name) FROM analysis_division_in_interval ORDER BY interval_name")

#lower limit to eliminate from projections
lo_limitT = lo_limit

    


for (i in 1:dim(interval_names)[1]) {

    TIf = dbGetQuery(analdb,paste("SELECT division_code, CASE interval_name='",interval_names[i,],"' WHEN 1 THEN 'TRUE' ELSE 'FALSE' END as in_interval FROM analysis_division_in_interval ORDER BY division_code",sep=""))
    
    TIf$in_interval = as.logical(TIf$in_interval)
    
    max_division_code = TIf$division_code[max(which(as.logical(TIf$in_interval)))]
    min_division_code = TIf$division_code[min(which(as.logical(TIf$in_interval)))]
    
    TI = TIf$in_interval
    
    GTc = G[,pI]
    GTc[!TI,] = NA
    HTc = (GTc - attr(H,"scaled:center"))/attr(H,"scaled:scale")
    
    HTIc = HTc
	HTIc[!is.na(HTIc)] = 1
	HTIc[is.na(HTIc)] = 0
    
    HTw0c = HTc * w1 * w2
    HTw0c[is.na(HTw0c)] = 0
    

	#weights for non missing data division x persons
	HTIcw = HTIc*w1*w2
	#sum of weights of divisions for each persons
	tmp = apply(HTIcw,2,sum)
	pTw = tmp/max(tmp)
	#index of persons in calculation
	pTI = pTw > lo_limitT
	
	#weighted scaled with NA->0 and cutted persons with too few votes division x persons
	HTw0cc = HTw0c[,pTI]
	#index of missing data cutted persons with too few votes divisions x persons
	HTIcc = HTIc[,pTI]
    
    
	dweights = t(t(aU)%*%HTIcc / apply(aU,2,sum))  	#person x division
    dweights[is.na(dweights)] = 0
    
    HTw0ccproj = t(HTw0cc)%*%U / dweights


    parties = as.matrix(dbGetQuery(datadb,paste('SELECT COALESCE(NULLIF(tmax.code,""),tmin.code) as code, COALESCE(NULLIF(tmax.color,""),tmin.color) as color FROM (SELECT  mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf  LEFT JOIN "group" as g  ON mvf.group_code=g.code  WHERE division_code="',max_division_code,'") as tmax LEFT JOIN  (SELECT  mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf  LEFT JOIN "group" as g  ON mvf.group_code=g.code  WHERE division_code="',min_division_code,'") as tmin ON tmax.mp_code=tmin.mp_code  ORDER BY CAST(tmax.mp_code as INTEGER)',sep="")))
    partiescc = (parties[pI,])[pTI,]
    
    write.csv(format(cbind(HTw0ccproj[,1:2],partiescc),digits=3),file=paste(path,dataname,"/",analysisname,"/",interval_names[i,],"_result.csv",sep=""))
     #*************************
}



# CUTTING LINES
# based on overall model
#minimizing function
f_cutting_lines=function(beta0) -1*sum(apply(cbind(y*(x%*%beta+beta0),zeros),1,min))

#how many dimensions?
n_first_dimensions = 2

#preparing variables
normals = Hy[,1:n_first_dimensions]
loss_f = data.frame(matrix(0,nrow=dim(H)[1],ncol=4))
colnames(loss_f)=c("Parameter1","Loss1","Parameter_1","Loss_1")
parameters = data.frame(matrix(0,nrow=dim(H)[1],ncol=3))
colnames(parameters)=c("Parameter","Loss","Direction")

xfull = t(t(He$vectors[,1:n_first_dimensions]) * sqrt(He$values[1:n_first_dimensions]))

#calculating all cutting lines
i=1
for (i in as.numeric(1:dim(H)[1])) {
    beta = Hy[i,1:n_first_dimensions]
    y = t(as.matrix(H[i,]))[,pI]
    x = xfull[which(!is.na(y)),]
    y = y[which(!is.na(y))]
    zeros = as.matrix(rep(0,length(y)))
    #** place for improvement: "1000"
    res1 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000)	#1000? arbitrary
    #the sign is arbitrary, the real result may be -1*; we need to minimize the other way as well!!
    y=-y
    res2 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000) #1000? arbitrary
    #the real parameter is the one with lower loss function
    #theoretically should be the same (either +1 or -1) for all divisions(rows), however, due to missing data, the calculation may lead to a few divisions with the other direction
    loss_f[i,] = c(res1$par,res1$value,res2$par,res2$value)
    if (res1$value<=res2$value) {
      parameters[i,] = c(res1$par,res1$value,1)
    } else {
      parameters[i,] = c(res2$par,res2$value,-1)
    }
}
CuttingLines = list(normals=normals,parameters=parameters,loss_function=loss_f,weights=cbind(w1,w2))


##
#cutting lines:
I = w1*w2 > .75
for (i in as.numeric(1:dim(H)[1])) {
  if (I[i] && ((i %% 10) == 0)) {
    beta = CuttingLines$normals[i,]
    beta0 = CuttingLines$parameters$Parameter[i]
	abline(-beta0/beta[2],-beta[1]/beta[2])
    #break
  }
}

# only second half
plot(Hproj[,1],Hproj[,2])
> for (j in as.numeric(1:round(dim(H)[1]/2))) {
     i = round(dim(H)[1]/2) + j
     if (I[i] && ((i %% 10) == 0)) {
         beta = CuttingLines$normals[i,]
         beta0 = CuttingLines$parameters$Parameter[i]
         abline(-beta0/beta[2],-beta[1]/beta[2])
         #break
     }
 }

# when y=0, x is ?
plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,1],ylim=c(-100,100))
# slopes
plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,2],ylim=c(-10,10))