index.html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Praha 2010-2013</title>
<link rel="stylesheet" href="//code.jquery.com/mobile/1.3.1/jquery.mobile-1.3.1.min.css" />
<script src="//code.jquery.com/jquery-1.8.2.min.js"></script>
<script src="//code.jquery.com/mobile/1.3.1/jquery.mobile-1.3.1.min.js"></script>
<style>
#chart {
margin-left: -40px;
height: 506px;
}
text {
font: 10px sans-serif;
}
.dot {
stroke: #000;
opacity: .5;
stroke-opacity: .75;
}
.axis path, .axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.label {
fill: #777;
}
.year.label {
font: 500 100px "Helvetica Neue";
fill: #ddd;
}
.year.label.active {
fill: #aaa;
}
.overlay {
fill: none;
pointer-events: all;
cursor: ew-resize;
}
</style>
</head>
<body>
<div data-role="page" class="type-home">
<div data-role="content">
<header>
<aside>May 27, 2013, 70th birthday of my dad!</aside>
<a href="/michalskop/" rel="author">Michal Škop</a>
</header>
<h1>Praha 2010-2013</h1>
<form>
<label for="slider">Slider:</label>
<input name="slider" id="slider" min="2010.75" max="2013.25" value="2010.75" step=".01" type="range" />
</form>
<p>
<a href="#" id="play" data-role="button" data-inline="true" data-icon="refresh"><span id="playText">Play ></span></a>
</p>
<p id="chart"><svg><defs id="gradients"></defs></svg></p>
<p>Method: weighted PCA; Time intervals: 6 months (projection of 6-month blocks into all data)</p>
<p>There are missing data about Richter (ODS) and Šťastný (ODS) on the <a href="//praha.eu">official website</a></p>
<script src="//d3js.org/d3.v2.js?2.8.1"></script>
<script>
function x(d) { return d.d1; }
function y(d) { return d.d2; }
function z(d) { return d.d3; }
function radius(d) { return 1; }
function color(d) { return d.color; }
function key(d) { return d.name; }
function display(d) {return d.display;}
var margin = {top: 19.5, right: 19.5, bottom: 19.5, left: 39.5},
width = 600 - margin.right,
height = 400 - margin.top - margin.bottom;
var xScale = d3.scale.linear().domain([-15, 15]).range([0, width]),
yScale = d3.scale.linear().domain([-15, 15]).range([height, 0]),
radiusScale = d3.scale.sqrt().domain([0, 1]).range([0, 10]);
var colorScale = d3.scale.category20c();
var xAxis = d3.svg.axis().orient("bottom").scale(xScale).ticks(12, d3.format(",d")),
yAxis = d3.svg.axis().scale(yScale).orient("left");
var svg = d3.select("#chart svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis);
svg.append("g")
.attr("class", "y axis")
.call(yAxis);
svg.append("text")
.attr("class", "x label")
.attr("text-anchor", "end")
.attr("x", width)
.attr("y", height - 6)
.text("Dimension 1");
svg.append("text")
.attr("class", "y label")
.attr("text-anchor", "end")
.attr("y", 6)
.attr("dy", ".75em")
.attr("transform", "rotate(-90)")
.text("Dimension 2");
var label = svg.append("text")
.attr("class", "year label")
.attr("text-anchor", "end")
.attr("y", height - 24)
.attr("x", width)
.text("2010/9");
d3.json("cz_praha_2010-2013_a1_1h.json", function(nations) {
var bisect = d3.bisector(function(d) { return d[0]; });
var dot = svg.append("g")
.attr("class", "dots")
.selectAll(".dot")
.data(interpolateData(2010.75))
.enter().append("circle")
.attr("class", "dot")
.call(position)
.sort(order)
.on("mouseover", animateFirstStep)
.on("mouseout", animateSecondStep)
.on("mousedown", animateFirstStep);
dot.append("title")
.text(function(d) { return d.name});
var i=0;
var playing = false;
$("#play").click(function() {
if(playing === false) {
startPlaying();
} else {
stopPlaying();
}
});
function startPlaying() {
playing = true;
$("#playText").html("Stop ||");
$('#slider').slider('disable');
svg.transition()
.duration(30000)
.ease("linear")
.tween("year", function() {return tweenYear($('#slider').val()) })
.each("end", stopPlaying);
}
function stopPlaying() {
playing = false;
$("#playText").html("Play >");
svg.transition().duration(0);
$('#slider').slider('enable');
}
function position(dot) {
dot .attr("cx", function(d) { return xScale(x(d)); })
.attr("cy", function(d) { return yScale(y(d)); })
.attr("r", function(d) { return radiusScale(radius(d)) })
.style("fill", function (d) { return gradient(color(d)) })
.attr("display", function (d) { return display(d);});
}
function order(a, b) {
return radius(b) - radius(a);
}
function tweenYear(start) {
var year = d3.interpolateNumber(2010.75,2013.25);
return function(t) { displayYear(year(t)); };
}
function displayYear(year) {
dot.data(interpolateData(year), key).call(position).sort(order);
month = Math.ceil((year - Math.floor(year))*12);
label.text(Math.floor(year) + '/' + month);
i++;
if ((i%25) == 0) {
$("#slider").val(year);
$('#slider').slider('refresh');
}
}
function interpolateData(year) {
return nations.map(function(d) {
return {
name: d.name,
d1: interpolateValues(d.d1, year),
d2: interpolateValues(d.d2, year),
color: findColor(d.color, year),
display: isDisplayed(d.d1, year)
};
});
}
function interpolateValues(values, year) {
var i = bisect.left(values, year, 0, values.length - 1),
a = values[i];
if (i > 0) {
var b = values[i - 1],
t = (year - a[0]) / (b[0] - a[0]);
return a[1] * (1 - t) + b[1] * t;
}
return a[1];
}
function findColor(values, year) {
var i = bisect.left(values, year, 0, values.length - 1);
return values[i][1];
}
function isDisplayed(values, year) {
if ( (year < values[0][0]) || (year > values[values.length - 1][0]))
return 'none';
else
return 'inherit';
}
function animateFirstStep(d){
d3.select(this)
.transition()
.delay(0)
.duration(1000)
.attr("r", 10*2)
;
}
function animateSecondStep(d){
d3.select(this)
.transition()
.duration(1000)
.attr("r", 10);
}
$('#slider').ready(function() {
$('#slider').change(function(){
displayYear($(this).val());
});
});
function shadeColor(color, percent) {
var R = parseInt(color.substring(1,3),16)
var G = parseInt(color.substring(3,5),16)
var B = parseInt(color.substring(5,7),16);
R = parseInt(R * (100 + percent) / 100);
G = parseInt(G * (100 + percent) / 100);
B = parseInt(B * (100 + percent) / 100);
R = (R<255)?R:255;
G = (G<255)?G:255;
B = (B<255)?B:255;
var RR = ((R.toString(16).length==1)?"0"+R.toString(16):R.toString(16));
var GG = ((G.toString(16).length==1)?"0"+G.toString(16):G.toString(16));
var BB = ((B.toString(16).length==1)?"0"+B.toString(16):B.toString(16));
return "#"+RR+GG+BB;
}
function gradient(baseColor)
{
var gradientId = "gradient" + baseColor.substring(1)
console.log("COLOR: " + gradientId);
var darkColor = shadeColor(baseColor, -20)
var grad = d3.select("#gradients").selectAll("#" + gradientId)
.data([ gradientId ])
.enter()
.append("radialGradient")
.attr("id", gradientId)
.attr("gradientUnits", "objectBoundingBox")
.attr("fx", "30%")
.attr("fy", "30%")
grad.append("stop")
.attr("offset", "0%")
.attr("style", "stop-color:#FFFFFF")
grad.append("stop")
.attr("offset", "40%")
.attr("style", "stop-color:" + baseColor)
grad.append("stop")
.attr("offset", "100%")
.attr("style", "stop-color:" + darkColor)
console.log("url(#" + gradientId + ")")
return "url(#" + gradientId + ")";
}
});
</script>
<script>
</script>
</div>
</div>
</body>
</html>
rko3.r
#install.packages("reshape2")
library("reshape2")
#install.packages("sqldf")
library("sqldf")
path = "~/project/mpv_motion/dev/"
#dataname = "cz_psp_2006-2010_a1_1"
#analysisname = 'cz_psp_2006-2010_a1_1q'
dataname = "cz_praha_2010-2013_a1_1"
analysisname = 'cz_praha_2010-2013_a1_1h'
#raw data in 3 columns (values in [-1,1])
Graw = read.table(paste(path,dataname,"/",dataname,".csv",sep=""), header=FALSE, sep=",")
#data divisions x persons
G = acast(Graw,V2~V1,value.var='V3')
#scaled divisions x persons (mean=0 and sd=1 for each division)
H=t(scale(t(G),scale=TRUE))
#weights
#weights 1, based on number of persons in division
w1 = apply(abs(G)==1,1,sum,na.rm=TRUE)/max(apply(abs(G)==1,1,sum,na.rm=TRUE))
w1[is.na(w1)] = 0
#weights 2, "100:100" vs. "195:5"
w2 = 1 - abs(apply(G==1,1,sum,na.rm=TRUE) - apply(G==-1,1,sum,na.rm=TRUE))/apply(!is.na(G),1,sum)
w2[is.na(w2)] = 0
#analytics
plot(w1)
plot(w2)
plot(w1*w2)
#weighted scaled divisions x persons
Hw = H * w1 * w2
#index of missing data
#index of missing data divisions x persons
HI = H
HI[!is.na(H)]=1
HI[is.na(H)] = 0
#weighted scaled with NA->0 division x persons
Hw0 = Hw
Hw0[is.na(Hw)]=0
#eliminate persons with too few votes (weighted)
#lower limit to eliminate from calculations
lo_limit = .1
#weights for non missing data division x persons
HIw = HI*w1*w2
#sum of weights of divisions for each persons
tmp = apply(HIw,2,sum)
pw = tmp/max(tmp)
#index of persons in calculation
pI = pw > lo_limit
#weighted scaled with NA->0 and cutted persons with too few votes division x persons
Hw0c = Hw0[,pI]
#index of missing data cutted persons with too few votes divisions x persons
HIc = HI[,pI]
#"covariance" matrix adjusted according to missing data
Hcov=t(Hw0c)%*%Hw0c * 1/(t(HIc)%*%HIc) * (dim(Hw0c)[1])
#Hcov=t(Hw0)%*%Hw0 * 1/(t(HI)%*%HI-1) * (dim(H)[1]-1)
#substitution of missing data in "covariance" matrix
Hcov0 = Hcov
Hcov0[is.na(Hcov)] = 0 #*********
#eigendecomposition
He=eigen(Hcov0)
# V (rotation values of persons)
V = He$vectors
#projected divisions into dimensions
Hy=Hw0c%*%V
#analytics
plot(Hy[,1],Hy[,2])
plot(sqrt(He$values[1:10]))
#sigma matrix
sigma = diag(sqrt(He$values))
sigma[is.na(sigma)] = 0
#projection of persons into dimensions
Hproj = t(sigma%*%t(V))
#analytics
plot(Hproj[,1],Hproj[,2])
#sigma^-1 matrix
sigma_1 = diag(sqrt(1/He$values))
sigma_1[is.na(sigma_1)] = 0
# U (rotation values of divisions)
U = Hw0c%*%V%*%sigma_1
#U%*%sigma%*%t(V) != Hw0c ;because of adjusting of "covariance" matrix
# NEW MP (or partial)
#New persons / partial (like a projection of divisions from a smaller time interval into all divisions)
analdb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",analysisname,"/",analysisname,".sqlite3",sep=""))
datadb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",dataname,".sqlite3",sep=""))
rotation = strsplit(dbGetQuery(analdb,"SELECT orientation FROM analysis_info")[1,1],",")[[1]]
for (j in 2:length(rotation)) {
if (Hproj[rot_mp_rank,j-1] * as.double(rotation[j]) < 0) {
U[,j-1] = -1*U[,j-1]
}
}
aU = abs(U)
mp_names = dbGetQuery(datadb,"SELECT name FROM mp ORDER BY CAST(code as INTEGER)")
attr(G,'dimnames')[2][[1]] = mp_names$name
#attr(Hw0c,'dimnames')[2][[1]] = mp_names$name[pI]
interval_names = dbGetQuery(analdb,"SELECT distinct(interval_name) FROM analysis_division_in_interval ORDER BY interval_name")
#lower limit to eliminate from projections
lo_limitT = lo_limit
for (i in 1:dim(interval_names)[1]) {
TIf = dbGetQuery(analdb,paste("SELECT division_code, CASE interval_name='",interval_names[i,],"' WHEN 1 THEN 'TRUE' ELSE 'FALSE' END as in_interval FROM analysis_division_in_interval ORDER BY division_code",sep=""))
TIf$in_interval = as.logical(TIf$in_interval)
max_division_code = TIf$division_code[max(which(as.logical(TIf$in_interval)))]
min_division_code = TIf$division_code[min(which(as.logical(TIf$in_interval)))]
TI = TIf$in_interval
GTc = G[,pI]
GTc[!TI,] = NA
HTc = (GTc - attr(H,"scaled:center"))/attr(H,"scaled:scale")
HTIc = HTc
HTIc[!is.na(HTIc)] = 1
HTIc[is.na(HTIc)] = 0
HTw0c = HTc * w1 * w2
HTw0c[is.na(HTw0c)] = 0
#weights for non missing data division x persons
HTIcw = HTIc*w1*w2
#sum of weights of divisions for each persons
tmp = apply(HTIcw,2,sum)
pTw = tmp/max(tmp)
#index of persons in calculation
pTI = pTw > lo_limitT
#weighted scaled with NA->0 and cutted persons with too few votes division x persons
HTw0cc = HTw0c[,pTI]
#index of missing data cutted persons with too few votes divisions x persons
HTIcc = HTIc[,pTI]
dweights = t(t(aU)%*%HTIcc / apply(aU,2,sum)) #person x division
dweights[is.na(dweights)] = 0
HTw0ccproj = t(HTw0cc)%*%U / dweights
parties = as.matrix(dbGetQuery(datadb,paste('SELECT COALESCE(NULLIF(tmax.code,""),tmin.code) as code, COALESCE(NULLIF(tmax.color,""),tmin.color) as color FROM (SELECT mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf LEFT JOIN "group" as g ON mvf.group_code=g.code WHERE division_code="',max_division_code,'") as tmax LEFT JOIN (SELECT mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf LEFT JOIN "group" as g ON mvf.group_code=g.code WHERE division_code="',min_division_code,'") as tmin ON tmax.mp_code=tmin.mp_code ORDER BY CAST(tmax.mp_code as INTEGER)',sep="")))
partiescc = (parties[pI,])[pTI,]
write.csv(format(cbind(HTw0ccproj[,1:2],partiescc),digits=3),file=paste(path,dataname,"/",analysisname,"/",interval_names[i,],"_result.csv",sep=""))
#*************************
}
# CUTTING LINES
# based on overall model
#minimizing function
f_cutting_lines=function(beta0) -1*sum(apply(cbind(y*(x%*%beta+beta0),zeros),1,min))
#how many dimensions?
n_first_dimensions = 2
#preparing variables
normals = Hy[,1:n_first_dimensions]
loss_f = data.frame(matrix(0,nrow=dim(H)[1],ncol=4))
colnames(loss_f)=c("Parameter1","Loss1","Parameter_1","Loss_1")
parameters = data.frame(matrix(0,nrow=dim(H)[1],ncol=3))
colnames(parameters)=c("Parameter","Loss","Direction")
xfull = t(t(He$vectors[,1:n_first_dimensions]) * sqrt(He$values[1:n_first_dimensions]))
#calculating all cutting lines
i=1
for (i in as.numeric(1:dim(H)[1])) {
beta = Hy[i,1:n_first_dimensions]
y = t(as.matrix(H[i,]))[,pI]
x = xfull[which(!is.na(y)),]
y = y[which(!is.na(y))]
zeros = as.matrix(rep(0,length(y)))
#** place for improvement: "1000"
res1 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000) #1000? arbitrary
#the sign is arbitrary, the real result may be -1*; we need to minimize the other way as well!!
y=-y
res2 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000) #1000? arbitrary
#the real parameter is the one with lower loss function
#theoretically should be the same (either +1 or -1) for all divisions(rows), however, due to missing data, the calculation may lead to a few divisions with the other direction
loss_f[i,] = c(res1$par,res1$value,res2$par,res2$value)
if (res1$value<=res2$value) {
parameters[i,] = c(res1$par,res1$value,1)
} else {
parameters[i,] = c(res2$par,res2$value,-1)
}
}
CuttingLines = list(normals=normals,parameters=parameters,loss_function=loss_f,weights=cbind(w1,w2))
##
#cutting lines:
I = w1*w2 > .75
for (i in as.numeric(1:dim(H)[1])) {
if (I[i] && ((i %% 10) == 0)) {
beta = CuttingLines$normals[i,]
beta0 = CuttingLines$parameters$Parameter[i]
abline(-beta0/beta[2],-beta[1]/beta[2])
#break
}
}
# only second half
plot(Hproj[,1],Hproj[,2])
> for (j in as.numeric(1:round(dim(H)[1]/2))) {
i = round(dim(H)[1]/2) + j
if (I[i] && ((i %% 10) == 0)) {
beta = CuttingLines$normals[i,]
beta0 = CuttingLines$parameters$Parameter[i]
abline(-beta0/beta[2],-beta[1]/beta[2])
#break
}
}
# when y=0, x is ?
plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,1],ylim=c(-100,100))
# slopes
plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,2],ylim=c(-10,10))