index.html
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Declare the encoding first so it is known before any text content is parsed. -->
<meta charset="utf-8">
<!-- Page zoom is left enabled so users can magnify the legend and popups. -->
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Presidential elections 2013, 2nd round</title>
<script src="//code.jquery.com/jquery-1.8.2.min.js"></script>
<script>
// Must be defined before leaflet.js is loaded: Leaflet reads this global
// to decide whether vector layers are drawn on a canvas.
window.L_PREFER_CANVAS = true;
</script>
<link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.0.2/css/bootstrap.min.css">
<link rel="stylesheet" href="//cdn.leafletjs.com/leaflet-0.6.4/leaflet.css">
<script src="//cdn.leafletjs.com/leaflet-0.6.4/leaflet.js"></script>
<style>
/* The map fills the whole viewport; it needs an explicit height to render. */
html, body, #map {
  width: 100%;
  height: 100%;
  margin: 0;
  padding: 0;
}
</style>
</head>
<body>
<!-- Fixed Bootstrap navbar showing the page title. -->
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <a class="navbar-brand" href="#">Presidential elections 2013, 2nd round</a>
    </div>
  </div>
</div>
<!-- Legend: fixed 50px from the top with a raised z-index so it is drawn over the map. -->
<div style="position:fixed;top:50px;z-index:1000;">
  <div class="alert alert-info">The <strong>size</strong> of bubbles represents number of voters, the <strong>color</strong> represents the winner and the <strong>opacity</strong> the margin of victory. <span style="color:#b00;font-weight:800">Bachelet</span> vs. <span style="color:#00b;font-weight:800">Matthei</span></div>
</div>
<!-- Container that the inline script below turns into the Leaflet map. -->
<div id="map" style="margin-top:40px;"></div>
<script type="text/javascript">
// Create the map; the default zoom control is disabled here so that it can be
// re-added in the top-right corner, away from the legend.
var map = L.map('map', {zoomControl: false}).setView([-33.5, -70.5], 5);
map.addControl(L.control.zoom({position: 'topright'}));

L.tileLayer('//{s}.www.toolserver.org/tiles/bw-mapnik/{z}/{x}/{y}.png', {
  attribution: '© <a href="//osm.org/copyright">OpenStreetMap</a> contributors'
}).addTo(map);

// Load the results and draw one circle per feature:
//   radius  ~ sqrt(total votes), so circle area scales with the vote count
//   colour  = winner (see class2color)
//   opacity = winner's share of the two-candidate vote, rescaled by 2 * (share - 0.45)
$.getJSON("cl_2013_2_ring_bachelet_matthei.json", function (data) {
  $.each(data.features, function (index, value) {
    // Parse the two vote counts once, with an explicit radix.
    var votesA = parseInt(value.population.p6, 10);
    var votesB = parseInt(value.population.p9, 10);
    var total = votesA + votesB;

    // A missing or zero total would yield NaN for the radius, opacity and
    // percentages, so such features are not drawn.
    if (!(total > 0)) {
      return; // returning undefined moves $.each on to the next feature
    }

    var winnerVotes = Math.max(votesA, votesB);
    var loserVotes = Math.min(votesA, votesB);
    var winnerShare = winnerVotes / total;
    var colour = class2color(value.classname);

    // The data stores coordinates as [lng, lat]; Leaflet expects [lat, lng].
    var circle = L.circle(
      [value.coordinates[1], value.coordinates[0]],
      Math.sqrt(total * 2750),
      {
        color: colour,
        fillColor: colour,
        fillOpacity: 2 * (winnerShare - 0.45),
        weight: 0.1,
        className: value.classname
      }
    ).addTo(map);

    var perc1 = Math.round(winnerShare * 100);
    var perc2 = 100 - perc1;
    circle.bindPopup(value.name + "<br>" + value.winner + " won " + perc1 + " % vs. " + perc2 + " % <br>(" + winnerVotes + " : " + loserVotes + " votes)");
  });
});
// Translate a feature's class name into its bubble colour:
// 'bachelet' -> red, 'matthei' -> blue, anything else -> black.
function class2color(className) {
  return className == 'bachelet' ? "#f00"
       : className == 'matthei' ? "#00f"
       : "#000";
}
</script>
</body>
</html>
geocode.php
<?php
// Optional query-string overrides for the geocoder's result language and region bias.
$language = (isset($_GET['language']) ? $_GET['language'] : 'es') ;
$region = (isset($_GET['region']) ? $_GET['region'] : 'cl') ;

// Input: tab-separated file whose first column is the address to geocode.
// Output: the input columns followed by lat, lng and a check column.
$fin = fopen("address.csv","r");
$fout = fopen("geocoded.csv","w+");
if ($fin === false || $fout === false) {
    die("Cannot open address.csv for reading or geocoded.csv for writing.");
}

// Stream progress to the client as each row is processed. This is done once,
// before the loop: ending output buffers repeatedly raises a notice as soon
// as none is left.
ob_implicit_flush(true);
while (ob_get_level() > 0) {
    ob_end_flush();
}

while (($row = fgetcsv($fin, 1000, "\t")) !== FALSE) {
    $address = isset($row[0]) ? $row[0] : '';

    // Build the query string with proper encoding of every parameter.
    $url = "http://maps.googleapis.com/maps/api/geocode/json?" . http_build_query(array(
        'address'  => $address,
        'sensor'   => 'false',
        'region'   => $region,
        'language' => $language,
    ), '', '&');

    // grabber() returns false when the request fails; treat that like an empty response.
    $response = grabber($url);
    $obj = is_string($response) ? json_decode($response) : null;
    $first = (is_object($obj) && !empty($obj->results)) ? $obj->results[0] : null;

    // First-level administrative area of the best match, written out as the
    // check column (presumably so a human can verify the match - confirm).
    $kraj_obj = ($first !== null) ? g_find_type($obj,'administrative_area_level_1') : null;

    // Rows that could not be geocoded are still written, with empty result
    // columns, so the output stays aligned with the input.
    $out = $row;
    $out['lat'] = isset($first->geometry->location->lat) ? $first->geometry->location->lat : '';
    $out['lng'] = isset($first->geometry->location->lng) ? $first->geometry->location->lng : '';
    $out['check'] = isset($kraj_obj->long_name) ? $kraj_obj->long_name : '';
    fputcsv($fout,$out);

    // Progress output; the address comes from a data file, so escape it for HTML.
    echo htmlspecialchars($address)."<br/>\t";
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();

    // Throttle to one request per second.
    sleep(1);
}

fclose($fin);
fclose($fout);
/**
 * Find the first address component of the first geocoding result that
 * carries the given type.
 *
 * @param object|null $object Decoded geocoder response; the components are
 *                            read from results[0]->address_components.
 * @param string      $type   Component type to look for,
 *                            e.g. 'administrative_area_level_1'.
 * @return object|null The matching component, or null when the response has
 *                     no results or no component of that type.
 */
function g_find_type($object, $type) {
    // A failed request or a zero-result response has nothing to search.
    if (!isset($object->results[0]->address_components)) {
        return null;
    }
    foreach ((array) $object->results[0]->address_components as $component) {
        if (isset($component->types) && in_array($type, (array) $component->types)) {
            return $component;
        }
    }
    return null;
}
/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url     URL to request.
 * @param array  $options Extra cURL settings as a list of
 *                        array(CURLOPT_* constant, value) pairs.
 * @return string|bool The response body, or false when the handle cannot be
 *                     created or the transfer fails.
 */
function grabber($url,$options = array())
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    // Defaults: return the body instead of printing it, follow redirects,
    // leave response headers out of the body, give up after 120 seconds.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_TIMEOUT, 120);
    // NOTE(review): certificate verification is disabled, which permits
    // man-in-the-middle attacks on HTTPS requests. Kept as in the original;
    // enable it once a CA bundle is known to be configured.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);

    // Caller-supplied options are applied last so that they can override any
    // of the defaults above.
    foreach ($options as $option) {
        curl_setopt($ch, $option[0], $option[1]);
    }

    $out = curl_exec($ch);
    curl_close($ch);
    return $out;
}
scraper.py
import csv
import requests
from lxml import html
def _cell_text(domtree, row, col, drop_dots=False):
    """Return the cleaned UTF-8 text of one cell of the second 'table_contenido' table.

    Non-breaking spaces are removed and surrounding whitespace is stripped.
    With drop_dots=True every '.' is removed first (presumably thousands
    separators in the numeric column - confirm against the page).
    """
    xpath = '//table[@class="table_contenido"][2]/tr[%d]/td[%d]/text()' % (row, col)
    raw = domtree.xpath(xpath)[0].encode('utf-8', 'ignore')
    if drop_dots:
        raw = raw.replace(".", "")
    return raw.replace("\xc2\xa0", "").strip()


def recursion(item,info):
    """Fetch one node of the results tree and descend into its children.

    item -- (name, codigo, nivel) tuple identifying the node to request.
    info -- tuple of the fields accumulated on the way down; it becomes the
            leading columns of the CSV row written for a leaf.

    A node whose nivel is 'E' is treated as a leaf: cells 1 and 3 of table
    rows 2, 3, 5, 6, 7 and 8 are appended to workfile.csv as one row. For any
    other node the page is saved to workfile.html (overwritten each time, so
    it holds the last non-leaf page fetched) and every child link is visited.
    """
    print(item)
    payload = {'codigo': item[1], 'nivel': item[2], 'division': 'GEOGRAFICA', 'codigoPadre': '', 'codigoCircunscripcion': '', 'codigoColegio': '' }
    url = 'http://www.eleccionservel.cl/ELECCIONES2013/segundaVuelta'
    page = requests.post(url, data=payload)
    domtree = html.fromstring(page.text)

    if item[2] == 'E':
        values = []
        for table_row in (2, 3, 5, 6, 7, 8):
            values.append(_cell_text(domtree, table_row, 1))
            values.append(_cell_text(domtree, table_row, 3, drop_dots=True))
        # Open the CSV only when there is a row to write, and always close it:
        # opening it in every call leaks one handle per non-leaf node.
        with open('workfile.csv', 'ab') as outfile:
            outwriter = csv.writer(outfile, quoting=csv.QUOTE_NONNUMERIC)
            outwriter.writerow(list(info) + values)
        return

    names = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/text()')
    hrefs = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/@href')
    with open('workfile.html', 'wb') as f:
        f.write(page.text.encode('utf-8','ignore'))

    if len(names) == 0:
        # Unexpected page without any links: dump diagnostics and skip this
        # node instead of failing with an IndexError below.
        print(names)
        print(hrefs)
        print(page.text)
        return

    # The first link of each list is not a child entry and is dropped;
    # links whose href is '#' carry no codes and are ignored.
    names = [x.encode('utf-8').strip() for x in names[1:]]
    nameshrefs = [x for x in zip(names, hrefs[1:]) if not (x[1] == '#')]

    # Each href embeds the child's codigo and nivel as the 1st and 2nd
    # single-quoted arguments.
    rows = []
    for name, href in nameshrefs:
        parts = href.split("'")
        rows.append((name, parts[1], parts[3]))

    for row in rows:
        recursion(row,info+row)
# Level codes keyed by depth.
# NOTE(review): not referenced anywhere in the visible code. 'E' is the level
# that recursion() treats as a leaf; presumably these are the values the site
# uses for the 'nivel' request parameter - confirm before relying on them.
levels = {
1: 'R',
2: 'O',
3: 'C',
4: 'E',
5: 'COL',
6: 'M'
}
# Start from empty work files: recursion() appends rows to workfile.csv and
# rewrites workfile.html itself, so nothing needs to stay open here.
open('workfile.html', 'w').close()
open('workfile.csv', 'wb').close()

# Read the top-level entries from the navigation page.
url = 'http://www.eleccionservel.cl/ELECCIONES2013/vistaNavegacionSegundaVuelta'
mainpage = requests.get(url)
domtree = html.fromstring(mainpage.text)
names = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/text()')
hrefs = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/@href')

# The first link of each list is not an entry and is dropped; links whose
# href is '#' carry no codes and are ignored, as in recursion().
names = [x.encode('utf-8').strip() for x in names[1:]]
main = []
for name, href in zip(names, hrefs[1:]):
    if href == '#':
        continue
    # Each href embeds codigo and nivel as the 1st and 2nd single-quoted arguments.
    parts = href.split("'")
    main.append((name, parts[1], parts[3]))

# De-duplicate by codigo (a later duplicate replaces an earlier one), but
# visit the entries in page order rather than in arbitrary dict order.
tree = {}
order = []
for item in main:
    if item[1] not in tree:
        order.append(item[1])
    tree[item[1]] = item

for key in order:
    recursion(tree[key],tree[key])