CL Presidential Elections 2013: 2nd round by michalskop

index.html

<!DOCTYPE html>
<html>
  <head>
    <title>Presidential elections 2013, 2nd round</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="initial-scale=1.0, user-scalable=no"/>
	<script src="//code.jquery.com/jquery-1.8.2.min.js"></script>
	
	
	<script>
	  // see //leafletjs.com/reference.html
	  L_PREFER_CANVAS = true;
	</script>

	<link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.0.2/css/bootstrap.min.css">
    <link rel="stylesheet" href="//cdn.leafletjs.com/leaflet-0.6.4/leaflet.css" />
	 <!--[if lte IE 8]>
		 <link rel="stylesheet" href="//cdn.leafletjs.com/leaflet-0.6.4/leaflet.ie.css" />
	 <![endif]-->
    <script src="//cdn.leafletjs.com/leaflet-0.6.4/leaflet.js"></script>
    <!--<script type="text/javascript" src="//mbostock.github.com/d3/d3.js?1.29.1"></script>-->
    <style type="text/css">

		html, body, #map {
		  width: 100%;
		  height: 100%;
		  margin: 0;
		  padding: 0;
		}

    </style>
  </head>
  <body>
  
    <div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
      <div class="container">
        <div class="navbar-header">
          <a class="navbar-brand" href="#">Presidential elections 2013, 2nd round</a>
        </div>
      </div>
    </div>
    
    <div style="position:fixed;top:50px;z-index:1000;">
      <div class="alert alert-info" >The <strong>size</strong> of bubbles represents the number of voters, the <strong>color</strong> represents the winner and the <strong>opacity</strong> the margin of victory. <span style="color:#b00;font-weight:800">Bachelet</span> vs. <span style="color:#00b;font-weight:800">Matthei</span></div>
    </div>
    <div id="map" style="margin-top:40px;"></div>
    <script type="text/javascript">


		// Create the map; the zoom control is placed in the top-right corner.
		var map = L.map('map',{zoomControl: false}).setView([-33.5,-70.5], 5);
		map.addControl( L.control.zoom({position: 'topright'}) );

		// Add a black-and-white Mapnik tile layer.
		// also see //wiki.openstreetmap.org/wiki/Tiles
		// (alternative: '//{s}.tile.osm.org/{z}/{x}/{y}.png')
		L.tileLayer('//{s}.www.toolserver.org/tiles/bw-mapnik/{z}/{x}/{y}.png', {
			attribution: '&copy; <a href="//osm.org/copyright">OpenStreetMap</a> contributors'
		}).addTo(map);

		// Draw one circle per feature: the area grows with the summed p6 + p9
		// counts, the colour follows the feature's classname and the fill opacity
		// follows the winner's share of those two counts.
		$.getJSON( "cl_2013_2_ring_bachelet_matthei.json", function (data) {
			$.each(data.features, function (index, value) {
				// Parse each count once, always as base 10.
				var votesA = parseInt(value.population.p6, 10);
				var votesB = parseInt(value.population.p9, 10);
				var total = votesA + votesB;

				// Skip features with missing or zero counts: they would produce a NaN
				// radius and opacity. Returning undefined continues the $.each loop.
				if (!(total > 0)) {
					return;
				}

				var winnerVotes = Math.max(votesA, votesB);
				var loserVotes = Math.min(votesA, votesB);
				var winnerShare = winnerVotes / total;
				var color = class2color(value.classname);

				// Coordinates are stored as [lng, lat]; Leaflet expects [lat, lng].
				var circle = L.circle([value.coordinates[1], value.coordinates[0]], Math.sqrt(total*2750), {
					color: color,
					fillColor: color,
					// A share of 0.5..1 maps to 0.1..1.1; clamp so it stays a valid opacity.
					fillOpacity: Math.min(1, 2*(winnerShare - 0.45)),
					weight: 0.1,
					className: value.classname	//this does not seem to work, so hacked by class2color
				}).addTo(map);

				var perc1 = Math.round(winnerShare * 100);
				var perc2 = 100 - perc1;

				circle.bindPopup(value.name + "<br>" + value.winner + " won " + perc1 + " % vs. " + perc2 + " % <br>(" + winnerVotes + " : " + loserVotes + " votes)");
			});
		});
		
		// Map a candidate class name to its bubble colour:
		// 'bachelet' -> red, 'matthei' -> blue, anything else -> black.
		function class2color(className) {
		  var colour = "#000";
		  if (className == 'bachelet') {
		    colour = "#f00";
		  } else if (className == 'matthei') {
		    colour = "#00f";
		  }
		  return colour;
		}
	</script>
  </body>
</html>

geocode.php

<?php
// Batch geocoder: reads one address per row from the first column of the
// tab-separated file address.csv, asks the Google Geocoding API about it and
// writes the input row plus latitude, longitude and the name of the
// administrative_area_level_1 component (kept as a manual sanity check) to
// geocoded.csv. Rows that cannot be geocoded are written with empty values.
//
// Optional query parameters: language (default 'es'), region (default 'cl').
$language = (isset($_GET['language']) && is_string($_GET['language']) ? $_GET['language'] : 'es') ;
$region = (isset($_GET['region']) && is_string($_GET['region']) ? $_GET['region'] : 'cl') ;

$fin = fopen("address.csv","r");
if ($fin === false) {
  die("Cannot open address.csv for reading");
}
$fout = fopen("geocoded.csv","w+");
if ($fout === false) {
  fclose($fin);
  die("Cannot open geocoded.csv for writing");
}

// Stream progress to the client as rows are processed. Output buffers are
// closed once, up front: calling ob_end_flush() on every iteration raises a
// notice as soon as no buffer is left.
ob_implicit_flush(true);
while (ob_get_level() > 0) {
  ob_end_flush();
}

// Length 0 = no line-length limit, so long rows are not split.
while (($row = fgetcsv($fin, 0, "\t")) !== FALSE) {

  // fgetcsv() returns array(null) for a blank line; nothing to geocode.
  if ($row === array(null)) {
    continue;
  }

  $address = $row[0];

  // Every dynamic part is URL-encoded; region and language come from $_GET.
  $url = "http://maps.googleapis.com/maps/api/geocode/json?address=" . urlencode($address)
       . "&sensor=false&region=" . urlencode($region)
       . "&language=" . urlencode($language);
  $body = grabber($url);
  $obj = is_string($body) ? json_decode($body) : null;

  // Defaults used when the request failed or returned no result.
  $lat = '';
  $lng = '';
  $check = '';
  if (isset($obj->results[0]->geometry->location)) {
    $location = $obj->results[0]->geometry->location;
    $lat = isset($location->lat) ? $location->lat : '';
    $lng = isset($location->lng) ? $location->lng : '';

    $kraj_obj = g_find_type($obj,'administrative_area_level_1');
    if (isset($kraj_obj->long_name)) {
      $check = $kraj_obj->long_name;
    }
  }

  $out = $row;
  $out['lat'] = $lat;
  $out['lng'] = $lng;
  $out['check'] = $check;

  fputcsv($fout,$out);
  echo $row[0]."<br/>\t";
  if (ob_get_level() > 0) {
    ob_flush();
  }
  flush();
  // One request per second -- presumably to stay under the API rate limit.
  sleep(1);

}

fclose($fin);
fclose($fout);

/**
 * Extracts the first address component of a given type from a Google
 * geocoding response object.
 *
 * Only the first result (results[0]) of the response is inspected.
 *
 * @param object|null $object the Google response object (json_decode of the JSON body)
 * @param string      $type   type of the area, e.g. 'locality'
 *
 * @return object|null the first address component whose types contain $type,
 *                     or null when the response has no result or no such component
 *
 * example: g_find_type($response, 'locality')
 */
function g_find_type($object, $type) {
  // A failed request or an empty result list has no results[0] to look into.
  if (!isset($object->results[0]->address_components)) {
    return null;
  }
  foreach ((array) $object->results[0]->address_components as $component) {
    if (isset($component->types) && in_array($type, (array) $component->types)) {
      return $component;
    }
  }
  return null;
}

/**
 * cURL downloader, with possible extra options.
 *
 * Defaults: 120 s timeout, redirects are followed and response headers are
 * left out of the returned string.
 *
 * @param string $url     address to download
 * @param array  $options extra cURL options as a list of array(option, value)
 *                        pairs. They are applied after the URL and timeout
 *                        defaults (so they may override those) but before the
 *                        SSL, redirect and header settings below (so they
 *                        cannot override these three).
 *
 * @return string|false the response body, or false when the handle could not
 *                      be created or the transfer failed
 *
 * example:
 * grabber('http://example.com', array(array(CURLOPT_TIMEOUT, 180)));
 */
function grabber($url,$options = array())
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_TIMEOUT, 120);
    foreach ($options as $option) {
        curl_setopt($ch, $option[0], $option[1]);
    }
    // NOTE(review): certificate verification is switched off, so HTTPS
    // responses are not authenticated. Kept as in the original; enable it
    // wherever a CA bundle is available.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0); //this option is important here!!
    $out = curl_exec($ch);
    curl_close($ch);
    return $out;
}

scraper.py

# -*- coding: utf-8 -*-
# step 1

# csv
import csv
import requests
from lxml import html



#recursion 
def _result_cell(domtree, tr, td, drop_dots=False):
  """Return the cleaned text of one cell of the second 'table_contenido' table.

  tr and td are the 1-based row and column numbers used in the XPath. The text
  is UTF-8 encoded, UTF-8 non-breaking spaces are removed and the result is
  stripped. With drop_dots=True every "." is removed as well (presumably the
  thousands separators of the vote counts).

  Raises IndexError when the page has no such cell.
  """
  xpath = '//table[@class="table_contenido"][2]/tr[%d]/td[%d]/text()' % (tr, td)
  text = domtree.xpath(xpath)[0].encode('utf-8','ignore')
  if drop_dots:
    text = text.replace(".","")
  return text.replace("\xc2\xa0","").strip()


def recursion(item,info):
  """Scrape the results below one node of the navigation tree.

  item is a (name, codigo, nivel) tuple; its codigo and nivel are posted to the
  segundaVuelta endpoint. info is the tuple of values collected on the way down
  to this node.

  When nivel is 'E', twelve cells of the results table (columns 1 and 3 of
  rows 2, 3, 5, 6, 7 and 8) are appended to info and written as one row to
  workfile.csv. Otherwise the child links of the #tabs-1 list are parsed and
  recursion() is called for each child with info extended by the child tuple.
  The last non-leaf page fetched is kept in workfile.html for debugging.
  """
  print(item)
  payload = {'codigo': item[1], 'nivel': item[2], 'division': 'GEOGRAFICA', 'codigoPadre': '', 'codigoCircunscripcion': '', 'codigoColegio': '' }
  url = 'http://www.eleccionservel.cl/ELECCIONES2013/segundaVuelta'
  page = requests.post(url, data=payload)
  domtree = html.fromstring(page.text)

  # if it is E, get values, otherwise recursion
  if (item[2] == 'E'):  #there are problems with Colegios and Mesas, needs to specify other params in payload
    values = []
    for tr in (2, 3, 5, 6, 7, 8):
      values.append(_result_cell(domtree, tr, 1))
      values.append(_result_cell(domtree, tr, 3, drop_dots=True))
    # Open the output only where a row is written, and always close it again.
    with open('workfile.csv', 'ab') as outfile:
      outwriter = csv.writer(outfile, quoting=csv.QUOTE_NONNUMERIC)
      outwriter.writerow(list(info)+values)
    return

  names = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/text()')
  hrefs = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/@href')
  with open('workfile.html', 'wb') as f:
    f.write(page.text.encode('utf-8','ignore'))

  if not names:
    # Unexpected page: dump what was received and skip this subtree instead of
    # aborting the whole run with an IndexError.
    print(names)
    print(hrefs)
    print(page.text)
    return

  # The first link is dropped ("vision pais"); '#' links are placeholders
  # without a target.
  children = [
    (name.encode('utf-8').strip(), href)
    for name, href in zip(names[1:], hrefs[1:])
    if href != '#'
  ]

  # Build every child tuple before descending. codigo and nivel are the first
  # and second single-quoted fragments of the href.
  rows = []
  for name, href in children:
    parts = href.split("'")
    rows.append((name, parts[1], parts[3]))

  for row in rows:
    recursion(row,info+row)

# Meaning of the 'nivel' codes used in the navigation links (reference only).
levels = {
  1: 'R',    #region
  2: 'O',    #provincia
  3: 'C',    #comuna
  4: 'E',    #circ. electoral
  5: 'COL',    #colegio
  6: 'M'    #mesa
}

# Start every run with empty output files. The handles are closed right away:
# recursion() opens workfile.csv in append mode itself for each row it writes.
open('workfile.html', 'w').close()
open('workfile.csv', 'wb').close()

# main file
url = 'http://www.eleccionservel.cl/ELECCIONES2013/vistaNavegacionSegundaVuelta'
mainpage = requests.get(url)
domtree = html.fromstring(mainpage.text)

names = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/text()')
hrefs = domtree.xpath('//div[@id="tabs-1"]/ul/li/a/@href')

# The first link is dropped (recursion() does the same and labels it
# "vision pais"); '#' placeholder links carry no codigo/nivel and are skipped.
main = []
for name, href in zip(names[1:], hrefs[1:]):
  if href == '#':
    continue
  parts = href.split("'")
  # codigo and nivel are the first and second single-quoted fragments of the href
  main.append((name.encode('utf-8').strip(), parts[1], parts[3]))

if not main:
  raise RuntimeError('no navigation links found at ' + url)

# Index the top-level entries by codigo, which also removes duplicates.
tree = {}
for item in main:
  tree[item[1]] = item

#recursion
for key in tree:
  recursion(tree[key],tree[key])

#payload = {'codigo': '1205', 'nivel': 'COL', 'division': 'GEOGRAFICA', 'codigoPadre': '1514', 'codigoCircunscripcion': '1514', 'codigoColegio': '' }
#r = requests.post("http://www.eleccionservel.cl/ELECCIONES2013/segundaVuelta", data=payload)
#print r.encoding
#f.write (r.text.encode('utf-8','ignore'))
#f.close()