block by nitaku 8248112

NCBI taxonomy fractal treemap

Full Screen

NCBI

index.js

// Generated by CoffeeScript 1.10.0
(function() {

  /**
   * Page entry point, published on `window` so that the document's
   * `onload="main()"` handler can reach it.
   *
   * @returns the d3 selection wrapping the page's `svg` element
   */
  window.main = function() {
    var svgSelection = d3.select('svg');
    return svgSelection;
  };

}).call(this);

index.html

<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8">
        <title>UNTITLED</title>
        <link type="text/css" href="index.css" rel="stylesheet"/>
        <script src="//d3js.org/d3.v3.min.js"></script>
        <script src="index.js"></script>
    </head>
    <body onload="main()">
        <svg width="960" height="500">
        </svg>
    </body>
</html>

0.json

{
"type": "FeatureCollection",
                                                                                
"features": [

]
}

index.sass

division.dmp

0	|	BCT	|	Bacteria	|		|
1	|	INV	|	Invertebrates	|		|
2	|	MAM	|	Mammals	|		|
3	|	PHG	|	Phages	|		|
4	|	PLN	|	Plants	|		|
5	|	PRI	|	Primates	|		|
6	|	ROD	|	Rodents	|		|
7	|	SYN	|	Synthetic	|		|
8	|	UNA	|	Unassigned	|	No species nodes should inherit this division assignment	|
9	|	VRL	|	Viruses	|		|
10	|	VRT	|	Vertebrates	|		|
11	|	ENV	|	Environmental samples	|	Anonymous sequences cloned directly from the environment	|

err.txt

Traceback (most recent call last):
  File "run.py", line 3, in <module>
    gosperify('/data/_workspace/hexes.wkt.csv', 'test')
  File "/var/www/webvis/examples/ncbi_treemap/gosper_regions.py", line 102, in gosperify
    'name': tree[path[-1]]['name']
IndexError: tuple index out of range

gc.prt

--**************************************************************************
--  This is the NCBI genetic code table
--  Initial base data set from Andrzej Elzanowski while at PIR International
--  Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
--  Base 1-3 of each codon have been added as comments to facilitate
--    readability at the suggestion of Peter Rice, EMBL
--  Later additions by Taxonomy Group staff at NCBI
--
--  Version 3.9
--     Code 14 differs from code 9 only by translating UAA to Tyr rather than
--     STOP.  A recent study (Telford et al, 2000) has found no evidence that
--     the codon UAA codes for Tyr in the flatworms, but other opinions exist.
--     There are very few GenBank records that are translated with code 14,
--     but a test translation shows that retranslating these records with code
--     9 can cause premature terminations.  Therefore, GenBank will maintain
--     code 14 until further information becomes available.
--
--  Version 3.8
--     Added GTG start to Echinoderm mitochondrial code, code 9
--
--  Version 3.7
--     Added code 23 Thraustochytrium mitochondrial code
--        formerly OGMP code 93
--        submitted by Gertraude Berger, Ph.D.
--
--  Version 3.6
--     Added code 22 TAG-Leu, TCA-stop
--        found in mitochondrial DNA of Scenedesmus obliquus
--        submitted by Gertraude Berger, Ph.D.
--        Organelle Genome Megasequencing Program, Univ Montreal
--
--  Version 3.5
--     Added code 21, Trematode Mitochondrial
--       (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
--     Added code 16, Chlorophycean Mitochondrial
--       (TAG can be translated to Leucine instead of STOP in chlorophyceans
--        and fungi)
--
--  Version 3.4
--     Added CTG,TTG as allowed alternate start codons in Standard code.
--        Prats et al. 1989, Hann et al. 1992
--
--  Version 3.3 - 10/13/95
--     Added alternate initiation codon ATC to code 5
--        based on complete mitochondrial genome of honeybee
--        Crozier and Crozier (1993)
--
--  Version 3.2 - 6/24/95
--  Code       Comments
--   10        Alternative Ciliate Macronuclear renamed to Euplotid Macro...
--   15        Blepharisma Macro.. code added
--    5        Invertebrate Mito.. GTG allowed as alternate initiator
--   11        Eubacterial renamed to Bacterial as most alternate starts
--               have been found in Archaea
--
--
--  Version 3.1 - 1995
--  Updated as per Andrzej Elzanowski at NCBI
--     Complete documentation in NCBI toolkit documentation
--  Note: 2 genetic codes have been deleted
--
--   Old id   Use id     - Notes
--
--   id 7      id 4      - Kinetoplast code now merged in code id 4
--   id 8      id 1      - all plant chloroplast differences due to RNA edit
--
--*************************************************************************

Genetic-code-table ::= {
 {
  name "Standard" ,
  name "SGC0" ,
  id 1 ,
  ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "---M---------------M---------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Vertebrate Mitochondrial" ,
  name "SGC1" ,
  id 2 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
  sncbieaa "--------------------------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Yeast Mitochondrial" ,
  name "SGC2" ,
  id 3 ,
  ncbieaa  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "----------------------------------MM----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
    name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
 Mitochondrial; Mycoplasma; Spiroplasma" ,
  name "SGC3" ,
  id 4 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "--MM---------------M------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Invertebrate Mitochondrial" ,
  name "SGC4" ,
  id 5 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "---M----------------------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
  name "SGC5" ,
  id 6 ,
  ncbieaa  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
  name "SGC8" ,
  id 9 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Euplotid Nuclear" ,
  name "SGC9" ,
  id 10 ,
  ncbieaa  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Bacterial, Archaeal and Plant Plastid" ,
  id 11 ,
  ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "---M---------------M------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Alternative Yeast Nuclear" ,
  id 12 ,
  ncbieaa  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-------------------M---------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Ascidian Mitochondrial" ,
  id 13 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
  sncbieaa "---M------------------------------MM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Alternative Flatworm Mitochondrial" ,
  id 14 ,
  ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Blepharisma Macronuclear" ,
  id 15 ,
  ncbieaa  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Chlorophycean Mitochondrial" ,
  id 16 ,
  ncbieaa  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Trematode Mitochondrial" ,
  id 21 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Scenedesmus obliquus Mitochondrial" ,
  id 22 ,
  ncbieaa  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Thraustochytrium Mitochondrial" ,
  id 23 ,
  ncbieaa  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "--------------------------------M--M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Pterobranchia Mitochondrial" ,
  id 24 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
  sncbieaa "---M---------------M---------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Candidate Division SR1 and Gracilibacteria" ,
  id 25 ,
  ncbieaa  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "---M-------------------------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 }
}

gencode.dmp

0	|		|	Unspecified	|	                                                                 	|	                                                                 	|
1	|		|	Standard	|	FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	---M---------------M---------------M---------------------------- 	|
2	|		|	Vertebrate Mitochondrial	|	FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG 	|	--------------------------------MMMM---------------M------------ 	|
3	|		|	Yeast Mitochondrial	|	FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	----------------------------------MM---------------------------- 	|
4	|		|	Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma	|	FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	--MM---------------M------------MMMM---------------M------------ 	|
5	|		|	Invertebrate Mitochondrial	|	FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG 	|	---M----------------------------MMMM---------------M------------ 	|
6	|		|	Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear	|	FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------------------- 	|
9	|		|	Echinoderm Mitochondrial; Flatworm Mitochondrial	|	FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------M------------ 	|
10	|		|	Euplotid Nuclear	|	FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------------------- 	|
11	|		|	Bacterial, Archaeal and Plant Plastid	|	FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	---M---------------M------------MMMM---------------M------------ 	|
12	|		|	Alternative Yeast Nuclear	|	FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	-------------------M---------------M---------------------------- 	|
13	|		|	Ascidian Mitochondrial	|	FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------------------- 	|
14	|		|	Alternative Flatworm Mitochondrial	|	FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------------------- 	|
15	|		|	Blepharisma Macronuclear	|	FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------------------- 	|
16	|		|	Chlorophycean Mitochondrial	|	FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------------------- 	|
21	|		|	Trematode Mitochondrial	|	FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------M------------ 	|
22	|		|	Scenedesmus obliquus mitochondrial	|	FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	-----------------------------------M---------------------------- 	|
23	|		|	Thraustochytrium mitochondrial code	|	FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	--------------------------------M--M---------------M------------ 	|
24	|		|	Pterobranchia Mitochondrial	|	FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG 	|	---M---------------M---------------M---------------------------- 	|
25	|		|	Candidate Division SR1 and Gracilibacteria	|	FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG 	|	---M---------------M---------------M---------------------------- 	|

gosper_regions.py

from __future__ import print_function
from itertools import izip
import csv
import shapely.wkt
from fiona import collection
from shapely.geometry import mapping
import re
import os

def _load_tree(names_path, nodes_path):
    """Build the taxonomy tree from the ``names.dmp`` and ``nodes.dmp`` dumps.

    Both files are split on '|'.  That leaves the surrounding tabs of the
    "\\t|\\t" field terminator inside each field, which is harmless here:
    ``int()`` ignores surrounding whitespace and the name is stripped.

    Returns a dict mapping each id to a node dict with the keys 'id', 'name'
    and 'children' (a list of child node dicts).

    Raises KeyError if nodes.dmp references an id that names.dmp did not
    define.
    """
    tree = {}

    print('Loading names...')

    with open(names_path, 'rb') as names_file:
        names_reader = csv.reader(names_file, delimiter='|', lineterminator='\t|\n')

        for names_row in names_reader:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            if not names_row:
                continue

            tax_id = int(names_row[0])

            # names beyond the first are discarded
            if tax_id not in tree:
                tree[tax_id] = {
                    'children': [],
                    'name': names_row[1].strip(),
                    'id': tax_id
                }

    print('Loading nodes...')

    with open(nodes_path, 'rb') as nodes_file:
        nodes_reader = csv.reader(nodes_file, delimiter='|', lineterminator='\t|\n')

        for node_row in nodes_reader:
            if not node_row:
                continue

            tax_id = int(node_row[0])
            parent_id = int(node_row[1])

            # avoid trivial cycles (for example, the root node is the parent of itself)
            if tax_id != parent_id:
                tree[parent_id]['children'].append(tree[tax_id])

    return tree


def _collect_leaves(root):
    """Store a 'path' on every node below ``root`` and return the leaf nodes.

    A node's 'path' is the tuple of ids leading from ``root`` down to the node
    itself.  Leaves are returned in depth-first pre-order, visiting children
    in the order they were appended, because the caller pairs the i-th leaf
    with the i-th hex.
    """
    leaves = []

    # explicit stack rather than recursion, so that a very deep lineage cannot
    # hit the interpreter's recursion limit
    pending = [(root, ())]

    while pending:
        node, parent_path = pending.pop()
        node['path'] = parent_path + (node['id'],)

        if len(node['children']) == 0:
            leaves.append(node)

        # pushed in reverse so that they are popped in their original order
        for child in reversed(node['children']):
            pending.append((child, node['path']))

    return leaves


def gosperify(hexes_path, output_dir_path, names_path='names.dmp',
              nodes_path='nodes.dmp', max_depth=None):
    """Assign one hex to each taxonomy leaf and export one GeoJSON file per depth.

    The taxonomy is read from ``names_path`` / ``nodes_path`` and walked from
    node 1 (NOTE(review): presumably the taxonomy root - confirm).  Each leaf
    is paired, in walk order, with one row of ``hexes_path``.  For every
    ancestor on the leaf's path (the leaf itself included) the hex is merged
    into that ancestor's region, so a region at depth d is the union of the
    hexes of all leaves below that ancestor.

    Parameters:
        hexes_path      -- ';'-separated file with '#' as quote character; the
                           first column of each row is parsed as a WKT geometry
        output_dir_path -- directory receiving '<depth>.json' files; created
                           if it does not exist
        names_path      -- path of the names dump (default: 'names.dmp')
        nodes_path      -- path of the nodes dump (default: 'nodes.dmp')
        max_depth       -- deepest layer to build (1 = only the root region);
                           None builds every layer

    Pairing stops silently at the shorter of the two sequences, so surplus
    leaves or surplus hexes are ignored.
    """
    tree = _load_tree(names_path, nodes_path)

    print('Walking the tree to fetch leaves...')

    leaves = _collect_leaves(tree[1])

    print('Assigning hexes...')

    leaves_total = len(leaves)
    leaves_done = 0

    # layers[depth][path prefix] -> merged geometry of that subtree
    layers = {}

    with open(hexes_path, 'rb') as hexes_file:
        hexes_reader = csv.reader(hexes_file, delimiter=';', quotechar='#')

        for leaf, hexes_row in izip(leaves, hexes_reader):
            path = leaf['path']

            # parse the geometry once per leaf, not once per ancestor
            cell = shapely.wkt.loads(hexes_row[0])

            if max_depth is None:
                deepest = len(path)
            else:
                deepest = min(len(path), max_depth)

            # every non-empty prefix of the path identifies one ancestor
            for depth in range(1, deepest + 1):
                subpath = path[:depth]
                regions = layers.setdefault(depth, {})

                if subpath not in regions:
                    regions[subpath] = cell
                else:
                    regions[subpath] = regions[subpath].union(cell)

            # logging
            leaves_done += 1
            print('%d/%d leaves done' % (leaves_done, leaves_total), end='\r')

    print('Exporting geometries...')

    # NOTE(review): the schema declares 'Polygon'; a region whose hexes do not
    # touch would presumably come out of union() as a multi-part geometry -
    # confirm that this cannot happen for the hex ordering in use.
    schema = {'geometry': 'Polygon', 'properties': {'path': 'str', 'name': 'str'}}

    if not os.path.exists(output_dir_path):
        os.makedirs(output_dir_path)

    for depth, regions in layers.items():
        output_path = os.path.join(output_dir_path, str(depth) + '.json')

        with collection(output_path, 'w', 'GeoJSON', schema) as output:
            for path, region in regions.items():
                output.write({
                    'properties': {
                        'path': '.'.join(str(node_id) for node_id in path),
                        # a region is named after the deepest node of its path
                        'name': tree[path[-1]]['name']
                    },
                    'geometry': mapping(region)
                })
            

gosper_regions.pyc

�
��Rc@s}ddlmZddlmZddlZddlZddlmZddl	m
Z
ddlZddlZd�Z
dS(i����(tprint_function(tizipN(t
collection(tmappingcs�i}td�tdd���}tj|dddd�}x\|D]T}t|d�}|d	j�}||krDigd
6|d6|d6||<qDqDWWdQXtd
�tdd��{}tj|dddd�}	xV|	D]N}
t|
d�}t|
d	�}||kr�||d
j||�q�q�WWdQXtd�g�dd%��fd���|d	�td�t��}d}
i}t|d��}tj|dddd�}x�t�|�D]�\}}|d}x�t	t
t|�d	d	��D]�}|| }t|�}tjj
|d�}||krNi||<n|||kro||||<q|||j|�|||<qW|
d	7}
td|
|fdd�q�WWdQXtd�idd6idd6dd6d6}tjj|�stj|�nx�|j�D]�\}}t|dt|�dd d!|��t}xj|j�D]\\}}|jiid"jtd#�|��d6||d$dd6d6t|�d6�q^WWdQXqWdS(&NsLoading names...sdata/names.dmptrbt	delimitert|tlineterminators	|
iitchildrentnametidsLoading nodes...sdata/nodes.dmps#Walking the tree to fetch leaves...csl||df|d<t|d�dkr;�j|�nx*|dD]}�||d|d�qFWdS(NR
tpathRii(tlentappend(tnodetdepthRtchild(tleavestwalk(s7/var/www/webvis/examples/ncbi_treemap/gosper_regions.pyR/s
sAssigning hexes...t;t	quotechart#Rs%d/%d leaves donetends
sExporting geometries...tPolygontgeometrytstrt
propertiest/s.jsontwtGeoJSONt.cSs
t|�S(N(R(tx((s7/var/www/webvis/examples/ncbi_treemap/gosper_regions.pyt<lambda>esi����((tprinttopentcsvtreadertinttstripR
RRtxrangetmintshapelytwkttloadstuniontosRtexiststmakedirstitemsRRtwritetjointmapR(t
hexes_pathtoutput_dir_pathttreet
names_filetnames_readert	names_rowR
R	t
nodes_filetnodes_readertnode_rowtparenttleaves_totaltleaves_donetlayerst
hexes_filethexes_readertleaft	hexes_rowRtitsubpathRthextschematregionstoutputtregion((RRs7/var/www/webvis/examples/ncbi_treemap/gosper_regions.pyt	gosperify
sn



&
	

&

#
$
"*	(t
__future__Rt	itertoolsRR#tshapely.wktR)tfionaRtshapely.geometryRtreR-RL(((s7/var/www/webvis/examples/ncbi_treemap/gosper_regions.pyt<module>s

index.coffee

window.main = ->
    # Select the page's SVG element through d3; the selection is also the
    # function's (implicit) return value.
    svg = d3.select 'svg'
    
    

readme.txt

*.dmp files are bcp-like dumps from the GenBank taxonomy database.

General information.
Field terminator is "\t|\t"
Row terminator is "\t|\n"

nodes.dmp file consists of taxonomy nodes. The description for each node includes the following
fields:
	tax_id					-- node id in GenBank taxonomy database
 	parent tax_id				-- parent node id in GenBank taxonomy database
 	rank					-- rank of this node (superkingdom, kingdom, ...) 
 	embl code				-- locus-name prefix; not unique
 	division id				-- see division.dmp file
 	inherited div flag  (1 or 0)		-- 1 if node inherits division from parent
 	genetic code id				-- see gencode.dmp file
 	inherited GC  flag  (1 or 0)		-- 1 if node inherits genetic code from parent
 	mitochondrial genetic code id		-- see gencode.dmp file
 	inherited MGC flag  (1 or 0)		-- 1 if node inherits mitochondrial gencode from parent
 	GenBank hidden flag (1 or 0)            -- 1 if name is suppressed in GenBank entry lineage
 	hidden subtree root flag (1 or 0)       -- 1 if this subtree has no sequence data yet
 	comments				-- free-text comments and citations

Taxonomy names file (names.dmp):
	tax_id					-- the id of node associated with this name
	name_txt				-- name itself
	unique name				-- the unique variant of this name if name not unique
	name class				-- (synonym, common name, ...)

Divisions file (division.dmp):
	division id				-- taxonomy database division id
	division cde				-- GenBank division code (three characters), e.g. BCT, PLN, VRT, MAM, PRI...
	division name				-- full division name, e.g. Bacteria, Plants, Vertebrates, Mammals, Primates...
	comments

Genetic codes file:
	genetic code id				-- GenBank genetic code id
	abbreviation				-- genetic code name abbreviation
	name					-- genetic code name
	cde					-- translation table for this genetic code
	starts					-- start codons for this genetic code

Deleted nodes file (delnodes.dmp):
	tax_id					-- deleted node id

Merged nodes file (merged.dmp):
	old_tax_id                              -- id of the node which has been merged
	new_tax_id                              -- id of the node which is the result of merging

Citations file (citations.dmp):
	cit_id					-- the unique id of citation
	cit_key					-- citation key
	pubmed_id				-- unique id in PubMed database (0 if not in PubMed)
	medline_id				-- unique id in MedLine database (0 if not in MedLine)
	url					-- URL associated with citation
	text					-- any text (usually article name and authors).
						-- The following characters are escaped in this text by a backslash:
						-- newline (appear as "\n"),
						-- tab character ("\t"),
						-- double quotes ('\"'),
						-- backslash character ("\\").
	taxid_list				-- list of node ids separated by a single space

run.py

from gosper_regions import gosperify

# Read the hex grid from the workspace and write the per-depth GeoJSON
# layers into the 'test' directory.
gosperify(hexes_path='/data/_workspace/hexes.wkt.csv', output_dir_path='test')