block by timelyportfolio 0dba88cd42f588c7fa98

start to work on combining d3 venn and sankey with R tables and igraph

Full Screen

index.html

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<script src="//d3js.org/d3.v3.js"></script>
<script src="//benfred.github.io/venn.js/venn.js"></script>

</head>
<body>
<div class="simple_example"></div>
<script>
// define sets and set set intersections
var sets = [{"label":"1st","size":325},{"label":"2nd","size":285},{"label":"3rd","size":706},{"label":"Crew","size":885},{"label":"Male","size":1731}],
overlaps = [{"sets":[0,4],"size":180},{"sets":[1,4],"size":179},{"sets":[2,4],"size":510},{"sets":[3,4],"size":862},{"sets":[0,2],"size":0},{"sets":[1,2],"size":0},{"sets":[0,1],"size":0},{"sets":[0,2],"size":0},{"sets":[0,3],"size":0},{"sets":[2,3],"size":0},{"sets":[0,1],"size":0},{"sets":[2,3],"size":0},{"sets":[1,3],"size":0},{"sets":[1,2],"size":0},{"sets":[0,3],"size":0},{"sets":[1,3],"size":0}];
// get positions for each set
sets = venn.venn(sets, overlaps);
// draw the diagram in the simple_example div
venn.drawD3Diagram(d3.select(".simple_example"), sets, 300, 300);
</script>
</body>
</html>

code.R

library(dplyr)
library(pipeR)
library(htmltools)

Titanic %>>%
  data.frame %>>%
  ( .[,c(1,2,3,5)] ) %>>%
  #tbl_df %>>%
  (df ~ 
     lapply(
       names(df)[-ncol(df)]
       ,function(c){
         xtabs(as.formula(paste0("Freq~",c)),data=df) %>>%
           data.frame %>>%
           (data.frame(
             variable = colnames(.)[1]
             ,label = as.vector(.[,1])
             ,size = as.vector(.[,2])
           ))
       }
     )
  ) %>>%
  lapply(function(l){l%>>%filter(label!="Female")}) %>>%
  ( do.call(rbind,.) )%>>%
  ( ~ dfSet) %>>%
  (jsonlite::toJSON(.[,c("label","size")])) -> jsonSet


levels(dfSet$variable) %>>%
  (vars~
    (xtabs(paste0("Freq~",paste(vars,collapse="+")),Titanic)) %>>%
    data.frame %>>%
    (rbind(
      .,
      data.frame(anti_join(
        structure(
          lapply(
            vars
            ,function(v){
              dfSet$label[(dfSet$variable==v)] %>>% (data.frame(.,.)) %>>%
                expand.grid
              
            }
          ) %>>% (do.call(rbind,.))
          ,names=vars
        ),.
      ),Freq=0)
    ))
  ) %>>%
  (
    apply(
      .
      ,MARGIN=1
      ,function(x){
        if(length(which(is.element(dfSet$label,x[-length(x)])))>1){
          list(
            sets = which(is.element(dfSet$label,x[-length(x)])) - 1
            , size = ifelse(is.na(as.numeric(tail(x,1))),0,as.numeric(tail(x,1)))
          )
        }
      }
    )
  ) %>>%
  ( .[which(!sapply(.,is.null))] ) %>>%
  unname %>>%
  jsonlite::toJSON(auto_unbox=T,null="null") -> jsonOverlap


tagList(
  tags$div(class = "simple_example")
  ,tags$script(sprintf('
// define sets and set set intersections
var sets = %s,
overlaps = %s;
// get positions for each set
sets = venn.venn(sets, overlaps);
// draw the diagram in the simple_example div
venn.drawD3Diagram(d3.select(".simple_example"), sets, 300, 300);
'
      ,jsonSet
      ,jsonOverlap
    ) %>>% HTML
  )
) %>>%
  attachDependencies(list(
    htmlDependency(
      name = "d3"
      ,version = "3.4"
      ,src = c("href" = "http://d3js.org/")
      ,script = "d3.v3.js"
    )
    ,htmlDependency(
      name = "venn"
      ,version = "0.1"
      ,src = c("href" = "http://benfred.github.io/venn.js")
      ,script = "venn.js"
    )
  )) %>>%
  html_print





# try something with igraph
# adjacency but actually edge list might be easier
Titanic %>>%
  dimnames %>>%
  unname %>>%
  unlist %>>%
  (nd ~
    matrix(ncol=length(nd),nrow=length(nd)) %>>%
    data.frame %>>%
    (structure( .,names = nd,row.names=nd ))
   )


library(igraph)
# try edge list
Titanic %>>%
  dimnames %>>%
  names %>>%
  combn(2) %>>%
  t %>>%
  data.frame(stringsAsFactors=F) %>>%
  #for sankey manually pick combinations
  (.[c(1,4,6),]) %>>%
  #(.[c(1),]) %>>%
  #(.[5,c(1)]) %>>%
  (~ unique(unlist(.)) -> variables ) %>>%
  (~ df ~
     Titanic %>>% dimnames %>>% (.[variables]) %>>% unname %>>% unlist %>>% unique -> nodes
  ) %>>%
  (
    if(length(.) == 1){
      #.
      data.frame()
    } else {
      apply(
        .
        ,MARGIN=1
        ,function(c){
          paste0(as.vector(c),collapse="+")
        }
      ) %>>%
      lapply(
        function(f){
          xtabs(paste0("Freq~",f),Titanic) %>>%
            data.frame %>>%
            structure(names = c("source","target","weight"))
        }
      ) %>>%
      (do.call(rbind,.))
    }
  ) -> elst -> links

ig = graph.edgelist(as.matrix(elst[,1:2]),directed = F)
E(ig)$weight = as.numeric(elst[,3])

plot.igraph(
  ig
  #,layout=layout.circle
  #,layout=layout.spring
  #,layout=layout.spring
  ,layout=layout.fruchterman.reingold.grid
  , edge.color="black"
  ,edge.width=E(ig)$weight/2000 * 30
)

library(networkD3)
#get links as node value/id instead of text
links %>>%
  (
    if (nrow(.) == 0) {
      .
    } else {
      lapply(
        1:ncol(.)
        ,function(x){
          if (is.factor(.[,x])){
            as.character(.[,x])
          } else .[,x]
        }
      )  %>>%
      data.frame(stringsAsFactors = F) %>>%
      structure(names = c("source","target","weight")) 
    }
  ) %>>%
  (
    rbind(
      .
      , structure(
        data.frame("Titanic",xtabs(paste0("Freq~",variables[1]),Titanic))
        ,names= c("source","target","weight")
      )
    )
  ) -> links_transformed

nodes[length(nodes)+1]="Titanic"

links_transformed[,c(1,2)] <- lapply(
  links_transformed[,c(1,2)],
  function(x){
    match(as.character(x),nodes)-1
  }
)


sankeyNetwork(
  Links = links_transformed
  , Nodes = data.frame(name=as.character(nodes),stringsAsFactors = F)
  , Source= "source"
  , Target = "target"
  , Value = "weight"
  , NodeID = "name"
)






#### now try to make a function for easy conversion from tables
#    to other structures
tableConv <- function( tB, vars = NULL, agg = "Freq" ) {
  if (!require(pipeR)) {
    stop("function requires pipeR; please install it")    
  }
  
  if(is.null(vars)){
    vars = names(dimnames(tB)) 
  }

  dimnames(tB)[vars] %>>%
     unname %>>%
     unlist %>>%
     unique -> nodes
  

  links <- if(length(vars) == 1){
    #.
    data.frame()
  } else {
    {if(!is.null(vars)){
      #assume vars in order of source, target
      sapply(1:(length(vars)-1),function(v){c(vars[v],vars[v+1])})
    } else {
      #get all combinations
      combn(v,2)
    }} %>>%
    t %>>%
    data.frame(stringsAsFactors=F) %>>%
    apply(
      MARGIN=1
      ,function(c){
        paste0(as.vector(c),collapse="+")
      }
    ) %>>%
    lapply(
      function(f){
        xtabs(paste0(agg,"~",f),tB) %>>%
          data.frame %>>%
          structure(names = c("source","target","weight"))
        }
    ) %>>%
    (do.call(rbind,.))
  }

  nodes[length(nodes)+1]= as.character(substitute(tB))  

  links %>>%
    (
      if (nrow(.) == 0) {
        .
      } else {
        lapply(
          1:ncol(.)
          ,function(x){
            if (is.factor(.[,x])){
              as.character(.[,x])
            } else .[,x]
          }
        )  %>>%
          data.frame(stringsAsFactors = F) %>>%
          structure(names = c("source","target","weight")) 
      }
    ) %>>%
    (
      rbind(
        .
        , structure(
          data.frame(tail(nodes,1),xtabs(paste0(agg,"~",vars[1]),tB))
          ,names= c("source","target","weight")
        )
      )
    ) -> links_transformed
  

    links_transformed[,c(1,2)] <- lapply(
      links_transformed[,c(1,2)],
      function(x){
        match(as.character(x),nodes)-1
      }
    )
  
    return(
      list(
        nodes = data.frame(name=as.character(nodes),stringsAsFactors = F)
        ,links =  links_transformed
      )
    )
}





tableConv(Titanic,vars=c("Survived","Class","Age")) %>>%
  (sankeyNetwork(
    Links = .$links
    , Nodes = .$nodes
    , Source= "source"
    , Target = "target"
    , Value = "weight"
    , NodeID = "name"
  ))

tableConv(Titanic,vars=c("Sex","Survived")) %>>%
  { 
    ig =(as.matrix(.$links[,1:2]) + 1) %>>% graph.edgelist(directed=T)
    E(ig)$weight <- .$links[,3]
    V(ig)$name <- .$nodes %>>% t %>>% as.character
    ig
  } %>>%
  (plot.igraph(
    .
    #,layout=layout.circle
    #,layout=layout.grid
    ,layout=layout.spring
    #,layout=layout.fruchterman.reingold.grid
    , edge.color="gray"
    , edge.width=E(.)$weight/2000 * 30
    , vertex.label = V(.)$name
  ))