block by timelyportfolio 3616869996703d48a981

R tables to networks for visualization

Full Screen

added as d3_table() in d3r


Although very handy and useful, I think most new R developers do not even know that tables exist. They are a little tricky to use, but they offer some nice functionality. These tables become even more useful when we can visualize them as networks. This little bit of code is a rough first attempt at converting tables to network nodes and edges.

live example

code.R

---
title: "R Tables to Network"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_document:
    theme: ~
    toc: true
    mathjax: null
---

# Experiments with tables to network conversion

Get all the libraries.

```{r}
library(pipeR)
library(igraph)

# devtools::install_github("christophergandrud/networkD3")
library(networkD3)
# devtools::install_github("dataknowledge/visNetwork")
library(visNetwork)
```


### Conversion Function

Sorry for all all the pipes \ `%>>%` and difficult-to-read code, but here is the function to convert a table to a network of nodes and edges.  Once we have the table in network form, we can use network packages and `htmlwidgets` to visualize them.

```{r}
#### now try to make a function for easy conversion from tables
#    to other structures
tableConv <- function( tB, vars = NULL, agg = "Freq" ) {
  if (!require(pipeR)) {
    stop("function requires pipeR; please install it")    
  }
  
  if(is.null(vars)){
    vars = names(dimnames(tB)) 
  }
  
  dimnames(tB)[vars] %>>%
    unname %>>%
    unlist %>>%
    unique -> nodes
  
  #  add name of table as root in nodes
  nodes[length(nodes)+1]= as.character(substitute(tB))  
  
  
  links <- if(length(vars) == 1){
    #.
    data.frame()
  } else {
    {if(!is.null(vars)){
      #assume vars in order of source, target
      sapply(1:(length(vars)-1),function(v){c(vars[v],vars[v+1])})
    } else {
      #get all combinations
      combn(v,2)
    }} %>>%
      t %>>%
      data.frame(stringsAsFactors=F) %>>%
      apply(
        MARGIN=1
        ,function(c){
          paste0(as.vector(c),collapse="+")
        }
      ) %>>%
      lapply(
        function(f){
          xtabs(paste0(agg,"~",f),tB) %>>%
            data.frame %>>%
            structure(names = c("source","target","weight"))
        }
      ) %>>%
      (do.call(rbind,.))
  }
  
  links %>>%
    (
      if (nrow(.) == 0) {
        .
      } else {
        lapply(
          1:ncol(.)
          ,function(x){
            if (is.factor(.[,x])){
              as.character(.[,x])
            } else .[,x]
          }
        )  %>>%
          data.frame(stringsAsFactors = F) %>>%
          structure(names = c("source","target","weight")) 
      }
    ) %>>%
    (
      rbind(
        .
        , structure(
          data.frame(tail(nodes,1),xtabs(paste0(agg,"~",vars[1]),tB))
          ,names= c("source","target","weight")
        )
      )
    ) -> links_transformed
  
  
  links_transformed[,c(1,2)] <- lapply(
    links_transformed[,c(1,2)],
    function(x){
      match(as.character(x),nodes)-1
    }
  )
  
  # try to get size for nodes
  nodes <- data.frame(name=as.character(nodes),stringsAsFactors = F)
  
  nodes <- lapply(
    vars
    ,function(v){
      xtabs(paste0(agg,"~",v),tB)
    }
  ) %>>%
    unlist %>>%
    (
      data.frame( 
        name = names(.)
        ,weight = as.vector(.)
        ,stringsAsFactors = F
      )
    ) %>>%
    (
      rbind(
        .
        ,data.frame(
          name = tail(nodes,1)$name
          , weight = sum(tB)
        )  
      )
    ) %>>%
    merge( nodes  ) %>>%
    (
      .[match(nodes$name,.$name),]
    )
  
  rownames(nodes) <- sort(as.numeric(rownames(nodes)))
  
  return(
    list(
      nodes = nodes
      ,links =  links_transformed
    )
  )
}
```


### Titanic as a d3.js Sankey

```{r}
tableConv(Titanic,vars=c("Survived","Age","Class")) %>>%
  (sankeyNetwork(
    Links = .$links
    , Nodes = .$nodes
    , Source= "source"
    , Target = "target"
    , Value = "weight"
    , NodeID = "name"
  ))
```

### UCBAdmissions as a d3.js Sankey
```{r}
tableConv(UCBAdmissions) %>>%
  (sankeyNetwork(
    Links = .$links
    , Nodes = .$nodes
    , Source= "source"
    , Target = "target"
    , Value = "weight"
    , NodeID = "name"
  ))
```

### Titanic as igraph
```{r}
tableConv(Titanic,vars=c("Sex","Survived")) %>>%
{ 
  ig =(as.matrix(.$links[,1:2]) + 1) %>>% graph.edgelist(directed=F)
  E(ig)$weight <- .$links[,3]
  V(ig)$name <- .$nodes[,1] %>>% t %>>% as.character
  V(ig)$weight <- .$nodes$weight
  ig
} %>>%
  (plot.igraph(
    .
    #,layout=layout.circle
    #,layout=layout.grid
    ,layout=layout.spring
    #,layout=layout.fruchterman.reingold.grid
    , vertex.size = V(.)$weight/2000 * 30
    , edge.color="gray"
    , edge.width=E(.)$weight/2000 * 30
    , vertex.label = V(.)$name
  ))
```

### Titanic in visNetwork
```{r}
tableConv( Titanic, c("Survived","Age") ) %>>%
  (
    visNetwork(
      nodes = data.frame(
        id = as.numeric(rownames(.$nodes))-1
        ,label = .$nodes$name
        ,value = .$nodes$weight
      )
      ,edges = data.frame(
        from = .$links$source
        ,to = .$links$target
        ,value = .$links$weight
      )
    )
  )
```