7777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777777477777717777777777777777777777777777777777777777777
7777777777777777777777777777777227777777774534175527771552777777132177727777777777777777777777777777
7777777777777777777777771177775537777777777224313325274443145527165512567777777777777777777777777777
7777777777777777777777772532775422221777777777712322771432234177771113323715534777777777777777777777
7777777777777777777777771343471253557713477134154294777234355271447714722124217777777777777777777777
7777777777777777777777777124424333417715511455235210416477122775537771323455547777777777777777777777
7777777777777777777712173553134777772331245222771172802714115522411117567711122177777777777777777777
7777777777777777777775434222126715115547165574535351097265275323465317527772553125577777777777777777
7777777777777777777771455215276515572444436279111716081132771536122771033217221255377777777777777777
7777777777777777777777712214311662142135421360917772882711434139112710527237455224343531777777777777
7777777777777777777777243324339688502341154172606272882513341771057204422531724422334217777777777777
7777777777777777777723417151144114086177733324573843889435354177405021552433542325332777777777777777
7777777777777777777771221357714177740862712442242488894243441777685152124722233772435377777777777777
7777777777777777777723341442233569966888641754332788813571227716927735144723517777777777777777777777
7777777777777777777713411434172532224360888682777288522125311504777772505951777777777777777777777777
7777777777777777134442233145412132723772350809177080112252368855556600542777777777777777777777777777
7777777777777777714444142174537165214224422398043885136888654222222456621553177777777777777777777777
7777777777777777777243246426541243333342224508888880888621777777772553254712217777777777777777777777
7777777777777777773553273211126517437777777715888888861777777777777114242245353177777777777777777777
7777777777777777772217777777711714327777777777488888377777777777777723342244211777777777777777777777
7777777777777777777777777777777777777777777777788885777777777777777715317135577777777777777777777777
7777777777777777777777777777777777777777777777788882777777777777777777777772317777777777777777777777
7777777777777777777777777777777777777777777777788881777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777188881777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777188882777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777288883777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777388886777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777688888177777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777772888888617777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777124440888888886322117777777777777777777777777777777777777777
7777777777777777777777777777777711222455668888888888880900000064177777777777777777177777777777777777
7777777777777777777777777771355606342217395468888880088932777123665553434177777774277777777777777777
7777777777777777777712112553272317777245277209886588230806669542177723471455333531777777777777777777
7777777777777777777771555277122245565947771601089758021660954112533177177774517777777777777777777777
7777777777777777777711777777777263277571222357288210804157145964727217777777277777777777777777777777
7777777777777777777777777777774677771412771037738974458427777164627777777777777777777777777777777777
7777777777777777777777777772459177772777719657728812575017777147163227777777777777777777777777777777
7777777777777777777777777771447777777777392137150813174054777777715112177777777777777777777777777777
7777777777777777777777777111777777777726477124448621776572517777771047777777777777777777777777777777
7777777777777777777777777777777777777501777747108277718577727777777252217777777777777777777777777777
7777777777777777777777777777777777776337777177980777264077777777777723177777777777777777777777777777
7777777777777777777777777777777777744227777774856177157337777777777777227777777777777777777777777777
7777777777777777777777777777777777757717777710822577247745177777777777771777777777777777777777777777
7777777777777777777777777777777777227777777155817577227772617777777777777777777777777777777777777777
7777777777777777777777777777777777117777777542817477717777267777777777777777777777777777777777777777
7777777777777777777777777777777777777777772417047177777777762777777777777777777777777777777777777777
7777777777777777777777777777777777777777771277897777777777732777777777777777777777777777777777777777
7777777777777777777777777777777777777777772777654777777777751777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777375177777777117777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777477377777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777777147777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777777747777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777777717777777777777777777777777777777777777777777777777
7777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777

This post is a brief exploration of the functionality of the data.tree package, which is used for working with data that has a hierarchical structure.

Here is my setup, with the packages that I’ve used:

Resources

To make this post, I’ve extracted the examples and bits of information that were most useful to me from the following two sources:

These resources are clear and thorough, and I couldn’t recommend them more highly.

Creating a data.tree from scratch

First, we create a data.tree structure from scratch.

This example comes from https://cran.r-project.org/web/packages/data.tree/vignettes/data.tree.html

Create the root node:

##   levelName
## 1 Acme Inc.

Create 3 children of the root node (Accounting, Research and IT):

##      levelName     
## [1,] Acme Inc.     
## [2,]  ¦--Accounting
## [3,]  ¦--Research  
## [4,]  °--IT

Give the Accounting node 2 children:

##      levelName                       
## [1,] Acme Inc.                       
## [2,]  ¦--Accounting                  
## [3,]  ¦   ¦--New Software            
## [4,]  ¦   °--New Accounting Standards
## [5,]  ¦--Research                    
## [6,]  °--IT

Give the Research node 2 children:

##      levelName                       
## [1,] Acme Inc.                       
## [2,]  ¦--Accounting                  
## [3,]  ¦   ¦--New Software            
## [4,]  ¦   °--New Accounting Standards
## [5,]  ¦--Research                    
## [6,]  ¦   ¦--New Product Line        
## [7,]  ¦   °--New Labs                
## [8,]  °--IT

Give the IT node 3 children:

##       levelName                       
##  [1,] Acme Inc.                       
##  [2,]  ¦--Accounting                  
##  [3,]  ¦   ¦--New Software            
##  [4,]  ¦   °--New Accounting Standards
##  [5,]  ¦--Research                    
##  [6,]  ¦   ¦--New Product Line        
##  [7,]  ¦   °--New Labs                
##  [8,]  °--IT                          
##  [9,]      ¦--Outsource               
## [10,]      ¦--Go agile                
## [11,]      °--Switch to R

Climbing the tree

We can consider only the IT branch of the tree:

##      levelName      
## [1,] IT             
## [2,]  ¦--Outsource  
## [3,]  ¦--Go agile   
## [4,]  °--Switch to R

We can consider only the ‘Switch to R’ branch of the IT branch of the tree:

##      levelName  
## [1,] Switch to R

The child nodes of a node can be accessed using the syntax .$children[[i]]:

##      levelName                   
## [1,] Accounting                  
## [2,]  ¦--New Software            
## [3,]  °--New Accounting Standards
##      levelName           
## [1,] Research            
## [2,]  ¦--New Product Line
## [3,]  °--New Labs
##                  levelName
## 1 New Accounting Standards

Now, we add values (cost and probability) to the leaves of the tree (the leaves are the terminal nodes):

##                           levelName    cost    p
## 1  Acme Inc.                             NA   NA
## 2   ¦--Accounting                        NA   NA
## 3   ¦   ¦--New Software             1000000 0.50
## 4   ¦   °--New Accounting Standards  500000 0.75
## 5   ¦--Research                          NA   NA
## 6   ¦   ¦--New Product Line         2000000 0.25
## 7   ¦   °--New Labs                  750000 0.90
## 8   °--IT                                NA   NA
## 9       ¦--Outsource                 400000 0.20
## 10      ¦--Go agile                  250000 0.05
## 11      °--Switch to R                50000 1.00

We can use a recursive function, applied to each node, to sum the cost for each node across all of its children:

##                           levelName    cost sum_cost
## 1  Acme Inc.                             NA  4950000
## 2   ¦--Accounting                        NA  1500000
## 3   ¦   ¦--New Software             1000000       NA
## 4   ¦   °--New Accounting Standards  500000       NA
## 5   ¦--Research                          NA  2750000
## 6   ¦   ¦--New Product Line         2000000       NA
## 7   ¦   °--New Labs                  750000       NA
## 8   °--IT                                NA   700000
## 9       ¦--Outsource                 400000       NA
## 10      ¦--Go agile                  250000       NA
## 11      °--Switch to R                50000       NA
##                           levelName    cost sum_cost cost_all
## 1  Acme Inc.                             NA  4950000  4950000
## 2   ¦--Accounting                        NA  1500000  1500000
## 3   ¦   ¦--New Software             1000000       NA  1000000
## 4   ¦   °--New Accounting Standards  500000       NA   500000
## 5   ¦--Research                          NA  2750000  2750000
## 6   ¦   ¦--New Product Line         2000000       NA  2000000
## 7   ¦   °--New Labs                  750000       NA   750000
## 8   °--IT                                NA   700000   700000
## 9       ¦--Outsource                 400000       NA   400000
## 10      ¦--Go agile                  250000       NA   250000
## 11      °--Switch to R                50000       NA    50000

Plotting

See https://graphviz.gitlab.io/_pages/doc/info/attrs.html for more information on the styling of data.tree plots.

See also http://www.bioconductor.org/packages/release/bioc/vignettes/Rgraphviz/inst/doc/newRgraphvizInterface.pdf.

Plotting of data.tree objects using the plot() function in R calls the render_graph() function from the DiagrammeR package.

Run ?DiagrammeR::render_graph in console for more information.

Here are some example plots of our tree:

We can closely control the text appearing inside the nodes using a custom function. This could just as easily be done for text on edges too. Here, we label each node with the total cost across all of its children:

##                           levelName    cost sum_cost cost_all
## 1  Acme Inc.                             NA  4950000  4950000
## 2   ¦--Accounting                        NA  1500000  1500000
## 3   ¦   ¦--New Software             1000000       NA  1000000
## 4   ¦   °--New Accounting Standards  500000       NA   500000
## 5   ¦--Research                          NA  2750000  2750000
## 6   ¦   ¦--New Product Line         2000000       NA  2000000
## 7   ¦   °--New Labs                  750000       NA   750000
## 8   °--IT                                NA   700000   700000
## 9       ¦--Outsource                 400000       NA   400000
## 10      ¦--Go agile                  250000       NA   250000
## 11      °--Switch to R                50000       NA    50000

We can change the direction/orientation with the rankdir argument:

Here is an example where we create a function to dynamically choose the border colour of each node according to the cost:

##             New Software New Accounting Standards               Accounting 
##                  1000000                   500000                  1500000 
##         New Product Line                 New Labs                 Research 
##                  2000000                   750000                  2750000 
##                Outsource                 Go agile              Switch to R 
##                   400000                   250000                    50000 
##                       IT                Acme Inc. 
##                   700000                  4950000

Here are some alternative ways to plot the data using other packages:

Converting a data.tree to an R data.frame or list

In order to do predictive modelling or analysis, it is useful to be able to convert the information in the data.tree structure to an R data.frame or list.

What follows below are a few different ways to do this:

Another way is:

Another way is:

...or the data.tree can be converted to a nested list:

## List of 6
##  $ name      : chr "Acme Inc."
##  $ cost_all  : num 4950000
##  $ sum_cost  : num 4950000
##  $ Accounting:List of 4
##   ..$ cost_all                : num 1500000
##   ..$ sum_cost                : num 1500000
##   ..$ New Software            :List of 3
##   .. ..$ cost    : num 1e+06
##   .. ..$ cost_all: num 1e+06
##   .. ..$ p       : num 0.5
##   ..$ New Accounting Standards:List of 3
##   .. ..$ cost    : num 5e+05
##   .. ..$ cost_all: num 5e+05
##   .. ..$ p       : num 0.75
##  $ Research  :List of 4
##   ..$ cost_all        : num 2750000
##   ..$ sum_cost        : num 2750000
##   ..$ New Product Line:List of 3
##   .. ..$ cost    : num 2e+06
##   .. ..$ cost_all: num 2e+06
##   .. ..$ p       : num 0.25
##   ..$ New Labs        :List of 3
##   .. ..$ cost    : num 750000
##   .. ..$ cost_all: num 750000
##   .. ..$ p       : num 0.9
##  $ IT        :List of 5
##   ..$ cost_all   : num 7e+05
##   ..$ sum_cost   : num 7e+05
##   ..$ Outsource  :List of 3
##   .. ..$ cost    : num 4e+05
##   .. ..$ cost_all: num 4e+05
##   .. ..$ p       : num 0.2
##   ..$ Go agile   :List of 3
##   .. ..$ cost    : num 250000
##   .. ..$ cost_all: num 250000
##   .. ..$ p       : num 0.05
##   ..$ Switch to R:List of 3
##   .. ..$ cost    : num 50000
##   .. ..$ cost_all: num 50000
##   .. ..$ p       : num 1

Labelling the levels of the tree

##                           levelName
## 1  Acme Inc.                       
## 2   ¦--Accounting                  
## 3   ¦   ¦--New Software            
## 4   ¦   °--New Accounting Standards
## 5   ¦--Research                    
## 6   ¦   ¦--New Product Line        
## 7   ¦   °--New Labs                
## 8   °--IT                          
## 9       ¦--Outsource               
## 10      ¦--Go agile                
## 11      °--Switch to R

Add 2 children to the Outsource node (Outsource is a child of IT):

##                           levelName
## 1  Acme Inc.                       
## 2   ¦--Accounting                  
## 3   ¦   ¦--New Software            
## 4   ¦   °--New Accounting Standards
## 5   ¦--Research                    
## 6   ¦   ¦--New Product Line        
## 7   ¦   °--New Labs                
## 8   °--IT                          
## 9       ¦--Outsource               
## 10      ¦   ¦--India               
## 11      ¦   °--Poland              
## 12      ¦--Go agile                
## 13      °--Switch to R

Iterate through the nodes in the acme tree, retrieving the level of each node and labelling each node with a type according to its level:

##                Acme Inc.               Accounting             New Software 
##                        1                        2                        3 
## New Accounting Standards                 Research         New Product Line 
##                        3                        2                        3 
##                 New Labs                       IT                Outsource 
##                        3                        2                        3 
##                    India                   Poland                 Go agile 
##                        4                        4                        3 
##              Switch to R 
##                        3
##                           levelName level              type    cost
## 1  Acme Inc.                            1    company (root)      NA
## 2   ¦--Accounting                       2        department      NA
## 3   ¦   ¦--New Software                 3           project 1000000
## 4   ¦   °--New Accounting Standards     3           project  500000
## 5   ¦--Research                         2        department      NA
## 6   ¦   ¦--New Product Line             3           project 2000000
## 7   ¦   °--New Labs                     3           project  750000
## 8   °--IT                               2        department      NA
## 9       ¦--Outsource                    3           project  400000
## 10      ¦   ¦--India                    4 outsource_country      NA
## 11      ¦   °--Poland                   4 outsource_country      NA
## 12      ¦--Go agile                     3           project  250000
## 13      °--Switch to R                  3           project   50000

There are many ways to filter, prune and aggregate data.trees (see https://cran.r-project.org/web/packages/data.tree/vignettes/data.tree.html).

Creating a data.tree from a data.frame

We specify the tree structure by creating a column called pathString:

pathString continent country
3 world/North America/Bermuda North America Bermuda
4 world/Europe/Norway Europe Norway
5 world/Asia/Qatar Asia Qatar
6 world/Europe/Switzerland Europe Switzerland
7 world/Asia/Macao SAR, China Asia Macao SAR, China
8 world/Europe/Luxembourg Europe Luxembourg
10 world/Oceania/Australia Oceania Australia
11 world/Europe/Sweden Europe Sweden
12 world/Europe/Denmark Europe Denmark
14 world/North America/United States North America United States
##                                 levelName iso3 population    GNI
## 1  world                                               NA     NA
## 2   ¦--North America                                   NA     NA
## 3   ¦   ¦--Bermuda                         BMU      67837 106140
## 4   ¦   ¦--United States                   USA  313973000  55200
## 5   ¦   ¦--Canada                          CAN   33487208  51630
## 6   ¦   ¦--Bahamas, The                    BHS     309156  20980
## 7   ¦   ¦--Trinidad and Tobago             TTO    1310000  20070
## 8   ¦   ¦--Puerto Rico                     PRI    3971020  19310
## 9   ¦   ¦--Barbados                        BRB     284589  15310
## 10  ¦   ¦--St. Kitts and Nevis             KNA      40131  14920
## 11  ¦   ¦--Antigua and Barbuda             ATG      85632  13300
## 12  ¦   ¦--Panama                          PAN    3360474  11130
## 13  ¦   ¦--Costa Rica                      CRI    4253877  10120
## 14  ¦   ¦--Mexico                          MEX  111211789   9870
## 15  ¦   ¦--Grenada                         GRD      90739   7910
## 16  ¦   ¦--St. Lucia                       LCA     160267   7260
## 17  ¦   ¦--Dominica                        DMA      72660   6930
## 18  ¦   ¦--St. Vincent and the Grenadines  VCT     104574   6610
## 19  ¦   ¦--Dominican Republic              DOM    9650054   6040
## 20  ¦   °--... 7 nodes w/ 0 sub                        NA     NA
## 21  °--... 6 nodes w/ 171 sub                          NA     NA

Showcasing some tree-viewing options:

##                                  levelName
## 1   world                                 
## 2    ¦--North America                     
## 3    ¦   ¦--Bermuda                       
## 4    ¦   ¦--United States                 
## 5    ¦   ¦--Canada                        
## 6    ¦   ¦--Bahamas, The                  
## 7    ¦   ¦--Trinidad and Tobago           
## 8    ¦   ¦--Puerto Rico                   
## 9    ¦   ¦--Barbados                      
## 10   ¦   ¦--St. Kitts and Nevis           
## 11   ¦   ¦--Antigua and Barbuda           
## 12   ¦   ¦--Panama                        
## 13   ¦   ¦--Costa Rica                    
## 14   ¦   ¦--Mexico                        
## 15   ¦   ¦--Grenada                       
## 16   ¦   ¦--St. Lucia                     
## 17   ¦   ¦--Dominica                      
## 18   ¦   ¦--St. Vincent and the Grenadines
## 19   ¦   ¦--Dominican Republic            
## 20   ¦   ¦--Jamaica                       
## 21   ¦   ¦--Belize                        
## 22   ¦   ¦--El Salvador                   
## 23   ¦   ¦--Guatemala                     
## 24   ¦   ¦--Honduras                      
## 25   ¦   ¦--Nicaragua                     
## 26   ¦   °--Haiti                         
## 27   ¦--Europe                            
## 28   ¦   ¦--Norway                        
## 29   ¦   ¦--Switzerland                   
## 30   ¦   ¦--Luxembourg                    
## 31   ¦   ¦--Sweden                        
## 32   ¦   ¦--Denmark                       
## 33   ¦   ¦--Netherlands                   
## 34   ¦   ¦--Austria                       
## 35   ¦   ¦--Finland                       
## 36   ¦   ¦--Germany                       
## 37   ¦   ¦--Iceland                       
## 38   ¦   ¦--Belgium                       
## 39   ¦   ¦--Ireland                       
## 40   ¦   ¦--United Kingdom                
## 41   ¦   ¦--France                        
## 42   ¦   ¦--Andorra                       
## 43   ¦   ¦--Italy                         
## 44   ¦   ¦--Spain                         
## 45   ¦   ¦--Slovenia                      
## 46   ¦   ¦--Greece                        
## 47   ¦   ¦--Portugal                      
## 48   ¦   ¦--Malta                         
## 49   ¦   ¦--Estonia                       
## 50   ¦   ¦--Czech Republic                
## 51   ¦   ¦--Slovak Republic               
## 52   ¦   ¦--Lithuania                     
## 53   ¦   ¦--Latvia                        
## 54   ¦   ¦--Poland                        
## 55   ¦   ¦--Hungary                       
## 56   ¦   ¦--Russian Federation            
## 57   ¦   ¦--Croatia                       
## 58   ¦   ¦--Romania                       
## 59   ¦   ¦--Bulgaria                      
## 60   ¦   ¦--Belarus                       
## 61   ¦   ¦--Montenegro                    
## 62   ¦   ¦--Serbia                        
## 63   ¦   ¦--Macedonia, FYR                
## 64   ¦   ¦--Bosnia and Herzegovina        
## 65   ¦   ¦--Albania                       
## 66   ¦   ¦--Kosovo                        
## 67   ¦   ¦--Ukraine                       
## 68   ¦   °--Moldova                       
## 69   ¦--Asia                              
## 70   ¦   ¦--Qatar                         
## 71   ¦   ¦--Macao SAR, China              
## 72   ¦   ¦--Singapore                     
## 73   ¦   ¦--Kuwait                        
## 74   ¦   ¦--United Arab Emirates          
## 75   ¦   ¦--Japan                         
## 76   ¦   ¦--Hong Kong SAR, China          
## 77   ¦   ¦--Brunei Darussalam             
## 78   ¦   ¦--Israel                        
## 79   ¦   ¦--Korea, Rep.                   
## 80   ¦   ¦--Cyprus                        
## 81   ¦   ¦--Saudi Arabia                  
## 82   ¦   ¦--Bahrain                       
## 83   ¦   ¦--Oman                          
## 84   ¦   ¦--Kazakhstan                    
## 85   ¦   ¦--Malaysia                      
## 86   ¦   ¦--Turkey                        
## 87   ¦   ¦--Lebanon                       
## 88   ¦   ¦--Turkmenistan                  
## 89   ¦   ¦--Azerbaijan                    
## 90   ¦   ¦--China                         
## 91   ¦   ¦--Iraq                          
## 92   ¦   ¦--Iran, Islamic Rep.            
## 93   ¦   ¦--Thailand                      
## 94   ¦   ¦--Jordan                        
## 95   ¦   ¦--Mongolia                      
## 96   ¦   ¦--Armenia                       
## 97   ¦   ¦--Georgia                       
## 98   ¦   ¦--Indonesia                     
## 99   ¦   ¦--Philippines                   
## 100  ¦   °--... 17 nodes w/ 0 sub         
## 101  °--... 4 nodes w/ 93 sub
##                       levelName
## 1 world                        
## 2  ¦--North America            
## 3  ¦   ¦--Bermuda              
## 4  ¦   ¦--United States        
## 5  ¦   ¦--Canada               
## 6  ¦   ¦--Bahamas, The         
## 7  ¦   ¦--Trinidad and Tobago  
## 8  ¦   °--... 19 nodes w/ 0 sub
## 9  °--... 6 nodes w/ 183 sub
##                       levelName
## 1 world                        
## 2  ¦--North America            
## 3  ¦   ¦--Bermuda              
## 4  ¦   °--... 23 nodes w/ 0 sub
## 5  °--... 6 nodes w/ 187 sub