# code modified slightly from http://www.phytools.org/eqg2015/asr.html
## Load needed packages for this blogpost
library(phytools)
library(ggtree)
library(tidyverse)
library(scico)
library(viridisLite)
About contMap()
The phytools
package provides (among many other things) the contMap()
function for estimating ancestral character states and visualizing their changes along the branches of a phylogenetic tree. It can either produce the plot directly (default), or be saved as an object with the plot = FALSE
argument, to be further manipulated and plotted later with plot()
.
Default colors
I have to say I’m not a fan of the default color scheme, which is a rainbow palette going from red through yellow and green to blue.
For example, let’s borrow some example code and look at the default plot:
## Load anole tree
<- read.tree("http://www.phytools.org/eqg2015/data/anole.tre")
anole.tree
## Load anole trait data, extract snout-vent-length (svl) as named vector
<- read_csv("http://www.phytools.org/eqg2015/data/svl.csv") %>%
svl mutate(svl = set_names(svl, species)) %>%
pull(svl)
# Plot with default color scheme
<- contMap(anole.tree, svl, plot = FALSE)
contmap_obj
plot(
contmap_obj, type="fan",
legend = 0.7*max(nodeHeights(anole.tree)),
fsize = c(0.5, 0.7))
Although this does provide a wide range of colors, it’s not obvious why one color is greater or less than the others. In particular it’s hard to discern the order of intermediate values (yellow, green, light blue). Indeed, there has been much written on why the rainbow palette is generally not a good way to visualize continuous data.
Defining a new color palette
phytools::setMap()
can be used to specify another color palette. setMap()
passes its second argument (a vector of color names or hexadecimals) to colorRampPalette()
. colorRampPalette()
is a bit unusual in that it’s a function that produces a function, in this case, one that generates a vector of colors interpolating between the original input values:
# colorRampPalette() produces a function
<- colorRampPalette(c("red", "yellow"))
my_color_func class(my_color_func)
[1] "function"
# The function generates n colors interpolating between
# the colors originally passed to colorRampPalette()
<- my_color_func(n = 6)
my_colors ::show_col(my_colors) scales
So, this works fine for generating custom color gradients. But designing accurate, color-blind friendly color palettes is not a simple task. Fortunately, there are several packages available with such carefully crafted palettes. Two of my favorite are viridis
and scico
. How can we use these with the plotting function in phytools?
Using viridis
or scico
palettes
Well, it turns out that as long as we specify the same number of colors, we can replicate the viridis color palette with colorRampPalette()
. The only difference is the alpha, or transparency level, indicated at the end of each hexidecimal with two letters (here “FF”). There is no reason to use transparency here anyways, so that doesn’t matter.
# viridis color palette with 6 colors
viridis(6)
[1] "#440154FF" "#414487FF" "#2A788EFF" "#22A884FF" "#7AD151FF" "#FDE725FF"
# colorRampPalette() replicating viridis color palette
colorRampPalette(viridis(6))(6)
[1] "#440154" "#414487" "#2A788E" "#22A884" "#7AD151" "#FDE725"
So here is the viridis
version of the phytools plot:
# Count the number of unique character states in the observed data:
<- n_distinct(svl)
n_cols
# Change the color palette
<- setMap(contmap_obj, viridis(n_cols))
contmap_obj_viridis
# Plot the mapped characters with the new colors
plot(
contmap_obj_viridis, type="fan",
legend = 0.7*max(nodeHeights(anole.tree)),
fsize = c(0.5, 0.7))
And here is another one, this time using a palette from scico
:
# Change the color palette
<- setMap(contmap_obj, scico(n_cols, palette = "bilbao"))
contmap_obj_scico
# Plot the mapped characters with the new colors
plot(
contmap_obj_scico, type="fan",
legend = 0.7*max(nodeHeights(anole.tree)),
fsize = c(0.5, 0.7))
I personally find this one even easier to interpret than viridis
. It’s very clear which values are low and high.
ggtree
Just for completeness, here is code to replicate the plot in ggtree
.
# Modified from https://yulab-smu.top/treedata-book/chapter4.html#color-tree
# Fit an ancestral state character reconstruction
<- phytools::fastAnc(anole.tree, svl, vars = TRUE, CI = TRUE)
fit
# Make a dataframe with trait values at the tips
<- data.frame(
td node = nodeid(anole.tree, names(svl)),
trait = svl)
# Make a dataframe with estimated trait values at the nodes
<- data.frame(node = names(fit$ace), trait = fit$ace)
nd
# Combine these with the tree data for plotting with ggtree
<- rbind(td, nd)
d $node <- as.numeric(d$node)
d<- full_join(anole.tree, d, by = 'node')
tree
ggtree(
tree, aes(color = trait),
layout = 'circular',
ladderize = FALSE, continuous = "color", size = 1) +
# >>> The important part! <<<
# Choose your favorite scale_color_* function here:
scale_color_scico(palette = "bilbao") +
geom_tiplab(hjust = -.1, size = 2, color = "black") +
xlim(0, 1.2) +
theme(
legend.position = c(0, .82),
legend.text = element_text(size = 8),
legend.title = element_text(size = 8)
)
That’s it!
Reproducibility
Reuse
Citation
@online{nitta2021,
author = {Nitta, Joel},
title = {Selecting Color Schemes for Mapping Ancestral States},
date = {2021-06-02},
url = {https://www.joelnitta.com/posts/2021-06-02_color-scheme-anc-states/},
langid = {en}
}