-
Notifications
You must be signed in to change notification settings - Fork 180
reproduce ggphylo example using ggtree
Guangchuang Yu edited this page Sep 2, 2015
·
7 revisions
ggphylo supports plotting a list of phylo objects.
require(ape)
require(ggphylo)
tree.list = rmtree(3, 20)
ggphylo(tree.list)
AND plotting data along tree
n <- 40; x <- rtree(n); n.nodes <- length(nodes(x))
bootstraps <- 100 - rexp(n.nodes, rate=5) * 100
pop.sizes <- pmax(0, rnorm(n.nodes, mean=50000, sd=50000))
for (i in nodes(x)) {
x <- tree.set.tag(x, i, 'bootstrap', bootstraps[i])
x <- tree.set.tag(x, i, 'pop.size', pop.sizes[i]) }
plot.args <- list(
x,
line.color.by='bootstrap',
line.color.scale=scale_colour_gradient(limits=c(50, 100), low='red', high='black'),
node.size.by='pop.size',
node.size.scale = scale_size_continuous(limits=c(0, 100000),
range=c(1, 5)), label.size=2
)
do.call(ggphylo, plot.args)
In ggtree, multiPhylo object is supported.
require(ggplot2)
require(ggtree)
ggtree(tree.list, ladderize=F, aes(color=.id)) + facet_wrap(~.id) +
geom_point() + geom_text(subset=.(!isTip), aes(label=node), hjust=-.2, size=3) +
geom_tiplab(size=4, hjust=-.2) 
In the ggphylo example, they set attributes based on internal node number. In my opinion, this is not commonly use, since most of the users have no idea of how taxa were mapping to internal node number.
In ggtree, we created %<+% operator to add additional related information, it requires the first column to be taxa label. This is more commonly used since user already have the taxa label.
To reproduce this case, we can employ raxml class, which is design to store RAxML bootstrapping analysis output, to store the bootstrap and pop.size.
xx=new("raxml", phylo=x, bootstrap=data.frame(node=1:getNodeNum(x),
bootstrap=bootstraps,
pop.size=pop.sizes)
)
ggtree(xx, aes(color=bootstrap), ladderize=F, size=1.2) +
geom_point(aes(size=pop.size), subset=.(bootstrap>50), color="black") +
scale_color_gradient(limits=c(50, 100), low="red", high="black") +
scale_size_continuous(limits=c(0, 100000), range=c(1, 5)) +
theme_tree2(legend.position="right") 
draw_ggphylo <- function(x, bootstraps, pop.sizes) {
for (i in nodes(x)) {
x <- tree.set.tag(x, i, 'bootstrap', bootstraps[i])
x <- tree.set.tag(x, i, 'pop.size', pop.sizes[i]) }
plot.args <- list(
x,
line.color.by='bootstrap',
line.color.scale=scale_colour_gradient(limits=c(50, 100), low='red', high='black'),
node.size.by='pop.size',
node.size.scale = scale_size_continuous(limits=c(0, 100000),
range=c(1, 5)), label.size=2
)
do.call(ggphylo, plot.args)
}
draw_ggtree <- function(x, bootstraps, pop.size) {
xx=new("raxml", phylo=x, bootstrap=data.frame(node=1:getNodeNum(x),
bootstrap=bootstraps,
pop.size=pop.sizes)
)
ggtree(xx, aes(color=bootstrap), ladderize=F, size=1.2) +
geom_point(aes(size=pop.size), subset=.(bootstrap>50), color="black") +
scale_color_gradient(limits=c(50, 100), low="red", high="black") +
scale_size_continuous(limits=c(0, 100000), range=c(1, 5)) +
theme_tree2(legend.position="right")
}
require(microbenchmark)
bm <- microbenchmark(
ggphylo = draw_ggphylo(x, bootstraps, pop.sizes),
ggtree = draw_ggtree(x, bootstraps, pop.sizes),
times=100L
)
qplot(y=time, data=bm, color=expr) + scale_y_log10() + ggtitle("run time comparison")