Before starting
If you would like to see this practical in your viewer within RStudio, please execute the following chunk of code:
install.packages("rstudioapi") # installs a required R-package
dir <- tempfile()
dir.create(dir)
download.file("https://gift.uni-goettingen.de/community/index.html",
destfile = file.path(dir, "index.html"))
download.file("https://gift.uni-goettingen.de/community/lesson4.html",
destfile = file.path(dir, "lesson7.html"))
htmlFile <- file.path(dir, "lesson7.html")
rstudioapi::viewer(htmlFile)In this class, we will calculate some classical indices related to functional and phylogenetical diversity. We will use the same dataset for this practical than for the practical 2, i.e. the Barro Colorado Island (BCI) forest-plot inventory in Panama. Refer to the practical 2 for more details regarding this dataset.
BCI <- read.table("data/BCI_Data.csv",
header = TRUE, # does the file have column names
sep = ",", # what separates the column
row.names = 1) # does the file have row names
BCI <- t(BCI) # transposing the matrix to have sites in rows
BCI[4:6, 5:7]## Adelia.triloba Aegiphila.panamensis Alchornea.costaricensis
## Plot4 3 0 18
## Plot5 1 1 3
## Plot6 0 0 2
We will also import a trait table and a phylogenetic tree of the species present in Barro Colorado Island.
Before computing functional diversity (FD) indices, we need to load the trait table of the tree species present in BCI. Wright et al. (2010) gathered several functional traits for these species, and we will use three of the traits they measured in this tutorial:
tra <- read.table("data/traits/BCI_trait.txt", header = TRUE)
dim(tra) # first observation: many missing species## [1] 134 4
When loading a trait table, there are always a few things to look at:
- what is the trait coverage (how many missing values)?
- what distribution do they have?
- how correlated are they?
# Trait coverage
summary(tra$height)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.10 12.93 20.00 20.60 28.93 40.40
summary(tra$seedmass)## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00005 0.01695 0.07949 0.54985 0.18811 22.86918 37
summary(tra$lma)## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 19.40 44.90 53.00 55.83 65.50 105.10 1
# Distribution of traits
par(mfrow = c(1, 3))
hist(tra$height, main = "Vegetative height (m)", xlab = "")
hist(tra$seedmass, main = "Mean seed dry mass (g)", xlab = "")
hist(tra$lma, main = "Mean LMA (g m-2)", xlab = "")Seed mass has a very assymmetrical distribution with one species having very heavy seeds. A trait with such a distribution is usually log-transformed to smooth the effect of outliers.
tra$seedmass <- log(tra$seedmass)Prioria copaifera have seeds > 20g
library(psych)
# Pair plot
par(mfrow = c(1, 1))
pairs.panels(tra[, c("height", "seedmass", "lma")],
density = FALSE, ellipses = FALSE, hist.col = "grey")Since we observed that not all species from this trait table are present in the site-species matrix, and vice-versa, we subset both tables in order to have a common list of species in both of them.
# Only species from BCI in tra
rownames(tra) <- tra$species
tra_BCI <- tra[which(tra$species %in% colnames(BCI)),
c("height", "seedmass", "lma")]
# Subset BCI data with the species for which we have traits
BCI_tra <- BCI[, which(colnames(BCI) %in% tra$species)]Before computing multidimensional functional indices, we can look at the CWM for the three functional traits. Formula of the CWM is the following
\[ CWM=\sum_{k=1}^{n}{A_{k}.T_k} \]
with k a species, A and T its abundance and trait value in the focal community, and n the total number of species in the focal community.
Several ways are possible to compute this index; we here use the cwm() function from BAT package.
library(BAT)
BCI_cwm <- cwm(trait = tra_BCI, comm = BCI_tra,
na.rm = TRUE) # removal of missing values
head(BCI_cwm)## height seedmass lma
## Plot1 27.06330 -2.587103 51.72110
## Plot2 25.96522 -2.681716 54.14565
## Plot3 26.70504 -2.706178 53.12437
## Plot4 25.45333 -2.613620 51.81920
## Plot5 26.64371 -2.507665 51.71198
## Plot6 27.01557 -2.144623 58.24048
Using the fundiversity R package, we here compute the main indices of FD with continuous traits:
- function richness
- functional dispersion and divergence
- functional evenness
library(fundiversity)
# Functional richness
fd_BCI <- fd_fric(traits = tra_BCI, sp_com = BCI_tra)
# Functional dispersion
fd_BCI <- merge(fd_BCI,
fd_fdis(traits = tra_BCI, sp_com = BCI_tra),
by = "site")
# Functional divergence
fd_BCI <- merge(fd_BCI,
fd_fdiv(traits = tra_BCI, sp_com = BCI_tra),
by = "site")
# Functional evenness
fd_BCI <- merge(fd_BCI,
fd_feve(traits = tra_BCI, sp_com = BCI_tra),
by = "site")
dim(fd_BCI)## [1] 50 5
We can compare these indices across communities, along with species richness.
# Add species richness
rich_BCI <- specnumber(BCI)
rich_BCI <- data.frame(site = names(rich_BCI),
SR = as.numeric(rich_BCI))
fd_BCI <- merge(fd_BCI, rich_BCI, by = "site")
# Pair plot
pairs.panels(fd_BCI[, c("SR", "FRic", "FDis", "FDiv", "FEve")],
density = FALSE, ellipses = FALSE, hist.col = "grey")We here use the phylogenetic tree made available by Pearse et al. (2013).
Before importing the BCI phylogenetic tree, let’s construct a very simple phylogenetic tree and look at its structure.
mini.phy <- read.tree(text = "((((A,B), C), (D,E)),F);") # Newick format
par(mfrow = c(1, 3))
plot(mini.phy)
plot(mini.phy, type = "fan")
plot(mini.phy, type = "unrooted")# nodelabels() # add node numbers
# tiplabels() # add tip numbers
str(mini.phy)## List of 3
## $ edge : int [1:10, 1:2] 7 8 9 10 10 9 8 11 11 7 ...
## $ Nnode : int 5
## $ tip.label: chr [1:6] "A" "B" "C" "D" ...
## - attr(*, "class")= chr "phylo"
## - attr(*, "order")= chr "cladewise"
mini.phy$Nnode## [1] 5
mini.phy$tip.label## [1] "A" "B" "C" "D" "E" "F"
# Edges matrix has a number of rows equal to the number of edges (10)
# Each edge starts and ends with a unique pair of indices
# By convention the indices 1 through N for N tips correspond with the tip
# nodes (species) in the tree.
mini.phy$edge## [,1] [,2]
## [1,] 7 8
## [2,] 8 9
## [3,] 9 10
## [4,] 10 1
## [5,] 10 2
## [6,] 9 3
## [7,] 8 11
## [8,] 11 4
## [9,] 11 5
## [10,] 7 6
nrow(mini.phy$edge)## [1] 10
# does the phylogeny have a root?
# i.e. can the most basal ancestor be identified?
is.rooted(mini.phy)## [1] TRUE
Let’s now import the acutal phylogenetic tree of BCI species.
# Import tree with ape package
phyl <- read.tree("data/phylogeny/phylomatic.tre")
phyl##
## Phylogenetic tree with 308 tips and 137 internal nodes.
##
## Tip labels:
## abarema_macradenia, acacia_melanoceras, andira_inermis, cojoba_rufescens, dipteryx_oleifera, enterolobium_schomburgkii, ...
## Node labels:
## , , , , , , ...
##
## Rooted; includes branch lengths.
str(phyl)## List of 6
## $ edge : int [1:444, 1:2] 309 310 311 312 313 314 315 316 317 317 ...
## $ edge.length: num [1:444] 15.76 1.54 25.73 12.7 0 ...
## $ Nnode : int 137
## $ node.label : chr [1:137] "" "" "" "" ...
## $ tip.label : chr [1:308] "abarema_macradenia" "acacia_melanoceras" "andira_inermis" "cojoba_rufescens" ...
## $ root.edge : num 0
## - attr(*, "class")= chr "phylo"
## - attr(*, "order")= chr "cladewise"
# Number of nodes (m) (n is the number of tips)
phyl$Nnode## [1] 137
# Tip labels: how many species in the tree?
head(phyl$tip.label)## [1] "abarema_macradenia" "acacia_melanoceras"
## [3] "andira_inermis" "cojoba_rufescens"
## [5] "dipteryx_oleifera" "enterolobium_schomburgkii"
length(unique(phyl$tip.label))## [1] 308
The species names are not formatted in the same way as in BCI site-species matrix.
# Species name reformating: first letter capital, . instead of _
substring(phyl$tip.label, 1, 1) <- toupper(substring(
phyl$tip.label, 1, 1))
phyl$tip.label <- gsub("_", ".", phyl$tip.label, fixed = TRUE)Other issue, some species in the tree are not in the site-species matrix and reverse.
We here prune the tree to remove species absent from the BCI site-species matrix.
sp_not_BCI <- phyl$tip.label[!(phyl$tip.label %in% colnames(BCI))]
pruned_tree <- drop.tip(phyl, sp_not_BCI)
length(unique(pruned_tree$tip.label))## [1] 206
table(pruned_tree$tip.label %in% colnames(BCI))##
## TRUE
## 206
# does the phylogeny have a root?
# i.e. can the most basal ancestor be identified?
is.rooted(pruned_tree)## [1] TRUE
We can plot the pruned BCI phylogenetic tree.
plot(pruned_tree, type = "fan", cex = 0.3, edge.color = "gray70",
tip.color = "#ef8a62",
main = "Phylogenetic tree")The most classical index to estimate the phylogenetic richness of an assemblage is the Faith index, whose formula is the following: \[ Faith'PD = \sum_{branches}L_b \]
with \(L_b\) the length of the branch \(b\). Basically, this index sums the length of branches connecting all species in the assemblage.
We use the pd() function from picante package to compute it.
PD_BCI <- pd(BCI, phyl)
head(PD_BCI)## PD SR
## Plot1 5014.815 93
## Plot2 4412.435 84
## Plot3 4710.645 90
## Plot4 4958.761 94
## Plot5 5459.851 101
## Plot6 5001.782 85
par(mfrow = c(1, 2))
hist(PD_BCI$PD, main = "", xlab = "Phylogenetic Diversity")
plot(PD_BCI$SR, PD_BCI$PD, col = rgb(0, 0 ,0, 0.5), pch = 16,
xlab = "Species richness", ylab = "Phylogenetic Diversity")A classic index to estimate phylogenetic divergence is the Mean Pairwise Distance (MPD), which estimates how spread/clumped are the species of an assemblage in the phylogenetic tree.
Before computing this index, we need to construct a distance matrix for each pair of species.
# Cophenetic distances
phyl_dist <- cophenetic(pruned_tree)
dim(phyl_dist)## [1] 206 206
phyl_dist[1:3, 1:3]## Acacia.melanoceras Andira.inermis
## Acacia.melanoceras 0.00000 85.07184
## Andira.inermis 85.07184 0.00000
## Enterolobium.schomburgkii 85.07184 85.07184
## Enterolobium.schomburgkii
## Acacia.melanoceras 85.07184
## Andira.inermis 85.07184
## Enterolobium.schomburgkii 0.00000
head(which(phyl_dist == max(phyl_dist), arr.ind = TRUE))## row col
## Mosannona.garwoodii 206 5
## Mosannona.garwoodii 206 6
## Mosannona.garwoodii 206 7
## Mosannona.garwoodii 206 8
## Mosannona.garwoodii 206 9
## Mosannona.garwoodii 206 10
phyl_dist[206, 5]; rownames(phyl_dist)[206]; colnames(phyl_dist)[5]## [1] 319.1912
## [1] "Mosannona.garwoodii"
## [1] "Inga.acuminata"
We can highlight the most remote species in the tree.
two_sp <- pruned_tree
two_sp$tip.label <- c(rep("", 4), "Inga.acuminata", rep("", 200),
"Mosannona.garwoodii")
par(mfrow = c(1, 1))
plot(two_sp,
# type = "fan",
cex = 0.3,
edge.color = "gray70",
tip.color = "#ef8a62",
main = "Phylogenetic tree")With the cophenetic distance matrix, we can now compute the MPD for each community.
BCI_tree <- BCI[, colnames(BCI) %in% pruned_tree$tip.label]
dim(BCI_tree)## [1] 50 206
mpd_BCI <- mpd(samp = BCI_tree,
cophenetic(phyl),
abundance.weighted = TRUE)
# Convert as data.frame
mpd_BCI <- data.frame(site = rownames(PD_BCI),
MPD = mpd_BCI)We here see how these two indices are linked together and with species richness.
PD_BCI$site <- rownames(PD_BCI)
PD_BCI <- merge(PD_BCI, mpd_BCI, by = "site")
pairs.panels(PD_BCI[, c("SR", "PD", "MPD")],
density = FALSE, ellipses = FALSE, hist.col = "grey")As expected, Faith’PD highly correlates with SR. Null model approach can help to detect the pure effect of phylogenetic richness, but we won’t do that here.
Finally, we can see that functional and phylogenetic divergences are not giving the same output, and therefore story:
fd_pd <- merge(fd_BCI, PD_BCI, by = "site")
# pairs.panels(fd_pd[, c("SR", "FRic", "FDis", "FDiv", "FEve", "PD", "MPD")],
# density = FALSE, ellipses = FALSE, hist.col = "grey")
par(mfrow = c(1, 1))
plot(fd_pd$FDis, fd_pd$MPD, pch = 16,
main = "FD ~ PD",
xlab = "Functional Dispersion", ylab = "Mean Pairwise Distance")How to plot phylogenetic trees with R?
Data Integration, Manipulation and Visualization of Phylogenetic Trees
Tutorial: how to handle phylogenetic trees in R?
Phylogenetic trees in R