Preparation: Open your RStudio project and download
new data for today from Stud-IP
(log in first) and unzip them into your project folder ’/data/. You can
use getwd()
to locate your current working directory, which
should be your project folder. Please install the following R-packages
using install.packages()
:
If you want to visualize this tutorial in the viewer inside RStudio (to save space on your screen) run the following chunk of code:
install.packages("rstudioapi") # install an R-package required for this step
dir <- tempfile()
dir.create(dir)
download.file("https://gift.uni-goettingen.de/mcmmb/index.html", destfile = file.path(dir, "index.html"))
download.file("https://gift.uni-goettingen.de/mcmmb/Day6.html", destfile = file.path(dir, "Day6.html"))
htmlFile <- file.path(dir, "Day6.html")
rstudioapi::viewer(htmlFile)
Now you can conveniently copy code from the viewer into your
script.
Load R packages & island data set
library("maps")
library("classInt")
data(worldMapEnv)
library("psych")
library("car")
library("lme4")
library("jtools")
library("performance")
library("ggplot2")
library("GGally")
library("sf")
library("rnaturalearth")
library("rnaturalearthdata")
library("ggpmisc")
library("cowplot")
library("dplyr")
library("tidyr")
library("GIFT")
We will download island-level angiosperm species richness and environmental data from the GIFT database (Weigelt et al. 2020) using the GIFT R-package.
# Islands in GIFT
islands <- GIFT_regions(GIFT_version = "2.1")
islands <- islands[which(islands$suit_geo == 1 & islands$entity_class == "Island"),
c("entity_ID", "geo_entity")]
# Geographic characteristics
islands_geo <- GIFT_env(entity_ID = islands$entity_ID,
miscellaneous = c("longitude", "latitude", "area", "dist"),
GIFT_version = "2.1")
islands <- inner_join(islands, islands_geo, by = c("entity_ID", "geo_entity"))
# Island geology
islands_geology <- read.csv("data/islands_geology.csv")
islands <- inner_join(islands, islands_geology,
by = c("entity_ID", "geo_entity"))
We now retrieve the species richness per island.
# Angiosperm species richness
islands_richness <- GIFT_richness(taxon_name = "Angiospermae",
GIFT_version = "2.1")
islands_richness <- islands_richness[!is.na(islands_richness$native),
c("entity_ID", "native", "endemic_min")]
names(islands_richness) <- c("entity_ID", "spec_num", "end_num")
islands <- inner_join(islands, islands_richness, by = "entity_ID")
We further add other continuous environmental variables.
# Environmental data
islands_env <- GIFT_env(entity_ID = islands$entity_ID,
rasterlayer = c("mx30_grd", "CHELSA_bio10_1",
"CHELSA_bio10_12",
"Homogeneity_01_05_1km_uint16"),
sumstat = list("max", "mean", "mean", "mean"),
GIFT_version = "2.1")
names(islands_env) <- c("entity_ID", "geo_entity", "max_elev", "mean_temp",
"mean_prec", "mean_homogeneity")
islands <- inner_join(islands, islands_env,
by = c("entity_ID", "geo_entity"))
str(islands)
## 'data.frame': 296 obs. of 14 variables:
## $ entity_ID : num 2 3 126 127 128 129 131 132 133 134 ...
## $ geo_entity : chr "Cartier Island" "Christmas Island" "Cocos (Keeling) North Keeling Island" "Lord Howe Island" ...
## $ longitude : num 123.6 105.6 96.8 159.1 168 ...
## $ latitude : num -12.5 -10.5 -11.8 -31.6 -29 ...
## $ area : num 0.0499 139.6099 3.3589 17.4658 37.8766 ...
## $ dist : num 290 1310 1625 570 1393 ...
## $ age_Ma : num NA 20 NA 6 2.68 ...
## $ geology : chr "shelf" "volcanic" "atoll" "volcanic" ...
## $ spec_num : num 1 200 33 212 137 50 135 172 199 192 ...
## $ end_num : num 0 15 0 69 34 6 0 0 0 0 ...
## $ max_elev : num 4 367 26 465 312 ...
## $ mean_temp : num 27.9 25.4 NA 19.5 19.2 ...
## $ mean_prec : num 1383 2178 NA 1757 1445 ...
## $ mean_homogeneity: num NA 0.301 NA 0.213 0.301 ...
# write.csv(islands, "data/islands.csv", row.names = FALSE)
# islands <- read.csv("data/islands.csv")
We first load the shapes of all countries.
# World coastlines
world <- ne_coastline(scale = "medium", returnclass = "sf")
class(world)
## [1] "sf" "data.frame"
We also define the Mollweide CRS.
mollweide <- "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs "
ggplot(world) +
geom_sf() +
coord_sf(crs = mollweide) +
labs(title = "",
subtitle = "Mollweide projection") +
theme_bw() +
theme(panel.border = element_blank())
To combine the islands data.frame with the world map, we first need to convert it to an sf object.
# Convert islands to sf object
islands_sf <- st_as_sf(islands, coords = c("longitude", "latitude"),
crs = "+proj=longlat +ellps=WGS84 +no_defs")
# Map
ggplot(world) +
geom_sf() +
geom_sf(data = islands_sf, shape = 1, stroke = 1.5,
aes(size = spec_num, color = spec_num)) +
scale_color_viridis_c("Species richness", trans = "log", guide = "legend") +
scale_size_continuous("Species richness", trans = "log") +
coord_sf(crs = mollweide) + # coord_sf(crs = eckertIV) +
labs(title = "Angiosperm species richness",
subtitle = "Mollweide projection") +
theme_bw()
## Warning in scale_color_viridis_c("Species richness", trans = "log", guide =
## "legend"): log-2.718282 transformation introduced infinite values.
## Warning in scale_size_continuous("Species richness", trans = "log"):
## log-2.718282 transformation introduced infinite values.
## Warning in sqrt(x): NaNs produced
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_sf()`).
# https://community.rstudio.com/t/ggplot2-is-it-possible-to-combine-color-fill-and-size-legends/17072
Bonus: custom discrete color scale
# Arrange islands by species richness (low to high)
islands <- islands[order(islands$spec_num), ]
# create colour gradient
my.class.fr.S <- classIntervals(
islands$spec_num, n = 9, style = "fixed",
fixedBreaks = c(0, 25, 50, 100, 200, 400, 800, 1600, 3200, 6400)) # bin data into n quantiles
my.pal <- c("midnightblue", "dodgerblue4", "dodgerblue2", "chartreuse4",
"gold", "orangered1", "darkred")
my.col.fr.S <- findColours(my.class.fr.S, my.pal) # ramp colors based on classInts
my.col.fr.S[islands$spec_num < 1] <- "#666666" # assign grey for zero species
# specify dimensions of plot for saving
if(.Platform$OS.type == "windows"){
windows(width=20*0.3937008, height = 12*0.3937008)
}else{
X11(width = 20*0.3937008, height = 12*0.3937008, type = "cairo")
}
# quartz(width=20*0.3937008, height=12*0.3937008) # for Mac
map("world", fill = FALSE)
title(main = "Angiosperm species richness")
points(islands$longitude, islands$latitude,
pch = ifelse(islands$spec_num >= 1, 1, 4),
cex = (log10(islands$spec_num+1)/2.6)^2 + 1, col = my.col.fr.S, lwd = 2)
if(.Platform$OS.type == "windows"){
savePlot("data/islands.pdf", type = "pdf")
}else{
dev.copy2pdf(file = "data/islands.pdf")
}
# quartz.save("Data/islands.pdf", type = "pdf") # for Mac
dev.off()
Meta-data
1. latitude: Latitude (in decimal degrees)
2. area: Island area (in km2)
3. dist: Distance to the nearest continent (in
km)
4. age_MA: Island geological age (million years)
5. max_elev: Maximum elevation (m)
6. mean_temp: Mean annual temperature (degrees
Celsius)
7. mean_prec: Mean annual precipitation (mm
yr-1)
8. mean_homogeneity: Habitat heterogeneity (similarity
of vegetation index with surrounding pixels)
abiotic <- c("latitude", "area", "dist", "age_Ma", "max_elev",
"mean_temp", "mean_prec", "mean_homogeneity")
ggpairs(islands[, abiotic],
upper = list(continuous = wrap(ggally_cor, digits = 1))) +
theme_bw()
# Other way to have a pair plot:
# pairs.panels(islands[, c(4:7, 11:14)], density = FALSE, ellipses = FALSE,
# hist.col = "white")
We here aim at linking, using the best model, island area and plant species richness.
Species Richness ~ Area
SAR_lm <- lm(spec_num ~ area, data = islands)
summary(SAR_lm)
##
## Call:
## lm(formula = spec_num ~ area, data = islands)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3566.6 -181.1 -122.6 -3.9 4054.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.083e+02 2.780e+01 7.495 7.85e-13 ***
## area 3.766e-02 2.504e-03 15.041 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 472.1 on 294 degrees of freedom
## Multiple R-squared: 0.4349, Adjusted R-squared: 0.4329
## F-statistic: 226.2 on 1 and 294 DF, p-value: < 2.2e-16
ggplot(islands, aes(area, spec_num)) +
geom_point() +
stat_smooth(method = "lm") +
stat_poly_eq(method = "lm", formula = y ~ x,
aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))) +
labs(x = "Area (km^2)", y = "Species richness") +
theme_bw()
## Warning: The dot-dot notation (`..eq.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(eq.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
# plot(spec_num ~ area, data = islands)
# car::regLine(SAR_lm, col = "darkgreen")
Species richness ~ c + Areaz
SAR_power <- nls(spec_num ~ c + area^z, data = islands,
start = list(c = 0, z = 1))
summary(SAR_power)
##
## Formula: spec_num ~ c + area^z
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## c 1.728e+02 2.584e+01 6.685 1.16e-10 ***
## z 7.164e-01 5.138e-03 139.441 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 436.4 on 294 degrees of freedom
##
## Number of iterations to convergence: 7
## Achieved convergence tolerance: 3.734e-06
ggplot(islands, aes(area, spec_num)) +
geom_point() +
geom_smooth(method = "nls",
formula = y ~ c + x^z,
method.args = list(start = c(c = 0, z = 1)),
se = FALSE) +
labs(title = "Species-area relationship",
x = "Island area (km^2)", y = "Species number") +
theme_bw()
Predictions of the non-linear model:
newdata <- data.frame(area = seq(min(islands$area), max(islands$area),
length.out = 100))
predicted_richness_SAR_power <- predict(SAR_power, newdata = newdata)
head(predicted_richness_SAR_power)
## [1] 172.8397 326.2882 425.0141 510.0374 587.2289 659.0747
# plot(islands$area, islands$spec_num, xlab = "Island area (km)",
# ylab = "Species number", main = "Species-area relationship")
# points(newdata$area, predicted_richness_SAR1, type = "l",
# col = "darkred", lwd = 2)
Make histograms of species richness and area
plot_grid(
ggplot(islands, aes(area)) +
geom_histogram(color = "black", fill = "grey50") +
labs(x = "Area (km^2)", y = "Count") +
theme_bw(),
ggplot(islands, aes(spec_num)) +
geom_histogram(color = "black", fill = "grey50") +
labs(x = "Species richness", y = "Count") +
theme_bw(),
nrow = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# par(mfrow = c(1, 2))
# hist(islands$area)
# hist(islands$spec_num)
Apply a log10 transformation to species richness and area, make new columns of the transformed variables
islands$log_area <- log10(islands$area + 1)
islands$log_spec_num <- log10(islands$spec_num + 1)
Plot the new histograms.
plot_grid(
ggplot(islands, aes(log_area)) +
geom_histogram(color = "black", fill = "grey50") +
labs(x = "Area (km^2) log-transformed", y = "Count") +
theme_bw(),
ggplot(islands, aes(log_spec_num)) +
geom_histogram(color = "black", fill = "grey50") +
labs(x = "Species richness log-transformed", y = "Count") +
theme_bw(),
nrow = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# par(mfrow = c(1, 2))
# hist(islands$log_area)
# hist(islands$log_spec_num)
Fit a linear model to predict log species richness using log area as a predictor.
SAR_log <- lm(log_spec_num ~ log_area, data = islands)
summary(SAR_log)
##
## Call:
## lm(formula = log_spec_num ~ log_area, data = islands)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.15534 -0.19833 0.06126 0.35182 1.13193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.26728 0.04906 25.83 <2e-16 ***
## log_area 0.43588 0.02618 16.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5175 on 294 degrees of freedom
## Multiple R-squared: 0.4853, Adjusted R-squared: 0.4836
## F-statistic: 277.2 on 1 and 294 DF, p-value: < 2.2e-16
Plot the results.
ggplot(islands, aes(log_area, log_spec_num)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, color = "darkgreen") +
stat_poly_eq(method = "lm", formula = y ~ x,
aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))) +
labs(x = "Area (km^2) log-transformed",
y = "Species richness log-transformed") +
theme_bw()
# par(mfrow = c(1, 1))
# plot(log_spec_num ~ log_area, data = islands)
# car::regLine(SAR_log, col = "darkgreen")
Check model assumptions
par(mfrow = c(1, 2))
hist(residuals(SAR_lm))
hist(residuals(SAR_log))
SAR_poisson <- glm(spec_num ~ log_area, data = islands, family = "poisson")
# 100*(161367-43825)/161367
# Plot Poisson GLM
ggplot(islands, aes(log_area, spec_num)) +
geom_point() +
geom_smooth(method = "glm", formula = y ~ x,
method.args = list(family = "poisson"),
color = "darkblue") +
labs(title = "GLM Poisson",
x = "Area log-transformed (log10 km2)",
y = "Species richness") +
theme_bw()
# Plot Poisson GLM in log-log space
newdata_SAR_poisson <- data.frame(log_area = islands$log_area)
newdata_SAR_poisson$pred <- as.numeric(predict(SAR_poisson,
newdata = newdata_SAR_poisson,
type = "response"))
# Log-transforming the predictions
newdata_SAR_poisson$pred_log10 <- log10(newdata_SAR_poisson$pred + 1)
ggplot(islands, aes(log_area, log_spec_num)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
color = "darkgreen") +
geom_line(data = newdata_SAR_poisson, aes(log_area, pred_log10),
color = "darkblue", linewidth = 1) +
labs(title = "log-log Species-area relationship",
x = "Area log-transformed (log10 km2)",
y = "Species richness log-transformed") +
theme_bw()
par(mfrow = c(1, 1))
plot(islands$log_area, islands$log_spec_num,
xlab = "log10 island area (log10 km)",
ylab = "log10 species number",
main = "log-log Species-area relationship")
car::regLine(SAR_log)
newdata_SAR_poisson <- data.frame(log_area = seq(min(islands$log_area),
max(islands$log_area),
length.out = 100))
predicted_richness_SAR_poisson <- predict(SAR_poisson, newdata = newdata_SAR_poisson,
type = "response")
points(newdata_SAR_poisson$log_area, log10(predicted_richness_SAR_poisson + 1),
type = "l", col = "purple", lwd = 2)
Now, let’s plot all models together.
# Make a data frame with all the predictions
newdata_all <- data.frame(area = islands$area,
log_area = islands$log_area)
newdata_all$pred_SAR_lm <- as.numeric(predict(SAR_lm,
newdata = newdata_all,
type = "response"))
newdata_all$pred_SAR_power <- as.numeric(predict(SAR_power,
newdata = newdata_all,
type = "response"))
newdata_all$pred_SAR_log <- as.numeric(predict(SAR_log,
newdata = newdata_all,
type = "response"))
newdata_all$pred_SAR_log <- (10^newdata_all$pred_SAR_log)-1
newdata_all$pred_SAR_poisson <- as.numeric(predict(SAR_poisson,
newdata = newdata_all,
type = "response"))
# newdata_all$pred_SAR_poisson_exp10 <- (10^newdata_all$pred_SAR_poisson) - 1
# we also need to reverse the log-transformation for area
# newdata_all$logarea_exp10 <- (10^newdata_all$log_area)-1
# Putting the table in a long format
newdata_all <- pivot_longer(newdata_all, cols = contains("SAR"),
names_to = "Model", values_to = "Predicted values")
ggplot(newdata_all, aes(area, `Predicted values`)) +
geom_point(data = islands, aes(area, spec_num)) +
geom_line(aes(color = Model), linewidth = 1) +
scale_color_brewer(palette = "Set1") +
labs(x = "Area (km^2)", y = "Species richness") +
theme_bw()
plot(islands$area, islands$spec_num, xlab = "Island area (km2)",
ylab = "Species number", main = "Species-area relationship")
# plot the simple linear model
car::regLine(SAR_lm, col = "darkgreen")
# plot the non-linear power law model
points(newdata$area, predicted_richness_SAR_power, type = "l",
col = "darkred", lwd = 2)
# plot the log-log space model in untransformed space
newdata_SAR_log <- data.frame(log_area = log10(seq(min(islands$area),
max(islands$area),
length.out = 100) + 1))
predicted_richness_SAR_log <- predict(SAR_log, newdata = newdata_SAR_log)
points((10^newdata_SAR_log$log_area) - 1, (10^predicted_richness_SAR_log) - 1,
type = "l", col = "purple", lwd = 2)
points(newdata$area, (10^predicted_richness_SAR_log) - 1, type = "l",
col = "darkgreen", lwd = 2, lty = 2)
# plot the Poisson model in untransformed space
predicted_richness_SAR_poisson <- predict(SAR_poisson,
newdata = newdata_SAR_poisson,
type = "response")
points((10^newdata_SAR_poisson$log_area) - 1,
predicted_richness_SAR_poisson,
type = "l", col = "darkblue", lwd = 2)
How do these models compare?
AIC(SAR_lm, SAR_power, SAR_log, SAR_poisson)
## df AIC
## SAR_lm 3 4489.0290
## SAR_power 3 4442.4913
## SAR_log 3 454.0032
## SAR_poisson 2 53799.0371
Warning: You cannot compare AIC values among models with different response variables or different distribution families. This means we can here only compare the AIC values of the first two models!
Apply a log10 transformation to area, elevation, precipitation, and
temperature following:
Kreft, H.
et al. 2008. Global diversity of island floras from a macroecological
perspective. Ecology Letters 11:116-127.
islands$log_area <- log10(islands$area)
islands$log_spec_num <- log10(islands$spec_num + 1)
islands$log_max_elev <- log10(islands$max_elev + 1)
islands$log_mean_prec <- log10(islands$mean_prec)
islands$log_mean_temp <- log10(islands$mean_temp + 5)
str(islands)
## 'data.frame': 296 obs. of 19 variables:
## $ entity_ID : num 2 3 126 127 128 129 131 132 133 134 ...
## $ geo_entity : chr "Cartier Island" "Christmas Island" "Cocos (Keeling) North Keeling Island" "Lord Howe Island" ...
## $ longitude : num 123.6 105.6 96.8 159.1 168 ...
## $ latitude : num -12.5 -10.5 -11.8 -31.6 -29 ...
## $ area : num 0.0499 139.6099 3.3589 17.4658 37.8766 ...
## $ dist : num 290 1310 1625 570 1393 ...
## $ age_Ma : num NA 20 NA 6 2.68 ...
## $ geology : chr "shelf" "volcanic" "atoll" "volcanic" ...
## $ spec_num : num 1 200 33 212 137 50 135 172 199 192 ...
## $ end_num : num 0 15 0 69 34 6 0 0 0 0 ...
## $ max_elev : num 4 367 26 465 312 ...
## $ mean_temp : num 27.9 25.4 NA 19.5 19.2 ...
## $ mean_prec : num 1383 2178 NA 1757 1445 ...
## $ mean_homogeneity: num NA 0.301 NA 0.213 0.301 ...
## $ log_area : num -1.302 2.145 0.526 1.242 1.578 ...
## $ log_spec_num : num 0.301 2.303 1.531 2.328 2.14 ...
## $ log_max_elev : num 0.699 2.566 1.431 2.668 2.496 ...
## $ log_mean_prec : num 3.14 3.34 NA 3.24 3.16 ...
## $ log_mean_temp : num 1.52 1.48 NA 1.39 1.38 ...
Simplify the geology column; change all values that are floor or volcanic to oceanic and afterwards set all values that are not atoll, fragment, oceanic or shelf to NA.
islands$geology <- as.character(islands$geology)
islands$geology[which(islands$geology == "floor" |
islands$geology == "volcanic")] <- "oceanic"
islands$geology[which(islands$geology %in%
c("floor" , "volcanic"))] <- "oceanic"
islands$geology[which(islands$geology == "floor")] <- "oceanic"
islands$geology[which(islands$geology == "volcanic")] <- "oceanic"
islands$geology[which(!islands$geology %in% c("atoll" , "fragment", "oceanic" , "shelf"))] <- NA
summary(as.factor(islands$geology))
## atoll fragment oceanic shelf
## 28 47 187 34
Fit a multiple linear regression to predict biodiversity patterns on islands using the following biogeographic and environmental variables: island area, distance, elevation, temperature, precipitation and island geology.
colnames(islands)[c(6, 8, 15:19)]
## [1] "dist" "geology" "log_area" "log_spec_num"
## [5] "log_max_elev" "log_mean_prec" "log_mean_temp"
new_islands <- islands[, c(6, 8, 15:19)]
new_islands <- na.omit(new_islands) # remove observations with NAs
Multi.mod <- lm(log_spec_num ~ log_area + dist + log_max_elev +
log_mean_temp + log_mean_prec + geology,
data = new_islands)
summary(Multi.mod)
##
## Call:
## lm(formula = log_spec_num ~ log_area + dist + log_max_elev +
## log_mean_temp + log_mean_prec + geology, data = new_islands)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.07139 -0.21153 0.02771 0.19894 0.75164
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.057e+00 2.635e-01 -4.012 7.84e-05 ***
## log_area 3.543e-01 2.138e-02 16.570 < 2e-16 ***
## dist -8.805e-05 1.450e-05 -6.073 4.38e-09 ***
## log_max_elev 8.780e-02 3.223e-02 2.725 0.006871 **
## log_mean_temp 1.284e+00 1.425e-01 9.005 < 2e-16 ***
## log_mean_prec 1.857e-01 5.148e-02 3.607 0.000371 ***
## geologyfragment 2.115e-01 8.248e-02 2.564 0.010908 *
## geologyoceanic 1.153e-01 7.282e-02 1.584 0.114415
## geologyshelf 3.241e-01 9.099e-02 3.562 0.000437 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3036 on 263 degrees of freedom
## Multiple R-squared: 0.7726, Adjusted R-squared: 0.7657
## F-statistic: 111.7 on 8 and 263 DF, p-value: < 2.2e-16
crPlots(Multi.mod)
Can we improve predictions of species richness by removing
individual model terms?
Use manual backward selection to specify the best model
Hints
1. Remove missing observations from your data
2. Use ‘drop1(mod, test=“F”)’
3. Use ‘drop1(update(Multi.mod,~ . -log_max_elev), test = “F”)’
Multi.mod1 <- update(Multi.mod, ~ . -log_max_elev)
AIC(Multi.mod, Multi.mod1)
## df AIC
## Multi.mod 10 134.3083
## Multi.mod1 9 139.8792
# Assign archipelago names for 4 major archipelagos
islands$archipelago <- NA
islands$archipelago[which(islands$entity_ID %in% c(169:188))] <- "Hawaii"
islands$archipelago[which(islands$entity_ID %in% c(145:151))] <- "Canaries"
islands$archipelago[which(islands$entity_ID %in% c(154:168))] <- "Galapagos"
islands$archipelago[which(islands$entity_ID %in% c(131:139))] <- "Azores"
islands_gdm <- islands[!is.na(islands$archipelago),]
# Log-transforming age
islands_gdm$log_age_Ma <- log10(islands_gdm$age_Ma)
The General dynamic model of island biogeography (GDM) applied: just Hawaii (part I)
Species = Area x Time x Time2
# subset data
Hawaii <- islands_gdm[which(islands_gdm$archipelago == "Hawaii"), ]
# The ATT^2 model
gdm1 <- lm(spec_num ~ log_area + age_Ma + I(age_Ma^2), data = Hawaii)
summary(gdm1)
##
## Call:
## lm(formula = spec_num ~ log_area + age_Ma + I(age_Ma^2), data = Hawaii)
##
## Residuals:
## Min 1Q Median 3Q Max
## -213.483 -24.157 -9.271 68.987 130.922
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -64.9936 108.1344 -0.601 0.558993
## log_area 118.3895 24.5249 4.827 0.000414 ***
## age_Ma 21.7449 19.7480 1.101 0.292440
## I(age_Ma^2) -0.7983 0.6953 -1.148 0.273310
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 101.5 on 12 degrees of freedom
## Multiple R-squared: 0.7941, Adjusted R-squared: 0.7427
## F-statistic: 15.43 on 3 and 12 DF, p-value: 0.0002025
crPlots(gdm1)
The General dynamic model of island biogeography (GDM) applied: just Hawaii (part II)
Species = log(Area) x log(Time) x log(Time)2
gdm2 <- lm(spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2), data = Hawaii)
summary(gdm2)
##
## Call:
## lm(formula = spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2),
## data = Hawaii)
##
## Residuals:
## Min 1Q Median 3Q Max
## -214.923 -29.025 2.973 48.080 140.072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -47.47 87.33 -0.544 0.596700
## log_area 115.33 22.33 5.164 0.000235 ***
## log_age_Ma 209.23 157.76 1.326 0.209460
## I(log_age_Ma^2) -117.18 103.54 -1.132 0.279836
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 99.85 on 12 degrees of freedom
## Multiple R-squared: 0.8006, Adjusted R-squared: 0.7508
## F-statistic: 16.06 on 3 and 12 DF, p-value: 0.0001678
AIC(gdm1, gdm2)
## df AIC
## gdm1 5 198.6306
## gdm2 5 198.1210
crPlots(gdm2)
A linear mixed effects model for all 4 archipelagos
Species = log(Area) + log(Time) + log(Time)2 + (1|archipelago)
gdm_all <- lme4::glmer(spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2) +
(1|archipelago), data = islands_gdm, family = poisson)
summary(gdm_all)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2) + (1 | archipelago)
## Data: islands_gdm
##
## AIC BIC logLik deviance df.resid
## 1041.0 1050.2 -515.5 1031.0 41
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.3764 -2.7981 -0.7854 3.0592 6.8180
##
## Random effects:
## Groups Name Variance Std.Dev.
## archipelago (Intercept) 0.2325 0.4822
## Number of obs: 46, groups: archipelago, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.67426 0.24472 15.01 <2e-16 ***
## log_area 0.75511 0.01443 52.32 <2e-16 ***
## log_age_Ma 0.62206 0.04338 14.34 <2e-16 ***
## I(log_age_Ma^2) -0.63398 0.03875 -16.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) log_ar lg_g_M
## log_area -0.163
## log_age_Ma -0.037 0.147
## I(lg_g_M^2) 0.012 -0.105 -0.853
effect_plot(gdm_all, pred = "log_area", outcome.scale = "response",
plot.points = TRUE)
effect_plot(gdm_all, pred = "log_age_Ma", outcome.scale = "response",
plot.points = TRUE)
model_performance(gdm_all, metrics = "common")
## # Indices of model performance
##
## AIC | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE
## --------------------------------------------------------------
## 1041.042 | 1050.185 | 0.997 | 0.813 | 0.986 | 66.334