back to overview    

Preparation: Open your RStudio project and download new data for today from Stud-IP (log in first) and unzip them into your project folder ’/data/. You can use getwd() to locate your current working directory, which should be your project folder. Please install the following R-packages using install.packages():

 

If you want to visualize this tutorial in the viewer inside RStudio (to save space on your screen) run the following chunk of code:

install.packages("rstudioapi") # install an R-package required for this step
dir <- tempfile()
dir.create(dir)
download.file("https://gift.uni-goettingen.de/mcmmb/index.html", destfile = file.path(dir, "index.html"))
download.file("https://gift.uni-goettingen.de/mcmmb/Day6.html", destfile = file.path(dir, "Day6.html"))
htmlFile <- file.path(dir, "Day6.html")
rstudioapi::viewer(htmlFile)

Now you can conveniently copy code from the viewer into your script.
 

Load R packages & island data set

library("maps")
library("classInt")
data(worldMapEnv)
library("psych")
library("car")
library("lme4")
library("jtools")
library("performance")
library("ggplot2")
library("GGally")
library("sf")
library("rnaturalearth")
library("rnaturalearthdata")
library("ggpmisc")
library("cowplot")
library("dplyr")
library("tidyr")
library("GIFT")

 

1. Data exploration

1.1. Data extraction from GIFT database

We will download island-level angiosperm species richness and environmental data from the GIFT database (Weigelt et al. 2020) using the GIFT R-package.

# Islands in GIFT
islands <- GIFT_regions(GIFT_version = "2.1")
islands <- islands[which(islands$suit_geo == 1 & islands$entity_class == "Island"), 
                   c("entity_ID", "geo_entity")]

# Geographic characteristics
islands_geo <- GIFT_env(entity_ID = islands$entity_ID,
                        miscellaneous = c("longitude", "latitude", "area", "dist"),
                        GIFT_version = "2.1")

islands <- inner_join(islands, islands_geo, by = c("entity_ID", "geo_entity"))

# Island geology
islands_geology <- read.csv("data/islands_geology.csv")

islands <- inner_join(islands, islands_geology,
                      by = c("entity_ID", "geo_entity"))

We now retrieve the species richness per island.

# Angiosperm species richness
islands_richness <- GIFT_richness(taxon_name = "Angiospermae",
                                  GIFT_version = "2.1")
islands_richness <- islands_richness[!is.na(islands_richness$native),
                                     c("entity_ID", "native", "endemic_min")]
names(islands_richness) <- c("entity_ID", "spec_num", "end_num")

islands <- inner_join(islands, islands_richness, by = "entity_ID")

We further add other continuous environmental variables.

# Environmental data
islands_env <- GIFT_env(entity_ID = islands$entity_ID,
                        rasterlayer = c("mx30_grd", "CHELSA_bio10_1", 
                                        "CHELSA_bio10_12",
                                        "Homogeneity_01_05_1km_uint16"),
                        sumstat = list("max", "mean", "mean", "mean"),
                        GIFT_version = "2.1")

names(islands_env) <- c("entity_ID", "geo_entity", "max_elev", "mean_temp",
                        "mean_prec", "mean_homogeneity")

islands <- inner_join(islands, islands_env,
                      by = c("entity_ID", "geo_entity"))
str(islands)
## 'data.frame':    296 obs. of  14 variables:
##  $ entity_ID       : num  2 3 126 127 128 129 131 132 133 134 ...
##  $ geo_entity      : chr  "Cartier Island" "Christmas Island" "Cocos (Keeling) North Keeling Island" "Lord Howe Island" ...
##  $ longitude       : num  123.6 105.6 96.8 159.1 168 ...
##  $ latitude        : num  -12.5 -10.5 -11.8 -31.6 -29 ...
##  $ area            : num  0.0499 139.6099 3.3589 17.4658 37.8766 ...
##  $ dist            : num  290 1310 1625 570 1393 ...
##  $ age_Ma          : num  NA 20 NA 6 2.68 ...
##  $ geology         : chr  "shelf" "volcanic" "atoll" "volcanic" ...
##  $ spec_num        : num  1 200 33 212 137 50 135 172 199 192 ...
##  $ end_num         : num  0 15 0 69 34 6 0 0 0 0 ...
##  $ max_elev        : num  4 367 26 465 312 ...
##  $ mean_temp       : num  27.9 25.4 NA 19.5 19.2 ...
##  $ mean_prec       : num  1383 2178 NA 1757 1445 ...
##  $ mean_homogeneity: num  NA 0.301 NA 0.213 0.301 ...
# write.csv(islands, "data/islands.csv", row.names = FALSE)
# islands <- read.csv("data/islands.csv")

1.2. Map species richness

We first load the shapes of all countries.

# World coastlines
world <- ne_coastline(scale = "medium", returnclass = "sf")
class(world)
## [1] "sf"         "data.frame"

We also define the Mollweide CRS.

mollweide <- "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs "

ggplot(world) +
  geom_sf() +
  coord_sf(crs = mollweide) +
  labs(title = "",
       subtitle = "Mollweide projection") +
  theme_bw() +
  theme(panel.border = element_blank())

To combine the islands data.frame with the world map, we first need to convert it to an sf object.

# Convert islands to sf object
islands_sf <- st_as_sf(islands, coords = c("longitude", "latitude"),
                       crs = "+proj=longlat +ellps=WGS84 +no_defs")

# Map
ggplot(world) +
  geom_sf() +
  geom_sf(data = islands_sf, shape = 1, stroke = 1.5,
          aes(size = spec_num, color = spec_num)) +
  scale_color_viridis_c("Species richness", trans = "log", guide = "legend") +
  scale_size_continuous("Species richness", trans = "log") +
  coord_sf(crs = mollweide) + # coord_sf(crs = eckertIV) +
  labs(title = "Angiosperm species richness",
       subtitle = "Mollweide projection") +
  theme_bw()
## Warning in scale_color_viridis_c("Species richness", trans = "log", guide =
## "legend"): log-2.718282 transformation introduced infinite values.
## Warning in scale_size_continuous("Species richness", trans = "log"):
## log-2.718282 transformation introduced infinite values.
## Warning in sqrt(x): NaNs produced
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_sf()`).

# https://community.rstudio.com/t/ggplot2-is-it-possible-to-combine-color-fill-and-size-legends/17072
Bonus: custom discrete color scale
Bonus
# Arrange islands by species richness (low to high)
islands <- islands[order(islands$spec_num), ]

# create colour gradient
my.class.fr.S <- classIntervals(
  islands$spec_num, n = 9, style = "fixed",
  fixedBreaks = c(0, 25, 50, 100, 200, 400, 800, 1600, 3200, 6400)) # bin data into n quantiles
my.pal <- c("midnightblue", "dodgerblue4", "dodgerblue2", "chartreuse4",
            "gold", "orangered1", "darkred")
my.col.fr.S <- findColours(my.class.fr.S, my.pal) # ramp colors based on classInts
my.col.fr.S[islands$spec_num < 1] <- "#666666" # assign grey for zero species

# specify dimensions of plot for saving
if(.Platform$OS.type == "windows"){ 
  windows(width=20*0.3937008, height = 12*0.3937008)
}else{
  X11(width = 20*0.3937008, height = 12*0.3937008, type = "cairo")
}
# quartz(width=20*0.3937008, height=12*0.3937008) # for Mac 

map("world", fill = FALSE)
title(main = "Angiosperm species richness")
points(islands$longitude, islands$latitude,
       pch = ifelse(islands$spec_num >= 1, 1, 4),
       cex = (log10(islands$spec_num+1)/2.6)^2 + 1, col = my.col.fr.S, lwd = 2)

if(.Platform$OS.type == "windows"){ 
  savePlot("data/islands.pdf", type = "pdf")
}else{
  dev.copy2pdf(file = "data/islands.pdf")
}

# quartz.save("Data/islands.pdf", type = "pdf") # for Mac 
dev.off()
 

1.3. Correlations among biogeographic & environmental variables

Meta-data
1. latitude: Latitude (in decimal degrees)
2. area: Island area (in km2)
3. dist: Distance to the nearest continent (in km)
4. age_MA: Island geological age (million years)
5. max_elev: Maximum elevation (m)
6. mean_temp: Mean annual temperature (degrees Celsius)
7. mean_prec: Mean annual precipitation (mm yr-1)
8. mean_homogeneity: Habitat heterogeneity (similarity of vegetation index with surrounding pixels)

abiotic <- c("latitude", "area", "dist", "age_Ma", "max_elev",
             "mean_temp", "mean_prec", "mean_homogeneity")

ggpairs(islands[, abiotic],
        upper = list(continuous = wrap(ggally_cor, digits = 1))) +
  theme_bw()

# Other way to have a pair plot:
# pairs.panels(islands[, c(4:7, 11:14)], density = FALSE, ellipses = FALSE,
#              hist.col = "white")

 

2. The species area relationship (SAR)

We here aim at linking, using the best model, island area and plant species richness.

a) Linear model

Species Richness ~ Area

SAR_lm <- lm(spec_num ~ area, data = islands)
summary(SAR_lm)
## 
## Call:
## lm(formula = spec_num ~ area, data = islands)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3566.6  -181.1  -122.6    -3.9  4054.3 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.083e+02  2.780e+01   7.495 7.85e-13 ***
## area        3.766e-02  2.504e-03  15.041  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 472.1 on 294 degrees of freedom
## Multiple R-squared:  0.4349, Adjusted R-squared:  0.4329 
## F-statistic: 226.2 on 1 and 294 DF,  p-value: < 2.2e-16
ggplot(islands, aes(area, spec_num)) +
  geom_point() +
  stat_smooth(method = "lm") +
  stat_poly_eq(method = "lm", formula = y ~ x,
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))) +
  labs(x = "Area (km^2)", y = "Species richness") +
  theme_bw()
## Warning: The dot-dot notation (`..eq.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(eq.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'

# plot(spec_num ~ area, data = islands)
# car::regLine(SAR_lm, col = "darkgreen")

 

b) Power law model

Species richness ~ c + Areaz

SAR_power <- nls(spec_num ~ c + area^z, data = islands,
                 start = list(c = 0, z = 1))
summary(SAR_power)
## 
## Formula: spec_num ~ c + area^z
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## c 1.728e+02  2.584e+01   6.685 1.16e-10 ***
## z 7.164e-01  5.138e-03 139.441  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 436.4 on 294 degrees of freedom
## 
## Number of iterations to convergence: 7 
## Achieved convergence tolerance: 3.734e-06
ggplot(islands, aes(area, spec_num)) +
  geom_point() +
  geom_smooth(method = "nls",
              formula = y ~ c + x^z,
              method.args = list(start = c(c = 0, z = 1)),
              se = FALSE) +
  labs(title = "Species-area relationship",
       x = "Island area (km^2)", y = "Species number") +
  theme_bw()

Predictions of the non-linear model:

newdata <- data.frame(area = seq(min(islands$area), max(islands$area),
                                 length.out = 100))
predicted_richness_SAR_power <- predict(SAR_power, newdata = newdata)
head(predicted_richness_SAR_power)
## [1] 172.8397 326.2882 425.0141 510.0374 587.2289 659.0747
# plot(islands$area, islands$spec_num, xlab = "Island area (km)",
#      ylab = "Species number", main = "Species-area relationship")
# points(newdata$area, predicted_richness_SAR1, type = "l",
#        col = "darkred", lwd = 2)

 

c) Log-transformed variables

Make histograms of species richness and area

plot_grid(
  ggplot(islands, aes(area)) +
    geom_histogram(color = "black", fill = "grey50") +
    labs(x = "Area (km^2)", y = "Count") +
    theme_bw(),
  ggplot(islands, aes(spec_num)) +
    geom_histogram(color = "black", fill = "grey50") +
    labs(x = "Species richness", y = "Count") +
    theme_bw(),
  nrow = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# par(mfrow = c(1, 2))
# hist(islands$area)
# hist(islands$spec_num)

 

Apply a log10 transformation to species richness and area, make new columns of the transformed variables

islands$log_area <- log10(islands$area + 1)
islands$log_spec_num <- log10(islands$spec_num + 1)

Plot the new histograms.

plot_grid(
  ggplot(islands, aes(log_area)) +
    geom_histogram(color = "black", fill = "grey50") +
    labs(x = "Area (km^2) log-transformed", y = "Count") +
    theme_bw(),
  ggplot(islands, aes(log_spec_num)) +
    geom_histogram(color = "black", fill = "grey50") +
    labs(x = "Species richness log-transformed", y = "Count") +
    theme_bw(),
  nrow = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# par(mfrow = c(1, 2))
# hist(islands$log_area)
# hist(islands$log_spec_num)

Fit a linear model to predict log species richness using log area as a predictor.

SAR_log <- lm(log_spec_num ~ log_area, data = islands)
summary(SAR_log)
## 
## Call:
## lm(formula = log_spec_num ~ log_area, data = islands)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.15534 -0.19833  0.06126  0.35182  1.13193 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.26728    0.04906   25.83   <2e-16 ***
## log_area     0.43588    0.02618   16.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5175 on 294 degrees of freedom
## Multiple R-squared:  0.4853, Adjusted R-squared:  0.4836 
## F-statistic: 277.2 on 1 and 294 DF,  p-value: < 2.2e-16

Plot the results.

ggplot(islands, aes(log_area, log_spec_num)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, color = "darkgreen") +
  stat_poly_eq(method = "lm", formula = y ~ x,
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))) +
  labs(x = "Area (km^2) log-transformed",
       y = "Species richness log-transformed") +
  theme_bw()

# par(mfrow = c(1, 1))
# plot(log_spec_num ~ log_area, data = islands)
# car::regLine(SAR_log, col = "darkgreen")

Check model assumptions

par(mfrow = c(1, 2))
hist(residuals(SAR_lm))
hist(residuals(SAR_log))

 

d) Poisson generalised linear model (GLM)

SAR_poisson <- glm(spec_num ~ log_area, data = islands, family = "poisson")

# 100*(161367-43825)/161367

# Plot Poisson GLM
ggplot(islands, aes(log_area, spec_num)) +
  geom_point() +
  geom_smooth(method = "glm", formula = y ~ x,
              method.args = list(family = "poisson"),
              color = "darkblue") +
  labs(title = "GLM Poisson",
       x = "Area log-transformed (log10 km2)",
       y = "Species richness") +
  theme_bw()

# Plot Poisson GLM in log-log space
newdata_SAR_poisson <- data.frame(log_area = islands$log_area)
newdata_SAR_poisson$pred <- as.numeric(predict(SAR_poisson,
                                               newdata = newdata_SAR_poisson,
                                               type = "response"))
# Log-transforming the predictions
newdata_SAR_poisson$pred_log10 <- log10(newdata_SAR_poisson$pred + 1)

ggplot(islands, aes(log_area, log_spec_num)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
              color = "darkgreen") +
  geom_line(data = newdata_SAR_poisson, aes(log_area, pred_log10),
            color = "darkblue", linewidth = 1) +
  labs(title = "log-log Species-area relationship",
       x = "Area log-transformed (log10 km2)",
       y = "Species richness log-transformed") +
  theme_bw()

Equivalent code using base R
Bonus
par(mfrow = c(1, 1))
plot(islands$log_area, islands$log_spec_num,
     xlab = "log10 island area (log10 km)",
     ylab = "log10 species number",
     main = "log-log Species-area relationship")
car::regLine(SAR_log)

newdata_SAR_poisson <- data.frame(log_area = seq(min(islands$log_area),
                                                 max(islands$log_area),
                                                 length.out = 100))
predicted_richness_SAR_poisson <- predict(SAR_poisson, newdata = newdata_SAR_poisson,
                                          type = "response")
points(newdata_SAR_poisson$log_area, log10(predicted_richness_SAR_poisson + 1),
       type = "l", col = "purple", lwd = 2)


Now, let’s plot all models together.

# Make a data frame with all the predictions
newdata_all <- data.frame(area = islands$area,
                          log_area = islands$log_area)

newdata_all$pred_SAR_lm <- as.numeric(predict(SAR_lm,
                                              newdata = newdata_all,
                                              type = "response"))

newdata_all$pred_SAR_power <- as.numeric(predict(SAR_power,
                                                 newdata = newdata_all,
                                                 type = "response"))

newdata_all$pred_SAR_log <- as.numeric(predict(SAR_log,
                                               newdata = newdata_all,
                                               type = "response"))
newdata_all$pred_SAR_log <- (10^newdata_all$pred_SAR_log)-1

newdata_all$pred_SAR_poisson <- as.numeric(predict(SAR_poisson,
                                                   newdata = newdata_all,
                                                   type = "response"))
# newdata_all$pred_SAR_poisson_exp10 <- (10^newdata_all$pred_SAR_poisson) - 1

# we also need to reverse the log-transformation for area
# newdata_all$logarea_exp10 <- (10^newdata_all$log_area)-1

# Putting the table in a long format
newdata_all <- pivot_longer(newdata_all, cols = contains("SAR"),
                            names_to = "Model", values_to = "Predicted values")

ggplot(newdata_all, aes(area, `Predicted values`)) +
  geom_point(data = islands, aes(area, spec_num)) +
  geom_line(aes(color = Model), linewidth = 1) +
  scale_color_brewer(palette = "Set1") +
  labs(x = "Area (km^2)", y = "Species richness") +
  theme_bw()

Equivalent code using base R
Bonus
plot(islands$area, islands$spec_num, xlab = "Island area (km2)",
     ylab = "Species number", main = "Species-area relationship")

# plot the simple linear model
car::regLine(SAR_lm, col = "darkgreen")

# plot the non-linear power law model
points(newdata$area, predicted_richness_SAR_power, type = "l",
       col = "darkred", lwd = 2)

# plot the log-log space model in untransformed space
newdata_SAR_log <- data.frame(log_area = log10(seq(min(islands$area),
                                                   max(islands$area),
                                                   length.out = 100) + 1))
predicted_richness_SAR_log <- predict(SAR_log, newdata = newdata_SAR_log)
points((10^newdata_SAR_log$log_area) - 1, (10^predicted_richness_SAR_log) - 1,
       type = "l", col = "purple", lwd = 2)
points(newdata$area, (10^predicted_richness_SAR_log) - 1, type = "l",
       col = "darkgreen", lwd = 2, lty = 2)

# plot the Poisson model in untransformed space
predicted_richness_SAR_poisson <- predict(SAR_poisson,
                                          newdata = newdata_SAR_poisson,
                                          type = "response")
points((10^newdata_SAR_poisson$log_area) - 1,
       predicted_richness_SAR_poisson,
       type = "l", col = "darkblue", lwd = 2)

 

How do these models compare?

AIC(SAR_lm, SAR_power, SAR_log, SAR_poisson)
##             df        AIC
## SAR_lm       3  4489.0290
## SAR_power    3  4442.4913
## SAR_log      3   454.0032
## SAR_poisson  2 53799.0371

Warning: You cannot compare AIC values among models with different response variables or different distribution families. This means we can here only compare the AIC values of the first two models!

 

3. Multi-predictor model of island biodiversity

Apply a log10 transformation to area, elevation, precipitation, and temperature following:
Kreft, H. et al. 2008. Global diversity of island floras from a macroecological perspective. Ecology Letters 11:116-127.

islands$log_area <- log10(islands$area)
islands$log_spec_num <- log10(islands$spec_num + 1)

islands$log_max_elev <- log10(islands$max_elev + 1)
islands$log_mean_prec <- log10(islands$mean_prec)
islands$log_mean_temp <- log10(islands$mean_temp + 5)
str(islands)
## 'data.frame':    296 obs. of  19 variables:
##  $ entity_ID       : num  2 3 126 127 128 129 131 132 133 134 ...
##  $ geo_entity      : chr  "Cartier Island" "Christmas Island" "Cocos (Keeling) North Keeling Island" "Lord Howe Island" ...
##  $ longitude       : num  123.6 105.6 96.8 159.1 168 ...
##  $ latitude        : num  -12.5 -10.5 -11.8 -31.6 -29 ...
##  $ area            : num  0.0499 139.6099 3.3589 17.4658 37.8766 ...
##  $ dist            : num  290 1310 1625 570 1393 ...
##  $ age_Ma          : num  NA 20 NA 6 2.68 ...
##  $ geology         : chr  "shelf" "volcanic" "atoll" "volcanic" ...
##  $ spec_num        : num  1 200 33 212 137 50 135 172 199 192 ...
##  $ end_num         : num  0 15 0 69 34 6 0 0 0 0 ...
##  $ max_elev        : num  4 367 26 465 312 ...
##  $ mean_temp       : num  27.9 25.4 NA 19.5 19.2 ...
##  $ mean_prec       : num  1383 2178 NA 1757 1445 ...
##  $ mean_homogeneity: num  NA 0.301 NA 0.213 0.301 ...
##  $ log_area        : num  -1.302 2.145 0.526 1.242 1.578 ...
##  $ log_spec_num    : num  0.301 2.303 1.531 2.328 2.14 ...
##  $ log_max_elev    : num  0.699 2.566 1.431 2.668 2.496 ...
##  $ log_mean_prec   : num  3.14 3.34 NA 3.24 3.16 ...
##  $ log_mean_temp   : num  1.52 1.48 NA 1.39 1.38 ...

 

Simplify the geology column; change all values that are floor or volcanic to oceanic and afterwards set all values that are not atoll, fragment, oceanic or shelf to NA.

islands$geology <- as.character(islands$geology)
islands$geology[which(islands$geology == "floor" |
                        islands$geology == "volcanic")] <- "oceanic"
islands$geology[which(islands$geology %in%
                        c("floor" , "volcanic"))] <- "oceanic"

islands$geology[which(islands$geology == "floor")] <- "oceanic"
islands$geology[which(islands$geology == "volcanic")] <- "oceanic"


islands$geology[which(!islands$geology %in% c("atoll" , "fragment", "oceanic" , "shelf"))] <- NA

summary(as.factor(islands$geology))
##    atoll fragment  oceanic    shelf 
##       28       47      187       34

 

Fit a multiple linear regression to predict biodiversity patterns on islands using the following biogeographic and environmental variables: island area, distance, elevation, temperature, precipitation and island geology.

colnames(islands)[c(6, 8, 15:19)]
## [1] "dist"          "geology"       "log_area"      "log_spec_num" 
## [5] "log_max_elev"  "log_mean_prec" "log_mean_temp"
new_islands <- islands[, c(6, 8, 15:19)]
new_islands <- na.omit(new_islands) # remove observations with NAs

Multi.mod <- lm(log_spec_num ~ log_area + dist + log_max_elev +
                  log_mean_temp + log_mean_prec + geology,
                data = new_islands)

summary(Multi.mod)
## 
## Call:
## lm(formula = log_spec_num ~ log_area + dist + log_max_elev + 
##     log_mean_temp + log_mean_prec + geology, data = new_islands)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.07139 -0.21153  0.02771  0.19894  0.75164 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.057e+00  2.635e-01  -4.012 7.84e-05 ***
## log_area         3.543e-01  2.138e-02  16.570  < 2e-16 ***
## dist            -8.805e-05  1.450e-05  -6.073 4.38e-09 ***
## log_max_elev     8.780e-02  3.223e-02   2.725 0.006871 ** 
## log_mean_temp    1.284e+00  1.425e-01   9.005  < 2e-16 ***
## log_mean_prec    1.857e-01  5.148e-02   3.607 0.000371 ***
## geologyfragment  2.115e-01  8.248e-02   2.564 0.010908 *  
## geologyoceanic   1.153e-01  7.282e-02   1.584 0.114415    
## geologyshelf     3.241e-01  9.099e-02   3.562 0.000437 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3036 on 263 degrees of freedom
## Multiple R-squared:  0.7726, Adjusted R-squared:  0.7657 
## F-statistic: 111.7 on 8 and 263 DF,  p-value: < 2.2e-16
crPlots(Multi.mod)

 

Can we improve predictions of species richness by removing individual model terms?
Use manual backward selection to specify the best model
Hints
1. Remove missing observations from your data
2. Use ‘drop1(mod, test=“F”)’
3. Use ‘drop1(update(Multi.mod,~ . -log_max_elev), test = “F”)’

Multi.mod1 <- update(Multi.mod, ~ . -log_max_elev) 
AIC(Multi.mod, Multi.mod1)
##            df      AIC
## Multi.mod  10 134.3083
## Multi.mod1  9 139.8792

 

4. The General dynamic model of island biogeography (GDM)

# Assign archipelago names for 4 major archipelagos
islands$archipelago <- NA
islands$archipelago[which(islands$entity_ID %in% c(169:188))] <- "Hawaii"
islands$archipelago[which(islands$entity_ID %in% c(145:151))] <- "Canaries"
islands$archipelago[which(islands$entity_ID %in% c(154:168))] <- "Galapagos"
islands$archipelago[which(islands$entity_ID %in% c(131:139))] <- "Azores"
islands_gdm <- islands[!is.na(islands$archipelago),]

# Log-transforming age
islands_gdm$log_age_Ma <- log10(islands_gdm$age_Ma)

 

The General dynamic model of island biogeography (GDM) applied: just Hawaii (part I)

Species = Area x Time x Time2

# subset data
Hawaii <- islands_gdm[which(islands_gdm$archipelago == "Hawaii"), ]

# The ATT^2 model
gdm1 <- lm(spec_num ~ log_area + age_Ma + I(age_Ma^2), data = Hawaii)
summary(gdm1)
## 
## Call:
## lm(formula = spec_num ~ log_area + age_Ma + I(age_Ma^2), data = Hawaii)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -213.483  -24.157   -9.271   68.987  130.922 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -64.9936   108.1344  -0.601 0.558993    
## log_area    118.3895    24.5249   4.827 0.000414 ***
## age_Ma       21.7449    19.7480   1.101 0.292440    
## I(age_Ma^2)  -0.7983     0.6953  -1.148 0.273310    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 101.5 on 12 degrees of freedom
## Multiple R-squared:  0.7941, Adjusted R-squared:  0.7427 
## F-statistic: 15.43 on 3 and 12 DF,  p-value: 0.0002025
crPlots(gdm1)

 

The General dynamic model of island biogeography (GDM) applied: just Hawaii (part II)

Species = log(Area) x log(Time) x log(Time)2

gdm2 <- lm(spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2), data = Hawaii)
summary(gdm2)
## 
## Call:
## lm(formula = spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2), 
##     data = Hawaii)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -214.923  -29.025    2.973   48.080  140.072 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -47.47      87.33  -0.544 0.596700    
## log_area          115.33      22.33   5.164 0.000235 ***
## log_age_Ma        209.23     157.76   1.326 0.209460    
## I(log_age_Ma^2)  -117.18     103.54  -1.132 0.279836    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 99.85 on 12 degrees of freedom
## Multiple R-squared:  0.8006, Adjusted R-squared:  0.7508 
## F-statistic: 16.06 on 3 and 12 DF,  p-value: 0.0001678
AIC(gdm1, gdm2)
##      df      AIC
## gdm1  5 198.6306
## gdm2  5 198.1210
crPlots(gdm2)

 

A linear mixed effects model for all 4 archipelagos

Species = log(Area) + log(Time) + log(Time)2 + (1|archipelago)

gdm_all <- lme4::glmer(spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2) +
                         (1|archipelago), data = islands_gdm, family = poisson)
summary(gdm_all)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: spec_num ~ log_area + log_age_Ma + I(log_age_Ma^2) + (1 | archipelago)
##    Data: islands_gdm
## 
##      AIC      BIC   logLik deviance df.resid 
##   1041.0   1050.2   -515.5   1031.0       41 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.3764 -2.7981 -0.7854  3.0592  6.8180 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  archipelago (Intercept) 0.2325   0.4822  
## Number of obs: 46, groups:  archipelago, 4
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      3.67426    0.24472   15.01   <2e-16 ***
## log_area         0.75511    0.01443   52.32   <2e-16 ***
## log_age_Ma       0.62206    0.04338   14.34   <2e-16 ***
## I(log_age_Ma^2) -0.63398    0.03875  -16.36   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) log_ar lg_g_M
## log_area    -0.163              
## log_age_Ma  -0.037  0.147       
## I(lg_g_M^2)  0.012 -0.105 -0.853
effect_plot(gdm_all, pred = "log_area", outcome.scale = "response",
            plot.points = TRUE)

effect_plot(gdm_all, pred = "log_age_Ma", outcome.scale = "response",
            plot.points = TRUE)

model_performance(gdm_all, metrics = "common")
## # Indices of model performance
## 
## AIC      |      BIC | R2 (cond.) | R2 (marg.) |   ICC |   RMSE
## --------------------------------------------------------------
## 1041.042 | 1050.185 |      0.997 |      0.813 | 0.986 | 66.334