back to overview    

Preparation: Open your RStudio project and download new data for today from Stud-IP (log in first) and unzip them into your project folder ’/data/. You can use getwd() to locate your current working directory, which should be your project folder. Please install the following R-packages using install.packages():

 

If you want to visualize this tutorial in the viewer inside RStudio (to save space on your screen) run the following chunk of code:

install.packages("rstudioapi") # install an R-package required for this step
dir <- tempfile()
dir.create(dir)
download.file("https://gift.uni-goettingen.de/mcmmb/index.html", destfile = file.path(dir, "index.html"))
download.file("https://gift.uni-goettingen.de/mcmmb/Day6.html", destfile = file.path(dir, "Day6.html"))
htmlFile <- file.path(dir, "Day6.html")
rstudioapi::viewer(htmlFile)

Now you can conveniently copy code from the viewer into your script.
 

Load R packages & island data set

library(maps)
library(classInt)
data(worldMapEnv)
library(psych)
library(car)
library(lme4)
library(jtools)
library(performance)
library(ggplot2)
library(GGally)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(ggpmisc)
library(cowplot)
library(dplyr)
library(tidyr)
library(GIFT)

 

1. Data extraction & exploration

1.1. Data extraction from GIFT database

We will download island-level angiosperm species richness and environmental data from the GIFT database (Weigelt et al. 2020) using the GIFT R-package.

# Islands in GIFT
islands <- GIFT_regions(GIFT_version = "2.1")
islands <- islands[which(islands$suit_geo == 1 & islands$entity_class == "Island"), 
                   c("entity_ID", "geo_entity")]

# Geographic characteristics
islands_geo <- GIFT_env(entity_ID = islands$entity_ID,
                        miscellaneous = c("longitude","latitude","area","dist"),
                        GIFT_version = "2.1")
islands_geo$geo_entity <- NULL

islands <- inner_join(islands, islands_geo, by="entity_ID")

# Island geology
islands_geology <- read.csv("data/islands_geology.csv")

islands <- inner_join(islands, islands_geology, by="entity_ID")

# Angiosperm species richness
islands_richness <- GIFT_richness(taxon_name = "Angiospermae", GIFT_version = "2.1")
islands_richness <- islands_richness[!is.na(islands_richness$native),
                                     c("entity_ID","native","endemic_min")]
names(islands_richness) <- c("entity_ID","spec_num","end_num")

islands <- inner_join(islands, islands_richness, by="entity_ID")

# Environmental data
islands_env <- GIFT_env(entity_ID = islands$entity_ID,
                        rasterlayer = c("mx30_grd", "CHELSA_bio10_1", 
                                       "CHELSA_bio10_12", "Homogeneity_01_05_1km_uint16"),
                        sumstat = list("max","mean","mean","mean"),
                        GIFT_version = "2.1")

names(islands_env) <- c("entity_ID", "geo_entity", "max_elev", "mean_temp", "mean_prec", "mean_homogeneity")
islands_env$geo_entity <- NULL

islands <- inner_join(islands, islands_env, by="entity_ID")
str(islands)
## 'data.frame':    1068 obs. of  14 variables:
##  $ entity_ID       : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ geo_entity      : chr  "Ashmore Reef" "Cartier Island" "Christmas Island" "Coral Sea Islands Territory" ...
##  $ longitude       : num  123 124 106 152 114 ...
##  $ latitude        : num  -12.3 -12.5 -10.5 -19 -28.5 ...
##  $ area            : num  17.836 0.0499 139.6099 3.9508 5.9764 ...
##  $ dist            : num  344.7 290.4 1309.6 315.4 59.1 ...
##  $ age_MA          : num  NA NA 20 NA NA NA NA NA NA NA ...
##  $ geology         : chr  "shelf" "shelf" "volcanic" "atoll" ...
##  $ spec_num        : num  23 1 200 21 72 7 21 1 1 2 ...
##  $ end_num         : num  0 0 15 0 NA NA NA NA NA NA ...
##  $ max_elev        : num  12 4 367 18 16 0 10 0 0 0 ...
##  $ mean_temp       : num  27.9 27.9 25.4 24 20.8 ...
##  $ mean_prec       : num  1395 1383 2178 1085 441 ...
##  $ mean_homogeneity: num  0.296 NA 0.301 NA 0.227 ...
# write.csv(islands, "data/islands.csv", row.names = FALSE)
# islands <- read.csv("data/islands.csv")

1.2. Map species richness

With ggplot2

# World coastlines
world <- ne_coastline(scale = "medium", returnclass = "sf")
class(world)
## [1] "sf"         "data.frame"
# Other projections
eckertIV <- "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" 
mollweide <- "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs "
# https://semba-blog.netlify.app/01/26/2020/world-map-and-map-projections/

# Links regarding the choice of projections
# http://www.geography.hunter.cuny.edu/~jochen/gtech361/lectures/lecture04/concepts/Map%20coordinate%20systems/Map%20projections%20and%20distortion.htm
# http://www.geo.hunter.cuny.edu/~jochen/gtech201/lectures/lec6concepts/map%20coordinate%20systems/how%20to%20choose%20a%20projection.htm

plot_grid(
  ggplot(world) +
    geom_sf() +
    labs(title = "",
         subtitle = "WGS84 projection") +
    theme_bw() +
    theme(panel.border = element_blank()),
  ggplot(world) +
    geom_sf() +
    coord_sf(crs = eckertIV) +
    labs(title = "World map",
         subtitle = "EckertIV projection") +
    theme_bw() +
    theme(panel.border = element_blank()),
  ggplot(world) +
    geom_sf() +
    coord_sf(crs = mollweide) +
    labs(title = "",
         subtitle = "Mollweide projection") +
    theme_bw() +
    theme(panel.border = element_blank()),
  nrow = 2)

To combine the islands data.frame with the world map, we first need to convert it to an sf object.

# Convert islands to sf object
islands_sf <- st_as_sf(islands, coords = c("longitude", "latitude"),
                       crs = "+proj=longlat +ellps=WGS84 +no_defs")

# Map
ggplot(world) +
  geom_sf() +
  geom_sf(data = islands_sf, shape = 1, stroke = 1.5,
          aes(size = spec_num, color = spec_num)) +
  scale_color_viridis_c("Species richness", trans = "log", guide = "legend") +
  scale_size_continuous("Species richness", trans = "log") +
  coord_sf(crs = mollweide) + # coord_sf(crs = eckertIV) +
  labs(title = "Angiosperm species richness",
       subtitle = "Mollweide projection") +
  theme_bw()
## Warning in scale_color_viridis_c("Species richness", trans = "log", guide =
## "legend"): log-2.718282 transformation introduced infinite values.
## Warning in scale_size_continuous("Species richness", trans = "log"):
## log-2.718282 transformation introduced infinite values.
## Warning in sqrt(x): NaNs produced
## Warning: Removed 52 rows containing missing values or values outside the scale range
## (`geom_sf()`).

# https://community.rstudio.com/t/ggplot2-is-it-possible-to-combine-color-fill-and-size-legends/17072
Bonus: custom discrete color scale
Bonus
# Arrange islands by species richness (low to high)
islands <- islands[order(islands$spec_num), ]

# create colour gradient
my.class.fr.S <- classIntervals(
  islands$spec_num, n = 9, style = "fixed",
  fixedBreaks = c(0, 25, 50, 100, 200, 400, 800, 1600, 3200, 6400)) # bin data into n quantiles
my.pal <- c("midnightblue", "dodgerblue4", "dodgerblue2", "chartreuse4",
            "gold", "orangered1", "darkred")
my.col.fr.S <- findColours(my.class.fr.S, my.pal) # ramp colors based on classInts
my.col.fr.S[islands$spec_num < 1] <- "#666666" # assign grey for zero species

# specify dimensions of plot for saving
if(.Platform$OS.type == "windows"){ 
  windows(width=20*0.3937008, height=12*0.3937008)
}else{
  X11(width=20*0.3937008, height=12*0.3937008,type="cairo")
}
# quartz(width=20*0.3937008, height=12*0.3937008) # for Mac 

map("world", fill = FALSE)
title(main = "Angiosperm species richness")
points(islands$longitude, islands$latitude,
       pch=ifelse(islands$spec_num >= 1,1,4),
       cex = (log10(islands$spec_num+1)/2.6)^2+1, col = my.col.fr.S, lwd=2)

if(.Platform$OS.type == "windows"){ 
  savePlot("data/islands.pdf", type = "pdf")
}else{
  dev.copy2pdf(file = "data/islands.pdf")
}

# quartz.save("Data/islands.pdf", type = "pdf") # for Mac 
dev.off()
 

1.3. Correlations among biogeographic & environmental variables

Meta-data
1. latitude: Latitude (in decimal degrees)
2. area: Island area (in km2)
3. dist: Distance to the nearest continent (in km)
4. age_MA: Island geological age (million years)
5. max_elev: Maximum elevation (m)
6. mean_temp: Mean annual temperature (degrees Celsius)
7. mean_prec: Mean annual precipitation (mm yr-1)
8. mean_homogeneity: Habitat heterogeneity (similarity of vegetation index with surrounding pixels)

abiotic <- c("latitude", "area", "dist", "age_MA", "max_elev",
             "mean_temp", "mean_prec", "mean_homogeneity")

ggpairs(islands[, abiotic],
        upper = list(continuous = wrap(ggally_cor, digits = 1))) +
  theme_bw()

# Other way to have a pair plot:
# pairs.panels(islands[, c(4:7, 11:14)],
#              density = FALSE, ellipses = FALSE,
#              hist.col = "white")

 

2. The species area relationship (SAR)

We here aim at linking, using the best model, island area and plant species richness.

a) Linear model

Species Richness ~ Area

SAR0 <- lm(spec_num ~ area, data = islands)
summary(SAR0)
## 
## Call:
## lm(formula = spec_num ~ area, data = islands)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2659.4  -150.1  -121.6   -23.7  4186.3 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.601e+02  1.368e+01   11.70   <2e-16 ***
## area        1.670e-02  3.781e-04   44.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 444.6 on 1066 degrees of freedom
## Multiple R-squared:  0.6466, Adjusted R-squared:  0.6463 
## F-statistic:  1951 on 1 and 1066 DF,  p-value: < 2.2e-16
ggplot(islands, aes(area, spec_num)) +
  geom_point() +
  stat_smooth(method = "lm") +
  stat_poly_eq(method = "lm", formula = y ~ x,
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))) +
  labs(x = "Area (km^2)", y = "Species richness") +
  theme_bw()
## Warning: The dot-dot notation (`..eq.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(eq.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'

# plot(spec_num ~ area, data = islands)
# car::regLine(SAR0, col = "darkgreen")

 

b) Non-linear model

Species richness ~ c + Areaz

SAR1 <- nls(spec_num ~ c + area^z, data = islands,
            start = list(c = 0, z = 1))
summary(SAR1)
## 
## Formula: spec_num ~ c + area^z
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## c 1.256e+02  1.252e+01   10.03   <2e-16 ***
## z 6.854e-01  1.565e-03  437.92   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 405.3 on 1066 degrees of freedom
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 1.19e-07
ggplot(islands, aes(area, spec_num)) +
  geom_point() +
  geom_smooth(method = "nls",
              formula = y ~ c + x^z,
              method.args = list(start = c(c = 0, z = 1)),
              se = FALSE) +
  labs(title = "Species-area relationship",
       x = "Island area (km^2)", y = "Species number") +
  theme_bw()

# Predictions
newdata <- data.frame(area = seq(min(islands$area), max(islands$area),
                                 length.out = 100))
predicted_richness_SAR1 <- predict(SAR1, newdata = newdata)
head(predicted_richness_SAR1)
## [1]  125.5888  577.4786  852.2704 1085.0528 1294.1615 1487.2630
# plot(islands$area, islands$spec_num, xlab = "Island area (km)",
#      ylab = "Species number", main = "Species-area relationship")
# points(newdata$area, predicted_richness_SAR1, type = "l",
#        col = "darkred", lwd = 2)

 

c) Exercise I

  1. Make histograms of species richness and area
  2. Apply a log10 transformation to species richness and area, make new columns of the transformed variables, and plot histograms of them
  3. Fit a linear model to log predict species richness using log area
  4. Make a plot of the results

 

Exercise I solutions:

Make histograms of species richness and area

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 

Apply a log10 transformation to species richness and area, make new columns of the transformed variables in islands data.frame, and plot histograms of them
&
Fit a linear model to predict species richness using area

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## 
## Call:
## lm(formula = log_spec_num ~ log_area, data = islands)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.13829 -0.32555  0.05655  0.41166  1.33863 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.16570    0.02169   53.74   <2e-16 ***
## log_area     0.47737    0.01389   34.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5602 on 1066 degrees of freedom
## Multiple R-squared:  0.5256, Adjusted R-squared:  0.5252 
## F-statistic:  1181 on 1 and 1066 DF,  p-value: < 2.2e-16

Check model assumptions

 

d) Poisson generalised linear model (GLM)

SAR3 <- glm(spec_num ~ log_area, data = islands, family = "poisson")

# 100*(161367-43825)/161367

# Plot Poisson GLM
ggplot(islands, aes(log_area, spec_num)) +
  geom_point() +
  geom_smooth(method = "glm", formula = y ~ x,
              method.args = list(family = "poisson"),
              color = "darkblue") +
  labs(title = "GLM Poisson",
       x = "Area log-transformed (log10 km2)",
       y = "Species richness") +
  theme_bw()

# Plot Poisson GLM in log-log space
newdata_SAR3 <- data.frame(log_area = islands$log_area)
newdata_SAR3$pred <- as.numeric(predict(SAR3,
                                        newdata = newdata_SAR3,
                                        type = "response"))
# Log-transforming the predictions
newdata_SAR3$pred_log10 <- log10(newdata_SAR3$pred + 1)

ggplot(islands, aes(log_area, log_spec_num)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
              color = "darkgreen") +
  geom_line(data = newdata_SAR3, aes(log_area, pred_log10),
            color = "darkblue", size = 1) +
  labs(title = "log-log Species-area relationship",
       x = "Area log-transformed (log10 km2)",
       y = "Species richness log-transformed") +
  theme_bw()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

In base R, the equivalent would be:

plot(islands$log_area, islands$log_spec_num,
     xlab = "log10 island area (log10 km)",
     ylab = "log10 species number",
     main = "log-log Species-area relationship")
car::regLine(SAR2)

newdata_SAR3 <- data.frame(log_area = seq(min(islands$log_area),
                                          max(islands$log_area),
                                          length.out = 100))
predicted_richness_SAR3 <- predict(SAR3, newdata = newdata_SAR3,
                                   type = "response")
points(newdata_SAR3$log_area, log10(predicted_richness_SAR3 + 1),
       type = "l", col = "purple", lwd = 2)

Now, let’s plot all models together.

# Make a data frame with all the predictions
newdata_all <- data.frame(area = islands$area,
                          log_area = islands$log_area)

newdata_all$pred_SAR0 <- as.numeric(predict(SAR0,
                                            newdata = newdata_all,
                                            type = "response"))

newdata_all$pred_SAR1 <- as.numeric(predict(SAR1,
                                            newdata = newdata_all,
                                            type = "response"))

newdata_all$pred_SAR2 <- as.numeric(predict(SAR2,
                                            newdata = newdata_all,
                                            type = "response"))
newdata_all$pred_SAR2 <- (10^newdata_all$pred_SAR2)-1

newdata_all$pred_SAR3 <- as.numeric(predict(SAR3,
                                            newdata = newdata_all,
                                            type = "response"))
# newdata_all$pred_SAR3_exp10 <- (10^newdata_all$pred_SAR3) - 1

# we also need to reverse the log-transformation for area
# newdata_all$logarea_exp10 <- (10^newdata_all$log_area)-1

# Putting the table in a long format
newdata_all <- pivot_longer(newdata_all, cols = contains("SAR"), names_to = "Model", values_to = "Predicted values")

ggplot(newdata_all, aes(area, `Predicted values`)) +
  geom_point(data = islands, aes(area, spec_num)) +
  geom_line(aes(color = Model), size = 1) +
  scale_color_brewer(palette = "Set1") +
  labs(x = "Area (km^2)", y = "Species richness") +
  theme_bw()

With base R, the code would be like this:

plot(islands$area, islands$spec_num, xlab = "Island area (km?)",
     ylab = "Species number", main = "Species-area relationship")

# plot the simple linear model
car::regLine(SAR0, col="darkgreen")

# plot the non-linear power law model
points(newdata$area, predicted_richness_SAR1, type = "l",
       col = "darkred", lwd = 2)

# plot the log-log space model in untransformed space
newdata_SAR2 <- data.frame(log_area = log10(seq(min(islands$area),
                                                max(islands$area),
                                                length.out = 100) + 1))
predicted_richness_SAR2 <- predict(SAR2, newdata = newdata_SAR2)
points((10^newdata_SAR2$log_area) - 1, (10^predicted_richness_SAR2) - 1,
       type = "l", col = "purple", lwd = 2)
points(newdata$area, (10^predicted_richness_SAR2) - 1, type = "l",
       col = "darkgreen", lwd = 2, lty = 2)

# plot the Poisson model in untransformed space
predicted_richness_SAR3 <- predict(SAR3, newdata = newdata_SAR3,
                                   type = "response")
points((10^newdata_SAR3$log_area) - 1, predicted_richness_SAR3,
       type = "l", col="darkblue", lwd=2)

 

How do these models compare?

AIC(SAR0, SAR1, SAR2, SAR3)
##      df        AIC
## SAR0  3  16058.205
## SAR1  3  15860.696
## SAR2  3   1796.987
## SAR3  2 158389.922

Warning: You cannot compare AIC values among models with different response variables or different distribution families. This means we can here only compare the AIC values of the first two models!

 

3. Multi-predictor model of island biodiversity

a) Exercise II

  1. Apply a log10 transformation to area, elevation, precipitation, and temperature following:
    Kreft, H. et al. 2008. Global diversity of island floras from a macroecological perspective. Ecology Letters 11:116-127.

  2. Simplify the geology column; change all values that are floor or volcanic to oceanic and afterwards set all values that are not atoll, fragment, oceanic or shelf to NA.

  3. Fit a multiple linear regression to predict biodiversity patterns on islands using the following biogeographic and environmental variables: island area, distance, elevation, temperature, precipitation and island geology.

  4. Use manual backward selection to specify the best model

Hints
1. Remove missing observations from your data
2. Use ‘drop1(mod, test=“F”)’
3. Use ‘drop1(update(Multi.mod,~ . -log_max_elev), test = “F”)’

 

Solutions to Exercise II

Transform your data

## Warning: NaNs produced
## 'data.frame':    1068 obs. of  19 variables:
##  $ entity_ID       : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ geo_entity      : chr  "Ashmore Reef" "Cartier Island" "Christmas Island" "Coral Sea Islands Territory" ...
##  $ longitude       : num  123 124 106 152 114 ...
##  $ latitude        : num  -12.3 -12.5 -10.5 -19 -28.5 ...
##  $ area            : num  17.836 0.0499 139.6099 3.9508 5.9764 ...
##  $ dist            : num  344.7 290.4 1309.6 315.4 59.1 ...
##  $ age_MA          : num  NA NA 20 NA NA NA NA NA NA NA ...
##  $ geology         : chr  "shelf" "shelf" "volcanic" "atoll" ...
##  $ spec_num        : num  23 1 200 21 72 7 21 1 1 2 ...
##  $ end_num         : num  0 0 15 0 NA NA NA NA NA NA ...
##  $ max_elev        : num  12 4 367 18 16 0 10 0 0 0 ...
##  $ mean_temp       : num  27.9 27.9 25.4 24 20.8 ...
##  $ mean_prec       : num  1395 1383 2178 1085 441 ...
##  $ mean_homogeneity: num  0.296 NA 0.301 NA 0.227 ...
##  $ log_area        : num  1.251 -1.302 2.145 0.597 0.776 ...
##  $ log_spec_num    : num  1.38 0.301 2.303 1.342 1.863 ...
##  $ log_max_elev    : num  1.114 0.699 2.566 1.279 1.23 ...
##  $ log_mean_prec   : num  3.14 3.14 3.34 3.04 2.64 ...
##  $ log_mean_temp   : num  1.52 1.52 1.48 1.46 1.41 ...

 

Simplify the geology column

##    atoll fragment  oceanic    shelf     NA's 
##       88       86      277      272      345

 

Fit multiple regression model

## [1] "dist"          "geology"       "log_area"      "log_spec_num" 
## [5] "log_max_elev"  "log_mean_prec" "log_mean_temp"
## 
## Call:
## lm(formula = log_spec_num ~ log_area + dist + log_max_elev + 
##     log_mean_temp + log_mean_prec + geology, data = new_islands)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.16507 -0.17425  0.01397  0.21620  0.78886 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -8.610e-01  1.594e-01  -5.401 9.39e-08 ***
## log_area         3.190e-01  1.192e-02  26.766  < 2e-16 ***
## dist            -1.190e-04  1.027e-05 -11.589  < 2e-16 ***
## log_max_elev     1.185e-01  2.328e-02   5.092 4.67e-07 ***
## log_mean_temp    8.467e-01  6.948e-02  12.187  < 2e-16 ***
## log_mean_prec    3.377e-01  3.669e-02   9.202  < 2e-16 ***
## geologyfragment  2.099e-01  5.517e-02   3.804 0.000156 ***
## geologyoceanic   1.347e-01  4.659e-02   2.892 0.003958 ** 
## geologyshelf     3.488e-01  5.213e-02   6.692 4.85e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3042 on 636 degrees of freedom
## Multiple R-squared:  0.8248, Adjusted R-squared:  0.8226 
## F-statistic: 374.4 on 8 and 636 DF,  p-value: < 2.2e-16

 

Can we improve predictions of species richness by removing individual model terms?

##            df      AIC
## Multi.mod  10 305.9918
## Multi.mod1  9 329.7655

 

4. The General dynamic model of island biogeography (GDM)

# Assign archipelago names for 4 major archipelagos
islands$archipelago <- NA
islands$archipelago[which(islands$entity_ID %in% c(169:188))] <- "Hawaii"
islands$archipelago[which(islands$entity_ID %in% c(145:151))] <- "Canaries"
islands$archipelago[which(islands$entity_ID %in% c(154:168))] <- "Galapagos"
islands$archipelago[which(islands$entity_ID %in% c(131:139))] <- "Azores"
islands_gdm <- islands[!is.na(islands$archipelago),]

# Log-transforming age
islands_gdm$log_age_MA <- log10(islands_gdm$age_MA)

 

The General dynamic model of island biogeography (GDM) applied: just Hawaii (part I)

Species = Area x Time x Time2

# subset data
Hawaii <- islands_gdm[which(islands_gdm$archipelago == "Hawaii"), ]

# The ATT^2 model
gdm1 <- lm(spec_num ~ log_area + age_MA + I(age_MA^2), data = Hawaii)
summary(gdm1)
## 
## Call:
## lm(formula = spec_num ~ log_area + age_MA + I(age_MA^2), data = Hawaii)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -186.217  -36.257   -7.487   54.782  132.712 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  17.8573    75.5422   0.236    0.817    
## log_area    101.6453    18.4843   5.499 7.84e-05 ***
## age_MA        6.9123    14.9556   0.462    0.651    
## I(age_MA^2)  -0.3382     0.5654  -0.598    0.559    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 99.95 on 14 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7819, Adjusted R-squared:  0.7352 
## F-statistic: 16.74 on 3 and 14 DF,  p-value: 6.624e-05
crPlots(gdm1)

 

The General dynamic model of island biogeography (GDM) applied: just Hawaii (part II)

Species = log(Area) x log(Time) x log(Time)2

gdm2 <- lm(spec_num ~ log_area + log_age_MA + I(log_age_MA^2), data = Hawaii)
summary(gdm2)
## 
## Call:
## lm(formula = spec_num ~ log_area + log_age_MA + I(log_age_MA^2), 
##     data = Hawaii)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -202.253  -32.019    0.423   44.757  129.135 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -5.993     67.654  -0.089    0.931    
## log_area         104.969     17.780   5.904 3.84e-05 ***
## log_age_MA       178.602    146.703   1.217    0.244    
## I(log_age_MA^2) -128.441     99.082  -1.296    0.216    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96.45 on 14 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.797,  Adjusted R-squared:  0.7534 
## F-statistic: 18.32 on 3 and 14 DF,  p-value: 4.054e-05
AIC(gdm1, gdm2)
##      df      AIC
## gdm1  5 222.3272
## gdm2  5 221.0441
crPlots(gdm2)

 

A linear mixed effects model for all 4 archipelagos

Species = log(Area) + log(Time) + log(Time)2 + (1|archipelago)

gdm_all <- lme4::glmer(spec_num ~ log_area + log_age_MA + I(log_age_MA^2) +
                         (1|archipelago), data = islands_gdm, family = poisson)
summary(gdm_all)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: spec_num ~ log_area + log_age_MA + I(log_age_MA^2) + (1 | archipelago)
##    Data: islands_gdm
## 
##      AIC      BIC   logLik deviance df.resid 
##   1061.6   1070.9   -525.8   1051.6       43 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.2215 -2.5559 -0.9946  2.9694  6.9950 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  archipelago (Intercept) 0.2331   0.4828  
## Number of obs: 48, groups:  archipelago, 4
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      3.63903    0.24484   14.86   <2e-16 ***
## log_area         0.76793    0.01397   54.96   <2e-16 ***
## log_age_MA       0.62899    0.04339   14.50   <2e-16 ***
## I(log_age_MA^2) -0.64479    0.03853  -16.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) log_ar lg__MA
## log_area    -0.158              
## log_age_MA  -0.036  0.141       
## I(lg__MA^2)  0.009 -0.091 -0.855
effect_plot(gdm_all, pred = "log_area", outcome.scale = "response", plot.points = TRUE)

effect_plot(gdm_all, pred = "log_age_MA", outcome.scale = "response", plot.points = TRUE)

model_performance(gdm_all, metrics = "common")
## # Indices of model performance
## 
## AIC      |      BIC | R2 (cond.) | R2 (marg.) |   ICC |   RMSE
## --------------------------------------------------------------
## 1061.567 | 1070.923 |      0.997 |      0.823 | 0.982 | 65.521