Preparation: Open your RStudio project and download
new data for today from Stud-IP
(log in first) and unzip them into your project folder ’/data/. You can
use getwd() to get your current working directory which is
your project folder as long as you do not change it. Please install the
following R-packages using install.packages() if not done
yet:
If you want to visualize this tutorial in the viewer inside RStudio (to save space on your screen) run the following chunk of code:
install.packages("rstudioapi") # install an R-package required for this step
dir <- tempfile()
dir.create(dir)
download.file("https://gift.uni-goettingen.de/mcmmb/index.html", destfile = file.path(dir, "index.html"))
download.file("https://gift.uni-goettingen.de/mcmmb/Day5.html", destfile = file.path(dir, "Day5.html"))
htmlFile <- file.path(dir, "Day5.html")
rstudioapi::viewer(htmlFile)
Now you can conveniently copy code from the viewer into your
script.
library("dplyr")
library("terra")
library("sf")
library("RColorBrewer")
library("car")
library("ggplot2")
library("viridis")
library("cowplot")
library("tidyterra")
For today’s exercise, we will use again the elevation, temperature (‘bio1’), and precipitation (‘bio12’) raster layers we downloaded during the previous classes.
Download the following raster layers
GMTED - Digital elevation data https://topotools.cr.usgs.gov/gmted_viewer/viewer.htm
Elevation
above sea level (mn30_grd)
Chelsa - Climate data http://chelsa-climate.org/
Annual
mean temperature (CHELSA_bio10_01.tif)
Mean
annual precipitation (CHELSA_bio10_12.tif)
Extract and copy the rasters into your R-project folder ../data/
# Import global digital elevation model at 30 arc seconds resolution
elev <- rast("data/mn30_grd/") + 0 # + 0 to treat raster as numeric
## |---------|---------|---------|---------|=========================================
names(elev) <- "mn30_grd"
plot(elev, main = "Elevation a.s.l. (m)", col = terrain.colors(255))
# Import climate data from Chelsa at 30 arc seconds resolution -->
bio1 <- rast("data/CHELSA_bio10_01.tif") # temperature
bio12 <- rast("data/CHELSA_bio10_12.tif") # precipitation
plot(bio1, main = "Annual mean temperature (C°)", col = inferno(255))
plot(bio12, main = "Mean annual precipitations (mm)")
Let’s also load again the grid that was used to estimate the Neotropical palm distributions (Kreft, H., Sommer, J.H. & Barthlott, W. (2006) The significance of geographic range size for spatial diversity patterns in Neotropical palms. Ecography, 29, 21-30) and the shapefile representing Americas coastline.
# gridcells corresponding to palm distribution data and americas coastline
grid <- st_read("data/30min_grid_select50%.shp")
## Reading layer `30min_grid_select50%' from data source
## `P:\POSTDOC\Teaching\MCMM - Macroecology SE\MCMMB\data\30min_grid_select50%.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 6038 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -116.0007 ymin: -35 xmax: -35.00066 ymax: 36.5
## CRS: NA
americas <- st_read("data/americas.shp")
## Reading layer `americas' from data source
## `P:\POSTDOC\Teaching\MCMM - Macroecology SE\MCMMB\data\americas.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -124.7158 ymin: -55.91972 xmax: -29.84 ymax: 49.37666
## CRS: NA
# remember to define the Coordinate Reference System (CRS)
st_crs(grid) <- "EPSG:4326" # "+proj=longlat +ellps=WGS84 +no_defs"
st_crs(americas) <- "EPSG:4326" # "+proj=longlat +ellps=WGS84 +no_defs"
ggplot(grid) +
geom_sf() +
geom_sf(data = americas, fill = NA, color = "black") +
theme_void() +
ylim(-40, 40)
Instead of loading the .csv file as in the previous practicals, today we directly load the RData files of palm species richness we made during Week 4. If you don’t have it already in your data folder, you can download it from StudIP here.
load("data/species_num.RData")
We here aim at getting environmental values from the different raster layers for all palm gridcells.
First, we can crop the environmental raster layers based on the palm gridcell shapefile. That’s because the raster layers have a larger extents than what we need for our Neotropical palm data (see also Day 1 section 3.2 and Day 2 section 6).
ext(grid)
## SpatExtent : -116.000656, -35.000656, -35, 36.5 (xmin, xmax, ymin, ymax)
ext(bio1)
## SpatExtent : -180.00013888885, 179.99985967115, -90.00013888885, 83.99986041515 (xmin, xmax, ymin, ymax)
elev <- crop(elev, ext(grid)) # crop raster layers to smaller extent
bio1 <- crop(bio1, ext(grid))
bio12 <- crop(bio12, ext(grid))
par(mfrow = c(1, 3))
plot(elev, main = "Elevation a.s.l. (m)", col = terrain.colors(255))
plot(bio1, main = "Annual mean temperature (C°)", col = inferno(255))
plot(bio12, main = "Mean annual precipitation (mm)",
col = colorRampPalette(brewer.pal(9, "Blues"))(255))
We have an elevation value of 0 for the sea in the elevation raster. We can set it to NA by doing a raster “math trick” i.e. multiplying the layer with bio1:
elev <- elev * bio1 / bio1
# This trick works because in R, every operations involving NA always results in NA.
#E.g. if we multiply a value of 0 (pixel in the ocean) from the elev raster by the respective pixel in bio1 (with value = NA), the resulting value will also be NA.
# Alternative way to do it:
#elev <- ifel(elev <= 0, NA, elev)
par(mfrow = c(1, 1))
zoom(elev, e = ext(c(-82, -76, -6, -2)), main = "Elevation a.s.l. (m)",
col = terrain.colors(255))
plot(st_geometry(grid), add = TRUE)
We can now extract environmental values per grid cell for all raster layers (see Day 3 section 4.2 for how to extract these values using spatial points, e.g. species occurrences).
As an example, let’s see how to do it with the first 10 cells of our grid.
bio1_values <- extract(bio1, grid[c(1:10), ])
str(bio1_values) # bio1_values is a data.frame
## 'data.frame': 36000 obs. of 2 variables:
## $ ID : num 1 1 1 1 1 1 1 1 1 1 ...
## $ CHELSA_bio10_01: num 21 21.2 21.4 21.5 21.6 ...
bio1_values$CHELSA_bio10_01[1:5] # 5 first values
## [1] 20.95 21.25 21.35 21.55 21.65
# Mean temperature across all grid cells
mean(bio1_values$CHELSA_bio10_01)
## [1] 16.53739
# Mean value per grid cell
tapply(bio1_values$CHELSA_bio10_01, bio1_values$ID, mean)
## 1 2 3 4 5 6 7 8
## 20.50053 15.60922 13.76578 20.51814 18.46900 16.06308 13.83175 11.87992
## 9 10
## 18.01908 16.71742
# With dplyr
bio1_values %>%
group_by(ID) %>%
reframe(mean_grid = mean(CHELSA_bio10_01))
## # A tibble: 10 × 2
## ID mean_grid
## <dbl> <dbl>
## 1 1 20.5
## 2 2 15.6
## 3 3 13.8
## 4 4 20.5
## 5 5 18.5
## 6 6 16.1
## 7 7 13.8
## 8 8 11.9
## 9 9 18.0
## 10 10 16.7
# Or, we directly extract the mean temperature per grid cell
bio1_mean <- extract(bio1, grid[c(1:10), ], fun = mean)
bio1_mean
## ID CHELSA_bio10_01
## 1 1 20.50053
## 2 2 15.60922
## 3 3 13.76578
## 4 4 20.51814
## 5 5 18.46900
## 6 6 16.06308
## 7 7 13.83175
## 8 8 11.87992
## 9 9 18.01908
## 10 10 16.71742
# We can also use other functions like the standard deviation
bio1_sd <- extract(bio1, grid[c(1:10), ], fun = sd)
bio1_sd
## ID CHELSA_bio10_01
## 1 1 1.9705359
## 2 2 3.2801623
## 3 3 2.2745926
## 4 4 1.8354143
## 5 5 1.6864150
## 6 6 2.0822390
## 7 7 2.6153251
## 8 8 0.5598465
## 9 9 2.1898481
## 10 10 1.4871673
How long would it take to extract mean values for all grid cells?
nrow(grid) # number of grid cells
## [1] 6038
nrow(grid)/10*system.time(bio1_mean <- extract(bio1, grid[c(1:10), ],
fun = mean))/60
## user system elapsed
## 84.43137 47.19703 129.71637
# on my (not very performant) office PC it would take about 125 minutes...
Note: There are faster packages for dealing with raster layers
available now (velox, fasterRaster). Here, we want to
make use of a workaround that also helps us top understand the data
better.
Alternative work-around: aggregating the raster
layers
Aggregate the environmental raster layer to the same resolution of the
palm grid (see also Day 1 section 3.2 on raster resolution and
aggregation).
The grid is at a resolution of 30 arc-minutes.
Our raster layers have a resolution of:
res(bio1)
## [1] 0.008333333 0.008333333
0.008333333 arc-degree.
One arc-degree is equivalent to 3600 arc-seconds, so 0.008333333
arc-degree correspond to:
0.008333333 * 3600
## [1] 30
30 arc-seconds, which is roughly 1 km at the Equator.
1 arc-minute is 60 arc-seconds, so 30 arc-minutes represent:
30*60
## [1] 1800
1800 arc-seconds.
Since we have 30 arc-seconds resolution, we need to aggregate the raster layers by a factor of:
1800/30
## [1] 60
(PS. As a reminder, you can also check this online tool: https://www.opendem.info/arc2meters.html)
So, let’s aggregate our raster layers by a factor 60:
bio1_30min <- aggregate(bio1, fact = 60, fun = mean, na.rm = TRUE)
dim(bio1)
## [1] 8580 9720 1
ext(bio1)
## SpatExtent : -116.00013914485, -35.00013946885, -35.00013910885, 36.49986060515 (xmin, xmax, ymin, ymax)
dim(bio1_30min) # many fewer cell
## [1] 143 162 1
ext(bio1_30min)
## SpatExtent : -116.00013914485, -35.00013946885, -35.00013910885, 36.49986060515 (xmin, xmax, ymin, ymax)
We also extract the grid centroids:
grid_centroids <- st_centroid(grid)
## Warning: st_centroid assumes attributes are constant over geometries
par(mfrow = c(1, 2))
zoom(bio1, e = ext(c(-82, -76, -6, -2)),
main = "30 seconds resolution",
col = inferno(255))
plot(st_geometry(grid), add=TRUE)
zoom(bio1_30min, e = ext(c(-82, -76, -6, -2)),
main = "30 minutes resolution",
col = inferno(255))
plot(st_geometry(grid), add=TRUE)
plot(st_geometry(grid_centroids), add = TRUE, pch = 3)
# 30 arc-second roughly correspond to 1 km at the Equator
# 30 arc-minutes roughly correspond to 55 kms at the Equator
Extract values based on spatial points (i.e. our centroids, in this case) and aggregated raster
# Temperature
temp_mean <- extract(bio1_30min, grid_centroids) # extract values
head(temp_mean)
## ID CHELSA_bio10_01
## 1 1 20.50053
## 2 2 15.60922
## 3 3 13.76578
## 4 4 20.51814
## 5 5 18.46900
## 6 6 16.06308
# We do that also for Precipitation
bio12_30min <- aggregate(bio12, fact = 60, fun = mean, na.rm = TRUE)
prec_mean <- extract(bio12_30min, grid_centroids)
head(prec_mean)
## ID CHELSA_bio10_12
## 1 1 171.3928
## 2 2 326.8003
## 3 3 313.6492
## 4 4 161.6453
## 5 5 202.6767
## 6 6 280.3808
# Elevation range as a measure of topographic heterogeneity
elev_30min_min <- aggregate(elev, fact = 60, fun = min, na.rm = TRUE) # minimum elevation
elev_30min_max <- aggregate(elev, fact = 60, fun = max, na.rm = TRUE) # maximum elevation
elev_min <- extract(elev_30min_min, grid_centroids)
elev_max <- extract(elev_30min_max, grid_centroids)
elev_range <- elev_max$mn30_grd - elev_min$mn30_grd
elev_range[1:10]
## [1] 1193 1671 1862 1332 1563 1552 1745 667 1517 1336
Write extracted values into gridcell shapefile
grid$temp <- temp_mean$CHELSA_bio10_01
grid$prec <- prec_mean$CHELSA_bio10_12
grid$elev <- elev_range
head(grid)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -115.0007 ymin: 35.5 xmax: -113.0007 ymax: 36.5
## Geodetic CRS: WGS 84
## ID AREA PORTION geometry temp prec elev
## 1 538 0.25 100 MULTIPOLYGON (((-114.0007 3... 20.50053 171.3928 1193
## 2 539 0.25 100 MULTIPOLYGON (((-113.5007 3... 15.60922 326.8003 1671
## 3 540 0.25 100 MULTIPOLYGON (((-113.0007 3... 13.76578 313.6492 1862
## 4 712 0.25 100 MULTIPOLYGON (((-114.5007 3... 20.51814 161.6453 1332
## 5 713 0.25 100 MULTIPOLYGON (((-114.0007 3... 18.46900 202.6767 1563
## 6 714 0.25 100 MULTIPOLYGON (((-113.5007 3... 16.06308 280.3808 1552
Correlation matrix:
cor(st_drop_geometry(grid[, c("temp", "prec", "elev")]))
## temp prec elev
## temp 1.0000000 0.22620347 -0.56097813
## prec 0.2262035 1.00000000 0.01117816
## elev -0.5609781 0.01117816 1.00000000
Histogram of all extracted values:
plot_grid(nrow = 1, ncol = 3,
ggplot(grid, aes(temp)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
labs(x = "Annual mean temperature (C°)", y = "Count") +
theme_bw(),
ggplot(grid, aes(prec)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
labs(x = "Mean annual precipitation (mm)", y = "Count") +
theme_bw(),
ggplot(grid, aes(elev)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
labs(x = "Elevation a.s.l. (m)", y = "Count") +
theme_bw()
)
Join environmental data and species numbers
Join the species numbers (‘species_num’) to the grid shapefile including the newly extracted environmental data.
names(grid)[1] <- "grid_id" # rename ID column in grid cell shape file
grid <- left_join(grid, species_num, by = "grid_id")
head(grid)
## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -115.0007 ymin: 35.5 xmax: -113.0007 ymax: 36.5
## Geodetic CRS: WGS 84
## grid_id AREA PORTION temp prec elev spec_num Long Lat belt
## 1 538 0.25 100 20.50053 171.3928 1193 1 -114.2507 36.24999 36.25
## 2 539 0.25 100 15.60922 326.8003 1671 1 -113.7507 36.24999 36.25
## 3 540 0.25 100 13.76578 313.6492 1862 1 -113.2507 36.24999 36.25
## 4 712 0.25 100 20.51814 161.6453 1332 1 -114.7507 35.75000 35.75
## 5 713 0.25 100 18.46900 202.6767 1563 1 -114.2507 35.75000 35.75
## 6 714 0.25 100 16.06308 280.3808 1552 1 -113.7507 35.75000 35.75
## geometry
## 1 MULTIPOLYGON (((-114.0007 3...
## 2 MULTIPOLYGON (((-113.5007 3...
## 3 MULTIPOLYGON (((-113.0007 3...
## 4 MULTIPOLYGON (((-114.5007 3...
## 5 MULTIPOLYGON (((-114.0007 3...
## 6 MULTIPOLYGON (((-113.5007 3...
# You can also save the resulting dataframe:
# saveRDS(grid, file = "data/grid_specnum_env.rds")
#grid <- readRDS("data/grid_specnum_env.rds")
# Map of species richness
ggplot(grid, aes(spec_num)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
labs(x = "Species number", y = "Count") +
theme_bw()
ggplot(grid) +
geom_sf(color = NA, aes(fill = spec_num)) +
geom_sf(data = americas, fill = NA, color = "black") +
labs(title = "Palm species richness") +
scale_fill_viridis_c("Species number") +
ylim(-55, 40) +
theme_void() +
theme(plot.title = element_text(hjust = 0.5,vjust = 10),
plot.margin = margin(1,0,0,0, "cm"))
Calculate 3 linear models of species richness with each environmental variable as predictor and compare their output and AIC values. The AIC (Akaike Information Criterion) estimates the models’ goodness-of-fit by taking in consideration the complexity and the explanation power. Usually the model with the lowest AIC value is considered the best balance of accuracy and “simplicity”.
model_elev <- lm(spec_num ~ elev, data = grid)
summary(model_elev)
##
## Call:
## lm(formula = spec_num ~ elev, data = grid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.668 -15.351 -5.018 15.004 59.795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.199e+01 2.778e-01 79.139 <2e-16 ***
## elev 1.544e-04 2.553e-04 0.605 0.545
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.36 on 6036 degrees of freedom
## Multiple R-squared: 6.061e-05, Adjusted R-squared: -0.0001051
## F-statistic: 0.3658 on 1 and 6036 DF, p-value: 0.5453
model_temp <- lm(spec_num ~ temp, data = grid)
summary(model_temp)
##
## Call:
## lm(formula = spec_num ~ temp, data = grid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.002 -13.211 -5.178 13.441 62.713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3551 1.2186 -8.498 <2e-16 ***
## temp 1.4029 0.0519 27.031 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.39 on 6036 degrees of freedom
## Multiple R-squared: 0.108, Adjusted R-squared: 0.1078
## F-statistic: 730.7 on 1 and 6036 DF, p-value: < 2.2e-16
model_prec <- lm(spec_num ~ prec, data = grid)
summary(model_prec)
##
## Call:
## lm(formula = spec_num ~ prec, data = grid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.577 -7.182 -1.580 6.264 40.064
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.1030778 0.3332687 -18.31 <2e-16 ***
## prec 0.0157354 0.0001681 93.60 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.09 on 6036 degrees of freedom
## Multiple R-squared: 0.5921, Adjusted R-squared: 0.592
## F-statistic: 8760 on 1 and 6036 DF, p-value: < 2.2e-16
AIC(model_temp, model_prec, model_elev)
## df AIC
## model_temp 3 50913.10
## model_prec 3 46189.17
## model_elev 3 51602.69
Make scatterplots for all three models and plot their regression lines.
# par(mfrow=c(1,3))
# plot(grid$spec_num ~ grid$elev, xlab="Elevation a.s.l. (m)", ylab="Species number")
# abline(model_elev, col="red")
# plot(grid$spec_num ~ grid$temp, xlab="Annual mean temperature (C°)", ylab="Species number")
# abline(model_temp, col="red")
# plot(grid$spec_num ~ grid$prec, xlab="Mean annual precipitation (mm)", ylab="Species number")
# abline(model_prec, col="red")
plot_grid(nrow = 1, ncol = 3,
ggplot(grid, aes(elev, spec_num)) +
geom_point(color = "black", alpha = 0.3) +
stat_smooth(method = "lm", formula = y ~ x, color = "red") +
labs(x = "Elevation a.s.l. (m)", y = "Species number") +
theme_bw(),
ggplot(grid, aes(temp, spec_num)) +
geom_point(color = "black", alpha = 0.3) +
stat_smooth(method = "lm", formula = y ~ x, color = "red") +
labs(x = "Annual mean temperature (C°)", y = "Species number") +
theme_bw(),
ggplot(grid, aes(prec, spec_num)) +
geom_point(color = "black", alpha = 0.3) +
stat_smooth(method = "lm", formula = y ~ x, color = "red") +
labs(x = "Mean annual precipitation (mm)", y = "Species number") +
theme_bw())
model_full <- lm(spec_num ~ elev + temp + prec, data = grid)
summary(model_full)
##
## Call:
## lm(formula = spec_num ~ elev + temp + prec, data = grid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.706 -6.942 -1.962 5.957 43.053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.988e+01 1.017e+00 -29.39 <2e-16 ***
## elev 2.712e-03 1.905e-04 14.24 <2e-16 ***
## temp 1.042e+00 4.210e-02 24.76 <2e-16 ***
## prec 1.457e-02 1.669e-04 87.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.56 on 6034 degrees of freedom
## Multiple R-squared: 0.6297, Adjusted R-squared: 0.6295
## F-statistic: 3420 on 3 and 6034 DF, p-value: < 2.2e-16
model_temp_prec <- lm(spec_num ~ temp + prec, data = grid)
model_temp_elev <- lm(spec_num ~ temp + elev, data = grid)
model_elev_prec <- lm(spec_num ~ prec + elev, data = grid)
AIC(model_full, model_temp_prec, model_temp_elev, model_elev_prec) # The best model could be the FULL MODEL!
## df AIC
## model_full 5 45609.06
## model_temp_prec 4 45806.57
## model_temp_elev 4 50539.04
## model_elev_prec 4 46191.16
Plotting the partial residuals. A partial residual plot shows the relationship between a given independent variable and the response variable when you have also other additional independent variables in the model.
crPlots(model_full, layout = c(1, 3), ask = FALSE,
main = "Partial residual plots", grid = FALSE,
ylab = "Partial residuals (species richness)")
Plot species richness, model predictions and residuals
Get predictions and get residuals and join them to gridcell shapefile.
grid$pred <- predict(model_full)
grid$resi <- residuals(model_full)
head(grid)
## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -115.0007 ymin: 35.5 xmax: -113.0007 ymax: 36.5
## Geodetic CRS: WGS 84
## grid_id AREA PORTION temp prec elev spec_num Long Lat belt
## 1 538 0.25 100 20.50053 171.3928 1193 1 -114.2507 36.24999 36.25
## 2 539 0.25 100 15.60922 326.8003 1671 1 -113.7507 36.24999 36.25
## 3 540 0.25 100 13.76578 313.6492 1862 1 -113.2507 36.24999 36.25
## 4 712 0.25 100 20.51814 161.6453 1332 1 -114.7507 35.75000 35.75
## 5 713 0.25 100 18.46900 202.6767 1563 1 -114.2507 35.75000 35.75
## 6 714 0.25 100 16.06308 280.3808 1552 1 -113.7507 35.75000 35.75
## geometry pred resi
## 1 MULTIPOLYGON (((-114.0007 3... -2.781523 3.781523
## 2 MULTIPOLYGON (((-113.5007 3... -4.317450 5.317450
## 3 MULTIPOLYGON (((-113.0007 3... -5.912218 6.912218
## 4 MULTIPOLYGON (((-114.5007 3... -2.528229 3.528229
## 5 MULTIPOLYGON (((-114.0007 3... -3.439162 4.439162
## 6 MULTIPOLYGON (((-113.5007 3... -4.843784 5.843784
# Option with discrete categories
# lmt <- ceiling(10*max(abs(range(grid$resi))))/10
Plot observed species richness, model predictions and residuals.
plot_grid(
nrow = 1, ncol = 3,
ggplot(grid) +
geom_sf(color = NA, aes(fill = spec_num)) +
geom_sf(data = americas, fill = NA, color = "black") +
labs(title = "Palm species richness") +
scale_fill_viridis_c("Species\nnumber",
limits = range(c(grid$spec_num, grid$pred))) +
theme_void() +
theme(plot.title = element_text(margin = margin(0, 0, 10, 0))) +
ylim(-40, 40),
ggplot(grid) +
geom_sf(color = NA, aes(fill = pred)) +
geom_sf(data = americas, fill = NA, color = "black") +
labs(title = "Model predictions") +
scale_fill_viridis_c("Species\nnumber",
limits = range(c(grid$spec_num, grid$pred))) +
theme_void() +
theme(plot.title = element_text(margin = margin(0, 0, 10, 0))) +
ylim(-40, 40),
ggplot(grid) +
geom_sf(color = NA, aes(fill = resi)) +
geom_sf(data = americas, fill = NA, color = "black") +
labs(title = "Model residuals") +
# Option with discrete categories
# scale_fill_stepsn("Species number", colors = brewer.pal(10, "PiYG"),
# n.breaks = 10, limits = c(-lmt, lmt), show.limits = TRUE) +
scale_fill_gradient2("Species\nnumber", low = brewer.pal(11, "PiYG")[1],
mid = brewer.pal(11, "PiYG")[6],
high = brewer.pal(11, "PiYG")[11], midpoint = 0) +
theme_void() +
theme(plot.title = element_text(margin = margin(0, 0, 10, 0))) +
ylim(-40, 40))
Run a model of species richness in dependence on latitude + latitude² (to account for non-linear relationships between species richness and latitude i.e. latidunal gradient) and compare the AIC to our best model with all environmental predictors.
model_lat <- lm(spec_num ~ abs(Lat) + I(abs(Lat)^2), data = grid)
summary(model_lat)
##
## Call:
## lm(formula = spec_num ~ abs(Lat) + I(abs(Lat)^2), data = grid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.711 -5.179 -0.456 5.334 51.082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.455635 0.360060 129.02 <2e-16 ***
## abs(Lat) -2.404682 0.054188 -44.38 <2e-16 ***
## I(abs(Lat)^2) 0.031619 0.001619 19.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.83 on 6035 degrees of freedom
## Multiple R-squared: 0.6107, Adjusted R-squared: 0.6105
## F-statistic: 4733 on 2 and 6035 DF, p-value: < 2.2e-16
AIC(model_lat, model_full)
## df AIC
## model_lat 4 45909.33
## model_full 5 45609.06
ggplot(grid, aes(abs(Lat), spec_num)) +
geom_point(color = "black", alpha = 0.3) +
stat_smooth(method = "lm", formula = y ~ x + I(x^2), color = "firebrick1") +
labs(x = "Latitude", y = "Species number") +
theme_bw()