back to overview    

Preparation: Open your RStudio project and download new data for today from Stud-IP (log in first) and unzip them into your project folder ’/data/. You can use getwd() to get your current working directory which is your project folder as long as you do not change it. Please install the following R-packages using install.packages() if not done yet:

 

If you want to visualize this tutorial in the viewer inside RStudio (to save space on your screen) run the following chunk of code:

install.packages("rstudioapi") # install an R-package required for this step
dir <- tempfile()
dir.create(dir)
download.file("https://gift.uni-goettingen.de/mcmmb/index.html", destfile = file.path(dir, "index.html"))
download.file("https://gift.uni-goettingen.de/mcmmb/Day5.html", destfile = file.path(dir, "Day5.html"))
htmlFile <- file.path(dir, "Day5.html")
rstudioapi::viewer(htmlFile)

Now you can conveniently copy code from the viewer into your script.
 

1. Loading data

1.1. R packages

library("dplyr")
library("terra")
library("sf")
library("RColorBrewer")
library("car")
library("ggplot2")
library("viridis")
library("cowplot")
library("tidyterra")

 

1.2. Environmental raster layers

For today’s exercise, we again use the elevation, temperature (‘bio1’), and precipitation (‘bio12’) raster layers we downloaded the first day.
Loading and visualising environmental raster layers Download the following raster layers
GMTED - Digital elevation data https://topotools.cr.usgs.gov/gmted_viewer/viewer.htm
Elevation above sea level (mn30_grd.zip)

Chelsa - Climate data http://chelsa-climate.org/
Annual mean temperature (CHELSA_bio10_01.tif)
Mean annual precipitation (CHELSA_bio10_12.tif)
 

Extract and copy the rasters into your R-project folder ../data/

# Import global digital elevation model at 30 arc seconds resolution
elev <- rast("data/mn30_grd/") + 0 # + 0 to treat raster as numeric
## |---------|---------|---------|---------|=========================================                                          
## Warning: [+] Estimated disk space needed without compression: 3GB. Available: 1
## GB.
plot(elev, main = "Elevation a.s.l. (m)", col = terrain.colors(255))

 

# Import climate data from Chelsa at 30 arc seconds resolution -->
bio1 <- rast("data/CHELSA_bio10_01.tif") # temperature*10
bio12 <- rast("data/CHELSA_bio10_12.tif") # precipitation
plot(bio1, main = "Annual mean temperature (10 x C°)", col = inferno(255))

 

1.3. Gridcell shapefile

# gridcells corresponding to palm distribution data and americas coastline
grid <- st_read("data/30min_grid_select50/30min_grid_select50%.shp")
## Reading layer `30min_grid_select50%' from data source 
##   `C:\Users\Pierre\Work\classes\macroecology\class_content\Gitlab\mcmmb\data\30min_grid_select50\30min_grid_select50%.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 6038 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -116.0007 ymin: -35 xmax: -35.00066 ymax: 36.5
## CRS:           NA
americas <- st_read("data/americas/americas.shp")
## Reading layer `americas' from data source 
##   `C:\Users\Pierre\Work\classes\macroecology\class_content\Gitlab\mcmmb\data\americas\americas.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -124.7158 ymin: -55.91972 xmax: -29.84 ymax: 49.37666
## CRS:           NA
# remember to define the Coordinate Reference System (CRS)
st_crs(grid) <- 4326 # "+proj=longlat +ellps=WGS84 +no_defs"
st_crs(americas) <- 4326 # "+proj=longlat +ellps=WGS84 +no_defs"

ggplot(grid) +
  geom_sf() +
  geom_sf(data = americas, fill = NA, color = "black") +
  theme_void() +
  ylim(-40, 40)

1.4. Distribution data

Instead of loading the .csv file as in the first practicals, we directly load the RData files of palm species richness made during week 4.

load("data/species_num.RData")

2. Extracting environmental data

We here aim at getting environmental values from the different raster layers for all palm gridcells.

2.1. Preparation

First, we can crop te raster layers based on the palm gridcell shapefile. The raster layers have indeed larger extents than they need to have for our Neotropical palm data.

ext(grid)
## SpatExtent : -116.000656, -35.000656, -35, 36.5 (xmin, xmax, ymin, ymax)
elev <- crop(elev, ext(grid)) # crop raster layers to smaller extent
bio1 <- crop(bio1, ext(grid))
bio12 <- crop(bio12, ext(grid))
par(mfrow = c(1, 3))
plot(elev, main = "Elevation a.s.l. (m)", col = terrain.colors(255))
plot(bio1, main = "Annual mean temperature (10 x C°)", col = inferno(255))
plot(bio12, main = "Mean annual precipitation (mm)",
     col = colorRampPalette(brewer.pal(9, "Blues"))(255))

 

To avoid broken numbers temperature has been multiplied by 10 in the ‘bio1’ raster layer. We can undo this by dividing by 10.

bio1 <- bio1/10 # Can take a little while!

 

We have an elevation value of 0 for the sea in the elevation raster. We can set it to NA, by multiplying the layer with bio1:

elev <- elev * bio1 / bio1
par(mfrow = c(1, 1))
zoom(elev, e = ext(c(-82, -76, -6, -2)), main = "Elevation a.s.l. (m)",
     col = terrain.colors(255))
plot(st_geometry(grid), add = TRUE)

2.2. Extract values

We can now extract per grid cell environmental values for all raster layers.

bio1_values <- extract(bio1, grid[c(1:10), ])
str(bio1_values) # bio1_values is a data.frame
## 'data.frame':    36000 obs. of  2 variables:
##  $ ID             : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ CHELSA_bio10_01: num  21.3 21.5 21.6 21.8 21.9 22 22 22 22.1 22.3 ...
bio1_values$CHELSA_bio10_01[1:5] # 5 first values
## [1] 21.3 21.5 21.6 21.8 21.9
# Mean temperature across all grid cells
mean(bio1_values$CHELSA_bio10_01)
## [1] 16.71866
# Mean value per grid cell
tapply(bio1_values$CHELSA_bio10_01, bio1_values$ID, mean)
##        1        2        3        4        5        6        7        8 
## 20.53569 15.72178 13.70375 20.61917 18.68494 16.18158 14.06617 12.29625 
##        9       10 
## 18.40867 16.96864
# With dplyr
bio1_values %>%
  group_by(ID) %>%
  reframe(mean_grid = mean(CHELSA_bio10_01))
## # A tibble: 10 × 2
##       ID mean_grid
##    <dbl>     <dbl>
##  1     1      20.5
##  2     2      15.7
##  3     3      13.7
##  4     4      20.6
##  5     5      18.7
##  6     6      16.2
##  7     7      14.1
##  8     8      12.3
##  9     9      18.4
## 10    10      17.0
# Or, we directly extract the mean temperature per grid cell
bio1_mean <- extract(bio1, grid[c(1:10), ], fun = mean)
bio1_mean
##    ID CHELSA_bio10_01
## 1   1        20.53569
## 2   2        15.72178
## 3   3        13.70375
## 4   4        20.61917
## 5   5        18.68494
## 6   6        16.18158
## 7   7        14.06617
## 8   8        12.29625
## 9   9        18.40867
## 10 10        16.96864
# We can also use other functions like the standard deviation
bio1_sd <- extract(bio1, grid[c(1:10), ], fun = sd)
bio1_sd
##    ID CHELSA_bio10_01
## 1   1       1.8548872
## 2   2       3.1713750
## 3   3       2.1199026
## 4   4       1.7279599
## 5   5       1.5626781
## 6   6       2.0410778
## 7   7       2.4285270
## 8   8       0.4539963
## 9   9       2.1726744
## 10 10       1.5171841

How long would it take to extract mean values for all grid cells?

nrow(grid) # number of grid cells
## [1] 6038
nrow(grid)/10*system.time(bio1_mean <- extract(bio1, grid[c(1:10), ],
                                               fun = mean))/60
##      user    system   elapsed 
## 0.8050667 0.0000000 1.1069667
# about a minute (used to be around 20 minutes with raster package)

Note: There are faster packages for dealing with raster layers available now (velox, fasterRaster). Here, we want to make use of a workaround that also helps us top understand the data better.
 

Alternative work-around: aggregating the raster layers
Aggregate raster layer to the resolution of the palm grid and convert grid to spatial points.

The grid is at a resolution of 30 arc-minutes. Our raster layers have a resolution of:

res(bio1)
## [1] 0.008333333 0.008333333

0.008333333 arc-degree.
One arc-degree is equivalent to 3600 arc-seconds, so 0.008333333 arc-degree correspond to:

0.008333333 * 3600
## [1] 30

30 arc-seconds, which is roughly 1 km at the Equator.
1 arc-minute is 60 arc-seconds, so 30 arc-minutes represent:

30*60
## [1] 1800

1800 arc-seconds.

Since we have 30 arc-seconds resolution, we need to aggregate the raster layers by a factor of:

1800/30
## [1] 60

So, let’s aggregate our raster layers by a factor 60:

bio1_30min <- aggregate(bio1, fact = 60, fun = mean, na.rm = TRUE)
dim(bio1)
## [1] 8580 9720    1
ext(bio1)
## SpatExtent : -116.00013914485, -35.00013946885, -35.00013910885, 36.49986060515 (xmin, xmax, ymin, ymax)
dim(bio1_30min)
## [1] 143 162   1
ext(bio1_30min)
## SpatExtent : -116.00013914485, -35.00013946885, -35.00013910885, 36.49986060515 (xmin, xmax, ymin, ymax)

We also extract the grid centroids:

grid_centroids <- st_centroid(grid)
## Warning: st_centroid assumes attributes are constant over geometries
par(mfrow = c(1, 2))

zoom(bio1, e = ext(c(-82, -76, -6, -2)),
     main = "30 seconds resolution",
     col = inferno(255))
plot(st_geometry(grid), add=TRUE)

zoom(bio1_30min, e = ext(c(-82, -76, -6, -2)),
     main = "30 minutes resolution",
     col = inferno(255))
plot(st_geometry(grid), add=TRUE)
plot(st_geometry(grid_centroids), add = TRUE, pch = 3)

# 30 arc-second roughly correspond to 1 km at the Equator
# 30 arc-minutes roughly correspond to 55 kms at the Equator

Extract values based on spatial points and aggregated raster

# Temperature
temp_mean <- extract(bio1_30min, grid_centroids) # extract values
head(temp_mean)
##   ID CHELSA_bio10_01
## 1  1        20.53569
## 2  2        15.72178
## 3  3        13.70375
## 4  4        20.61917
## 5  5        18.68494
## 6  6        16.18158
# Precipitation
bio12_30min <- aggregate(bio12, fact = 60, fun = mean, na.rm = TRUE)
prec_mean <- extract(bio12_30min, grid_centroids)
head(prec_mean)
##   ID CHELSA_bio10_12
## 1  1        171.7425
## 2  2        242.4058
## 3  3        281.7183
## 4  4        163.6556
## 5  5        206.7469
## 6  6        286.4769
# Elevation range as a measure of topographic heterogeneity
elev_30min_min <- aggregate(elev, fact = 60, fun = min, na.rm = TRUE) # minimum elevation
elev_30min_max <- aggregate(elev, fact = 60, fun = max, na.rm = TRUE) # maximum elevation
elev_min <- extract(elev_30min_min, grid_centroids)
elev_max <- extract(elev_30min_max, grid_centroids)
elev_range <- elev_max$mn30_grd - elev_min$mn30_grd
elev_range[1:10]
##  [1] 1193 1671 1862 1332 1563 1552 1745  667 1517 1336

 

Write extracted values into gridcell shapefile

grid$temp <- temp_mean$CHELSA_bio10_01
grid$prec <- prec_mean$CHELSA_bio10_12
grid$elev <- elev_range

head(grid)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -115.0007 ymin: 35.5 xmax: -113.0007 ymax: 36.5
## Geodetic CRS:  WGS 84
##    ID AREA PORTION                       geometry     temp     prec elev
## 1 538 0.25     100 MULTIPOLYGON (((-114.0007 3... 20.53569 171.7425 1193
## 2 539 0.25     100 MULTIPOLYGON (((-113.5007 3... 15.72178 242.4058 1671
## 3 540 0.25     100 MULTIPOLYGON (((-113.0007 3... 13.70375 281.7183 1862
## 4 712 0.25     100 MULTIPOLYGON (((-114.5007 3... 20.61917 163.6556 1332
## 5 713 0.25     100 MULTIPOLYGON (((-114.0007 3... 18.68494 206.7469 1563
## 6 714 0.25     100 MULTIPOLYGON (((-113.5007 3... 16.18158 286.4769 1552

Correlation matrix:

cor(st_drop_geometry(grid[, c("temp", "prec", "elev")]))
##            temp       prec       elev
## temp  1.0000000  0.3278321 -0.5465026
## prec  0.3278321  1.0000000 -0.1371937
## elev -0.5465026 -0.1371937  1.0000000

Histogram of all extracted values:

plot_grid(nrow = 1, ncol = 3,
          ggplot(grid, aes(temp)) +
            geom_histogram(fill = "grey", color = "black", bins = 30) + 
            labs(x = "Annual mean temperature (C°)", y = "Count") +
            theme_bw(),
          ggplot(grid, aes(prec)) +
            geom_histogram(fill = "grey", color = "black", bins = 30) + 
            labs(x = "Mean annual precipitation (mm)", y = "Count") +
            theme_bw(),
          ggplot(grid, aes(elev)) +
            geom_histogram(fill = "grey", color = "black", bins = 30) + 
            labs(x = "Elevation a.s.l. (m)", y = "Count") +
            theme_bw()
)

 

3. Modelling palm species richness

3.1. Simple models

Join the species numbers (‘species_num’) to the grid shapefile including the newly extracted environmental data.

names(grid)[1] <- "grid_id" # rename ID column in grid cell shape file
grid <- left_join(grid, species_num, by = "grid_id")
head(grid)
## Simple feature collection with 6 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -115.0007 ymin: 35.5 xmax: -113.0007 ymax: 36.5
## Geodetic CRS:  WGS 84
##   grid_id AREA PORTION     temp     prec elev spec_num      Long   Lat
## 1     538 0.25     100 20.53569 171.7425 1193        1 -114.2507 36.25
## 2     539 0.25     100 15.72178 242.4058 1671        1 -113.7507 36.25
## 3     540 0.25     100 13.70375 281.7183 1862        1 -113.2507 36.25
## 4     712 0.25     100 20.61917 163.6556 1332        1 -114.7507 35.75
## 5     713 0.25     100 18.68494 206.7469 1563        1 -114.2507 35.75
## 6     714 0.25     100 16.18158 286.4769 1552        1 -113.7507 35.75
##                         geometry
## 1 MULTIPOLYGON (((-114.0007 3...
## 2 MULTIPOLYGON (((-113.5007 3...
## 3 MULTIPOLYGON (((-113.0007 3...
## 4 MULTIPOLYGON (((-114.5007 3...
## 5 MULTIPOLYGON (((-114.0007 3...
## 6 MULTIPOLYGON (((-113.5007 3...
# saveRDS(grid, file = "data/grid_specnum_env.rds")

Join environmental data and species numbers

grid <- readRDS("data/grid_specnum_env.rds")

# Map of species richness
ggplot(grid, aes(spec_num)) +
  geom_histogram(fill = "grey", color = "black", bins = 30) + 
  labs(x = "Species number", y = "Count") +
  theme_bw()

 

Calculate 3 linear models of species richness with each environmental variable as predictor and compare their output and AIC values.

model_elev <- lm(grid$spec_num ~ grid$elev)
summary(model_elev)
## 
## Call:
## lm(formula = grid$spec_num ~ grid$elev)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.668 -15.351  -5.018  15.004  59.795 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.199e+01  2.778e-01  79.139   <2e-16 ***
## grid$elev   1.544e-04  2.553e-04   0.605    0.545    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.36 on 6036 degrees of freedom
## Multiple R-squared:  6.06e-05,   Adjusted R-squared:  -0.0001051 
## F-statistic: 0.3658 on 1 and 6036 DF,  p-value: 0.5453
model_temp <- lm(grid$spec_num ~ grid$temp)
summary(model_temp)
## 
## Call:
## lm(formula = grid$spec_num ~ grid$temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.915 -13.365  -5.048  13.506  61.804 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9.22916    1.24760  -7.398 1.58e-13 ***
## grid$temp    1.36045    0.05341  25.473  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.49 on 6036 degrees of freedom
## Multiple R-squared:  0.09706,    Adjusted R-squared:  0.09691 
## F-statistic: 648.9 on 1 and 6036 DF,  p-value: < 2.2e-16
model_prec <- lm(grid$spec_num ~ grid$prec)
summary(model_prec)
## 
## Call:
## lm(formula = grid$spec_num ~ grid$prec)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -73.889  -7.719  -1.617   6.421  46.025 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.5925962  0.3373768  -13.61   <2e-16 ***
## grid$prec    0.0158535  0.0001802   87.98   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.49 on 6036 degrees of freedom
## Multiple R-squared:  0.5619, Adjusted R-squared:  0.5618 
## F-statistic:  7740 on 1 and 6036 DF,  p-value: < 2.2e-16
AIC(model_temp, model_prec, model_elev)
##            df      AIC
## model_temp  3 50986.55
## model_prec  3 46620.50
## model_elev  3 51602.69

 

Make scatterplots for all three models and plot their regression lines.

# par(mfrow=c(1,3))
# plot(grid$spec_num ~ grid$elev, xlab="Elevation a.s.l. (m)",  ylab="Species number")
# abline(model_elev, col="red")
# plot(grid$spec_num ~ grid$temp, xlab="Annual mean temperature (C°)",  ylab="Species number")
# abline(model_temp, col="red")
# plot(grid$spec_num ~ grid$prec, xlab="Mean annual precipitation (mm)",  ylab="Species number")
# abline(model_prec, col="red")

plot_grid(nrow = 1, ncol = 3,
          ggplot(grid, aes(elev, spec_num)) +
            geom_point(color = "black", alpha = 0.3) +
            stat_smooth(method = "lm", formula = y ~ x, color = "red") +
            labs(x = "Elevation a.s.l. (m)", y = "Species number") +
            theme_bw(),
          ggplot(grid, aes(temp, spec_num)) +
            geom_point(color = "black", alpha = 0.3) +
            stat_smooth(method = "lm", formula = y ~ x, color = "red") +
            labs(x = "Annual mean temperature (C°)", y = "Species number") +
            theme_bw(),
          ggplot(grid, aes(prec, spec_num)) +
            geom_point(color = "black", alpha = 0.3) +
            stat_smooth(method = "lm", formula = y ~ x, color = "red") +
            labs(x = "Mean annual precipitation (mm)", y = "Species number") +
            theme_bw())

3.2. Multipredictor model

model_full <- lm(grid$spec_num ~ grid$temp + grid$prec + grid$elev)
summary(model_full)
## 
## Call:
## lm(formula = grid$spec_num ~ grid$temp + grid$prec + grid$elev)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -76.787  -7.195  -2.092   6.115  44.902 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.542e+01  1.063e+00  -23.91   <2e-16 ***
## grid$temp    8.399e-01  4.470e-02   18.79   <2e-16 ***
## grid$prec    1.513e-02  1.830e-04   82.68   <2e-16 ***
## grid$elev    4.186e-03  1.936e-04   21.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.01 on 6034 degrees of freedom
## Multiple R-squared:  0.5979, Adjusted R-squared:  0.5977 
## F-statistic:  2990 on 3 and 6034 DF,  p-value: < 2.2e-16
model_temp_prec <- lm(grid$spec_num ~ grid$temp + grid$prec)
model_temp_elev <- lm(grid$spec_num ~ grid$temp + grid$elev)
model_elev_prec <- lm(grid$spec_num ~ grid$prec + grid$elev)

AIC(model_full, model_temp_prec, model_temp_elev, model_elev_prec)
##                 df      AIC
## model_full       5 46106.74
## model_temp_prec  4 46555.24
## model_temp_elev  4 50678.42
## model_elev_prec  4 46448.17

Plotting the partial residuals.

crPlots(model_full, layout = c(1, 3), ask = FALSE,
        main = "Partial residual plots", grid = FALSE,
        ylab = "Partial residuals (species richness)")

Plot species richness, model predictions and residuals

Get predictions and get residuals and join them to gridcell shapefile.

grid$pred <- predict(model_full)
grid$resi <- residuals(model_full)
head(grid)
## Simple feature collection with 6 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -115.0007 ymin: 35.5 xmax: -113.0007 ymax: 36.5
## Geodetic CRS:  +proj=longlat +ellps=WGS84 +no_defs
##   grid_id AREA PORTION     temp     prec elev spec_num      Long   Lat
## 1     538 0.25     100 20.53569 171.7425 1193        1 -114.2507 36.25
## 2     539 0.25     100 15.72178 242.4058 1671        1 -113.7507 36.25
## 3     540 0.25     100 13.70375 281.7183 1862        1 -113.2507 36.25
## 4     712 0.25     100 20.61917 163.6556 1332        1 -114.7507 35.75
## 5     713 0.25     100 18.68494 206.7469 1563        1 -114.2507 35.75
## 6     714 0.25     100 16.18158 286.4769 1552        1 -113.7507 35.75
##                         geometry        pred     resi
## 1 MULTIPOLYGON (((-114.0007 3... -0.58183214 1.581832
## 2 MULTIPOLYGON (((-113.5007 3... -1.55497702 2.554977
## 3 MULTIPOLYGON (((-113.0007 3... -1.85556411 2.855564
## 4 MULTIPOLYGON (((-114.5007 3... -0.05224960 1.052250
## 5 MULTIPOLYGON (((-114.0007 3... -0.05782585 1.057826
## 6 MULTIPOLYGON (((-113.5007 3... -1.00000873 2.000009
# Option with discrete categories
# lmt <- ceiling(10*max(abs(range(grid$resi))))/10

Plot observed species richness, model predictions and residuals.

plot_grid(
  nrow = 1, ncol = 3,
  ggplot(grid) +
    geom_sf(color = NA, aes(fill = spec_num)) +
    geom_sf(data = americas, fill = NA, color = "black") +
    labs(title = "Palm species richness") +
    scale_fill_viridis_c("Species\nnumber",
                         limits = range(c(grid$spec_num, grid$pred))) +
    theme_void() +
    theme(plot.title = element_text(margin = margin(0, 0, 10, 0))) + 
    ylim(-40, 40),
  ggplot(grid) +
    geom_sf(color = NA, aes(fill = pred)) +
    geom_sf(data = americas, fill = NA, color = "black") +
    labs(title = "Model predictions") +
    scale_fill_viridis_c("Species\nnumber",
                         limits = range(c(grid$spec_num, grid$pred))) +
    theme_void() +
    theme(plot.title = element_text(margin = margin(0, 0, 10, 0))) + 
    ylim(-40, 40),
  ggplot(grid) +
    geom_sf(color = NA, aes(fill = resi)) +
    geom_sf(data = americas, fill = NA, color = "black") +
    labs(title = "Model residuals") +
    # Option with discrete categories
    # scale_fill_stepsn("Species number", colors = brewer.pal(10, "PiYG"),
    #        n.breaks = 10, limits = c(-lmt, lmt), show.limits = TRUE) + 
    scale_fill_gradient2("Species\nnumber", low = brewer.pal(11, "PiYG")[1],
                         mid = brewer.pal(11, "PiYG")[6],
                         high = brewer.pal(11, "PiYG")[11], midpoint = 0) +
    theme_void() +
    theme(plot.title = element_text(margin = margin(0, 0, 10, 0))) + 
    ylim(-40, 40))

Run a model of species richness in dependence on latitude + latitude² and compare the AIC to our best model with all environmental predictors.

model_lat <- lm(grid$spec_num ~ abs(grid$Lat) + I(abs(grid$Lat)^2))
summary(model_lat)
## 
## Call:
## lm(formula = grid$spec_num ~ abs(grid$Lat) + I(abs(grid$Lat)^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.711  -5.179  -0.456   5.334  51.082 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        46.455629   0.360059  129.02   <2e-16 ***
## abs(grid$Lat)      -2.404688   0.054188  -44.38   <2e-16 ***
## I(abs(grid$Lat)^2)  0.031619   0.001619   19.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.83 on 6035 degrees of freedom
## Multiple R-squared:  0.6107, Adjusted R-squared:  0.6105 
## F-statistic:  4733 on 2 and 6035 DF,  p-value: < 2.2e-16
AIC(model_lat, model_full)
##            df      AIC
## model_lat   4 45909.33
## model_full  5 46106.74
ggplot(grid, aes(abs(Lat), spec_num)) +
  geom_point(color = "black", alpha = 0.3) +
  stat_smooth(method = "lm", formula = y ~ x + I(x^2), color = "firebrick1") +
  labs(x = "Latitude", y = "Species number") +
  theme_bw()