Preparation: Please open your RStudio project and
download all new data for the class from Stud-IP
(log in first) and unzip them into your project folder /data/. You can
use getwd()
to get your current working directory which is
your project folder as long as you do not change it. Please install the
following R-packages using install.packages()
if not done
yet:
sf
terra
dplyr
ggplot2
BIEN
ROCR
performance
And load them into your console:
library("dplyr")
library("ggplot2")
library("sf")
library("terra")
library("BIEN")
library("ROCR")
library("performance")
If you want to visualize this tutorial in the viewer inside RStudio (to save space on your screen) run the following chunk of code:
install.packages("rstudioapi") # install an R-package required for this step
dir <- tempfile()
dir.create(dir)
download.file("https://gift.uni-goettingen.de/mcmmb/index.html",
destfile = file.path(dir, "index.html"))
download.file("https://gift.uni-goettingen.de/mcmmb/Day3.html",
destfile = file.path(dir, "Day3.html"))
htmlFile <- file.path(dir, "Day3.html")
rstudioapi::viewer(htmlFile)
Now you can conveniently copy code from the viewer into your
script.
Load palm data set
For today’s exercise, we will continue to use the Neotropical palm data set from Kreft, H., Sommer, J.H. & Barthlott, W. (2006) The significance of geographic range size for spatial diversity patterns in Neotropical palms. Ecography, 29, 21-30.
species <- read.csv("data/palms_species_per_gridcell.csv")
Data inspection
head(species)
## grid_id spp_Id GENUS EPITHET
## 1 13735 550 Ammandra natalia
## 2 13736 550 Ammandra natalia
## 3 13737 550 Ammandra natalia
## 4 13738 550 Ammandra natalia
## 5 13910 550 Ammandra natalia
## 6 13911 550 Ammandra natalia
str(species)
## 'data.frame': 140860 obs. of 4 variables:
## $ grid_id: int 13735 13736 13737 13738 13910 13911 13912 13913 13914 14085 ...
## $ spp_Id : int 550 550 550 550 550 550 550 550 550 550 ...
## $ GENUS : chr "Ammandra" "Ammandra" "Ammandra" "Ammandra" ...
## $ EPITHET: chr "natalia" "natalia" "natalia" "natalia" ...
There is no species column so make one by combining the columns Genus and Epithet:
species$species <- paste(species$GENUS, species$EPITHET, sep = " ")
Create a subset of the palm data set (4 palm
species) From now on, we will only work with the following
species:
- Geonoma pauciflora
- Geonoma weberbaueri which is actually a synonym of
Geonoma undata
- Phytelephas tenuicaulis
- Mauritia flexuosa
species[which(species$species == "Geonoma weberbaueri"), "species"] <- "Geonoma undata"
subset_sp <- c("Geonoma pauciflora", "Geonoma undata",
"Phytelephas tenuicaulis", "Mauritia flexuosa")
species <- species[which(species$species %in% subset_sp),]
unique(species$species) # list species in data set
## [1] "Phytelephas tenuicaulis" "Geonoma undata"
## [3] "Geonoma pauciflora" "Mauritia flexuosa"
Load GIS shapefile & join it to species distribution data
grid <- st_read("data/30min_grid_select50/30min_grid_select50%.shp")
## Reading layer `30min_grid_select50%' from data source
## `C:\Users\Pierre\Work\classes\macroecology\class_content\Gitlab\mcmmb\data\30min_grid_select50\30min_grid_select50%.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 6038 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -116.0007 ymin: -35 xmax: -35.00066 ymax: 36.5
## CRS: NA
st_crs(grid)
## Coordinate Reference System: NA
The coordinate reference system (CRS) is not defined. It is actually based on the latitude/longitude system.
# st_crs(grid) <- "+proj=longlat +ellps=WGS84 +no_defs"
# or if we use the unique EPSG identifier
# st_crs(grid) <- 3857 # https://epsg.io/3857
st_crs(grid) <- 4326
# Shape file showing the borders of the Neotropics
americas <- st_read("data/americas/americas.shp")
## Reading layer `americas' from data source
## `C:\Users\Pierre\Work\classes\macroecology\class_content\Gitlab\mcmmb\data\americas\americas.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -124.7158 ymin: -55.91972 xmax: -29.84 ymax: 49.37666
## CRS: NA
# st_crs(americas) <- 3857
st_crs(americas) <- 4326
We can now merge the spatial grid with the species table. Remember to put the spatial table on the left part, not to lose its spatial component.
colnames(grid)[1] <- "grid_id"
grid_species <- left_join(grid, species, by = c("grid_id"))
For today’s practical, we will use occurrence data downloaded from BIEN. This file is also available via StudIP (‘Data/Data/BIEN_data.csv’).
# Download via the internet (this can take quite some time!)
# spec_occ <- BIEN_occurrence_species(c("Geonoma pauciflora", "Geonoma undata",
# "Phytelephas tenuicaulis", "Mauritia flexuosa"),
# cultivated = FALSE, new.world = TRUE)
# Or load species occurrence data from disk
spec_occ <- read.csv("data/BIEN_data.csv")
Look at the structure of the table, how many observations do we have?
str(spec_occ)
## 'data.frame': 1408 obs. of 10 variables:
## $ scrubbed_species_binomial : chr "Geonoma pauciflora" "Geonoma pauciflora" "Geonoma pauciflora" "Geonoma pauciflora" ...
## $ latitude : num -24.2 -24 -14.3 -24 -13.6 ...
## $ longitude : num -46.9 -46.8 -39.2 -46.8 -39.7 ...
## $ date_collected : chr "1987-09-03" "2001-04-16" "1994-02-13" "2001-04-17" ...
## $ datasource : chr "NY" "SpeciesLink" "GBIF" "GBIF" ...
## $ dataset : chr "NY" "UNICAMP" "USP" "Escola Superior de Agricultura Luiz de Queiroz, ESALQ" ...
## $ dataowner : chr "NY" "UNICAMP" "USP" "Escola Superior de Agricultura Luiz de Queiroz, ESALQ" ...
## $ custodial_institution_codes: chr "NY" "UNICAMP" "USP" "Escola Superior de Agricultura Luiz de Queiroz, ESALQ" ...
## $ collection_code : chr "Herbarium" "UEC" "SPF" "ESA" ...
## $ datasource_id : int 104 1030 3527 2472 3702 3199 2724 1825 2774 3165 ...
head(spec_occ)
## scrubbed_species_binomial latitude longitude date_collected datasource
## 1 Geonoma pauciflora -24.25000 -46.93300 1987-09-03 NY
## 2 Geonoma pauciflora -24.04778 -46.81833 2001-04-16 SpeciesLink
## 3 Geonoma pauciflora -14.33333 -39.16667 1994-02-13 GBIF
## 4 Geonoma pauciflora -24.04122 -46.82442 2001-04-17 GBIF
## 5 Geonoma pauciflora -13.59528 -39.72167 2001-07-26 GBIF
## 6 Geonoma pauciflora -12.86972 -39.47694 <NA> GBIF
## dataset
## 1 NY
## 2 UNICAMP
## 3 USP
## 4 Escola Superior de Agricultura Luiz de Queiroz, ESALQ
## 5 UESC
## 6 Cenargen
## dataowner
## 1 NY
## 2 UNICAMP
## 3 USP
## 4 Escola Superior de Agricultura Luiz de Queiroz, ESALQ
## 5 UESC
## 6 Cenargen
## custodial_institution_codes collection_code
## 1 NY Herbarium
## 2 UNICAMP UEC
## 3 USP SPF
## 4 Escola Superior de Agricultura Luiz de Queiroz, ESALQ ESA
## 5 UESC UESC
## 6 Cenargen CEN
## datasource_id
## 1 104
## 2 1030
## 3 3527
## 4 2472
## 5 3702
## 6 3199
Subset data set (spec_occ) to only include records with spatial coordinates
spec_occ <- spec_occ[complete.cases(spec_occ[, c("longitude",
"latitude")]), ]
# Alternative:
spec_occ <- spec_occ[which(!(is.na(spec_occ$latitude) |
is.na(spec_occ$longitude))), ]
spec_occ <- spec_occ[which(!is.na(spec_occ$latitude) &
!is.na(spec_occ$longitude)), ]
Count how many records there are for each species
table(spec_occ$scrubbed_species_binomial)
##
## Geonoma pauciflora Geonoma undata Mauritia flexuosa
## 133 814 420
## Phytelephas tenuicaulis
## 41
We need to transform the data frame of BIEN occurrences into a spatial data.frame.
spec_occ_sf <- st_as_sf(spec_occ, coords = c("longitude", "latitude"),
crs = 4326) # 3857)
We can now add the BIEN occurrences onto the previous plot.
Side note
All the point shapes can be quickly viewed by running this:
ggpubr::show_point_shapes()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
In order to use the facet on two different datasets, we first need to rename the species column in the species occurrences dataset as in the other dataset.
spec_occ_sf$species <- spec_occ_sf$scrubbed_species_binomial
Now we can combine the species ranges derived from the spatial grid with the spatial occurrences:
ggplot(grid_species[which(grid_species$species %in% subset_sp), ]) +
geom_sf(data = grid, color = "grey90", fill = "grey90") +
geom_sf(data = americas, fill = NA) +
geom_sf(aes(fill = species), color = NA, alpha = 0.5, show.legend = FALSE) +
geom_sf(data = spec_occ_sf, aes(color = species), size = 2, shape = 20,
show.legend = FALSE) +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
facet_wrap(~species)
Pseudo-absences are necessary because occurrence data sets (like BIEN) only record presences and not true absences. Therefore, we must generate pseudo-absences, i.e. possible locations where a species doesn’t occur. For an overview of the different methods and recommendations, see this paper.
In this practical, we will set pseudo-absences by randomly selecting coordinates from the centroids of each grid cell.
First, we extract the centroids of all grid cells:
grid_centroids <- st_centroid(grid)
## Warning: st_centroid assumes attributes are constant over geometries
grid_centroids$present <- 0
As we can see in the next plot, we now have points located in the middle of each grid cell:
ggplot(grid) +
geom_sf() +
geom_sf(data = grid_centroids, shape = 3) +
scale_x_continuous(limits = c(-90, -76)) +
scale_y_continuous(limits = c(20, 31)) +
theme_bw()
We now create a new data.frame that contains all georeferenced occurrences for our four selected species, and the same number of pseudo-absences for each species. The pseudo-absence locations are randomly extracted from the centroids of all grid cells.
First, we add a column to the occurrence data.frame that mentions that all locations correspond to observed occurrences.
spec_occ_sf$present <- 1
Second, we bind a data.frame for each of the selected species with the same number of randomly selected pseudo-absences.
set.seed(100)
pseudo_absences_sf <-
rbind(
cbind(grid_centroids[sample(nrow(grid_centroids),
nrow(spec_occ_sf[which(spec_occ_sf$species == subset_sp[1]), ]),
replace = TRUE),
"present"],
species = subset_sp[1]),
cbind(grid_centroids[sample(nrow(grid_centroids),
nrow(spec_occ_sf[which(spec_occ_sf$species == subset_sp[2]), ]),
replace = TRUE),
"present"],
species = subset_sp[2]),
cbind(grid_centroids[sample(nrow(grid_centroids),
nrow(spec_occ_sf[which(spec_occ_sf$species == subset_sp[3]), ]),
replace = TRUE),
"present"],
species = subset_sp[3]),
cbind(grid_centroids[sample(nrow(grid_centroids),
nrow(spec_occ_sf[which(spec_occ_sf$species == subset_sp[4]), ]),
replace = TRUE),
"present"],
species = subset_sp[4])
)
sdm_sf <- rbind(spec_occ_sf[, c("species", "present")],
pseudo_absences_sf)
We can now plot the presences and generated pseudo-absences for each species:
ggplot() +
geom_sf(data = grid, color = "grey90", fill = "grey90") +
geom_sf(data = americas, fill = NA) +
geom_sf(data = sdm_sf, aes(shape = as.factor(present)), size = 2,
show.legend = FALSE) +
# scale_color_viridis_d() +
scale_shape_manual(values = c("1" = 20,
"0" = 4)) +
scale_y_continuous(limits = c(-35, 36.5)) +
theme_bw() +
facet_wrap(~species)
We will use three environmental variables: elevation, temperature
(‘bio1’), and precipitation (‘bio12’).
Please note that temperature has been multiplied by 10.
# Import global digital elevation model at 30 arc seconds resolution
elev <- rast("data/mn30_grd/") + 0 # +0 to treat raster as numeric
## |---------|---------|---------|---------|=========================================
## Warning: [+] Estimated disk space needed without compression: 3GB. Available: 2
## GB.
plot(elev, main = "Elevation (meters a.s.l.)")
# Import climate data from Chelsa at 30 arc seconds resolution -->
temp <- rast("data/CHELSA_bio10_01.tif") # temperature*10
prec <- rast("data/CHELSA_bio10_12.tif") # precipitation
plot(temp, main = "Mean annual temperature (°C x 10)")
plot(prec, main = "Mean annual precipitation (mm/year)")
For each presence and pseudo-absence, we extract the corresponding
environmental values, using the function
terra::extract
.
Before doing any extraction, we always need to make sure that our spatial objects and raster are having the same CRS:
crs(elev) == crs(sdm_sf)
## [1] FALSE
crs(temp) == crs(sdm_sf)
## [1] TRUE
crs(prec) == crs(sdm_sf)
## [1] TRUE
Now that this condition is met, we can extract the values:
sdm_sf$elev <- terra::extract(elev, sdm_sf)[, "mn30_grd"]
sdm_sf$temp <- terra::extract(temp, sdm_sf)[, "CHELSA_bio10_01"]
sdm_sf$prec <- terra::extract(prec, sdm_sf)[, "CHELSA_bio10_12"]
We can check if we obtained some missing values, and remove them:
summary(sdm_sf$elev)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 151 337 844 1695 4842
summary(sdm_sf$temp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 22.0 171.0 238.0 213.7 252.0 288.0 3
summary(sdm_sf$prec)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 59 1358 2040 2173 2729 7898 3
# Removing the NAs
sdm_sf <- na.omit(sdm_sf)
We here use a Generalized Linear Model to model the presence of our four palm species. Since we model presence/absence data, we fit a logistic regression. When fitting a logistic regression, we use the Binomial distribution family.
SDM 1: Geonoma pauciflora
logreg_geo_pau <- glm(
present ~ elev + temp + prec,
data = sdm_sf[which(sdm_sf$species == "Geonoma pauciflora"), ],
family = "binomial")
summary(logreg_geo_pau)
##
## Call:
## glm(formula = present ~ elev + temp + prec, family = "binomial",
## data = sdm_sf[which(sdm_sf$species == "Geonoma pauciflora"),
## ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.850e+00 1.742e+00 1.636 0.10181
## elev -1.425e-03 5.105e-04 -2.792 0.00525 **
## temp -1.049e-02 6.980e-03 -1.502 0.13298
## prec 2.587e-05 2.003e-04 0.129 0.89721
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 367.36 on 264 degrees of freedom
## Residual deviance: 355.76 on 261 degrees of freedom
## AIC: 363.76
##
## Number of Fisher Scoring iterations: 4
SDM 2: Geonoma undata
logreg_geo_und <- glm(
present ~ elev + temp + prec,
data = sdm_sf[which(sdm_sf$species == "Geonoma undata"), ],
family = "binomial")
summary(logreg_geo_und)
##
## Call:
## glm(formula = present ~ elev + temp + prec, family = "binomial",
## data = sdm_sf[which(sdm_sf$species == "Geonoma undata"),
## ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.681e+00 1.870e+00 -3.573 0.000353 ***
## elev 2.505e-03 3.635e-04 6.891 5.54e-12 ***
## temp 7.139e-03 7.043e-03 1.014 0.310723
## prec 1.075e-03 9.138e-05 11.769 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2255.50 on 1626 degrees of freedom
## Residual deviance: 956.79 on 1623 degrees of freedom
## AIC: 964.79
##
## Number of Fisher Scoring iterations: 6
SDM 3: Phytelephas tenuicaulis
logreg_phy_ten <- glm(
present ~ elev + temp + prec,
data = sdm_sf[which(sdm_sf$species == "Phytelephas tenuicaulis"), ],
family = "binomial")
summary(logreg_phy_ten)
##
## Call:
## glm(formula = present ~ elev + temp + prec, family = "binomial",
## data = sdm_sf[which(sdm_sf$species == "Phytelephas tenuicaulis"),
## ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.827e+01 1.895e+01 -0.964 0.335
## elev 2.515e-03 3.679e-03 0.684 0.494
## temp 3.583e-02 6.934e-02 0.517 0.605
## prec 3.096e-03 7.535e-04 4.109 3.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 112.277 on 80 degrees of freedom
## Residual deviance: 42.656 on 77 degrees of freedom
## AIC: 50.656
##
## Number of Fisher Scoring iterations: 7
SDM 4: Mauritia flexuosa
logreg_mau_fle <- glm(
present ~ elev + temp + prec,
data = sdm_sf[which(sdm_sf$species == "Mauritia flexuosa"), ],
family = "binomial")
summary(logreg_mau_fle)
##
## Call:
## glm(formula = present ~ elev + temp + prec, family = "binomial",
## data = sdm_sf[which(sdm_sf$species == "Mauritia flexuosa"),
## ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.360e+01 2.151e+00 -6.323 2.57e-10 ***
## elev 1.329e-03 4.501e-04 2.954 0.00314 **
## temp 4.360e-02 7.868e-03 5.541 3.00e-08 ***
## prec 1.226e-03 1.273e-04 9.629 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1164.49 on 839 degrees of freedom
## Residual deviance: 922.65 on 836 degrees of freedom
## AIC: 930.65
##
## Number of Fisher Scoring iterations: 5
SDMs predict a continuous response, the probability of occurrence of
species. The observations are however binary (presence or absence of a
species).
Many performance measures therefore rely on comparisons like “How many
presence observations does the model correctly predict as
presence”.
To answer this, the continuous probabilities need to be converted into
binary predictions.
Depending on the threshold applied to do this, we will have different
results and positive/negative rates, as shown in the next contingency
table and in the following figures.
True positive rate is also called the Sensitivity and the true negative rate the Specificity.
The threshold chosen affect these two rates:
The most common evaluation statistic that avoids thresholding the data is AUC - the area under the receiver-operating characteristic (ROC) curve. ROC curves are generated by calculating sensitivity (true positive rate) and specificity (true negative rate) for many thresholds along the entire range of predicted probabilities. Then, (1-specificity) is plotted on the x-axis against sensitivity on the y axis. The area under this curve is called the AUC. The further the generated curve deviates from the 1:1 line towards the upper-left corner, the better the model predicts presence/absence of a species. If we would take a random presence and a random absence from our observations and make predictions, than AUC can be interpeted as the chance of assigning a higher predicted occurrence probability to the presence compared to the absence point. Typically, we regard AUC>0.7 as indicating fair predictions (Araujo et al. 2005).
We will assess classification precision of the logistic models using
ROC (Receiver Operating Characteristic Curve) and AUC (Area Under Curve)
for one species (Mauritia flexuosa). The ROC curve shows the
true positive rate (sensitivity) against the false positive rate (1 -
specificity) at various threshold values at which the species may be
considered present. The AUC is the area under the ROC curve.
p <- predict(logreg_mau_fle,
newdata = sdm_sf[which(sdm_sf$species == "Mauritia flexuosa"), ],
type = "response")
pr <- prediction(p,
sdm_sf[which(sdm_sf$species == "Mauritia flexuosa"), ]$present)
prf <- ROCR::performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf, main = "Receiver Operating Characteristic Curve")
abline(a = 0, b = 1)
text(0.9, 0.75, "low threshold")
text(0.3, 0.05, "high threshold")
For the ROC, the curve should be located in the upper left of the
figure which means that at intermediate threshold values the true
positive rate is much higher than the false positive rate.
auc <- ROCR::performance(pr, measure = "auc")
auc <- auc@y.values[[1]]
auc
## [1] 0.7772279
For the AUC, the value should be greater than 0.50 (i.e. a good model
should be correct more than 50% of the time). The higher the AUC, the
better the model is at predicting 0s as 0s and 1s as 1s.
We can also assess model performance using commonly used indices,
such as R2, AIC, RMSE, LOGLOSS, and PCP.
* The Root Mean Square Error (RMSE) is the square root of the mean
squared residuals (prediction errors). Residuals are a measure of how
far from the regression line data points are.
* LOGLOSS evaluates how good or bad predicted probabilities are (high is
bad, low is good).
* PCP is percentage of correct predictions for models with binary
outcomes.
model_performance(logreg_geo_und,
metrics = c("AIC", "R2", "RMSE", "LOGLOSS", "PCP"))
## # Indices of model performance
##
## AIC | Tjur's R2 | RMSE | Log_loss | PCP
## ----------------------------------------------
## 964.788 | 0.670 | 0.281 | 0.294 | 0.835
model_performance(logreg_geo_pau,
metrics = c("AIC", "R2", "RMSE", "LOGLOSS", "PCP"))
## # Indices of model performance
##
## AIC | Tjur's R2 | RMSE | Log_loss | PCP
## ----------------------------------------------
## 363.756 | 0.040 | 0.490 | 0.671 | 0.520
model_performance(logreg_phy_ten,
metrics = c("AIC", "R2", "RMSE", "LOGLOSS", "PCP"))
## # Indices of model performance
##
## AIC | Tjur's R2 | RMSE | Log_loss | PCP
## ---------------------------------------------
## 50.656 | 0.690 | 0.272 | 0.263 | 0.845
model_performance(logreg_mau_fle,
metrics = c("AIC", "R2", "RMSE", "LOGLOSS", "PCP"))
## # Indices of model performance
##
## AIC | Tjur's R2 | RMSE | Log_loss | PCP
## ----------------------------------------------
## 930.652 | 0.253 | 0.431 | 0.549 | 0.626
More details on the evaluation of the performance of SDMs here.
Create new data for predictions
We first crop the raster layers to a smaller extent, and then aggregate
them by a factor 4 to make the operations faster.
new_extent <- ext(-97, -35, -30, 17) # crop the raster layers to smaller extent
elev <- crop(elev, new_extent) # crop raster layers to smaller extent
elev <- aggregate(elev, fact = 4, fun = mean) # aggregate data
## |---------|---------|---------|---------|=========================================
plot(elev, main = "Elevation (meters a.s.l.)")
plot(st_geometry(americas), add = TRUE)
temp <- crop(temp, new_extent)
temp <- aggregate(temp, fact = 4, fun = mean)
## |---------|---------|---------|---------|=========================================
plot(temp, main = "Mean annual temperature (°C x 10)")
plot(st_geometry(americas), add = TRUE)
prec <- crop(prec, new_extent)
prec <- aggregate(prec, fact = 4, fun = mean)
## |---------|---------|---------|---------|=========================================
plot(prec, main = "Mean annual precipitation (mm/year)")
plot(st_geometry(americas), add = TRUE)
Predict presences of each species using our previously fitted
species distribution models
We first extract values from the 3 rater layers:
new_data <- data.frame(elev = as.numeric(values(elev)),
temp = as.numeric(values(temp)),
prec = as.numeric(values(prec)))
Then, using the GLMs, we get predicted probabilities of presence for all these raster values:
# predict presences
geo_pau_predict <- terra::predict(logreg_geo_pau, newdata = new_data,
type = "response")
geo_und_predict <- terra::predict(logreg_geo_und, newdata = new_data,
type = "response")
phy_ten_predict <- terra::predict(logreg_phy_ten, newdata = new_data,
type = "response")
mau_fle_predict <- terra::predict(logreg_mau_fle, newdata = new_data,
type = "response")
We now create rasters with the predicted probabilities as values:
# create raster layers with SDM predictions
geo_pau_predict_rst <- rast(new_extent, nrows=nrow(elev), ncols=ncol(elev),
vals = geo_pau_predict)
geo_und_predict_rst <- rast(new_extent, nrows=nrow(elev), ncols=ncol(elev),
vals = geo_und_predict)
phy_ten_predict_rst <- rast(new_extent, nrows=nrow(elev), ncols=ncol(elev),
vals = phy_ten_predict)
mau_fle_predict_rst <- rast(new_extent, nrows=nrow(elev), ncols=ncol(elev),
vals = mau_fle_predict)
Compare SDM predictions to actual species ranges
par(mfrow = c(1, 2), mar = c(3, 1, 1, 1))
# plot(st_geometry(grid),
# col = ifelse(grid_species$species == "Geonoma pauciflora",
# "#7fbf7b", "white"),
# border = FALSE, main = "Geonoma pauciflora")
plot(st_geometry(grid_species[which(grid_species$species == "Geonoma pauciflora"), ]),
col = "#7fbf7b",
# col = ifelse(grid_species$species == "Geonoma pauciflora",
# "#7fbf7b", "white"),
border = FALSE, main = "Geonoma pauciflora",
xlim = c(-97, -35), ylim = c(-30, 17))
points(spec_occ$longitude[which(spec_occ$scrubbed_species_binomial == "Geonoma pauciflora")],
spec_occ$latitude[which(spec_occ$scrubbed_species_binomial == "Geonoma pauciflora")],
pch = 3,cex = 0.3)
plot(st_geometry(americas),add=TRUE)
plot(geo_pau_predict_rst, main = "SDM predictions", axes = FALSE, box = FALSE)
plot(st_geometry(americas), add = TRUE)
# Geonoma undata
plot(st_geometry(grid_species[which(grid_species$species == "Geonoma undata"), ]),
col = "#7fbf7b",
border = FALSE, main = "Geonoma undata",
xlim = c(-97, -35), ylim = c(-30, 17))
points(spec_occ$longitude[which(spec_occ$scrubbed_species_binomial == "Geonoma undata")],
spec_occ$latitude[which(spec_occ$scrubbed_species_binomial == "Geonoma undata")],
pch = 3,cex = 0.3)
plot(st_geometry(americas), add = TRUE)
plot(geo_und_predict_rst, main = "SDM predictions", axes = FALSE, box = FALSE)
plot(st_geometry(americas), add = TRUE)
# Phytelephas tenuicaulis
plot(st_geometry(grid_species[which(grid_species$species == "Phytelephas tenuicaulis"), ]),
col = "#7fbf7b",
border = FALSE, main = "Phytelephas tenuicaulis",
xlim = c(-97, -35), ylim = c(-30, 17))
points(spec_occ$longitude[which(spec_occ$scrubbed_species_binomial == "Phytelephas tenuicaulis")],
spec_occ$latitude[which(spec_occ$scrubbed_species_binomial == "Phytelephas tenuicaulis")],
pch = 3,cex = 0.3)
plot(st_geometry(americas), add = TRUE)
plot(phy_ten_predict_rst, main = "SDM predictions", axes = FALSE, box = FALSE)
plot(st_geometry(americas), add = TRUE)
# Mauritia flexuosa
plot(st_geometry(grid_species[which(grid_species$species == "Mauritia flexuosa"), ]),
col = "#7fbf7b",
border = FALSE, main = "Mauritia flexuosa",
xlim = c(-97, -35), ylim = c(-30, 17))
points(spec_occ$longitude[which(spec_occ$scrubbed_species_binomial == "Mauritia flexuosa")],
spec_occ$latitude[which(spec_occ$scrubbed_species_binomial == "Mauritia flexuosa")],
pch = 3,cex = 0.3)
plot(st_geometry(americas), add = TRUE)
plot(mau_fle_predict_rst, main = "SDM predictions", axes = FALSE, box = FALSE)
plot(st_geometry(americas), add = TRUE)
For further information about geo-spatial analysis, consult tutorials available via the Coding Club or Geocomputation with R
Some images from the evaluation of the performance of the models were extracted from support.bccvl.org.au
See also the site of Damaris Zurell for more content on the interpretation and evaluation of SDMs.