Last active
July 13, 2022 06:37
-
-
Save MarinaGolivets/1eaac550a1161f212f8ff93fe5eb2645 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GP Oekologie 2022 | |
# Ingolf Kühn & Marina Golivets | |
# install & load R packages ------------------------------------------------------------------------ | |
rm(list = ls()) | |
# packages that we need for analyses | |
pkgs <- c( | |
"here", # for handling working directory | |
"googlesheets4", # for loading data from Google spreadsheets | |
"tidyverse", # for data manipulation | |
"magrittr", # for piping | |
"skimr", # for summarizing data | |
"gtsummary", # for summarizing data | |
"taxize", # for checking taxonomic names | |
"TR8", # for retrieving trait data from databases | |
"performance", # for inspecting regression models, | |
"rstatix", # for statistical analyses, e.g. ANOVA | |
"DescTools", # for miscellaneous basic stats | |
"ggpubr", # for plotting | |
"ggrepel", # for plotting | |
"viridis", # for plotting | |
"cowplot", # for plotting | |
"vegan", # for multivariate analyses | |
"FactoMineR", # for PCA | |
"ks" # for kernel smoothers | |
) | |
# install (if not installed) and load all the packages | |
# # run only once! | |
# install.packages("devtools") | |
# devtools::install_github("ddsjoberg/gtsummary") | |
# devtools::install_github("ropensci/skimr") | |
for (p in pkgs) { | |
if (!require(p, character.only = TRUE)) install.packages(p) | |
library(p, character.only = TRUE) | |
} | |
# choose an option to not treat warnings as errors | |
options(warn = 1) | |
# check what packages are being used on your machine | |
sessionInfo() | |
# check your working (home) directory | |
here() | |
# load data ---------------------------------------------------------------------------------------- | |
if (length(list.files(here(), "veg_data_2022.csv")) == 0) { | |
# load the data directly from the Google sheet | |
veg <- googlesheets4::read_sheet( | |
"https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE", | |
sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc" | |
) | |
# save the data to your home directory | |
readr::write_csv2(veg, here("veg_data_2022.csv")) | |
} else { | |
# load the data from your home directory | |
veg <- readr::read_csv2(here("veg_data_2022.csv")) | |
} | |
# take a quick look at the data | |
glimpse(veg) | |
# rename columns ----------------------------------------------------------------------------------- | |
veg %<>% | |
rename( | |
Fortlaufendenr = `...1`, | |
Aufnahmenr = Aufnahmenr., | |
WissName = `Wissenschaftlicher Name`, | |
Hoehe.avg = `Höhe Ø (cm)`, | |
Hoehe.var = `Höhe σ²`, | |
Blattflaeche = `Blatt-Fläche (mm²)`, | |
Frischmasse = `Frischmasse (g)`, | |
Trockenmasse = `Trockenmasse (mg)`, | |
Germinulenmasse = `Germinulenmasse (mg)`, | |
Feuchte = `Ellenberg-Feuchte`, | |
Bodenfeuchte = `Bodenfeuchte (%vol)` | |
) | |
# take a more detailed look at the data | |
skimr::skim(veg) | |
select(veg, -Fortlaufendenr, -Aufnahmenr, -WissName, -Anmerkungen) %>% | |
gtsummary::tbl_summary() | |
# check data for errors ---------------------------------------------------------------------------- | |
# reassign expert water availability measures | |
veg %<>% rows_update(tibble(Aufnahmenr = 1:8, WasserHH = c(1, 2, 3, 5, 6, 7, 8, 4))) | |
# make sure plot-level data don't have duplicates | |
select(veg, Aufnahmenr, Ort, Vegetation, WasserHH, Bodenfeuchte) %>% | |
n_distinct() # should equal the number of plots (n = 8) | |
# make sure species are not duplicated within plots | |
nrow(veg) # 141 | |
select(veg, Aufnahmenr, WissName) %>% | |
n_distinct() # should equal the number of rows (n = 141) | |
# make sure species-level data (from databases) are unique | |
select(veg, WissName) %>% | |
n_distinct() # 79 | |
select(veg, WissName, Lebensform, Germinulenmasse, Bestaeubung, Feuchte) %>% | |
n_distinct() # should equal the number of species (n = 79) | |
# check life form | |
table(veg$Lebensform) | |
filter(veg, Lebensform == "G") %>% | |
pull(WissName) %>% | |
unique() | |
veg %<>% mutate(Lebensform = case_when( | |
WissName == "Phalaris arundinacea" ~ "mL", | |
TRUE ~ Lebensform | |
)) | |
# check % cover per species | |
hist(veg$Deckung) | |
# check total % cover per plot | |
group_by(veg, Aufnahmenr) %>% | |
summarise(Deckung.tot = sum(Deckung)) | |
# check vegetation types at each site | |
table(veg$Ort, veg$Vegetation) | |
# check if Ellenberg humidity indicator values are integers | |
unique(veg$Feuchte) | |
unique(as.integer(veg$Feuchte)) | |
veg %<>% | |
mutate( | |
Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols | |
as.integer() # convert to integer | |
) | |
veg[veg$Fortlaufendenr == 41, "Feuchte"] <- NA | |
# check taxon names | |
tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE) | |
View(tax) | |
veg %<>% | |
mutate( | |
WissName = recode( | |
WissName, | |
`Taraxacum officinalis` = "Taraxacum officinale", | |
`Viola arvense` = "Viola arvensis" | |
) | |
) | |
veg[veg$Fortlaufendenr == 41, "WissName"] <- "Agrostis capillaris" | |
# check leaf area | |
summary(veg$Blattflaeche) | |
veg %<>% mutate(Blattflaeche = na_if(Blattflaeche, 0)) | |
# check seed mass | |
summary(veg$Germinulenmasse) | |
veg %>% | |
filter(Germinulenmasse == .001) %>% | |
pull(WissName) %>% | |
unique() | |
View(available_tr8) | |
gmasse_tr8 <- TR8::tr8( | |
species_list = c("Agrostis stolonifera", "Cerastium semidecandrum"), | |
download_list = c("seed_wght", "SeedMass", "seed_mass"), | |
allow_persistent = TRUE | |
) | |
gmasse_tr8 | |
veg %<>% | |
mutate(Germinulenmasse = case_when( | |
WissName == "Cerastium semidecandrum" ~ .04, | |
WissName == "Agrostis stolonifera" ~ .07, | |
Fortlaufendenr == 41 ~ .1, | |
TRUE ~ Germinulenmasse | |
)) | |
# take a look at the data again | |
skim(veg) | |
# impute missing trait data ------------------------------------------------------------------------ | |
# life form (data from FloraWeb) | |
select(veg, WissName, Lebensform) %>% | |
distinct() %>% | |
filter(is.na(Lebensform)) | |
veg %<>% | |
mutate(Lebensform = case_when( | |
WissName %in% c("Cerastium glomeratum", "Erophila verna") ~ "mL", | |
TRUE ~ Lebensform | |
)) | |
# pollination type (data from FloraWeb) | |
select(veg, WissName, Bestaeubung) %>% | |
distinct() %>% | |
filter(is.na(Bestaeubung)) | |
veg %<>% | |
mutate(Bestaeubung = case_when( | |
WissName %in% c( | |
"Cerastium glomeratum", "Erophila verna", "Valerianella spec." | |
) ~ "se", | |
TRUE ~ Bestaeubung | |
)) | |
# seed mass (data from Seed Information Database) | |
select(veg, WissName, Germinulenmasse) %>% | |
distinct() %>% | |
filter(is.na(Germinulenmasse)) | |
veg %<>% | |
mutate(Germinulenmasse = case_when( | |
WissName == "Allium vineale" ~ 7, | |
WissName == "Carex ovalis" ~ .52, | |
WissName == "Cerastium glomeratum" ~ .05, | |
WissName == "Cerastium glutinosum" ~ .0654, | |
WissName == "Cruciata laevipes" ~ 3.5856, | |
WissName == "Erodium cicutarium" ~ 2, | |
WissName == "Erophila verna" ~ .024, | |
WissName == "Senecio erucifolius" ~ .3935, | |
WissName == "Saxifraga granulata" ~ .03, | |
WissName == "Prunus mahaleb" ~ 79.9, | |
WissName == "Polygonum persicaria" ~ 2.1, | |
TRUE ~ Germinulenmasse | |
)) | |
summary(veg$Germinulenmasse) | |
# convert life form and pollination to factor | |
veg %<>% mutate(across(c(Lebensform, Bestaeubung), ~ as.factor(.))) | |
# calculate SLA & LDMC ----------------------------------------------------------------------------- | |
veg %<>% | |
mutate( | |
SLA = Blattflaeche / Trockenmasse, | |
LDMC = Trockenmasse / Frischmasse | |
) | |
# check SLA | |
hist(veg$SLA) | |
# check LDMC | |
hist(veg$LDMC) | |
filter(veg, LDMC > 500) %>% | |
select(Fortlaufendenr, WissName, Deckung, Blattflaeche, Frischmasse, Trockenmasse, LDMC) | |
# make corrections in the data | |
veg %<>% | |
mutate( | |
Frischmasse = if_else(Fortlaufendenr == 127, .2063, Frischmasse), | |
Trockenmasse = if_else(Fortlaufendenr == 79, 85, Trockenmasse) # it's a guess! | |
) | |
# recalculate SLA & LDMC | |
veg %<>% | |
mutate( | |
SLA = Blattflaeche / Trockenmasse, | |
LDMC = Trockenmasse / Frischmasse | |
) | |
# analyse species numbers -------------------------------------------------------------------------- | |
# check the number of species per plot | |
artz1 <- table(veg$Aufnahmenr) | |
artz1 | |
# check the number of species at each water availability level | |
artz2 <- table(veg$WasserHH) | |
artz2 | |
# test whether the observed species numbers differ from the expected | |
chisq.test(artz2) | |
chisq.test(artz2)$expected | |
barplot(artz2, | |
xlab = "geschätzte Wasserverfügbarkeit (ordinal)", | |
ylab = "Artenzahl" | |
) | |
# analyse water availability ----------------------------------------------------------------------- | |
# check the correspondence between the Ellenberg values and estimated water availability | |
boxplot(veg$Feuchte ~ veg$WasserHH) | |
# calculate the mean plot-level Ellenberg humidity indicator value | |
WasserHH.Feuchte <- group_by(veg, Aufnahmenr, WasserHH, Bodenfeuchte) %>% | |
summarise( | |
# community mean | |
Feuchte_CM = mean(Feuchte, na.rm = TRUE) %>% | |
round(., 1), | |
# community weighted mean | |
Feuchte_CWM = weighted.mean(x = Feuchte, w = Deckung, na.rm = TRUE) %>% | |
round(., 1) | |
) | |
# plot the plot-level Ellenberg humidity | |
op <- par(mfcol = c(1, 2), mar = c(4, 4, 2, 2)) | |
plot(Feuchte_CWM ~ WasserHH, | |
data = WasserHH.Feuchte, col = "red", | |
pch = 20, ylab = "mittlere Feuchte nach Ellenberg" | |
) | |
points(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20) | |
legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20) | |
plot(Feuchte_CWM ~ Bodenfeuchte, data = WasserHH.Feuchte, col = "red", pch = 20, ylab = "") | |
points(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20) | |
legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20) | |
par(op) | |
# calculate correlation between the water availability and plot-level Ellenberg humidity | |
cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, method = "kendall", exact = FALSE) | |
cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CWM, method = "kendall", exact = FALSE) | |
cor.test(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CWM, | |
method = "kendall", exact = FALSE | |
) | |
# add the calculated mean Ellenberg humidity to the main data table | |
veg %<>% left_join(WasserHH.Feuchte) | |
glimpse(veg) | |
# analyse LDMC ------------------------------------------------------------------------------------- | |
# LDMC = leaf dry matter content | |
# inspect the distribution of LDMC visually | |
hist(veg$LDMC) | |
# plot LDMC across water availability levels | |
boxplot( | |
LDMC ~ WasserHH, | |
dat = veg, ylab = "LDMC", xlab = "geschaetzte Wasserverfuegbarkeit (ordinal)", | |
col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y" | |
) | |
boxplot(LDMC ~ Feuchte_CWM, | |
dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg", | |
x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y" | |
) | |
ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + | |
geom_point(na.rm = TRUE) + | |
stat_smooth(method = "lm", col = "#E69F00", se = FALSE, na.rm = TRUE) + | |
labs(x = "Bodenfeuchte [%]") + | |
theme_minimal() | |
# perform regression analyses | |
# using WasserHH as explanatory variable | |
fm0.LDMC <- lm(LDMC ~ WasserHH, data = veg) | |
summary(fm0.LDMC) | |
performance::check_model(fm0.LDMC) | |
fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover | |
summary(fm1.LDMC) | |
check_model(fm1.LDMC) | |
# compare the two models | |
tbl_merge( | |
tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)), | |
tab_spanner = c("**Non-weighted**", "**Weigted**") | |
) | |
# using soil moisture (Bodenfeuchte) as explanatory variable | |
fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg) | |
summary(fm0.LDMC.b) | |
check_model(fm0.LDMC.b) | |
fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg) | |
summary(fm1.LDMC.b) | |
check_model(fm1.LDMC.b) | |
# compare the two models | |
tbl_merge( | |
tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)), | |
tab_spanner = c("**Non-weighted**", "**Weigted**") | |
) | |
plot1 <- ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + | |
geom_point(na.rm = TRUE) + | |
geom_abline( | |
intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2], | |
col = "#E69F00", size = 1, na.rm = TRUE | |
) + | |
geom_abline( | |
intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2], | |
col = "#56B4E9", size = 1, na.rm = TRUE | |
) + | |
labs(x = "Bodenfeuchte [%]") + | |
theme_minimal() + | |
annotate("text", x = 20, y = 270, label = "Non-weighted", col = "#E69F00") + | |
annotate("text", x = 25, y = 320, label = "Weighted", col = "#56B4E9") | |
plot1 | |
# analyse SLA -------------------------------------------------------------------------------------- | |
# SLA = specific leaf area | |
# inspect the distribution of SLA visually | |
hist(veg$SLA) | |
hist(log(veg$SLA)) | |
# test the normality of distribution | |
shapiro_test(veg$SLA) | |
shapiro_test(log(veg$SLA)) | |
# add log-transformed values to the main data table | |
veg %<>% mutate(SLA.log = log(SLA)) | |
# plot the distribution of SLA across water availability levels | |
op <- par(mar = c(4, 5, 2, 3)) | |
boxplot(SLA ~ WasserHH, | |
dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", | |
ylab = "SLA", col = "blue", log = "y" | |
) | |
filter(veg, WasserHH == 2 & SLA > 25) %>% | |
select(WissName, Aufnahmenr, SLA) | |
filter(veg, WissName == "Euphorbia cyparissias") %>% | |
select(WissName, Aufnahmenr, SLA) | |
boxplot(SLA.log ~ WasserHH, | |
dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", | |
ylab = expression(log("SLA")), col = "blue" | |
) | |
par(op) | |
filter(veg, WasserHH == 7 & SLA.log < 3) %>% | |
select(WissName, Aufnahmenr, SLA) | |
# perform regression analyses | |
fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg) | |
summary(fm1.SLA) | |
check_model(fm1.SLA) | |
fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg) | |
summary(fm1.SLA.log) | |
check_model(fm1.SLA.log) | |
fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg) | |
summary(fm1.SLA.log.b) | |
check_model(fm1.SLA.log.b) | |
# plot the regression line | |
plot2 <- ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) + | |
geom_point(na.rm = TRUE) + | |
geom_abline( | |
intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2], | |
col = "#56B4E9", size = 1, na.rm = TRUE | |
) + | |
labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) + | |
theme_minimal() | |
plot2 | |
# look at the relationship between SLA and LDMC | |
plot(SLA ~ LDMC, data = veg) | |
cor.test(veg$SLA, veg$LDMC, use = "complete.obs") | |
# analyse height ----------------------------------------------------------------------------------- | |
# inspect distributions visually | |
hist(veg$Hoehe.avg) | |
hist(log(veg$Hoehe.avg)) | |
# test the normality of distribution | |
shapiro_test(veg$Hoehe.avg) | |
shapiro_test(log(veg$Hoehe.avg)) | |
# add log-transformed values to the main data table | |
veg %<>% mutate(Hoehe.avg.log = log(Hoehe.avg)) | |
# plot the distribution of height across water availability levels | |
op <- par(mar = c(4, 5, 2, 3)) | |
boxplot(Hoehe.avg ~ WasserHH, | |
data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", | |
ylab = "Wuchshöhe [cm]", col = rainbow(7, start = 1, end = 5 / 7), | |
log = "y" | |
) | |
filter(veg, WasserHH == 6 & Hoehe.avg > 50) %>% | |
select(WissName, Aufnahmenr, Hoehe.avg) | |
filter(veg, WasserHH == 8 & Hoehe.avg < 20) %>% | |
select(WissName, Aufnahmenr, Hoehe.avg) | |
boxplot(Hoehe.avg.log ~ WasserHH, | |
data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", | |
ylab = expression(log("Wuchshöhe [cm]")), col = rainbow(7, start = 1, end = 5 / 7) | |
) | |
par(op) | |
# perform regression analysis | |
fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg) | |
summary(fm1.hoehe.avg.log) | |
check_model(fm1.hoehe.avg.log) | |
fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg) | |
summary(fm1.hoehe.avg.log.b) | |
check_model(fm1.hoehe.avg.log.b) | |
# plot the regression line | |
plot3 <- ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) + | |
geom_point(na.rm = TRUE) + | |
geom_abline( | |
intercept = fm1.hoehe.avg.log.b$coefficients[1], | |
slope = fm1.hoehe.avg.log.b$coefficients[2], | |
col = "#56B4E9", size = 1, na.rm = TRUE | |
) + | |
geom_smooth(col = "red", se = FALSE, na.rm = TRUE) + | |
labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) + | |
theme_minimal() | |
plot3 | |
# analyse height variance -------------------------------------------------------------------------- | |
# plot height variance | |
hist(veg$Hoehe.var) | |
hist(log(veg$Hoehe.var)) | |
# add log-transformed values to the main data table | |
veg %<>% mutate(Hoehe.var.log = log(Hoehe.var)) | |
# check the relationship between height and height variance | |
boxplot(Hoehe.var.log ~ WasserHH, data = veg) | |
plot(Hoehe.var.log ~ Hoehe.avg, data = veg) | |
cor.test(veg$Hoehe.var.log, veg$Hoehe.avg) | |
# calculate the coefficient of variation | |
veg %<>% mutate(Hoehe.cv = sqrt(Hoehe.var) / Hoehe.avg * 100) | |
hist(veg$Hoehe.cv) | |
hist(log(veg$Hoehe.cv)) | |
veg %<>% mutate(Hoehe.cv.log = log(Hoehe.cv)) | |
plot(Hoehe.cv.log ~ Hoehe.avg, data = veg) | |
cor.test(veg$Hoehe.cv.log, veg$Hoehe.avg) | |
boxplot(Hoehe.cv ~ WasserHH, | |
data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", | |
ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1] | |
) | |
filter(veg, WasserHH == 4 & Hoehe.cv > 100) %>% | |
select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv) | |
# perform regression analysis | |
fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung) | |
summary(fm1.Hoehe.cv.log) | |
check_model(fm1.Hoehe.cv.log) | |
fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung) | |
summary(fm1.Hoehe.cv.log.b) | |
check_model(fm1.Hoehe.cv.log.b) | |
# analyse seed mass -------------------------------------------------------------------------------- | |
hist(veg$Germinulenmasse) | |
hist(log(veg$Germinulenmasse)) | |
veg %<>% mutate(Germinulenmasse.log = log(Germinulenmasse)) | |
op <- par(mar = c(4, 5, 2, 3)) | |
boxplot(Germinulenmasse ~ WasserHH, data = veg, log = "y") | |
boxplot(Germinulenmasse.log ~ WasserHH, data = veg, ylab = expression(log("Germinulenmasse"))) | |
filter(veg, Germinulenmasse.log > 4) %>% | |
select(WissName, Germinulenmasse) | |
par(op) | |
# perform regression analysis | |
fm1.gmasse.log <- lm(Germinulenmasse.log ~ WasserHH, data = veg, weight = Deckung) | |
summary(fm1.gmasse.log) | |
check_model(fm1.gmasse.log) | |
fm1.gmasse.log.b <- lm(Germinulenmasse.log ~ Bodenfeuchte, data = veg, weight = Deckung) | |
summary(fm1.gmasse.log.b) | |
check_model(fm1.gmasse.log.b) | |
# analyse life form -------------------------------------------------------------------------------- | |
# compare the distributions of life forms across vegetation types | |
select(veg, Lebensform, Vegetation) %>% | |
gtsummary::tbl_summary(by = Vegetation) %>% | |
add_p() | |
# calculate total % cover per life form across humidity levels | |
lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>% | |
summarise(Deckung = sum(Deckung)) | |
head(lft.1) | |
tail(lft.1) | |
lft.2 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.1) | |
lft.2 <- DescTools::as.matrix.xtabs(lft.3) | |
lft.2 | |
chisq_test(lft.2, simulate.p.value = TRUE) | |
# plot the distribution of life forms across humidity levels | |
# using base R | |
op <- par(mar = c(4, 5, 2, 3)) | |
barplot(lft.3, | |
xlab = "geschätzte Wasserverfügbarkeit (ordinal)", | |
ylab = expression(sum("Deckung")), | |
main = "Lebensformenspektren", ylim = c(0, 160) | |
) | |
legend(8.5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3))) | |
par(op) | |
# using ggplot2 | |
ggplot(data = lft.1, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) + | |
geom_bar(position = "stack", stat = "identity") + | |
viridis::scale_fill_viridis(discrete = TRUE) + | |
labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) + | |
ggtitle("Lebensformenspektren") + | |
theme_minimal() | |
# analyse pollination type ------------------------------------------------------------------------- | |
# compare the distributions of pollination syndromes across vegetation types | |
select(veg, Bestaeubung, Vegetation) %>% | |
gtsummary::tbl_summary(by = Vegetation) %>% | |
add_p() | |
# calculate total % cover per pollination type across humidity levels | |
bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>% | |
summarise(Deckung = sum(Deckung)) | |
head(bst.1) | |
tail(bst.1) | |
bst.2 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.1) | |
bst.2 <- DescTools::as.matrix.xtabs(bst.2) | |
bst.2 | |
chisq_test(bst.2, simulate.p.value = TRUE) | |
# plot the distribution of pollination types | |
# using base R | |
op <- par(mar = c(4, 5, 2, 3)) | |
cols <- c("yellow", "brown", "grey", "lightblue") | |
barplot(bst.3, | |
xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(sum("Deckung")), | |
main = "Bestäubungstypenspektren", | |
sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols | |
) | |
legend(0, 140, | |
legend = c("Wind", "Selbst", "multiple", "Insekten"), | |
pch = 15, col = cols[4:1], cex = 0.7 | |
) | |
# using ggplot2 | |
plot4 <- ggplot(data = bst.1, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) + | |
geom_bar(position = "stack", stat = "identity") + | |
viridis::scale_fill_viridis( | |
discrete = TRUE, labels = c("Insekten", "multiple", "Selbst", "Wind") | |
) + | |
labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) + | |
ggtitle("Bestäubungstypenspektren") + | |
theme_minimal() | |
plot4 | |
# analyse the relationships between traits --------------------------------------------------------- | |
# test if height varies across life forms | |
# plot height by life form | |
ggpubr::ggboxplot(veg, x = "Lebensform", y = "Hoehe.avg.log", select = c("H", "G", "T", "mL")) | |
# check the ANOVA assumptions | |
# outliers | |
group_by(veg, Lebensform) %>% | |
identify_outliers(Hoehe.avg.log) | |
# normality | |
lm.hoe.lf <- lm(Hoehe.avg.log ~ Lebensform, data = veg) | |
ggpubr::ggqqplot(residuals(lm.hoe.lf)) | |
rstatix::shapiro_test(residuals(lm.hoe.lf)) | |
# homogeneity of variances | |
levene_test(veg, Hoehe.avg.log ~ Lebensform) | |
# perform ANOVA and the Kruskal-Wallis test (non-parametric alternative) | |
anova_test(veg, Hoehe.avg.log ~ Lebensform) | |
# because the normality of residuals assumption was violated | |
# the Kruskal Wallis test should be used instead of a standard ANOVA | |
kruskal_test(veg, Hoehe.avg.log ~ Lebensform) | |
# test if the height varies across pollination types | |
veg$Bestaeubung <- as.factor(veg$Bestaeubung) | |
ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log") | |
group_by(veg, Bestaeubung) %>% | |
identify_outliers(Hoehe.avg.log) %>% | |
select(Fortlaufendenr, WissName, Hoehe.avg, Hoehe.avg.log, is.outlier, is.extreme) | |
lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg) | |
ggqqplot(residuals(lm.hoe.best)) | |
shapiro_test(residuals(lm.hoe.best)) | |
levene_test(veg, Hoehe.avg.log ~ Bestaeubung) | |
# because the normality of residuals assumption was violated | |
# we use the Kruskal Wallis test instead of a standard ANOVA | |
# and the Dunn's post hoc test for pairwise comparisons | |
hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung) | |
hoe.best.kw | |
hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung) | |
hoe.best.pwc | |
# plot the results | |
hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung") | |
plot6 <- ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") + | |
stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) + | |
labs( | |
subtitle = get_test_label(hoe.best.kw, detailed = TRUE), | |
caption = get_pwc_label(hoe.best.pwc), | |
y = expression(log("Wuchshöhe")), | |
x = "Bestäubungstyp" | |
) | |
plot6 | |
# test if the SLA varies across life forms | |
ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL")) | |
group_by(veg, Lebensform) %>% | |
identify_outliers(SLA.log) | |
lm.sla.lf <- lm(SLA.log ~ Lebensform, data = veg) | |
ggqqplot(residuals(lm.sla.lf)) | |
shapiro_test(residuals(lm.sla.lf)) | |
levene_test(veg, SLA.log ~ Lebensform) | |
anova_test(veg, SLA.log ~ Lebensform) | |
# test if the seed mass varies across life forms | |
gmasse <- select(veg, WissName, Germinulenmasse.log, Lebensform, Bestaeubung) %>% | |
distinct() | |
ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") | |
group_by(gmasse, Lebensform) %>% | |
identify_outliers(Germinulenmasse.log) | |
lm.gmasse.lf <- lm(Germinulenmasse.log ~ Lebensform, data = gmasse) | |
ggqqplot(residuals(lm.gmasse.lf)) | |
shapiro_test(residuals(lm.gmasse.lf)) | |
levene_test(gmasse, Germinulenmasse.log ~ Lebensform) | |
gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform) | |
gmasse.lf.aov | |
gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform) | |
gmasse.lf.pwc | |
gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform") | |
plot7 <- ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") + | |
stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) + | |
labs( | |
subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE), | |
caption = get_pwc_label(gmasse.lf.pwc), | |
y = expression(log("Germinulenmasse")), | |
x = "Lebensform" | |
) | |
plot7 | |
# test if the seed mass varies across pollination types | |
ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") | |
group_by(gmasse, Bestaeubung) %>% | |
identify_outliers(Germinulenmasse.log) | |
fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse) | |
ggpubr::ggqqplot(residuals(fm.gmasse.best)) | |
rstatix::shapiro_test(residuals(fm.gmasse.best)) | |
ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung") | |
levene_test(gmasse, Germinulenmasse.log ~ Bestaeubung) | |
# because the homogeneity of variance assumption was violated | |
# we use the Welch test instead of a standard ANOVA | |
# and the Games-Howell post hoc test for pairwise comparisons | |
gmasse.best.aov <- welch_anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung) | |
gmasse.best.aov | |
gmasse.best.pwc <- games_howell_test(gmasse, Germinulenmasse.log ~ Bestaeubung) | |
gmasse.best.pwc | |
gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung") | |
ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") + | |
stat_pvalue_manual(gmasse.best.pwc, hide.ns = TRUE) + | |
labs( | |
subtitle = get_test_label(gmasse.best.aov, detailed = TRUE), | |
caption = get_pwc_label(gmasse.best.pwc), | |
y = expression(log("Germinulenmasse")), | |
x = "Bestäubungstyp" | |
) | |
# save the edited data as a separate file | |
write_csv2(veg, here("veg_data_2020_edited.csv")) | |
# analyse species composition ---------------------------------------------------------------------- | |
veg <- read_csv2(here("veg_data_2022_edited.csv")) | |
mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg) | |
str(mat) | |
# perform correspondence analysis | |
decorana(mat) | |
fm.ca <- cca(mat) | |
fm.ca | |
summary(fm.ca) | |
plot(fm.ca, display = "sites") | |
plot(fm.ca) | |
# shorten taxon names | |
wname <- str_remove(veg$WissName, pattern = " cf\\.| x") %>% | |
str_split(pattern = " ", simplify = TRUE) %>% | |
substr(., 1, 3) | |
wname <- paste(wname[, 1], wname[, 2], sep = ".") | |
veg %<>% mutate(wname = wname) | |
# repeat the same analysis with shortened taxon names provided | |
mat <- xtabs(Deckung ~ Aufnahmenr + wname, data = veg) | |
fm.ca <- cca(mat) | |
summary(fm.ca) | |
# plot the results | |
summary(fm.ca)$species %>% | |
as_tibble() %>% | |
mutate(wname = rownames(summary(fm.ca)$species)) %>% | |
ggplot(aes(x = CA1, y = CA2, label = wname)) + | |
geom_point() + | |
ggrepel::geom_text_repel(size = 1.5, force = 15, max.overlaps = 30) + | |
theme_bw() | |
# prepare data for trait ordination ---------------------------------------------------------------- | |
# calculate the proportion of each life form per plot | |
lf.kt <- select(veg, Aufnahmenr, Deckung, Lebensform) %>% | |
pivot_wider( | |
names_from = Lebensform, values_from = Deckung, | |
values_fn = sum, values_fill = 0, names_prefix = "lf." | |
) %>% | |
mutate(total = rowSums(.[, -1])) %>% | |
mutate(across(lf.H:lf.M, ~ . / total), .keep = "unused") | |
# calculate the proportion of each pollination type per plot | |
best.kt <- select(veg, Aufnahmenr, Deckung, Bestaeubung) %>% | |
pivot_wider( | |
names_from = Bestaeubung, values_from = Deckung, | |
values_fn = sum, values_fill = 0, names_prefix = "b." | |
) %>% | |
mutate(total = rowSums(.[, -1])) %>% | |
mutate(across(b.wi:b.mb, ~ . / total), .keep = "unused") | |
# compute community weighted means for numerical traits | |
cwm <- group_by(veg, Aufnahmenr) %>% | |
summarise(across( | |
c(Hoehe.avg.log, Hoehe.cv.log, LDMC, SLA.log, Germinulenmasse.log), | |
~ weighted.mean(., w = Deckung, na.rm = TRUE) | |
)) %>% | |
rename( | |
Ho.av = Hoehe.avg.log, | |
Ho.cv = Hoehe.cv.log, | |
SLA = SLA.log, | |
GM = Germinulenmasse.log | |
) | |
# combine the three tables | |
cwm %<>% | |
left_join(lf.kt) %>% | |
left_join(best.kt) | |
cwm | |
# perform principal components analysis (PCA) of CWMs ---------------------------------------------- | |
# analyse all traits | |
pca1 <- FactoMineR::PCA(cwm[, -c(1:2)]) | |
summary(pca1) | |
cowplot::plot_grid( | |
plot.PCA(pca1), | |
plot.PCA(pca1, c(1, 3)) | |
) | |
# analyse numerical traits only | |
colnames(cwm) | |
pca2 <- PCA(select(cwm, Ho.av:GM)) | |
cowplot::plot_grid( | |
plot.PCA(pca2), | |
plot.PCA(pca2, c(1, 3)) | |
) | |
# perform PCA of species traits -------------------------------------------------------------------- | |
# create a separate table containing numerical traits | |
dat <- select( | |
veg, WissName, Vegetation, Blattflaeche, Frischmasse, Trockenmasse, | |
SLA, LDMC, Hoehe.avg, Hoehe.var, Germinulenmasse | |
) %>% | |
rename( | |
BlF = Blattflaeche, | |
FrM = Frischmasse, | |
TrM = Trockenmasse, | |
Ho.av = Hoehe.avg, | |
Ho.var = Hoehe.var, | |
GM = Germinulenmasse | |
) %>% | |
na.omit() | |
glimpse(dat) | |
# perform PCA | |
pca3 <- FactoMineR::PCA(dat[, -c(1:2)]) | |
summary(pca3) | |
plot.PCA(pca3) | |
# add PC to the trait table | |
dat$pc1 <- pca3$ind$coord[, 1] | |
dat$pc2 <- pca3$ind$coord[, 2] | |
pc12 <- pca3$ind$coord[, 1:2] | |
# plot the PCA results | |
ggplot(data = dat, aes(x = pc1, y = pc2, color = Vegetation, shape = Vegetation)) + | |
geom_hline(yintercept = 0, lty = 2) + | |
geom_vline(xintercept = 0, lty = 2) + | |
geom_point(alpha = .8) + | |
stat_ellipse( | |
geom = "polygon", aes(fill = Vegetation), alpha = .2, | |
show.legend = FALSE, level = .95 | |
) + | |
guides( | |
color = guide_legend(title = "Vegetation"), | |
shape = guide_legend(title = "Vegetation") | |
) + | |
labs(x = "PC 1 (45.24%)", y = "PC 2 (20.27%)") + | |
theme_minimal() + | |
theme( | |
panel.grid = element_blank(), | |
panel.border = element_rect(fill = "transparent"), | |
legend.position = "bottom" | |
) | |
# plot PCA results using kernels | |
# Code from Björn Reu | |
# Used to produce Fig. 2 in Diaz et al. 2016, Nature | |
H <- Hpi(x = pc12) # optimal bandwidth estimation | |
est <- kde(x = pc12, H = H, compute.cont = TRUE) # kernel density estimation | |
# set contour probabilities for drawing contour levels | |
cl <- contourLevels(est, prob = c(.5, .05, .001), approx = TRUE) | |
# use envfit for drawing arrows, can be also done using trait loadings | |
fit <- envfit(pc12, select(dat, BlF:GM)) | |
# create a plot | |
par(mar = c(4, 4, 2, 2)) | |
cols <- ifelse(dat$Vegetation == "TR", "darkred", "darkblue") | |
plot(est, | |
cont = seq(1, 100, by = 1), display = "filled.contour2", add = FALSE, ylab = "", xlab = "", | |
cex.axis = .75, ylim = c(-5, 4), xlim = c(-3, 7), las = 1 | |
) | |
plot(est, abs.cont = cl[1], labels = .5, labcex = .75, add = TRUE, lwd = .75, col = "grey30") | |
plot(est, abs.cont = cl[2], labels = .95, labcex = .75, add = TRUE, lwd = .5, col = "grey60") | |
plot(est, abs.cont = cl[3], labels = .99, labcex = .75, add = TRUE, lwd = .5, col = "grey60") | |
points(pc12, pch = 16, cex = .5, col = cols) | |
plot(fit, cex = .90, col = 1) | |
mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8) | |
mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8) | |
dat[dat$pc1 > 4, ] | |
dat[dat$pc2 < -3, ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment