Skip to content

Instantly share code, notes, and snippets.

@euxoa
Forked from anonymous/gist:5c812659e110607034f5
Last active August 29, 2015 14:15
Show Gist options
  • Save euxoa/b39c7b61a53ecb150c0c to your computer and use it in GitHub Desktop.
Save euxoa/b39c7b61a53ecb150c0c to your computer and use it in GitHub Desktop.
library(dplyr)
# install.packages("rgeos", type="source")
# install.packages("rgbif", type="source")
# library(rgbif)
# d <- occ_search(orderKey=797, datasetKey="a8d08280-1def-11de-be11-b8a03c50a862", year="2013", limit=2000)
library(RJSONIO)
gen.template <- "http://hyonteiset.luomus.fi/insects/json?op=%s&order=Lep&%s"
kuhmo.url <- sprintf(gen.template, "search", "&county=Kuhmo&startYear=2013&endYear=2013&select=all")
jsobj <- readLines(kuhmo.url)
Encoding(jsobj) <- "latin1"
d.orig <- plyr::ldply(fromJSON(jsobj), identity) %>%
filter(grepl("[0-9]+ ?W|valorysä|valo", method) & grepl("Tähtinen|Saarenmaa", observer)) %>%
filter(family %in% c("Lasiocampidae", "Sphingidae", "Drepanidae", "Geometridae",
"Notodontidae", "Lymantriidae", "Nolidae", "Arctiidae", "Noctuidae")) %>%
mutate(name=paste(genus, species, sep=" "), count=as.integer(totalCount))
d <- d.orig %>%
select(name, count) %>%
group_by(name) %>%
summarise(count=sum(count))
nrow(d); sum(d$count)
plot(log(-sort(-d$count)))
m.ls <- sads::fitsad(d$count, "ls")
m.ls
sqrt(m.ls@vcov[1, 1]) # approximate posterior sd of alpha
m.lp <- sads::fitsad(d$count, "poilog"); sads::ppsad(m.lp)
plot(log(-sort(-d$count))); lines(log(sads::radpred(m.lp)$abund), col="red")
# Expected total number of species from the log-poison model
n.sample <- 10000
p.missing <- sum(dpois(0, lambda=rlnorm(n.sample, m.lp@coef[1], m.lp@coef[2])))/n.sample
n.spec <- nrow(d)/(1-p.missing)
# Again, the posterior
par.sample <- MASS::mvrnorm(n=10000, m.lp@coef, m.lp@vcov)
p.missing <- apply(par.sample, 1, function (pars) {mean(dpois(0, rlnorm(1000, pars[1], pars[2])))})
n.spec <- nrow(d)/(1-p.missing)
hist(n.spec, n=100)
# Uncertainties are high and the estimates are strongly correlated
vc <- m.lp@vcov; m.lp@coef; sqrt(diag(vc)); (vc/sqrt(outer(diag(vc), diag(vc))))[1, 2]
pred <-
data.frame(n.speciments=1:10,
real.count=table(d$count)[1:10],
pred.ls=nrow(d)*sads::dls(1:10, sum(d$count), m.ls@coef),
pred.lp=table(sads::radpred(m.lp)$abund)[1:10])
1-ppois(pred$real.count, pred$pred.ls)
# Per yday there's too little data to get proper estimates for theta or alpha.
# (Also the gaussian approximation for the posterior breaks up.)
d2 <- d.orig %>%
select(name, count, startDay, startMonth) %>%
mutate(yday=30*as.integer(startMonth)+as.integer(startDay)) %>%
group_by(yday, name) %>%
summarise(count=sum(count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment