Last active
July 11, 2020 18:36
-
-
Save MatsuuraKentaro/3a2b6ef99777cecb5f5ad92b7ee8951a to your computer and use it in GitHub Desktop.
COVID-19: estimate effective reproduction number
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
functions { | |
// calculating the convolutions | |
vector convolution(vector x, vector y_rev) { | |
int T = num_elements(x); | |
vector[T-1] res; | |
for (t in 2:T) { | |
res[t-1] = dot_product(x[1:(t-1)], y_rev[(T-t+2):T]); | |
} | |
return res; | |
} | |
// continuous Poisson distribution | |
real continuous_poisson_lpdf(vector y, vector lam) { | |
real lp = sum(y .* log(lam) - lam - lgamma(y + 1)); | |
return lp; | |
} | |
} | |
data { | |
int<lower=1> T; // number of days | |
vector<lower=0>[T] imported_infect; | |
vector<lower=0>[T] domestic_infect; | |
vector[T] p_gt_rev; // generation time distribution (reverse order) | |
} | |
transformed data { | |
vector[T] cases_infect = imported_infect + domestic_infect; | |
} | |
parameters { | |
vector<lower=0>[T-1] Rt; | |
} | |
model { | |
domestic_infect[2:T] ~ continuous_poisson(Rt .* convolution(cases_infect, p_gt_rev)); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
functions { | |
// calculating the convolutions | |
vector convolution(vector x, vector y_rev) { | |
int T = num_elements(x); | |
vector[T-1] res; | |
for (t in 2:T) { | |
res[t-1] = dot_product(x[1:(t-1)], y_rev[(T-t+2):T]); | |
} | |
return res; | |
} | |
// continuous Poisson distribution | |
real continuous_poisson_lpdf(vector y, vector lam) { | |
real lp = sum(y .* log(lam) - lam - lgamma(y + 1)); | |
return lp; | |
} | |
} | |
data { | |
int<lower=1> T; // number of days | |
int<lower=0> domestic_onset[T]; | |
int<lower=0> imported_onset[T]; | |
int<lower=0> domestic_report[T]; | |
int<lower=0> imported_report[T]; | |
vector[T] p_gt_rev; // generation time distribution (reverse order) | |
vector[T] p_inc_rev; // incubation period distribution (reverse order) | |
vector[T] p_rep_rev; // report time distribution (reverse order) | |
} | |
parameters { | |
vector<lower=0>[T-1] Rt; | |
real<lower=0> s_smooth; // smoothing factor | |
simplex[T] domestic_infect_from_onset_raw; // distribution of domestic infected people from onset data | |
simplex[T] imported_infect_from_onset_raw; | |
simplex[T] domestic_infect_from_report_raw; | |
simplex[T] imported_infect_from_report_raw; | |
} | |
transformed parameters { | |
vector[T] domestic_infect_from_onset = domestic_infect_from_onset_raw * sum(domestic_onset); // Number of domestic infected people by using back projection from onset data. | |
vector[T] imported_infect_from_onset = imported_infect_from_onset_raw * sum(imported_onset); | |
vector[T] domestic_infect_from_report = domestic_infect_from_report_raw * sum(domestic_report); | |
vector[T] imported_infect_from_report = imported_infect_from_report_raw * sum(imported_report); | |
vector[T] domestic_infect = domestic_infect_from_onset + domestic_infect_from_report; | |
vector[T] imported_infect = imported_infect_from_onset + imported_infect_from_report; | |
vector[T] cases_infect = domestic_infect + imported_infect; | |
} | |
model { | |
// smoothing on the raw scale | |
domestic_infect_from_onset_raw[2:T] ~ normal(domestic_infect_from_onset_raw[1:(T-1)], s_smooth); | |
imported_infect_from_onset_raw[2:T] ~ normal(imported_infect_from_onset_raw[1:(T-1)], s_smooth); | |
domestic_infect_from_report_raw[2:T] ~ normal(domestic_infect_from_report_raw[1:(T-1)], s_smooth); | |
imported_infect_from_report_raw[2:T] ~ normal(imported_infect_from_report_raw[1:(T-1)], s_smooth); | |
// back projection | |
domestic_onset[2:T] ~ poisson(convolution(domestic_infect_from_onset, p_inc_rev)); | |
imported_onset[2:T] ~ poisson(convolution(imported_infect_from_onset, p_inc_rev)); | |
domestic_report[2:T] ~ poisson(convolution(domestic_infect_from_report, p_rep_rev)); | |
imported_report[2:T] ~ poisson(convolution(imported_infect_from_report, p_rep_rev)); | |
// estimating effective reproduction number | |
domestic_infect[2:T] ~ continuous_poisson(Rt .* convolution(cases_infect, p_gt_rev)); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
functions { | |
// calculating the convolutions | |
vector convolution(vector x, vector y_rev) { | |
int T = num_elements(x); | |
vector[T-1] res; | |
for (t in 2:T) { | |
res[t-1] = dot_product(x[1:(t-1)], y_rev[(T-t+2):T]); | |
} | |
return res; | |
} | |
// continuous Poisson distribution | |
real continuous_poisson_lpdf(vector y, vector lam) { | |
real lp = sum(y .* log(lam) - lam - lgamma(y + 1)); | |
return lp; | |
} | |
} | |
data { | |
int<lower=1> T; // number of days | |
int<lower=0> domestic_onset[T]; | |
int<lower=0> imported_onset[T]; | |
int<lower=0> domestic_report[T]; | |
int<lower=0> imported_report[T]; | |
vector[T] p_gt_rev; // generation time distribution (reverse order) | |
vector[T] p_inc_rev; // incubation period distribution (reverse order) | |
vector[T] p_rep_rev; // report time distribution (reverse order) | |
vector[T] p_rep_cum; // report time cumulative distribution | |
} | |
parameters { | |
vector<lower=0>[T-1] Rt; | |
real<lower=0> s_smooth; // smoothing factor | |
simplex[T] domestic_infect_from_onset_raw; // distribution of domestic infected people from onset data | |
simplex[T] imported_infect_from_onset_raw; | |
simplex[T] domestic_infect_from_report_raw; | |
simplex[T] imported_infect_from_report_raw; | |
simplex[T] domestic_infect_latent_raw; | |
simplex[T] imported_infect_latent_raw; | |
real<lower=sum(domestic_onset)+sum(domestic_report)> domestic_latent_scale; | |
real<lower=sum(imported_onset)+sum(imported_report)> imported_latent_scale; | |
} | |
transformed parameters { | |
vector[T] domestic_infect_from_onset = domestic_infect_from_onset_raw * sum(domestic_onset); // Number of domestic infected people by using back projection from onset data. | |
vector[T] imported_infect_from_onset = imported_infect_from_onset_raw * sum(imported_onset); | |
vector[T] domestic_infect_from_report = domestic_infect_from_report_raw * sum(domestic_report); | |
vector[T] imported_infect_from_report = imported_infect_from_report_raw * sum(imported_report); | |
vector[T] domestic_infect = domestic_infect_from_onset + domestic_infect_from_report; | |
vector[T] imported_infect = imported_infect_from_onset + imported_infect_from_report; | |
vector[T] domestic_infect_latent = domestic_infect_latent_raw * domestic_latent_scale; | |
vector[T] imported_infect_latent = imported_infect_latent_raw * imported_latent_scale; | |
vector[T] cases_infect_latent = domestic_infect_latent + imported_infect_latent; | |
} | |
model { | |
// smoothing on the raw scale | |
domestic_infect_from_onset_raw[2:T] ~ normal(domestic_infect_from_onset_raw[1:(T-1)], s_smooth); | |
imported_infect_from_onset_raw[2:T] ~ normal(imported_infect_from_onset_raw[1:(T-1)], s_smooth); | |
domestic_infect_from_report_raw[2:T] ~ normal(domestic_infect_from_report_raw[1:(T-1)], s_smooth); | |
imported_infect_from_report_raw[2:T] ~ normal(imported_infect_from_report_raw[1:(T-1)], s_smooth); | |
// back projection | |
domestic_onset[2:T] ~ poisson(convolution(domestic_infect_from_onset, p_inc_rev)); | |
imported_onset[2:T] ~ poisson(convolution(imported_infect_from_onset, p_inc_rev)); | |
domestic_report[2:T] ~ poisson(convolution(domestic_infect_from_report, p_rep_rev)); | |
imported_report[2:T] ~ poisson(convolution(imported_infect_from_report, p_rep_rev)); | |
// reporting delay | |
domestic_infect[1:T] ~ continuous_poisson(domestic_infect_latent .* p_rep_cum); | |
imported_infect[1:T] ~ continuous_poisson(imported_infect_latent .* p_rep_cum); | |
// estimating effective reproduction number | |
domestic_infect_latent[2:T] ~ continuous_poisson(Rt .* convolution(cases_infect_latent, p_gt_rev)); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lapply(c('data.table', 'dplyr', 'tidyr', 'rstan', 'ggplot2'), library, character.only=TRUE) | |
rstan_options(auto_write=TRUE) | |
options(mc.cores=4) | |
datemin <- as.Date('2019-12-25') # particular choice | |
datemax <- as.Date('2020-05-10') # particular choice | |
##------ loading the dataset ------ | |
fread('data/JapaneseDataCOVID19 (200510).csv') %>% | |
mutate(report = if_else(is.na(confirmed), reported, confirmed), | |
report = as.Date(report), | |
onset = as.Date(onset)) %>% | |
select(-confirmed, -reported) -> df | |
df_onset <- df %>% | |
filter(!is.na(onset)) %>% | |
group_by(onset, exp_type) %>% | |
summarise(count = n()) %>% ungroup() %>% | |
spread(key = exp_type, value = count, fill = 0) %>% | |
rename(domestic_onset = domestic, imported_onset = imported) | |
df_report <- df %>% | |
filter(is.na(onset)) %>% | |
group_by(report, exp_type) %>% | |
summarise(count = n()) %>% ungroup() %>% | |
spread(key = exp_type, value = count, fill = 0) %>% | |
rename(domestic_report = domestic, imported_report = imported) | |
df_cases <- data.frame(date = seq(datemin, datemax, by = 1)) %>% | |
left_join(df_onset, by = c('date'='onset'), fill = 0) %>% | |
left_join(df_report, by = c('date'='report'), fill = 0) %>% | |
replace_na(list(domestic_onset = 0, imported_onset = 0, domestic_report = 0, imported_report = 0)) | |
##------ predetermined distributions ------ | |
## discretesized distribution of generation time (Nishiura, et al, 2020) | |
T <- nrow(df_cases) | |
c_gt <- pweibull(0:T, shape = 2.305, scale = 5.452) | |
p_gt_rev <- c_gt %>% diff() %>% rev() | |
## discretesized distribution of incubation period (from infection to onset) (Linton et al 2020 - with right truncation excl. Wuhan residents) | |
c_inc <- plnorm(0:T, meanlog = 1.519, sdlog = 0.615) | |
p_inc_rev <- c_inc %>% diff() %>% rev() | |
## discretesized distribution of the delay from infection to report | |
# right-truncated reporting delay from onset to repot (estimated values using the MHLW data): dweibull(t, shape = 1.741, scale = 8.573) | |
cdf_rep <- Vectorize(function(t) { | |
integrand <- function(x) { | |
pweibull(t - x, shape = 1.741, scale = 8.573) * dlnorm(x, meanlog = 1.519, sdlog = 0.615) | |
} | |
val <- integrate(integrand, lower=0, upper=t)$value | |
return(val) | |
}) | |
c_rep <- cdf_rep(0:T) | |
p_rep_rev <- c_rep %>% diff() %>% rev() | |
##------ estimation ------ | |
stan_data = list( | |
T = T, | |
domestic_onset = df_cases$domestic_onset, | |
imported_onset = df_cases$imported_onset, | |
domestic_report = df_cases$domestic_report, | |
imported_report = df_cases$imported_report, | |
p_gt_rev = p_gt_rev, | |
p_inc_rev = p_inc_rev, | |
p_rep_rev = p_rep_rev | |
) | |
stanmodel <- stan_model(file='model/model2.stan') | |
fit <- sampling(stanmodel, | |
data = stan_data, | |
iter = 6000, | |
warmup = 1000, | |
thin = 5, | |
pars = c('Rt', 's_smooth', 'domestic_infect', 'imported_infect', 'cases_infect'), | |
seed = 1234) | |
write.table(data.frame(summary(fit)$summary), file='output/fit-summary-model2.csv', sep=',', quote=TRUE, col.names=NA) | |
save.image(file = 'output/result-model2.RData') | |
##------ plot ------ | |
ms <- rstan::extract(fit) | |
qua <- apply(ms$Rt, 2, quantile, probs=c(2.5, 25, 50, 75, 97.5)/100) | |
d_plot <- data.frame(day = df_cases$date[-T], t(qua), check.names = FALSE) %>% | |
filter(between(day, as.Date('2020-02-01'), as.Date('2020-05-03'))) | |
qua <- apply(ms$cases_infect, 2, quantile, probs=c(2.5, 25, 50, 75, 97.5)/100) | |
d_plot2 <- data.frame(day = df_cases$date, t(qua), check.names = FALSE) %>% | |
filter(between(day, as.Date('2020-02-01'), as.Date('2020-05-03'))) | |
scaling <- 130 | |
p <- ggplot() + | |
geom_bar(data=d_plot2, aes(x=day, y=`50%`/scaling), fill='#FAAB18', stat='identity', width=0.7) + | |
geom_errorbar(data=d_plot2, aes(x=day, ymin=`2.5%`/scaling, ymax=`97.5%`/scaling), width=0.5, alpha = 0.4) + | |
geom_ribbon(data=d_plot, aes(x=day, ymin=`2.5%`, ymax=`97.5%`), fill='#1380A1', alpha=0.4) + | |
geom_line(data=d_plot, aes(x=day, y=`50%`), color='#1380A1') + | |
labs(x='\nDate', y='Effective reproduction number\n') + | |
theme(text=element_text(size=12), axis.text=element_text(size=10), legend.position='none') + | |
scale_x_date(date_labels='%m/%d',date_breaks='10 day', expand=c(0, 0)) + | |
scale_y_continuous(limits=c(0, 5.6), expand=c(0, 0), | |
sec.axis=sec_axis(~. * scaling, breaks=c(200, 400, 600), name='Cases\n')) + | |
geom_hline(yintercept=1, linetype='dashed', color='#1380A1', size=0.7) | |
ggsave(p, file = 'output/fig-model2.png', dpi=300, w=5.6, h=4) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lapply(c('data.table', 'dplyr', 'tidyr', 'rstan', 'ggplot2'), library, character.only=TRUE) | |
rstan_options(auto_write=TRUE) | |
options(mc.cores=4) | |
datemin <- as.Date('2019-12-25') # particular choice | |
datemax <- as.Date('2020-05-10') # particular choice | |
##------ loading the dataset ------ | |
fread('data/JapaneseDataCOVID19 (200510).csv') %>% | |
mutate(report = if_else(is.na(confirmed), reported, confirmed), | |
report = as.Date(report), | |
onset = as.Date(onset)) %>% | |
select(-confirmed, -reported) -> df | |
df_onset <- df %>% | |
filter(!is.na(onset)) %>% | |
group_by(onset, exp_type) %>% | |
summarise(count = n()) %>% ungroup() %>% | |
spread(key = exp_type, value = count, fill = 0) %>% | |
rename(domestic_onset = domestic, imported_onset = imported) | |
df_report <- df %>% | |
filter(is.na(onset)) %>% | |
group_by(report, exp_type) %>% | |
summarise(count = n()) %>% ungroup() %>% | |
spread(key = exp_type, value = count, fill = 0) %>% | |
rename(domestic_report = domestic, imported_report = imported) | |
df_cases <- data.frame(date = seq(datemin, datemax, by = 1)) %>% | |
left_join(df_onset, by = c('date'='onset'), fill = 0) %>% | |
left_join(df_report, by = c('date'='report'), fill = 0) %>% | |
replace_na(list(domestic_onset = 0, imported_onset = 0, domestic_report = 0, imported_report = 0)) | |
##------ predetermined distributions ------ | |
## discretesized distribution of generation time (Nishiura, et al, 2020) | |
T <- nrow(df_cases) | |
c_gt <- pweibull(0:T, shape = 2.305, scale = 5.452) | |
p_gt_rev <- c_gt %>% diff() %>% rev() | |
## discretesized distribution of incubation period (from infection to onset) (Linton et al 2020 - with right truncation excl. Wuhan residents) | |
c_inc <- plnorm(0:T, meanlog = 1.519, sdlog = 0.615) | |
p_inc_rev <- c_inc %>% diff() %>% rev() | |
## discretesized distribution of the delay from infection to report | |
# right-truncated reporting delay from onset to repot (estimated values using the MHLW data): dweibull(t, shape = 1.741, scale = 8.573) | |
cdf_rep <- Vectorize(function(t) { | |
integrand <- function(x) { | |
pweibull(t - x, shape = 1.741, scale = 8.573) * dlnorm(x, meanlog = 1.519, sdlog = 0.615) | |
} | |
val <- integrate(integrand, lower=0, upper=t)$value | |
return(val) | |
}) | |
c_rep <- cdf_rep(0:T) | |
p_rep_rev <- c_rep %>% diff() %>% rev() | |
p_rep_cum <- c_rep %>% rev() %>% .[1:T] | |
##------ estimation ------ | |
stan_data = list( | |
T = T, | |
domestic_onset = df_cases$domestic_onset, | |
imported_onset = df_cases$imported_onset, | |
domestic_report = df_cases$domestic_report, | |
imported_report = df_cases$imported_report, | |
p_gt_rev = p_gt_rev, | |
p_inc_rev = p_inc_rev, | |
p_rep_rev = p_rep_rev, | |
p_rep_cum = p_rep_cum | |
) | |
stanmodel <- stan_model(file='model/model3.stan') | |
fit <- sampling(stanmodel, | |
data = stan_data, | |
iter = 6000, | |
warmup = 1000, | |
thin = 5, | |
pars = c('Rt', 's_smooth', 'domestic_infect_latent', 'imported_infect_latent', 'cases_infect_latent', 'domestic_infect', 'imported_infect'), | |
seed = 1234) | |
write.table(data.frame(summary(fit)$summary), file='output/fit-summary-model3.csv', sep=',', quote=TRUE, col.names=NA) | |
save.image(file = 'output/result-model3.RData') | |
##------ plot ------ | |
ms <- rstan::extract(fit) | |
qua <- apply(ms$Rt, 2, quantile, probs=c(2.5, 25, 50, 75, 97.5)/100) | |
d_plot <- data.frame(day = df_cases$date[-T], t(qua), check.names = FALSE) %>% | |
filter(between(day, as.Date('2020-02-01'), as.Date('2020-05-03'))) | |
qua <- apply(ms$cases_infect_latent, 2, quantile, probs=c(2.5, 25, 50, 75, 97.5)/100) | |
d_plot2 <- data.frame(day = df_cases$date, t(qua), check.names = FALSE) %>% | |
filter(between(day, as.Date('2020-02-01'), as.Date('2020-05-03'))) | |
scaling <- 130 | |
p <- ggplot() + | |
geom_bar(data=d_plot2, aes(x=day, y=`50%`/scaling), fill='#FAAB18', stat='identity', width=0.7) + | |
geom_errorbar(data=d_plot2, aes(x=day, ymin=`2.5%`/scaling, ymax=`97.5%`/scaling), width=0.5, alpha = 0.4) + | |
geom_ribbon(data=d_plot, aes(x=day, ymin=`2.5%`, ymax=`97.5%`), fill='#1380A1', alpha=0.4) + | |
geom_line(data=d_plot, aes(x=day, y=`50%`), color='#1380A1') + | |
labs(x='\nDate', y='Effective reproduction number\n') + | |
theme(text=element_text(size=12), axis.text=element_text(size=10), legend.position='none') + | |
scale_x_date(date_labels='%m/%d',date_breaks='10 day', expand=c(0, 0)) + | |
scale_y_continuous(limits=c(0, 5.6), expand=c(0, 0), | |
sec.axis=sec_axis(~. * scaling, breaks=c(200, 400, 600), name='Cases\n')) + | |
geom_hline(yintercept=1, linetype='dashed', color='#1380A1', size=0.7) | |
ggsave(p, file = 'output/fig-model3.png', dpi=300, w=5.6, h=4) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Andrei,
Thank you very much for your comments.
What does "group" mean? The error bars at the beginning of the outbreak for slide #96 in moid.pdf seem to be proportional to the variance of the Poisson distribution. I feel that the error bars at the beginning are too narrow. On the other hand, I think my smoothing method includes the uncertainty of the incubation period (i.e. uncertainty of the time axis), so the error bars are wider.
Also the shape of my smoothing method is the same as the first-order trend model of the state-space model and there is room for change. For example, a stronger second-order trend model could be considered:
That means "domestic_infect_from_onset_raw[t] - domestic_infect_from_onset_raw[t-1] ~ normal(domestic_infect_from_onset_raw[t-1] - domestic_infect_from_onset_raw[t-2], s_smooth) for t = 3,4,...,T"
However, a strong smoothing generates optimistic credible intervals. In the case of decision-making issues, we adopted a weaker smoothing method because we should be pessimistic and avoid underestimating the credible intervals.
The Poisson distribution in Stan accepts only integer response variables. It can be extended using the gamma function. Certainly, the normalization constant depends on the parameter, but since it is close to 1, we can use the continuous Poisson distribution in the above Stan code as an approximation. See:
https://stats.stackexchange.com/questions/310676/continuous-generalization-of-the-negative-binomial-distribution/311927
See below for examples of use:
http://ac.inf.elte.hu/Vol_039_2013/137_39.pdf.
In addition, Sungmok Jung also uses the continuous Poisson distribution in
In [49]
in the MLE version:The Poisson distribution in OpenBUGS also adopted the continuous Poisson distribution by using the gamma function. But I can't remember the URL of implementation part (Pascal code).
I agree about the difficulty of the nowcasting. I also had to truncate the estimated results of the last 1 week. However, looking at the distribution of reporting delay, I think that truncating two weeks is overkill; data from one to two weeks ago can be used effectively, so it wouldn't be a bad idea to show it as a way of informing uncertainty.
Also, if the smoothing method is a second-order trend model, the Rt will not necessarily be a broad confidence interval centered on 1, because the Rt will be calculated to take into account recent trends in the number of cases. But, I think we should avoid situations where the results depend on the trend model largely. After all, I feel that the nowcasting is a difficult problem.
Best regards,
Kentaro