danlewer · November 20, 2024 15:08
diff --git a/offsets_survival.R b/offsets_survival.R
 # simple simulation of survival data for use in regression analysis
 # shows that the effect of a risk factor on an outcome must be estimated using an offset term rather than adjusting for the follow-up duration

 # sample size
 n <- 1e5

 # simulate random binary exposure with 30% prevalence
 binary_exposure <- rbinom(n, 1, 0.3)

 # simulate total observation duration, associated with binary exposure
 observation <- sample(100:400, n, T, prob = (400:100/1000)^4)
 observation <- observation * ifelse(binary_exposure == 1, 1.3, 1)

 # risk of outcome
 baseline_risk <- 1/500 # baseline risk per time increment
 risk_ratio <- 2 # risk associated with exposure
 risk <- ifelse(binary_exposure == 1, baseline_risk * risk_ratio, baseline_risk) # individual risk

 # simulate first event, censor date, and whether event was observed 
 first_event <- rgeom(length(risk), risk) + 1
 censored_observation <- pmin(observation, first_event) # duration until censor date
 event_observed <- as.integer(first_event <= observation)

 # correct model offsetting censored observation
 m1 <- glm(event_observed ~ binary_exposure + offset(log(censored_observation)), family = 'poisson')
 exp(coef(m1)[2]) # should approximate risk ratio

 # model offsetting for full observation
 m2 <- glm(event_observed ~ binary_exposure + offset(log(observation)), family = 'poisson')
 exp(coef(m2)[2]) # does not approximate risk ratio

 # model adjusting for censored observation
 m3 <- glm(event_observed ~ binary_exposure + censored_observation, family = 'poisson')
 exp(coef(m3)[2]) # does not approximate risk ratio
	# simple simulation of survival data for use in regression analysis
	# shows that the effect of a risk factor on an outcome must be estimated using an offset term rather than adjusting for the follow-up duration

	# sample size
	n <- 1e5

	# simulate random binary exposure with 30% prevalence
	binary_exposure <- rbinom(n, 1, 0.3)

	# simulate total observation duration, associated with binary exposure
	observation <- sample(100:400, n, T, prob = (400:100/1000)^4)
	observation <- observation * ifelse(binary_exposure == 1, 1.3, 1)

	# risk of outcome
	baseline_risk <- 1/500 # baseline risk per time increment
	risk_ratio <- 2 # risk associated with exposure
	risk <- ifelse(binary_exposure == 1, baseline_risk * risk_ratio, baseline_risk) # individual risk

	# simulate first event, censor date, and whether event was observed
	first_event <- rgeom(length(risk), risk) + 1
	censored_observation <- pmin(observation, first_event) # duration until censor date
	event_observed <- as.integer(first_event <= observation)

	# correct model offsetting censored observation
	m1 <- glm(event_observed ~ binary_exposure + offset(log(censored_observation)), family = 'poisson')
	exp(coef(m1)[2]) # should approximate risk ratio

	# model offsetting for full observation
	m2 <- glm(event_observed ~ binary_exposure + offset(log(observation)), family = 'poisson')
	exp(coef(m2)[2]) # does not approximate risk ratio

	# model adjusting for censored observation
	m3 <- glm(event_observed ~ binary_exposure + censored_observation, family = 'poisson')
	exp(coef(m3)[2]) # does not approximate risk ratio