Skip to content

Instantly share code, notes, and snippets.

@arraytools
Created December 2, 2024 21:55
Show Gist options
  • Save arraytools/4ced9b19609621e9300d345b7dac5b60 to your computer and use it in GitHub Desktop.
Save arraytools/4ced9b19609621e9300d345b7dac5b60 to your computer and use it in GitHub Desktop.
Compare the effects of sample size on the log-rank test
library(survival)
# Simulate fully observed (uncensored) exponential survival times for two
# equally sized groups.
#
# sample_size:       number of subjects drawn for each group.
# event_rate_group1: rate passed to rexp() for the rows labelled group 1.
# event_rate_group2: rate passed to rexp() for the rows labelled group 0.
#
# Returns a data frame with 2 * sample_size rows and the columns `group`
# (1 for the first half, 0 for the second half), `event_times` and
# `observed` (always 1, i.e. every event is observed).
generate_data <- function(sample_size, event_rate_group1, event_rate_group2) {
  # Group 1 is drawn before group 0 so the random-number stream is consumed
  # in a fixed order and seeded runs stay reproducible.
  times_group1 <- rexp(sample_size, rate = event_rate_group1)
  times_group0 <- rexp(sample_size, rate = event_rate_group2)

  data.frame(
    group = rep(c(1, 0), each = sample_size),
    event_times = c(times_group1, times_group0),
    observed = rep(1, times = 2 * sample_size)
  )
}
# Run a log-rank test comparing survival between the groups in `data`.
#
# data: data frame with the columns `event_times` (follow-up time),
#       `observed` (event indicator handed to Surv()) and `group`
#       (group label).
#
# Returns the p-value obtained by referring the survdiff() chi-square
# statistic to a chi-square distribution.
perform_log_rank_test <- function(data) {
  # Passing `data =` instead of writing `data$group` in the formula keeps the
  # model terms tied to the supplied data frame.
  fit <- survdiff(Surv(event_times, observed) ~ group, data = data)

  # Degrees of freedom: number of groups with a positive expected event count
  # minus one. This is 1 for the usual two-group comparison.
  df <- sum(fit$exp > 0) - 1

  # Ask for the upper tail directly; `1 - pchisq(...)` cancels to exactly 0
  # once the p-value drops below double-precision resolution.
  pchisq(fit$chisq, df = df, lower.tail = FALSE)
}
# Per-group sample sizes to compare; each simulated data set contains twice
# as many subjects in total.
sample_sizes <- c(10, 30, 50, 100, 200)

# Simulate one data set per entry of `sizes` (hazard rates 0.1 vs 0.2), run
# the log-rank test on each and print the total sample size with the p-value
# rounded to five decimals.
#
# seed:  value passed to set.seed() before the first data set is drawn.
# sizes: per-group sample sizes to simulate.
#
# Returns the unrounded p-values, one per entry of `sizes`.
simulate_p_values <- function(seed, sizes = sample_sizes) {
  set.seed(seed)
  results <- numeric(length(sizes))
  # seq_along() is safe for an empty `sizes`, unlike 1:length(sizes).
  for (i in seq_along(sizes)) {
    sim <- generate_data(sizes[i], event_rate_group1 = 0.1,
                         event_rate_group2 = 0.2)
    results[i] <- perform_log_rank_test(sim)
    print(paste("Sample size:", sizes[i] * 2, ", p-value:",
                round(results[i], 5)))
  }
  results
}

# Simulate and perform the log-rank test for different sample sizes
p_values <- simulate_p_values(1)
# [1] "Sample size: 20 , p-value: 0.42122"
# [1] "Sample size: 60 , p-value: 0.20008"
# [1] "Sample size: 100 , p-value: 0.0033"
# [1] "Sample size: 200 , p-value: 0"
# [1] "Sample size: 400 , p-value: 0"

# Plotting the results of the seed-1 run; the dashed red line marks the
# conventional 0.05 significance threshold.
plot(sample_sizes, p_values, type = "b", pch = 19, col = "blue",
     xlab = "Sample Size per Group", ylab = "p-value",
     main = "Effect of Sample Size on Log-Rank Test")
abline(h = 0.05, col = "red", lty = 2)

# Repeat with a different seed to show how much the p-values vary between
# simulated data sets of the same size.
p_values <- simulate_p_values(1234)
# [1] "Sample size: 20 , p-value: 0.89394"
# [1] "Sample size: 60 , p-value: 0.0937"
# [1] "Sample size: 100 , p-value: 0.27181"
# [1] "Sample size: 200 , p-value: 1e-05"
# [1] "Sample size: 400 , p-value: 0"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment