Skip to content

Instantly share code, notes, and snippets.

View danlewer's full-sized avatar

Dan Lewer danlewer

View GitHub Profile
# simple simulation of survival data for use in regression analysis
# shows that the effect of a risk factor on an outcome must be estimated using an offset term rather than adjusting for the follow-up duration
# sample size
n <- 1e5
# simulate random binary exposure with 30% prevalence
binary_exposure <- rbinom(n, 1, 0.3)
# simulate total observation duration, associated with binary exposure
risk2odds <- function (risk) risk / (1-risk)
odds2risk <- function (odds) odds / (1+odds)
or2rr <- function (OR, p0) OR / ((1 - p0) + (p0 * OR))
rr2or <- function(RR, p0) RR * (1 - p0) / (1 - RR * p0)
@danlewer
danlewer / roundup.R
Created August 20, 2024 12:45
Round up
roundup <- function (x, dig = 2) {
l <- floor(log10(x)) + 1
r <- ceiling(x / (10 ^ (l-dig)))
r * 10 ^ (l-dig)
}
@danlewer
danlewer / ris2csv.R
Last active July 30, 2024 10:45
Convert RIS to CSV (requires data.table)
# Reads a RIS file and converts it into a 'wide' data.table
# Where records have multiple entries for a field, eg. author or keyword, these are given numbered column names, eg. KW1, KW2 etc
# Note if the field label already has a number, this will be maintained, eg. T1 (for title) becomes T11
library(data.table)
ris2csv <- function (file) { # 'file' is the ris file in your working directory, eg. 'myrisfile.ris'
d <- readLines(file)
d <- strsplit(d, " - ", fixed = T)
d[sapply(d, length) == 0] <- 'NEW RECORD'
library(lme4) # for ML/REML fitting of mixed models
# ----------------------------------------------------------------------------------------------
# simulate a clustered dataset with cluster-level outcome affected by an individual "confounder"
# ----------------------------------------------------------------------------------------------
# inputs
# ------
# sample size
# linear interpolation of lines between x and y coordinates, with option to find specific intercepts
# x must be positive monotonic, y does not need to be
interpolate <- function (x, y, xIncrements = 0.01, findx = NULL, findy = NULL) {
if (length(x) != length(y)) stop('length(x) does not equal length(y)')
if (is.unsorted(x)) stop('x is not positive monotonic')
dx <- diff(x)
dy <- diff(y)
xNotches <- round(dx / xIncrements, 0)
yIncrements <- dy / xNotches
# compare Cohen's kappa and % agreement
# for two raters making a binary (yes/no) decision
# assuming the same prevalence for both raters
library(viridisLite)
k <- function (Po, prevalence) {
Pe <- (prevalence^2) + ((1-prevalence)^2)
(Po - Pe) / (1 - Pe)
}
# Here's a function that allows you to specify the number of letters you want (eg. 5 would be A, B, C, D, and E), the length of the code (eg. 3 would be AAA, AAB, AAC, etc), the number of results you want (NA for all of them), and and separating character (eg. '-' would give A-A-A, A-A-B, A-A-C.)
letterCodes <- function(nletters, case = 'upper', lengthCode, nResults = NA, sep = '') {
f <- if (case == 'upper') LETTERS else letters
a <- expand.grid(rep(list(f[1:nletters]), lengthCode))
a <- a[, ncol(a):1]
a <- do.call("paste", c(a, sep = sep))
if (is.na(nResults)) a else a[1:min(nResults, length(a))]
}
# return a table, but specify the values you want to be tabulated
# works like base::table, but supplies only specified values, and returns 0 if none of those values exist in the vector
tab_specific_values <- function(vector, values = unique(vector)) `names<-`(rowSums(outer(values, vector, `==`)), values)
which.median <- function (x, ties.method = c('first', 'last', 'random', 'all'), ...) {
med <- median(x, ...)
dif <- abs(med - x)
ind <- which(dif == min(dif))
if (length(ind) > 1 & ties.method[1] != 'all') {
ind <- if (ties.method[1] == 'first') ind[1] else {if (ties.method[1] == 'last') tail(ind, 1) else sample(ind, 1)}
}
return (ind)
}