Skip to content

Instantly share code, notes, and snippets.

@mattbaggott
Created November 12, 2012 15:53

Revisions

  1. mattbaggott created this gist Nov 12, 2012.
    55 changes: 55 additions & 0 deletions Turnbull.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,55 @@
    cria.tau <- function(data){

    # Giolo, Suely Ruiz. "Turnbull's nonparametric estimator for interval-censored data'."
    # Department of Statistics, Federal University of Paraná (2004): 1-10.

    l <- data$left
    r <- data$right
    tau <- sort(unique(c(l,r[is.finite(r)])))
    return(tau)
    }
    S.ini <- function(tau){
    m<-length(tau)
    ekm<-survfit(Surv(tau[1:m-1],rep(1,m-1)))
    So<-c(1,ekm$surv)
    p <- -diff(So)
    return(p)
    }
    cria.A <- function(data,tau){
    tau12 <- cbind(tau[-length(tau)],tau[-1])
    interv <- function(x,inf,sup) ifelse(x[1]>=inf & x[2]<=sup,1,0)
    A <- apply(tau12,1,interv,inf=data$left,sup=data$right)
    id.lin.zero <- which(apply(A==0, 1, all))
    if(length(id.lin.zero)>0) A <- A[-id.lin.zero, ]
    return(A)
    }
    Turnbull <- function(p, A, data, eps=1e-3, iter.max=200, verbose=FALSE){
    n<-nrow(A)
    m<-ncol(A)
    Q<-matrix(1,m)
    iter <- 0
    repeat {
    iter <- iter + 1
    diff<- (Q-p)
    maxdiff<-max(abs(as.vector(diff)))
    if (verbose)
    print(maxdiff)
    if (maxdiff<eps | iter>=iter.max)
    break
    Q<-p
    C<-A%*%p
    p<-p*((t(A)%*%(1/C))/n)
    }
    cat("Iterations = ", iter,"\n")
    cat("Max difference = ", maxdiff,"\n")
    cat("Convergence criteria: Max difference < 1e-3","\n")
    dimnames(p)<-list(NULL,c("P Estimate"))
    surv<-round(c(1,1-cumsum(p)),digits=5)
    right <- data$right
    if(any(!(is.finite(right)))){
    t <- max(right[is.finite(right)])
    return(list(time=tau[tau<t],surv=surv[tau<t]))
    }
    else
    return(list(time=tau,surv=surv))
    }
    69 changes: 69 additions & 0 deletions survivalcurveinterval.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,69 @@

    # Giolo, Suely Ruiz. "Turnbull's nonparametric estimator for interval-censored data'."
    # Department of Statistics, Federal University of Paraná (2004): 1-10.

    dat <-structure(list(left = c(1, 1, 1, 28, 34, 34, 35, 36, 36, 3, 3,
    7, 9, 9, 9, 10, 11, 10, 13, 15, 15, 16, 17, 18, 21, 22, 23, 25,
    25, 25, 25, 26, 26, 26, 26, 27, 29, 31, 32, 32, 32, 32, 32, 32,
    32, 32, 1, 1, 28, 28, 34, 37, 37, 2, 2, 3, 3, 3, 3, 3, 4, 5,
    5, 5, 5, 6, 6, 7, 8, 8, 8, 8, 9, 9, 9, 10, 10, 11, 12, 38, 14,
    15, 15, 19, 19, 20, 21, 21, 23, 23, 24, 24, 30, 33), right = c(32,
    33, 30, 2, 3, 2, 1, 7, 5, 6, 9, NA, NA, 16, 16, NA, 23, 17, NA,
    NA, NA, 25, 27, 22, NA, NA, NA, 28, 29, NA, NA, 28, NA, NA, NA,
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13, 30, 34, 33, 33,
    3, 12, 23, 8, 4, NA, 8, NA, 11, 11, NA, 26, NA, NA, 8, 10, 13,
    15, 11, 15, 31, 18, 14, 17, 16, 15, 21, NA, 21, NA, 20, 19, 22,
    24, NA, NA, 27, NA, NA, NA, 26, 29, NA), ther = c(2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1), cens = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1)), .Names = c("left",
    "right", "ther", "cens"), row.names = 2:95, class = "data.frame")

    #===============================================
    #Survival curves using intervals - Figure 1
    #===============================================

    require(survival)
    source("Turnbull.R") # Turnbull.R

    dat1 <- dat[dat$ther==1,]
    dat1$right[is.na(dat1$right)] <- Inf
    tau <- cria.tau(dat1)
    p <- S.ini(tau=tau)
    A <- cria.A(data=dat1,tau=tau)
    tb1 <- Turnbull(p,A,dat1)
    tb1
    dat1 <- dat[dat$ther==0,]
    dat1$right[is.na(dat1$right)] <- Inf
    tau <- cria.tau(dat1)
    p <- S.ini(tau=tau)
    A <- cria.A(data=dat1,tau=tau)
    tb2 <- Turnbull(p,A,dat1)
    tb2
    plot(tb1$time,tb1$surv,lty=1, col = 4,type="s",ylim=c(0,1),xlim=range(c(0,60)),
    xlab="Tempos (meses)",ylab="S(t)")
    lines(tb2$time,tb2$surv,lty=4,col=2,type="s")
    legend(1,0.3,lty=c(1,4),col=c(4,2),c("Radioterapia","Radioterapia + Quimioterapia"), bty="n",cex=0.8)

    #===============================================
    #Survival curves using midpoints - Figure 2
    #===============================================

    p <-dat$left+((dat$right-dat$left)/2)
    pm <-ifelse(is.finite(p),p,dat$left)
    cens <- ifelse(is.finite(p),1,0)
    ekm<-survfit(Surv(pm,cens)~ther,type=c("kaplan-meier"),data=dat)
    plot(tb1$time,tb1$surv,lty=1,type="s",col=4,ylim=c(0,1),xlim=c(0,50),xlab="Tempos (meses)",ylab="S(t)")
    lines(tb2$time,tb2$surv,lty=1,col=2,type="s")
    lines(ekm[1]$time,ekm[1]$surv,type="s",col=2,lty=2)
    lines(ekm[2]$time,ekm[2]$surv,type="s",col=4,lty=2)
    legend(3,0.30,lty=2,col=4, "Radiotherapy using midpoints", bty="n",cex=0.8)
    legend(3,0.25,lty=1,col=4, "Radiotherapy using intervals", bty="n",cex=0.8)
    legend(3,0.2,lty=2,col=2,"Radio + Chemotherapy using midpoints", bty="n",cex=0.8)
    legend(3,0.15,lty=1,col=2,"Radio + Chemotherapy using intervals", bty="n",cex=0.8)