What follows is the actual code for simulating the ONKO-KISS data. cum.haz.0* are functions defining the all-cause hazards for Z = 0 and Z = 1, respectively. They will be used for creating the event times via numerical inversion (hence the little y). |
- cum.haz.01 <- function(t, y) { 0.09 * log(t + 1)+ 0.024 * t * t/ 2 - y } cum.haz.02 <- function(t, y) { 0.825 * 0.09 * log(t + 1) + 0.2 * 0.024 * t * t / 2 - y }
|
Cause-specific hazards Z=0: |
- alpha.0.01 <- function(t) { 0.09 / (t + 1) } alpha.0.02 <- function(t) { 0.024 * t }
|
Z=1 : |
- alpha.1.01 <- function(t) { 0.825 * alpha.0.01(t) } alpha.1.02 <- function(t) { 0.2*alpha.0.02(t) }
|
create.times is a function to generate the event times |
- create.times <- function(n, ch, sup.int = 200) { times <- numeric(n) i <- 1 while (i <= n) { u <- runif(1) if (ch(0, -log(u)) * ch(sup.int, -log(u)) < 0) { times[i] <- uniroot(ch, c(0, sup.int), tol = 0.0001, y= -log(u))$root i <- i + 1 } else { cat("pos") } } times }
|
Binomial experiments to decide on the event type. |
- binom.status <- function(ftime, n, a01, a02, size = 1) { prob <- a01(ftime) / (a01(ftime) + a02(ftime)) out <- rbinom(n, size, prob) out }
|
Next, a function for generating the data. |
- gen.data <- function(n, PROB) { n1 <- n * PROB n0 <- n - n1 t0 <- create.times(n0, cum.haz.01) t1 <- create.times(n1, cum.haz.02) fs0 <- binom.status(t0, n0, alpha.0.01, alpha.0.02) fs1 <- binom.status(t1, n1, alpha.1.01, alpha.1.02) cov <- c(rep(0, n0), rep(1, n1)) ft <- c(t0, t1) fs <- c(fs0, fs1) fs <- (-1) * fs + 2 matrix(c(ft, fs, cov), ncol=3) }
|
Create the data: |
- set.seed(261339) y <- gen.data(n = 1500, PROB = 0.5) x <- data.frame(id = 1:length(y[, 1]), T = y[, 1], X.T = y[, 2], Z = y[, 3])
|
and add light censoring: |
- cens <- runif(length(x$T), max = 100) x$C <- cens x$TandC <- pmin(x$T, x$C) x$status <- as.numeric(x$T <= x$C) * x$X.T
|
First-event analysis Cox model for the all-cause hazard: |
- fit <- coxph(Surv(TandC, status != 0) ~ Z, data = x) sfit <- summary(fit) sfit
|
A quick check of the confidence intervals: |
- exp(fit$coefficients + qnorm(0.975) *sqrt(fit$var)) exp(fit$coefficients - qnorm(0.975) *sqrt(fit$var))
|
Analysis of the cause-specific hazards by fitting two separate Cox models |
- fit01 <- coxph(Surv(TandC, status == 1) ~ Z, data = x) fit02 <- coxph(Surv(TandC, status == 2) ~ Z, data = x
|
Analysis of the duplicated data set First, we create the duplicated data set. |
- xl <- rbind(x,x) xl$eventtype <- c(rep("interest",1500), rep("competing", 1500)) xl$newstat <- as.numeric(c(x$status == 1, x$status == 2))
|
1st cox model: first event analysis: |
- summary(coxph(Surv(TandC, newstat != 0) ~ Z, data = xl))
|
2nd cox model: first event analysis using a strata term: |
- summary(coxph(Surv(TandC, newstat != 0) ~ Z + strata(eventtype), data = xl))
|
Creation of the cause-specific covariates: |
- xl$Z.01 <- xl$Z * (xl$eventtype == "interest") xl$Z.02 <- xl$Z * (xl$eventtype == "competing")
|
Cox cause-specific hazards analysis with the duplicated data set: |
- summary(coxph(Surv(TandC, newstat != 0) ~ Z.01 + Z.02 + strata(eventtype), data = xl))
|
Modelling of common effects Now, we create a random variable which has nothing in common with the way the data were simulated. |
- cv <- round(0.5 * rnorm(length(x$id),1)) xl$cv <- rep(cv, 2)
|
cox model with a common effect on both CSHs for cv |
- ## event of interest summary(coxph(Surv(TandC, status == 1) ~ Z + cv, data = x)) ## competing event summary(coxph(Surv(TandC, status == 2) ~ Z + cv,data = x))
|
Duplicated data set (both α01 and α02) |
- summary(coxph(Surv(TandC, newstat != 0) ~ Z.01 + Z.02 + cv + strata(eventtype), data = xl))
|
Breslow estimators, Nelson-Aalen estimators and interpretation Breslow estimates of the baseline hazards: |
- a01.0 <- basehaz(coxph(Surv(TandC, status == 1) ~ Z,data = x), centered = FALSE) # csh01 a02.0 <- basehaz(coxph(Surv(TandC, status == 2) ~ Z, data = x), centered = FALSE) # csh02
|
Figure 5.6 |
- split.screen(figs=c(1,2)) screen(1) plot(c(0, 10), c(0, 1), xlab = expression(paste(Time, " ", italic(t))), ylab= "", type = "n", axes = FALSE, cex.lab=1.5) axis(1, at = seq(0, 10, 5), cex.axis = 1.5) axis(2, at=seq(0, 1, 0.2), cex.axis = 1.25) box() lines(x=a02.0$time, y=a02.0$hazard, type="s", lwd=2) lines(x=a01.0$time, y=a01.0$hazard, type="s", lwd=2, col="darkgrey") legend(0,1, c(expression(paste(A[0], phantom()[1], phantom()[scriptstyle(";")], phantom()[0],"(",italic(t),")")), expression(paste(A[0],phantom()[2],phantom()[scriptstyle(";")], phantom()[0],"(",italic(t),")"))), lty=c(1,1),col=c("darkgrey", "black"),bty="n", cex=1.2, lwd=2) screen(2) plot(c(0, 70), y=c(0, 35), xlab = expression(paste(Time, " ", italic(t))), ylab="", type="n", axes=FALSE, cex.lab=1.5) axis(1, at=seq(0, 70, 10), cex.axis=1.5) axis(2, at=seq(0, 35, 5), cex.axis=1.25) box() lines(x=a02.0$time, y=a02.0$hazard, type="s", lwd=2) lines(x=a01.0$time, y=a01.0$hazard, type="s", lwd=2, col="darkgrey") legend(0,35,c(expression(paste(A[0],phantom()[1],phantom()[scriptstyle(";")], phantom()[0],"(",italic(t),")")), expression(paste(A[0],phantom()[2],phantom()[scriptstyle(";")], phantom()[0],"(",italic(t),")"))), lty=c(1,1),col=c("darkgrey", "black"),bty="n", cex=1.2, lwd=2) close.screen(all.screens=TRUE) mtext("Breslow estimates of the cumulative cause-specific hazards", cex=1.5, line=1, font = 2)
|
Baseline hazards using the duplicated data set: |
- basehaz(coxph(Surv(TandC, newstat != 0) ~ Z.01 + Z.02 + strata(eventtype), data = xl), centered = FALSE)
|
Figure 5.7 |
- ## matrix of logical describing the possible transitions (for using mvna) tra <- matrix(FALSE, ncol = 3, nrow = 3) dimnames(tra) <- list(c("0", "1", "2"), c("0", "1", "2")) tra[1, 2:3] <- TRUE ## Tranformation of the data set into mvna format x.mvna <- x x.mvna$from <- rep(0, nrow(x)) x.mvna$to <- ifelse(x.mvna$status == 0, "cens", x.mvna$status) x.mvna$time <- x$TandC ## cumulative CSHs na.z0 <- mvna(x.mvna[x.mvna$Z == 0, ], c("0", "1", "2"), tra, "cens") na.z1 <- mvna(x.mvna[x.mvna$Z == 1, ], c("0", "1", "2"), tra, "cens") plot(x=c(0, 10), y=c(0, 1), xlab=expression(paste(Time, " ", italic(t))), ylab="", type="n", axes=F, main="", cex.main=1.7, cex.lab=1.5) axis(1, at=seq(0, 10, 5), cex.axis=1.5) axis(2, at=seq(0, 1, 0.2), cex.axis=1.25) box() lines(na.z0[["0 1"]][,"time"], na.z0[["0 1"]][,"na"], type="s", col="darkgrey", lwd=2) lines(na.z0[["0 2"]][,"time"], na.z0[["0 2"]][,"na"], type="s", lwd=2) lines(na.z1[["0 1"]][,"time"], na.z1[["0 1"]][,"na"], type="s", col="darkgrey", lwd=2, lty=2) lines(na.z1[["0 2"]][,"time"], na.z1[["0 2"]][,"na"], type="s", lwd=2, lty=2) legend(0,0.989,lty=c(1,1), rep(expression(paste(phantom("Event of interest"))),2), bty="n", col=c("darkgrey","black"),lwd=2) legend(1,1,lty=c(2,2), c("Event of interest","Competing event"), bty="n", col=c("darkgrey","black"),lwd=2, cex=1.2) legend(0,0.789,lty=c(2,1), rep(expression(paste(phantom("Event of interest"))),2), bty="n", col=c("darkgrey","darkgrey"),lwd=2) legend(1,0.8,lty=c(2,1), c(expression(paste(italic(Z)[i]," =0")), expression(paste(italic(Z)[i]," =1"))), bty="n", lwd=2, cex=1.2)
|
Prediction of the cumulative incidence functions Prediction of the CIFs is performed using the mstate package, using the extended data set xl. |
- require(mstate) ## matrix indicating the possible transitions tmat <- trans.comprisk(2)
|
Add a variable trans that indicates the transition type: |
- xl$trans <- c(rep(1, 1500), rep(2, 1500))
|
Next, we fit the cox csh model with the "Breslow" method for handling ties. |
- fit <- coxph(Surv(TandC, newstat) ~ Z.01 + Z.02 + strata(trans), data = xl, method = "breslow")
|
New data frames for making predictions: An individual with Z=0: |
- newdat.z0 <- data.frame(Z.01 = c(0, 0), Z.02 = c(0, 0), strata = c(1, 2))
|
An individual with Z=1: |
- newdat.z1 <- data.frame(Z.01 = c(1, 0), Z.02 = c(0, 1), strata = c(1, 2))
|
Predicted cumulative hazards: |
- msf.z0 <- msfit(fit, newdat.z0, trans = tmat) msf.z1 <- msfit(fit, newdat.z1, trans = tmat)
|
Predicted CIFs: |
- pt.z0 <- probtrans(msf.z0, 0)[[1]] pt.z1 <- probtrans(msf.z1, 0)[[1]]
|
Figure 5.8 |
- ## modification of the data for using etm() x.etm <- data.frame(id = x$id, from = rep(11, nrow(x)), to = x$status, time = x$TandC, Z = x$Z) aa0 <- etm(x.etm[x.etm$Z == 0, ], c('11', '1', '2'), matrix(c(FALSE, TRUE, TRUE, rep(FALSE, 6)), ncol = 3, nrow = 3, byrow = TRUE), '0', 0) aa1 <- etm(x.etm[x.etm$Z == 1, ], c('11', '1', '2'), matrix(c(FALSE, TRUE, TRUE, rep(FALSE, 6)), ncol = 3, nrow = 3, byrow = TRUE), '0', 0) ## plot oldpar <- par(mfrow = c(1, 2)) plot(aa0, tr.choice = "11 1", col = "gray", lty = 1, legend = FALSE, lwd = 3, xlim = c(0, 30), ylab = "Cumulative Incidence") lines(aa1, tr.choice = "11 1", col = 1, lty = 1, lwd = 3) lines(pt.z0$time, pt.z0$pstate2, col = "gray", lty = 2, lwd = 3) lines(pt.z1$time, pt.z1$pstate2, col = 1, lty = 2, lwd = 3) legend(x = 0, y = 0.83, c("Z = 0", "Z = 1"), bty = "n", col = c("gray", 1), lty = 1, lwd = 3) legend(x = 0, y = 0.7, c("Aalen-Johansen", "Predicted"), col = 1, lty = c(1, 2), bty = "n", lwd = 3) plot(aa0, tr.choice = "11 2", col = "gray", lty = 1, legend = FALSE, lwd = 3, xlim = c(0, 30), ylab = "Cumulative Incidence") lines(aa1, tr.choice = "11 2", col = 1, lty = 1, lwd = 3) lines(pt.z0$time, pt.z0$pstate3, col = "gray", lty = 2, lwd = 3) lines(pt.z1$time, pt.z1$pstate3, col = 1, lty = 2, lwd = 3) par(oldpar)
|
Figure 5.9 |
- plot(aa0, tr.choice = "11 1", col = "gray", lty = 1, legend = FALSE, lwd = 3, xlim = c(0, 30), ylab = "Cumulative Incidence", ylim = c(0, 0.2)) lines(aa1, tr.choice = "11 1", col = 1, lty = 1, lwd = 3) lines(pt.z0$time, pt.z0$pstate2, col = "gray", lty = 2, lwd = 3) lines(pt.z1$time, pt.z1$pstate2, col = 1, lty = 2, lwd = 3) legend(x = 15, y = 0.1, c("Z = 0", "Z = 1"), bty = "n", col = c("gray", 1), lty = 1, lwd = 3) legend(x = 15, y = 0.05, c("Aalen-Johansen", "Predicted"), col = 1, lty = c(1, 2), bty = "n", lwd = 3)
|