# Replicates the "baseline" simulations from
# "Increasing the efficiency of randomized trial estimates via linear
# adjustment for a prognostic score" by Schuler et al. (2022),
# https://doi.org/10.1515/ijb-2021-0072
#
# Each replicate draws p independent Uniform(-1, 1) covariates for n subjects,
# generates the control and treated outcomes from the same conditional mean
# (so the true treatment effect is zero), and estimates the treatment effect
# three ways:
#   unadj  - unadjusted difference in group means
#   covadj - ANCOVA adjusting linearly for every covariate
#   oracle - ANCOVA adjusting for the true conditional mean
# The Monte Carlo mean and variance of each estimator are printed at the end.

# Fixed seed so the Monte Carlo summaries are reproducible from run to run.
# The particular value is arbitrary.
set.seed(2022)

n <- 500      # subjects per simulated trial
p <- 10       # number of covariates
nSim <- 1000  # number of Monte Carlo replicates

# Treatment indicator: first half control (0), remainder treated (1).
# Integer division keeps length(w) == n even if n is odd.
w <- rep(c(0, 1), times = c(n %/% 2, n - n %/% 2))
treated <- w == 1

# Per-replicate treatment-effect estimates.
unadj <- numeric(nSim)
covadj <- numeric(nSim)
oracle <- numeric(nSim)

for (i in seq_len(nSim)) {
  # matrix() fills column by column, so this draws the covariates in the same
  # order as filling each column with its own runif(n, -1, 1) call.
  x <- matrix(runif(n * p, -1, 1), nrow = n, ncol = p)
  xSum <- rowSums(x)

  # Quadratic variable (described by the script's author as following the
  # paper). xSum is recycled down the columns of x, so each row sums to
  # xSum * xSum, i.e. the squared row sum of the covariates.
  quadvar <- rowSums(xSum * x)

  # True conditional mean under the baseline scenario.
  mx <- 0.5 * quadvar + 1 * xSum

  # Outcomes under control (y0) and treatment (y1) share the mean mx and
  # differ only in their independent standard-normal noise.
  y0 <- mx + rnorm(n)
  y1 <- mx + rnorm(n)

  # Observed outcome: y1 for treated subjects, y0 for controls.
  y <- y0
  y[treated] <- y1[treated]

  # Analysis not adjusting for covariates.
  unadj[i] <- mean(y[treated]) - mean(y[!treated])

  # Standard ANCOVA, adjusting for each covariate linearly.
  covMod <- lm(y ~ w + x)
  covadj[i] <- coef(covMod)[["w"]]

  # Oracle model, adjusting for the true conditional mean.
  oracleMod <- lm(y ~ w + mx)
  oracle[i] <- coef(oracleMod)[["w"]]
}

# Explicit print() so the summaries also appear when the file is run through
# source(), which does not auto-print top-level values by default.
print(mean(unadj))
print(var(unadj))
print(mean(covadj))
print(var(covadj))
print(mean(oracle))
print(var(oracle))