# function to simplify calculation of binomial confidence intervals for error bars binomialci = function(nsuccess, ntry, conf.level=confidence) { if (length(nsuccess) > 1 & length(ntry)==1) { ntry = rep(ntry, length(nsuccess))} if (length(nsuccess) == 1) { bt = binom.test(nsuccess, ntry, conf.level=conf.level) ci = c(bt$conf.int) # returns the error on p; multiply by ntry to get error on counts return(ci) } else { ci.upper = nsuccess*0. ci.lower = nsuccess*0. for (i in 1:length(nsuccess)) { bt = binom.test(nsuccess[i], ntry[i], conf.level=conf.level) ci.lower[i] = bt$conf.int[1] ci.upper[i] = bt$conf.int[2] } return(list(lower=ci.lower, upper=ci.upper)) } } #function to draw error bars errorbars = function(x,ylower,yupper,hat=0.01, col='black') { arrows(x,ylower,x,yupper,length=hat, angle=90, code=3, col=col) } # Set up a joint-normalized histogram jointhist.mf = function(phdy, condition) { time = 0:13 nmi = numeric(length(time)) nfi = numeric(length(time)) for (i in 1:length(time)) { t = time[i] nmi[i] = sum(phdy$Gender=='M' & condition==TRUE & phdy$TimeToJob==t) nfi[i] = sum(phdy$Gender=='F' & condition==TRUE & phdy$TimeToJob==t) } nm = sum(phdy$Gender=='M') nf = sum(phdy$Gender=='F') mci = binomialci(nmi, nm, conf.level=plotconfidence) # uses common nm and nf for joint normalization fci = binomialci(nfi, nf, conf.level=plotconfidence) mmean = mean(phdy$TimeToJob[condition==TRUE & phdy$Gender=='M']) fmean = mean(phdy$TimeToJob[condition==TRUE & phdy$Gender=='F']) return(list(time=time, nmi=nmi, nfi=nfi, nm=nm, nf=nf, mci=mci, fci=fci, mmean=mmean, fmean=fmean)) # mr1=mr1time, fr1=fr1time, mlimit=mlimittime, flimit=flimittime) } jointhist.mf.plot = function(jh, xlab='time', ylab=ylab, xlim=c(0,11), ylim=c(0,0.15)) { plot(jh$time-0.5,jh$nmi/jh$nm, type='s', xlab=xlab, ylab=ylab, ylim=ylim,xlim=xlim) errorbars(jh$time-0.05,jh$mci$lower,jh$mci$upper) lines(jh$time-0.5,jh$nfi/jh$nf, type='s', col='red') errorbars(jh$time+0.05,jh$fci$lower,jh$fci$upper, col='red') abline(v=jh$mmean,col='black',lty=2) abline(v=jh$fmean,col='red',lty=2) } # Plot the KM estimator with nice error bars km.plot = function(survkm, xlab='time', ylab='K-M Estimator', xlim=c(0,11), ylim=c(0,1)) { plot(survkm, col=c('red','black'), xlab=xlab, ylab=ylab, xlim=xlim) #lines(survkm$time[1:survkm$strata[1]], survkm$upper[1:survkm$strata[1]], lty=2, type='s', col='red') #lines(survkm$time[1:survkm$strata[1]], survkm$lower[1:survkm$strata[1]], lty=2, type='s', col='red') tm = survkm$strata[1]+1:length(survkm$time) tf = 1:survkm$strata[1] #lines(survkm$time[survkm$strata[1]+1:length(survkm$time)], survkm$upper[survkm$strata[1]+1:length(survkm$time)], lty=2, type='s', col='black') #lines(survkm$time[survkm$strata[1]+1:length(survkm$time)], survkm$lower[survkm$strata[1]+1:length(survkm$time)], lty=2, type='s', col='black') errorbars(survkm$time[tm]+0.475, survkm$upper[tm], survkm$lower[tm], col='black') errorbars(survkm$time[tf]+0.525, survkm$upper[tf], survkm$lower[tf], col='red') } cox.hazard.mf = function(coxfit, gender) { # Calculate hazard curves given a cox model given a single two-category explanatory variable sumcoxfit = summary(coxfit) basehaz = basehaz(coxfit) basetime = basehaz$time cumhaz = basehaz$hazard logb = -unname(coxfit$coefficients[1]) logberr = sumcoxfit$coefficients[3]*qnorm(1-0.5*(1-confidence)) # confidence is global... b = exp(logb) # P_(F/M) berr = exp(logb + c(1,-1)*logberr)-b nm = sum(gender=='M') nf = sum(gender=='F') cumhazm = cumhaz*b^(-nf/(nf+nm)) cumhazf = cumhaz*b^(nm/(nf+nm)) difhaz = c(cumhaz[1],cumhaz[2:length(cumhaz)]-cumhaz[1:(length(cumhaz)-1)]) # 'mean' curve difhazm = c(cumhazm[1],cumhazm[2:length(cumhazm)]-cumhazm[1:(length(cumhazm)-1)]) # M curve difhazf = c(cumhazf[1],cumhazf[2:length(cumhazf)]-cumhazf[1:(length(cumhazf)-1)]) # F curve difhaz1 = 1-exp(-difhaz) difhaz1m = 1-exp(-difhazm) difhaz1f = 1-exp(-difhazf) return( list(coxfit=coxfit, sumcoxfit=sumcoxfit, basetime=basetime, basehaz=basehaz, cumhaz=cumhaz, b=b, berr=berr, logb=logb, logberr=logberr, nm=nm, nf=nf, cumhazm=cumhazm, cumhazf=cumhazf, difhaz=difhaz, difhazm=difhazm, difhazf=difhazf, difhaz1=difhaz1, difhaz1m=difhaz1m, difhaz1f=difhaz1f) ) } # Calculate an empirical hazard function (w/error bars) given the K-M estimator km.hazard.mf = function(survkm, conf.level=conf.level) { # (Note that if there are years with no events they are skipped over and rolled into the next # year, so this isn't an ideal solution for small data sets.) hconfl = survkm$time*0 hconfu = survkm$time*0 for (i in 1:length(survkm$time)) { nrisk = survkm$n.risk[i] nhire = survkm$n.event[i] nsurv = nrisk-nhire bt = binom.test(nhire, nrisk, conf.level=conf.level) hconfl[i] = bt$conf.int[1] hconfu[i] = bt$conf.int[2] #print(paste(survkm$time[i], nnow, nprev, bt$estimate, bt$conf.int[1], bt$conf.int[2])) } hft = survkm$time[1:survkm$strata[1]] hfdat = 1 - survkm$surv[1:survkm$strata[1]] / c(1,survkm$surv[1:(survkm$strata[1]-1)]) hfconfl = hconfl[1:survkm$strata[1]] hfconfu = hconfu[1:survkm$strata[1]] hmt = survkm$time[(survkm$strata[1]+1):length(survkm$surv)] hmdat = 1 - survkm$surv[(survkm$strata[1]+1):length(survkm$surv)] / c(1,survkm$surv[(survkm$strata[1]+1):(length(survkm$surv)-1)]) hmconfl = hconfl[(survkm$strata[1]+1):length(survkm$surv)] hmconfu = hconfu[(survkm$strata[1]+1):length(survkm$surv)] return( list(mt=hmt, ft=hft, hm=hmdat, hf=hfdat, hmconfl=hmconfl, hmconfu=hmconfu, hfconfl=hfconfl, hfconfu=hfconfu )) } # Plot the pure empirical hazard (data) and the model hazard (lines) hazard.mf.plot = function(c, h, xlim=c(0,10), ylim=c(0,0.5), xlab='t', ylab='hazard') { # Hazard data points plot(h$mt-0.025, h$hm, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab) errorbars(h$mt-0.025, h$hmconfl, h$hmconfu, col='black') points(h$ft+0.025, h$hf, col='red') errorbars(h$ft+0.025, h$hfconfl, h$hfconfu, col='red') # Cox curve #lines(basetime, difhaz1, col='blue') lines(c$basetime[1:11], c$difhaz1m[1:11], col='black') lines(c$basetime[1:11], c$difhaz1f[1:11], col='red') }