# function to simplify calculation of binomial confidence intervals for error bars
binomialci = function(nsuccess, ntry, conf.level=confidence) {
	if (length(nsuccess) > 1 & length(ntry)==1) { ntry = rep(ntry, length(nsuccess))}
	if (length(nsuccess) == 1) {
		bt = binom.test(nsuccess, ntry, conf.level=conf.level)
		ci = c(bt$conf.int) # returns the error on p; multiply by ntry to get error on counts
		return(ci)
	} else {
		ci.upper = nsuccess*0.
		ci.lower = nsuccess*0.
		for (i in 1:length(nsuccess)) {
			bt = binom.test(nsuccess[i], ntry[i], conf.level=conf.level)
			ci.lower[i] = bt$conf.int[1]
			ci.upper[i] = bt$conf.int[2]
		}
		return(list(lower=ci.lower, upper=ci.upper))
	}
}

#function to draw error bars
errorbars = function(x,ylower,yupper,hat=0.01, col='black') {
	arrows(x,ylower,x,yupper,length=hat, angle=90, code=3, col=col)
}

# Set up a joint-normalized histogram
jointhist.mf = function(phdy, condition) {
	time = 0:13
	nmi = numeric(length(time))
	nfi = numeric(length(time))
	for (i in 1:length(time)) {
		t = time[i]
		nmi[i] = sum(phdy$Gender=='M' & condition==TRUE & phdy$TimeToJob==t)
		nfi[i] = sum(phdy$Gender=='F' & condition==TRUE & phdy$TimeToJob==t)
	}
	nm = sum(phdy$Gender=='M')
	nf = sum(phdy$Gender=='F')
	mci = binomialci(nmi, nm, conf.level=plotconfidence) # uses common nm and nf for joint normalization
	fci = binomialci(nfi, nf, conf.level=plotconfidence)
	mmean = mean(phdy$TimeToJob[condition==TRUE & phdy$Gender=='M'])
	fmean = mean(phdy$TimeToJob[condition==TRUE & phdy$Gender=='F'])
  return(list(time=time, nmi=nmi, nfi=nfi, nm=nm, nf=nf, mci=mci, fci=fci, mmean=mmean, fmean=fmean))
  			 # 						           mr1=mr1time, fr1=fr1time, mlimit=mlimittime, flimit=flimittime)
}	

jointhist.mf.plot = function(jh, xlab='time', ylab=ylab, xlim=c(0,11), ylim=c(0,0.15)) {
	 plot(jh$time-0.5,jh$nmi/jh$nm, type='s', xlab=xlab, ylab=ylab, ylim=ylim,xlim=xlim)
   errorbars(jh$time-0.05,jh$mci$lower,jh$mci$upper)
   lines(jh$time-0.5,jh$nfi/jh$nf, type='s', col='red')
   errorbars(jh$time+0.05,jh$fci$lower,jh$fci$upper, col='red')
   abline(v=jh$mmean,col='black',lty=2)
   abline(v=jh$fmean,col='red',lty=2)
}

# Plot the KM estimator with nice error bars	
km.plot = function(survkm, xlab='time', ylab='K-M Estimator', xlim=c(0,11), ylim=c(0,1)) {

	plot(survkm, col=c('red','black'), xlab=xlab, ylab=ylab, xlim=xlim)
	#lines(survkm$time[1:survkm$strata[1]], survkm$upper[1:survkm$strata[1]], lty=2, type='s', col='red')
	#lines(survkm$time[1:survkm$strata[1]], survkm$lower[1:survkm$strata[1]], lty=2, type='s', col='red')
	tm = survkm$strata[1]+1:length(survkm$time)
	tf = 1:survkm$strata[1]
	#lines(survkm$time[survkm$strata[1]+1:length(survkm$time)], survkm$upper[survkm$strata[1]+1:length(survkm$time)], lty=2, type='s', col='black')
	#lines(survkm$time[survkm$strata[1]+1:length(survkm$time)], survkm$lower[survkm$strata[1]+1:length(survkm$time)], lty=2, type='s', col='black')
	errorbars(survkm$time[tm]+0.475, survkm$upper[tm], survkm$lower[tm], col='black')
	errorbars(survkm$time[tf]+0.525, survkm$upper[tf], survkm$lower[tf], col='red')
}	

cox.hazard.mf = function(coxfit, gender) {
  # Calculate hazard curves given a cox model given a single two-category explanatory variable
	sumcoxfit = summary(coxfit)
	basehaz = basehaz(coxfit)
	basetime = basehaz$time
	cumhaz = basehaz$hazard
	logb = -unname(coxfit$coefficients[1])
	logberr = sumcoxfit$coefficients[3]*qnorm(1-0.5*(1-confidence)) # confidence is global...
	b = exp(logb) # P_(F/M)
	berr = exp(logb + c(1,-1)*logberr)-b
	nm = sum(gender=='M')
	nf = sum(gender=='F')
	cumhazm = cumhaz*b^(-nf/(nf+nm))
	cumhazf = cumhaz*b^(nm/(nf+nm))
	difhaz = c(cumhaz[1],cumhaz[2:length(cumhaz)]-cumhaz[1:(length(cumhaz)-1)]) # 'mean' curve
	difhazm = c(cumhazm[1],cumhazm[2:length(cumhazm)]-cumhazm[1:(length(cumhazm)-1)]) # M curve
	difhazf = c(cumhazf[1],cumhazf[2:length(cumhazf)]-cumhazf[1:(length(cumhazf)-1)]) # F curve
	difhaz1 = 1-exp(-difhaz)
	difhaz1m = 1-exp(-difhazm)
	difhaz1f = 1-exp(-difhazf)
	
	return( list(coxfit=coxfit, sumcoxfit=sumcoxfit, 
							 basetime=basetime, basehaz=basehaz, cumhaz=cumhaz, 
							 b=b, berr=berr, logb=logb, logberr=logberr,
							 nm=nm, nf=nf, cumhazm=cumhazm, cumhazf=cumhazf, difhaz=difhaz, difhazm=difhazm, difhazf=difhazf,
							 difhaz1=difhaz1, difhaz1m=difhaz1m, difhaz1f=difhaz1f) )
}

# Calculate an empirical hazard function (w/error bars) given the K-M estimator
km.hazard.mf = function(survkm, conf.level=conf.level) {
	
	# (Note that if there are years with no events they are skipped over and rolled into the next 
	#  year, so this isn't an ideal solution for small data sets.)
	
	hconfl = survkm$time*0
  hconfu = survkm$time*0
  for (i in 1:length(survkm$time)) {
	  nrisk = survkm$n.risk[i]
  	nhire = survkm$n.event[i]
  	nsurv = nrisk-nhire
  	bt = binom.test(nhire, nrisk, conf.level=conf.level)
  	hconfl[i] = bt$conf.int[1]
  	hconfu[i] = bt$conf.int[2]
  	#print(paste(survkm$time[i], nnow, nprev, bt$estimate, bt$conf.int[1], bt$conf.int[2]))
  }	

  hft = survkm$time[1:survkm$strata[1]]
  hfdat = 1 - survkm$surv[1:survkm$strata[1]] / 
  	          c(1,survkm$surv[1:(survkm$strata[1]-1)])
  hfconfl = hconfl[1:survkm$strata[1]]
  hfconfu = hconfu[1:survkm$strata[1]]
  
  hmt = survkm$time[(survkm$strata[1]+1):length(survkm$surv)]
  hmdat = 1 - survkm$surv[(survkm$strata[1]+1):length(survkm$surv)] / 
  	          c(1,survkm$surv[(survkm$strata[1]+1):(length(survkm$surv)-1)])
  hmconfl = hconfl[(survkm$strata[1]+1):length(survkm$surv)]
  hmconfu = hconfu[(survkm$strata[1]+1):length(survkm$surv)]

  return( list(mt=hmt, ft=hft, hm=hmdat, hf=hfdat, hmconfl=hmconfl, hmconfu=hmconfu, hfconfl=hfconfl, hfconfu=hfconfu ))
}

# Plot the pure empirical hazard (data) and the model hazard (lines)
hazard.mf.plot = function(c, h, xlim=c(0,10), ylim=c(0,0.5), xlab='t', ylab='hazard') {
	
	# Hazard data points
	plot(h$mt-0.025, h$hm, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
	errorbars(h$mt-0.025, h$hmconfl, h$hmconfu, col='black')
	points(h$ft+0.025, h$hf, col='red')
	errorbars(h$ft+0.025, h$hfconfl, h$hfconfu, col='red')
	
	# Cox curve
	#lines(basetime, difhaz1, col='blue')
	lines(c$basetime[1:11], c$difhaz1m[1:11], col='black')
	lines(c$basetime[1:11], c$difhaz1f[1:11], col='red')
}