# Define the confidence levels we will use in this work
confidence = 0.95
plotconfidence = 0.67

# Set plotting defaults
xlab = 'Years after PhD'
mar=c(5.1,4.1,1.1,2.1) 

# Load the custom analysis/plotting functions
setwd("~/AstroPhDs")
source('astrophds_func.r')

Load data; create data frames for different subsets

  setwd("~/AstroPhDs")
  allphd = read.csv('phdoutcomes.csv')
  ntotal = nrow(allphd)
  phd = allphd[allphd$Omit==FALSE,]       # remove the omits
  omitted = allphd[allphd$Omit==TRUE,]    # hold onto the omits here
  phd$Gender = droplevels(phd$Gender) # remove unknown-gender factor level since unknown-gender rows were omitted
  
  nunkyear = sum(is.na(phd$JobYear))
  n = nrow(phd)  # number excluding omits
  nomit = nrow(omitted)
  print(paste('Loaded',n,'PhDs (omitted',nomit,')'))
## [1] "Loaded 1063 PhDs (omitted 91 )"
  TimeToJob = phd$JobYear-phd$PhDYear
  TimeToJob[TimeToJob < 0] = 0
  phd = cbind(phd,TimeToJob)

  phdfac = phd[phd$Left == 'hired',]                              # permanent/long-term astro job
  phdleft = phd[phd$Left == 'left',]                              # left field
  phdtemp = phd[phd$Left == 'postdoc' | phd$Left == 'adjunct',]   # postdoc/temp
  phdnotdoc = phd[phd$Left != 'postdoc' & phd$Left != 'adjunct',] # all jobs (astro+nonastro)
  phdnotdoc$Left = droplevels(phdnotdoc$Left) 

  phdy = phd[is.na(phd$JobYear)==FALSE,]  # same as above four, but omitting unknown-year individuals
  phdfacy = phdfac[is.na(phdfac$JobYear)==FALSE,]
  phdlefty = phdleft[is.na(phdleft$JobYear)==FALSE,]
  phdnotdocy = phdnotdoc[is.na(phdnotdoc$JobYear)==FALSE,]

  # safety step to make sure we don't accidentally use missing-year data for timing analysis
  phdfac = subset(phdfac, select = -c(TimeToJob)) 
  phdleft = subset(phdleft, select = -c(TimeToJob))
  phdnotdoc = subset(phdnotdoc, select = -c(TimeToJob))

  ny = nrow(phdy)

Outcome differences by gender

  otable1 = table(phdnotdoc$Gender, phdnotdoc$JobType)
  ch1 = chisq.test(otable1)
  print('')
## [1] ""
  z = signif(ch1$stdres[1,],3)
  otable1t = cbind(t(otable1),z) # negative z is fewer women than 'expected'; positive is more women
  otable1t = rbind(otable1t, TEMP=c(table(phdtemp$Gender),NA))
  otable1t = rbind(otable1t, OMITTED=c(table(omitted$Gender)[2:3],NA))
  pctf = round(100*otable1t[,1]/(otable1t[,1]+otable1t[,2]))
  pctfunc = round(50*(binomialci(otable1t[,1],otable1t[,1]+otable1t[,2])$upper-
                                        binomialci(otable1t[,1],otable1t[,1]+otable1t[,2])$lower))
  otable1t = cbind(otable1t,pctf,pctfunc)
  otable1t[c(3,2,4,1,5,6),]
##            F   M      z pctf pctfunc
## prof-R1   41 111 -0.661   27       7
## prof      81 155  2.000   34       6
## staff     73 211 -1.550   26       5
## non-astro 81 192  0.200   30       6
## TEMP      39  79     NA   33       9
## OMITTED   15  40     NA   27      12
  paste(table(omitted$Gender)[1],'with unknown gender')
## [1] "36 with unknown gender"
  sum(otable1t[1:5,1:2])  # check should = n
## [1] 1063
  sum(otable1t[6,1:2])+sum(table(omitted$Gender)[1]) # check should = nomit
## [1] 91
    otable2 = table(phdnotdoc$Gender, phdnotdoc$InstClass)
  otable2 = otable2[,2:ncol(otable2)] # omit blank NA column (a little bit dangerous)
  ch2 = chisq.test(otable2)
## Warning in chisq.test(otable2): Chi-squared approximation may be incorrect
  print('')
## [1] ""
  z = signif(ch2$stdres[1,],3)
  otable2t = cbind(t(otable2),z)
  otable2t = otable2t[c(11,12,10,9,1,6,8,7, 5,2,3,4,13),]
  #otable2t
  allastro   = colSums(otable2t[1:8,1:2])
  allnonastro= colSums(otable2t[9:13,1:2])
  otable2t = rbind(otable2t[1:8,],ASTRO=c(allastro,NA),otable2t[9:13,],NONASTRO=c(allnonastro,NA))
  pct2f = round(100*otable2t[,1]/(otable2t[,1]+otable2t[,2]))
  pct2func = round(50*(binomialci(otable2t[,1],otable2t[,1]+otable2t[,2])$upper-binomialci(otable2t[,1],otable2t[,1]+otable2t[,2])$lower))
  otable2t = cbind(otable2t,pct2f,pct2func)
  otable2t  
##               F   M      z pct2f pct2func
## U-R1         46 141 -1.550    25        6
## U-R2         15  22  1.550    41       17
## U-M          21  36  1.310    37       13
## U-for        10  34 -0.968    23       13
## col          25  30  2.730    45       14
## obs          27  78 -0.835    26        9
## other-US     27  82 -1.080    25        8
## other-for    24  54  0.317    31       11
## ASTRO       195 477     NA    29        4
## non-ast.ac.   5   4  1.750    56       33
## education     6   8  1.130    43       27
## government    6  13  0.230    32       22
## industry     53 149 -1.050    26        6
## unknown      11  18  1.050    38       19
## NONASTRO     81 192     NA    30        6

Chi^2 for career outcome differences

  ch1 # are basic outcomes different?
## 
##  Pearson's Chi-squared test
## 
## data:  otable1
## X-squared = 5.0666, df = 3, p-value = 0.167
  ch2 # are detailed outcomes different?
## 
##  Pearson's Chi-squared test
## 
## data:  otable2
## X-squared = 21.755, df = 12, p-value = 0.04036

Basic statistics on time to permanent job

ns = tapply(phdfacy$TimeToJob, phdfacy$Gender, length)
means = tapply(phdfacy$TimeToJob, phdfacy$Gender, mean)
sds   = tapply(phdfacy$TimeToJob, phdfacy$Gender, sd)
ses   = sds/sqrt(ns)
medians = tapply(phdfacy$TimeToJob, phdfacy$Gender, median)
m = data.frame(N=ns,mean=means,SE=ses,median=medians) # create table of N, mean, median, SE
m
##     N     mean        SE median
## F 187 4.406417 0.1648407      5
## M 455 4.859341 0.1226019      5
gap = means[2]-means[1]  # simple-statistics M-F hiring-time gap measurement
gap = unname(gap)
gapunc = sqrt(ses[2]^2+ses[1]^2)*1.96 # assuming confidence is 0.95
gapunc = unname(gapunc)

Time Differences by Gender and JobType

  meantimes1 = tapply(phdnotdocy$TimeToJob, phdnotdocy$InstClass, mean)
  meantimes = tapply(phdnotdocy$TimeToJob, list(phdnotdocy$Gender,phdnotdocy$InstClass), mean)
  setimes = tapply(phdnotdocy$TimeToJob, list(phdnotdocy$Gender,phdnotdocy$InstClass), sd)/sqrt(tapply(phdnotdocy$TimeToJob, list(phdnotdocy$Gender,phdnotdocy$InstClass), length))
  gaptimes = meantimes[1,]-meantimes[2,]
  gapses = sqrt(setimes[1,]^2+setimes[2,]^2) # not a proper t-test, poor for small samples.
  dttable = t(meantimes)
  dttable = cbind(dttable, gap=gaptimes)
  dttable = cbind(dttable, z=gaptimes/gapses)
  dttable = dttable[c(12,13,11,10,9,8,2,7,3,4,5,6),]
  dttable
##                    F        M         gap           z
## U-R1        4.888889 4.921429 -0.03253968 -0.09739638
## U-R2        5.266667 5.954545 -0.68787879 -0.90893226
## U-M         3.333333 5.028571 -1.69523810 -2.10219450
## U-for       4.200000 4.823529 -0.62352941 -0.91430801
## other-US    3.640000 4.693333 -1.05333333 -1.68952425
## other-for   4.458333 5.104167 -0.64583333 -1.18705538
## col         4.708333 4.148148  0.56018519  0.76087424
## obs         4.434783 4.621622 -0.18683901 -0.33664348
## education   4.500000 3.875000  0.62500000  0.41632263
## government  2.833333 3.076923 -0.24358974 -0.17950908
## industry    2.816327 3.202703 -0.38637617 -0.90530413
## non-ast.ac. 1.800000 5.333333 -3.53333333 -1.80049362

Histograms

par(mar=mar)
jhist.hire = jointhist.mf(phdy, condition=(phdy$Left=='hired')) # joint-normalize hiring/leaving histograms
jhist.left = jointhist.mf(phdy, condition=(phdy$Left=='left'))
jhist.limit =jointhist.mf(phdy, condition=(phdy$Left!='hired' & phdy$Left!='left'))
jhist.r1    =jointhist.mf(phdy, condition=(phdy$Left=='hired' & (phdy$JobType=='prof-R1' | phdy$JobType=='prof')))
jointhist.mf.plot(jhist.hire, xlab=xlab, ylab='Fraction starting job')

jointhist.mf.plot(jhist.left, xlab=xlab, ylab='Fraction leaving field')

Some statistical tests comparing the postdoc-time distributions

h = phdnotdocy
# Student t-test
ttest = t.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'], conf.level=confidence)
ttest
## 
##  Welch Two Sample t-test
## 
## data:  h$TimeToJob[h$Gender == "M"] and h$TimeToJob[h$Gender == "F"]
## t = 2.0094, df = 585.87, p-value = 0.04495
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.008463335 0.740902416
## sample estimates:
## mean of x mean of y 
##  4.386047  4.011364
# Student t-test with null hypothesis of Flaherty 1.1 year gap measurement
ttest.vsF18 = t.test(h$TimeToJob[h$Gender=='M']-1.1,h$TimeToJob[h$Gender=='F'], conf.level=confidence)
ttest.vsF18
## 
##  Welch Two Sample t-test
## 
## data:  h$TimeToJob[h$Gender == "M"] - 1.1 and h$TimeToJob[h$Gender == "F"]
## t = -3.8898, df = 585.87, p-value = 0.0001118
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.0915367 -0.3590976
## sample estimates:
## mean of x mean of y 
##  3.286047  4.011364
# Wilcoxon test
wtest = wilcox.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'])
wtest
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  h$TimeToJob[h$Gender == "M"] and h$TimeToJob[h$Gender == "F"]
## W = 89690, p-value = 0.2023
## alternative hypothesis: true location shift is not equal to 0
# K-S test
kstest = ks.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'])
## Warning in ks.test(h$TimeToJob[h$Gender == "M"], h$TimeToJob[h$Gender == :
## p-value will be approximate in the presence of ties
kstest
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  h$TimeToJob[h$Gender == "M"] and h$TimeToJob[h$Gender == "F"]
## D = 0.061698, p-value = 0.4738
## alternative hypothesis: two-sided
# Anderson-Darling test
library(kSamples)
## Loading required package: SuppDists
adtest = ad.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'])
adtest
## 
## 
##  Anderson-Darling k-sample test.
## 
## Number of samples:  2
## Sample sizes:  645, 264
## Number of ties: 892
## 
## Mean of  Anderson-Darling  Criterion: 1
## Standard deviation of  Anderson-Darling  Criterion: 0.75986
## 
## T.AD = ( Anderson-Darling  Criterion - mean)/sigma
## 
## Null Hypothesis: All samples come from a common population.
## 
##              AD  T.AD  asympt. P-value
## version 1: 2.17 1.540          0.07411
## version 2: 1.93 1.223          0.10040

Statistics on leaving the field

# Basic 2x2 table
leavetable = table(phdnotdoc$Gender,phdnotdoc$Left=='left')
leavetable
##    
##     FALSE TRUE
##   F   195   81
##   M   477  192
# Fisher test (are M/F different in leave/stay fractions?)
fisher = fisher.test(leavetable)
mbinom = binom.test(leavetable['M',2], apply(leavetable,1,sum)[2], conf.level=confidence)
fbinom = binom.test(leavetable['F',2], apply(leavetable,1,sum)[1], conf.level=confidence)
fisher
## 
##  Fisher's Exact Test for Count Data
## 
## data:  leavetable
## p-value = 0.8746
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.7048256 1.3384737
## sample estimates:
## odds ratio 
##  0.9690235

Survival and hazard analysis for astronomy hiring

par(mar=mar)
library(survival)
S = Surv(phdy$TimeToJob, phdy$Left=='hired')

survkm = survfit(S~phdy$Gender, conf.int=plotconfidence)
coxfit = coxph(S~phdy$Gender)
sumcoxfit = summary(coxfit)
coxhire = cox.hazard.mf(coxfit, phdy$Gender)
hazhire = km.hazard.mf(survkm, conf.level=plotconfidence)

sumcoxfit
## Call:
## coxph(formula = S ~ phdy$Gender)
## 
##   n= 1026, number of events= 642 
## 
##                  coef exp(coef) se(coef)      z Pr(>|z|)
## phdy$GenderM -0.07717   0.92573  0.08705 -0.886    0.375
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## phdy$GenderM    0.9257       1.08    0.7805     1.098
## 
## Concordance= 0.51  (se = 0.011 )
## Rsquare= 0.001   (max possible= 1 )
## Likelihood ratio test= 0.78  on 1 df,   p=0.3779
## Wald test            = 0.79  on 1 df,   p=0.3753
## Score (logrank) test = 0.79  on 1 df,   p=0.3752
km.plot(survkm, xlab=xlab, ylab='K-M survivorship for astrophysics hiring', xlim=c(0,11))

hazard.mf.plot(coxhire, hazhire, xlab=xlab, ylab='Probability of being hired (if postdoc)')

SR1 = Surv(phdy$TimeToJob, phdy$Left=='hired' & (phdy$JobType=='prof-R1' | phdy$JobType=='prof'))
coxfitR1 = coxph(SR1~phdy$Gender)
coxR1 = cox.hazard.mf(coxfitR1, phdy$Gender)

Survival and hazard analysis for leaving astronomy

par(mar=mar)
SI = Surv(phdy$TimeToJob, phdy$Left=='left')
survkmI = survfit(SI~phdy$Gender, conf.int=plotconfidence)
coxfitI = coxph(SI~phdy$Gender)
sumcoxfitI = summary(coxfitI)

coxleave = cox.hazard.mf(coxfitI, phdy$Gender)
hazleave = km.hazard.mf(survkmI, conf.level=plotconfidence)

sumcoxfitI
## Call:
## coxph(formula = SI ~ phdy$Gender)
## 
##   n= 1026, number of events= 267 
## 
##                  coef exp(coef) se(coef)      z Pr(>|z|)
## phdy$GenderM -0.03215   0.96836  0.13537 -0.238    0.812
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## phdy$GenderM    0.9684      1.033    0.7427     1.263
## 
## Concordance= 0.501  (se = 0.016 )
## Rsquare= 0   (max possible= 0.964 )
## Likelihood ratio test= 0.06  on 1 df,   p=0.8127
## Wald test            = 0.06  on 1 df,   p=0.8123
## Score (logrank) test = 0.06  on 1 df,   p=0.8123
km.plot(survkmI, xlab=xlab, ylab='K-M survivorship for leaving astrophysics', xlim=c(0,11))

hazard.mf.plot(coxleave, hazleave, xlab=xlab, ylab='Probability of leaving field (if postdoc)', ylim=c(0,0.25))

Cumulative distributions

par(mar=mar)
nmy = sum(phdy$Gender=='M')
nfy = sum(phdy$Gender=='F')

mfaccum = cumsum(jhist.hire$nmi)
ffaccum = cumsum(jhist.hire$nfi)
mleavecum = cumsum(jhist.left$nmi)
fleavecum = cumsum(jhist.left$nfi)
mr1cum = cumsum(jhist.r1$nmi)
fr1cum = cumsum(jhist.r1$nfi)

names(mfaccum) = jhist.hire$time
names(ffaccum) = jhist.hire$time
names(mleavecum) = jhist.left$time
names(fleavecum) = jhist.left$time
names(mr1cum) = jhist.r1$time
names(fr1cum) = jhist.r1$time

Y = length(mfaccum)
mlimitcum = 0
flimitcum = 0
hp = coxhire$difhaz1  
lp = coxleave$difhaz1
r1p = coxR1$difhaz1
for (i in 1:(Y-1)) { # assume that current postdocs will be hired/leave and correct appropriately
    #keep a running tally of the number of "limits" (still postdocs with post-PhD age<=current year)
    #and fractionally redistribute them to jobs using the hazard function 
  mfaccum[(i+1):Y] = mfaccum[(i+1):Y] + hp[i] * mlimitcum
  ffaccum[(i+1):Y] = ffaccum[(i+1):Y] + hp[i] * flimitcum
  mleavecum[(i+1):Y] = mleavecum[(i+1):Y] + lp[i] * mlimitcum
  fleavecum[(i+1):Y] = fleavecum[(i+1):Y] + lp[i] * flimitcum
  mr1cum[(i+1):Y] = mr1cum[(i+1):Y] + r1p[i] * mlimitcum
  fr1cum[(i+1):Y] = fr1cum[(i+1):Y] + r1p[i] * flimitcum
  mlimitcum = mlimitcum*(1-hp[i]-lp[i]) + jhist.limit$nmi[i]
  flimitcum = flimitcum*(1-hp[i]-lp[i]) + jhist.limit$nfi[i]
}   
time = jhist.hire$time
plot(c(-1),c(-1), xlim=c(-0.02,11.9),ylim=c(-0.002,1.002), xlab=xlab, ylab='Cumulative fraction', yaxs='i',xaxs='i')
frachired = (mfaccum+ffaccum)/(nfy+nmy) # includes completeness correction
frachiredm = mfaccum/nmy
frachiredf = ffaccum/nfy
fracleft = (mleavecum+fleavecum)/(nfy+nmy) # includes completeness correction
fracleftm = mleavecum/nmy
fracleftf = fleavecum/nfy
fracr1 = (mr1cum+fr1cum)/(nfy+nmy) # does not yet include completeness correction
fracr1m = mr1cum/nmy
fracr1f = fr1cum/nfy
lines(-1:max(time), c(0,frachiredm),typ='s')
lines(-1:max(time), c(1,1-fracleftm),typ='s')
lines(-1:max(time), c(0,frachiredf),typ='s', col='red')
lines(-1:max(time), c(1,1-fracleftf),typ='s', col='red')
text(2,0.5,'postdoc/adjunct')
text(7.5,0.9,'left astrophysics')
text(9,0.25+0.03,'hired into long-term job')
#text(10,0.07,'(professor)', cex=0.85)
text(12.15,  frachired['12'],signif(frachired['12'],2), cex=0.66, xpd=NA, col='darkgray')
text(12.15,1-fracleft['12'],  signif(1-fracleft['12'],2), cex=0.66, xpd=NA, col='darkgray')

#text(12.15,  fracr1['12'],signif(fracr1['12'],2), cex=0.66, xpd=NA, col='darkgray')
#lines(-1:max(time), c(0,fracr1m),typ='s',lty=3) # R1 hires only
#lines(-1:max(time), c(0,fracr1f),typ='s',lty=3,col='red')

Summarize statistics of interest for paper

gtab = table(phd$Gender)
gtaby = table(phdy$Gender)
out = c('')
#out = c(out,paste(' # PhD programs: ', length(unique(phd$PhDInstitute))))
out = c(out,paste(' # total: ', ntotal))
out = c(out,paste(' # omitted: ', nomit))
out = c(out,paste('Total sample size:', n, '( M =',gtab[2], 'F =',gtab[1],')'))
out = c(out,paste('                             ', round(100*gtab[2]/n,1), '%   ', round(100*gtab[1]/n,1), '%   ', sep=''))

outcome = table(phd$Left)
nleftlater = table
names(outcome)[names(outcome)=='hired'] = 'astro'
out = c(out,paste('Career outcomes: ', outcome['postdoc']+outcome['adjunct'], 'still postdocs/adjuncts')) 
out = c(out,paste('                 ', outcome['astro'], 'found long-term astronomy employment'))
                                     #, nlater, 'left after long-term job)'))
out = c(out,paste('                 ', outcome['left'], 'left astronomy'))
pmleave = leavetable['M',2] / apply(leavetable,1,sum)[2]
pmleaveci = mbinom$conf.int[1:2]
pfleave = leavetable['F',2] / apply(leavetable,1,sum)[1]
pfleaveci = fbinom$conf.int[1:2]
out = c(out,paste('# of M that leave:', leavetable['M',2], '/', apply(leavetable,1,sum)[2]))
out = c(out,paste('# of F that leave:', leavetable['F',2], '/', apply(leavetable,1,sum)[1]))
out = c(out,paste('% of M that leave:', round(100*pmleave), '+',round(100*(pmleaveci[2]-pmleave)),'-', round(100*(pmleave - pmleaveci[1]))))
out = c(out,paste('% of F that leave:', round(100*pfleave), '+',round(100*(pfleaveci[2]-pfleave)),'-', round(100*(pfleave - pfleaveci[1]))))
out = c(out,paste('% of M that stay: ', 100-round(100*pmleave), '-',round(100*(pmleaveci[2]-pmleave)),'+', round(100*(pmleave - pmleaveci[1]))))
out = c(out,paste('% of F that stay: ', 100-round(100*pfleave), '-',round(100*(pfleaveci[2]-pfleave)),'+', round(100*(pfleave - pfleaveci[1]))))
out = c(out,paste('Fisher significance of M/F leave rates different: p =', signif(fisher$p.value,2)))

out = c(out,paste(' Significance of different outcome employer classes:',signif(ch2$p.value,3)))
out = c(out,paste(' Significance of different outcome profession classes:',signif(ch1$p.value,3)))


out = c(out,'')

out = c(out,paste(' # with no year: ', nunkyear))
out = c(out,paste('Sample size w/year:', ny, '( M =',gtaby[2], 'F =',gtaby[1],')'))
out = c(out,paste('                             ', round(100*gtaby[2]/ny,1), '%   ', round(100*gtaby[1]/ny,1),'%', sep=''))
out = c(out,paste('Female mean time: ', round(means[1],2), '+/-', round(ses[1],2)))
out = c(out,paste('  Male mean time: ', round(means[2],2), '+/-', round(ses[2],2)))
out = c(out,paste('             Gap: ', round(gap,2), '+/-', round(gapunc,2)))

out = c(out,paste('% hired within  4 years:',signif(100*frachired['4'],3),'%'))
out = c(out,paste('                      (M:',signif(100*frachiredm['4'],3),'%  F:',signif(100*frachiredf['4'],3),'% )'))
out = c(out,paste('% hired within 12 years:',signif(100*frachired['12'],3),'%'))
out = c(out,paste('                      (M:',signif(100*frachiredm['12'],3),'%  F:',signif(100*frachiredf['12'],3),'% )'))
out = c(out,paste('%  left within 12 years:',signif(100*fracleft['12'],3),'%'))
out = c(out,paste('                      (M:',signif(100*fracleftm['12'],3),'%  F:',signif(100*fracleftf['12'],3),'% )'))


diff = means[2]-means[1]
differr = (ttest$conf.int[2]-ttest$conf.int[1])/2
differr2 = sqrt(ses[1]^2+ses[2]^2)
out = c(out,paste('      Difference: ', round(diff,2), '+/-', round(differr,2), ' (p =',signif(ttest$p.value,2),')'))
# paste('      Difference: ', round(diff,2), '+/-', round(differr2,2))


out = c(out, 'Significance of M-F postdoc time distribution difference:')
out = c(out,paste(' Student t-test: p =', signif(ttest$p.value,2), ' (F18 ruled out at p =',signif(ttest.vsF18$p.value,2),')'))
out = c(out,paste('  Wilcoxon test: p =', signif(wtest$p.value,2)))
out = c(out,paste('       K-S test: p =', signif(kstest$p.value,2)))
out = c(out,paste('       A-D test: p =', signif(adtest$ad[1,3],2)))

out = c(out,'')
out = c(out,paste('Survival analysis for astronomy hiring:'))
out = c(out,paste('Cox H(F/M) = ', round(coxhire$b,2), ' + ', round(coxhire$berr[1],2), ' - ', round(-coxhire$berr[2],2)))
out = c(out,paste('Cox significance of H(F/M) != 1: p =', signif(sumcoxfit$coefficients[5],2)))
out = c(out,'')
out = c(out,'Survival analysis for leaving astronomy:')
out = c(out,paste('Cox L(F/M) = ', round(coxleave$b,2), ' + ', round(coxleave$berr[1],2), ' - ', round(-coxleave$berr[2],2)))
out = c(out,paste('Cox significance of L(F/M) != 1: p =', signif(sumcoxfitI$coefficients[5],2)))

writeLines(out)
## 
##  # total:  1154
##  # omitted:  91
## Total sample size: 1063 ( M = 748 F = 315 )
##                              70.4%   29.6%   
## Career outcomes:  118 still postdocs/adjuncts
##                   672 found long-term astronomy employment
##                   273 left astronomy
## # of M that leave: 192 / 669
## # of F that leave: 81 / 276
## % of M that leave: 29 + 4 - 3
## % of F that leave: 29 + 6 - 5
## % of M that stay:  71 - 4 + 3
## % of F that stay:  71 - 6 + 5
## Fisher significance of M/F leave rates different: p = 0.87
##  Significance of different outcome employer classes: 0.0404
##  Significance of different outcome profession classes: 0.167
## 
##  # with no year:  37
## Sample size w/year: 1026 ( M = 723 F = 303 )
##                              70.5%   29.5%
## Female mean time:  4.41 +/- 0.16
##   Male mean time:  4.86 +/- 0.12
##              Gap:  0.45 +/- 0.4
## % hired within  4 years: 28.7 %
##                       (M: 28.1 %  F: 30.1 % )
## % hired within 12 years: 65.1 %
##                       (M: 64.8 %  F: 65.8 % )
## %  left within 12 years: 27.1 %
##                       (M: 27.1 %  F: 27 % )
##       Difference:  0.45 +/- 0.37  (p = 0.045 )
## Significance of M-F postdoc time distribution difference:
##  Student t-test: p = 0.045  (F18 ruled out at p = 0.00011 )
##   Wilcoxon test: p = 0.2
##        K-S test: p = 0.47
##        A-D test: p = 0.074
## 
## Survival analysis for astronomy hiring:
## Cox H(F/M) =  1.08  +  0.2  -  0.17
## Cox significance of H(F/M) != 1: p = 0.38
## 
## Survival analysis for leaving astronomy:
## Cox L(F/M) =  1.03  +  0.31  -  0.24
## Cox significance of L(F/M) != 1: p = 0.81