# Define the confidence levels we will use in this work
confidence = 0.95
plotconfidence = 0.67
# Set plotting defaults
xlab = 'Years after PhD'
mar=c(5.1,4.1,1.1,2.1)
# Load the custom analysis/plotting functions
setwd("~/AstroPhDs")
source('astrophds_func.r')
Load data; create data frames for different subsets
setwd("~/AstroPhDs")
allphd = read.csv('phdoutcomes.csv')
ntotal = nrow(allphd)
phd = allphd[allphd$Omit==FALSE,] # remove the omits
omitted = allphd[allphd$Omit==TRUE,] # hold onto the omits here
phd$Gender = droplevels(phd$Gender) # remove unknown-gender factor level since unknown-gender rows were omitted
nunkyear = sum(is.na(phd$JobYear))
n = nrow(phd) # number excluding omits
nomit = nrow(omitted)
print(paste('Loaded',n,'PhDs (omitted',nomit,')'))
## [1] "Loaded 1063 PhDs (omitted 91 )"
TimeToJob = phd$JobYear-phd$PhDYear
TimeToJob[TimeToJob < 0] = 0
phd = cbind(phd,TimeToJob)
phdfac = phd[phd$Left == 'hired',] # permanent/long-term astro job
phdleft = phd[phd$Left == 'left',] # left field
phdtemp = phd[phd$Left == 'postdoc' | phd$Left == 'adjunct',] # postdoc/temp
phdnotdoc = phd[phd$Left != 'postdoc' & phd$Left != 'adjunct',] # all jobs (astro+nonastro)
phdnotdoc$Left = droplevels(phdnotdoc$Left)
phdy = phd[is.na(phd$JobYear)==FALSE,] # same as above four, but omitting unknown-year individuals
phdfacy = phdfac[is.na(phdfac$JobYear)==FALSE,]
phdlefty = phdleft[is.na(phdleft$JobYear)==FALSE,]
phdnotdocy = phdnotdoc[is.na(phdnotdoc$JobYear)==FALSE,]
# safety step to make sure we don't accidentally use missing-year data for timing analysis
phdfac = subset(phdfac, select = -c(TimeToJob))
phdleft = subset(phdleft, select = -c(TimeToJob))
phdnotdoc = subset(phdnotdoc, select = -c(TimeToJob))
ny = nrow(phdy)
Outcome differences by gender
otable1 = table(phdnotdoc$Gender, phdnotdoc$JobType)
ch1 = chisq.test(otable1)
print('')
## [1] ""
z = signif(ch1$stdres[1,],3)
otable1t = cbind(t(otable1),z) # negative z is fewer women than 'expected'; positive is more women
otable1t = rbind(otable1t, TEMP=c(table(phdtemp$Gender),NA))
otable1t = rbind(otable1t, OMITTED=c(table(omitted$Gender)[2:3],NA))
pctf = round(100*otable1t[,1]/(otable1t[,1]+otable1t[,2]))
pctfunc = round(50*(binomialci(otable1t[,1],otable1t[,1]+otable1t[,2])$upper-
binomialci(otable1t[,1],otable1t[,1]+otable1t[,2])$lower))
otable1t = cbind(otable1t,pctf,pctfunc)
otable1t[c(3,2,4,1,5,6),]
## F M z pctf pctfunc
## prof-R1 41 111 -0.661 27 7
## prof 81 155 2.000 34 6
## staff 73 211 -1.550 26 5
## non-astro 81 192 0.200 30 6
## TEMP 39 79 NA 33 9
## OMITTED 15 40 NA 27 12
paste(table(omitted$Gender)[1],'with unknown gender')
## [1] "36 with unknown gender"
sum(otable1t[1:5,1:2]) # check should = n
## [1] 1063
sum(otable1t[6,1:2])+sum(table(omitted$Gender)[1]) # check should = nomit
## [1] 91
otable2 = table(phdnotdoc$Gender, phdnotdoc$InstClass)
otable2 = otable2[,2:ncol(otable2)] # omit blank NA column (a little bit dangerous)
ch2 = chisq.test(otable2)
## Warning in chisq.test(otable2): Chi-squared approximation may be incorrect
print('')
## [1] ""
z = signif(ch2$stdres[1,],3)
otable2t = cbind(t(otable2),z)
otable2t = otable2t[c(11,12,10,9,1,6,8,7, 5,2,3,4,13),]
#otable2t
allastro = colSums(otable2t[1:8,1:2])
allnonastro= colSums(otable2t[9:13,1:2])
otable2t = rbind(otable2t[1:8,],ASTRO=c(allastro,NA),otable2t[9:13,],NONASTRO=c(allnonastro,NA))
pct2f = round(100*otable2t[,1]/(otable2t[,1]+otable2t[,2]))
pct2func = round(50*(binomialci(otable2t[,1],otable2t[,1]+otable2t[,2])$upper-binomialci(otable2t[,1],otable2t[,1]+otable2t[,2])$lower))
otable2t = cbind(otable2t,pct2f,pct2func)
otable2t
## F M z pct2f pct2func
## U-R1 46 141 -1.550 25 6
## U-R2 15 22 1.550 41 17
## U-M 21 36 1.310 37 13
## U-for 10 34 -0.968 23 13
## col 25 30 2.730 45 14
## obs 27 78 -0.835 26 9
## other-US 27 82 -1.080 25 8
## other-for 24 54 0.317 31 11
## ASTRO 195 477 NA 29 4
## non-ast.ac. 5 4 1.750 56 33
## education 6 8 1.130 43 27
## government 6 13 0.230 32 22
## industry 53 149 -1.050 26 6
## unknown 11 18 1.050 38 19
## NONASTRO 81 192 NA 30 6
Chi^2 for career outcome differences
ch1 # are basic outcomes different?
##
## Pearson's Chi-squared test
##
## data: otable1
## X-squared = 5.0666, df = 3, p-value = 0.167
ch2 # are detailed outcomes different?
##
## Pearson's Chi-squared test
##
## data: otable2
## X-squared = 21.755, df = 12, p-value = 0.04036
Basic statistics on time to permanent job
ns = tapply(phdfacy$TimeToJob, phdfacy$Gender, length)
means = tapply(phdfacy$TimeToJob, phdfacy$Gender, mean)
sds = tapply(phdfacy$TimeToJob, phdfacy$Gender, sd)
ses = sds/sqrt(ns)
medians = tapply(phdfacy$TimeToJob, phdfacy$Gender, median)
m = data.frame(N=ns,mean=means,SE=ses,median=medians) # create table of N, mean, median, SE
m
## N mean SE median
## F 187 4.406417 0.1648407 5
## M 455 4.859341 0.1226019 5
gap = means[2]-means[1] # simple-statistics M-F hiring-time gap measurement
gap = unname(gap)
gapunc = sqrt(ses[2]^2+ses[1]^2)*1.96 # assuming confidence is 0.95
gapunc = unname(gapunc)
Time Differences by Gender and JobType
meantimes1 = tapply(phdnotdocy$TimeToJob, phdnotdocy$InstClass, mean)
meantimes = tapply(phdnotdocy$TimeToJob, list(phdnotdocy$Gender,phdnotdocy$InstClass), mean)
setimes = tapply(phdnotdocy$TimeToJob, list(phdnotdocy$Gender,phdnotdocy$InstClass), sd)/sqrt(tapply(phdnotdocy$TimeToJob, list(phdnotdocy$Gender,phdnotdocy$InstClass), length))
gaptimes = meantimes[1,]-meantimes[2,]
gapses = sqrt(setimes[1,]^2+setimes[2,]^2) # not a proper t-test, poor for small samples.
dttable = t(meantimes)
dttable = cbind(dttable, gap=gaptimes)
dttable = cbind(dttable, z=gaptimes/gapses)
dttable = dttable[c(12,13,11,10,9,8,2,7,3,4,5,6),]
dttable
## F M gap z
## U-R1 4.888889 4.921429 -0.03253968 -0.09739638
## U-R2 5.266667 5.954545 -0.68787879 -0.90893226
## U-M 3.333333 5.028571 -1.69523810 -2.10219450
## U-for 4.200000 4.823529 -0.62352941 -0.91430801
## other-US 3.640000 4.693333 -1.05333333 -1.68952425
## other-for 4.458333 5.104167 -0.64583333 -1.18705538
## col 4.708333 4.148148 0.56018519 0.76087424
## obs 4.434783 4.621622 -0.18683901 -0.33664348
## education 4.500000 3.875000 0.62500000 0.41632263
## government 2.833333 3.076923 -0.24358974 -0.17950908
## industry 2.816327 3.202703 -0.38637617 -0.90530413
## non-ast.ac. 1.800000 5.333333 -3.53333333 -1.80049362
Histograms
par(mar=mar)
jhist.hire = jointhist.mf(phdy, condition=(phdy$Left=='hired')) # joint-normalize hiring/leaving histograms
jhist.left = jointhist.mf(phdy, condition=(phdy$Left=='left'))
jhist.limit =jointhist.mf(phdy, condition=(phdy$Left!='hired' & phdy$Left!='left'))
jhist.r1 =jointhist.mf(phdy, condition=(phdy$Left=='hired' & (phdy$JobType=='prof-R1' | phdy$JobType=='prof')))
jointhist.mf.plot(jhist.hire, xlab=xlab, ylab='Fraction starting job')
jointhist.mf.plot(jhist.left, xlab=xlab, ylab='Fraction leaving field')
Some statistical tests comparing the postdoc-time distributions
h = phdnotdocy
# Student t-test
ttest = t.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'], conf.level=confidence)
ttest
##
## Welch Two Sample t-test
##
## data: h$TimeToJob[h$Gender == "M"] and h$TimeToJob[h$Gender == "F"]
## t = 2.0094, df = 585.87, p-value = 0.04495
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.008463335 0.740902416
## sample estimates:
## mean of x mean of y
## 4.386047 4.011364
# Student t-test with null hypothesis of Flaherty 1.1 year gap measurement
ttest.vsF18 = t.test(h$TimeToJob[h$Gender=='M']-1.1,h$TimeToJob[h$Gender=='F'], conf.level=confidence)
ttest.vsF18
##
## Welch Two Sample t-test
##
## data: h$TimeToJob[h$Gender == "M"] - 1.1 and h$TimeToJob[h$Gender == "F"]
## t = -3.8898, df = 585.87, p-value = 0.0001118
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.0915367 -0.3590976
## sample estimates:
## mean of x mean of y
## 3.286047 4.011364
# Wilcoxon test
wtest = wilcox.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'])
wtest
##
## Wilcoxon rank sum test with continuity correction
##
## data: h$TimeToJob[h$Gender == "M"] and h$TimeToJob[h$Gender == "F"]
## W = 89690, p-value = 0.2023
## alternative hypothesis: true location shift is not equal to 0
# K-S test
kstest = ks.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'])
## Warning in ks.test(h$TimeToJob[h$Gender == "M"], h$TimeToJob[h$Gender == :
## p-value will be approximate in the presence of ties
kstest
##
## Two-sample Kolmogorov-Smirnov test
##
## data: h$TimeToJob[h$Gender == "M"] and h$TimeToJob[h$Gender == "F"]
## D = 0.061698, p-value = 0.4738
## alternative hypothesis: two-sided
# Anderson-Darling test
library(kSamples)
## Loading required package: SuppDists
adtest = ad.test(h$TimeToJob[h$Gender=='M'],h$TimeToJob[h$Gender=='F'])
adtest
##
##
## Anderson-Darling k-sample test.
##
## Number of samples: 2
## Sample sizes: 645, 264
## Number of ties: 892
##
## Mean of Anderson-Darling Criterion: 1
## Standard deviation of Anderson-Darling Criterion: 0.75986
##
## T.AD = ( Anderson-Darling Criterion - mean)/sigma
##
## Null Hypothesis: All samples come from a common population.
##
## AD T.AD asympt. P-value
## version 1: 2.17 1.540 0.07411
## version 2: 1.93 1.223 0.10040
Statistics on leaving the field
# Basic 2x2 table
leavetable = table(phdnotdoc$Gender,phdnotdoc$Left=='left')
leavetable
##
## FALSE TRUE
## F 195 81
## M 477 192
# Fisher test (are M/F different in leave/stay fractions?)
fisher = fisher.test(leavetable)
mbinom = binom.test(leavetable['M',2], apply(leavetable,1,sum)[2], conf.level=confidence)
fbinom = binom.test(leavetable['F',2], apply(leavetable,1,sum)[1], conf.level=confidence)
fisher
##
## Fisher's Exact Test for Count Data
##
## data: leavetable
## p-value = 0.8746
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.7048256 1.3384737
## sample estimates:
## odds ratio
## 0.9690235
Survival and hazard analysis for astronomy hiring
par(mar=mar)
library(survival)
S = Surv(phdy$TimeToJob, phdy$Left=='hired')
survkm = survfit(S~phdy$Gender, conf.int=plotconfidence)
coxfit = coxph(S~phdy$Gender)
sumcoxfit = summary(coxfit)
coxhire = cox.hazard.mf(coxfit, phdy$Gender)
hazhire = km.hazard.mf(survkm, conf.level=plotconfidence)
sumcoxfit
## Call:
## coxph(formula = S ~ phdy$Gender)
##
## n= 1026, number of events= 642
##
## coef exp(coef) se(coef) z Pr(>|z|)
## phdy$GenderM -0.07717 0.92573 0.08705 -0.886 0.375
##
## exp(coef) exp(-coef) lower .95 upper .95
## phdy$GenderM 0.9257 1.08 0.7805 1.098
##
## Concordance= 0.51 (se = 0.011 )
## Rsquare= 0.001 (max possible= 1 )
## Likelihood ratio test= 0.78 on 1 df, p=0.3779
## Wald test = 0.79 on 1 df, p=0.3753
## Score (logrank) test = 0.79 on 1 df, p=0.3752
km.plot(survkm, xlab=xlab, ylab='K-M survivorship for astrophysics hiring', xlim=c(0,11))
hazard.mf.plot(coxhire, hazhire, xlab=xlab, ylab='Probability of being hired (if postdoc)')
SR1 = Surv(phdy$TimeToJob, phdy$Left=='hired' & (phdy$JobType=='prof-R1' | phdy$JobType=='prof'))
coxfitR1 = coxph(SR1~phdy$Gender)
coxR1 = cox.hazard.mf(coxfitR1, phdy$Gender)
Survival and hazard analysis for leaving astronomy
par(mar=mar)
SI = Surv(phdy$TimeToJob, phdy$Left=='left')
survkmI = survfit(SI~phdy$Gender, conf.int=plotconfidence)
coxfitI = coxph(SI~phdy$Gender)
sumcoxfitI = summary(coxfitI)
coxleave = cox.hazard.mf(coxfitI, phdy$Gender)
hazleave = km.hazard.mf(survkmI, conf.level=plotconfidence)
sumcoxfitI
## Call:
## coxph(formula = SI ~ phdy$Gender)
##
## n= 1026, number of events= 267
##
## coef exp(coef) se(coef) z Pr(>|z|)
## phdy$GenderM -0.03215 0.96836 0.13537 -0.238 0.812
##
## exp(coef) exp(-coef) lower .95 upper .95
## phdy$GenderM 0.9684 1.033 0.7427 1.263
##
## Concordance= 0.501 (se = 0.016 )
## Rsquare= 0 (max possible= 0.964 )
## Likelihood ratio test= 0.06 on 1 df, p=0.8127
## Wald test = 0.06 on 1 df, p=0.8123
## Score (logrank) test = 0.06 on 1 df, p=0.8123
km.plot(survkmI, xlab=xlab, ylab='K-M survivorship for leaving astrophysics', xlim=c(0,11))
hazard.mf.plot(coxleave, hazleave, xlab=xlab, ylab='Probability of leaving field (if postdoc)', ylim=c(0,0.25))
Cumulative distributions
par(mar=mar)
nmy = sum(phdy$Gender=='M')
nfy = sum(phdy$Gender=='F')
mfaccum = cumsum(jhist.hire$nmi)
ffaccum = cumsum(jhist.hire$nfi)
mleavecum = cumsum(jhist.left$nmi)
fleavecum = cumsum(jhist.left$nfi)
mr1cum = cumsum(jhist.r1$nmi)
fr1cum = cumsum(jhist.r1$nfi)
names(mfaccum) = jhist.hire$time
names(ffaccum) = jhist.hire$time
names(mleavecum) = jhist.left$time
names(fleavecum) = jhist.left$time
names(mr1cum) = jhist.r1$time
names(fr1cum) = jhist.r1$time
Y = length(mfaccum)
mlimitcum = 0
flimitcum = 0
hp = coxhire$difhaz1
lp = coxleave$difhaz1
r1p = coxR1$difhaz1
for (i in 1:(Y-1)) { # assume that current postdocs will be hired/leave and correct appropriately
#keep a running tally of the number of "limits" (still postdocs with post-PhD age<=current year)
#and fractionally redistribute them to jobs using the hazard function
mfaccum[(i+1):Y] = mfaccum[(i+1):Y] + hp[i] * mlimitcum
ffaccum[(i+1):Y] = ffaccum[(i+1):Y] + hp[i] * flimitcum
mleavecum[(i+1):Y] = mleavecum[(i+1):Y] + lp[i] * mlimitcum
fleavecum[(i+1):Y] = fleavecum[(i+1):Y] + lp[i] * flimitcum
mr1cum[(i+1):Y] = mr1cum[(i+1):Y] + r1p[i] * mlimitcum
fr1cum[(i+1):Y] = fr1cum[(i+1):Y] + r1p[i] * flimitcum
mlimitcum = mlimitcum*(1-hp[i]-lp[i]) + jhist.limit$nmi[i]
flimitcum = flimitcum*(1-hp[i]-lp[i]) + jhist.limit$nfi[i]
}
time = jhist.hire$time
plot(c(-1),c(-1), xlim=c(-0.02,11.9),ylim=c(-0.002,1.002), xlab=xlab, ylab='Cumulative fraction', yaxs='i',xaxs='i')
frachired = (mfaccum+ffaccum)/(nfy+nmy) # includes completeness correction
frachiredm = mfaccum/nmy
frachiredf = ffaccum/nfy
fracleft = (mleavecum+fleavecum)/(nfy+nmy) # includes completeness correction
fracleftm = mleavecum/nmy
fracleftf = fleavecum/nfy
fracr1 = (mr1cum+fr1cum)/(nfy+nmy) # does not yet include completeness correction
fracr1m = mr1cum/nmy
fracr1f = fr1cum/nfy
lines(-1:max(time), c(0,frachiredm),typ='s')
lines(-1:max(time), c(1,1-fracleftm),typ='s')
lines(-1:max(time), c(0,frachiredf),typ='s', col='red')
lines(-1:max(time), c(1,1-fracleftf),typ='s', col='red')
text(2,0.5,'postdoc/adjunct')
text(7.5,0.9,'left astrophysics')
text(9,0.25+0.03,'hired into long-term job')
#text(10,0.07,'(professor)', cex=0.85)
text(12.15, frachired['12'],signif(frachired['12'],2), cex=0.66, xpd=NA, col='darkgray')
text(12.15,1-fracleft['12'], signif(1-fracleft['12'],2), cex=0.66, xpd=NA, col='darkgray')
#text(12.15, fracr1['12'],signif(fracr1['12'],2), cex=0.66, xpd=NA, col='darkgray')
#lines(-1:max(time), c(0,fracr1m),typ='s',lty=3) # R1 hires only
#lines(-1:max(time), c(0,fracr1f),typ='s',lty=3,col='red')
Summarize statistics of interest for paper
gtab = table(phd$Gender)
gtaby = table(phdy$Gender)
out = c('')
#out = c(out,paste(' # PhD programs: ', length(unique(phd$PhDInstitute))))
out = c(out,paste(' # total: ', ntotal))
out = c(out,paste(' # omitted: ', nomit))
out = c(out,paste('Total sample size:', n, '( M =',gtab[2], 'F =',gtab[1],')'))
out = c(out,paste(' ', round(100*gtab[2]/n,1), '% ', round(100*gtab[1]/n,1), '% ', sep=''))
outcome = table(phd$Left)
nleftlater = table
names(outcome)[names(outcome)=='hired'] = 'astro'
out = c(out,paste('Career outcomes: ', outcome['postdoc']+outcome['adjunct'], 'still postdocs/adjuncts'))
out = c(out,paste(' ', outcome['astro'], 'found long-term astronomy employment'))
#, nlater, 'left after long-term job)'))
out = c(out,paste(' ', outcome['left'], 'left astronomy'))
pmleave = leavetable['M',2] / apply(leavetable,1,sum)[2]
pmleaveci = mbinom$conf.int[1:2]
pfleave = leavetable['F',2] / apply(leavetable,1,sum)[1]
pfleaveci = fbinom$conf.int[1:2]
out = c(out,paste('# of M that leave:', leavetable['M',2], '/', apply(leavetable,1,sum)[2]))
out = c(out,paste('# of F that leave:', leavetable['F',2], '/', apply(leavetable,1,sum)[1]))
out = c(out,paste('% of M that leave:', round(100*pmleave), '+',round(100*(pmleaveci[2]-pmleave)),'-', round(100*(pmleave - pmleaveci[1]))))
out = c(out,paste('% of F that leave:', round(100*pfleave), '+',round(100*(pfleaveci[2]-pfleave)),'-', round(100*(pfleave - pfleaveci[1]))))
out = c(out,paste('% of M that stay: ', 100-round(100*pmleave), '-',round(100*(pmleaveci[2]-pmleave)),'+', round(100*(pmleave - pmleaveci[1]))))
out = c(out,paste('% of F that stay: ', 100-round(100*pfleave), '-',round(100*(pfleaveci[2]-pfleave)),'+', round(100*(pfleave - pfleaveci[1]))))
out = c(out,paste('Fisher significance of M/F leave rates different: p =', signif(fisher$p.value,2)))
out = c(out,paste(' Significance of different outcome employer classes:',signif(ch2$p.value,3)))
out = c(out,paste(' Significance of different outcome profession classes:',signif(ch1$p.value,3)))
out = c(out,'')
out = c(out,paste(' # with no year: ', nunkyear))
out = c(out,paste('Sample size w/year:', ny, '( M =',gtaby[2], 'F =',gtaby[1],')'))
out = c(out,paste(' ', round(100*gtaby[2]/ny,1), '% ', round(100*gtaby[1]/ny,1),'%', sep=''))
out = c(out,paste('Female mean time: ', round(means[1],2), '+/-', round(ses[1],2)))
out = c(out,paste(' Male mean time: ', round(means[2],2), '+/-', round(ses[2],2)))
out = c(out,paste(' Gap: ', round(gap,2), '+/-', round(gapunc,2)))
out = c(out,paste('% hired within 4 years:',signif(100*frachired['4'],3),'%'))
out = c(out,paste(' (M:',signif(100*frachiredm['4'],3),'% F:',signif(100*frachiredf['4'],3),'% )'))
out = c(out,paste('% hired within 12 years:',signif(100*frachired['12'],3),'%'))
out = c(out,paste(' (M:',signif(100*frachiredm['12'],3),'% F:',signif(100*frachiredf['12'],3),'% )'))
out = c(out,paste('% left within 12 years:',signif(100*fracleft['12'],3),'%'))
out = c(out,paste(' (M:',signif(100*fracleftm['12'],3),'% F:',signif(100*fracleftf['12'],3),'% )'))
diff = means[2]-means[1]
differr = (ttest$conf.int[2]-ttest$conf.int[1])/2
differr2 = sqrt(ses[1]^2+ses[2]^2)
out = c(out,paste(' Difference: ', round(diff,2), '+/-', round(differr,2), ' (p =',signif(ttest$p.value,2),')'))
# paste(' Difference: ', round(diff,2), '+/-', round(differr2,2))
out = c(out, 'Significance of M-F postdoc time distribution difference:')
out = c(out,paste(' Student t-test: p =', signif(ttest$p.value,2), ' (F18 ruled out at p =',signif(ttest.vsF18$p.value,2),')'))
out = c(out,paste(' Wilcoxon test: p =', signif(wtest$p.value,2)))
out = c(out,paste(' K-S test: p =', signif(kstest$p.value,2)))
out = c(out,paste(' A-D test: p =', signif(adtest$ad[1,3],2)))
out = c(out,'')
out = c(out,paste('Survival analysis for astronomy hiring:'))
out = c(out,paste('Cox H(F/M) = ', round(coxhire$b,2), ' + ', round(coxhire$berr[1],2), ' - ', round(-coxhire$berr[2],2)))
out = c(out,paste('Cox significance of H(F/M) != 1: p =', signif(sumcoxfit$coefficients[5],2)))
out = c(out,'')
out = c(out,'Survival analysis for leaving astronomy:')
out = c(out,paste('Cox L(F/M) = ', round(coxleave$b,2), ' + ', round(coxleave$berr[1],2), ' - ', round(-coxleave$berr[2],2)))
out = c(out,paste('Cox significance of L(F/M) != 1: p =', signif(sumcoxfitI$coefficients[5],2)))
writeLines(out)
##
## # total: 1154
## # omitted: 91
## Total sample size: 1063 ( M = 748 F = 315 )
## 70.4% 29.6%
## Career outcomes: 118 still postdocs/adjuncts
## 672 found long-term astronomy employment
## 273 left astronomy
## # of M that leave: 192 / 669
## # of F that leave: 81 / 276
## % of M that leave: 29 + 4 - 3
## % of F that leave: 29 + 6 - 5
## % of M that stay: 71 - 4 + 3
## % of F that stay: 71 - 6 + 5
## Fisher significance of M/F leave rates different: p = 0.87
## Significance of different outcome employer classes: 0.0404
## Significance of different outcome profession classes: 0.167
##
## # with no year: 37
## Sample size w/year: 1026 ( M = 723 F = 303 )
## 70.5% 29.5%
## Female mean time: 4.41 +/- 0.16
## Male mean time: 4.86 +/- 0.12
## Gap: 0.45 +/- 0.4
## % hired within 4 years: 28.7 %
## (M: 28.1 % F: 30.1 % )
## % hired within 12 years: 65.1 %
## (M: 64.8 % F: 65.8 % )
## % left within 12 years: 27.1 %
## (M: 27.1 % F: 27 % )
## Difference: 0.45 +/- 0.37 (p = 0.045 )
## Significance of M-F postdoc time distribution difference:
## Student t-test: p = 0.045 (F18 ruled out at p = 0.00011 )
## Wilcoxon test: p = 0.2
## K-S test: p = 0.47
## A-D test: p = 0.074
##
## Survival analysis for astronomy hiring:
## Cox H(F/M) = 1.08 + 0.2 - 0.17
## Cox significance of H(F/M) != 1: p = 0.38
##
## Survival analysis for leaving astronomy:
## Cox L(F/M) = 1.03 + 0.31 - 0.24
## Cox significance of L(F/M) != 1: p = 0.81