21 Estimating Average Treatment Effects
Also available in Stata and Python versions
21.1 Example 21.1
Load libraries
Causal Effects of Job Training on Earnings
Table 21.1 Column 2 & 3
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.55480 0.33944 13.4186 < 2.2e-16 ***
## train 1.79434 0.66932 2.6809 0.007617 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Regression-adjustment estimate: re78 on train plus pre-treatment covariates
PRA2 <- lm(
  re78 ~ train + age + educ + black + hisp + married + re74 + re75,
  data = jtrain2
)
# Report coefficients with heteroskedasticity-robust (HC0) standard errors
vc2 <- vcovHC(PRA2, type = "HC0")
PRA2r <- coeftest(PRA2, vcov = vc2)
PRA2r
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.621739 2.360022 0.2634 0.792331
## train 1.682588 0.651389 2.5831 0.010117 *
## age 0.055771 0.039389 1.4159 0.157522
## educ 0.405883 0.155134 2.6163 0.009196 **
## black -2.169781 0.998166 -2.1738 0.030260 *
## hisp 0.157926 1.352406 0.1168 0.907093
## married -0.140271 0.861806 -0.1628 0.870779
## re74 0.082856 0.106226 0.7800 0.435816
## re75 0.051533 0.123500 0.4173 0.676684
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Propensity-score weighting estimate of the ATE (jtrain2 sample).
# Fit the logit on the same estimation sample 'df' so the fitted
# probabilities yhat line up row-for-row with df$train and df$re78
# (the original fit on jtrain2 risked misalignment if any row were
# dropped for missingness).
df <- subset(jtrain2, !is.na(educ))
logit <- glm(train ~ age + educ + black + hisp + married + re74 + re75,
             data = df, family = binomial(logit))
yhat <- predict(logit, type = "response")
uhat <- df$train - yhat
# Horvitz-Thompson-type transformation; its sample mean estimates the ATE
ate <- (uhat * df$re78) / (yhat * (1 - yhat))
summary(ate_reg <- lm(ate ~ 1))
# Trimmed-sample estimates: drop observations with extreme propensity scores
summary(lm(ate ~ 1, subset = yhat <= 0.9 & yhat >= 0.1))
summary(lm(ate ~ 1, subset = yhat <= 0.95 & yhat >= 0.05))
##
## Call:
## lm(formula = ate ~ 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.592 -8.431 -1.627 3.649 134.496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6274 0.8438 1.929 0.0544 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.8 on 444 degrees of freedom
##
## Call:
## lm(formula = ate ~ 1, subset = yhat <= 0.9 & yhat >= 0.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.592 -8.431 -1.627 3.649 134.496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6274 0.8438 1.929 0.0544 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.8 on 444 degrees of freedom
##
## Call:
## lm(formula = ate ~ 1, subset = yhat <= 0.95 & yhat >= 0.05)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.592 -8.431 -1.627 3.649 134.496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6274 0.8438 1.929 0.0544 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.8 on 444 degrees of freedom
# Propensity-score weighting with covariate interactions: regress the
# ATE transformation on the covariates scaled by the logit residual uhat.
# Select columns by name (the original hard-coded positions c(2:6, 9:10))
# and keep 'ate' inside the data frame instead of relying on a global.
covars <- c("age", "educ", "black", "hisp", "married", "re74", "re75")
df <- jtrain2[, covars] * uhat
df$ate <- ate  # response travels with the regressors
PSW2 <- lm(ate ~ age + educ + black + hisp + married + re74 + re75, data = df)
PSW2r <- coeftest(PSW2, vcov = vcovHC(PSW2, type = "HC0"))
PSW2r
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.62736 0.64467 2.5243 0.01194 *
## age 0.39110 0.16200 2.4142 0.01618 *
## educ 1.73499 0.41728 4.1578 3.868e-05 ***
## black -7.21430 3.66680 -1.9675 0.04976 *
## hisp 7.35999 8.39347 0.8769 0.38104
## married -1.03813 3.73185 -0.2782 0.78100
## re74 0.26287 0.61484 0.4275 0.66919
## re75 0.15930 0.59378 0.2683 0.78861
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Table 21.1 Column 4 & 5
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.55392 0.31167 69.157 < 2.2e-16 ***
## train -15.20478 0.65567 -23.190 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Regression-adjustment estimate on the non-experimental sample (jtrain3)
PRA3 <- lm(
  re78 ~ train + age + educ + black + hisp + married + re74 + re75,
  data = jtrain3
)
# HC0 robust standard errors
vc3 <- vcovHC(PRA3, type = "HC0")
PRA3r <- coeftest(PRA3, vcov = vc3)
PRA3r
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.776734 1.482613 0.5239 0.60039
## train 0.859770 0.765283 1.1235 0.26134
## age -0.081537 0.020637 -3.9510 7.986e-05 ***
## educ 0.528023 0.088245 5.9836 2.475e-09 ***
## black -0.542709 0.441414 -1.2295 0.21900
## hisp 2.165568 1.216207 1.7806 0.07509 .
## married 1.220271 0.495469 2.4629 0.01385 *
## re74 0.277887 0.061681 4.5052 6.916e-06 ***
## re75 0.568122 0.066418 8.5537 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Propensity-score weighting ATE on jtrain3. Fit the logit on 'df'
# (not jtrain3) so yhat and uhat are aligned with df's rows.
df <- subset(jtrain3, !is.na(educ))
logit <- glm(train ~ age + educ + black + hisp + married + re74 + re75,
             data = df, family = binomial(logit))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
yhat <- predict(logit, type = "response")
uhat <- df$train - yhat
# ATE transformation; near-0/1 propensities make this very noisy here
ate <- uhat * df$re78 / (yhat * (1 - yhat))
summary(ate_reg <- lm(ate ~ 1))
# Trimmed estimates: restrict to common-support propensity scores
summary(lm(ate ~ 1, subset = yhat <= 0.9 & yhat >= 0.1))
summary(lm(ate ~ 1, subset = yhat <= 0.95 & yhat >= 0.05))
##
## Call:
## lm(formula = ate ~ 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -140 -40 -31 -20 44128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.03 19.18 0.575 0.565
##
## Residual standard error: 991.8 on 2674 degrees of freedom
##
## Call:
## lm(formula = ate ~ 1, subset = yhat <= 0.9 & yhat >= 0.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.574 -9.421 -1.024 5.394 77.915
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.024 1.116 0.917 0.36
##
## Residual standard error: 19.62 on 308 degrees of freedom
##
## Call:
## lm(formula = ate ~ 1, subset = yhat <= 0.95 & yhat >= 0.05)
##
## Residuals:
## Min 1Q Median 3Q Max
## -128.325 -8.612 0.199 3.907 218.535
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.199 1.048 -0.19 0.849
##
## Residual standard error: 21.52 on 421 degrees of freedom
# PSW regression with interactions for jtrain3. Select covariates by name
# rather than position c(2:8), and keep 'ate' in the data frame so lm()
# does not have to find it in the global environment.
covars <- c("age", "educ", "black", "hisp", "married", "re74", "re75")
df <- jtrain3[, covars] * uhat
df$ate <- ate
PSW3 <- lm(ate ~ age + educ + black + hisp + married + re74 + re75, data = df)
PSW3r <- coeftest(PSW3, vcov = vcovHC(PSW3, type = "HC0"))
PSW3r
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.029 13.796 0.7994 0.42412
## age -24.330 28.884 -0.8423 0.39968
## educ -24.784 74.145 -0.3343 0.73820
## black -1353.411 780.680 -1.7336 0.08310 .
## hisp -2497.522 1279.632 -1.9518 0.05107 .
## married 1659.848 981.913 1.6904 0.09106 .
## re74 431.648 333.145 1.2957 0.19520
## re75 315.188 280.817 1.1224 0.26179
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Table 21.1 Column 6 & 7
# Average of 1974/75 real earnings, used to trim high earners
jtrain3$rebar <- (jtrain3$re74 + jtrain3$re75) / 2
jtrain3 <- subset(jtrain3, rebar <= 15.0)
# Simple difference-in-means on the trimmed sample
DiM3s <- lm(re78 ~ train, data = jtrain3)
DiM3sr <- coeftest(DiM3s, vcov = vcovHC(DiM3s, type = "HC0"))
DiM3sr
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.19060 0.33471 33.4341 < 2.2e-16 ***
## train -5.00532 0.65640 -7.6254 5.047e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Regression adjustment on the trimmed jtrain3 sample
PRA3s <- lm(
  re78 ~ train + age + educ + black + hisp + married + re74 + re75,
  data = jtrain3
)
vc3s <- vcovHC(PRA3s, type = "HC0")
PRA3sr <- coeftest(PRA3s, vcov = vc3s)
PRA3sr
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.100924 1.632791 1.2867 0.198455
## train 2.059039 0.798028 2.5802 0.009998 **
## age -0.101670 0.023749 -4.2810 2.015e-05 ***
## educ 0.404689 0.099010 4.0873 4.666e-05 ***
## black -1.226412 0.591959 -2.0718 0.038507 *
## hisp 0.236051 0.899533 0.2624 0.793049
## married 1.841522 0.596363 3.0879 0.002064 **
## re74 0.246614 0.085380 2.8884 0.003944 **
## re75 0.660033 0.081669 8.0818 1.600e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# PSW ATE on the trimmed jtrain3 sample. Fit the logit on 'df' so the
# fitted values align with df's rows.
df <- subset(jtrain3, !is.na(educ))
logit <- glm(train ~ age + educ + black + hisp + married + re74 + re75,
             data = df, family = binomial(logit))
yhat <- predict(logit, type = "response")
uhat <- df$train - yhat
ate <- uhat * df$re78 / (yhat * (1 - yhat))
summary(ate_reg <- lm(ate ~ 1))
# Trimmed estimates over the common-support region of the propensity score
summary(lm(ate ~ 1, subset = yhat <= 0.9 & yhat >= 0.1))
summary(lm(ate ~ 1, subset = yhat <= 0.95 & yhat >= 0.05))
##
## Call:
## lm(formula = ate ~ 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -130.7 -23.3 -16.0 -7.0 17037.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.049 14.692 0.48 0.631
##
## Residual standard error: 500.8 on 1161 degrees of freedom
##
## Call:
## lm(formula = ate ~ 1, subset = yhat <= 0.9 & yhat >= 0.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -87.175 -8.602 -1.924 7.224 75.893
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.924 1.351 1.424 0.156
##
## Residual standard error: 21.58 on 254 degrees of freedom
##
## Call:
## lm(formula = ate ~ 1, subset = yhat <= 0.95 & yhat >= 0.05)
##
## Residuals:
## Min 1Q Median 3Q Max
## -86.199 -8.188 -0.947 3.545 195.985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9468 1.0899 0.869 0.386
##
## Residual standard error: 21.25 on 379 degrees of freedom
# PSW regression with interactions on the trimmed sample. Covariates are
# selected by name (not position) and 'ate' is stored in the data frame.
covars <- c("age", "educ", "black", "hisp", "married", "re74", "re75")
df <- jtrain3[, covars] * uhat
df$ate <- ate
PSW3s <- lm(ate ~ age + educ + black + hisp + married + re74 + re75, data = df)
PSW3sr <- coeftest(PSW3s, vcov = vcovHC(PSW3s, type = "HC0"))
PSW3sr
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.0487 11.7569 0.5995 0.5489
## age 24.6470 17.5994 1.4004 0.1616
## educ -110.8253 78.8408 -1.4057 0.1601
## black -446.0219 349.1988 -1.2773 0.2018
## hisp -1152.8443 911.5121 -1.2648 0.2062
## married 728.6137 532.1123 1.3693 0.1712
## re74 70.8515 55.9734 1.2658 0.2058
## re75 354.0008 228.6956 1.5479 0.1219
Table 21.1
21.2 Example 21.2
Causal Effect of Job Training on Earnings
Table 21.2 Columns 2 and 3, df=jtrain2
# Nearest-neighbor matching estimate of the ATE (jtrain2).
# Fixed the duplicated assignment 'df <- df <- ...' and select the
# needed columns by name instead of position c(1:6, 9:11).
df <- jtrain2[, c("train", "age", "educ", "black", "hisp", "married",
                  "re74", "re75", "re78")]
mATE2 <- with(df, Match(Y = re78,
                        Tr = train,
                        X = data.frame(age, educ, black, hisp, married, re74, re75),
                        estimand = "ATE"))
summary(mATE2)
##
## Estimate... 1.6278
## AI SE...... 0.77575
## T-stat..... 2.0983
## p.val...... 0.035876
##
## Original number of observations.............. 445
## Original number of treated obs............... 185
## Matched number of observations............... 445
## Matched number of observations (unweighted). 608
# Matching estimate of the ATT on the same covariate set
X2 <- df[, c("age", "educ", "black", "hisp", "married", "re74", "re75")]
mATT2 <- Match(Y = df$re78, Tr = df$train, X = X2, estimand = "ATT")
summary(mATT2)
##
## Estimate... 1.8237
## AI SE...... 0.89664
## T-stat..... 2.034
## p.val...... 0.041952
##
## Original number of observations.............. 445
## Original number of treated obs............... 185
## Matched number of observations............... 185
## Matched number of observations (unweighted). 269
Table 21.2 Columns 4 and 5, df=jtrain3
# Matching ATE on the non-experimental jtrain3 sample
df <- jtrain3[, c(1:8, 11)]
X3 <- df[, c("age", "educ", "black", "hisp", "married", "re74", "re75")]
mATE3 <- Match(Y = df$re78, Tr = df$train, X = X3, estimand = "ATE")
summary(mATE3)
##
## Estimate... -3.8463
## AI SE...... 2.4952
## T-stat..... -1.5415
## p.val...... 0.1232
##
## Original number of observations.............. 1162
## Original number of treated obs............... 180
## Matched number of observations............... 1162
## Matched number of observations (unweighted). 1191
# Matching estimate of the ATT for jtrain3
X3t <- df[, c("age", "educ", "black", "hisp", "married", "re74", "re75")]
mATT3 <- Match(Y = df$re78, Tr = df$train, X = X3t, estimand = "ATT")
summary(mATT3)
##
## Estimate... -0.23234
## AI SE...... 1.6514
## T-stat..... -0.1407
## p.val...... 0.88811
##
## Original number of observations.............. 1162
## Original number of treated obs............... 180
## Matched number of observations............... 180
## Matched number of observations (unweighted). 196
Table 21.2 Columns 6 and 7, data = jtrain3 if (re74 + re75)/2 <= 15.0
# Trim to the low-earnings subsample: mean of 1974/75 earnings at most 15
jtrain3$rebar <- (jtrain3$re74 + jtrain3$re75) / 2
jtrain3 <- subset(jtrain3, rebar <= 15.0)
df <- jtrain3[, c(1:8, 11)]
# Matching ATE on the trimmed sample
X3s <- df[, c("age", "educ", "black", "hisp", "married", "re74", "re75")]
mATE3s <- Match(Y = df$re78, Tr = df$train, X = X3s, estimand = "ATE")
summary(mATE3s)
##
## Estimate... -3.8463
## AI SE...... 2.4952
## T-stat..... -1.5415
## p.val...... 0.1232
##
## Original number of observations.............. 1162
## Original number of treated obs............... 180
## Matched number of observations............... 1162
## Matched number of observations (unweighted). 1191
# Matching ATT on the trimmed sample
X3st <- df[, c("age", "educ", "black", "hisp", "married", "re74", "re75")]
mATT3s <- Match(Y = df$re78, Tr = df$train, X = X3st, estimand = "ATT")
summary(mATT3s)
##
## Estimate... -0.23234
## AI SE...... 1.6514
## T-stat..... -0.1407
## p.val...... 0.88811
##
## Original number of observations.............. 1162
## Original number of treated obs............... 180
## Matched number of observations............... 180
## Matched number of observations (unweighted). 196
21.3 Example 21.3
Estimating the Effects of Education on Fertility
data(fertil2, package = "wooldridge")
# Treatment indicator: at least seven years of education
fertil2$w <- 0
fertil2$w[fertil2$educ >= 7] <- 1
# OLS estimate of the effect of w on number of children
OLS <- lm(children ~ w + age + agesq + evermarr + urban + electric + tv,
          data = fertil2)
summary(OLS)
##
## Call:
## lm(formula = children ~ w + age + agesq + evermarr + urban +
## electric + tv, data = fertil2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9405 -0.7106 0.0256 0.6816 7.4343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.5266048 0.2451026 -14.388 < 2e-16 ***
## w -0.3935524 0.0495534 -7.942 2.51e-15 ***
## age 0.2719307 0.0171033 15.899 < 2e-16 ***
## agesq -0.0018960 0.0002752 -6.889 6.45e-12 ***
## evermarr 0.6947417 0.0523984 13.259 < 2e-16 ***
## urban -0.2437082 0.0460252 -5.295 1.25e-07 ***
## electric -0.3366440 0.0754557 -4.461 8.35e-06 ***
## tv -0.3259749 0.0897716 -3.631 0.000285 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.431 on 4350 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.5861, Adjusted R-squared: 0.5855
## F-statistic: 880 on 7 and 4350 DF, p-value: < 2.2e-16
# 2SLS: instrument w with frsthalf (born in first half of the year)
IV1 <- ivreg(
  children ~ w + age + agesq + evermarr + urban + electric + tv |
    frsthalf + age + agesq + evermarr + urban + electric + tv,
  data = fertil2
)
summary(IV1)
##
## Call:
## ivreg(formula = children ~ w + age + agesq + evermarr + urban +
## electric + tv | frsthalf + age + agesq + evermarr + urban +
## electric + tv, data = fertil2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.90822 -0.74098 0.04044 0.73006 7.39859
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.8300500 0.6350035 -4.457 8.53e-06 ***
## w -1.1306797 0.6192352 -1.826 0.0679 .
## age 0.2627018 0.0191600 13.711 < 2e-16 ***
## agesq -0.0019787 0.0002905 -6.811 1.10e-11 ***
## evermarr 0.6167576 0.0845468 7.295 3.53e-13 ***
## urban -0.1672413 0.0795281 -2.103 0.0355 *
## electric -0.2343255 0.1154192 -2.030 0.0424 *
## tv -0.1371643 0.1829146 -0.750 0.4534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.467 on 4350 degrees of freedom
## Multiple R-Squared: 0.5651, Adjusted R-squared: 0.5644
## Wald test: 829.3 on 7 and 4350 DF, p-value: < 2.2e-16
# First-stage probit for the treatment indicator w
probit <- glm(
  w ~ frsthalf + age + agesq + evermarr + urban + electric + tv,
  data = fertil2,
  family = binomial(probit)
)
summary(probit)
##
## Call:
## glm(formula = w ~ frsthalf + age + agesq + evermarr + urban +
## electric + tv, family = binomial(probit), data = fertil2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0644 -0.9671 0.5004 0.8949 2.5004
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1353654 0.2459878 4.616 3.92e-06 ***
## frsthalf -0.2206630 0.0418051 -5.278 1.30e-07 ***
## age -0.0150337 0.0175903 -0.855 0.3927
## agesq -0.0007325 0.0002909 -2.518 0.0118 *
## evermarr -0.2972839 0.0486261 -6.114 9.74e-10 ***
## urban 0.2998132 0.0430948 6.957 3.47e-12 ***
## electric 0.4246616 0.0756960 5.610 2.02e-08 ***
## tv 0.9281441 0.0999561 9.286 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5987.6 on 4357 degrees of freedom
## Residual deviance: 4856.8 on 4350 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 4872.8
##
## Number of Fisher Scoring iterations: 4
# Use the probit fitted probabilities as the instrument for w.
what <- predict(probit, type = "response")
# Align fertil2 with the probit's actual estimation sample (3 rows were
# dropped for missingness). The original subset on !is.na(electric)
# assumed the missingness was only in 'electric'; matching on the row
# names of the fitted values is robust to where the NAs actually are.
fertil2 <- fertil2[rownames(fertil2) %in% names(what), ]
summary(IV2 <- ivreg(
  children ~ w + age + agesq + evermarr + urban + electric + tv |
    what + age + agesq + evermarr + urban + electric + tv,
  data = fertil2
))
##
## Call:
## ivreg(formula = children ~ w + age + agesq + evermarr + urban +
## electric + tv | what + age + agesq + evermarr + urban + electric +
## tv, data = fertil2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8712 -0.9363 0.1459 0.8569 7.3577
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.0326734 0.4119721 -4.934 8.36e-07 ***
## w -1.9745012 0.3317810 -5.951 2.87e-09 ***
## age 0.2521371 0.0194358 12.973 < 2e-16 ***
## agesq -0.0020734 0.0003079 -6.733 1.88e-11 ***
## evermarr 0.5274858 0.0677213 7.789 8.38e-15 ***
## urban -0.0797064 0.0613674 -1.299 0.194
## electric -0.1171971 0.0953329 -1.229 0.219
## tv 0.0789754 0.1302616 0.606 0.544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.589 on 4350 degrees of freedom
## Multiple R-Squared: 0.4893, Adjusted R-squared: 0.4885
## Wald test: 710.9 on 7 and 4350 DF, p-value: < 2.2e-16
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.0326734 0.3639653 -5.5848 2.482e-08 ***
## w -1.9745012 0.3132711 -6.3029 3.212e-10 ***
## age 0.2521370 0.0209856 12.0147 < 2.2e-16 ***
## agesq -0.0020734 0.0003812 -5.4392 5.645e-08 ***
## evermarr 0.5274858 0.0695151 7.5881 3.947e-14 ***
## urban -0.0797064 0.0604705 -1.3181 0.1875
## electric -0.1171971 0.0891040 -1.3153 0.1885
## tv 0.0789754 0.1083858 0.7287 0.4663
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1