15 Instrumental Variables Estimation and 2SLS
Also covered using Python and Stata
15.1 Example 15.1. Estimating the Return to Education for Married Women
# Example 15.1: return to education for married women, using father's
# education (fatheduc) as the instrument for educ.
#
# lm() and ivreg() silently drop rows with a missing response, so the two
# lwage models are fitted only on rows where lwage is observed. educ has no
# such gaps, so the first stage has to be restricted to that same sample
# explicitly; otherwise it is estimated on extra rows that never enter the
# IV estimate it is supposed to support.
ereg1 <- lm(lwage ~ educ, data = mroz)                       # OLS benchmark
ereg2 <- lm(educ ~ fatheduc,
            data = subset(mroz, !is.na(lwage)))              # first stage
ereg3 <- ivreg(lwage ~ educ | fatheduc, data = mroz)         # IV estimate
# NOTE(review): output recorded from an earlier full-sample first stage
# (column 2 of the table) will differ once this chunk is re-run.
stargazer(
  ereg1, ereg2, ereg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ===========================================
## Dependent variable:
## ------------------------------
## lwage educ lwage
## OLS OLS instrumental
## variable
## (1) (2) (3)
## -------------------------------------------
## educ 0.109*** 0.059*
## (0.014) (0.035)
## fatheduc 0.282***
## (0.021)
## Constant -0.185 9.799*** 0.441
## (0.185) (0.199) (0.446)
## -------------------------------------------
## Observations 428 753 428
## R2 0.118 0.196 0.093
## Adjusted R2 0.116 0.195 0.091
## ===========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.2 Example 15.2. Estimating the Return to Education for Men
# Example 15.2: number of siblings (sibs) as the instrument for educ in a
# log-wage equation, with the plain OLS fit shown alongside for comparison.
wreg1 <- lm(educ ~ sibs, data = wage2)             # first stage
wreg2 <- ivreg(lwage ~ educ | sibs, data = wage2)  # IV estimate
wreg3 <- lm(lwage ~ educ, data = wage2)            # OLS benchmark

# Text table reporting only N, R-squared and adjusted R-squared.
stargazer(
  wreg1, wreg2, wreg3,
  type = "text",
  no.space = TRUE,
  keep.stat = c("n", "rsq", "adj.rsq")
)
##
## ============================================
## Dependent variable:
## -------------------------------
## educ lwage
## OLS instrumental OLS
## variable
## (1) (2) (3)
## --------------------------------------------
## sibs -0.228***
## (0.030)
## educ 0.122*** 0.060***
## (0.026) (0.006)
## Constant 14.139*** 5.130*** 5.973***
## (0.113) (0.355) (0.081)
## --------------------------------------------
## Observations 935 935 935
## R2 0.057 -0.009 0.097
## Adjusted R2 0.056 -0.010 0.096
## ============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.3 Example 15.3. Estimating the Effect of Smoking on Birth Weight
# Example 15.3: cigarette price (cigprice) as the instrument for packs
# smoked in a log birth-weight equation.
sreg1 <- lm(packs ~ cigprice, data = bwght)               # first stage
sreg2 <- ivreg(lbwght ~ packs | cigprice, data = bwght)   # IV estimate

# Text table reporting only N, R-squared and adjusted R-squared.
stargazer(
  sreg1, sreg2,
  type = "text",
  no.space = TRUE,
  keep.stat = c("n", "rsq", "adj.rsq")
)
##
## =========================================
## Dependent variable:
## ----------------------------
## packs lbwght
## OLS instrumental
## variable
## (1) (2)
## -----------------------------------------
## cigprice 0.0003
## (0.001)
## packs 2.989
## (8.699)
## Constant 0.067 4.448***
## (0.103) (0.908)
## -----------------------------------------
## Observations 1,388 1,388
## R2 0.0001 -23.230
## Adjusted R2 -0.001 -23.248
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.4 Example 15.4. Using College Proximity as an IV for Education
# Example 15.4: proximity to a four-year college (nearc4) as the instrument
# for educ in a log-wage equation.
#
# Only reg661-reg668 are entered. Listing reg669 as well, together with the
# intercept, leaves it without an estimate (it shows up as an empty row in
# the regression table), which indicates the nine dummies are collinear with
# the constant. Omitting reg669 explicitly makes it the stated base category;
# the remaining coefficients are the ones R already reported.
# NOTE(review): assumes reg661-reg669 are an exhaustive set of region
# dummies -- confirm against the data documentation.

# First stage: educ on the instrument and all exogenous controls.
creg1 <- lm(
  educ ~ nearc4 + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

# OLS benchmark for the wage equation.
creg2 <- lm(
  lwage ~ educ + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

# 2SLS: educ instrumented by nearc4; every other regressor is its own
# instrument.
creg3 <- ivreg(
  lwage ~ educ + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 |
    nearc4 + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

stargazer(
  creg1, creg2, creg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =============================================
## Dependent variable:
## --------------------------------
## educ lwage
## OLS OLS instrumental
## variable
## (1) (2) (3)
## ---------------------------------------------
## nearc4 0.320***
## (0.088)
## educ 0.075*** 0.132**
## (0.003) (0.055)
## exper -0.413*** 0.085*** 0.108***
## (0.034) (0.007) (0.024)
## expersq 0.001 -0.002*** -0.002***
## (0.002) (0.0003) (0.0003)
## black -0.936*** -0.199*** -0.147***
## (0.094) (0.018) (0.054)
## smsa 0.402*** 0.136*** 0.112***
## (0.105) (0.020) (0.032)
## south -0.052 -0.148*** -0.145***
## (0.135) (0.026) (0.027)
## smsa66 0.025 0.026 0.019
## (0.106) (0.019) (0.022)
## reg661 -0.210 -0.119*** -0.108***
## (0.202) (0.039) (0.042)
## reg662 -0.289** -0.022 -0.007
## (0.147) (0.028) (0.033)
## reg663 -0.238* 0.026 0.040
## (0.143) (0.027) (0.032)
## reg664 -0.093 -0.063* -0.058
## (0.186) (0.036) (0.038)
## reg665 -0.483** 0.009 0.038
## (0.188) (0.036) (0.047)
## reg666 -0.513** 0.022 0.055
## (0.210) (0.040) (0.053)
## reg667 -0.427** -0.001 0.027
## (0.206) (0.039) (0.049)
## reg668 0.314 -0.175*** -0.191***
## (0.242) (0.046) (0.051)
## reg669
##
## Constant 16.849*** 4.739*** 3.774***
## (0.211) (0.072) (0.935)
## ---------------------------------------------
## Observations 3,010 3,010 3,010
## R2 0.477 0.300 0.238
## Adjusted R2 0.474 0.296 0.234
## =============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.5 Example 15.5. Return to Education for Working Women
# Example 15.5: return to education for working women, instrumenting educ
# with both parents' education (fatheduc, motheduc).
#
# The 2SLS and OLS wage equations only use rows where lwage is observed
# (rows with a missing response are dropped). The first stage, and the F test
# of the excluded instruments based on it, must be computed on that same
# sample, so the data are restricted explicitly here.
mreg1 <- lm(
  educ ~ exper + expersq + fatheduc + motheduc,
  data = subset(mroz, !is.na(lwage))
)
mreg2 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + fatheduc + motheduc,
  data = mroz
)
mreg3 <- lm(lwage ~ educ + exper + expersq, data = mroz)

# Joint significance of the two excluded instruments in the first stage.
# NOTE(review): output recorded from an earlier full-sample first stage
# (test statistics and the educ column of the table) will differ once this
# chunk is re-run.
linearHypothesis(mreg1, c("fatheduc=0", "motheduc=0"))
## Linear hypothesis test
##
## Hypothesis:
## fatheduc = 0
## motheduc = 0
##
## Model 1: restricted model
## Model 2: educ ~ exper + expersq + fatheduc + motheduc
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 750 3846.2
## 2 748 2884.1 2 962.1 124.76 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Regression table for the first stage (mreg1), 2SLS (mreg2) and OLS (mreg3)
# fits; this is the call whose output follows.
stargazer(mreg1, mreg2, mreg3, keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, type="text")
##
## ============================================
## Dependent variable:
## -------------------------------
## educ lwage
## OLS instrumental OLS
## variable
## (1) (2) (3)
## --------------------------------------------
## educ 0.061* 0.107***
## (0.031) (0.014)
## exper 0.085*** 0.044*** 0.042***
## (0.026) (0.013) (0.013)
## expersq -0.002** -0.001** -0.001**
## (0.001) (0.0004) (0.0004)
## fatheduc 0.185***
## (0.024)
## motheduc 0.186***
## (0.026)
## Constant 8.367*** 0.048 -0.522***
## (0.267) (0.400) (0.199)
## --------------------------------------------
## Observations 753 428 428
## R2 0.262 0.136 0.157
## Adjusted R2 0.258 0.130 0.151
## ============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.6 Example 15.6. Using Two Test Scores as Indicators of Ability
# Example 15.6: two test scores as indicators of ability. IQ enters the wage
# equation and is instrumented by KWW; all other regressors serve as their
# own instruments.
ireg1 <- ivreg(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ |
    educ + exper + tenure + married + south + urban + black + KWW,
  data = wage2
)

# Full coefficient table and fit statistics.
summary(ireg1)
##
## Call:
## ivreg(formula = lwage ~ educ + exper + tenure + married + south +
## urban + black + IQ | educ + exper + tenure + married + south +
## urban + black + KWW, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.095561 -0.224017 0.006572 0.234495 1.398914
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.592453 0.325781 14.097 < 2e-16 ***
## educ 0.025032 0.016607 1.507 0.13206
## exper 0.014420 0.003321 4.342 1.56e-05 ***
## tenure 0.010456 0.002601 4.020 6.30e-05 ***
## married 0.200690 0.040678 4.934 9.56e-07 ***
## south -0.051553 0.031128 -1.656 0.09803 .
## urban 0.176706 0.028212 6.264 5.75e-10 ***
## black -0.022561 0.073960 -0.305 0.76040
## IQ 0.013047 0.004934 2.644 0.00832 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3807 on 926 degrees of freedom
## Multiple R-Squared: 0.19, Adjusted R-squared: 0.183
## Wald test: 36.96 on 8 and 926 DF, p-value: < 2.2e-16
15.7 Example 15.7. Return to Education for Working Women
# Example 15.7: testing whether educ is endogenous in the wage equation for
# working women (inlf == 1).
#
# Step 1: reduced form for educ on all exogenous variables; keep residuals.
# Step 2: add those residuals (v2) to the structural equation and estimate by
#         OLS. The t statistic on v2 is the endogeneity test.
#
# Step 2 is an ordinary least-squares regression, so lm() is used directly.
# Passing it through ivreg() with v2 listed as its own instrument gives the
# same numbers (educ is then an exact linear combination of the instruments)
# but labels the column as an IV fit in the table.
mroz_inlf <- subset(mroz, inlf == 1)  # computed once, reused by every model

vreg1 <- lm(educ ~ exper + expersq + fatheduc + motheduc, data = mroz_inlf)
# v2 lives in the workspace, not in mroz_inlf; it lines up row-for-row only
# because vreg1 was fitted on mroz_inlf with no rows dropped.
v2 <- resid(vreg1)
vreg2 <- lm(lwage ~ educ + exper + expersq + v2, data = mroz_inlf)
vreg3 <- lm(lwage ~ educ + exper + expersq, data = mroz_inlf)

# NOTE(review): in previously recorded output column (2) is headed
# "instrumental variable"; after this change it is headed "OLS".
stargazer(
  vreg1, vreg2, vreg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ============================================
## Dependent variable:
## -------------------------------
## educ lwage
## OLS instrumental OLS
## variable
## (1) (2) (3)
## --------------------------------------------
## educ 0.061** 0.107***
## (0.031) (0.014)
## exper 0.045 0.044*** 0.042***
## (0.040) (0.013) (0.013)
## expersq -0.001 -0.001** -0.001**
## (0.001) (0.0004) (0.0004)
## fatheduc 0.190***
## (0.034)
## motheduc 0.158***
## (0.036)
## v2 0.058*
## (0.035)
## Constant 9.103*** 0.048 -0.522***
## (0.427) (0.395) (0.199)
## --------------------------------------------
## Observations 428 428 428
## R2 0.211 0.162 0.157
## Adjusted R2 0.204 0.154 0.151
## ============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.8 Example 15.8. Return to Education for Working Women
Stata equivalent of the first step: use mroz, clear; quietly ivreg lwage (educ = fatheduc motheduc) exper*; predict u1, residuals
# Example 15.8: testing overidentifying restrictions in the wage equation.
# Work on the rows where wage is observed.
mroz2 <- subset(mroz, !is.na(wage))

# (a) Instruments: fatheduc and motheduc.
# Fit 2SLS, keep its residuals, regress them on all exogenous variables, and
# form the LM statistic as N times the R-squared of that regression.
wreg1 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + fatheduc + motheduc,
  data = mroz2
)
u1 <- resid(wreg1)
wreg2 <- lm(u1 ~ exper + expersq + fatheduc + motheduc, data = mroz2)
nobs(wreg2) * summary(wreg2)$r.squared  # LM = N*Rsq
## [1] 0.3780713
# (b) Same procedure with huseduc added to the instrument list.
wreg3 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + fatheduc + motheduc + huseduc,
  data = mroz2
)
u1_h <- resid(wreg3)
wreg4 <- lm(
  u1_h ~ exper + expersq + fatheduc + motheduc + huseduc,
  data = mroz2
)
nobs(wreg4) * summary(wreg4)$r.squared  # LM = N*Rsq
## [1] 1.115043
stargazer(
  wreg1, wreg2, wreg3, wreg4,
  type = "text",
  no.space = TRUE,
  keep.stat = c("n", "rsq", "adj.rsq")
)
##
## ========================================================
## Dependent variable:
## -------------------------------------------
## lwage u1 lwage u1_h
## instrumental OLS instrumental OLS
## variable variable
## (1) (2) (3) (4)
## --------------------------------------------------------
## educ 0.061* 0.080***
## (0.031) (0.022)
## exper 0.044*** -0.00002 0.043*** 0.0001
## (0.013) (0.013) (0.013) (0.013)
## expersq -0.001** 0.00000 -0.001** -0.00001
## (0.0004) (0.0004) (0.0004) (0.0004)
## fatheduc 0.006 0.001
## (0.011) (0.011)
## motheduc -0.007 -0.010
## (0.012) (0.012)
## huseduc 0.007
## (0.011)
## Constant 0.048 0.011 -0.187 0.009
## (0.400) (0.141) (0.285) (0.177)
## --------------------------------------------------------
## Observations 428 428 428 428
## R2 0.136 0.001 0.150 0.003
## Adjusted R2 0.130 -0.009 0.144 -0.009
## ========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.9 Example 15.9. Effect of Education on Fertility
# Example 15.9: effect of education on fertility, instrumenting educ with
# mother's and father's education (meduc, feduc).

# 2SLS: every regressor except educ is its own instrument.
ivregk <- ivreg(
  kids ~ educ + age + agesq + black + east + northcen + west + farm +
    othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84 |
    age + agesq + black + east + northcen + west + farm + othrural + town +
    smcity + y74 + y76 + y78 + y80 + y82 + y84 + meduc + feduc,
  data = fertil1
)

# OLS benchmark.
regk <- lm(
  kids ~ educ + age + agesq + black + east + northcen + west + farm +
    othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)

# Endogeneity test for educ (regression-based).
# The reduced form must contain every exogenous variable of the structural
# equation in addition to the excluded instruments. Regressing educ on meduc
# and feduc alone yields residuals that are still correlated with the
# included controls, so they are not the reduced-form error the test needs.
regk2 <- lm(
  educ ~ meduc + feduc + age + agesq + black + east + northcen + west +
    farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)
# v2 lives in the workspace, not in fertil1; it lines up row-for-row only if
# regk2 dropped no rows.
v2 <- resid(regk2)

# Test regression: structural equation plus v2, estimated by OLS. With v2
# taken from the full reduced form, educ is an exact linear combination of
# the instruments and v2, so 2SLS with v2 as its own instrument would give
# the same estimates; lm() states the intent directly. The object keeps its
# original name so later references still resolve.
ivregk2 <- lm(
  kids ~ educ + age + agesq + black + east + northcen + west + farm +
    othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84 + v2,
  data = fertil1
)

# NOTE(review): output recorded with the earlier two-variable reduced form
# (columns 3 and 4 of the table) will differ once this chunk is re-run.
stargazer(
  ivregk, regk, regk2, ivregk2,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================================
## Dependent variable:
## --------------------------------------------
## kids educ kids
## instrumental OLS OLS instrumental
## variable variable
## (1) (2) (3) (4)
## ---------------------------------------------------------
## educ -0.153*** -0.128*** -0.151***
## (0.039) (0.018) (0.037)
## age 0.524*** 0.532*** 0.531***
## (0.139) (0.138) (0.138)
## agesq -0.006*** -0.006*** -0.006***
## (0.002) (0.002) (0.002)
## black 1.073*** 1.076*** 1.062***
## (0.174) (0.174) (0.175)
## east 0.229* 0.217 0.221*
## (0.134) (0.133) (0.133)
## northcen 0.374*** 0.363*** 0.372***
## (0.122) (0.121) (0.122)
## west 0.208 0.198 0.204
## (0.168) (0.167) (0.167)
## farm -0.077 -0.053 -0.065
## (0.151) (0.147) (0.148)
## othrural -0.195 -0.163 -0.178
## (0.182) (0.175) (0.177)
## town 0.082 0.084 0.080
## (0.125) (0.125) (0.125)
## smcity 0.212 0.212 0.210
## (0.160) (0.160) (0.160)
## y74 0.272 0.268 0.272
## (0.173) (0.173) (0.173)
## y76 -0.095 -0.097 -0.098
## (0.179) (0.179) (0.179)
## y78 -0.057 -0.069 -0.064
## (0.183) (0.182) (0.182)
## y80 -0.053 -0.071 -0.065
## (0.185) (0.183) (0.183)
## y82 -0.496*** -0.522*** -0.514***
## (0.177) (0.172) (0.173)
## y84 -0.521*** -0.545*** -0.535***
## (0.178) (0.175) (0.175)
## meduc 0.184***
## (0.022)
## feduc 0.221***
## (0.025)
## v2 0.029
## (0.042)
## Constant -7.241** -7.742** 8.861*** -7.407**
## (3.137) (3.052) (0.203) (3.090)
## ---------------------------------------------------------
## Observations 1,129 1,129 1,129 1,129
## R2 0.128 0.130 0.269 0.130
## Adjusted R2 0.115 0.116 0.268 0.116
## =========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
15.10 Example 15.10. Job Training and Worker Productivity
# Example 15.10: job training and worker productivity, using the change in
# grant status (cgrant) as the instrument for the change in training hours
# per employee (chrsemp). Only the 1988 rows are used.
jtrain2 <- subset(jtrain, year == 1988)

jreg1 <- lm(chrsemp ~ cgrant, data = jtrain2)              # first stage
jreg2 <- ivreg(clscrap ~ chrsemp | cgrant, data = jtrain2) # IV estimate
# Comparison fit without instruments. This is ordinary least squares, so it
# is estimated with lm(); calling ivreg() with a one-part formula returns the
# same coefficients but the table then labels the column as an IV fit.
jreg3 <- lm(clscrap ~ chrsemp, data = jtrain2)

# NOTE(review): in previously recorded output column (3) is headed
# "instrumental variable"; after this change it is headed "OLS".
stargazer(
  jreg1, jreg2, jreg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ==========================================
## Dependent variable:
## -----------------------------
## chrsemp clscrap
## OLS instrumental
## variable
## (1) (2) (3)
## ------------------------------------------
## cgrant 27.878***
## (3.129)
## chrsemp -0.014* -0.008*
## (0.008) (0.005)
## Constant 0.509 -0.033 -0.104
## (1.558) (0.127) (0.104)
## ------------------------------------------
## Observations 125 45 45
## R2 0.392 0.016 0.062
## Adjusted R2 0.387 -0.007 0.040
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01