15 Instrumental Variables Estimation and 2SLS
Also covered using Python and Stata
15.1 Example 15.1. Estimating the Return to Education for Married Women
# Example 15.1: OLS wage equation, the regression of educ on fatheduc, and the
# IV wage equation in which fatheduc serves as the instrument for educ.
# NOTE(review): the printed table reports 753 observations for the educ
# regression but 428 for both lwage models, so the three fits do not share a
# common sample -- confirm that this is intended.
ereg1 <- lm(
  lwage ~ educ,
  data = mroz
)
ereg2 <- lm(
  educ ~ fatheduc,
  data = mroz
)
ereg3 <- ivreg(
  lwage ~ educ | fatheduc,
  data = mroz
)
# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  ereg1, ereg2, ereg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ===========================================
##                   Dependent variable:      
##              ------------------------------
##               lwage     educ      lwage    
##                OLS      OLS    instrumental
##                                  variable  
##                (1)      (2)        (3)     
## -------------------------------------------
## educ         0.109***             0.059*   
##              (0.014)             (0.035)   
## fatheduc              0.282***             
##                       (0.021)              
## Constant      -0.185  9.799***    0.441    
##              (0.185)  (0.199)    (0.446)   
## -------------------------------------------
## Observations   428      753        428     
## R2            0.118    0.196      0.093    
## Adjusted R2   0.116    0.195      0.091    
## ===========================================
## Note:           *p<0.1; **p<0.05; ***p<0.01
15.2 Example 15.2. Estimating the Return to Education for Men
# Example 15.2: regression of educ on sibs, the IV wage equation that uses
# sibs as the instrument for educ, and the plain OLS wage equation.
wreg1 <- lm(
  educ ~ sibs,
  data = wage2
)
wreg2 <- ivreg(
  lwage ~ educ | sibs,
  data = wage2
)
wreg3 <- lm(
  lwage ~ educ,
  data = wage2
)
# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  wreg1, wreg2, wreg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ============================================
##                    Dependent variable:      
##              -------------------------------
##                educ            lwage        
##                 OLS    instrumental   OLS   
##                          variable           
##                 (1)        (2)        (3)   
## --------------------------------------------
## sibs         -0.228***                      
##               (0.030)                       
## educ                     0.122***   0.060***
##                          (0.026)    (0.006) 
## Constant     14.139***   5.130***   5.973***
##               (0.113)    (0.355)    (0.081) 
## --------------------------------------------
## Observations    935        935        935   
## R2             0.057      -0.009     0.097  
## Adjusted R2    0.056      -0.010     0.096  
## ============================================
## Note:            *p<0.1; **p<0.05; ***p<0.01
15.3 Example 15.3. Estimating the Effect of Smoking on Birth Weight
# Example 15.3: regression of packs on cigprice, followed by the IV birth
# weight equation that uses cigprice as the instrument for packs.
sreg1 <- lm(
  packs ~ cigprice,
  data = bwght
)
sreg2 <- ivreg(
  lbwght ~ packs | cigprice,
  data = bwght
)
# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  sreg1, sreg2,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                  packs         lbwght    
##                   OLS       instrumental 
##                               variable   
##                   (1)            (2)     
## -----------------------------------------
## cigprice         0.0003                  
##                 (0.001)                  
## packs                           2.989    
##                                (8.699)   
## Constant         0.067        4.448***   
##                 (0.103)        (0.908)   
## -----------------------------------------
## Observations     1,388          1,388    
## R2               0.0001        -23.230   
## Adjusted R2      -0.001        -23.248   
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01
15.4 Example 15.4. Using College Proximity as an IV for Education
# Example 15.4: reduced form for educ (creg1), OLS wage equation (creg2) and
# the IV wage equation with nearc4 as the instrument for educ (creg3).
#
# The earlier version listed reg661 through reg669 together with an intercept.
# The printed table shows an empty reg669 row in every column, i.e. that dummy
# was aliased and silently dropped by the fitting routines. It is now left out
# explicitly; the reg669 row of the previously printed table therefore no
# longer appears when the table is regenerated.
creg1 <- lm(
  educ ~ nearc4 + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
creg2 <- lm(
  lwage ~ educ + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
# Every regressor other than educ appears on both sides of `|`; nearc4 is the
# only variable that appears solely in the instrument list.
creg3 <- ivreg(
  lwage ~ educ + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 |
    nearc4 + exper + expersq + black + smsa + south + smsa66 +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  creg1, creg2, creg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =============================================
##                    Dependent variable:       
##              --------------------------------
##                educ            lwage         
##                 OLS       OLS    instrumental
##                                    variable  
##                 (1)       (2)        (3)     
## ---------------------------------------------
## nearc4       0.320***                        
##               (0.088)                        
## educ                   0.075***    0.132**   
##                         (0.003)    (0.055)   
## exper        -0.413*** 0.085***    0.108***  
##               (0.034)   (0.007)    (0.024)   
## expersq        0.001   -0.002***  -0.002***  
##               (0.002)  (0.0003)    (0.0003)  
## black        -0.936*** -0.199***  -0.147***  
##               (0.094)   (0.018)    (0.054)   
## smsa         0.402***  0.136***    0.112***  
##               (0.105)   (0.020)    (0.032)   
## south         -0.052   -0.148***  -0.145***  
##               (0.135)   (0.026)    (0.027)   
## smsa66         0.025     0.026      0.019    
##               (0.106)   (0.019)    (0.022)   
## reg661        -0.210   -0.119***  -0.108***  
##               (0.202)   (0.039)    (0.042)   
## reg662       -0.289**   -0.022      -0.007   
##               (0.147)   (0.028)    (0.033)   
## reg663        -0.238*    0.026      0.040    
##               (0.143)   (0.027)    (0.032)   
## reg664        -0.093    -0.063*     -0.058   
##               (0.186)   (0.036)    (0.038)   
## reg665       -0.483**    0.009      0.038    
##               (0.188)   (0.036)    (0.047)   
## reg666       -0.513**    0.022      0.055    
##               (0.210)   (0.040)    (0.053)   
## reg667       -0.427**   -0.001      0.027    
##               (0.206)   (0.039)    (0.049)   
## reg668         0.314   -0.175***  -0.191***  
##               (0.242)   (0.046)    (0.051)   
## reg669                                       
##                                              
## Constant     16.849*** 4.739***    3.774***  
##               (0.211)   (0.072)    (0.935)   
## ---------------------------------------------
## Observations   3,010     3,010      3,010    
## R2             0.477     0.300      0.238    
## Adjusted R2    0.474     0.296      0.234    
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01
15.5 Example 15.5. Return to Education for Working Women
# Example 15.5: reduced form for educ (mreg1), IV wage equation with fatheduc
# and motheduc as instruments for educ (mreg2), and OLS wage equation (mreg3).
# NOTE(review): the printed output reports 753 observations / 748 residual df
# for mreg1 but 428 observations for the lwage models, so the reduced form and
# the wage equations are fitted on different samples -- confirm intended.
mreg1 <- lm(educ ~ exper + expersq + fatheduc + motheduc, data = mroz)
mreg2 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + fatheduc + motheduc,
  data = mroz
)
mreg3 <- lm(lwage ~ educ + exper + expersq, data = mroz)
# Joint F test that both parental-education coefficients in the reduced form
# are zero.
linearHypothesis(mreg1, c("fatheduc=0", "motheduc=0"))
## Linear hypothesis test
## 
## Hypothesis:
## fatheduc = 0
## motheduc = 0
## 
## Model 1: restricted model
## Model 2: educ ~ exper + expersq + fatheduc + motheduc
## 
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    750 3846.2                                  
## 2    748 2884.1  2     962.1 124.76 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The three-column table that follows had no call producing it; its columns
# (educ OLS, lwage IV, lwage OLS) correspond to mreg1, mreg2 and mreg3, and
# the options mirror the other stargazer() calls in this file.
stargazer(
  mreg1, mreg2, mreg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ============================================
##                    Dependent variable:      
##              -------------------------------
##                educ           lwage         
##                OLS    instrumental    OLS   
##                         variable            
##                (1)        (2)         (3)   
## --------------------------------------------
## educ                     0.061*    0.107*** 
##                         (0.031)     (0.014) 
## exper        0.085***   0.044***   0.042*** 
##              (0.026)    (0.013)     (0.013) 
## expersq      -0.002**   -0.001**   -0.001** 
##              (0.001)    (0.0004)   (0.0004) 
## fatheduc     0.185***                       
##              (0.024)                        
## motheduc     0.186***                       
##              (0.026)                        
## Constant     8.367***    0.048     -0.522***
##              (0.267)    (0.400)     (0.199) 
## --------------------------------------------
## Observations   753        428         428   
## R2            0.262      0.136       0.157  
## Adjusted R2   0.258      0.130       0.151  
## ============================================
## Note:            *p<0.1; **p<0.05; ***p<0.01
15.6 Example 15.6. Using Two Test Scores as Indicators of Ability
# Example 15.6: wage equation that includes IQ as a regressor; KWW takes IQ's
# place in the instrument list while every other regressor instruments itself.
ireg1 <- ivreg(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ |
    educ + exper + tenure + married + south + urban + black + KWW,
  data = wage2
)
# Full coefficient table and fit statistics for the IV fit.
summary(ireg1)
##
## Call:
## ivreg(formula = lwage ~ educ + exper + tenure + married + south + 
##     urban + black + IQ | educ + exper + tenure + married + south + 
##     urban + black + KWW, data = wage2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -2.095561 -0.224017  0.006572  0.234495  1.398914 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.592453   0.325781  14.097  < 2e-16 ***
## educ         0.025032   0.016607   1.507  0.13206    
## exper        0.014420   0.003321   4.342 1.56e-05 ***
## tenure       0.010456   0.002601   4.020 6.30e-05 ***
## married      0.200690   0.040678   4.934 9.56e-07 ***
## south       -0.051553   0.031128  -1.656  0.09803 .  
## urban        0.176706   0.028212   6.264 5.75e-10 ***
## black       -0.022561   0.073960  -0.305  0.76040    
## IQ           0.013047   0.004934   2.644  0.00832 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3807 on 926 degrees of freedom
## Multiple R-Squared:  0.19,   Adjusted R-squared: 0.183 
## Wald test: 36.96 on 8 and 926 DF,  p-value: < 2.2e-16
15.7 Example 15.7. Return to Education for Working Women
# Example 15.7: endogeneity check for educ on the rows of mroz with inlf == 1.
# The same subset feeds all three fits, so it is built once.
working_women <- subset(mroz, mroz$inlf == 1)

# Reduced form for educ; its residuals become the extra regressor v2.
vreg1 <- lm(
  educ ~ exper + expersq + fatheduc + motheduc,
  data = working_women
)
v2 <- resid(vreg1)

# Wage equation augmented with v2. v2 is not a column of the data frame, so
# the formula picks it up from the calling environment.
# NOTE(review): v2 is listed on both sides of `|`, and educ equals the vreg1
# fitted values plus v2, so this fit looks numerically equivalent to OLS of
# lwage on educ, exper, expersq and v2 -- confirm.
vreg2 <- ivreg(
  lwage ~ educ + exper + expersq + v2 |
    exper + expersq + fatheduc + motheduc + v2,
  data = working_women
)
vreg3 <- lm(
  lwage ~ educ + exper + expersq,
  data = working_women
)
# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  vreg1, vreg2, vreg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ============================================
##                    Dependent variable:      
##              -------------------------------
##                educ           lwage         
##                OLS    instrumental    OLS   
##                         variable            
##                (1)        (2)         (3)   
## --------------------------------------------
## educ                    0.061**    0.107*** 
##                         (0.031)     (0.014) 
## exper         0.045     0.044***   0.042*** 
##              (0.040)    (0.013)     (0.013) 
## expersq       -0.001    -0.001**   -0.001** 
##              (0.001)    (0.0004)   (0.0004) 
## fatheduc     0.190***                       
##              (0.034)                        
## motheduc     0.158***                       
##              (0.036)                        
## v2                       0.058*             
##                         (0.035)             
## Constant     9.103***    0.048     -0.522***
##              (0.427)    (0.395)     (0.199) 
## --------------------------------------------
## Observations   428        428         428   
## R2            0.211      0.162       0.157  
## Adjusted R2   0.204      0.154       0.151  
## ============================================
## Note:            *p<0.1; **p<0.05; ***p<0.01
15.8 Example 15.8. Return to Education for Working Women
. u mroz, clear
. qui ivreg lwage (educ=fatheduc motheduc) exper*
. predict u1, res
# Example 15.8: overidentification checks for the IV wage equation.
# In the earlier layout the `wreg3 <- ivreg(...)` assignment and the final
# stargazer() call were fused onto the end of `#` comment lines, so R never
# executed them and resid(wreg3) had nothing to work on. Each statement now
# sits on its own line, with the console output kept as `##` comments.

# Keep only rows where wage is observed.
mroz2 <- subset(mroz, !is.na(wage))

# Two instruments (fatheduc, motheduc) for educ.
wreg1 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + fatheduc + motheduc,
  data = mroz2
)
u1 <- resid(wreg1)
# Regress the IV residuals on every exogenous variable; u1 is taken from the
# calling environment because it is not a column of mroz2.
wreg2 <- lm(u1 ~ exper + expersq + fatheduc + motheduc, data = mroz2)
summary(wreg2)$r.squared * nobs(wreg2) # LM = N*Rsq
## [1] 0.3780713

# Same check after adding huseduc as a third instrument.
wreg3 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + fatheduc + motheduc + huseduc,
  data = mroz2
)
u1_h <- resid(wreg3)
wreg4 <- lm(
  u1_h ~ exper + expersq + fatheduc + motheduc + huseduc,
  data = mroz2
)
summary(wreg4)$r.squared * nobs(wreg4) # LM = N*Rsq
## [1] 1.115043

# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  wreg1, wreg2, wreg3, wreg4,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ========================================================
##                          Dependent variable:            
##              -------------------------------------------
##                 lwage        u1       lwage       u1_h  
##              instrumental   OLS    instrumental   OLS   
##                variable              variable           
##                  (1)        (2)        (3)        (4)   
## --------------------------------------------------------
## educ            0.061*               0.080***           
##                (0.031)               (0.022)            
## exper          0.044***   -0.00002   0.043***    0.0001 
##                (0.013)    (0.013)    (0.013)    (0.013) 
## expersq        -0.001**   0.00000    -0.001**   -0.00001
##                (0.0004)   (0.0004)   (0.0004)   (0.0004)
## fatheduc                   0.006                 0.001  
##                           (0.011)               (0.011) 
## motheduc                   -0.007                -0.010 
##                           (0.012)               (0.012) 
## huseduc                                          0.007  
##                                                 (0.011) 
## Constant        0.048      0.011      -0.187     0.009  
##                (0.400)    (0.141)    (0.285)    (0.177) 
## --------------------------------------------------------
## Observations     428        428        428        428   
## R2              0.136      0.001      0.150      0.003  
## Adjusted R2     0.130      -0.009     0.144      -0.009 
## ========================================================
## Note:                        *p<0.1; **p<0.05; ***p<0.01
15.9 Example 15.9. Effect of Education on Fertility
# Example 15.9: fertility equation with educ instrumented by meduc and feduc
# (ivregk) and the matching OLS fit (regk).
ivregk <- ivreg(
  kids ~ educ + age + agesq + black + east + northcen + west + farm +
    othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84 |
    age + agesq + black + east + northcen + west + farm + othrural + town +
    smcity + y74 + y76 + y78 + y80 + y82 + y84 + meduc + feduc,
  data = fertil1
)
regk <- lm(
  kids ~ educ + age + agesq + black + east + northcen + west + farm +
    othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)

# Endogeneity check for educ.
# The reduced form must contain every exogenous variable of the kids equation
# in addition to the two instruments; the earlier version regressed educ on
# meduc and feduc only, so v2 was not the reduced-form residual that the
# regression-based test calls for.
# The table printed after this block was generated with the earlier
# two-regressor reduced form; re-run to refresh columns (3) and (4).
regk2 <- lm(
  educ ~ meduc + feduc + age + agesq + black + east + northcen + west +
    farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)
v2 <- resid(regk2)
# Structural equation augmented with v2; the coefficient on v2 is the quantity
# of interest. v2 is taken from the calling environment because it is not a
# column of fertil1.
ivregk2 <- ivreg(
  kids ~ educ + age + agesq + black + east + northcen + west + farm +
    othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84 + v2 |
    age + agesq + black + east + northcen + west + farm + othrural + town +
    smcity + y74 + y76 + y78 + y80 + y82 + y84 + v2 + meduc + feduc,
  data = fertil1
)
# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  ivregk, regk, regk2, ivregk2,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================================
##                          Dependent variable:             
##              --------------------------------------------
##                       kids            educ       kids    
##              instrumental    OLS      OLS    instrumental
##                variable                        variable  
##                  (1)         (2)      (3)        (4)     
## ---------------------------------------------------------
## educ          -0.153***   -0.128***           -0.151***  
##                (0.039)     (0.018)             (0.037)   
## age            0.524***   0.532***             0.531***  
##                (0.139)     (0.138)             (0.138)   
## agesq         -0.006***   -0.006***           -0.006***  
##                (0.002)     (0.002)             (0.002)   
## black          1.073***   1.076***             1.062***  
##                (0.174)     (0.174)             (0.175)   
## east            0.229*      0.217               0.221*   
##                (0.134)     (0.133)             (0.133)   
## northcen       0.374***   0.363***             0.372***  
##                (0.122)     (0.121)             (0.122)   
## west            0.208       0.198               0.204    
##                (0.168)     (0.167)             (0.167)   
## farm            -0.077     -0.053               -0.065   
##                (0.151)     (0.147)             (0.148)   
## othrural        -0.195     -0.163               -0.178   
##                (0.182)     (0.175)             (0.177)   
## town            0.082       0.084               0.080    
##                (0.125)     (0.125)             (0.125)   
## smcity          0.212       0.212               0.210    
##                (0.160)     (0.160)             (0.160)   
## y74             0.272       0.268               0.272    
##                (0.173)     (0.173)             (0.173)   
## y76             -0.095     -0.097               -0.098   
##                (0.179)     (0.179)             (0.179)   
## y78             -0.057     -0.069               -0.064   
##                (0.183)     (0.182)             (0.182)   
## y80             -0.053     -0.071               -0.065   
##                (0.185)     (0.183)             (0.183)   
## y82           -0.496***   -0.522***           -0.514***  
##                (0.177)     (0.172)             (0.173)   
## y84           -0.521***   -0.545***           -0.535***  
##                (0.178)     (0.175)             (0.175)   
## meduc                               0.184***             
##                                     (0.022)              
## feduc                               0.221***             
##                                     (0.025)              
## v2                                              0.029    
##                                                (0.042)   
## Constant       -7.241**   -7.742**  8.861***   -7.407**  
##                (3.137)     (3.052)  (0.203)    (3.090)   
## ---------------------------------------------------------
## Observations    1,129       1,129    1,129      1,129    
## R2              0.128       0.130    0.269      0.130    
## Adjusted R2     0.115       0.116    0.268      0.116    
## =========================================================
## Note:                         *p<0.1; **p<0.05; ***p<0.01
15.10 Example 15.10. Job Training and Worker Productivity
# Example 15.10: job training (chrsemp) and scrap rates (clscrap), using the
# rows of jtrain with year == 1988.
jtrain2 <- subset(jtrain, jtrain$year == 1988)

# Regression of chrsemp on cgrant.
# NOTE(review): the printed table shows 125 observations here versus 45 for
# the clscrap models, so the fits do not share one sample -- confirm intended.
jreg1 <- lm(chrsemp ~ cgrant, data = jtrain2)

# IV estimate with cgrant as the instrument for chrsemp.
jreg2 <- ivreg(clscrap ~ chrsemp | cgrant, data = jtrain2)

# OLS benchmark. The earlier version called ivreg() with a one-part formula
# and no instruments, which stargazer then labelled "instrumental variable";
# lm() states the intent directly. The column header of the previously
# printed table changes accordingly when the table is regenerated.
jreg3 <- lm(clscrap ~ chrsemp, data = jtrain2)

# Side-by-side text table limited to N, R-squared and adjusted R-squared.
stargazer(
  jreg1, jreg2, jreg3,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                chrsemp        clscrap     
##                  OLS       instrumental   
##                              variable     
##                  (1)       (2)      (3)   
## ------------------------------------------
## cgrant        27.878***                   
##                (3.129)                    
## chrsemp                  -0.014*  -0.008* 
##                          (0.008)  (0.005) 
## Constant        0.509     -0.033   -0.104 
##                (1.558)   (0.127)  (0.104) 
## ------------------------------------------
## Observations     125        45       45   
## R2              0.392     0.016    0.062  
## Adjusted R2     0.387     -0.007   0.040  
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01