II Econometric Analysis Using R

Also available in Stata and Python versions

Chapter 6. Additional Single-Equation Topics

Example 6.1

Load libraries

library(wooldridge)
library(AER)
library(stargazer)
library(haven)

Testing for endogeneity of educ in the wage equation

# Estimation sample: rows of mroz with an observed wage.
df <- mroz[!is.na(mroz$wage), ]
# Regress educ on the exogenous regressors and the three instruments; the
# residuals of this fit are used as a control function in the next step.
OLS1 <- lm(educ ~ exper + expersq + motheduc + fatheduc + huseduc, data = df)
summary(OLS1)
## 
## Call:
## lm(formula = educ ~ exper + expersq + motheduc + fatheduc + huseduc, 
##     data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6882 -1.1519  0.0097  1.0640  5.7302 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.5383110  0.4597824  12.046  < 2e-16 ***
## exper        0.0374977  0.0343102   1.093 0.275059    
## expersq     -0.0006002  0.0010261  -0.585 0.558899    
## motheduc     0.1141532  0.0307835   3.708 0.000237 ***
## fatheduc     0.1060801  0.0295153   3.594 0.000364 ***
## huseduc      0.3752548  0.0296347  12.663  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.738 on 422 degrees of freedom
## Multiple R-squared:  0.4286, Adjusted R-squared:  0.4218 
## F-statistic:  63.3 on 5 and 422 DF,  p-value: < 2.2e-16
# Add the reduced-form residuals to the wage equation; the t statistic on v2
# is the test of whether educ is endogenous.
v2 <- residuals(OLS1)
OLS2 <- lm(lwage ~ exper + expersq + educ + v2, data = df)
summary(OLS2)
## 
## Call:
## lm(formula = lwage ~ exper + expersq + educ + v2, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.05797 -0.29594  0.04984  0.37935  2.34204 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.1868572  0.2835905  -0.659  0.51032    
## exper        0.0430973  0.0131810   3.270  0.00116 ** 
## expersq     -0.0008628  0.0003937  -2.192  0.02895 *  
## educ         0.0803918  0.0216362   3.716  0.00023 ***
## v2           0.0471890  0.0285519   1.653  0.09912 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6651 on 423 degrees of freedom
## Multiple R-squared:  0.1622, Adjusted R-squared:  0.1543 
## F-statistic: 20.48 on 4 and 423 DF,  p-value: 1.944e-15

HOME

Example 6.2

# Wage equation by OLS, treating educ and educ:black as exogenous.
OLS1 <- lm(
  lwage ~ educ * black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

# Reduced form for educ: instruments are nearc4 and black:nearc4.
OLS2 <- lm(
  educ ~ black * nearc4 + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

v21 <- resid(OLS2)

# Reduced form for the interaction educ x black, stored as its own column so
# it can be used as a dependent variable (and later as a regressor in ivreg).
card$b_educ <- card$educ * card$black
OLS3 <- lm(
  b_educ ~ exper + expersq + black * nearc4 + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
v22 <- resid(OLS3)

# Control-function regression: the wage equation augmented with both sets of
# reduced-form residuals. The stray unary "+" before reg668 has been removed so
# the formula printed by later tests reads cleanly; the estimates do not change.
OLS4 <- lm(
  lwage ~ v21 + v22 + educ * black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

stargazer(
  OLS1, OLS2, OLS3, OLS4,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## ====================================================
##                        Dependent variable:          
##              ---------------------------------------
##                lwage     educ     b_educ     lwage  
##                 (1)       (2)       (3)       (4)   
## ----------------------------------------------------
## v21                                         -0.057  
##                                             (0.055) 
## v22                                          0.007  
##                                             (0.039) 
## educ         0.071***                       0.127** 
##               (0.004)                       (0.055) 
## black        -0.419*** -0.937*** 11.550***  -0.283  
##               (0.079)   (0.148)   (0.088)   (0.487) 
## nearc4                 0.319***   -0.091            
##                         (0.098)   (0.058)           
## exper        0.082***  -0.413*** 0.053***  0.106*** 
##               (0.007)   (0.034)   (0.020)   (0.024) 
## expersq      -0.002***   0.001   -0.008*** -0.002***
##              (0.0003)   (0.002)   (0.001)  (0.0005) 
## smsa         0.134***  0.402***  0.195***  0.111*** 
##               (0.020)   (0.105)   (0.062)   (0.030) 
## smsa66         0.025     0.025     0.047     0.018  
##               (0.019)   (0.106)   (0.063)   (0.021) 
## south        -0.144***  -0.052   -0.253*** -0.142***
##               (0.026)   (0.136)   (0.080)   (0.027) 
## reg661       -0.122***  -0.210     0.162   -0.110***
##               (0.039)   (0.203)   (0.120)   (0.041) 
## reg662        -0.023    -0.289*    0.006    -0.008  
##               (0.028)   (0.147)   (0.087)   (0.032) 
## reg663         0.023    -0.238*    0.086     0.038  
##               (0.027)   (0.143)   (0.085)   (0.031) 
## reg664        -0.067*   -0.093     0.113    -0.060  
##               (0.036)   (0.186)   (0.110)   (0.037) 
## reg665         0.003   -0.483**   0.262**    0.034  
##               (0.036)   (0.188)   (0.112)   (0.048) 
## reg666         0.015   -0.513**  0.335***    0.050  
##               (0.040)   (0.210)   (0.124)   (0.054) 
## reg667        -0.007   -0.427**   0.296**    0.022  
##               (0.039)   (0.206)   (0.122)   (0.050) 
## reg668       -0.176***   0.314     0.100   -0.191***
##               (0.046)   (0.242)   (0.143)   (0.049) 
## educ:black   0.018***                        0.011  
##               (0.006)                       (0.039) 
## black:nearc4             0.003   0.875***           
##                         (0.177)   (0.105)           
## Constant     4.807***  16.849***   0.095   3.845*** 
##               (0.075)   (0.215)   (0.127)   (0.931) 
## ----------------------------------------------------
## Observations   3,010     3,010     3,010     3,010  
## R2             0.302     0.477     0.952     0.302  
## Adjusted R2    0.298     0.474     0.951     0.298  
## ====================================================
## Note:                    *p<0.1; **p<0.05; ***p<0.01
# Joint F test that the coefficients on both reduced-form residuals are zero.
linearHypothesis(model = OLS4, hypothesis.matrix = c("v21=0", "v22=0"))
## Linear hypothesis test
## 
## Hypothesis:
## v21 = 0
## v22 = 0
## 
## Model 1: restricted model
## Model 2: lwage ~ v21 + v22 + educ * black + exper + expersq + smsa + smsa66 + 
##     south + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + 
##     reg667 + +reg668
## 
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1   2993 413.82                           
## 2   2991 413.67  2   0.15005 0.5425 0.5814

IV

# 2SLS for the wage equation. educ and b_educ (the educ x black column created
# earlier in this example) are the regressors left out of the instrument list;
# nearc4 and black:nearc4 are the excluded instruments, and every other
# regressor appears on both sides of "|".
# The stray unary "+" before reg668 (in both parts of the formula) has been
# removed; it did not affect the estimates but cluttered the printed Call.
IV1 <- ivreg(
  lwage ~ educ + b_educ + black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 |
    nearc4 + black:nearc4 + black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

summary(IV1)
## 
## Call:
## ivreg(formula = lwage ~ educ + b_educ + black + exper + expersq + 
##     smsa + smsa66 + south + reg661 + reg662 + reg663 + reg664 + 
##     reg665 + reg666 + reg667 + +reg668 | nearc4 + black:nearc4 + 
##     black + exper + expersq + smsa + smsa66 + south + reg661 + 
##     reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + +reg668, 
##     data = card)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.84372 -0.24074  0.02335  0.25163  1.42490 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.8449897  0.9693451   3.967 7.46e-05 ***
## educ         0.1273557  0.0569582   2.236 0.025429 *  
## b_educ       0.0109036  0.0403571   0.270 0.787042    
## black       -0.2827650  0.5064228  -0.558 0.576642    
## exper        0.1059116  0.0251806   4.206 2.67e-05 ***
## expersq     -0.0022406  0.0004823  -4.646 3.54e-06 ***
## smsa         0.1111555  0.0316396   3.513 0.000449 ***
## smsa66       0.0180009  0.0216221   0.833 0.405179    
## south       -0.1424762  0.0283768  -5.021 5.45e-07 ***
## reg661      -0.1103479  0.0427259  -2.583 0.009850 ** 
## reg662      -0.0081783  0.0330717  -0.247 0.804702    
## reg663       0.0382413  0.0327227   1.169 0.242639    
## reg664      -0.0600379  0.0382978  -1.568 0.117066    
## reg665       0.0337805  0.0499262   0.677 0.498707    
## reg666       0.0498975  0.0559401   0.892 0.372475    
## reg667       0.0216942  0.0521928   0.416 0.677692    
## reg668      -0.1908353  0.0505417  -3.776 0.000163 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.387 on 2993 degrees of freedom
## Multiple R-Squared: 0.2435,  Adjusted R-squared: 0.2395 
## Wald test: 48.15 on 16 and 2993 DF,  p-value: < 2.2e-16

HOME

Example 6.3

Overidentifying restriction in the wage equation

# 2SLS wage equation: educ is instrumented by motheduc, fatheduc and huseduc;
# exper and expersq act as their own instruments.
IV1 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz
)
summary(IV1)
## 
## Call:
## ivreg(formula = lwage ~ educ + exper + expersq | exper + expersq + 
##     motheduc + fatheduc + huseduc, data = mroz)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.08378 -0.32135  0.03538  0.36934  2.35829 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.1868572  0.2853959  -0.655 0.512997    
## educ         0.0803918  0.0217740   3.692 0.000251 ***
## exper        0.0430973  0.0132649   3.249 0.001250 ** 
## expersq     -0.0008628  0.0003962  -2.178 0.029976 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6693 on 424 degrees of freedom
## Multiple R-Squared: 0.1495,  Adjusted R-squared: 0.1435 
## Wald test: 11.52 on 3 and 424 DF,  p-value: 2.817e-07
# Auxiliary regression for the overidentification test: regress the 2SLS
# residuals on all exogenous variables, using the rows with an observed wage.
uhat <- residuals(IV1)
uhat_reg <- lm(
  uhat ~ exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz[!is.na(mroz$wage), ]
)
stargazer(uhat_reg, type = "text", no.space = TRUE)
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                uhat            
## -----------------------------------------------
## exper                         0.0001           
##                               (0.013)          
## expersq                      -0.00001          
##                              (0.0004)          
## motheduc                      -0.010           
##                               (0.012)          
## fatheduc                       0.001           
##                               (0.011)          
## huseduc                        0.007           
##                               (0.011)          
## Constant                       0.009           
##                               (0.177)          
## -----------------------------------------------
## Observations                    428            
## R2                             0.003           
## Adjusted R2                   -0.009           
## Residual Std. Error      0.670 (df = 422)      
## F Statistic             0.220 (df = 5; 422)    
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# LM statistic: number of observations times the R-squared of the auxiliary
# regression.
LM <- nobs(uhat_reg) * summary(uhat_reg)$r.squared
print(LM)
## [1] 1.115043
# Upper-tail chi-square p-value; df = 2 because there are three excluded
# instruments for one instrumented regressor.
pchisq(q = LM, df = 2, lower.tail = FALSE)
## [1] 0.5726266

Heteroskedasticity-robust standard errors

# Coefficient table for the 2SLS fit using the HC1 covariance estimator.
coeftest(IV1, vcov. = vcovHC(IV1, type = "HC1"))
## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept) -0.18685722  0.30126251 -0.6202 0.5354283    
## educ         0.08039176  0.02170330  3.7041 0.0002402 ***
## exper        0.04309732  0.01530642  2.8156 0.0050951 ** 
## expersq     -0.00086280  0.00042166 -2.0462 0.0413549 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Heteroskedasticity-robust test using the LM statistic (p. 137)

# Auxiliary regressions for the LM test. Every step is run on the wage-equation
# estimation sample (rows with lwage observed), so the first-stage fitted
# values and the partialled-out instruments correspond row for row to the 2SLS
# residuals. Previously these regressions used all rows of mroz, including
# rows that never enter the wage equation.
# NOTE: the printed tables that follow were generated before this change;
# re-run the document to refresh them.
wage_sample <- subset(mroz, !is.na(lwage))

# 2SLS residuals from the wage equation.
uhat <- resid(ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + motheduc + fatheduc + huseduc,
  data = wage_sample
))

# First-stage fitted values of educ.
edreg <- lm(
  educ ~ exper + expersq + motheduc + fatheduc + huseduc,
  data = wage_sample
)
euhat <- predict(edreg)

# Residuals of motheduc and fatheduc after partialling out exper, expersq and
# the fitted educ.
rmreg <- lm(motheduc ~ exper + expersq + euhat, data = wage_sample)
rm <- resid(rmreg)
rfreg <- lm(fatheduc ~ exper + expersq + euhat, data = wage_sample)
rf <- resid(rfreg)

stargazer(edreg, rmreg, rfreg, no.space = TRUE, type = "text")
## 
## ==============================================================================================
##                                                Dependent variable:                            
##                     --------------------------------------------------------------------------
##                               educ                   motheduc                 fatheduc        
##                               (1)                      (2)                      (3)           
## ----------------------------------------------------------------------------------------------
## exper                       0.053**                 -0.105***                -0.107***        
##                             (0.022)                  (0.034)                  (0.035)         
## expersq                      -0.001                   0.002                    0.001          
##                             (0.001)                  (0.001)                  (0.001)         
## motheduc                    0.130***                                                          
##                             (0.022)                                                           
## fatheduc                    0.101***                                                          
##                             (0.021)                                                           
## huseduc                     0.372***                                                          
##                             (0.022)                                                           
## euhat                                                1.425***                 1.534***        
##                                                      (0.061)                  (0.064)         
## Constant                    5.116***                -7.413***                -9.170***        
##                             (0.298)                  (0.742)                  (0.778)         
## ----------------------------------------------------------------------------------------------
## Observations                  753                      753                      753           
## R2                           0.466                    0.430                    0.442          
## Adjusted R2                  0.462                    0.427                    0.440          
## Residual Std. Error     1.672 (df = 747)         2.549 (df = 749)         2.674 (df = 749)    
## F Statistic         130.163*** (df = 5; 747) 187.974*** (df = 3; 749) 197.796*** (df = 3; 749)
## ==============================================================================================
## Note:                                                              *p<0.1; **p<0.05; ***p<0.01
# Build the data for the LM regression on the wage sample only.
# The old cbind() call silently recycled the shorter residual vector against
# vectors with one entry per row of mroz (hence the recycling warning) and was
# only correct because the rows with an observed wage come first in mroz.
# Here every vector is cut to the wage sample explicitly before combining.
keep <- !is.na(mroz$lwage)
# Vectors with one entry per row of mroz are reduced to the wage sample;
# vectors that are already shorter pass through unchanged.
sample_only <- function(x) if (length(x) == length(keep)) x[keep] else x
df <- data.frame(
  lwage = mroz$lwage[keep],
  uhat = sample_only(uhat),
  euhat = sample_only(euhat),
  rm = sample_only(rm),
  rf = sample_only(rf)
)
df$one <- 1
# Regress a column of ones on the products uhat*rm and uhat*rf, with no
# intercept ("+ 0"); the fit's R-squared feeds the LM statistic computed next.
LMreg <- lm(one ~ uhat:rm + uhat:rf + 0, data = df)
summary(LMreg)
## 
## Call:
## lm(formula = one ~ uhat:rm + uhat:rf + 0, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## 0.6041 0.9860 1.0003 1.0138 1.2486 
## 
## Coefficients:
##           Estimate Std. Error t value Pr(>|t|)
## uhat:rm -0.0270098  0.0289590  -0.933    0.352
## uhat:rf -0.0004977  0.0307894  -0.016    0.987
## 
## Residual standard error: 1.001 on 426 degrees of freedom
## Multiple R-squared:  0.00238,    Adjusted R-squared:  -0.002303 
## F-statistic: 0.5082 on 2 and 426 DF,  p-value: 0.6019
# LM statistic: number of observations times the R-squared of the
# regression of one on the residual products.
LM <- nobs(LMreg) * summary(LMreg)$r.squared
print(LM)
## [1] 1.018745
# Upper-tail chi-square p-value with 2 degrees of freedom (two product terms).
pchisq(q = LM, df = 2, lower.tail = FALSE)
## [1] 0.6008726

HOME

Example 6.4

Testing for neglected nonlinearities in a wage equation

# Read the Stata file nls80.dta from the working directory.
nls80 <- read_dta(file = "nls80.dta")
# Baseline log-wage regression whose functional form is tested below.
nls_reg <- lm(
  lwage ~ exper + tenure + married + south + urban + black + educ,
  data = nls80
)
summary(nls_reg)
## 
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban + 
##     black + educ, data = nls80)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
# Auxiliary regression: OLS residuals on the original regressors plus the
# squared and cubed fitted values of the baseline regression.
uhat <- residuals(nls_reg)
wghat2 <- fitted(nls_reg)^2
wghat3 <- fitted(nls_reg)^3
u_reg <- lm(
  uhat ~ exper + tenure + married + south + urban + black + educ +
    wghat2 + wghat3,
  data = nls80
)
stargazer(u_reg, type = "text", no.space = TRUE)
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                uhat            
## -----------------------------------------------
## exper                         -0.762           
##                               (1.397)          
## tenure                        -0.638           
##                               (1.169)          
## married                       -10.826          
##                              (19.840)          
## south                          4.935           
##                               (9.045)          
## urban                         -9.985           
##                              (18.300)          
## black                         10.226           
##                              (18.739)          
## educ                          -3.552           
##                               (6.510)          
## wghat2                         8.083           
##                              (14.746)          
## wghat3                        -0.401           
##                               (0.728)          
## Constant                     -171.482          
##                              (313.246)         
## -----------------------------------------------
## Observations                    935            
## R2                            0.0004           
## Adjusted R2                   -0.009           
## Residual Std. Error      0.366 (df = 925)      
## F Statistic             0.036 (df = 9; 925)    
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# LM statistic: number of observations times the R-squared of the auxiliary
# regression.
LM <- nobs(u_reg) * summary(u_reg)$r.squared
print(LM)
## [1] 0.3288689
# Upper-tail chi-square p-value; df = 2 for the two added terms
# (wghat2 and wghat3).
pchisq(q = LM, df = 2, lower.tail = FALSE)
## [1] 0.8483734

HOME

Example 6.5

Length of Time on Workers Compensation

# Keep the rows of injury with ky == 1.
df <- subset(injury, ky == 1)
# afchnge * highearn expands to both main effects plus their interaction.
summary(lm(ldurat ~ afchnge * highearn, data = df))
## 
## Call:
## lm(formula = ldurat ~ afchnge * highearn, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9666 -0.8872  0.0042  0.8126  4.0784 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.125615   0.030737  36.621  < 2e-16 ***
## afchnge          0.007657   0.044717   0.171  0.86404    
## highearn         0.256479   0.047446   5.406 6.72e-08 ***
## afchnge:highearn 0.190601   0.068509   2.782  0.00542 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.269 on 5622 degrees of freedom
## Multiple R-squared:  0.02066,    Adjusted R-squared:  0.02014 
## F-statistic: 39.54 on 3 and 5622 DF,  p-value: < 2.2e-16

HOME