II Econometric Analysis Using R

Also available in Stata and Python versions

### Chapter 6. Additional Single-Equation Topics

#### Example 6.1

library(wooldridge)
library(AER)
library(stargazer)
library(haven)

Testing for endogeneity of educ in the wage equation

# Restrict the sample to rows where wage is observed.
df <- subset(mroz, !is.na(wage))

# Regress educ on experience and the three education variables; the
# residuals of this fit are reused in the next regression.
OLS1 <- lm(
  educ ~ exper + expersq + motheduc + fatheduc + huseduc,
  data = df
)
summary(OLS1)
##
## Call:
## lm(formula = educ ~ exper + expersq + motheduc + fatheduc + huseduc,
##     data = df)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -6.6882 -1.1519  0.0097  1.0640  5.7302
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.5383110  0.4597824  12.046  < 2e-16 ***
## exper        0.0374977  0.0343102   1.093 0.275059
## expersq     -0.0006002  0.0010261  -0.585 0.558899
## motheduc     0.1141532  0.0307835   3.708 0.000237 ***
## fatheduc     0.1060801  0.0295153   3.594 0.000364 ***
## huseduc      0.3752548  0.0296347  12.663  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.738 on 422 degrees of freedom
## Multiple R-squared:  0.4286, Adjusted R-squared:  0.4218
## F-statistic:  63.3 on 5 and 422 DF,  p-value: < 2.2e-16
# Add the residuals from OLS1 as an extra regressor in the log-wage
# equation and print the fit.
v2 <- residuals(OLS1)
OLS2 <- lm(lwage ~ exper + expersq + educ + v2, data = df)
summary(OLS2)
##
## Call:
## lm(formula = lwage ~ exper + expersq + educ + v2, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -3.05797 -0.29594  0.04984  0.37935  2.34204
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1868572  0.2835905  -0.659  0.51032
## exper        0.0430973  0.0131810   3.270  0.00116 **
## expersq     -0.0008628  0.0003937  -2.192  0.02895 *
## educ         0.0803918  0.0216362   3.716  0.00023 ***
## v2           0.0471890  0.0285519   1.653  0.09912 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6651 on 423 degrees of freedom
## Multiple R-squared:  0.1622, Adjusted R-squared:  0.1543
## F-statistic: 20.48 on 4 and 423 DF,  p-value: 1.944e-15

HOME

#### Example 6.2

# Log-wage equation including the educ x black interaction.
OLS1 <- lm(
  lwage ~ educ * black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

# educ regressed on nearc4, its interaction with black, and the controls.
OLS2 <- lm(
  educ ~ black * nearc4 + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
v21 <- residuals(OLS2)

# Same right-hand side, with the product educ * black as the outcome.
card[["b_educ"]] <- card$educ * card$black
OLS3 <- lm(
  b_educ ~ exper + expersq + black * nearc4 + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
v22 <- residuals(OLS3)

# Log-wage equation augmented with both residual series. The doubled plus
# before reg668 is kept so the printed formula stays the same.
OLS4 <- lm(
  lwage ~ v21 + v22 + educ * black + exper + expersq + smsa + smsa66 +
    south + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 +
    +reg668,
  data = card
)

# Side-by-side text table of the four fits.
stargazer(
  OLS1, OLS2, OLS3, OLS4,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ====================================================
##                        Dependent variable:
##              ---------------------------------------
##                lwage     educ     b_educ     lwage
##                 (1)       (2)       (3)       (4)
## ----------------------------------------------------
## v21                                         -0.057
##                                             (0.055)
## v22                                          0.007
##                                             (0.039)
## educ         0.071***                       0.127**
##               (0.004)                       (0.055)
## black        -0.419*** -0.937*** 11.550***  -0.283
##               (0.079)   (0.148)   (0.088)   (0.487)
## nearc4                 0.319***   -0.091
##                         (0.098)   (0.058)
## exper        0.082***  -0.413*** 0.053***  0.106***
##               (0.007)   (0.034)   (0.020)   (0.024)
## expersq      -0.002***   0.001   -0.008*** -0.002***
##              (0.0003)   (0.002)   (0.001)  (0.0005)
## smsa         0.134***  0.402***  0.195***  0.111***
##               (0.020)   (0.105)   (0.062)   (0.030)
## smsa66         0.025     0.025     0.047     0.018
##               (0.019)   (0.106)   (0.063)   (0.021)
## south        -0.144***  -0.052   -0.253*** -0.142***
##               (0.026)   (0.136)   (0.080)   (0.027)
## reg661       -0.122***  -0.210     0.162   -0.110***
##               (0.039)   (0.203)   (0.120)   (0.041)
## reg662        -0.023    -0.289*    0.006    -0.008
##               (0.028)   (0.147)   (0.087)   (0.032)
## reg663         0.023    -0.238*    0.086     0.038
##               (0.027)   (0.143)   (0.085)   (0.031)
## reg664        -0.067*   -0.093     0.113    -0.060
##               (0.036)   (0.186)   (0.110)   (0.037)
## reg665         0.003   -0.483**   0.262**    0.034
##               (0.036)   (0.188)   (0.112)   (0.048)
## reg666         0.015   -0.513**  0.335***    0.050
##               (0.040)   (0.210)   (0.124)   (0.054)
## reg667        -0.007   -0.427**   0.296**    0.022
##               (0.039)   (0.206)   (0.122)   (0.050)
## reg668       -0.176***   0.314     0.100   -0.191***
##               (0.046)   (0.242)   (0.143)   (0.049)
## educ:black   0.018***                        0.011
##               (0.006)                       (0.039)
## black:nearc4             0.003   0.875***
##                         (0.177)   (0.105)
## Constant     4.807***  16.849***   0.095   3.845***
##               (0.075)   (0.215)   (0.127)   (0.931)
## ----------------------------------------------------
## Observations   3,010     3,010     3,010     3,010
## R2             0.302     0.477     0.952     0.302
## Adjusted R2    0.298     0.474     0.951     0.298
## ====================================================
## Note:                    *p<0.1; **p<0.05; ***p<0.01
# Joint test of the two restrictions v21 = 0 and v22 = 0 in OLS4.
linearHypothesis(OLS4, c("v21=0", "v22=0"))
## Linear hypothesis test
##
## Hypothesis:
## v21 = 0
## v22 = 0
##
## Model 1: restricted model
## Model 2: lwage ~ v21 + v22 + educ * black + exper + expersq + smsa + smsa66 +
##     south + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 +
##     reg667 + +reg668
##
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1   2993 413.82
## 2   2991 413.67  2   0.15005 0.5425 0.5814

IV

# Instrumental-variables fit of the log-wage equation. The terms after `|`
# are the instrument list; the doubled plus before reg668 is kept so the
# printed call stays the same.
IV1 <- ivreg(
  lwage ~ educ + b_educ + black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 +
    +reg668 |
    nearc4 + black:nearc4 + black + exper + expersq + smsa + smsa66 + south +
      reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 +
      +reg668,
  data = card
)

summary(IV1)
##
## Call:
## ivreg(formula = lwage ~ educ + b_educ + black + exper + expersq +
##     smsa + smsa66 + south + reg661 + reg662 + reg663 + reg664 +
##     reg665 + reg666 + reg667 + +reg668 | nearc4 + black:nearc4 +
##     black + exper + expersq + smsa + smsa66 + south + reg661 +
##     reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + +reg668,
##     data = card)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.84372 -0.24074  0.02335  0.25163  1.42490
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  3.8449897  0.9693451   3.967 7.46e-05 ***
## educ         0.1273557  0.0569582   2.236 0.025429 *
## b_educ       0.0109036  0.0403571   0.270 0.787042
## black       -0.2827650  0.5064228  -0.558 0.576642
## exper        0.1059116  0.0251806   4.206 2.67e-05 ***
## expersq     -0.0022406  0.0004823  -4.646 3.54e-06 ***
## smsa         0.1111555  0.0316396   3.513 0.000449 ***
## smsa66       0.0180009  0.0216221   0.833 0.405179
## south       -0.1424762  0.0283768  -5.021 5.45e-07 ***
## reg661      -0.1103479  0.0427259  -2.583 0.009850 **
## reg662      -0.0081783  0.0330717  -0.247 0.804702
## reg663       0.0382413  0.0327227   1.169 0.242639
## reg664      -0.0600379  0.0382978  -1.568 0.117066
## reg665       0.0337805  0.0499262   0.677 0.498707
## reg666       0.0498975  0.0559401   0.892 0.372475
## reg667       0.0216942  0.0521928   0.416 0.677692
## reg668      -0.1908353  0.0505417  -3.776 0.000163 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.387 on 2993 degrees of freedom
## Multiple R-Squared: 0.2435,  Adjusted R-squared: 0.2395
## Wald test: 48.15 on 16 and 2993 DF,  p-value: < 2.2e-16

HOME

#### Example 6.3

Overidentifying restriction in the wage equation

# Instrumental-variables fit of the log-wage equation; the terms after `|`
# are the instrument list.
IV1 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz
)
summary(IV1)
##
## Call:
## ivreg(formula = lwage ~ educ + exper + expersq | exper + expersq +
##     motheduc + fatheduc + huseduc, data = mroz)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -3.08378 -0.32135  0.03538  0.36934  2.35829
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1868572  0.2853959  -0.655 0.512997
## educ         0.0803918  0.0217740   3.692 0.000251 ***
## exper        0.0430973  0.0132649   3.249 0.001250 **
## expersq     -0.0008628  0.0003962  -2.178 0.029976 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6693 on 424 degrees of freedom
## Multiple R-Squared: 0.1495,  Adjusted R-squared: 0.1435
## Wald test: 11.52 on 3 and 424 DF,  p-value: 2.817e-07
# Regress the IV residuals on the full instrument list, using only the rows
# of mroz where wage is observed.
uhat <- residuals(IV1)
uhat_reg <- lm(
  uhat ~ exper + expersq + motheduc + fatheduc + huseduc,
  data = subset(mroz, !is.na(wage))
)
stargazer(uhat_reg, type = "text", no.space = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                                uhat
## -----------------------------------------------
## exper                         0.0001
##                               (0.013)
## expersq                      -0.00001
##                              (0.0004)
## motheduc                      -0.010
##                               (0.012)
## fatheduc                       0.001
##                               (0.011)
## huseduc                        0.007
##                               (0.011)
## Constant                       0.009
##                               (0.177)
## -----------------------------------------------
## Observations                    428
## R2                             0.003
## Residual Std. Error      0.670 (df = 422)
## F Statistic             0.220 (df = 5; 422)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# LM statistic: number of observations times the R-squared of uhat_reg,
# compared with a chi-squared distribution with 2 degrees of freedom.
LM <- (summary(uhat_reg)$r.squared) * (nobs(uhat_reg))
LM
## [1] 1.115043
pchisq(LM, df=2, lower.tail = FALSE)
## [1] 0.5726266

Heteroskedasticity Robust

# Coefficient tests for IV1 using the HC1 covariance estimator.
coeftest(IV1, vcovHC(IV1, type = "HC1") )
##
## t test of coefficients:
##
##                Estimate  Std. Error t value  Pr(>|t|)
## (Intercept) -0.18685722  0.30126251 -0.6202 0.5354283
## educ         0.08039176  0.02170330  3.7041 0.0002402 ***
## exper        0.04309732  0.01530642  2.8156 0.0050951 **
## expersq     -0.00086280  0.00042166 -2.0462 0.0413549 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Heteroskedasticity using LM statistic pp.137

# IV residuals, fitted values of educ, and the residuals of motheduc and
# fatheduc after regressing each on exper, expersq and the fitted educ.
uhat <- resid(ivreg(lwage ~ educ + exper + expersq | exper + expersq + motheduc + fatheduc + huseduc, data=mroz))
euhat <- predict(edreg<-lm(educ ~ exper + expersq + motheduc + fatheduc + huseduc, data= mroz))
rm <- resid(rmreg<-lm(motheduc~exper + expersq + euhat, data=mroz))
rf <- resid(rfreg<-lm(fatheduc~exper + expersq + euhat, data=mroz))
stargazer(edreg, rmreg,rfreg, no.space=TRUE, type="text")
##
## ==============================================================================================
##                                                Dependent variable:
##                     --------------------------------------------------------------------------
##                               educ                   motheduc                 fatheduc
##                               (1)                      (2)                      (3)
## ----------------------------------------------------------------------------------------------
## exper                       0.053**                 -0.105***                -0.107***
##                             (0.022)                  (0.034)                  (0.035)
## expersq                      -0.001                   0.002                    0.001
##                             (0.001)                  (0.001)                  (0.001)
## motheduc                    0.130***
##                             (0.022)
## fatheduc                    0.101***
##                             (0.021)
## huseduc                     0.372***
##                             (0.022)
## euhat                                                1.425***                 1.534***
##                                                      (0.061)                  (0.064)
## Constant                    5.116***                -7.413***                -9.170***
##                             (0.298)                  (0.742)                  (0.778)
## ----------------------------------------------------------------------------------------------
## Observations                  753                      753                      753
## R2                           0.466                    0.430                    0.442
## Adjusted R2                  0.462                    0.427                    0.440
## Residual Std. Error     1.672 (df = 747)         2.549 (df = 749)         2.674 (df = 749)
## F Statistic         130.163*** (df = 5; 747) 187.974*** (df = 3; 749) 197.796*** (df = 3; 749)
## ==============================================================================================
## Note:                                                              *p<0.1; **p<0.05; ***p<0.01
# NOTE(review): uhat has fewer entries than the other columns (the three
# regressions above report 753 observations), so cbind() recycles it and
# emits the warning below. TODO confirm the rows line up with lwage, or
# fit every regression on the wage-observed subsample instead.
df <- data.frame(cbind(lwage=mroz$lwage, uhat, euhat, rm, rf))
## Warning in cbind(lwage = mroz$lwage, uhat, euhat, rm, rf): number of rows of
## result is not a multiple of vector length (arg 2)
df['one']=1
df <- subset(df, !is.na(lwage))
# Regress a column of ones on uhat:rm and uhat:rf with no intercept.
summary(LMreg <- lm(one ~ uhat:rm + uhat:rf + 0, data=df))
##
## Call:
## lm(formula = one ~ uhat:rm + uhat:rf + 0, data = df)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## 0.6041 0.9860 1.0003 1.0138 1.2486
##
## Coefficients:
##           Estimate Std. Error t value Pr(>|t|)
## uhat:rm -0.0270098  0.0289590  -0.933    0.352
## uhat:rf -0.0004977  0.0307894  -0.016    0.987
##
## Residual standard error: 1.001 on 426 degrees of freedom
## Multiple R-squared:  0.00238,    Adjusted R-squared:  -0.002303
## F-statistic: 0.5082 on 2 and 426 DF,  p-value: 0.6019
# LM statistic from this regression: R-squared times number of observations.
LM <- (summary(LMreg)$r.squared) * (nobs(LMreg))
LM
## [1] 1.018745
pchisq(LM, df=2, lower.tail = FALSE)
## [1] 0.6008726

HOME

#### Example 6.4

Testing for neglected nonlinearities in a wage equation

# Log-wage regression on nls80; the fit is stored as nls_reg for later use.
nls_reg <- lm(
  lwage ~ exper + tenure + married + south + urban + black + educ,
  data = nls80
)
summary(nls_reg)
##
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban +
##     black + educ, data = nls80)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.98069 -0.21996  0.00707  0.24288  1.22822
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
# Residuals of nls_reg plus the squared and cubed predictions.
uhat <- residuals(nls_reg)
wghat2 <- predict(nls_reg)^2
wghat3 <- predict(nls_reg)^3

# Auxiliary regression of the residuals on the original regressors and
# both powers of the prediction.
u_reg <- lm(
  uhat ~ exper + tenure + married + south + urban + black + educ +
    wghat2 + wghat3,
  data = nls80
)
stargazer(u_reg, type = "text", no.space = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                                uhat
## -----------------------------------------------
## exper                         -0.762
##                               (1.397)
## tenure                        -0.638
##                               (1.169)
## married                       -10.826
##                              (19.840)
## south                          4.935
##                               (9.045)
## urban                         -9.985
##                              (18.300)
## black                         10.226
##                              (18.739)
## educ                          -3.552
##                               (6.510)
## wghat2                         8.083
##                              (14.746)
## wghat3                        -0.401
##                               (0.728)
## Constant                     -171.482
##                              (313.246)
## -----------------------------------------------
## Observations                    935
## R2                            0.0004
## Residual Std. Error      0.366 (df = 925)
## F Statistic             0.036 (df = 9; 925)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# LM statistic: number of observations times the R-squared of u_reg,
# compared with a chi-squared distribution with 2 degrees of freedom.
LM <- (summary(u_reg)$r.squared) * (nobs(u_reg))
LM
## [1] 0.3288689
pchisq(LM, df=2, lower.tail = FALSE)
## [1] 0.8483734

HOME

#### Example 6.5

Length of Time on Workers Compensation

# Keep the rows of injury with ky equal to 1, then regress ldurat on
# afchnge, highearn and their interaction.
df <- subset(injury, ky == 1)
summary(lm(ldurat ~ afchnge*highearn, data=df))
##
## Call:
## lm(formula = ldurat ~ afchnge * highearn, data = df)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -2.9666 -0.8872  0.0042  0.8126  4.0784
##
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      1.125615   0.030737  36.621  < 2e-16 ***
## afchnge          0.007657   0.044717   0.171  0.86404
## highearn         0.256479   0.047446   5.406 6.72e-08 ***
## afchnge:highearn 0.190601   0.068509   2.782  0.00542 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.269 on 5622 degrees of freedom
## Multiple R-squared:  0.02066,    Adjusted R-squared:  0.02014
## F-statistic: 39.54 on 3 and 5622 DF,  p-value: < 2.2e-16

HOME