II Econometric Analysis Using R

Also available in Stata and Python versions

### Chapter 4 - Single-Equation and OLS Estimation

#### Example4.1

library(wooldridge)
library(stargazer)
library(AER)
library(lmtest)
library(haven)

Wage equation for married working women

# OLS fit of the wage equation; this object carries the usual (non-robust)
# standard errors.
lwage_hetr <- lm(
  lwage ~ exper + expersq + educ + age + kidslt6 + kidsge6,
  data = mroz
)
# Same coefficients, re-tested with the HC1 heteroskedasticity-consistent
# covariance matrix.
lwage_robust <- coeftest(lwage_hetr, vcov. = vcovHC(lwage_hetr, type = "HC1"))
# Side-by-side text table: column (1) is the lm fit, column (2) the coeftest.
stargazer(
  lwage_hetr, lwage_robust,
  type = "text",
  no.space = TRUE,
  column.labels = c("Hetrosced.", "Robust SE")
)
##
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                              lwage
##                               OLS           coefficient
##                                                test
##                           Hetrosced.         Robust SE
##                               (1)               (2)
## -------------------------------------------------------
## exper                      0.040***          0.040***
##                             (0.013)           (0.015)
## expersq                     -0.001*           -0.001*
##                            (0.0004)          (0.0004)
## educ                       0.108***          0.108***
##                             (0.014)           (0.014)
## age                         -0.001            -0.001
##                             (0.005)           (0.006)
## kidslt6                     -0.061            -0.061
##                             (0.089)           (0.106)
## kidsge6                     -0.015            -0.015
##                             (0.028)           (0.029)
## Constant                    -0.421            -0.421
##                             (0.317)           (0.318)
## -------------------------------------------------------
## Observations                  428
## R2                           0.158
## Residual Std. Error    0.668 (df = 421)
## F Statistic         13.191*** (df = 6; 421)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01
# Joint test that kidslt6, kidsge6 and age all have zero coefficients in
# lwage_hetr. No vcov argument is passed, so the test is not the
# heteroskedasticity-robust version.
linearHypothesis(lwage_hetr, c("kidslt6=0", "kidsge6=0", "age=0"))
## Linear hypothesis test
##
## Hypothesis:
## kidslt6 = 0
## kidsge6 = 0
## age = 0
##
## Model 1: restricted model
## Model 2: lwage ~ exper + expersq + educ + age + kidslt6 + kidsge6
##
##   Res.Df    RSS Df Sum of Sq     F Pr(>F)
## 1    424 188.31
## 2    421 187.99  3   0.31751 0.237 0.8705

LM1: LM statistic computed as n·R² from the auxiliary regression (p. 64)

# Residuals from the restricted model (age, kidslt6 and kidsge6 left out).
uhat_rst <- resid(lm(lwage ~ exper + expersq + educ, data=mroz))
# Auxiliary regression of those residuals on the full regressor list. The data
# are limited to rows with a non-missing wage so the row count matches the
# residual vector -- presumably lm() dropped the missing-lwage rows above
# (TODO confirm lwage is missing exactly when wage is).
summary(uhat_reg <- lm(uhat_rst ~ exper + expersq + educ + age + kidslt6 + kidsge6, data=subset(mroz, !is.na(wage))))
##
## Call:
## lm(formula = uhat_rst ~ exper + expersq + educ + age + kidslt6 +
##     kidsge6, data = subset(mroz, !is.na(wage)))
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -3.08183 -0.30631  0.04606  0.37161  2.35708
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  1.011e-01  3.169e-01   0.319    0.750
## exper       -1.747e-03  1.339e-02  -0.130    0.896
## expersq      2.996e-05  4.022e-04   0.074    0.941
## educ         3.423e-04  1.440e-02   0.024    0.981
## age         -1.465e-03  5.292e-03  -0.277    0.782
## kidslt6     -6.071e-02  8.876e-02  -0.684    0.494
## kidsge6     -1.459e-02  2.790e-02  -0.523    0.601
##
## Residual standard error: 0.6682 on 421 degrees of freedom
## Multiple R-squared:  0.001686,   Adjusted R-squared:  -0.01254
## F-statistic: 0.1185 on 6 and 421 DF,  p-value: 0.9942
# LM statistic for H0: age, kidslt6 and kidsge6 have zero coefficients,
# computed as n * R-squared from the auxiliary regression uhat_reg.
LM <- (summary(uhat_reg)$r.squared) * (nobs(uhat_reg))
LM
## [1] 0.7216757
# Upper-tail chi-square p-value; df = 3 because three exclusions are tested.
pchisq(LM, df=3, lower.tail = FALSE)
## [1] 0.8680941

HOME

#### Example4.1+

LM2 Continued on pp.65

# Residuals from the restricted model (age, kidslt6 and kidsge6 left out).
uhat_rst <- resid(lm(lwage ~ exper + expersq + educ, data = mroz))
# Rows with an observed wage, so each auxiliary residual vector has the same
# length as uhat_rst and the element-wise products below line up by position.
mroz_work <- subset(mroz, !is.na(wage))
# For each excluded regressor: partial out the included regressors, then
# multiply the resulting residuals by the restricted-model residuals.
u_age <- resid(lm(age ~ educ + exper + expersq, data = mroz_work)) * uhat_rst
u_kidslt6 <- resid(lm(kidslt6 ~ educ + exper + expersq, data = mroz_work)) * uhat_rst
u_kidsge6 <- resid(lm(kidsge6 ~ educ + exper + expersq, data = mroz_work)) * uhat_rst
udata <- data.frame(u_age, u_kidslt6, u_kidsge6)
udata$one <- 1 # column of ones used as the dependent variable
# Regress 1 on the three products with no intercept (+ 0). Without an
# intercept, summary() reports the uncentered R-squared, so n * R-squared
# below is the same quantity as n minus the residual sum of squares.
summary(one_r <- lm(one ~ u_age + u_kidslt6 + u_kidsge6 + 0, data=udata))
##
## Call:
## lm(formula = one ~ u_age + u_kidslt6 + u_kidsge6 + 0, data = udata)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## 0.7818 0.9926 0.9996 1.0055 1.2620
##
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)
## u_age     -0.002512   0.010715  -0.234    0.815
## u_kidslt6 -0.090505   0.169209  -0.535    0.593
## u_kidsge6 -0.026719   0.060337  -0.443    0.658
##
## Residual standard error: 1.003 on 425 degrees of freedom
## Multiple R-squared:  0.001196,   Adjusted R-squared:  -0.005854
## F-statistic: 0.1696 on 3 and 425 DF,  p-value: 0.9169
# Second LM statistic, again compared with a chi-square with 3 df.
LM <- (summary(one_r)$r.squared) * (nobs(one_r))
LM
## [1] 0.5118801
pchisq(LM, df=3, lower.tail = FALSE)
## [1] 0.9162738

HOME

#### Example4.3

Using IQ as a Proxy for Ability

# Read nls80.dta from the working directory (in Stata the same data can be
# obtained with the bcuse command).
nls80 <- read_dta("nls80.dta")
# Baseline wage equation, estimated without iq.
summary(lm(lwage ~ exper + tenure + married + south + urban + black + educ, data=nls80))
##
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban +
##     black + educ, data = nls80)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.98069 -0.21996  0.00707  0.24288  1.22822
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
# Same wage equation with iq added as a regressor (the ability proxy named in
# this example's title).
summary(lm(lwage ~ exper + tenure + married + south + urban + black + educ + iq, data=nls80))
##
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban +
##     black + educ + iq, data = nls80)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -2.01203 -0.22244  0.01017  0.22951  1.27478
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.1764392  0.1280006  40.441  < 2e-16 ***
## exper        0.0141458  0.0031651   4.469 8.82e-06 ***
## tenure       0.0113951  0.0024394   4.671 3.44e-06 ***
## married      0.1997644  0.0388025   5.148 3.21e-07 ***
## south       -0.0801695  0.0262529  -3.054 0.002325 **
## urban        0.1819463  0.0267929   6.791 1.99e-11 ***
## black       -0.1431253  0.0394925  -3.624 0.000306 ***
## educ         0.0544106  0.0069285   7.853 1.12e-14 ***
## iq           0.0035591  0.0009918   3.589 0.000350 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3632 on 926 degrees of freedom
## Multiple R-squared:  0.2628, Adjusted R-squared:  0.2564
## F-statistic: 41.27 on 8 and 926 DF,  p-value: < 2.2e-16

HOME

#### Example4.4

Effects of Job Training Grants on Worker Productivity

# Regress lscrap on grant using only the rows of jtrain with year == 1988.
summary(lm(lscrap ~ grant, data=subset(jtrain, jtrain$year==1988)))
##
## Call:
## lm(formula = lscrap ~ grant, data = subset(jtrain, jtrain$year ==
##     1988))
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -3.4043 -0.9536 -0.0465  0.9636  2.8103
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.4085     0.2406   1.698   0.0954 .
## grant         0.0566     0.4056   0.140   0.8895
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.423 on 52 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.0003744,  Adjusted R-squared:  -0.01885
## F-statistic: 0.01948 on 1 and 52 DF,  p-value: 0.8895
# Same 1988 regression with lscrap_1 added as a regressor.
summary(lm(lscrap ~ grant + lscrap_1, data=subset(jtrain, jtrain$year==1988)))
##
## Call:
## lm(formula = lscrap ~ grant + lscrap_1, data = subset(jtrain,
##     jtrain$year == 1988))
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -1.9146 -0.1763  0.0057  0.2308  1.5991
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.02124    0.08910   0.238   0.8126
## grant       -0.25397    0.14703  -1.727   0.0902 .
## lscrap_1     0.83116    0.04444  18.701   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5127 on 51 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.8728, Adjusted R-squared:  0.8678
## F-statistic: 174.9 on 2 and 51 DF,  p-value: < 2.2e-16

HOME

#### Example4.5

# Wage equation with educ, iq and their interaction (educ*iq expands to
# educ + iq + educ:iq). The fit is stored in lwage_rg for the joint test that
# follows.
summary(lwage_rg <- lm(lwage ~ exper + tenure + married + south + urban + black + educ*iq, data=nls80)  )
##
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban +
##     black + educ * iq, data = nls80)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -2.00733 -0.21715  0.01177  0.23456  1.27305
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.6482483  0.5462963  10.339  < 2e-16 ***
## exper        0.0139072  0.0031768   4.378 1.34e-05 ***
## tenure       0.0113929  0.0024397   4.670 3.46e-06 ***
## married      0.2008658  0.0388267   5.173 2.82e-07 ***
## south       -0.0802354  0.0262560  -3.056 0.002308 **
## urban        0.1835758  0.0268586   6.835 1.49e-11 ***
## black       -0.1466989  0.0397013  -3.695 0.000233 ***
## educ         0.0184559  0.0410608   0.449 0.653192
## iq          -0.0009418  0.0051625  -0.182 0.855289
## educ:iq      0.0003399  0.0003826   0.888 0.374564
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3632 on 925 degrees of freedom
## Multiple R-squared:  0.2634, Adjusted R-squared:  0.2563
## F-statistic: 36.76 on 9 and 925 DF,  p-value: < 2.2e-16
# Joint test that the educ:iq interaction and iq both have zero coefficients
# in lwage_rg.
linearHypothesis(lwage_rg, c("educ:iq =0", "iq=0"))
## Linear hypothesis test
##
## Hypothesis:
## educ:iq = 0
## iq = 0
##
## Model 1: restricted model
## Model 2: lwage ~ exper + tenure + married + south + urban + black + educ *
##     iq
##
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)
## 1    927 123.82
## 2    925 122.02  2    1.8024 6.8318 0.001134 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HOME