# Introductory Econometrics Using R
#
# Also covered using Python and Stata

library(wooldridge)
library(stargazer)
library(plm)
library(lmtest)
library(car)
options(width=120)

#### Example 13.1. Women’s Fertility over Time

# Regress kids on education, age (quadratic), race, region and
# residence dummies, plus the year dummies y74-y84, using fertil1.
fert_reg <- lm(
  kids ~ educ + age + agesq + black +
    east + northcen + west +
    farm + othrural + town + smcity +
    y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)
summary(fert_reg)
##
## Call:
## lm(formula = kids ~ educ + age + agesq + black + east + northcen +
##     west + farm + othrural + town + smcity + y74 + y76 + y78 +
##     y80 + y82 + y84, data = fertil1)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -3.9878 -1.0086 -0.0767  0.9331  4.6548
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.742457   3.051767  -2.537 0.011315 *
## educ        -0.128427   0.018349  -6.999 4.44e-12 ***
## age          0.532135   0.138386   3.845 0.000127 ***
## agesq       -0.005804   0.001564  -3.710 0.000217 ***
## black        1.075658   0.173536   6.198 8.02e-10 ***
## east         0.217324   0.132788   1.637 0.101992
## northcen     0.363114   0.120897   3.004 0.002729 **
## west         0.197603   0.166913   1.184 0.236719
## farm        -0.052557   0.147190  -0.357 0.721105
## othrural    -0.162854   0.175442  -0.928 0.353481
## town         0.084353   0.124531   0.677 0.498314
## smcity       0.211879   0.160296   1.322 0.186507
## y74          0.268183   0.172716   1.553 0.120771
## y76         -0.097379   0.179046  -0.544 0.586633
## y78         -0.068666   0.181684  -0.378 0.705544
## y80         -0.071305   0.182771  -0.390 0.696511
## y82         -0.522484   0.172436  -3.030 0.002502 **
## y84         -0.545166   0.174516  -3.124 0.001831 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.555 on 1111 degrees of freedom
## Multiple R-squared:  0.1295, Adjusted R-squared:  0.1162
## F-statistic: 9.723 on 17 and 1111 DF,  p-value: < 2.2e-16
# Joint F test that all six year dummies (y74, y76, ..., y84) are zero.
linearHypothesis(fert_reg, paste0("y", seq(74, 84, by = 2), "=0"))
## Linear hypothesis test
##
## Hypothesis:
## y74 = 0
## y76 = 0
## y78 = 0
## y80 = 0
## y82 = 0
## y84 = 0
##
## Model 1: restricted model
## Model 2: kids ~ educ + age + agesq + black + east + northcen + west +
##     farm + othrural + town + smcity + y74 + y76 + y78 + y80 +
##     y82 + y84
##
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)
## 1   1117 2771.0
## 2   1111 2685.9  6    85.139 5.8695 4.855e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Studentized Breusch-Pagan test for heteroskedasticity in fert_reg
# (studentize = TRUE is the default, spelled out here).
bptest(fert_reg, studentize = TRUE)
##
##  studentized Breusch-Pagan test
##
## data:  fert_reg
## BP = 55.315, df = 17, p-value = 6.098e-06
# Breusch-Pagan LM statistic by hand: regress the squared residuals of
# fert_reg on the same regressors and form n * R-squared. The result
# matches the BP statistic reported by bptest(fert_reg).
u2 <- resid(fert_reg)^2
u2_reg <- lm(
  u2 ~ educ + age + agesq + black + east + northcen + west +
    farm + othrural + town + smcity +
    y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)
LM <- nobs(u2_reg) * summary(u2_reg)$r.squared
LM
## [1] 55.31537

#### Example 13.2. Changes in the Return to Education and the Gender Wage Gap

wage_reg <- lm(
  lwage ~ y85 + educ + y85educ + exper + expersq + union + female + y85fem,
  data = cps78_85
)
stargazer(wage_reg, no.space = TRUE, type = "text")
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                                lwage
## -----------------------------------------------
## y85                            0.118
##                               (0.124)
## educ                         0.075***
##                               (0.007)
## y85educ                       0.018**
##                               (0.009)
## exper                        0.030***
##                               (0.004)
## expersq                     -0.0004***
##                              (0.0001)
## union                        0.202***
##                               (0.030)
## female                       -0.317***
##                               (0.037)
## y85fem                        0.085*
##                               (0.051)
## Constant                     0.459***
##                               (0.093)
## -----------------------------------------------
## Observations                   1,084
## R2                             0.426
## Adjusted R2                    0.422
## Residual Std. Error      0.413 (df = 1075)
## F Statistic          99.804*** (df = 8; 1075)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# Coefficients are selected by name rather than by position so the
# expressions keep working if the regressor order changes.
coef(wage_reg)["educ"] * 100 # Return to Education in 1978 (%)
##     educ
## 7.472091
(coef(wage_reg)["educ"] + coef(wage_reg)["y85educ"]) * 100 # Return to Education in 1985 (%)
##     educ
## 9.318145

#### Example 13.3. Effect of a Garbage Incinerator’s Location on Housing Prices

# Separate regressions of rprice on nearinc for the 1981 and 1978
# subsamples of kielmc. Each assignment sits on its own line; the two
# statements were previously fused, which does not parse.
garb81_reg <- lm(rprice ~ nearinc, data = subset(kielmc, year == 1981))
garb78_reg <- lm(rprice ~ nearinc, data = subset(kielmc, year == 1978))
stargazer(
  garb81_reg, garb78_reg,
  column.labels = c("Garb81", "Garb78"),
  no.space = TRUE, type = "text"
)
##
## ===================================================================
##                                   Dependent variable:
##                     -----------------------------------------------
##                                         rprice
##                             Garb81                  Garb78
##                               (1)                     (2)
## -------------------------------------------------------------------
## nearinc                 -30,688.270***          -18,824.370***
##                           (5,827.709)             (4,744.594)
## Constant                101,307.500***           82,517.230***
##                           (3,093.027)             (2,653.790)
## -------------------------------------------------------------------
## Observations                  142                     179
## R2                           0.165                   0.082
## Residual Std. Error  31,238.040 (df = 140)   29,431.960 (df = 177)
## F Statistic         27.730*** (df = 1; 140) 15.741*** (df = 1; 177)
## ===================================================================
## Note:                                   *p<0.1; **p<0.05; ***p<0.01
##### Table 13.2 Effects of Incinerator Location on Housing Prices (rprice)
# Three specifications on the full kielmc sample, each containing y81,
# nearinc and their interaction y81nrinc; columns Two and Three add
# controls. kielmc is passed directly: subset() without a condition
# only returned a copy of the whole data frame.
One <- lm(rprice ~ y81 + nearinc + y81nrinc, data = kielmc)
Two <- lm(rprice ~ y81 + nearinc + y81nrinc + age + agesq, data = kielmc)
Three <- lm(
  rprice ~ y81 + nearinc + y81nrinc + age + agesq +
    intst + land + area + rooms + baths,
  data = kielmc
)
stargazer(
  One, Two, Three,
  column.labels = c("One", "Two", "Three"),
  no.space = TRUE, type = "text"
)
##
## ============================================================================================
##                                               Dependent variable:
##                     ------------------------------------------------------------------------
##                                                      rprice
##                               One                     Two                    Three
##                               (1)                     (2)                     (3)
## --------------------------------------------------------------------------------------------
## y81                      18,790.290***           21,321.040***           13,928.480***
##                           (4,050.065)             (3,443.631)             (2,798.747)
## nearinc                 -18,824.370***            9,397.936*               3,780.337
##                           (4,875.322)             (4,812.222)             (4,453.415)
## y81nrinc                  -11,863.900           -21,920.270***           -14,177.930***
##                           (7,456.646)             (6,359.745)             (4,987.267)
## age                                              -1,494.424***            -739.451***
##                                                    (131.860)               (131.127)
## agesq                                              8.691***                 3.453***
##                                                     (0.848)                 (0.813)
## intst                                                                      -0.539***
##                                                                             (0.196)
## land                                                                        0.141***
##                                                                             (0.031)
## area                                                                       18.086***
##                                                                             (2.306)
## rooms                                                                     3,304.227**
##                                                                           (1,661.248)
## baths                                                                     6,977.317***
##                                                                           (2,581.321)
## Constant                 82,517.230***           89,116.540***             13,807.670
##                           (2,726.910)             (2,406.051)             (11,166.590)
## --------------------------------------------------------------------------------------------
## Observations                  321                     321                     321
## R2                           0.174                   0.414                   0.660
## Adjusted R2                  0.166                   0.405                   0.649
## Residual Std. Error  30,242.900 (df = 317)   25,543.290 (df = 315)   19,619.020 (df = 310)
## F Statistic         22.251*** (df = 3; 317) 44.591*** (df = 5; 315) 60.189*** (df = 10; 310)
## ============================================================================================
## Note:                                                            *p<0.1; **p<0.05; ***p<0.01
# Same specifications with lprice as the dependent variable; the fuller
# model uses the logged controls lintst, lland and larea. kielmc is
# passed directly instead of through a condition-less subset().
lOne <- lm(lprice ~ y81 + nearinc + y81nrinc, data = kielmc)
lThree <- lm(
  lprice ~ y81 + nearinc + y81nrinc + age + agesq +
    lintst + lland + larea + rooms + baths,
  data = kielmc
)
stargazer(
  lOne, lThree,
  column.labels = c("Oneln", "Threeln"),
  no.space = TRUE, type = "text"
)
##
## =====================================================================
##                                    Dependent variable:
##                     -------------------------------------------------
##                                          lprice
##                              Oneln                   Threeln
##                               (1)                      (2)
## ---------------------------------------------------------------------
## y81                        0.457***                 0.426***
##                             (0.045)                  (0.028)
## nearinc                    -0.340***                  0.032
##                             (0.055)                  (0.047)
## y81nrinc                    -0.063                  -0.132**
##                             (0.083)                  (0.052)
## age                                                 -0.008***
##                                                      (0.001)
## agesq                                              0.00004***
##                                                     (0.00001)
## lintst                                               -0.061*
##                                                      (0.032)
## lland                                               0.100***
##                                                      (0.024)
## larea                                               0.351***
##                                                      (0.051)
## rooms                                               0.047***
##                                                      (0.017)
## baths                                               0.094***
##                                                      (0.028)
## Constant                   11.285***                7.652***
##                             (0.031)                  (0.416)
## ---------------------------------------------------------------------
## Observations                  321                      321
## R2                           0.409                    0.790
## Residual Std. Error    0.338 (df = 317)         0.204 (df = 310)
## F Statistic         73.149*** (df = 3; 317) 116.909*** (df = 10; 310)
## =====================================================================
## Note:                                     *p<0.1; **p<0.05; ***p<0.01

#### Example 13.4. Effect of Worker Compensation Laws on Weeks out of Work

# Regress ldurat on afchnge, highearn and afhigh using only the rows of
# injury with ky == 1.
summary(
  lm(
    ldurat ~ afchnge + highearn + afhigh,
    data = subset(injury, injury$ky == 1)
  )
)
##
## Call:
## lm(formula = ldurat ~ afchnge + highearn + afhigh, data = subset(injury,
##     injury$ky == 1))
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -2.9666 -0.8872  0.0042  0.8126  4.0784
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.125615   0.030737  36.621  < 2e-16 ***
## afchnge     0.007657   0.044717   0.171  0.86404
## highearn    0.256479   0.047446   5.406 6.72e-08 ***
## afhigh      0.190601   0.068509   2.782  0.00542 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.269 on 5622 degrees of freedom
## Multiple R-squared:  0.02066,    Adjusted R-squared:  0.02014
## F-statistic: 39.54 on 3 and 5622 DF,  p-value: < 2.2e-16

#### Example 13.5. Sleeping versus Working

# Regress cslpnap on ctotwrk, ceduc, cmarr, cyngkid and cgdhlth
# using slp75_81.
summary(
  lm(
    cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth,
    data = slp75_81
  )
)
##
## Call:
## lm(formula = cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth,
##     data = slp75_81)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -2454.2  -307.2    79.8   334.4  2037.9
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -92.63404   45.86590  -2.020   0.0446 *
## ctotwrk      -0.22667    0.03605  -6.287 1.58e-09 ***
## ceduc        -0.02447   48.75938  -0.001   0.9996
## cmarr       104.21395   92.85536   1.122   0.2629
## cyngkid      94.66540   87.65252   1.080   0.2813
## cgdhlth      87.57785   76.59913   1.143   0.2541
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 598.6 on 233 degrees of freedom
## Multiple R-squared:  0.1495, Adjusted R-squared:  0.1313
## F-statistic: 8.191 on 5 and 233 DF,  p-value: 3.827e-07

#### Example 13.6. Distributed Lag of Crime Rate on Clear-Up Rate

# Regress clcrime on cclrprc1 and cclrprc2 using crime3.
summary(
  lm(
    clcrime ~ cclrprc1 + cclrprc2,
    data = crime3
  )
)
##
## Call:
## lm(formula = clcrime ~ cclrprc1 + cclrprc2, data = crime3)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -1.0335 -0.2351  0.0299  0.2178  0.8263
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.085656   0.063782   1.343   0.1854
## cclrprc1    -0.004048   0.004720  -0.858   0.3952
## cclrprc2    -0.013197   0.005195  -2.540   0.0142 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3446 on 50 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.1933, Adjusted R-squared:  0.1611
## F-statistic: 5.992 on 2 and 50 DF,  p-value: 0.004649

#### Example 13.7. Effect of Drunk Driving Laws on Traffic Fatalities

# Regress cdthrte on copen and cadmn using traffic1.
summary(
  lm(
    cdthrte ~ copen + cadmn,
    data = traffic1
  )
)
##
## Call:
## lm(formula = cdthrte ~ copen + cadmn, data = traffic1)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.25261 -0.14337 -0.00321  0.19679  0.79679
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.49679    0.05243  -9.476 1.43e-12 ***
## copen       -0.41968    0.20559  -2.041   0.0467 *
## cadmn       -0.15060    0.11682  -1.289   0.2035
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3435 on 48 degrees of freedom
## Multiple R-squared:  0.1187, Adjusted R-squared:  0.08194
## F-statistic: 3.231 on 2 and 48 DF,  p-value: 0.04824

#### Example 13.8. Effect of Enterprise Zones on Unemployment Claims

# Time-series copy of the data. The regression below reads `ezunem`
# directly, so `tsezunem` is not used by it.
tsezunem <- ts(ezunem)
# Regress guclms on the year dummies d82-d88 and cez.
ezon_reg <- lm(
  guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + cez,
  data = ezunem
)
summary(ezon_reg)
##
## Call:
## lm(formula = guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 +
##     cez, data = ezunem)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -0.4925 -0.1427 -0.0092  0.1495  0.6062
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.32163    0.04606  -6.982 6.55e-11 ***
## d82          0.77876    0.06514  11.954  < 2e-16 ***
## d83         -0.03312    0.06514  -0.508   0.6118
## d84         -0.01714    0.06855  -0.250   0.8029
## d85          0.32308    0.06668   4.845 2.87e-06 ***
## d86          0.29215    0.06514   4.485 1.35e-05 ***
## d87          0.05395    0.06514   0.828   0.4088
## d88         -0.01705    0.06514  -0.262   0.7938
## cez         -0.18188    0.07819  -2.326   0.0212 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2161 on 167 degrees of freedom
##   (22 observations deleted due to missingness)
## Multiple R-squared:  0.623,  Adjusted R-squared:  0.6049
## F-statistic:  34.5 on 8 and 167 DF,  p-value: < 2.2e-16
# Exact percentage change implied by the cez coefficient:
# 100 * (exp(b) - 1). The coefficient is selected by name (it is the
# 9th entry); the former `\$` was a Markdown escape that R cannot parse.
(exp(coef(ezon_reg)["cez"]) - 1) * 100
##       cez
## -16.62966
# Studentized Breusch-Pagan test for heteroskedasticity in ezon_reg
# (studentize = TRUE is the default, spelled out here).
bptest(ezon_reg, studentize = TRUE)
##
##  studentized Breusch-Pagan test
##
## data:  ezon_reg
## BP = 6.914, df = 8, p-value = 0.5459

#### Example 13.9. County Crime Rates in North Carolina

# Declare the panel structure explicitly: county indexes the unit and
# year the period.
crime4p <- pdata.frame(crime4, index = c("county", "year"))
pdim(crime4p)
## Balanced Panel: n = 90, T = 7, N = 630
# Fit on the indexed panel frame built above, rather than on the plain
# data frame, so the index does not depend on column order.
# No `model` argument is given, so plm() fits its default within
# (individual effects) estimator, as the summary header below confirms.
# NOTE(review): the formula requests an intercept (`+ 1`), which the
# within fit drops, and the c-prefixed variables look like they are
# already differenced. model = "pooling" may have been intended.
# Confirm before changing, since it alters every estimate below.
crime_hetr <- plm(
  clcrmrte ~ d83 + d84 + d85 + d86 + d87 +
    clprbarr + clprbcon + clprbpri + clavgsen + clpolpc + 1,
  data = crime4p
)
stargazer(crime_hetr, single.row = TRUE, no.space = TRUE, type = "text")
##
## ========================================
##                  Dependent variable:
##              ---------------------------
##                       clcrmrte
## ----------------------------------------
## d83               -0.100*** (0.025)
## d84                -0.048* (0.025)
## d85                -0.005 (0.025)
## d86                 0.028 (0.026)
## d87                 0.041 (0.026)
## clprbarr          -0.330*** (0.033)
## clprbcon          -0.240*** (0.020)
## clprbpri          -0.164*** (0.028)
## clavgsen           -0.023 (0.024)
## clpolpc           0.411*** (0.029)
## ----------------------------------------
## Observations             540
## R2                      0.448
## F Statistic   35.654*** (df = 10; 440)
## ========================================
## Note:        *p<0.1; **p<0.05; ***p<0.01
# Conventional (non-robust) summary. summary() for plm objects has no
# `robust` argument; the former `robust = TRUE` was silently ignored,
# which is why the standard errors below equal those in the table above.
summary(crime_hetr)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = clcrmrte ~ d83 + d84 + d85 + d86 + d87 + clprbarr +
##     clprbcon + clprbpri + clavgsen + clpolpc + 1, data = crime4p)
##
## Balanced Panel: n = 90, T = 6, N = 540
##
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max.
## -0.6862980 -0.0714434 -0.0017868  0.0751380  0.6628832
##
## Coefficients:
##            Estimate Std. Error  t-value  Pr(>|t|)
## d83      -0.1004876  0.0254125  -3.9543 8.945e-05 ***
## d84      -0.0483243  0.0249774  -1.9347   0.05366 .
## d85      -0.0046755  0.0249772  -0.1872   0.85160
## d86       0.0278299  0.0256981   1.0830   0.27942
## d87       0.0405086  0.0259853   1.5589   0.11974
## clprbarr -0.3298869  0.0329905  -9.9995 < 2.2e-16 ***
## clprbcon -0.2401652  0.0199024 -12.0671 < 2.2e-16 ***
## clprbpri -0.1638598  0.0280913  -5.8331 1.055e-08 ***
## clavgsen -0.0233595  0.0238217  -0.9806   0.32733
## clpolpc   0.4107709  0.0293047  14.0172 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares:    21.392
## Residual Sum of Squares: 11.817
## R-Squared:      0.44761
## F-statistic: 35.6545 on 10 and 440 DF, p-value: < 2.22e-16
# Heteroskedasticity-robust standard errors, requested through the
# `vcov` argument (output not captured in this transcript).
summary(crime_hetr, vcov = vcovHC)
# Breusch-Pagan test applied to the fitted panel model
# (studentize = TRUE is the default, spelled out here).
bptest(crime_hetr, studentize = TRUE)
##
## BP = 10.93, df = 10, p-value = 0.363