Introductory Econometrics Using R

Also covered using Python and Stata

library(wooldridge)
library(stargazer)
library(plm)
library(lmtest)
library(car)
options(width=120)

Example 13.1. Women’s Fertility over Time

# Example 13.1: regress kids on educ, a quadratic in age, the remaining
# covariates in fertil1 listed below, and the y74-y84 terms.
fert_reg <- lm(
  kids ~ educ + age + agesq + black +
    east + northcen + west +
    farm + othrural + town + smcity +
    y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)
summary(fert_reg)
## 
## Call:
## lm(formula = kids ~ educ + age + agesq + black + east + northcen + 
##     west + farm + othrural + town + smcity + y74 + y76 + y78 + 
##     y80 + y82 + y84, data = fertil1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9878 -1.0086 -0.0767  0.9331  4.6548 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.742457   3.051767  -2.537 0.011315 *  
## educ        -0.128427   0.018349  -6.999 4.44e-12 ***
## age          0.532135   0.138386   3.845 0.000127 ***
## agesq       -0.005804   0.001564  -3.710 0.000217 ***
## black        1.075658   0.173536   6.198 8.02e-10 ***
## east         0.217324   0.132788   1.637 0.101992    
## northcen     0.363114   0.120897   3.004 0.002729 ** 
## west         0.197603   0.166913   1.184 0.236719    
## farm        -0.052557   0.147190  -0.357 0.721105    
## othrural    -0.162854   0.175442  -0.928 0.353481    
## town         0.084353   0.124531   0.677 0.498314    
## smcity       0.211879   0.160296   1.322 0.186507    
## y74          0.268183   0.172716   1.553 0.120771    
## y76         -0.097379   0.179046  -0.544 0.586633    
## y78         -0.068666   0.181684  -0.378 0.705544    
## y80         -0.071305   0.182771  -0.390 0.696511    
## y82         -0.522484   0.172436  -3.030 0.002502 ** 
## y84         -0.545166   0.174516  -3.124 0.001831 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.555 on 1111 degrees of freedom
## Multiple R-squared:  0.1295, Adjusted R-squared:  0.1162 
## F-statistic: 9.723 on 17 and 1111 DF,  p-value: < 2.2e-16
# Joint F test that the six y74-y84 coefficients are all zero; the
# restriction strings ("y74=0", ...) are generated from the term names.
linearHypothesis(
  fert_reg,
  paste0(c("y74", "y76", "y78", "y80", "y82", "y84"), "=0")
)
## Linear hypothesis test
## 
## Hypothesis:
## y74 = 0
## y76 = 0
## y78 = 0
## y80 = 0
## y82 = 0
## y84 = 0
## 
## Model 1: restricted model
## Model 2: kids ~ educ + age + agesq + black + east + northcen + west + 
##     farm + othrural + town + smcity + y74 + y76 + y78 + y80 + 
##     y82 + y84
## 
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1   1117 2771.0                                  
## 2   1111 2685.9  6    85.139 5.8695 4.855e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Breusch-Pagan heteroskedasticity test; studentize = TRUE is the default
# (the studentized variant), written out here for clarity.
bptest(fert_reg, studentize = TRUE)
## 
##  studentized Breusch-Pagan test
## 
## data:  fert_reg
## BP = 55.315, df = 17, p-value = 6.098e-06
# Reproduce the BP statistic by hand: regress the squared residuals of
# fert_reg on the same regressors and form LM = n * R-squared.
u2 <- residuals(fert_reg)^2
u2_reg <- lm(
  u2 ~ educ + age + agesq + black + east + northcen + west + farm +
    othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)
LM <- nobs(u2_reg) * summary(u2_reg)$r.squared
LM
## [1] 55.31537

Example 13.2. Changes in the Return to Education and the Gender Wage Gap

# Example 13.2: log wage equation on cps78_85 with y85 entering on its own
# and through the y85educ and y85fem terms; printed as a text table.
wage_reg <- lm(
  lwage ~ y85 + educ + y85educ + exper + expersq + union + female + y85fem,
  data = cps78_85
)
stargazer(wage_reg, type = "text", no.space = TRUE)
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                lwage           
## -----------------------------------------------
## y85                            0.118           
##                               (0.124)          
## educ                         0.075***          
##                               (0.007)          
## y85educ                       0.018**          
##                               (0.009)          
## exper                        0.030***          
##                               (0.004)          
## expersq                     -0.0004***         
##                              (0.0001)          
## union                        0.202***          
##                               (0.030)          
## female                       -0.317***         
##                               (0.037)          
## y85fem                        0.085*           
##                               (0.051)          
## Constant                     0.459***          
##                               (0.093)          
## -----------------------------------------------
## Observations                   1,084           
## R2                             0.426           
## Adjusted R2                    0.422           
## Residual Std. Error      0.413 (df = 1075)     
## F Statistic          99.804*** (df = 8; 1075)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# Select the coefficient by name through coef(): `$coef` only worked via
# partial matching of `coefficients`, and position 3 would silently point at
# another term if the regressor order changed.
coef(wage_reg)["educ"] * 100 # Return to Education in 1978 (%)
##     educ 
## 7.472091
# Sum of the educ and y85educ coefficients, addressed by name instead of
# positions 3 and 4 of a partial-matched `$coef`. The result keeps the name
# of the first operand ("educ").
(coef(wage_reg)["educ"] + coef(wage_reg)["y85educ"]) * 100 # Return to Education in 1985 (%)
##     educ 
## 9.318145

Example 13.3. Effect of a Garbage Incinerator’s Location on Housing Prices

# Example 13.3: separate regressions of rprice on nearinc for the 1978 and
# 1981 rows of kielmc, reported side by side (1981 column first).
garb78_reg <- lm(
  rprice ~ nearinc,
  data = subset(kielmc, kielmc$year == 1978)
)
garb81_reg <- lm(
  rprice ~ nearinc,
  data = subset(kielmc, kielmc$year == 1981)
)
stargazer(
  garb81_reg, garb78_reg,
  column.labels = c("Garb81", "Garb78"),
  no.space = TRUE,
  type = "text"
)
## 
## ===================================================================
##                                   Dependent variable:              
##                     -----------------------------------------------
##                                         rprice                     
##                             Garb81                  Garb78         
##                               (1)                     (2)          
## -------------------------------------------------------------------
## nearinc                 -30,688.270***          -18,824.370***     
##                           (5,827.709)             (4,744.594)      
## Constant                101,307.500***           82,517.230***     
##                           (3,093.027)             (2,653.790)      
## -------------------------------------------------------------------
## Observations                  142                     179          
## R2                           0.165                   0.082         
## Adjusted R2                  0.159                   0.076         
## Residual Std. Error  31,238.040 (df = 140)   29,431.960 (df = 177) 
## F Statistic         27.730*** (df = 1; 140) 15.741*** (df = 1; 177)
## ===================================================================
## Note:                                   *p<0.1; **p<0.05; ***p<0.01
Table 13.2 Effects of Incinerator Location on Housing Prices (rprice)
# Three nested specifications of rprice on y81, nearinc and y81nrinc, each
# adding more controls. Every row of kielmc is used, so the data frame is
# passed directly: the previous subset(kielmc) call had no condition and
# therefore just returned all rows.
One <- lm(rprice ~ y81 + nearinc + y81nrinc, data = kielmc)
Two <- lm(rprice ~ y81 + nearinc + y81nrinc + age + agesq, data = kielmc)
Three <- lm(
  rprice ~ y81 + nearinc + y81nrinc + age + agesq +
    intst + land + area + rooms + baths,
  data = kielmc
)
stargazer(
  One, Two, Three,
  column.labels = c("One", "Two", "Three"),
  no.space = TRUE,
  type = "text"
)
## 
## ============================================================================================
##                                               Dependent variable:                           
##                     ------------------------------------------------------------------------
##                                                      rprice                                 
##                               One                     Two                    Three          
##                               (1)                     (2)                     (3)           
## --------------------------------------------------------------------------------------------
## y81                      18,790.290***           21,321.040***           13,928.480***      
##                           (4,050.065)             (3,443.631)             (2,798.747)       
## nearinc                 -18,824.370***            9,397.936*               3,780.337        
##                           (4,875.322)             (4,812.222)             (4,453.415)       
## y81nrinc                  -11,863.900           -21,920.270***           -14,177.930***     
##                           (7,456.646)             (6,359.745)             (4,987.267)       
## age                                              -1,494.424***            -739.451***       
##                                                    (131.860)               (131.127)        
## agesq                                              8.691***                 3.453***        
##                                                     (0.848)                 (0.813)         
## intst                                                                      -0.539***        
##                                                                             (0.196)         
## land                                                                        0.141***        
##                                                                             (0.031)         
## area                                                                       18.086***        
##                                                                             (2.306)         
## rooms                                                                     3,304.227**       
##                                                                           (1,661.248)       
## baths                                                                     6,977.317***      
##                                                                           (2,581.321)       
## Constant                 82,517.230***           89,116.540***             13,807.670       
##                           (2,726.910)             (2,406.051)             (11,166.590)      
## --------------------------------------------------------------------------------------------
## Observations                  321                     321                     321           
## R2                           0.174                   0.414                   0.660          
## Adjusted R2                  0.166                   0.405                   0.649          
## Residual Std. Error  30,242.900 (df = 317)   25,543.290 (df = 315)   19,619.020 (df = 310)  
## F Statistic         22.251*** (df = 3; 317) 44.591*** (df = 5; 315) 60.189*** (df = 10; 310)
## ============================================================================================
## Note:                                                            *p<0.1; **p<0.05; ***p<0.01
# Log-price versions of the first and third specifications. As above, the
# condition-less subset(kielmc) selected every row, so kielmc is passed
# directly.
lOne <- lm(lprice ~ y81 + nearinc + y81nrinc, data = kielmc)
lThree <- lm(
  lprice ~ y81 + nearinc + y81nrinc + age + agesq +
    lintst + lland + larea + rooms + baths,
  data = kielmc
)
stargazer(
  lOne, lThree,
  column.labels = c("Oneln", "Threeln"),
  no.space = TRUE,
  type = "text"
)
## 
## =====================================================================
##                                    Dependent variable:               
##                     -------------------------------------------------
##                                          lprice                      
##                              Oneln                   Threeln         
##                               (1)                      (2)           
## ---------------------------------------------------------------------
## y81                        0.457***                 0.426***         
##                             (0.045)                  (0.028)         
## nearinc                    -0.340***                  0.032          
##                             (0.055)                  (0.047)         
## y81nrinc                    -0.063                  -0.132**         
##                             (0.083)                  (0.052)         
## age                                                 -0.008***        
##                                                      (0.001)         
## agesq                                              0.00004***        
##                                                     (0.00001)        
## lintst                                               -0.061*         
##                                                      (0.032)         
## lland                                               0.100***         
##                                                      (0.024)         
## larea                                               0.351***         
##                                                      (0.051)         
## rooms                                               0.047***         
##                                                      (0.017)         
## baths                                               0.094***         
##                                                      (0.028)         
## Constant                   11.285***                7.652***         
##                             (0.031)                  (0.416)         
## ---------------------------------------------------------------------
## Observations                  321                      321           
## R2                           0.409                    0.790          
## Adjusted R2                  0.403                    0.784          
## Residual Std. Error    0.338 (df = 317)         0.204 (df = 310)     
## F Statistic         73.149*** (df = 3; 317) 116.909*** (df = 10; 310)
## =====================================================================
## Note:                                     *p<0.1; **p<0.05; ***p<0.01

Example 13.4. Effect of Worker Compensation Laws on Weeks out of Work

# Example 13.4: regress ldurat on afchnge, highearn and afhigh using only
# the rows of injury with ky == 1.
summary(
  lm(
    ldurat ~ afchnge + highearn + afhigh,
    data = subset(injury, injury$ky == 1)
  )
)
## 
## Call:
## lm(formula = ldurat ~ afchnge + highearn + afhigh, data = subset(injury, 
##     injury$ky == 1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9666 -0.8872  0.0042  0.8126  4.0784 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.125615   0.030737  36.621  < 2e-16 ***
## afchnge     0.007657   0.044717   0.171  0.86404    
## highearn    0.256479   0.047446   5.406 6.72e-08 ***
## afhigh      0.190601   0.068509   2.782  0.00542 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.269 on 5622 degrees of freedom
## Multiple R-squared:  0.02066,    Adjusted R-squared:  0.02014 
## F-statistic: 39.54 on 3 and 5622 DF,  p-value: < 2.2e-16

Example 13.5. Sleeping versus Working

# Example 13.5: regression of cslpnap on ctotwrk and the other c-prefixed
# covariates in slp75_81.
summary(
  lm(
    cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth,
    data = slp75_81
  )
)
## 
## Call:
## lm(formula = cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth, 
##     data = slp75_81)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2454.2  -307.2    79.8   334.4  2037.9 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -92.63404   45.86590  -2.020   0.0446 *  
## ctotwrk      -0.22667    0.03605  -6.287 1.58e-09 ***
## ceduc        -0.02447   48.75938  -0.001   0.9996    
## cmarr       104.21395   92.85536   1.122   0.2629    
## cyngkid      94.66540   87.65252   1.080   0.2813    
## cgdhlth      87.57785   76.59913   1.143   0.2541    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 598.6 on 233 degrees of freedom
## Multiple R-squared:  0.1495, Adjusted R-squared:  0.1313 
## F-statistic: 8.191 on 5 and 233 DF,  p-value: 3.827e-07

Example 13.6. Distributed Lag of Crime Rate on Clear-Up Rate

# Regress clcrime on cclrprc1 and cclrprc2 from crime3; rows with missing
# values are dropped by lm() (see the "deleted due to missingness" line).
summary(
  lm(clcrime ~ cclrprc1 + cclrprc2, data = crime3)
)
## 
## Call:
## lm(formula = clcrime ~ cclrprc1 + cclrprc2, data = crime3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0335 -0.2351  0.0299  0.2178  0.8263 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.085656   0.063782   1.343   0.1854  
## cclrprc1    -0.004048   0.004720  -0.858   0.3952  
## cclrprc2    -0.013197   0.005195  -2.540   0.0142 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3446 on 50 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.1933, Adjusted R-squared:  0.1611 
## F-statistic: 5.992 on 2 and 50 DF,  p-value: 0.004649

Example 13.7. Effect of Drunk Driving Laws on Traffic Fatalities

# Example 13.7: regress cdthrte on copen and cadmn using traffic1.
summary(
  lm(cdthrte ~ copen + cadmn, data = traffic1)
)
## 
## Call:
## lm(formula = cdthrte ~ copen + cadmn, data = traffic1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25261 -0.14337 -0.00321  0.19679  0.79679 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.49679    0.05243  -9.476 1.43e-12 ***
## copen       -0.41968    0.20559  -2.041   0.0467 *  
## cadmn       -0.15060    0.11682  -1.289   0.2035    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3435 on 48 degrees of freedom
## Multiple R-squared:  0.1187, Adjusted R-squared:  0.08194 
## F-statistic: 3.231 on 2 and 48 DF,  p-value: 0.04824

Example 13.8. Effect of Enterprise Zones on Unemployment Claims

# ts() conversion of ezunem; the regression below reads ezunem directly and
# does not use this object.
tsezunem <- ts(ezunem)
# Example 13.8: guclms on the d82-d88 terms and cez; rows with missing
# values are dropped by lm().
ezon_reg <- lm(
  guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + cez,
  data = ezunem
)
summary(ezon_reg)
## 
## Call:
## lm(formula = guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + 
##     cez, data = ezunem)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4925 -0.1427 -0.0092  0.1495  0.6062 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.32163    0.04606  -6.982 6.55e-11 ***
## d82          0.77876    0.06514  11.954  < 2e-16 ***
## d83         -0.03312    0.06514  -0.508   0.6118    
## d84         -0.01714    0.06855  -0.250   0.8029    
## d85          0.32308    0.06668   4.845 2.87e-06 ***
## d86          0.29215    0.06514   4.485 1.35e-05 ***
## d87          0.05395    0.06514   0.828   0.4088    
## d88         -0.01705    0.06514  -0.262   0.7938    
## cez         -0.18188    0.07819  -2.326   0.0212 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2161 on 167 degrees of freedom
##   (22 observations deleted due to missingness)
## Multiple R-squared:  0.623,  Adjusted R-squared:  0.6049 
## F-statistic:  34.5 on 8 and 167 DF,  p-value: < 2.2e-16
# Percentage effect implied by the cez coefficient, 100 * (exp(b) - 1).
# The coefficient is selected by name via coef() rather than by position 9
# of a partial-matched `$coef`, so it cannot drift if terms are added.
(exp(coef(ezon_reg)["cez"]) - 1) * 100
##       cez 
## -16.62966
# Studentized Breusch-Pagan test on the enterprise-zone regression
# (studentize = TRUE is the default, written out explicitly).
bptest(ezon_reg, studentize = TRUE)
## 
##  studentized Breusch-Pagan test
## 
## data:  ezon_reg
## BP = 6.914, df = 8, p-value = 0.5459

Example 13.9. County Crime Rates in North Carolina

# Declare the panel index once (county = individual, year = time) and reuse
# the resulting pdata.frame for estimation, so the index is explicit instead
# of being inferred from the column order of crime4.
crime4p <- pdata.frame(crime4, index = c("county", "year"))
pdim(crime4p)
## Balanced Panel: n = 90, T = 7, N = 630
# plm() defaults to the one-way "within" estimator (see the summary header
# below). That transformation removes any intercept, so the former `+ 1`
# term had no effect and is dropped. Estimates are unchanged.
# NOTE(review): the c-prefixed variables look like first differences; if
# pooled OLS on the differences is what this example intends, add
# model = "pooling" and regenerate the output -- confirm against the text.
crime_hetr <- plm(
  clcrmrte ~ d83 + d84 + d85 + d86 + d87 +
    clprbarr + clprbcon + clprbpri + clavgsen + clpolpc,
  data = crime4p
)
stargazer(crime_hetr, single.row = TRUE, no.space = TRUE, type = "text")
## 
## ========================================
##                  Dependent variable:    
##              ---------------------------
##                       clcrmrte          
## ----------------------------------------
## d83               -0.100*** (0.025)     
## d84                -0.048* (0.025)      
## d85                -0.005 (0.025)       
## d86                 0.028 (0.026)       
## d87                 0.041 (0.026)       
## clprbarr          -0.330*** (0.033)     
## clprbcon          -0.240*** (0.020)     
## clprbpri          -0.164*** (0.028)     
## clavgsen           -0.023 (0.024)       
## clpolpc           0.411*** (0.029)      
## ----------------------------------------
## Observations             540            
## R2                      0.448           
## Adjusted R2             0.323           
## F Statistic   35.654*** (df = 10; 440)  
## ========================================
## Note:        *p<0.1; **p<0.05; ***p<0.01
# summary.plm() has no `robust` argument: the former robust = TRUE was
# silently ignored, which is why the standard errors below equal the
# conventional ones in the table above. For heteroskedasticity-robust
# standard errors pass vcov = vcovHC instead and regenerate the output.
summary(crime_hetr)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = clcrmrte ~ d83 + d84 + d85 + d86 + d87 + clprbarr + 
##     clprbcon + clprbpri + clavgsen + clpolpc, data = crime4p)
## 
## Balanced Panel: n = 90, T = 6, N = 540
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.6862980 -0.0714434 -0.0017868  0.0751380  0.6628832 
## 
## Coefficients:
##            Estimate Std. Error  t-value  Pr(>|t|)    
## d83      -0.1004876  0.0254125  -3.9543 8.945e-05 ***
## d84      -0.0483243  0.0249774  -1.9347   0.05366 .  
## d85      -0.0046755  0.0249772  -0.1872   0.85160    
## d86       0.0278299  0.0256981   1.0830   0.27942    
## d87       0.0405086  0.0259853   1.5589   0.11974    
## clprbarr -0.3298869  0.0329905  -9.9995 < 2.2e-16 ***
## clprbcon -0.2401652  0.0199024 -12.0671 < 2.2e-16 ***
## clprbpri -0.1638598  0.0280913  -5.8331 1.055e-08 ***
## clavgsen -0.0233595  0.0238217  -0.9806   0.32733    
## clpolpc   0.4107709  0.0293047  14.0172 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    21.392
## Residual Sum of Squares: 11.817
## R-Squared:      0.44761
## Adj. R-Squared: 0.32333
## F-statistic: 35.6545 on 10 and 440 DF, p-value: < 2.22e-16
# Studentized Breusch-Pagan test applied to the fitted plm object
# (studentize = TRUE is the default, written out explicitly).
bptest(crime_hetr, studentize = TRUE)
## 
##  studentized Breusch-Pagan test
## 
## data:  crime_hetr
## BP = 10.93, df = 10, p-value = 0.363