13 Pooling Cross Sections across Time: Simple Panel Data Methods
Also covered using Python and Stata
13.1 Example 13.1. Women’s Fertility over Time
# Pooled OLS of kids on the listed controls and the year dummies y74-y84.
fert_reg <- lm(
  kids ~ educ + age + agesq + black + east + northcen + west +
    farm + othrural + town + smcity +
    y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)

# Squared OLS residuals from the equation above, regressed on the same
# right-hand side as the original equation.
u2 <- residuals(fert_reg)^2
u2_reg <- lm(
  u2 ~ educ + age + agesq + black + east + northcen + west +
    farm + othrural + town + smcity +
    y74 + y76 + y78 + y80 + y82 + y84,
  data = fertil1
)

# Both fits side by side as a plain-text table.
stargazer(fert_reg, u2_reg, type = "text", no.space = TRUE)
##
## ============================================================
## Dependent variable:
## ----------------------------
## kids u2
## (1) (2)
## ------------------------------------------------------------
## educ -0.128*** -0.102***
## (0.018) (0.038)
## age 0.532*** 0.294
## (0.138) (0.288)
## agesq -0.006*** -0.003
## (0.002) (0.003)
## black 1.076*** 1.341***
## (0.174) (0.362)
## east 0.217 -0.065
## (0.133) (0.277)
## northcen 0.363*** 0.168
## (0.121) (0.252)
## west 0.198 0.124
## (0.167) (0.348)
## farm -0.053 -0.440
## (0.147) (0.307)
## othrural -0.163 -0.070
## (0.175) (0.366)
## town 0.084 0.037
## (0.125) (0.260)
## smcity 0.212 -0.367
## (0.160) (0.334)
## y74 0.268 -0.977***
## (0.173) (0.360)
## y76 -0.097 -0.607
## (0.179) (0.373)
## y78 -0.069 -0.745**
## (0.182) (0.379)
## y80 -0.071 -1.053***
## (0.183) (0.381)
## y82 -0.522*** -0.856**
## (0.172) (0.359)
## y84 -0.545*** -1.032***
## (0.175) (0.364)
## Constant -7.742** -3.258
## (3.052) (6.361)
## ------------------------------------------------------------
## Observations 1,129 1,129
## R2 0.130 0.049
## Adjusted R2 0.116 0.034
## Residual Std. Error (df = 1111) 1.555 3.241
## F Statistic (df = 17; 1111) 9.723*** 3.367***
## ============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Linear hypothesis test
##
## Hypothesis:
## y74 = 0
## y76 = 0
## y78 = 0
## y80 = 0
## y82 = 0
## y84 = 0
##
## Model 1: restricted model
## Model 2: kids ~ educ + age + agesq + black + east + northcen + west +
## farm + othrural + town + smcity + y74 + y76 + y78 + y80 +
## y82 + y84
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1117 2771.0
## 2 1111 2685.9 6 85.139 5.8695 4.855e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## studentized Breusch-Pagan test
##
## data: u2_reg
## BP = 25.437, df = 17, p-value = 0.08536
13.2 Example 13.2. Changes in the Return to Education and the Gender Wage Gap
# Log-wage equation on the pooled cps78_85 data; y85educ and y85fem are the
# y85 interaction columns supplied by the data set.
wage_reg <- lm(
  lwage ~ y85 + educ + y85educ + exper + expersq + union + female + y85fem,
  data = cps78_85
)

# Plain-text regression table.
stargazer(wage_reg, type = "text", no.space = TRUE)
##
## ===============================================
## Dependent variable:
## ---------------------------
## lwage
## -----------------------------------------------
## y85 0.118
## (0.124)
## educ 0.075***
## (0.007)
## y85educ 0.018**
## (0.009)
## exper 0.030***
## (0.004)
## expersq -0.0004***
## (0.0001)
## union 0.202***
## (0.030)
## female -0.317***
## (0.037)
## y85fem 0.085*
## (0.051)
## Constant 0.459***
## (0.093)
## -----------------------------------------------
## Observations 1,084
## R2 0.426
## Adjusted R2 0.422
## Residual Std. Error 0.413 (df = 1075)
## F Statistic 99.804*** (df = 8; 1075)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## educ
## 7.472091
## educ
## 9.318145
13.3 Example 13.3. Effect of a Garbage Incinerator’s Location on Housing Prices
# Separate simple regressions of rprice on nearinc for each sample year.
garb81_reg <- lm(rprice ~ nearinc, data = subset(kielmc, year == 1981))
garb78_reg <- lm(rprice ~ nearinc, data = subset(kielmc, year == 1978))

# Side-by-side table, 1981 first and 1978 second.
stargazer(
  garb81_reg, garb78_reg,
  column.labels = c("Garb81", "Garb78"),
  type = "text",
  no.space = TRUE
)
##
## ===================================================================
## Dependent variable:
## -----------------------------------------------
## rprice
## Garb81 Garb78
## (1) (2)
## -------------------------------------------------------------------
## nearinc -30,688.270*** -18,824.370***
## (5,827.709) (4,744.594)
## Constant 101,307.500*** 82,517.230***
## (3,093.027) (2,653.790)
## -------------------------------------------------------------------
## Observations 142 179
## R2 0.165 0.082
## Adjusted R2 0.159 0.076
## Residual Std. Error 31,238.040 (df = 140) 29,431.960 (df = 177)
## F Statistic 27.730*** (df = 1; 140) 15.741*** (df = 1; 177)
## ===================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
13.3.1 Table 13.2 Effects of Incinerator Location on Housing Prices (rprice)
# Table 13.2: three nested specifications for rprice on the full kielmc
# sample. The original passed data = subset(kielmc); subset() without a
# condition returns every row and column, so the data frame is used directly.
One <- lm(rprice ~ y81 + nearinc + y81nrinc, data = kielmc)
Two <- lm(rprice ~ y81 + nearinc + y81nrinc + age + agesq, data = kielmc)
Three <- lm(
  rprice ~ y81 + nearinc + y81nrinc + age + agesq +
    intst + land + area + rooms + baths,
  data = kielmc
)

# One column per specification.
stargazer(
  One, Two, Three,
  column.labels = c("One", "Two", "Three"),
  no.space = TRUE,
  type = "text"
)
##
## ============================================================================================
## Dependent variable:
## ------------------------------------------------------------------------
## rprice
## One Two Three
## (1) (2) (3)
## --------------------------------------------------------------------------------------------
## y81 18,790.290*** 21,321.040*** 13,928.480***
## (4,050.065) (3,443.631) (2,798.747)
## nearinc -18,824.370*** 9,397.936* 3,780.337
## (4,875.322) (4,812.222) (4,453.415)
## y81nrinc -11,863.900 -21,920.270*** -14,177.930***
## (7,456.646) (6,359.745) (4,987.267)
## age -1,494.424*** -739.451***
## (131.860) (131.127)
## agesq 8.691*** 3.453***
## (0.848) (0.813)
## intst -0.539***
## (0.196)
## land 0.141***
## (0.031)
## area 18.086***
## (2.306)
## rooms 3,304.227**
## (1,661.248)
## baths 6,977.317***
## (2,581.321)
## Constant 82,517.230*** 89,116.540*** 13,807.670
## (2,726.910) (2,406.051) (11,166.590)
## --------------------------------------------------------------------------------------------
## Observations 321 321 321
## R2 0.174 0.414 0.660
## Adjusted R2 0.166 0.405 0.649
## Residual Std. Error 30,242.900 (df = 317) 25,543.290 (df = 315) 19,619.020 (df = 310)
## F Statistic 22.251*** (df = 3; 317) 44.591*** (df = 5; 315) 60.189*** (df = 10; 310)
## ============================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Same comparison with lprice as the dependent variable; the longer
# specification uses the logged columns lintst, lland and larea.
# data = kielmc replaces the no-op subset(kielmc) (no condition, so all rows
# were kept anyway).
lOne <- lm(lprice ~ y81 + nearinc + y81nrinc, data = kielmc)
lThree <- lm(
  lprice ~ y81 + nearinc + y81nrinc + age + agesq +
    lintst + lland + larea + rooms + baths,
  data = kielmc
)

stargazer(
  lOne, lThree,
  column.labels = c("Oneln", "Threeln"),
  no.space = TRUE,
  type = "text"
)
##
## =====================================================================
## Dependent variable:
## -------------------------------------------------
## lprice
## Oneln Threeln
## (1) (2)
## ---------------------------------------------------------------------
## y81 0.457*** 0.426***
## (0.045) (0.028)
## nearinc -0.340*** 0.032
## (0.055) (0.047)
## y81nrinc -0.063 -0.132**
## (0.083) (0.052)
## age -0.008***
## (0.001)
## agesq 0.00004***
## (0.00001)
## lintst -0.061*
## (0.032)
## lland 0.100***
## (0.024)
## larea 0.351***
## (0.051)
## rooms 0.047***
## (0.017)
## baths 0.094***
## (0.028)
## Constant 11.285*** 7.652***
## (0.031) (0.416)
## ---------------------------------------------------------------------
## Observations 321 321
## R2 0.409 0.790
## Adjusted R2 0.403 0.784
## Residual Std. Error 0.338 (df = 317) 0.204 (df = 310)
## F Statistic 73.149*** (df = 3; 317) 116.909*** (df = 10; 310)
## =====================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
13.4 Example 13.4. Effect of Worker Compensation Laws on Weeks out of Work
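# Stata equivalent (truncated in the original page): . u injury, clear  . reg ...
summary(lm(ldurat ~ afchnge + highearn + afhigh, data = subset(injury, injury$ky == 1)))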
##
## Call:
## lm(formula = ldurat ~ afchnge + highearn + afhigh, data = subset(injury,
## injury$ky == 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9666 -0.8872 0.0042 0.8126 4.0784
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.125615 0.030737 36.621 < 2e-16 ***
## afchnge 0.007657 0.044717 0.171 0.86404
## highearn 0.256479 0.047446 5.406 6.72e-08 ***
## afhigh 0.190601 0.068509 2.782 0.00542 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.269 on 5622 degrees of freedom
## Multiple R-squared: 0.02066, Adjusted R-squared: 0.02014
## F-statistic: 39.54 on 3 and 5622 DF, p-value: < 2.2e-16
13.5 Example 13.5. Sleeping versus Working
##
## Call:
## lm(formula = cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth,
## data = slp75_81)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2454.2 -307.2 79.8 334.4 2037.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -92.63404 45.86590 -2.020 0.0446 *
## ctotwrk -0.22667 0.03605 -6.287 1.58e-09 ***
## ceduc -0.02447 48.75938 -0.001 0.9996
## cmarr 104.21395 92.85536 1.122 0.2629
## cyngkid 94.66540 87.65252 1.080 0.2813
## cgdhlth 87.57785 76.59913 1.143 0.2541
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 598.6 on 233 degrees of freedom
## Multiple R-squared: 0.1495, Adjusted R-squared: 0.1313
## F-statistic: 8.191 on 5 and 233 DF, p-value: 3.827e-07
13.6 Example 13.6. Distributed Lag of Crime Rate on Clear-Up Rate
##
## Call:
## lm(formula = clcrime ~ cclrprc1 + cclrprc2, data = crime3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0335 -0.2351 0.0299 0.2178 0.8263
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.085656 0.063782 1.343 0.1854
## cclrprc1 -0.004048 0.004720 -0.858 0.3952
## cclrprc2 -0.013197 0.005195 -2.540 0.0142 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3446 on 50 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.1933, Adjusted R-squared: 0.1611
## F-statistic: 5.992 on 2 and 50 DF, p-value: 0.004649
13.7 Example 13.7. Effect of Drunk Driving Laws on Traffic Fatalities
##
## Call:
## lm(formula = cdthrte ~ copen + cadmn, data = traffic1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.25261 -0.14337 -0.00321 0.19679 0.79679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.49679 0.05243 -9.476 1.43e-12 ***
## copen -0.41968 0.20559 -2.041 0.0467 *
## cadmn -0.15060 0.11682 -1.289 0.2035
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3435 on 48 degrees of freedom
## Multiple R-squared: 0.1187, Adjusted R-squared: 0.08194
## F-statistic: 3.231 on 2 and 48 DF, p-value: 0.04824
13.8 Example 13.8. Effect of Enterprise Zones on Unemployment Claims
# Time-series copy of the data; the regression below reads ezunem directly,
# not this object.
tsezunem <- ts(ezunem)

# guclms on the year dummies d82-d88 and cez.
ezon_reg <- lm(
  guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + cez,
  data = ezunem
)
summary(ezon_reg)
##
## Call:
## lm(formula = guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 +
## cez, data = ezunem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4925 -0.1427 -0.0092 0.1495 0.6062
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.32163 0.04606 -6.982 6.55e-11 ***
## d82 0.77876 0.06514 11.954 < 2e-16 ***
## d83 -0.03312 0.06514 -0.508 0.6118
## d84 -0.01714 0.06855 -0.250 0.8029
## d85 0.32308 0.06668 4.845 2.87e-06 ***
## d86 0.29215 0.06514 4.485 1.35e-05 ***
## d87 0.05395 0.06514 0.828 0.4088
## d88 -0.01705 0.06514 -0.262 0.7938
## cez -0.18188 0.07819 -2.326 0.0212 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2161 on 167 degrees of freedom
## (22 observations deleted due to missingness)
## Multiple R-squared: 0.623, Adjusted R-squared: 0.6049
## F-statistic: 34.5 on 8 and 167 DF, p-value: < 2.2e-16
## cez
## -16.62966
##
## studentized Breusch-Pagan test
##
## data: ezon_reg
## BP = 6.914, df = 8, p-value = 0.5459
13.9 Example 13.9. County Crime Rates in North Carolina
## Balanced Panel: n = 90, T = 7, N = 630
# The cl*-prefixed crime4 columns are the already-differenced logs (see the
# wooldridge data documentation), so this differenced equation is meant to be
# estimated by pooled OLS with an intercept. plm() defaults to
# model = "within", which would demean the differenced data a second time and
# drop the intercept, so the estimator is named explicitly. The intercept is
# included by default under pooling, so the trailing "+ 1" is not needed.
# NOTE(review): the results printed after this code in the document were
# generated with the default within model; re-knit to refresh them.
crime_hetr <- plm(
  clcrmrte ~ d83 + d84 + d85 + d86 + d87 +
    clprbarr + clprbcon + clprbpri + clavgsen + clpolpc,
  data = crime4,
  model = "pooling"
)
stargazer(crime_hetr, single.row = TRUE, no.space = TRUE, type = "text")
##
## ========================================
## Dependent variable:
## ---------------------------
## clcrmrte
## ----------------------------------------
## d83 -0.100*** (0.025)
## d84 -0.048* (0.025)
## d85 -0.005 (0.025)
## d86 0.028 (0.026)
## d87 0.041 (0.026)
## clprbarr -0.330*** (0.033)
## clprbcon -0.240*** (0.020)
## clprbpri -0.164*** (0.028)
## clavgsen -0.023 (0.024)
## clpolpc 0.411*** (0.029)
## ----------------------------------------
## Observations 540
## R2 0.448
## Adjusted R2 0.323
## F Statistic 35.654*** (df = 10; 440)
## ========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = clcrmrte ~ d83 + d84 + d85 + d86 + d87 + clprbarr +
## clprbcon + clprbpri + clavgsen + clpolpc + 1, data = crime4)
##
## Balanced Panel: n = 90, T = 6, N = 540
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.6862980 -0.0714434 -0.0017868 0.0751380 0.6628832
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## d83 -0.1004876 0.0254125 -3.9543 8.945e-05 ***
## d84 -0.0483243 0.0249774 -1.9347 0.05366 .
## d85 -0.0046755 0.0249772 -0.1872 0.85160
## d86 0.0278299 0.0256981 1.0830 0.27942
## d87 0.0405086 0.0259853 1.5589 0.11974
## clprbarr -0.3298869 0.0329905 -9.9995 < 2.2e-16 ***
## clprbcon -0.2401652 0.0199024 -12.0671 < 2.2e-16 ***
## clprbpri -0.1638598 0.0280913 -5.8331 1.055e-08 ***
## clavgsen -0.0233595 0.0238217 -0.9806 0.32733
## clpolpc 0.4107709 0.0293047 14.0172 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 21.392
## Residual Sum of Squares: 11.817
## R-Squared: 0.44761
## Adj. R-Squared: 0.32333
## F-statistic: 35.6545 on 10 and 440 DF, p-value: < 2.22e-16
##
## studentized Breusch-Pagan test
##
## data: crime_hetr
## BP = 10.93, df = 10, p-value = 0.363