Chapter 13. Pooling Cross Sections across Time#
import numpy as np
import pandas as pd
import statsmodels.stats.api as sms
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *
Example 13.1. Women’s Fertility over Time#
# Pooled OLS of `kids` on the fertil1 covariates together with the
# y74 through y84 indicators. `fert_reg` is reused by the tests that follow.
df = dataWoo('fertil1')
fert_formula = (
    'kids ~ educ + age + agesq + black + east + northcen + west + '
    'farm + othrural + town + smcity + '
    'y74 + y76 + y78 + y80 + y82 + y84'
)
fert_model = smf.ols(formula=fert_formula, data=df)
fert_reg = fert_model.fit()
print(fert_reg.summary())
OLS Regression Results
==============================================================================
Dep. Variable: kids R-squared: 0.130
Model: OLS Adj. R-squared: 0.116
Method: Least Squares F-statistic: 9.723
Date: Mon, 11 Dec 2023 Prob (F-statistic): 2.42e-24
Time: 18:37:36 Log-Likelihood: -2091.2
No. Observations: 1129 AIC: 4218.
Df Residuals: 1111 BIC: 4309.
Df Model: 17
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -7.7425 3.052 -2.537 0.011 -13.730 -1.755
educ -0.1284 0.018 -6.999 0.000 -0.164 -0.092
age 0.5321 0.138 3.845 0.000 0.261 0.804
agesq -0.0058 0.002 -3.710 0.000 -0.009 -0.003
black 1.0757 0.174 6.198 0.000 0.735 1.416
east 0.2173 0.133 1.637 0.102 -0.043 0.478
northcen 0.3631 0.121 3.004 0.003 0.126 0.600
west 0.1976 0.167 1.184 0.237 -0.130 0.525
farm -0.0526 0.147 -0.357 0.721 -0.341 0.236
othrural -0.1629 0.175 -0.928 0.353 -0.507 0.181
town 0.0844 0.125 0.677 0.498 -0.160 0.329
smcity 0.2119 0.160 1.322 0.187 -0.103 0.526
y74 0.2682 0.173 1.553 0.121 -0.071 0.607
y76 -0.0974 0.179 -0.544 0.587 -0.449 0.254
y78 -0.0687 0.182 -0.378 0.706 -0.425 0.288
y80 -0.0713 0.183 -0.390 0.697 -0.430 0.287
y82 -0.5225 0.172 -3.030 0.003 -0.861 -0.184
y84 -0.5452 0.175 -3.124 0.002 -0.888 -0.203
==============================================================================
Omnibus: 9.775 Durbin-Watson: 2.011
Prob(Omnibus): 0.008 Jarque-Bera (JB): 9.966
Skew: 0.227 Prob(JB): 0.00685
Kurtosis: 2.920 Cond. No. 1.32e+05
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.32e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
# Joint F test on fert_reg that the coefficients on y74, y76, y78, y80, y82
# and y84 are all zero (chained equalities ending in "= 0", six restrictions).
hypotheses = '(y74 =y76 = y78 = y80 = y82 = y84 = 0)'
f_test = fert_reg.f_test(hypotheses)
print(f_test)
<F test: F=5.869508671580268, p=4.85518986757229e-06, df_denom=1.11e+03, df_num=6>
# Breusch-Pagan heteroskedasticity test on the fert_reg residuals against the
# model's own design matrix; only the first two returned values are tabulated.
bptest = sms.diagnostic.het_breuschpagan(fert_reg.resid, fert_reg.model.exog)
bp_stat, bp_pvalue = bptest[:2]
df2 = pd.DataFrame({'Chi-Sq': [bp_stat], 'Prob>Chi-Sq': [bp_pvalue]})
print(df2)
Chi-Sq Prob>Chi-Sq
0 55.315373 0.000006
Example 13.2. Changes in the Return to Education and the Gender Wage Gap#
# OLS of lwage on y85, educ, exper, expersq, union, female and the
# y85educ / y85fem terms, using the cps78_85 data; prints the full summary.
cps = dataWoo("cps78_85")
wage_formula = 'lwage ~ y85 + educ + y85educ + exper + expersq + union + female + y85fem'
wage_fit = smf.ols(wage_formula, data=cps).fit()
print(wage_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lwage R-squared: 0.426
Model: OLS Adj. R-squared: 0.422
Method: Least Squares F-statistic: 99.80
Date: Mon, 11 Dec 2023 Prob (F-statistic): 4.46e-124
Time: 18:37:36 Log-Likelihood: -574.24
No. Observations: 1084 AIC: 1166.
Df Residuals: 1075 BIC: 1211.
Df Model: 8
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 0.4589 0.093 4.911 0.000 0.276 0.642
y85 0.1178 0.124 0.952 0.341 -0.125 0.361
educ 0.0747 0.007 11.192 0.000 0.062 0.088
y85educ 0.0185 0.009 1.974 0.049 0.000 0.037
exper 0.0296 0.004 8.293 0.000 0.023 0.037
expersq -0.0004 7.75e-05 -5.151 0.000 -0.001 -0.000
union 0.2021 0.030 6.672 0.000 0.143 0.262
female -0.3167 0.037 -8.648 0.000 -0.389 -0.245
y85fem 0.0851 0.051 1.658 0.098 -0.016 0.186
==============================================================================
Omnibus: 83.747 Durbin-Watson: 1.918
Prob(Omnibus): 0.000 Jarque-Bera (JB): 317.985
Skew: -0.271 Prob(JB): 8.92e-70
Kurtosis: 5.597 Cond. No. 8.77e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.77e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Example 13.3. Effect of a Garbage Incinerator’s Location on Housing Prices#
# Separate regressions of rprice on nearinc for the 1981 and 1978 rows of
# kielmc, shown side by side with N, R2 and adjusted R2 appended.
df = dataWoo("kielmc")
is_1981 = df['year'] == 1981
is_1978 = df['year'] == 1978
garb81_reg = smf.ols('rprice ~ nearinc', data=df[is_1981]).fit()
garb78_reg = smf.ols('rprice ~ nearinc', data=df[is_1978]).fit()

# Extra rows for the comparison table, each computed from a fitted result.
garb_stats = {
    'N': lambda x: "{0:d}".format(int(x.nobs)),
    'R2': lambda x: "{:.3f}".format(x.rsquared),
    'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj),
}
garb_table = summary_col(
    [garb81_reg, garb78_reg],
    stars=True,
    float_format='%0.3f',
    model_names=['grab81\n(b/se)', 'grab78\n(b/se)'],
    info_dict=garb_stats,
)
print(garb_table)
==========================================
grab81 grab78
(b/se) (b/se)
------------------------------------------
Intercept 101307.514*** 82517.228***
(3093.027) (2653.790)
nearinc -30688.274*** -18824.370***
(5827.709) (4744.594)
R-squared 0.165 0.082
R-squared Adj. 0.159 0.076
N 142 179
R2 0.165 0.082
Adj.R2 0.159 0.076
==========================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
Table 13.2 Effects of Incinerator Location on Housing Prices (rprice)#
# Three nested specifications for rprice on the pooled kielmc data: the base
# y81 / nearinc / y81nrinc terms, then age and agesq, then the house
# characteristics. Each formula extends the previous one.
base_formula = 'rprice ~ y81 + nearinc + y81nrinc'
age_formula = base_formula + ' + age + agesq'
full_formula = age_formula + ' + intst + land + area + rooms + baths'

One = smf.ols(base_formula, data=df).fit()
Two = smf.ols(age_formula, data=df).fit()
Three = smf.ols(full_formula, data=df).fit()

# Extra rows for the comparison table, each computed from a fitted result.
level_stats = {
    'N': lambda x: "{0:d}".format(int(x.nobs)),
    'R2': lambda x: "{:.3f}".format(x.rsquared),
    'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj),
}
level_table = summary_col(
    [One, Two, Three],
    stars=True,
    float_format='%0.3f',
    model_names=['One\n(b/se)', 'Two\n(b/se)', 'Three\n(b/se)'],
    info_dict=level_stats,
)
print(level_table)
========================================================
One Two Three
(b/se) (b/se) (b/se)
--------------------------------------------------------
Intercept 82517.228*** 89116.535*** 13807.665
(2726.910) (2406.051) (11166.594)
R-squared 0.174 0.414 0.660
R-squared Adj. 0.166 0.405 0.649
age -1494.424*** -739.451***
(131.860) (131.127)
agesq 8.691*** 3.453***
(0.848) (0.813)
area 18.086***
(2.306)
baths 6977.317***
(2581.321)
intst -0.539***
(0.196)
land 0.141***
(0.031)
nearinc -18824.370*** 9397.936* 3780.337
(4875.322) (4812.222) (4453.415)
rooms 3304.227**
(1661.248)
y81 18790.286*** 21321.042*** 13928.476***
(4050.065) (3443.631) (2798.747)
y81nrinc -11863.903 -21920.270*** -14177.934***
(7456.646) (6359.745) (4987.267)
N 321 321 321
R2 0.174 0.414 0.660
Adj.R2 0.166 0.405 0.649
========================================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
# Same comparison with lprice as the dependent variable: the base
# specification and the full one using lintst, lland and larea.
log_base_formula = 'lprice ~ y81 + nearinc + y81nrinc'
log_full_formula = 'lprice ~ y81 + nearinc + y81nrinc + age + agesq + lintst + lland + larea + rooms + baths'

lOne = smf.ols(log_base_formula, data=df).fit()
lThree = smf.ols(log_full_formula, data=df).fit()

# Extra rows for the comparison table, each computed from a fitted result.
log_stats = {
    'N': lambda x: "{0:d}".format(int(x.nobs)),
    'R2': lambda x: "{:.3f}".format(x.rsquared),
    'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj),
}
log_table = summary_col(
    [lOne, lThree],
    stars=True,
    float_format='%0.3f',
    model_names=['lOne\n(b/se)', 'lThree\n(b/se)'],
    info_dict=log_stats,
)
print(log_table)
==================================
lOne lThree
(b/se) (b/se)
----------------------------------
Intercept 11.285*** 7.652***
(0.031) (0.416)
R-squared 0.409 0.790
R-squared Adj. 0.403 0.784
age -0.008***
(0.001)
agesq 0.000***
(0.000)
baths 0.094***
(0.028)
larea 0.351***
(0.051)
lintst -0.061*
(0.032)
lland 0.100***
(0.024)
nearinc -0.340*** 0.032
(0.055) (0.047)
rooms 0.047***
(0.017)
y81 0.457*** 0.426***
(0.045) (0.028)
y81nrinc -0.063 -0.132**
(0.083) (0.052)
N 321 321
R2 0.409 0.790
Adj.R2 0.403 0.784
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
Example 13.4. Effect of Worker Compensation Laws on Weeks out of Work#
# OLS of ldurat on afchnge, highearn and afhigh, restricted to the rows of
# the injury data where ky == 1.
df = dataWoo("injury")
ky_rows = df[df['ky'] == 1]
injury_fit = smf.ols('ldurat~ afchnge + highearn + afhigh', data=ky_rows).fit()
print(injury_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: ldurat R-squared: 0.021
Model: OLS Adj. R-squared: 0.020
Method: Least Squares F-statistic: 39.54
Date: Mon, 11 Dec 2023 Prob (F-statistic): 2.81e-25
Time: 18:37:37 Log-Likelihood: -9322.0
No. Observations: 5626 AIC: 1.865e+04
Df Residuals: 5622 BIC: 1.868e+04
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.1256 0.031 36.621 0.000 1.065 1.186
afchnge 0.0077 0.045 0.171 0.864 -0.080 0.095
highearn 0.2565 0.047 5.406 0.000 0.163 0.349
afhigh 0.1906 0.069 2.782 0.005 0.056 0.325
==============================================================================
Omnibus: 29.931 Durbin-Watson: 1.905
Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.672
Skew: 0.037 Prob(JB): 8.93e-10
Kurtosis: 3.415 Cond. No. 6.38
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 13.5. Sleeping versus Working#
# OLS of cslpnap on ctotwrk, ceduc, cmarr, cyngkid and cgdhlth from the
# slp75_81 data; prints the full summary.
sleep_data = dataWoo("slp75_81")
sleep_formula = 'cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth'
sleep_fit = smf.ols(sleep_formula, data=sleep_data).fit()
print(sleep_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: cslpnap R-squared: 0.150
Model: OLS Adj. R-squared: 0.131
Method: Least Squares F-statistic: 8.191
Date: Mon, 11 Dec 2023 Prob (F-statistic): 3.83e-07
Time: 18:37:37 Log-Likelihood: -1864.4
No. Observations: 239 AIC: 3741.
Df Residuals: 233 BIC: 3762.
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -92.6340 45.866 -2.020 0.045 -182.999 -2.269
ctotwrk -0.2267 0.036 -6.287 0.000 -0.298 -0.156
ceduc -0.0245 48.759 -0.001 1.000 -96.090 96.041
cmarr 104.2139 92.855 1.122 0.263 -78.729 287.157
cyngkid 94.6654 87.653 1.080 0.281 -78.027 267.358
cgdhlth 87.5778 76.599 1.143 0.254 -63.338 238.493
==============================================================================
Omnibus: 31.927 Durbin-Watson: 1.890
Prob(Omnibus): 0.000 Jarque-Bera (JB): 57.378
Skew: -0.719 Prob(JB): 3.47e-13
Kurtosis: 4.922 Cond. No. 2.72e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.72e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Example 13.6. Distributed Lag of Crime Rate on Clear-Up Rate#
# OLS of clcrime on cclrprc1 and cclrprc2 from the crime3 data; prints the
# full summary.
crime3 = dataWoo("crime3")
crime3_fit = smf.ols('clcrime ~ cclrprc1 + cclrprc2', data=crime3).fit()
print(crime3_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: clcrime R-squared: 0.193
Model: OLS Adj. R-squared: 0.161
Method: Least Squares F-statistic: 5.992
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.00465
Time: 18:37:37 Log-Likelihood: -17.194
No. Observations: 53 AIC: 40.39
Df Residuals: 50 BIC: 46.30
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 0.0857 0.064 1.343 0.185 -0.042 0.214
cclrprc1 -0.0040 0.005 -0.858 0.395 -0.014 0.005
cclrprc2 -0.0132 0.005 -2.540 0.014 -0.024 -0.003
==============================================================================
Omnibus: 3.032 Durbin-Watson: 2.203
Prob(Omnibus): 0.220 Jarque-Bera (JB): 2.071
Skew: -0.344 Prob(JB): 0.355
Kurtosis: 3.681 Cond. No. 23.6
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 13.8. Effect of Enterprise Zones on Unemployment Claims#
# OLS of guclms on the d82 through d88 indicators and cez, using the ezunem
# data. `ezon_reg` is reused by the heteroskedasticity test further down.
ezunem = dataWoo("ezunem")
ezon_formula = 'guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + cez'
ezon_reg = smf.ols(ezon_formula, data=ezunem).fit()
print(ezon_reg.summary())
OLS Regression Results
==============================================================================
Dep. Variable: guclms R-squared: 0.623
Model: OLS Adj. R-squared: 0.605
Method: Least Squares F-statistic: 34.50
Date: Mon, 11 Dec 2023 Prob (F-statistic): 1.08e-31
Time: 18:37:37 Log-Likelihood: 24.553
No. Observations: 176 AIC: -31.11
Df Residuals: 167 BIC: -2.573
Df Model: 8
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -0.3216 0.046 -6.982 0.000 -0.413 -0.231
d82 0.7788 0.065 11.954 0.000 0.650 0.907
d83 -0.0331 0.065 -0.508 0.612 -0.162 0.095
d84 -0.0171 0.069 -0.250 0.803 -0.152 0.118
d85 0.3231 0.067 4.845 0.000 0.191 0.455
d86 0.2922 0.065 4.485 0.000 0.164 0.421
d87 0.0539 0.065 0.828 0.409 -0.075 0.183
d88 -0.0171 0.065 -0.262 0.794 -0.146 0.112
cez -0.1819 0.078 -2.326 0.021 -0.336 -0.028
==============================================================================
Omnibus: 0.858 Durbin-Watson: 2.370
Prob(Omnibus): 0.651 Jarque-Bera (JB): 0.871
Skew: 0.166 Prob(JB): 0.647
Kurtosis: 2.905 Cond. No. 8.96
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Transform the cez coefficient into a percentage effect: 100 * (exp(b) - 1).
# NOTE(review): -.1819 is the rounded cez estimate copied from the regression
# table; ezon_reg.params['cez'] would avoid the hard-coded value, but the
# displayed result would shift in its trailing digits — confirm before changing.
cez = (np.exp(-.1819) - 1) * 100
# Bare expression so the notebook cell displays the value.
cez
-16.631529528207743
# Breusch-Pagan heteroskedasticity test on the ezon_reg residuals against the
# model's own design matrix; only the first two returned values are tabulated.
bptest = sms.diagnostic.het_breuschpagan(ezon_reg.resid, ezon_reg.model.exog)
bp_stat, bp_pvalue = bptest[:2]
bptest2 = pd.DataFrame({'Chi-Sq': [bp_stat], 'Prob>Chi-Sq': [bp_pvalue]})
print(bptest2)
Chi-Sq Prob>Chi-Sq
0 6.913966 0.545943
Example 13.9. County Crime Rates in North Carolina#
# The same crime4 specification fitted twice: once with the default
# covariance and once with cov_type='HC1', so the standard errors can be
# compared side by side. `hetrosced_r` is reused by the test that follows.
df = dataWoo("crime4")
crime4_formula = 'clcrmrte ~ d83 + d84 + d85 + d86 + d87 + clprbarr + clprbcon + clprbpri + clavgsen + clpolpc + 1'
hetrosced_r = smf.ols(crime4_formula, data=df).fit()
robust_r = smf.ols(crime4_formula, data=df).fit(cov_type='HC1')

# Extra rows for the comparison table, each computed from a fitted result.
crime4_stats = {
    'N': lambda x: "{0:d}".format(int(x.nobs)),
    'R2': lambda x: "{:.3f}".format(x.rsquared),
    'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj),
}
crime4_table = summary_col(
    [hetrosced_r, robust_r],
    stars=True,
    float_format='%0.3f',
    model_names=['Hetrosced\n(b/se)', 'Robust\n(b/se)'],
    info_dict=crime4_stats,
)
print(crime4_table)
==================================
Hetrosced Robust
(b/se) (b/se)
----------------------------------
Intercept 0.008 0.008
(0.017) (0.015)
d83 -0.100*** -0.100***
(0.024) (0.022)
d84 -0.048** -0.048**
(0.024) (0.020)
d85 -0.005 -0.005
(0.023) (0.024)
d86 0.028 0.028
(0.024) (0.021)
d87 0.041* 0.041*
(0.024) (0.024)
clprbarr -0.327*** -0.327***
(0.030) (0.051)
clprbcon -0.238*** -0.238***
(0.018) (0.031)
clprbpri -0.165*** -0.165***
(0.026) (0.035)
clavgsen -0.022 -0.022
(0.022) (0.025)
clpolpc 0.398*** 0.398***
(0.027) (0.076)
R-squared 0.433 0.433
R-squared Adj. 0.422 0.422
N 540 540
R2 0.433 0.433
Adj.R2 0.422 0.422
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
# Breusch-Pagan heteroskedasticity test on the hetrosced_r residuals against
# the model's own design matrix; only the first two returned values are tabulated.
bptest = sms.diagnostic.het_breuschpagan(hetrosced_r.resid, hetrosced_r.model.exog)
bp_stat, bp_pvalue = bptest[:2]
bptest2 = pd.DataFrame({'Chi-Sq': [bp_stat], 'Prob>Chi-Sq': [bp_pvalue]})
print(bptest2)
Chi-Sq Prob>Chi-Sq
0 10.929708 0.363021