Python for Introductory Econometrics: Chap 13

# Python for Introductory Econometrics¶

## Chapter 13. Pooling Cross Sections across Time¶

#### https://www.solomonegash.com/¶

In [1]:
import numpy as np
import pandas as pd
import scipy as sp

import statsmodels
import statsmodels.api as sm
import statsmodels.stats.api as sms
import statsmodels.formula.api as smf
from matplotlib import pyplot as plt
from statsmodels.iolib.summary2 import summary_col

from wooldridge import *


### Example 13.1. Women’s Fertility over Time¶

In [2]:
# Load the 'fertil1' data set used throughout Example 13.1.
df = dataWoo("fertil1")

In [3]:
# OLS of kids on the demographic/location regressors and the y74..y84 terms.
# The formula is kept in its own variable so the specification is easy to read.
fertility_formula = 'kids ~ educ + age + agesq + black + east + northcen + west + farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84'
fert_reg = smf.ols(formula=fertility_formula, data=df).fit()

# Print the full regression table for the fitted model.
print(fert_reg.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                   kids   R-squared:                       0.130
Method:                 Least Squares   F-statistic:                     9.723
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           2.42e-24
Time:                        18:14:37   Log-Likelihood:                -2091.2
No. Observations:                1129   AIC:                             4218.
Df Residuals:                    1111   BIC:                             4309.
Df Model:                          17
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -7.7425      3.052     -2.537      0.011     -13.730      -1.755
educ          -0.1284      0.018     -6.999      0.000      -0.164      -0.092
age            0.5321      0.138      3.845      0.000       0.261       0.804
agesq         -0.0058      0.002     -3.710      0.000      -0.009      -0.003
black          1.0757      0.174      6.198      0.000       0.735       1.416
east           0.2173      0.133      1.637      0.102      -0.043       0.478
northcen       0.3631      0.121      3.004      0.003       0.126       0.600
west           0.1976      0.167      1.184      0.237      -0.130       0.525
farm          -0.0526      0.147     -0.357      0.721      -0.341       0.236
othrural      -0.1629      0.175     -0.928      0.353      -0.507       0.181
town           0.0844      0.125      0.677      0.498      -0.160       0.329
smcity         0.2119      0.160      1.322      0.187      -0.103       0.526
y74            0.2682      0.173      1.553      0.121      -0.071       0.607
y76           -0.0974      0.179     -0.544      0.587      -0.449       0.254
y78           -0.0687      0.182     -0.378      0.706      -0.425       0.288
y80           -0.0713      0.183     -0.390      0.697      -0.430       0.287
y82           -0.5225      0.172     -3.030      0.003      -0.861      -0.184
y84           -0.5452      0.175     -3.124      0.002      -0.888      -0.203
==============================================================================
Omnibus:                        9.775   Durbin-Watson:                   2.011
Prob(Omnibus):                  0.008   Jarque-Bera (JB):                9.966
Skew:                           0.227   Prob(JB):                      0.00685
Kurtosis:                       2.920   Cond. No.                     1.32e+05
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.32e+05. This might indicate that there are
strong multicollinearity or other numerical problems.

In [4]:
# Joint F test that the coefficients on y74, y76, y78, y80, y82 and y84
# are all zero in the fertility regression.
hypotheses = '(y74 =y76 = y78 = y80 = y82 = y84 = 0)'
f_test = fert_reg.f_test(r_matrix=hypotheses)
print(f_test)

<F test: F=array([[5.86950867]]), p=4.85518986757229e-06, df_denom=1111, df_num=6>

In [5]:
# Breusch-Pagan heteroskedasticity test on the fertility regression,
# using the model's own design matrix as the explanatory variables.
bptest = sms.diagnostic.het_breuschpagan(fert_reg.resid, fert_reg.model.exog)

# Only the first two returned elements are reported, labelled as the
# chi-square statistic and its p-value.
bp_stat, bp_pvalue = bptest[0], bptest[1]
df2 = pd.DataFrame({'Chi-Sq': [bp_stat], 'Prob>Chi-Sq': [bp_pvalue]})
print(df2)

      Chi-Sq  Prob>Chi-Sq
0  55.315373     0.000006


### Example 13.2. Changes in the Return to Education and the Gender Wage Gap¶

In [6]:
# Load the 'cps78_85' data, fit the log-wage equation with the y85
# interaction terms (y85educ, y85fem), and print the regression table.
cps_data = dataWoo("cps78_85")
wage_reg = smf.ols(
    'lwage ~ y85 + educ + y85educ + exper + expersq + union + female + y85fem',
    data=cps_data,
).fit()
print(wage_reg.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.426
Method:                 Least Squares   F-statistic:                     99.80
Date:                Fri, 24 Apr 2020   Prob (F-statistic):          4.46e-124
Time:                        18:14:37   Log-Likelihood:                -574.24
No. Observations:                1084   AIC:                             1166.
Df Residuals:                    1075   BIC:                             1211.
Df Model:                           8
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.4589      0.093      4.911      0.000       0.276       0.642
y85            0.1178      0.124      0.952      0.341      -0.125       0.361
educ           0.0747      0.007     11.192      0.000       0.062       0.088
y85educ        0.0185      0.009      1.974      0.049       0.000       0.037
exper          0.0296      0.004      8.293      0.000       0.023       0.037
expersq       -0.0004   7.75e-05     -5.151      0.000      -0.001      -0.000
union          0.2021      0.030      6.672      0.000       0.143       0.262
female        -0.3167      0.037     -8.648      0.000      -0.389      -0.245
y85fem         0.0851      0.051      1.658      0.098      -0.016       0.186
==============================================================================
Omnibus:                       83.747   Durbin-Watson:                   1.918
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              317.985
Skew:                          -0.271   Prob(JB):                     8.92e-70
Kurtosis:                       5.597   Cond. No.                     8.77e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.77e+03. This might indicate that there are
strong multicollinearity or other numerical problems.


### Example 13.3. Effect of a Garbage Incinerator’s Location on Housing Prices¶

In [7]:
# Load the 'kielmc' data and regress rprice on nearinc separately for the
# rows with year == 1981 and year == 1978.
df = dataWoo("kielmc")
garb81_reg = smf.ols('rprice ~ nearinc', data=df[(df['year']==1981)]).fit()
garb78_reg = smf.ols('rprice ~ nearinc', data=df[(df['year']==1978)]).fit()

# Side-by-side table of the two fits. info_dict adds the observation count
# and R-squared rows; the dict and both calls are closed explicitly so the
# cell parses (the call was previously left unterminated).
print(summary_col(
    [garb81_reg, garb78_reg],
    stars=True,
    float_format='%0.3f',
    model_names=['grab81\n(b/se)', 'grab78\n(b/se)'],
    info_dict={
        'N': lambda x: "{0:d}".format(int(x.nobs)),
        'R2': lambda x: "{:.3f}".format(x.rsquared),
    },
))

=====================================
grab81        grab78
(b/se)        (b/se)
-------------------------------------
Intercept 101307.514*** 82517.228***
(3093.027)    (2653.790)
nearinc   -30688.274*** -18824.370***
(5827.709)    (4744.594)
N         142           179
R2        0.165         0.082
=====================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


### Table 13.2 Effects of Incinerator Location on Housing Prices (rprice)¶

In [8]:
# Three nested specifications for rprice: the base model with y81, nearinc
# and their interaction (y81nrinc); then adding age and agesq; then adding
# intst, land, area, rooms and baths.
One = smf.ols('rprice ~ y81 + nearinc + y81nrinc', data=df).fit()
Two = smf.ols('rprice ~ y81 + nearinc + y81nrinc + age + agesq', data=df).fit()
Three = smf.ols('rprice ~ y81 + nearinc + y81nrinc + age + agesq + intst + land + area + rooms + baths', data=df).fit()

# Combined table with observation count and R-squared rows. The info_dict
# and both calls are closed explicitly so the cell parses (the call was
# previously left unterminated).
print(summary_col(
    [One, Two, Three],
    stars=True,
    float_format='%0.3f',
    model_names=['One\n(b/se)', 'Two\n(b/se)', 'Three\n(b/se)'],
    info_dict={
        'N': lambda x: "{0:d}".format(int(x.nobs)),
        'R2': lambda x: "{:.3f}".format(x.rsquared),
    },
))

===================================================
One           Two          Three
(b/se)        (b/se)        (b/se)
---------------------------------------------------
Intercept 82517.228***  89116.535***  13807.665
(2726.910)    (2406.051)    (11166.594)
age                     -1494.424***  -739.451***
(131.860)     (131.127)
agesq                   8.691***      3.453***
(0.848)       (0.813)
area                                  18.086***
(2.306)
baths                                 6977.317***
(2581.321)
intst                                 -0.539***
(0.196)
land                                  0.141***
(0.031)
nearinc   -18824.370*** 9397.936*     3780.337
(4875.322)    (4812.222)    (4453.415)
rooms                                 3304.227**
(1661.248)
y81       18790.286***  21321.042***  13928.476***
(4050.065)    (3443.631)    (2798.747)
y81nrinc  -11863.903    -21920.270*** -14177.934***
(7456.646)    (6359.745)    (4987.267)
N         321           321           321
R2        0.174         0.414         0.660
===================================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

In [9]:
# Same comparison with lprice as the dependent variable: the base model and
# the full model that uses lintst, lland and larea in place of the level
# regressors.
lOne = smf.ols('lprice ~ y81 + nearinc + y81nrinc', data=df).fit()
lThree = smf.ols('lprice ~ y81 + nearinc + y81nrinc + age + agesq + lintst + lland + larea + rooms + baths', data=df).fit()

# Combined table with observation count and R-squared rows. The info_dict
# and both calls are closed explicitly so the cell parses (the call was
# previously left unterminated).
print(summary_col(
    [lOne, lThree],
    stars=True,
    float_format='%0.3f',
    model_names=['lOne\n(b/se)', 'lThree\n(b/se)'],
    info_dict={
        'N': lambda x: "{0:d}".format(int(x.nobs)),
        'R2': lambda x: "{:.3f}".format(x.rsquared),
    },
))

=============================
lOne     lThree
(b/se)    (b/se)
-----------------------------
Intercept 11.285*** 7.652***
(0.031)   (0.416)
age                 -0.008***
(0.001)
agesq               0.000***
(0.000)
baths               0.094***
(0.028)
larea               0.351***
(0.051)
lintst              -0.061*
(0.032)
lland               0.100***
(0.024)
nearinc   -0.340*** 0.032
(0.055)   (0.047)
rooms               0.047***
(0.017)
y81       0.457***  0.426***
(0.045)   (0.028)
y81nrinc  -0.063    -0.132**
(0.083)   (0.052)
N         321       321
R2        0.409     0.790
=============================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01


### Example 13.4. Effect of Worker Compensation Laws on Weeks out of Work¶

In [10]:
# Load the 'injury' data and keep only the rows where ky == 1
# (presumably the Kentucky sample -- confirm against the data description).
df = dataWoo("injury")
ky_rows = df[df['ky'] == 1]

# Regress ldurat on afchnge, highearn and afhigh for that subsample.
injury_reg = smf.ols('ldurat~ afchnge + highearn + afhigh', data=ky_rows).fit()
print(injury_reg.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 ldurat   R-squared:                       0.021
Method:                 Least Squares   F-statistic:                     39.54
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           2.81e-25
Time:                        18:14:38   Log-Likelihood:                -9322.0
No. Observations:                5626   AIC:                         1.865e+04
Df Residuals:                    5622   BIC:                         1.868e+04
Df Model:                           3
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.1256      0.031     36.621      0.000       1.065       1.186
afchnge        0.0077      0.045      0.171      0.864      -0.080       0.095
highearn       0.2565      0.047      5.406      0.000       0.163       0.349
afhigh         0.1906      0.069      2.782      0.005       0.056       0.325
==============================================================================
Omnibus:                       29.931   Durbin-Watson:                   1.905
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               41.672
Skew:                           0.037   Prob(JB):                     8.93e-10
Kurtosis:                       3.415   Cond. No.                         6.38
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


### Example 13.5. Sleeping versus Working¶

In [11]:
# Load the 'slp75_81' data, regress cslpnap on ctotwrk, ceduc, cmarr,
# cyngkid and cgdhlth, and print the regression table.
sleep_data = dataWoo("slp75_81")
sleep_reg = smf.ols(
    'cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth',
    data=sleep_data,
).fit()
print(sleep_reg.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                cslpnap   R-squared:                       0.150
Method:                 Least Squares   F-statistic:                     8.191
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           3.83e-07
Time:                        18:14:38   Log-Likelihood:                -1864.4
No. Observations:                 239   AIC:                             3741.
Df Residuals:                     233   BIC:                             3762.
Df Model:                           5
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -92.6340     45.866     -2.020      0.045    -182.999      -2.269
ctotwrk       -0.2267      0.036     -6.287      0.000      -0.298      -0.156
ceduc         -0.0245     48.759     -0.001      1.000     -96.090      96.041
cmarr        104.2139     92.855      1.122      0.263     -78.729     287.157
cyngkid       94.6654     87.653      1.080      0.281     -78.027     267.358
cgdhlth       87.5778     76.599      1.143      0.254     -63.338     238.493
==============================================================================
Omnibus:                       31.927   Durbin-Watson:                   1.890
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               57.378
Skew:                          -0.719   Prob(JB):                     3.47e-13
Kurtosis:                       4.922   Cond. No.                     2.72e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.72e+03. This might indicate that there are
strong multicollinearity or other numerical problems.


### Example 13.6. Distributed Lag of Crime Rate on Clear-Up Rate¶

In [12]:
# Load the 'crime3' data, regress clcrime on cclrprc1 and cclrprc2,
# and print the regression table.
crime3_data = dataWoo("crime3")
crime3_reg = smf.ols('clcrime ~ cclrprc1 + cclrprc2', data=crime3_data).fit()
print(crime3_reg.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                clcrime   R-squared:                       0.193
Method:                 Least Squares   F-statistic:                     5.992
Date:                Fri, 24 Apr 2020   Prob (F-statistic):            0.00465
Time:                        18:14:38   Log-Likelihood:                -17.194
No. Observations:                  53   AIC:                             40.39
Df Residuals:                      50   BIC:                             46.30
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0857      0.064      1.343      0.185      -0.042       0.214
cclrprc1      -0.0040      0.005     -0.858      0.395      -0.014       0.005
cclrprc2      -0.0132      0.005     -2.540      0.014      -0.024      -0.003
==============================================================================
Omnibus:                        3.032   Durbin-Watson:                   2.203
Prob(Omnibus):                  0.220   Jarque-Bera (JB):                2.071
Skew:                          -0.344   Prob(JB):                        0.355
Kurtosis:                       3.681   Cond. No.                         23.6
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


### Example 13.8. Effect of Enterprise Zones on Unemployment Claims¶

In [13]:
# Regress guclms on the d82..d88 terms and cez using the 'ezunem' data.
# The fitted result is kept in ezon_reg because later cells reuse it.
ezunem_data = dataWoo("ezunem")
ezon_reg = smf.ols(
    'guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + cez',
    data=ezunem_data,
).fit()
print(ezon_reg.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 guclms   R-squared:                       0.623
Method:                 Least Squares   F-statistic:                     34.50
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           1.08e-31
Time:                        18:14:38   Log-Likelihood:                 24.553
No. Observations:                 176   AIC:                            -31.11
Df Residuals:                     167   BIC:                            -2.573
Df Model:                           8
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.3216      0.046     -6.982      0.000      -0.413      -0.231
d82            0.7788      0.065     11.954      0.000       0.650       0.907
d83           -0.0331      0.065     -0.508      0.612      -0.162       0.095
d84           -0.0171      0.069     -0.250      0.803      -0.152       0.118
d85            0.3231      0.067      4.845      0.000       0.191       0.455
d86            0.2922      0.065      4.485      0.000       0.164       0.421
d87            0.0539      0.065      0.828      0.409      -0.075       0.183
d88           -0.0171      0.065     -0.262      0.794      -0.146       0.112
cez           -0.1819      0.078     -2.326      0.021      -0.336      -0.028
==============================================================================
Omnibus:                        0.858   Durbin-Watson:                   2.370
Prob(Omnibus):                  0.651   Jarque-Bera (JB):                0.871
Skew:                           0.166   Prob(JB):                        0.647
Kurtosis:                       2.905   Cond. No.                         8.96
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [14]:
# Convert the estimated cez coefficient into a percentage change,
# 100 * (exp(b) - 1). The coefficient is read from the fitted model rather
# than retyped as a rounded number from the summary table, so the result
# keeps full precision and stays in sync if the regression is re-estimated.
cez = (np.exp(ezon_reg.params['cez']) - 1) * 100
cez

Out[14]:
-16.631529528207743
In [15]:
# Breusch-Pagan heteroskedasticity test on the ezon_reg residuals,
# using the model's own design matrix as the explanatory variables.
bptest = sms.diagnostic.het_breuschpagan(ezon_reg.resid, ezon_reg.model.exog)

# Report the first two returned elements as the chi-square statistic
# and its p-value.
bp_stat, bp_pvalue = bptest[0], bptest[1]
bptest2 = pd.DataFrame({'Chi-Sq': [bp_stat], 'Prob>Chi-Sq': [bp_pvalue]})
print(bptest2)

     Chi-Sq  Prob>Chi-Sq
0  6.913966     0.545943


### Example 13.9. County Crime Rates in North Carolina¶

In [16]:
# Load the 'crime4' data.
df = dataWoo("crime4")

# Build the model once and fit it twice, so the two columns can only differ
# in how the standard errors are computed (the coefficients are identical).
crime4_model = smf.ols('clcrmrte ~  d83 + d84 + d85 + d86 + d87 + clprbarr + clprbcon + clprbpri + clavgsen + clpolpc + 1', data=df)
hetrosced_r = crime4_model.fit()              # default (nonrobust) covariance
robust_r = crime4_model.fit(cov_type='HC1')   # HC1 heteroskedasticity-robust covariance

# Side-by-side table with observation count and R-squared rows. The
# info_dict and both calls are closed explicitly so the cell parses (the
# call was previously left unterminated).
print(summary_col(
    [hetrosced_r, robust_r],
    stars=True,
    float_format='%0.3f',
    model_names=['Hetrosced\n(b/se)', 'Robust\n(b/se)'],
    info_dict={
        'N': lambda x: "{0:d}".format(int(x.nobs)),
        'R2': lambda x: "{:.3f}".format(x.rsquared),
    },
))

=============================
Hetrosced   Robust
(b/se)    (b/se)
-----------------------------
Intercept 0.008     0.008
(0.017)   (0.015)
d83       -0.100*** -0.100***
(0.024)   (0.022)
d84       -0.048**  -0.048**
(0.024)   (0.020)
d85       -0.005    -0.005
(0.023)   (0.024)
d86       0.028     0.028
(0.024)   (0.021)
d87       0.041*    0.041*
(0.024)   (0.024)
clprbarr  -0.327*** -0.327***
(0.030)   (0.051)
clprbcon  -0.238*** -0.238***
(0.018)   (0.031)
clprbpri  -0.165*** -0.165***
(0.026)   (0.035)
clavgsen  -0.022    -0.022
(0.022)   (0.025)
clpolpc   0.398***  0.398***
(0.027)   (0.076)
N         540       540
R2        0.433     0.433
=============================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01

In [17]:
# Breusch-Pagan heteroskedasticity test on the nonrobust crime4 fit,
# using the model's own design matrix as the explanatory variables.
bptest = sms.diagnostic.het_breuschpagan(hetrosced_r.resid, hetrosced_r.model.exog)

# Report the first two returned elements as the chi-square statistic
# and its p-value.
bp_stat, bp_pvalue = bptest[0], bptest[1]
bptest2 = pd.DataFrame({'Chi-Sq': [bp_stat], 'Prob>Chi-Sq': [bp_pvalue]})
print(bptest2)

      Chi-Sq  Prob>Chi-Sq
0  10.929708     0.363021