Python for Introductory Econometrics: Chap 13

Python for Introductory Econometrics

Chapter 13. Pooling Cross Sections across Time

https://www.solomonegash.com/

In [1]:
import numpy as np
import pandas as pd
import scipy as sp

import statsmodels
import statsmodels.api as sm
import statsmodels.stats.api as sms 
import statsmodels.formula.api as smf
from matplotlib import pyplot as plt
from statsmodels.iolib.summary2 import summary_col

from wooldridge import *

Example 13.1. Women’s Fertility over Time

In [2]:
df = dataWoo('fertil1')  # FERTIL1: pooled cross sections of women, survey years 1972-1984
In [3]:
# Example 13.1: pooled OLS for number of children on demographics plus
# year dummies (1972 is the omitted base year).
fert_regressors = ['educ', 'age', 'agesq', 'black', 'east', 'northcen', 'west',
                   'farm', 'othrural', 'town', 'smcity',
                   'y74', 'y76', 'y78', 'y80', 'y82', 'y84']
fert_formula = 'kids ~ ' + ' + '.join(fert_regressors)
fert_reg = smf.ols(fert_formula, data=df).fit()
print(fert_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   kids   R-squared:                       0.130
Model:                            OLS   Adj. R-squared:                  0.116
Method:                 Least Squares   F-statistic:                     9.723
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           2.42e-24
Time:                        18:14:37   Log-Likelihood:                -2091.2
No. Observations:                1129   AIC:                             4218.
Df Residuals:                    1111   BIC:                             4309.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -7.7425      3.052     -2.537      0.011     -13.730      -1.755
educ          -0.1284      0.018     -6.999      0.000      -0.164      -0.092
age            0.5321      0.138      3.845      0.000       0.261       0.804
agesq         -0.0058      0.002     -3.710      0.000      -0.009      -0.003
black          1.0757      0.174      6.198      0.000       0.735       1.416
east           0.2173      0.133      1.637      0.102      -0.043       0.478
northcen       0.3631      0.121      3.004      0.003       0.126       0.600
west           0.1976      0.167      1.184      0.237      -0.130       0.525
farm          -0.0526      0.147     -0.357      0.721      -0.341       0.236
othrural      -0.1629      0.175     -0.928      0.353      -0.507       0.181
town           0.0844      0.125      0.677      0.498      -0.160       0.329
smcity         0.2119      0.160      1.322      0.187      -0.103       0.526
y74            0.2682      0.173      1.553      0.121      -0.071       0.607
y76           -0.0974      0.179     -0.544      0.587      -0.449       0.254
y78           -0.0687      0.182     -0.378      0.706      -0.425       0.288
y80           -0.0713      0.183     -0.390      0.697      -0.430       0.287
y82           -0.5225      0.172     -3.030      0.003      -0.861      -0.184
y84           -0.5452      0.175     -3.124      0.002      -0.888      -0.203
==============================================================================
Omnibus:                        9.775   Durbin-Watson:                   2.011
Prob(Omnibus):                  0.008   Jarque-Bera (JB):                9.966
Skew:                           0.227   Prob(JB):                      0.00685
Kurtosis:                       2.920   Cond. No.                     1.32e+05
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.32e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
In [4]:
# Joint F test that all six year dummies are zero (no shift in fertility
# across survey years). The chained equalities encode 6 linear
# restrictions, matching df_num=6 in the printed result.
hypotheses = '(y74 =y76 = y78 = y80 = y82 = y84 = 0)'
f_test = fert_reg.f_test(hypotheses)
print(f_test)
<F test: F=array([[5.86950867]]), p=4.85518986757229e-06, df_denom=1111, df_num=6>
In [5]:
# Breusch-Pagan test for heteroskedasticity in the fertility regression:
# het_breuschpagan returns (LM stat, LM p-value, F stat, F p-value);
# report the LM (chi-square) version.
lm_stat, lm_pval, _, _ = sms.diagnostic.het_breuschpagan(fert_reg.resid, fert_reg.model.exog)
df2 = pd.DataFrame([{'Chi-Sq': lm_stat, 'Prob>Chi-Sq': lm_pval}])
print(df2)
      Chi-Sq  Prob>Chi-Sq
0  55.315373     0.000006

Example 13.2. Changes in the Return to Education and the Gender Wage Gap

In [6]:
# Example 13.2: wage equation pooling CPS 1978 and 1985. The y85
# interactions let the return to education (y85educ) and the gender gap
# (y85fem) differ between the two years.
wage_reg = smf.ols('lwage ~ y85 + educ + y85educ + exper + expersq + union + female + y85fem',
                   data=dataWoo("cps78_85")).fit()
print(wage_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.426
Model:                            OLS   Adj. R-squared:                  0.422
Method:                 Least Squares   F-statistic:                     99.80
Date:                Fri, 24 Apr 2020   Prob (F-statistic):          4.46e-124
Time:                        18:14:37   Log-Likelihood:                -574.24
No. Observations:                1084   AIC:                             1166.
Df Residuals:                    1075   BIC:                             1211.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.4589      0.093      4.911      0.000       0.276       0.642
y85            0.1178      0.124      0.952      0.341      -0.125       0.361
educ           0.0747      0.007     11.192      0.000       0.062       0.088
y85educ        0.0185      0.009      1.974      0.049       0.000       0.037
exper          0.0296      0.004      8.293      0.000       0.023       0.037
expersq       -0.0004   7.75e-05     -5.151      0.000      -0.001      -0.000
union          0.2021      0.030      6.672      0.000       0.143       0.262
female        -0.3167      0.037     -8.648      0.000      -0.389      -0.245
y85fem         0.0851      0.051      1.658      0.098      -0.016       0.186
==============================================================================
Omnibus:                       83.747   Durbin-Watson:                   1.918
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              317.985
Skew:                          -0.271   Prob(JB):                     8.92e-70
Kurtosis:                       5.597   Cond. No.                     8.77e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.77e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

Example 13.3. Effect of a Garbage Incinerator’s Location on Housing Prices

In [7]:
# Example 13.3: house prices near the incinerator site, estimated
# separately for 1981 (after siting) and 1978 (before).
df = dataWoo("kielmc")
garb81_reg = smf.ols('rprice ~ nearinc', data=df[(df['year']==1981)]).fit()
garb78_reg = smf.ols('rprice ~ nearinc', data=df[(df['year']==1978)]).fit()

# Fix: column labels previously read 'grab81'/'grab78' — typos for 'garb',
# matching the garb81_reg/garb78_reg model names.
print(summary_col([garb81_reg, garb78_reg], stars=True, float_format='%0.3f',
                  model_names=['garb81\n(b/se)', 'garb78\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared),
                             'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj)}))
=====================================
              grab81        grab78   
              (b/se)        (b/se)   
-------------------------------------
Intercept 101307.514*** 82517.228*** 
          (3093.027)    (2653.790)   
nearinc   -30688.274*** -18824.370***
          (5827.709)    (4744.594)   
N         142           179          
R2        0.165         0.082        
Adj.R2    0.159         0.076        
=====================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Table 13.2 Effects of Incinerator Location on Housing Prices (rprice)

In [8]:
# Table 13.2: difference-in-differences estimate of the incinerator
# effect (y81nrinc), adding house characteristics step by step.
did_base = 'rprice ~ y81 + nearinc + y81nrinc'
One = smf.ols(did_base, data=df).fit()
Two = smf.ols(did_base + ' + age + agesq', data=df).fit()
Three = smf.ols(did_base + ' + age + agesq + intst + land + area + rooms + baths',
                data=df).fit()

print(summary_col([One, Two, Three], stars=True, float_format='%0.3f',
                  model_names=['One\n(b/se)', 'Two\n(b/se)', 'Three\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared),
                             'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj)}))
===================================================
               One           Two          Three    
              (b/se)        (b/se)        (b/se)   
---------------------------------------------------
Intercept 82517.228***  89116.535***  13807.665    
          (2726.910)    (2406.051)    (11166.594)  
age                     -1494.424***  -739.451***  
                        (131.860)     (131.127)    
agesq                   8.691***      3.453***     
                        (0.848)       (0.813)      
area                                  18.086***    
                                      (2.306)      
baths                                 6977.317***  
                                      (2581.321)   
intst                                 -0.539***    
                                      (0.196)      
land                                  0.141***     
                                      (0.031)      
nearinc   -18824.370*** 9397.936*     3780.337     
          (4875.322)    (4812.222)    (4453.415)   
rooms                                 3304.227**   
                                      (1661.248)   
y81       18790.286***  21321.042***  13928.476*** 
          (4050.065)    (3443.631)    (2798.747)   
y81nrinc  -11863.903    -21920.270*** -14177.934***
          (7456.646)    (6359.745)    (4987.267)   
N         321           321           321          
R2        0.174         0.414         0.660        
Adj.R2    0.166         0.405         0.649        
===================================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
In [9]:
# Same DiD specification with log price and logged continuous controls;
# the y81nrinc coefficient is now an approximate percentage effect.
ldid_base = 'lprice ~ y81 + nearinc + y81nrinc'
lOne = smf.ols(ldid_base, data=df).fit()
lThree = smf.ols(ldid_base + ' + age + agesq + lintst + lland + larea + rooms + baths',
                 data=df).fit()

print(summary_col([lOne, lThree], stars=True, float_format='%0.3f',
                  model_names=['lOne\n(b/se)', 'lThree\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared),
                             'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj)}))
=============================
             lOne     lThree 
            (b/se)    (b/se) 
-----------------------------
Intercept 11.285*** 7.652*** 
          (0.031)   (0.416)  
age                 -0.008***
                    (0.001)  
agesq               0.000*** 
                    (0.000)  
baths               0.094*** 
                    (0.028)  
larea               0.351*** 
                    (0.051)  
lintst              -0.061*  
                    (0.032)  
lland               0.100*** 
                    (0.024)  
nearinc   -0.340*** 0.032    
          (0.055)   (0.047)  
rooms               0.047*** 
                    (0.017)  
y81       0.457***  0.426*** 
          (0.045)   (0.028)  
y81nrinc  -0.063    -0.132** 
          (0.083)   (0.052)  
N         321       321      
R2        0.409     0.790    
Adj.R2    0.403     0.784    
=============================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01

Example 13.4. Effect of Worker Compensation Laws on Weeks out of Work

In [10]:
# Example 13.4 (Kentucky sample): DiD effect of the 1980 benefit-cap
# increase on log weeks out of work; afhigh = afchnge * highearn.
df = dataWoo("injury")
ky = df[df['ky'] == 1]
injury_reg = smf.ols('ldurat~ afchnge + highearn + afhigh', data=ky).fit()
print(injury_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 ldurat   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     39.54
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           2.81e-25
Time:                        18:14:38   Log-Likelihood:                -9322.0
No. Observations:                5626   AIC:                         1.865e+04
Df Residuals:                    5622   BIC:                         1.868e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.1256      0.031     36.621      0.000       1.065       1.186
afchnge        0.0077      0.045      0.171      0.864      -0.080       0.095
highearn       0.2565      0.047      5.406      0.000       0.163       0.349
afhigh         0.1906      0.069      2.782      0.005       0.056       0.325
==============================================================================
Omnibus:                       29.931   Durbin-Watson:                   1.905
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               41.672
Skew:                           0.037   Prob(JB):                     8.93e-10
Kurtosis:                       3.415   Cond. No.                         6.38
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Example 13.5. Sleeping versus Working

In [11]:
# Example 13.5: first-differenced sleep equation on the 1975-81 panel;
# differencing the two waves removes the person-specific fixed effect.
sleep_reg = smf.ols('cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth',
                    data=dataWoo("slp75_81")).fit()
print(sleep_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                cslpnap   R-squared:                       0.150
Model:                            OLS   Adj. R-squared:                  0.131
Method:                 Least Squares   F-statistic:                     8.191
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           3.83e-07
Time:                        18:14:38   Log-Likelihood:                -1864.4
No. Observations:                 239   AIC:                             3741.
Df Residuals:                     233   BIC:                             3762.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -92.6340     45.866     -2.020      0.045    -182.999      -2.269
ctotwrk       -0.2267      0.036     -6.287      0.000      -0.298      -0.156
ceduc         -0.0245     48.759     -0.001      1.000     -96.090      96.041
cmarr        104.2139     92.855      1.122      0.263     -78.729     287.157
cyngkid       94.6654     87.653      1.080      0.281     -78.027     267.358
cgdhlth       87.5778     76.599      1.143      0.254     -63.338     238.493
==============================================================================
Omnibus:                       31.927   Durbin-Watson:                   1.890
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               57.378
Skew:                          -0.719   Prob(JB):                     3.47e-13
Kurtosis:                       4.922   Cond. No.                     2.72e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.72e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

Distributed Lag of Crime Rate on Clear-Up Rate

In [12]:
# Distributed lag: change in log crime on current- and prior-year
# changes in the clear-up percentage (crime3, first differences).
crime3_reg = smf.ols('clcrime ~ cclrprc1 + cclrprc2', data=dataWoo("crime3")).fit()
print(crime3_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                clcrime   R-squared:                       0.193
Model:                            OLS   Adj. R-squared:                  0.161
Method:                 Least Squares   F-statistic:                     5.992
Date:                Fri, 24 Apr 2020   Prob (F-statistic):            0.00465
Time:                        18:14:38   Log-Likelihood:                -17.194
No. Observations:                  53   AIC:                             40.39
Df Residuals:                      50   BIC:                             46.30
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0857      0.064      1.343      0.185      -0.042       0.214
cclrprc1      -0.0040      0.005     -0.858      0.395      -0.014       0.005
cclrprc2      -0.0132      0.005     -2.540      0.014      -0.024      -0.003
==============================================================================
Omnibus:                        3.032   Durbin-Watson:                   2.203
Prob(Omnibus):                  0.220   Jarque-Bera (JB):                2.071
Skew:                          -0.344   Prob(JB):                        0.355
Kurtosis:                       3.681   Cond. No.                         23.6
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Example 13.8. Effect of Enterprise Zones on Unemployment Claims

In [13]:
# Enterprise zones (ezunem): growth in unemployment claims on year
# dummies and the zone indicator cez (first-differenced model).
ez_formula = 'guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + cez'
ezon_reg = smf.ols(ez_formula, data=dataWoo("ezunem")).fit()
print(ezon_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 guclms   R-squared:                       0.623
Model:                            OLS   Adj. R-squared:                  0.605
Method:                 Least Squares   F-statistic:                     34.50
Date:                Fri, 24 Apr 2020   Prob (F-statistic):           1.08e-31
Time:                        18:14:38   Log-Likelihood:                 24.553
No. Observations:                 176   AIC:                            -31.11
Df Residuals:                     167   BIC:                            -2.573
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.3216      0.046     -6.982      0.000      -0.413      -0.231
d82            0.7788      0.065     11.954      0.000       0.650       0.907
d83           -0.0331      0.065     -0.508      0.612      -0.162       0.095
d84           -0.0171      0.069     -0.250      0.803      -0.152       0.118
d85            0.3231      0.067      4.845      0.000       0.191       0.455
d86            0.2922      0.065      4.485      0.000       0.164       0.421
d87            0.0539      0.065      0.828      0.409      -0.075       0.183
d88           -0.0171      0.065     -0.262      0.794      -0.146       0.112
cez           -0.1819      0.078     -2.326      0.021      -0.336      -0.028
==============================================================================
Omnibus:                        0.858   Durbin-Watson:                   2.370
Prob(Omnibus):                  0.651   Jarque-Bera (JB):                0.871
Skew:                           0.166   Prob(JB):                        0.647
Kurtosis:                       2.905   Cond. No.                         8.96
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [14]:
# Percentage effect on claims implied by the cez coefficient in the
# log-difference model: 100 * (exp(beta) - 1). Read the estimate from
# the fitted model instead of hard-coding the rounded value -.1819,
# which could silently drift if the regression above changes.
cez = (np.exp(ezon_reg.params['cez']) - 1) * 100
cez
Out[14]:
-16.631529528207743
In [15]:
# Breusch-Pagan test on the enterprise-zone regression residuals
# (LM statistic and its chi-square p-value).
lm_stat, lm_pval, _, _ = sms.diagnostic.het_breuschpagan(ezon_reg.resid, ezon_reg.model.exog)
bptest2 = pd.DataFrame([{'Chi-Sq': lm_stat, 'Prob>Chi-Sq': lm_pval}])
print(bptest2)
     Chi-Sq  Prob>Chi-Sq
0  6.913966     0.545943

Example 13.9. County Crime Rates in North Carolina

In [16]:
# Example 13.9: first-differenced crime-rate equation for NC counties.
# The same specification is fit twice — with conventional OLS standard
# errors and with heteroskedasticity-robust (HC1) ones — so define the
# formula once to keep the two fits guaranteed identical.
df = dataWoo("crime4")
crime_formula = ('clcrmrte ~  d83 + d84 + d85 + d86 + d87 + clprbarr + '
                 'clprbcon + clprbpri + clavgsen + clpolpc + 1')
hetrosced_r = smf.ols(crime_formula, data=df).fit()
robust_r = smf.ols(crime_formula, data=df).fit(cov_type='HC1')

print(summary_col([hetrosced_r, robust_r], stars=True, float_format='%0.3f',
                  model_names=['Hetrosced\n(b/se)', 'Robust\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared),
                             'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj)}))
=============================
          Hetrosced   Robust 
            (b/se)    (b/se) 
-----------------------------
Intercept 0.008     0.008    
          (0.017)   (0.015)  
d83       -0.100*** -0.100***
          (0.024)   (0.022)  
d84       -0.048**  -0.048** 
          (0.024)   (0.020)  
d85       -0.005    -0.005   
          (0.023)   (0.024)  
d86       0.028     0.028    
          (0.024)   (0.021)  
d87       0.041*    0.041*   
          (0.024)   (0.024)  
clprbarr  -0.327*** -0.327***
          (0.030)   (0.051)  
clprbcon  -0.238*** -0.238***
          (0.018)   (0.031)  
clprbpri  -0.165*** -0.165***
          (0.026)   (0.035)  
clavgsen  -0.022    -0.022   
          (0.022)   (0.025)  
clpolpc   0.398***  0.398*** 
          (0.027)   (0.076)  
N         540       540      
R2        0.433     0.433    
Adj.R2    0.422     0.422    
=============================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01
In [17]:
# Breusch-Pagan test for the non-robust crime-rate regression; a large
# p-value here would suggest the robust SEs are not strictly needed.
lm_stat, lm_pval, _, _ = sms.diagnostic.het_breuschpagan(hetrosced_r.resid, hetrosced_r.model.exog)
bptest2 = pd.DataFrame([{'Chi-Sq': lm_stat, 'Prob>Chi-Sq': lm_pval}])
print(bptest2)
      Chi-Sq  Prob>Chi-Sq
0  10.929708     0.363021