Python for Introductory Econometrics: Chap 9

Python for Introductory Econometrics

Chapter 9. More on Specification and Data Issues

https://www.solomonegash.com/ | April 20, 2020

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss

import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

from wooldridge import *

Example 9.1. Economic Model of Crime

In [2]:
df = dataWoo('crime1')
In [3]:
crime_hetr_r = smf.ols('narr86  ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
crime_hetr = smf.ols('narr86  ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black + hispan + pcnvsq + pt86sq + inc86sq + 1', data=df).fit()
crime_robust = smf.ols('narr86  ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black + hispan + pcnvsq + pt86sq + inc86sq + 1', data=df).fit(cov_type='HC1')
print(summary_col([crime_hetr_r, crime_hetr, crime_robust],stars=True,float_format='%0.3f',
                  model_names=['Hetrosced_r\n(b/se)','Hetrosced\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
=========================================
          Hetrosced_r Hetrosced   Robust 
             (b/se)     (b/se)    (b/se) 
-----------------------------------------
Intercept 0.569***    0.505***  0.505*** 
          (0.036)     (0.037)   (0.039)  
avgsen    -0.011      -0.017    -0.017   
          (0.012)     (0.012)   (0.014)  
black     0.327***    0.292***  0.292*** 
          (0.045)     (0.045)   (0.058)  
hispan    0.194***    0.164***  0.164*** 
          (0.040)     (0.039)   (0.040)  
inc86     -0.001***   -0.003*** -0.003***
          (0.000)     (0.001)   (0.001)  
inc86sq               0.000***  0.000*** 
                      (0.000)   (0.000)  
pcnv      -0.133***   0.553***  0.553*** 
          (0.040)     (0.154)   (0.170)  
pcnvsq                -0.730*** -0.730***
                      (0.156)   (0.172)  
pt86sq                -0.030*** -0.030***
                      (0.004)   (0.006)  
ptime86   -0.041***   0.287***  0.287*** 
          (0.009)     (0.044)   (0.069)  
qemp86    -0.051***   -0.014    -0.014   
          (0.014)     (0.017)   (0.017)  
tottime   0.012       0.012     0.012    
          (0.009)     (0.009)   (0.013)  
N         2725        2725      2725     
R2        0.072       0.103     0.103    
=========================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Example 9.2. Housing Price Equation

In [4]:
df = dataWoo("hprice1")

hprice_reg = smf.ols('price ~ lotsize + sqrft + bdrms', data=df).fit()
prhat2=hprice_reg.predict()**2
prhat3=hprice_reg.predict()**3
hprice_reg_pol = smf.ols('price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1 ', data=df).fit()

print(summary_col([hprice_reg, hprice_reg_pol],stars=True,float_format='%0.3f',
                  model_names=['Linear\n(b/se)','Polynomial\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared),
                           'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
=============================
           Linear  Polynomial
           (b/se)    (b/se)  
-----------------------------
Intercept -21.770  166.097   
          (29.475) (317.433) 
bdrms     13.853   2.175     
          (9.010)  (33.888)  
lotsize   0.002*** 0.000     
          (0.001)  (0.005)   
prhat2             0.000     
                   (0.007)   
prhat3             0.000     
                   (0.000)   
sqrft     0.123*** 0.018     
          (0.013)  (0.299)   
N         88       88        
R2        0.672    0.706     
Adj.R2    0.661    0.688     
=============================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01
In [5]:
hypotheses = '(prhat2  = prhat3 = 0)'
f_test = hprice_reg_pol.f_test(hypotheses)
print(f_test)
<F test: F=array([[4.66820553]]), p=0.012021711442913613, df_denom=82, df_num=2>

Logarithmic form

In [6]:
lhprice_reg = smf.ols('lprice ~ llotsize + lsqrft + bdrms', data=df).fit()
lprhat2=lhprice_reg.predict()**2
lprhat3=lhprice_reg.predict()**3
lhprice_reg_pol = smf.ols('lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1 ', data=df).fit()

print(summary_col([lhprice_reg, lhprice_reg_pol],stars=True,float_format='%0.3f',
                  model_names=['Linear_L\n(b/se)','Polynomial_L\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared), 
                           'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
===============================
          Linear_L Polynomial_L
           (b/se)     (b/se)   
-------------------------------
Intercept -1.297** 87.886      
          (0.651)  (240.974)   
bdrms     0.037    -0.925      
          (0.028)  (2.770)     
llotsize  0.168*** -4.181      
          (0.038)  (12.595)    
lprhat2            3.910       
                   (13.014)    
lprhat3            -0.193      
                   (0.752)     
lsqrft    0.700*** -17.349     
          (0.093)  (52.490)    
N         88       88          
R2        0.643    0.664       
Adj.R2    0.630    0.643       
===============================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
In [7]:
hypotheses = '(lprhat2  = lprhat3 = 0)'
f_test = lhprice_reg_pol.f_test(hypotheses)
print(f_test)
<F test: F=array([[2.5650462]]), p=0.08307546624251436, df_denom=82, df_num=2>

Example 9.3. IQ as a Proxy for Ability

In [8]:
df = dataWoo("wage2")
IQA = smf.ols('lwage ~ educ + exper + tenure + married + south + urban + black + 1', data=df).fit()
IQB = smf.ols('lwage ~ educ + exper + tenure + married + south + urban + black + IQ + 1', data=df).fit()
IQC = smf.ols('lwage ~ educ + exper + tenure + married + south + urban + black + IQ + educ:IQ + 1', data=df).fit()

print(summary_col([IQA, IQB, IQC],stars=True,float_format='%0.3f',
                  model_names=['IQA\n(b/se)','IQB\n(b/se)','IQC\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared), 
                           'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
=======================================
             IQA       IQB       IQC   
            (b/se)    (b/se)    (b/se) 
---------------------------------------
IQ                  0.004***  -0.001   
                    (0.001)   (0.005)  
Intercept 5.395***  5.176***  5.648*** 
          (0.113)   (0.128)   (0.546)  
black     -0.188*** -0.143*** -0.147***
          (0.038)   (0.039)   (0.040)  
educ      0.065***  0.054***  0.018    
          (0.006)   (0.007)   (0.041)  
educ:IQ                       0.000    
                              (0.000)  
exper     0.014***  0.014***  0.014*** 
          (0.003)   (0.003)   (0.003)  
married   0.199***  0.200***  0.201*** 
          (0.039)   (0.039)   (0.039)  
south     -0.091*** -0.080*** -0.080***
          (0.026)   (0.026)   (0.026)  
tenure    0.012***  0.011***  0.011*** 
          (0.002)   (0.002)   (0.002)  
urban     0.184***  0.182***  0.184*** 
          (0.027)   (0.027)   (0.027)  
N         935       935       935      
R2        0.253     0.263     0.263    
Adj.R2    0.247     0.256     0.256    
=======================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Example 9.4. City Crime Rates

In [9]:
df = dataWoo("crime2")
df = df[(df['year']==87)]
crimeA = smf.ols('lcrmrte ~ unem + llawexpc + 1', data=df).fit()
crimeB = smf.ols('lcrmrte ~ unem + llawexpc + lcrmrt_1 + 1', data=df).fit()

print(summary_col([crimeA, crimeB],stars=True,float_format='%0.3f',
                  model_names=['crimeA\n(b/se)','crimeA\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared), 
                           'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
============================
           crimeA    crimeA 
          (b/se) I (b/se) II
----------------------------
Intercept 3.343**  0.076    
          (1.251)  (0.821)  
lcrmrt_1           1.194*** 
                   (0.132)  
llawexpc  0.203    -0.140   
          (0.173)  (0.109)  
unem      -0.029   0.009    
          (0.032)  (0.020)  
N         46       46       
R2        0.057    0.680    
Adj.R2    0.013    0.657    
============================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01

Example 9.8. R&D Intensity and Firm Size

In [10]:
df = dataWoo("rdchem")
RD1 = smf.ols('rdintens ~ sales + profmarg + 1', data=df).fit()
RD2 = smf.ols('rdintens ~ sales + profmarg + 1', data=df[(df['sales']<30000)]).fit()

print(summary_col([RD1, RD2],stars=True,float_format='%0.3f',
                  model_names=['RD1\n(b/se)','RD2\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared), 
                           'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
===========================
            RD1      RD2   
           (b/se)   (b/se) 
---------------------------
Intercept 2.625*** 2.297***
          (0.586)  (0.592) 
sales     0.000    0.000** 
          (0.000)  (0.000) 
profmarg  0.045    0.048   
          (0.046)  (0.044) 
N         32       31      
R2        0.076    0.173   
Adj.R2    0.012    0.114   
===========================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01

Example 9.9. R&D Intensity

In [11]:
df = dataWoo("rdchem")
lRD1 = smf.ols('lrd ~ lsales + profmarg + 1', data=df).fit()
lRD2 = smf.ols('lrd ~ lsales + profmarg + 1', data=df[(df['sales']<30000)]).fit()

print(summary_col([lRD1, lRD2],stars=True,float_format='%0.3f',
                  model_names=['RD1_Log\n(b/se)','RD2_Log\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared), 
                           'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
=============================
           RD1_Log   RD2_Log 
            (b/se)    (b/se) 
-----------------------------
Intercept -4.378*** -4.404***
          (0.468)   (0.511)  
lsales    1.084***  1.088*** 
          (0.060)   (0.067)  
profmarg  0.022     0.022    
          (0.013)   (0.013)  
N         32        31       
R2        0.918     0.904    
Adj.R2    0.912     0.897    
=============================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01

Example 9.10. State Infant Mortality Rates

In [12]:
df = dataWoo("infmrt")
df = df[(df['year']==1990)]
infant1  = smf.ols('infmort ~ lpcinc + lphysic + lpopul + 1', data=df).fit()
infant2  = smf.ols('infmort ~ lpcinc + lphysic + lpopul + 1 ', data=df[(df['DC']==0)]).fit()

print(summary_col([infant1, infant2],stars=True,float_format='%0.3f',
                  model_names=['Infmort1\n(b/se)','Infmort2\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared), 
                           'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
===========================
          Infmort1 Infmort2
           (b/se)   (b/se) 
---------------------------
Intercept 33.859   23.955* 
          (20.428) (12.419)
lpcinc    -4.685*  -0.567  
          (2.604)  (1.641) 
lphysic   4.153*** -2.742**
          (1.513)  (1.191) 
lpopul    -0.088   0.629***
          (0.287)  (0.191) 
N         51       50      
R2        0.139    0.273   
Adj.R2    0.084    0.226   
===========================
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01