Python for Introductory Econometrics

Chapter 8. Heteroskedasticity

https://www.solomonegash.com/

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss

import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

from wooldridge import dataWoo

Example 8.1. Log Wage Equation with Heteroskedasticity-Robust Standard Errors

In [2]:
df = dataWoo('wage1')

# Dummies for the marital-status/gender groups
df['marrmale'] = ((df['female'] == 0) & (df['married'] == 1)).astype(int)
df['marrfem'] = ((df['female'] == 1) & (df['married'] == 1)).astype(int)
df['singfem'] = ((df['female'] == 1) & (df['married'] == 0)).astype(int)
In [3]:
wage_hetr = smf.ols('lwage ~ marrmale + marrfem + singfem + educ + exper + expersq + tenure + tenursq + 1', data=df).fit()
wage_robust = smf.ols('lwage ~ marrmale + marrfem + singfem + educ + exper + expersq + tenure + tenursq + 1', data=df).fit(cov_type='HC1')

print(summary_col([wage_hetr, wage_robust],stars=True,float_format='%0.3f',
                  model_names=['Heteroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
==================================
          Heteroskedastic  Robust 
              (b/se)       (b/se) 
----------------------------------
Intercept 0.321***       0.321*** 
          (0.100)        (0.109)  
marrmale  0.213***       0.213*** 
          (0.055)        (0.057)  
marrfem   -0.198***      -0.198***
          (0.058)        (0.059)  
singfem   -0.110**       -0.110*  
          (0.056)        (0.057)  
educ      0.079***       0.079*** 
          (0.007)        (0.007)  
exper     0.027***       0.027*** 
          (0.005)        (0.005)  
expersq   -0.001***      -0.001***
          (0.000)        (0.000)  
tenure    0.029***       0.029*** 
          (0.007)        (0.007)  
tenursq   -0.001**       -0.001** 
          (0.000)        (0.000)  
N         526            526      
R2        0.461          0.461    
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
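
statsmodels supports several flavours of heteroskedasticity-robust covariance beyond HC1 (HC0, HC2, HC3, which differ in their small-sample corrections). A minimal sketch, reusing the df and formula above, that compares the standard error of marrmale across variants (the names formula, cov, and res are illustrative):

formula = 'lwage ~ marrmale + marrfem + singfem + educ + exper + expersq + tenure + tenursq'
for cov in ['nonrobust', 'HC0', 'HC1', 'HC2', 'HC3']:
    res = smf.ols(formula, data=df).fit(cov_type=cov)
    print(cov, round(res.bse['marrmale'], 4))  # SE of marrmale under each estimator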

Example 8.2. Heteroskedasticity-Robust F Statistic

In [4]:
df = dataWoo('gpa3')
df = df[df['spring'] == 1]  # keep spring-term observations only

gpa_hetr = smf.ols('cumgpa ~ sat + hsperc + tothrs + female + black + white + 1', data=df).fit()
gpa_robust = smf.ols('cumgpa ~ sat + hsperc + tothrs + female + black + white + 1', data=df).fit(cov_type='HC1')

print(summary_col([gpa_hetr, gpa_robust],stars=True,float_format='%0.3f',
                  model_names=['Heteroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
==================================
          Heteroskedastic  Robust 
              (b/se)       (b/se) 
----------------------------------
Intercept 1.470***       1.470*** 
          (0.230)        (0.221)  
sat       0.001***       0.001*** 
          (0.000)        (0.000)  
hsperc    -0.009***      -0.009***
          (0.001)        (0.001)  
tothrs    0.003***       0.003*** 
          (0.001)        (0.001)  
female    0.303***       0.303*** 
          (0.059)        (0.059)  
black     -0.128         -0.128   
          (0.147)        (0.119)  
white     -0.059         -0.059   
          (0.141)        (0.111)  
N         366            366      
R2        0.401          0.401    
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
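
The example's title refers to the robust F statistic, which can be read off the fitted results directly: f_test computes a Wald test using whatever covariance matrix the model was estimated with. A sketch of the joint test that black and white both have zero coefficients:

print(gpa_robust.f_test('black = 0, white = 0'))  # heteroskedasticity-robust F
print(gpa_hetr.f_test('black = 0, white = 0'))    # usual (nonrobust) F, for comparison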

Example 8.3. Heteroskedasticity-Robust LM Statistic

In [5]:
df = dataWoo('crime1')
df['avgsensq'] = np.square(df['avgsen'])

crime_hetr = smf.ols('narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
crime_robust = smf.ols('narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit(cov_type='HC1')

print(summary_col([crime_hetr, crime_robust],stars=True,float_format='%0.3f',
                  model_names=['Heteroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
==================================
          Heteroskedastic  Robust 
              (b/se)       (b/se) 
----------------------------------
Intercept 0.567***       0.567*** 
          (0.036)        (0.040)  
pcnv      -0.136***      -0.136***
          (0.040)        (0.034)  
avgsen    0.018*         0.018*   
          (0.010)        (0.010)  
avgsensq  -0.001*        -0.001** 
          (0.000)        (0.000)  
ptime86   -0.039***      -0.039***
          (0.009)        (0.006)  
qemp86    -0.051***      -0.051***
          (0.014)        (0.014)  
inc86     -0.001***      -0.001***
          (0.000)        (0.000)  
black     0.325***       0.325*** 
          (0.045)        (0.059)  
hispan    0.193***       0.193*** 
          (0.040)        (0.040)  
N         2725           2725     
R2        0.073          0.073    
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

LM Statistic (non-robust; see Section 5-2)

In [6]:
# Restricted model: avgsen and avgsensq excluded
crime_restr = smf.ols('narr86 ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
uhat = df.narr86 - crime_restr.predict()  # residuals from the restricted model

uhat_reg = smf.ols('uhat ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
print(uhat_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   uhat   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.4319
Date:                Sun, 12 Apr 2020   Prob (F-statistic):              0.902
Time:                        19:19:41   Log-Likelihood:                -3349.2
No. Observations:                2725   AIC:                             6716.
Df Residuals:                    2716   BIC:                             6770.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0033      0.036     -0.092      0.927      -0.074       0.067
pcnv          -0.0033      0.040     -0.082      0.935      -0.082       0.076
avgsen         0.0178      0.010      1.840      0.066      -0.001       0.037
avgsensq      -0.0005      0.000     -1.738      0.082      -0.001    6.61e-05
ptime86       -0.0016      0.009     -0.180      0.857      -0.019       0.015
qemp86         0.0005      0.014      0.033      0.974      -0.028       0.029
inc86       1.031e-05      0.000      0.030      0.976      -0.001       0.001
black         -0.0051      0.045     -0.112      0.911      -0.094       0.084
hispan        -0.0021      0.040     -0.052      0.958      -0.080       0.076
==============================================================================
Omnibus:                     2400.820   Durbin-Watson:                   1.841
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           112791.202
Skew:                           3.994   Prob(JB):                         0.00
Kurtosis:                      33.489   Cond. No.                         380.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [7]:
NRsq = 2725 * 0.00127  # n * R-squared from the auxiliary regression (R-squared kept unrounded)
NRsq
Out[7]:
3.46075
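
Under the null, the LM statistic is chi-squared with 2 degrees of freedom (one per excluded regressor: avgsen and avgsensq). The p-value, using the scipy.stats alias ss imported at the top:

ss.chi2.sf(NRsq, 2)  # p-value of the non-robust LM statistic, roughly 0.18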

LM Statistic (robust)

In [8]:
# Partial out the included regressors from each excluded variable and
# multiply the residuals by uhat from the restricted model
avgsen_reg = smf.ols('avgsen ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
df['uhat_avgsen'] = (df.avgsen - avgsen_reg.predict()) * uhat
avgsensq_reg = smf.ols('avgsensq ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
df['uhat_avgsensq'] = (df.avgsensq - avgsensq_reg.predict()) * uhat
df['one'] = 1

# Regress 1 on the products with no intercept; the robust LM statistic
# is n - SSR = n * (uncentered R-squared) from this regression
lm_robust = smf.ols('one ~ uhat_avgsen + uhat_avgsensq + 0', data=df).fit()
print(lm_robust.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    one   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     2.000
Date:                Sun, 12 Apr 2020   Prob (F-statistic):              0.136
Time:                        19:19:41   Log-Likelihood:                -3864.6
No. Observations:                2725   AIC:                             7733.
Df Residuals:                    2723   BIC:                             7745.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
=================================================================================
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
uhat_avgsen       0.0278      0.014      1.976      0.048       0.000       0.055
uhat_avgsensq    -0.0010      0.001     -1.907      0.057      -0.002    2.96e-05
==============================================================================
Omnibus:                     5819.318   Durbin-Watson:                   0.003
Prob(Omnibus):                  0.000   Jarque-Bera (JB):         34188534.150
Skew:                         -18.209   Prob(JB):                         0.00
Kurtosis:                     550.525   Cond. No.                         57.8
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [9]:
robust_NRsq = 2725 * 0.001467  # n * (uncentered R-squared), i.e. n - SSR
robust_NRsq
Out[9]:
3.997575
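
The robust LM statistic has the same reference distribution, chi-squared with 2 degrees of freedom:

ss.chi2.sf(robust_NRsq, 2)  # p-value of the robust LM statistic, about 0.14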

Example 8.4. Heteroskedasticity in Housing Price Equations

In [10]:
df = dataWoo('hprice1')
hprice_hetr = smf.ols('price ~ lotsize + sqrft + bdrms + 1', data=df).fit()
df['uhat_hp'] = np.square(df.price - hprice_hetr.predict())  # squared OLS residuals
uhat_hp_reg = smf.ols('uhat_hp ~ lotsize + sqrft + bdrms + 1', data=df).fit()  # Breusch-Pagan auxiliary regression
print(uhat_hp_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                uhat_hp   R-squared:                       0.160
Model:                            OLS   Adj. R-squared:                  0.130
Method:                 Least Squares   F-statistic:                     5.339
Date:                Sun, 12 Apr 2020   Prob (F-statistic):            0.00205
Time:                        19:19:41   Log-Likelihood:                -896.99
No. Observations:                  88   AIC:                             1802.
Df Residuals:                      84   BIC:                             1812.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept  -5522.7947   3259.478     -1.694      0.094    -1.2e+04     959.035
lotsize        0.2015      0.071      2.838      0.006       0.060       0.343
sqrft          1.6910      1.464      1.155      0.251      -1.220       4.602
bdrms       1041.7602    996.381      1.046      0.299    -939.653    3023.173
==============================================================================
Omnibus:                      115.167   Durbin-Watson:                   2.351
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2267.859
Skew:                           4.381   Prob(JB):                         0.00
Kurtosis:                      26.276   Cond. No.                     6.41e+04
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 6.41e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [11]:
NRsq = 88 * 0.160  # Breusch-Pagan LM statistic: n * R-squared
NRsq
Out[11]:
14.08
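
The same Breusch-Pagan statistic ships pre-packaged in statsmodels. A sketch, assuming the fitted hprice_hetr model above, that should reproduce the n * R-squared computation together with its chi-squared p-value:

from statsmodels.stats.diagnostic import het_breuschpagan
lm, lm_pval, f_stat, f_pval = het_breuschpagan(hprice_hetr.resid, hprice_hetr.model.exog)
print(lm, lm_pval)  # LM statistic and chi-squared(3) p-value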
In [12]:
hprice_hetr_lp = smf.ols('lprice ~ llotsize + lsqrft + bdrms + 1', data=df).fit()
df['uhat_hp_lp'] = np.square(df.lprice - hprice_hetr_lp.predict())  # squared residuals from the log model
uhat_hp_lp_reg = smf.ols('uhat_hp_lp ~ llotsize + lsqrft + bdrms + 1', data=df).fit()
print(uhat_hp_lp_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:             uhat_hp_lp   R-squared:                       0.048
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     1.412
Date:                Sun, 12 Apr 2020   Prob (F-statistic):              0.245
Time:                        19:19:41   Log-Likelihood:                 107.40
No. Observations:                  88   AIC:                            -206.8
Df Residuals:                      84   BIC:                            -196.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5100      0.258      1.978      0.051      -0.003       1.023
llotsize      -0.0070      0.015     -0.463      0.645      -0.037       0.023
lsqrft        -0.0627      0.037     -1.706      0.092      -0.136       0.010
bdrms          0.0168      0.011      1.545      0.126      -0.005       0.039
==============================================================================
Omnibus:                      115.509   Durbin-Watson:                   2.110
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2070.311
Skew:                           4.470   Prob(JB):                         0.00
Kurtosis:                      25.016   Cond. No.                         410.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [13]:
NRsq = 88 * 0.048
NRsq
Out[13]:
4.224
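
With three regressors in the auxiliary regression, the statistic is chi-squared with 3 degrees of freedom under homoskedasticity:

ss.chi2.sf(NRsq, 3)  # p-value for the log-model Breusch-Pagan test, roughly 0.24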

Example 8.5. Special Form of the White Test

In [14]:
yhat = hprice_hetr_lp.predict()
yhat_sq = np.square(yhat)
special_reg = smf.ols('uhat_hp_lp ~ yhat + yhat_sq + 1', data=df).fit()  # patsy finds yhat and yhat_sq in the calling environment
print(special_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:             uhat_hp_lp   R-squared:                       0.039
Model:                            OLS   Adj. R-squared:                  0.017
Method:                 Least Squares   F-statistic:                     1.733
Date:                Sun, 12 Apr 2020   Prob (F-statistic):              0.183
Time:                        19:19:41   Log-Likelihood:                 106.99
No. Observations:                  88   AIC:                            -208.0
Df Residuals:                      85   BIC:                            -200.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.0468      3.345      1.509      0.135      -1.604      11.698
yhat          -1.7092      1.163     -1.469      0.145      -4.022       0.604
yhat_sq        0.1451      0.101      1.437      0.154      -0.056       0.346
==============================================================================
Omnibus:                      114.590   Durbin-Watson:                   2.144
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2001.901
Skew:                           4.427   Prob(JB):                         0.00
Kurtosis:                      24.624   Cond. No.                     1.48e+04
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.48e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [15]:
NRsq = 88 * 0.0392  # n * R-squared (R-squared kept unrounded)
NRsq
Out[15]:
3.4495999999999998
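
The special form of the White test imposes only two restrictions (on yhat and yhat_sq), so the reference distribution is chi-squared with 2 degrees of freedom:

ss.chi2.sf(NRsq, 2)  # p-value of the special-form White statistic, about 0.18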

Example 8.6. Financial Wealth Equation

(& Table 8.2)

In [16]:
df = dataWoo('401ksubs')
df = df[df['fsize'] == 1].copy()  # single-person households
df['age25sq'] = np.square(df['age'] - 25)
OLS1 = smf.ols('nettfa ~ inc + 1', data=df).fit(cov_type='HC1')
OLS2 = smf.ols('nettfa ~ inc + age25sq + male + e401k + 1', data=df).fit(cov_type='HC1')
df['w'] = 1 / df.inc  # Var(u|inc) is assumed proportional to inc, so weight by 1/inc
WLS1 = smf.wls('nettfa ~ inc + 1', weights=df.w, data=df).fit()
WLS2 = smf.wls('nettfa ~ inc + age25sq + male + e401k + 1', weights=df.w, data=df).fit()
WLS3 = smf.wls('nettfa ~ inc + age25sq + male + e401k + 1', weights=df.w, data=df).fit(cov_type='HC1') # Table 8.2

print(summary_col([OLS1, OLS2, WLS1, WLS2, WLS3 ],stars=True,float_format='%0.3f',
                  model_names=['OLS1\n(b/se)','OLS2\n(b/se)', 'WLS1\n(b/se)','WLS2\n(b/se)', 'WLS3\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
===============================================================
             OLS1       OLS2       WLS1      WLS2       WLS3   
            (b/se)     (b/se)     (b/se)    (b/se)     (b/se)  
---------------------------------------------------------------
Intercept -10.571*** -20.985*** -9.581*** -16.703*** -16.703***
          (2.530)    (3.495)    (1.653)   (1.958)    (2.243)   
age25sq              0.025***             0.018***   0.018***  
                     (0.004)              (0.002)    (0.003)   
e401k                6.886***             5.188***   5.188***  
                     (2.287)              (1.703)    (1.572)   
inc       0.821***   0.771***   0.787***  0.740***   0.740***  
          (0.104)    (0.100)    (0.063)   (0.064)    (0.075)   
male                 2.478                1.841      1.841     
                     (2.058)              (1.564)    (1.311)   
N         2017       2017       2017      2017       2017      
R2        0.083      0.128      0.071     0.112      0.112     
===============================================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
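
WLS with weights 1/inc is numerically the same as OLS after dividing every variable, including the constant, through by sqrt(inc). A minimal sketch verifying this for the simple regression (sw and chk are illustrative names; the estimates should match WLS1.params):

sw = np.sqrt(df.inc)
chk = sm.OLS(df.nettfa / sw,
             pd.DataFrame({'const': 1 / sw, 'inc': df.inc / sw})).fit()
print(chk.params)  # should equal the WLS1 intercept and slope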

Example 8.7. Demand for Cigarettes

In [17]:
df = dataWoo('smoke')
OLS_smk= smf.ols('cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1', data=df).fit()
print(OLS_smk.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   cigs   R-squared:                       0.053
Model:                            OLS   Adj. R-squared:                  0.046
Method:                 Least Squares   F-statistic:                     7.423
Date:                Sun, 12 Apr 2020   Prob (F-statistic):           9.50e-08
Time:                        19:19:41   Log-Likelihood:                -3236.2
No. Observations:                 807   AIC:                             6486.
Df Residuals:                     800   BIC:                             6519.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.6398     24.079     -0.151      0.880     -50.905      43.625
lincome        0.8803      0.728      1.210      0.227      -0.548       2.309
lcigpric      -0.7509      5.773     -0.130      0.897     -12.084      10.582
educ          -0.5015      0.167     -3.002      0.003      -0.829      -0.174
age            0.7707      0.160      4.813      0.000       0.456       1.085
agesq         -0.0090      0.002     -5.176      0.000      -0.012      -0.006
restaurn      -2.8251      1.112     -2.541      0.011      -5.007      -0.643
==============================================================================
Omnibus:                      225.317   Durbin-Watson:                   2.013
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              494.255
Skew:                           1.536   Prob(JB):                    4.72e-108
Kurtosis:                       5.294   Cond. No.                     1.33e+05
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.33e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
In [18]:
# FGLS: model log(u^2) as a linear function of the regressors, then
# weight by the inverse of the fitted variance
luhat_sq = np.log(np.square(df.cigs - OLS_smk.predict()))
luhat_sq_reg = smf.ols('luhat_sq ~ lincome + lcigpric + educ + age + agesq + restaurn + 1', data=df).fit()
luhat_sq_hat = np.exp(luhat_sq_reg.predict())  # estimated variance function h(x)
w = 1 / luhat_sq_hat
GLS_smk = smf.wls('cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1', weights=w, data=df).fit()
print(GLS_smk.summary())
                            WLS Regression Results                            
==============================================================================
Dep. Variable:                   cigs   R-squared:                       0.113
Model:                            WLS   Adj. R-squared:                  0.107
Method:                 Least Squares   F-statistic:                     17.06
Date:                Sun, 12 Apr 2020   Prob (F-statistic):           1.32e-18
Time:                        19:19:41   Log-Likelihood:                -3207.8
No. Observations:                 807   AIC:                             6430.
Df Residuals:                     800   BIC:                             6462.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.6355     17.803      0.317      0.752     -29.311      40.582
lincome        1.2952      0.437      2.964      0.003       0.437       2.153
lcigpric      -2.9403      4.460     -0.659      0.510     -11.695       5.815
educ          -0.4634      0.120     -3.857      0.000      -0.699      -0.228
age            0.4819      0.097      4.978      0.000       0.292       0.672
agesq         -0.0056      0.001     -5.990      0.000      -0.007      -0.004
restaurn      -3.4611      0.796     -4.351      0.000      -5.023      -1.900
==============================================================================
Omnibus:                      325.055   Durbin-Watson:                   2.050
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1258.138
Skew:                           1.908   Prob(JB):                    6.29e-274
Kurtosis:                       7.780   Cond. No.                     2.30e+05
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.3e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
In [19]:
print(summary_col([OLS_smk, GLS_smk ],stars=True,float_format='%0.3f',
                  model_names=['OLS_smk\n(b/se)','GLS_smk\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
=============================
           OLS_smk   GLS_smk 
            (b/se)    (b/se) 
-----------------------------
Intercept -3.640    5.635    
          (24.079)  (17.803) 
lincome   0.880     1.295*** 
          (0.728)   (0.437)  
lcigpric  -0.751    -2.940   
          (5.773)   (4.460)  
educ      -0.501*** -0.463***
          (0.167)   (0.120)  
age       0.771***  0.482*** 
          (0.160)   (0.097)  
agesq     -0.009*** -0.006***
          (0.002)   (0.001)  
restaurn  -2.825**  -3.461***
          (1.112)   (0.796)  
N         807       807      
R2        0.053     0.113    
=============================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
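
If the estimated variance function is only an approximation, the FGLS weights can still be combined with robust standard errors, just as WLS3 did in Example 8.6. A sketch reusing the weights w computed above (GLS_smk_rob is an illustrative name):

GLS_smk_rob = smf.wls('cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1',
                      weights=w, data=df).fit(cov_type='HC1')
print(GLS_smk_rob.bse)  # robust standard errors on top of the FGLS weighting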

Example 8.8. Labor Force Participation of Married Women

In [20]:
df = dataWoo('mroz')
mroz_hetr = smf.ols('inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1', data=df).fit()
mroz_robust = smf.ols('inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1', data=df).fit(cov_type='HC1')

print(summary_col([mroz_hetr, mroz_robust ],stars=True,float_format='%0.3f',
                  model_names=['Heteroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
==================================
          Heteroskedastic  Robust 
              (b/se)       (b/se) 
----------------------------------
Intercept 0.586***       0.586*** 
          (0.154)        (0.152)  
nwifeinc  -0.003**       -0.003** 
          (0.001)        (0.002)  
educ      0.038***       0.038*** 
          (0.007)        (0.007)  
exper     0.039***       0.039*** 
          (0.006)        (0.006)  
expersq   -0.001***      -0.001***
          (0.000)        (0.000)  
age       -0.016***      -0.016***
          (0.002)        (0.002)  
kidslt6   -0.262***      -0.262***
          (0.034)        (0.032)  
kidsge6   0.013          0.013    
          (0.013)        (0.014)  
N         753            753      
R2        0.264          0.264    
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
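
In the linear probability model the error variance is known up to the response probability, Var(y|x) = p(x)[1 - p(x)], so WLS with weights 1/[p(1-p)] is feasible only when every fitted value lies strictly inside (0, 1). A quick check on the mroz fit (p is an illustrative name):

p = mroz_hetr.predict()
print(((p <= 0) | (p >= 1)).sum(), 'fitted values outside (0, 1)')  # any such values rule out direct WLS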

Example 8.9. Determinants of Personal Computer Ownership

In [21]:
df = dataWoo('gpa1')

# Dummy: at least one parent attended college
df['parcoll'] = ((df['fathcoll'] == 1) | (df['mothcoll'] == 1)).astype(int)

gpa_hetr = smf.ols('PC ~ hsGPA + ACT + parcoll + 1', data=df).fit()
gpa_robust = smf.ols('PC ~ hsGPA + ACT + parcoll + 1', data=df).fit(cov_type='HC1')

w = 1 / (gpa_hetr.predict() - np.square(gpa_hetr.predict()))  # 1/[p(1-p)], the LPM error variance
gpa_gls = smf.wls('PC ~ hsGPA + ACT + parcoll + 1', weights=w, data=df).fit()

print(summary_col([gpa_hetr, gpa_robust, gpa_gls],stars=True,float_format='%0.3f',
                  model_names=['Heteroskedastic\n(b/se)','Robust\n(b/se)', 'GLS\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
========================================
          Heteroskedastic Robust   GLS  
              (b/se)      (b/se)  (b/se)
----------------------------------------
Intercept -0.000         -0.000  0.026  
          (0.491)        (0.496) (0.477)
hsGPA     0.065          0.065   0.033  
          (0.137)        (0.141) (0.130)
ACT       0.001          0.001   0.004  
          (0.015)        (0.016) (0.015)
parcoll   0.221**        0.221** 0.215**
          (0.093)        (0.088) (0.086)
N         141            141     141    
R2        0.042          0.042   0.046  
========================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
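
The weights used here require 0 < p < 1 for every observation, so it is worth confirming that the fitted probabilities stay inside the unit interval before trusting the GLS column:

p = gpa_hetr.predict()
print(p.min(), p.max())  # both should lie strictly between 0 and 1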