Chapter 8. Heteroskedasticity#

Home | Stata | R

import numpy as np
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

from wooldridge import *

Example 8.1. Log wage equation with Heteroskedasticity#

df=dataWoo('wage1')

df['marrmale'] = 0
df.loc[(df['female'] ==0) & (df['married'] == 1), 'marrmale'] = 1
df['marrfem'] = 0
df.loc[(df['female'] ==1) & (df['married'] == 1), 'marrfem'] = 1
df['singfem'] = 0
df.loc[(df['female'] ==1) & (df['married'] == 0), 'singfem'] = 1
wage_hetr = smf.ols('lwage ~ marrmale + marrfem + singfem + educ  + exper + expersq + tenure + tenursq + 1', 
                    data=df).fit()
wage_robust = smf.ols('lwage ~ marrmale + marrfem + singfem + educ  + exper + expersq + tenure + tenursq + 1', 
                      data=df).fit(cov_type='HC1')

print(summary_col([wage_hetr, wage_robust],stars=True,float_format='%0.3f',
                  model_names=['Hetroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
=======================================
               Hetroskedastic   Robust 
                   (b/se)       (b/se) 
---------------------------------------
Intercept      0.321***       0.321*** 
               (0.100)        (0.109)  
marrmale       0.213***       0.213*** 
               (0.055)        (0.057)  
marrfem        -0.198***      -0.198***
               (0.058)        (0.059)  
singfem        -0.110**       -0.110*  
               (0.056)        (0.057)  
educ           0.079***       0.079*** 
               (0.007)        (0.007)  
exper          0.027***       0.027*** 
               (0.005)        (0.005)  
expersq        -0.001***      -0.001***
               (0.000)        (0.000)  
tenure         0.029***       0.029*** 
               (0.007)        (0.007)  
tenursq        -0.001**       -0.001** 
               (0.000)        (0.000)  
R-squared      0.461          0.461    
R-squared Adj. 0.453          0.453    
N              526            526      
R2             0.461          0.461    
=======================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Example 8.2. Heteroskedasticity-Robust F Statistic#

df = dataWoo('gpa3')
df = df[(df['spring']==1)]

gpa_hetr = smf.ols('cumgpa  ~ sat + hsperc + tothrs + female + black + white + 1', 
                   data=df).fit()
gpa_robust = smf.ols('cumgpa  ~ sat + hsperc + tothrs + female + black + white + 1', 
                     data=df).fit(cov_type='HC1')

print(summary_col([gpa_hetr, gpa_robust],stars=True,float_format='%0.3f',
                  model_names=['Hetroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
=======================================
               Hetroskedastic   Robust 
                   (b/se)       (b/se) 
---------------------------------------
Intercept      1.470***       1.470*** 
               (0.230)        (0.221)  
sat            0.001***       0.001*** 
               (0.000)        (0.000)  
hsperc         -0.009***      -0.009***
               (0.001)        (0.001)  
tothrs         0.003***       0.003*** 
               (0.001)        (0.001)  
female         0.303***       0.303*** 
               (0.059)        (0.059)  
black          -0.128         -0.128   
               (0.147)        (0.119)  
white          -0.059         -0.059   
               (0.141)        (0.111)  
R-squared      0.401          0.401    
R-squared Adj. 0.391          0.391    
N              366            366      
R2             0.401          0.401    
=======================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Example 8.3. Heteroskedasticity-Robust LM Statistic#

df = dataWoo('crime1')
df['avgsensq'] = np.square(df['avgsen'])

crime_hetr = smf.ols('narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1', 
                     data=df).fit()
crime_robust = smf.ols('narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1', 
                       data=df).fit(cov_type='HC1')

print(summary_col([crime_hetr, crime_robust],stars=True,float_format='%0.3f',
                  model_names=['Hetroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
=======================================
               Hetroskedastic   Robust 
                   (b/se)       (b/se) 
---------------------------------------
Intercept      0.567***       0.567*** 
               (0.036)        (0.040)  
pcnv           -0.136***      -0.136***
               (0.040)        (0.034)  
avgsen         0.018*         0.018*   
               (0.010)        (0.010)  
avgsensq       -0.001*        -0.001** 
               (0.000)        (0.000)  
ptime86        -0.039***      -0.039***
               (0.009)        (0.006)  
qemp86         -0.051***      -0.051***
               (0.014)        (0.014)  
inc86          -0.001***      -0.001***
               (0.000)        (0.000)  
black          0.325***       0.325*** 
               (0.045)        (0.059)  
hispan         0.193***       0.193*** 
               (0.040)        (0.040)  
R-squared      0.073          0.073    
R-squared Adj. 0.070          0.070    
N              2725           2725     
R2             0.073          0.073    
=======================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

*LM Statistic - not-robust (See Section 5-2)#

crime_hetr = smf.ols('narr86 ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1', 
                     data=df).fit()
uhat = df.narr86 - crime_hetr.predict()

uhat_reg = smf.ols('uhat ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1', 
                   data=df).fit()
print(uhat_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   uhat   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.4319
Date:                Mon, 11 Dec 2023   Prob (F-statistic):              0.902
Time:                        18:36:59   Log-Likelihood:                -3349.2
No. Observations:                2725   AIC:                             6716.
Df Residuals:                    2716   BIC:                             6770.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0033      0.036     -0.092      0.927      -0.074       0.067
pcnv          -0.0033      0.040     -0.082      0.935      -0.082       0.076
avgsen         0.0178      0.010      1.840      0.066      -0.001       0.037
avgsensq      -0.0005      0.000     -1.738      0.082      -0.001    6.61e-05
ptime86       -0.0016      0.009     -0.180      0.857      -0.019       0.015
qemp86         0.0005      0.014      0.033      0.974      -0.028       0.029
inc86       1.031e-05      0.000      0.030      0.976      -0.001       0.001
black         -0.0051      0.045     -0.112      0.911      -0.094       0.084
hispan        -0.0021      0.040     -0.052      0.958      -0.080       0.076
==============================================================================
Omnibus:                     2400.820   Durbin-Watson:                   1.841
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           112791.202
Skew:                           3.994   Prob(JB):                         0.00
Kurtosis:                      33.489   Cond. No.                         380.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
NRsq= 2725 * 0.00127
NRsq
3.46075

*LM Statistic - robust#

avgsen_reg = smf.ols('avgsen ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
df['uhat_avgsen'] = (df.avgsen - avgsen_reg.predict())*uhat
avgsensq_reg = smf.ols('avgsensq ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1', data=df).fit()
df['uhat_avgsensq'] = (df.avgsensq - avgsensq_reg.predict())*uhat
df['one']= 1 

lm_robust = smf.ols('one ~ uhat_avgsen + uhat_avgsensq + 0', data=df).fit()
print(lm_robust.summary())
                                 OLS Regression Results                                
=======================================================================================
Dep. Variable:                    one   R-squared (uncentered):                   0.001
Model:                            OLS   Adj. R-squared (uncentered):              0.001
Method:                 Least Squares   F-statistic:                              2.000
Date:                Mon, 11 Dec 2023   Prob (F-statistic):                       0.136
Time:                        18:36:59   Log-Likelihood:                         -3864.6
No. Observations:                2725   AIC:                                      7733.
Df Residuals:                    2723   BIC:                                      7745.
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
=================================================================================
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
uhat_avgsen       0.0278      0.014      1.976      0.048       0.000       0.055
uhat_avgsensq    -0.0010      0.001     -1.907      0.057      -0.002    2.96e-05
==============================================================================
Omnibus:                     5819.318   Durbin-Watson:                   0.003
Prob(Omnibus):                  0.000   Jarque-Bera (JB):         34188534.150
Skew:                         -18.209   Prob(JB):                         0.00
Kurtosis:                     550.525   Cond. No.                         57.8
==============================================================================

Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
robust_NRsq= 2725 * 0.001467
robust_NRsq
3.997575

Example 8.4. Heteroskedasticity in Housing Price Equations#

df = dataWoo('hprice1')
hprice_hetr= smf.ols('price ~ lotsize + sqrft + bdrms + 1', data=df).fit()
df['uhat_hp'] = np.square(df.price - hprice_hetr.predict())
uhat_hp_reg = smf.ols('uhat_hp ~ lotsize + sqrft + bdrms + 1', data=df).fit() 
print(uhat_hp_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                uhat_hp   R-squared:                       0.160
Model:                            OLS   Adj. R-squared:                  0.130
Method:                 Least Squares   F-statistic:                     5.339
Date:                Mon, 11 Dec 2023   Prob (F-statistic):            0.00205
Time:                        18:36:59   Log-Likelihood:                -896.99
No. Observations:                  88   AIC:                             1802.
Df Residuals:                      84   BIC:                             1812.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept  -5522.7947   3259.478     -1.694      0.094    -1.2e+04     959.035
lotsize        0.2015      0.071      2.838      0.006       0.060       0.343
sqrft          1.6910      1.464      1.155      0.251      -1.220       4.602
bdrms       1041.7602    996.381      1.046      0.299    -939.653    3023.173
==============================================================================
Omnibus:                      115.167   Durbin-Watson:                   2.351
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2267.859
Skew:                           4.381   Prob(JB):                         0.00
Kurtosis:                      26.276   Cond. No.                     6.41e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 6.41e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
NRsq = 88*0.160
NRsq
14.08
hprice_hetr_lp= smf.ols('lprice ~ llotsize + lsqrft + bdrms + 1', data=df).fit()
df['uhat_hp_lp'] = np.square(df.lprice - hprice_hetr_lp.predict())
uhat_hp_lp_reg = smf.ols('uhat_hp_lp ~ llotsize + lsqrft + bdrms + 1', data=df).fit() 
print(uhat_hp_lp_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:             uhat_hp_lp   R-squared:                       0.048
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     1.412
Date:                Mon, 11 Dec 2023   Prob (F-statistic):              0.245
Time:                        18:36:59   Log-Likelihood:                 107.40
No. Observations:                  88   AIC:                            -206.8
Df Residuals:                      84   BIC:                            -196.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5100      0.258      1.978      0.051      -0.003       1.023
llotsize      -0.0070      0.015     -0.463      0.645      -0.037       0.023
lsqrft        -0.0627      0.037     -1.706      0.092      -0.136       0.010
bdrms          0.0168      0.011      1.545      0.126      -0.005       0.039
==============================================================================
Omnibus:                      115.509   Durbin-Watson:                   2.110
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2070.311
Skew:                           4.470   Prob(JB):                         0.00
Kurtosis:                      25.016   Cond. No.                         410.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
NRsq = 88 * 0.048
NRsq
4.224

Example 8.5. Special Form of the White Test#

yhat = hprice_hetr_lp.predict()
yhat_sq = np.square(hprice_hetr_lp.predict())
special_reg = smf.ols('uhat_hp_lp ~ yhat + yhat_sq + 1', data=df).fit() 
print(special_reg.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:             uhat_hp_lp   R-squared:                       0.039
Model:                            OLS   Adj. R-squared:                  0.017
Method:                 Least Squares   F-statistic:                     1.733
Date:                Mon, 11 Dec 2023   Prob (F-statistic):              0.183
Time:                        18:36:59   Log-Likelihood:                 106.99
No. Observations:                  88   AIC:                            -208.0
Df Residuals:                      85   BIC:                            -200.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.0468      3.345      1.509      0.135      -1.604      11.698
yhat          -1.7092      1.163     -1.469      0.145      -4.022       0.604
yhat_sq        0.1451      0.101      1.437      0.154      -0.056       0.346
==============================================================================
Omnibus:                      114.590   Durbin-Watson:                   2.144
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2001.901
Skew:                           4.427   Prob(JB):                         0.00
Kurtosis:                      24.624   Cond. No.                     1.48e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.48e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
NRsq = 88 * 0.0392
NRsq
3.4495999999999998

Example 8.6. Financial Wealth Equation#

(& Table 8.2)#

df = dataWoo('401ksubs')
df = df[(df['fsize']==1)]
df['age25sq'] = np.square(df['age'] - 25)
OLS1 = smf.ols('nettfa ~ inc + 1', data=df).fit(cov_type='HC1')
OLS2 = smf.ols('nettfa ~ inc + age25sq + male + e401k + 1',  data=df).fit(cov_type='HC1')
df['w']= 1/df.inc
WLS1 = smf.wls('nettfa ~ inc + 1', weights=df.w, data=df).fit()
WLS2 = smf.wls('nettfa ~ inc + age25sq + male + e401k+ 1', weights=df.w, data=df).fit()
WLS3 = smf.wls('nettfa ~ inc + age25sq + male + e401k+ 1', weights=df.w, data=df).fit(cov_type='HC1') # Table 8.2

print(summary_col([OLS1, OLS2, WLS1, WLS2, WLS3 ],stars=True,float_format='%0.3f',
                  model_names=['OLS1\n(b/se)','OLS2\n(b/se)', 'WLS1\n(b/se)','WLS2\n(b/se)', 'WLS3\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
====================================================================
                  OLS1       OLS2       WLS1      WLS2       WLS3   
                 (b/se)     (b/se)     (b/se)    (b/se)     (b/se)  
--------------------------------------------------------------------
Intercept      -10.571*** -20.985*** -9.581*** -16.703*** -16.703***
               (2.530)    (3.495)    (1.653)   (1.958)    (2.243)   
R-squared      0.083      0.128      0.071     0.112      0.112     
R-squared Adj. 0.082      0.126      0.070     0.110      0.110     
age25sq                   0.025***             0.018***   0.018***  
                          (0.004)              (0.002)    (0.003)   
e401k                     6.886***             5.188***   5.188***  
                          (2.287)              (1.703)    (1.572)   
inc            0.821***   0.771***   0.787***  0.740***   0.740***  
               (0.104)    (0.100)    (0.063)   (0.064)    (0.075)   
male                      2.478                1.841      1.841     
                          (2.058)              (1.564)    (1.311)   
N              2017       2017       2017      2017       2017      
R2             0.083      0.128      0.071     0.112      0.112     
====================================================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Example 8.7. Demand for Cigarettes#

df = dataWoo('smoke')
OLS_smk= smf.ols('cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1', data=df).fit()
print(OLS_smk.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   cigs   R-squared:                       0.053
Model:                            OLS   Adj. R-squared:                  0.046
Method:                 Least Squares   F-statistic:                     7.423
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           9.50e-08
Time:                        18:36:59   Log-Likelihood:                -3236.2
No. Observations:                 807   AIC:                             6486.
Df Residuals:                     800   BIC:                             6519.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.6398     24.079     -0.151      0.880     -50.905      43.625
lincome        0.8803      0.728      1.210      0.227      -0.548       2.309
lcigpric      -0.7509      5.773     -0.130      0.897     -12.084      10.582
educ          -0.5015      0.167     -3.002      0.003      -0.829      -0.174
age            0.7707      0.160      4.813      0.000       0.456       1.085
agesq         -0.0090      0.002     -5.176      0.000      -0.012      -0.006
restaurn      -2.8251      1.112     -2.541      0.011      -5.007      -0.643
==============================================================================
Omnibus:                      225.317   Durbin-Watson:                   2.013
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              494.255
Skew:                           1.536   Prob(JB):                    4.72e-108
Kurtosis:                       5.294   Cond. No.                     1.33e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.33e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
luhat_sq = np.log(np.square(df.cigs - OLS_smk.predict()))
luhat_sq_reg = smf.ols('luhat_sq ~ lincome + lcigpric + educ + age + agesq + restaurn + 1', 
                       data=df).fit()
luhat_sq_hat = np.exp(luhat_sq_reg.predict())
w = 1/luhat_sq_hat
GLS_smk = smf.wls('cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1', weights = w, 
                  data=df).fit()
print(GLS_smk.summary())
                            WLS Regression Results                            
==============================================================================
Dep. Variable:                   cigs   R-squared:                       0.113
Model:                            WLS   Adj. R-squared:                  0.107
Method:                 Least Squares   F-statistic:                     17.06
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           1.32e-18
Time:                        18:37:00   Log-Likelihood:                -3207.8
No. Observations:                 807   AIC:                             6430.
Df Residuals:                     800   BIC:                             6462.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.6355     17.803      0.317      0.752     -29.311      40.582
lincome        1.2952      0.437      2.964      0.003       0.437       2.153
lcigpric      -2.9403      4.460     -0.659      0.510     -11.695       5.815
educ          -0.4634      0.120     -3.857      0.000      -0.699      -0.228
age            0.4819      0.097      4.978      0.000       0.292       0.672
agesq         -0.0056      0.001     -5.990      0.000      -0.007      -0.004
restaurn      -3.4611      0.796     -4.351      0.000      -5.023      -1.900
==============================================================================
Omnibus:                      325.055   Durbin-Watson:                   2.050
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1258.138
Skew:                           1.908   Prob(JB):                    6.29e-274
Kurtosis:                       7.780   Cond. No.                     2.30e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.3e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
print(summary_col([OLS_smk, GLS_smk ],stars=True,float_format='%0.3f',
                  model_names=['OLS_smk\n(b/se)','GLS_smk\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
==================================
                OLS_smk   GLS_smk 
                 (b/se)    (b/se) 
----------------------------------
Intercept      -3.640    5.635    
               (24.079)  (17.803) 
lincome        0.880     1.295*** 
               (0.728)   (0.437)  
lcigpric       -0.751    -2.940   
               (5.773)   (4.460)  
educ           -0.501*** -0.463***
               (0.167)   (0.120)  
age            0.771***  0.482*** 
               (0.160)   (0.097)  
agesq          -0.009*** -0.006***
               (0.002)   (0.001)  
restaurn       -2.825**  -3.461***
               (1.112)   (0.796)  
R-squared      0.053     0.113    
R-squared Adj. 0.046     0.107    
N              807       807      
R2             0.053     0.113    
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Example 8.8. Labor Force Participation of Married Women#

df = dataWoo('mroz')
mroz_hetr = smf.ols('inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1',
                    data=df).fit()
mroz_robust = smf.ols('inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1',
                      data=df).fit(cov_type='HC1')

print(summary_col([mroz_hetr, mroz_robust ],stars=True,float_format='%0.3f',
                  model_names=['Hetroskedastic\n(b/se)','Robust\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
=======================================
               Hetroskedastic   Robust 
                   (b/se)       (b/se) 
---------------------------------------
Intercept      0.586***       0.586*** 
               (0.154)        (0.152)  
nwifeinc       -0.003**       -0.003** 
               (0.001)        (0.002)  
educ           0.038***       0.038*** 
               (0.007)        (0.007)  
exper          0.039***       0.039*** 
               (0.006)        (0.006)  
expersq        -0.001***      -0.001***
               (0.000)        (0.000)  
age            -0.016***      -0.016***
               (0.002)        (0.002)  
kidslt6        -0.262***      -0.262***
               (0.034)        (0.032)  
kidsge6        0.013          0.013    
               (0.013)        (0.014)  
R-squared      0.264          0.264    
R-squared Adj. 0.257          0.257    
N              753            753      
R2             0.264          0.264    
=======================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

Example 8.9. Determinants of Personal Computer Ownership#

df = dataWoo('gpa1')

df['parcoll'] = 0
df.loc[(df['fathcoll'] ==1) | (df['mothcoll'] == 1), 'parcoll'] = 1

gpa_hetr = smf.ols('PC ~ hsGPA + ACT + parcoll  + 1', data=df).fit()
gpa_robust = smf.ols('PC ~ hsGPA + ACT + parcoll  + 1', data=df).fit(cov_type='HC1')

w = 1/(gpa_hetr.predict() - np.square(gpa_hetr.predict()))
gpa_gls = smf.wls('PC ~ hsGPA + ACT + parcoll  + 1', weights = w, data=df).fit()

print(summary_col([gpa_hetr, gpa_robust, gpa_gls],stars=True,float_format='%0.3f',
                  model_names=['Hetroskedastic\n(b/se)','Robust\n(b/se)', 'GLS\n(b/se)'],
                 info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                             'R2':lambda x: "{:.3f}".format(x.rsquared)}))
=============================================
               Hetroskedastic  Robust   GLS  
                   (b/se)      (b/se)  (b/se)
---------------------------------------------
Intercept      -0.000         -0.000  0.026  
               (0.491)        (0.496) (0.477)
hsGPA          0.065          0.065   0.033  
               (0.137)        (0.141) (0.130)
ACT            0.001          0.001   0.004  
               (0.015)        (0.016) (0.015)
parcoll        0.221**        0.221** 0.215**
               (0.093)        (0.088) (0.086)
R-squared      0.042          0.042   0.046  
R-squared Adj. 0.021          0.021   0.026  
N              141            141     141    
R2             0.042          0.042   0.046  
=============================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01