Chapter 12. Serial Correlation and Heteroskedasticity in Time Series Regressions#
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *
Example 12.1. Testing for AR(1) Serial Correlation in the Phillips Curve#
# Example 12.1: test for AR(1) serial correlation in the static Phillips
# curve. Fit inflation on unemployment (years before 1997), save the
# residuals, then regress them on their own first lag.
df = dataWoo("phillips")
df = df[(df['year']<1997)]
# NOTE(review): the column is written as df.inf in the formula rather than
# plain inf — presumably to avoid a name clash in the formula namespace.
df['uhat1'] = smf.ols('df.inf ~ unem + 1', data=df).fit().resid
# A significant coefficient on the lagged residual indicates AR(1) serial
# correlation in the equation's errors.
print(smf.ols('uhat1 ~ uhat1.shift(1)', data=df).fit().summary())
OLS Regression Results
==============================================================================
Dep. Variable: uhat1 R-squared: 0.346
Model: OLS Adj. R-squared: 0.332
Method: Least Squares F-statistic: 24.34
Date: Mon, 11 Dec 2023 Prob (F-statistic): 1.10e-05
Time: 18:37:28 Log-Likelihood: -110.88
No. Observations: 48 AIC: 225.8
Df Residuals: 46 BIC: 229.5
Df Model: 1
Covariance Type: nonrobust
==================================================================================
coef std err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------
Intercept -0.1134 0.359 -0.316 0.754 -0.837 0.610
uhat1.shift(1) 0.5730 0.116 4.934 0.000 0.339 0.807
==============================================================================
Omnibus: 6.807 Durbin-Watson: 1.354
Prob(Omnibus): 0.033 Jarque-Bera (JB): 10.853
Skew: 0.158 Prob(JB): 0.00440
Kurtosis: 5.308 Cond. No. 3.09
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same AR(1) test for the expectations-augmented Phillips curve: the change
# in inflation (cinf) is the dependent variable. Here the lag coefficient is
# small and insignificant (-0.036, p = 0.775 in the output below).
df['uhat2'] = smf.ols('df.cinf ~ unem', data=df).fit().resid
print(smf.ols('uhat2 ~ uhat2.shift(1)', data=df).fit().summary())
OLS Regression Results
==============================================================================
Dep. Variable: uhat2 R-squared: 0.002
Model: OLS Adj. R-squared: -0.020
Method: Least Squares F-statistic: 0.08254
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.775
Time: 18:37:28 Log-Likelihood: -99.620
No. Observations: 47 AIC: 203.2
Df Residuals: 45 BIC: 206.9
Df Model: 1
Covariance Type: nonrobust
==================================================================================
coef std err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------
Intercept 0.1942 0.300 0.646 0.521 -0.411 0.799
uhat2.shift(1) -0.0356 0.124 -0.287 0.775 -0.285 0.214
==============================================================================
Omnibus: 10.035 Durbin-Watson: 1.845
Prob(Omnibus): 0.007 Jarque-Bera (JB): 21.015
Skew: -0.348 Prob(JB): 2.73e-05
Kurtosis: 6.201 Cond. No. 2.42
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 12.2. Testing for AR(1) Serial Correlation in the Minimum Wage Equation#
# Example 12.2: AR(1) serial-correlation test in the Puerto Rican minimum
# wage equation. AR1c includes all original regressors alongside the lagged
# residual; AR1s is the simple regression on the lagged residual only.
df = dataWoo("prminwge")
df['uhat'] = smf.ols('lprepop ~ lmincov + lprgnp + lusgnp + t', data=df).fit().resid
AR1c = smf.ols('uhat ~ lmincov + lprgnp + lusgnp + t + uhat.shift(1)' , data=df).fit()
AR1s = smf.ols('uhat ~ uhat.shift(1)', data=df).fit()
# Side-by-side comparison of both tests; stars mark significance levels.
print(summary_col([AR1c, AR1s],stars=True,float_format='%0.3f',
model_names=['AR1c\n(b/se)','AR1s\n(b/se)'],
info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
'R2':lambda x: "{:.3f}".format(x.rsquared),
'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
===============================
AR1c AR1s
(b/se) (b/se)
-------------------------------
Intercept -0.851 -0.001
(1.093) (0.004)
R-squared 0.242 0.165
R-squared Adj. 0.120 0.141
lmincov 0.038
(0.035)
lprgnp -0.078
(0.071)
lusgnp 0.204
(0.195)
t -0.003
(0.004)
uhat.shift(1) 0.481*** 0.417**
(0.166) (0.159)
N 37 37
R2 0.242 0.165
Adj.R2 0.120 0.141
===============================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
Example 12.3. Testing for AR(3) Serial Correlation#
# Example 12.3: test for AR(3) serial correlation in the barium chloride
# import equation — regress the residuals on three of their own lags plus
# all the original regressors.
df = dataWoo("barium")
df['u'] = smf.ols('lchnimp ~ lchempi + lgas + lrtwex + befile6 + affile6 + afdec6', data=df).fit().resid
AR3 = smf.ols('u ~ lchempi + lgas + lrtwex + befile6 + affile6 + afdec6 + u.shift(1) + u.shift(2) + u.shift(3) + 1', data = df).fit()
print(AR3.summary())
OLS Regression Results
==============================================================================
Dep. Variable: u R-squared: 0.116
Model: OLS Adj. R-squared: 0.048
Method: Least Squares F-statistic: 1.719
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.0920
Time: 18:37:29 Log-Likelihood: -104.56
No. Observations: 128 AIC: 229.1
Df Residuals: 118 BIC: 257.6
Df Model: 9
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -14.3692 20.656 -0.696 0.488 -55.273 26.535
lchempi -0.1432 0.472 -0.303 0.762 -1.078 0.792
lgas 0.6233 0.886 0.704 0.483 -1.131 2.378
lrtwex 0.1787 0.391 0.457 0.649 -0.596 0.953
befile6 -0.0859 0.251 -0.342 0.733 -0.583 0.411
affile6 -0.1221 0.255 -0.479 0.632 -0.626 0.382
afdec6 -0.0668 0.274 -0.244 0.808 -0.610 0.476
u.shift(1) 0.2215 0.092 2.417 0.017 0.040 0.403
u.shift(2) 0.1340 0.092 1.454 0.148 -0.048 0.317
u.shift(3) 0.1255 0.091 1.378 0.171 -0.055 0.306
==============================================================================
Omnibus: 6.375 Durbin-Watson: 1.947
Prob(Omnibus): 0.041 Jarque-Bera (JB): 5.978
Skew: -0.444 Prob(JB): 0.0503
Kurtosis: 3.576 Cond. No. 9.78e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 9.78e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
# Joint F test that all three residual-lag coefficients are zero; rejection
# indicates serial correlation (F = 5.12, p = 0.002 in the output below).
hypotheses = '(u.shift(1) = u.shift(2) = u.shift(3) = 0)'
f_test = AR3.f_test(hypotheses)
print(f_test)
<F test: F=5.1229144202079056, p=0.002289781725887337, df_denom=118, df_num=3>
Example 12.4. Prais-Winsten Estimation in the Event Study#
# Example 12.4: re-fit the barium event-study equation by plain OLS; used
# below as the starting point for Prais-Winsten (feasible GLS) estimation.
df = dataWoo("barium")
OLS = smf.ols('lchnimp ~ lchempi + lgas + lrtwex + befile6 + affile6 + afdec6', data=df).fit()
def ols_ar1(model, rho, drop1=True):
    """Re-estimate a fitted OLS model on rho-quasi-differenced data.

    Parameters
    ----------
    model : statsmodels OLS results object (supplies .model.exog / .model.endog)
    rho : float
        AR(1) coefficient used for the quasi-differencing.
    drop1 : bool, default True
        If True, drop the first observation (Cochrane-Orcutt). If False,
        keep it with the sqrt(1 - rho**2) weighting (Prais-Winsten).

    Returns
    -------
    statsmodels OLS results for the transformed regression (no constant is
    added: the transformed intercept column is already in xstar).
    """
    x = model.model.exog
    y = model.model.endog
    # Quasi-difference: z*_t = z_t - rho * z_{t-1}; loses the first observation.
    ystar = y[1:] - rho * y[:-1]
    xstar = x[1:, ] - rho * x[:-1, ]
    if not drop1:
        # Prais-Winsten: restore observation 1, scaled by sqrt(1 - rho^2).
        ystar = np.append(np.sqrt(1 - rho**2) * y[0], ystar)
        xstar = np.append([np.sqrt(1 - rho**2) * x[0, ]], xstar, axis=0)
    return sm.OLS(ystar, xstar).fit()
def OLSAR1(model, drop1=True):
    """Iterated Cochrane-Orcutt / Prais-Winsten (feasible GLS) estimation.

    Starting from the OLS residuals of *model*, alternately (a) estimate the
    AR(1) coefficient rho from the residuals and (b) re-fit the model on the
    rho-quasi-differenced data, until rho changes by less than 1e-5. Prints
    rho at each iteration (one line per step, matching the original output).

    Parameters
    ----------
    model : statsmodels OLS results object
    drop1 : bool, default True
        Passed through to ols_ar1: True = Cochrane-Orcutt, False = Prais-Winsten.

    Returns
    -------
    statsmodels OLS results fitted at the converged rho.
    """
    x = model.model.exog
    y = model.model.endog

    def _rho_hat(e):
        # Slope of e_t on e_{t-1} through the origin:
        # sum(e_t * e_{t-1}) / sum(e_{t-1}**2).
        return np.dot(e[:-1], e[1:]) / np.dot(e[:-1], e[:-1])

    rho0 = _rho_hat(y - x @ model.params)
    rdiff = 1.0
    while rdiff > 1.0e-5:
        model1 = ols_ar1(model, rho0, drop1)
        rho1 = _rho_hat(y - (x @ model1.params))
        # abs() replaces the original sqrt((rho1 - rho0)**2); unused locals
        # (e0) removed.
        rdiff = abs(rho1 - rho0)
        rho0 = rho1
        print('Rho = ', rho0)
    # Re-fit once more at the converged rho and return that model.
    return ols_ar1(model, rho0, drop1)
# drop1=False keeps the first observation with the sqrt(1-rho^2) weight,
# i.e. Prais-Winsten rather than Cochrane-Orcutt.
ar1_pw = OLSAR1(OLS ,drop1=False)
print(ar1_pw.summary())
Rho = 0.2910357321865985
Rho = 0.29300215914928224
Rho = 0.29319585917068286
Rho = 0.2932149681976122
Rho = 0.29321685363481764
OLS Regression Results
=======================================================================================
Dep. Variable: y R-squared (uncentered): 0.984
Model: OLS Adj. R-squared (uncentered): 0.983
Method: Least Squares F-statistic: 1096.
Date: Mon, 11 Dec 2023 Prob (F-statistic): 2.99e-108
Time: 18:37:29 Log-Likelihood: -109.41
No. Observations: 131 AIC: 232.8
Df Residuals: 124 BIC: 252.9
Df Model: 7
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
x1 -37.0777 22.778 -1.628 0.106 -82.162 8.007
x2 2.9409 0.633 4.647 0.000 1.688 4.194
x3 1.0464 0.977 1.071 0.286 -0.888 2.981
x4 1.1328 0.507 2.236 0.027 0.130 2.136
x5 -0.0165 0.319 -0.052 0.959 -0.649 0.616
x6 -0.0332 0.322 -0.103 0.918 -0.670 0.604
x7 -0.5768 0.342 -1.687 0.094 -1.254 0.100
==============================================================================
Omnibus: 9.254 Durbin-Watson: 2.087
Prob(Omnibus): 0.010 Jarque-Bera (JB): 9.875
Skew: -0.508 Prob(JB): 0.00717
Kurtosis: 3.881 Cond. No. 7.69e+03
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The condition number is large, 7.69e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Example 12.5. Static Phillips Curve#
# Example 12.5: static Phillips curve by plain OLS (years before 1997),
# for comparison with the Prais-Winsten re-estimation below.
df = dataWoo("phillips")
df = df[(df['year']<1997)]
ols = smf.ols('df.inf ~ unem', data=df).fit()
print(ols.summary())
OLS Regression Results
==============================================================================
Dep. Variable: df.inf R-squared: 0.053
Model: OLS Adj. R-squared: 0.033
Method: Least Squares F-statistic: 2.616
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.112
Time: 18:37:29 Log-Likelihood: -124.43
No. Observations: 49 AIC: 252.9
Df Residuals: 47 BIC: 256.6
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.4236 1.719 0.828 0.412 -2.035 4.882
unem 0.4676 0.289 1.617 0.112 -0.114 1.049
==============================================================================
Omnibus: 8.905 Durbin-Watson: 0.803
Prob(Omnibus): 0.012 Jarque-Bera (JB): 8.336
Skew: 0.979 Prob(JB): 0.0155
Kurtosis: 3.502 Cond. No. 23.5
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Prais-Winsten re-estimation of the static Phillips curve. Note the sign
# change on the unemployment coefficient (0.468 by OLS vs. -0.716 here).
ar1_pw = OLSAR1(ols ,drop1=False)
print(ar1_pw.summary())
Rho = 0.7306596839389279
Rho = 0.7719454382272319
Rho = 0.7792180497300137
Rho = 0.7803441605388693
Rho = 0.780514454709218
Rho = 0.7805401123896687
Rho = 0.7805439759940054
OLS Regression Results
=======================================================================================
Dep. Variable: y R-squared (uncentered): 0.241
Model: OLS Adj. R-squared (uncentered): 0.208
Method: Least Squares F-statistic: 7.446
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.00155
Time: 18:37:29 Log-Likelihood: -108.62
No. Observations: 49 AIC: 221.2
Df Residuals: 47 BIC: 225.0
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
x1 8.2959 2.231 3.718 0.001 3.807 12.785
x2 -0.7157 0.313 -2.283 0.027 -1.346 -0.085
==============================================================================
Omnibus: 7.441 Durbin-Watson: 1.910
Prob(Omnibus): 0.024 Jarque-Bera (JB): 11.678
Skew: -0.280 Prob(JB): 0.00291
Kurtosis: 5.325 Cond. No. 11.7
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 12.6. Differencing the Interest Rate Equation#
# Example 12.6: interest rate equation in levels vs. first differences.
df = dataWoo("intdef")
# 'def' is a Python keyword, so pull the deficit column out under a new name.
def1 = df['def']
ureg = smf.ols('i3 ~ df.inf + def1', data=df).fit()
df['u'] = ureg.resid
# AR(1) test on the residuals from the levels equation ...
AR1u = smf.ols('u ~ u.shift(1)', data=df).fit()
# ... and on the residuals from the differenced equation.
ereg = smf.ols('ci3 ~ df.cinf + cdef', data=df).fit()
df['e'] = ereg.resid
AR1e = smf.ols('e ~ e.shift(1)', data=df).fit()
# All four regressions side by side: differencing removes the strong serial
# correlation seen in levels (0.623*** vs. 0.072 in the output below).
print(summary_col([AR1u, AR1e, ureg, ereg],stars=True,float_format='%0.3f',
model_names=['AR1u\n(b/se)','AR1e\n(b/se)', 'ureg\n(b/se)','ereg\n(b/se)'],
info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
'R2':lambda x: "{:.3f}".format(x.rsquared),
'Adj.R2':lambda x: "{:.3f}".format(x.rsquared_adj)}))
================================================
AR1u AR1e ureg ereg
(b/se) (b/se) (b/se) (b/se)
------------------------------------------------
Intercept 0.015 -0.041 1.733*** 0.042
(0.190) (0.166) (0.432) (0.171)
R-squared 0.377 0.005 0.602 0.176
R-squared Adj. 0.366 -0.014 0.587 0.145
cdef -0.181
(0.148)
def1 0.513***
(0.118)
df.cinf 0.149
(0.092)
df.inf 0.606***
(0.082)
e.shift(1) 0.072
(0.134)
u.shift(1) 0.623***
(0.110)
N 55 54 56 55
R2 0.377 0.005 0.602 0.176
Adj.R2 0.366 -0.014 0.587 0.145
================================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
Example 12.7. The Puerto Rican Minimum Wage#
# Example 12.7: OLS vs. Newey-West (HAC) standard errors for the Puerto
# Rican minimum wage equation.
df = dataWoo("prminwge")
OLS2 = smf.ols('lprepop ~ lmincov + lprgnp + lusgnp + t', data=df).fit()
# HAC (Newey-West) covariance with one lag: same coefficients as OLS2,
# only the standard errors change.
Newey = OLS2.get_robustcov_results(cov_type='HAC', maxlags=1)
# BUG FIX: the table must compare OLS2 (this equation) with its own HAC
# version — the original passed the unrelated barium-example `OLS`
# (the printed N of 131 vs. 38 confirms the mismatch).
print(summary_col([OLS2, Newey], stars=True, float_format='%0.3f',
model_names=['OLS\n(b/se)', 'Newey\n(b/se)'],
info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
'R2': lambda x: "{:.3f}".format(x.rsquared),
'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj)}))
=================================
OLS Newey
(b/se) (b/se)
---------------------------------
Intercept -17.803 -6.663***
(21.045) (1.375)
R-squared 0.305 0.889
R-squared Adj. 0.271 0.876
afdec6 -0.565*
(0.286)
affile6 -0.032
(0.264)
befile6 0.060
(0.261)
lchempi 3.117***
(0.479)
lgas 0.196
(0.907)
lmincov -0.212***
(0.042)
lprgnp 0.285***
(0.093)
lrtwex 0.983**
(0.400)
lusgnp 0.486*
(0.253)
t -0.027***
(0.005)
N 131 38
R2 0.305 0.889
Adj.R2 0.271 0.876
=================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
# Prais-Winsten estimation of the minimum wage equation for comparison.
print(OLSAR1(OLS2 ,drop1=False).summary()) #PW
Rho = 0.5324697530893846
Rho = 0.5796268387566654
Rho = 0.5999046986034846
Rho = 0.6086113183871994
Rho = 0.6123431100391632
Rho = 0.6139413953891576
Rho = 0.6146257121155141
Rho = 0.6149186696926453
Rho = 0.6150440788117247
Rho = 0.6150977626621191
Rho = 0.6151207428725802
Rho = 0.6151305798698998
OLS Regression Results
=======================================================================================
Dep. Variable: y R-squared (uncentered): 0.995
Model: OLS Adj. R-squared (uncentered): 0.994
Method: Least Squares F-statistic: 1336.
Date: Mon, 11 Dec 2023 Prob (F-statistic): 4.55e-37
Time: 18:37:29 Log-Likelihood: 83.937
No. Observations: 38 AIC: -157.9
Df Residuals: 33 BIC: -149.7
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
x1 -4.6529 1.376 -3.380 0.002 -7.453 -1.852
x2 -0.1477 0.046 -3.222 0.003 -0.241 -0.054
x3 0.2514 0.116 2.159 0.038 0.014 0.488
x4 0.2557 0.232 1.103 0.278 -0.216 0.727
x5 -0.0205 0.006 -3.501 0.001 -0.032 -0.009
==============================================================================
Omnibus: 3.313 Durbin-Watson: 1.736
Prob(Omnibus): 0.191 Jarque-Bera (JB): 2.453
Skew: 0.618 Prob(JB): 0.293
Kurtosis: 3.150 Cond. No. 3.00e+03
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The condition number is large, 3e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Example 12.8. Heteroskedasticity and the Efficient Markets Hypothesis#
# Example 12.8: efficient-markets regression of NYSE weekly returns on the
# lagged return. 'return' is a Python keyword, so the column is accessed as
# df[("return")] inside the formula.
df = dataWoo("nyse")
reg1 = smf.ols('df[("return")] ~ return_1', data=df).fit()
print(reg1.summary())
OLS Regression Results
==============================================================================
Dep. Variable: df[("return")] R-squared: 0.003
Model: OLS Adj. R-squared: 0.002
Method: Least Squares F-statistic: 2.399
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.122
Time: 18:37:29 Log-Likelihood: -1491.2
No. Observations: 689 AIC: 2986.
Df Residuals: 687 BIC: 2996.
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 0.1796 0.081 2.225 0.026 0.021 0.338
return_1 0.0589 0.038 1.549 0.122 -0.016 0.134
==============================================================================
Omnibus: 114.206 Durbin-Watson: 1.997
Prob(Omnibus): 0.000 Jarque-Bera (JB): 646.991
Skew: -0.598 Prob(JB): 3.22e-141
Kurtosis: 7.594 Cond. No. 2.14
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Heteroskedasticity test: regress the squared residuals on return_1. A
# significant slope means the error variance depends on the lagged return.
df['u2'] = np.square(reg1.resid)
print(smf.ols('u2 ~ return_1', data=df).fit().summary())
OLS Regression Results
==============================================================================
Dep. Variable: u2 R-squared: 0.042
Model: OLS Adj. R-squared: 0.041
Method: Least Squares F-statistic: 30.05
Date: Mon, 11 Dec 2023 Prob (F-statistic): 5.90e-08
Time: 18:37:29 Log-Likelihood: -2639.9
No. Observations: 689 AIC: 5284.
Df Residuals: 687 BIC: 5293.
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 4.6565 0.428 10.888 0.000 3.817 5.496
return_1 -1.1041 0.201 -5.482 0.000 -1.500 -0.709
==============================================================================
Omnibus: 1296.711 Durbin-Watson: 1.443
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1627670.115
Skew: 12.811 Prob(JB): 0.00
Kurtosis: 239.728 Cond. No. 2.14
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 12.9. ARCH in Stock Returns#
# Example 12.9: test for ARCH(1) effects — regress the squared residuals on
# their own first lag; a significant coefficient indicates ARCH.
df = dataWoo("nyse")
df['u2']= np.square(smf.ols('df[("return")] ~ return_1', data=df).fit().resid)
print(smf.ols('u2 ~ u2.shift(1)', data=df).fit().summary())
OLS Regression Results
==============================================================================
Dep. Variable: u2 R-squared: 0.114
Model: OLS Adj. R-squared: 0.112
Method: Least Squares F-statistic: 87.92
Date: Mon, 11 Dec 2023 Prob (F-statistic): 9.71e-20
Time: 18:37:29 Log-Likelihood: -2609.7
No. Observations: 688 AIC: 5223.
Df Residuals: 686 BIC: 5233.
Df Model: 1
Covariance Type: nonrobust
===============================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------
Intercept 2.9474 0.440 6.695 0.000 2.083 3.812
u2.shift(1) 0.3371 0.036 9.377 0.000 0.266 0.408
==============================================================================
Omnibus: 1343.910 Durbin-Watson: 2.028
Prob(Omnibus): 0.000 Jarque-Bera (JB): 2176433.255
Skew: 13.807 Prob(JB): 0.00
Kurtosis: 277.152 Cond. No. 13.2
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Contrast: the residuals themselves show no AR(1) serial correlation
# (lag coefficient 0.0014, p = 0.971 in the output below) — the dependence
# is in the variance, not the level.
df['u']= smf.ols('df[("return")] ~ return_1', data=df).fit().resid
print(smf.ols('u ~ u.shift(1)', data=df).fit().summary())
OLS Regression Results
==============================================================================
Dep. Variable: u R-squared: 0.000
Model: OLS Adj. R-squared: -0.001
Method: Least Squares F-statistic: 0.001354
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.971
Time: 18:37:29 Log-Likelihood: -1489.5
No. Observations: 688 AIC: 2983.
Df Residuals: 686 BIC: 2992.
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -0.0012 0.081 -0.015 0.988 -0.159 0.157
u.shift(1) 0.0014 0.038 0.037 0.971 -0.074 0.076
==============================================================================
Omnibus: 113.368 Durbin-Watson: 2.000
Prob(Omnibus): 0.000 Jarque-Bera (JB): 640.382
Skew: -0.594 Prob(JB): 8.76e-140
Kurtosis: 7.575 Cond. No. 2.11
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.