# Chapter 10. Basic Regression Analysis with Time Series Data

import numpy as np
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

from wooldridge import *


## Example 10.1. Static Phillips Curve

# Load the "phillips" data set and keep only observations with year < 1997.
df = dataWoo("phillips")
df = df[df['year'] < 1997]

# The formula refers to the inflation column as `df.inf`, so the frame must
# stay bound to the name `df` when the model is built.
phillips_fit = smf.ols('df.inf ~ unem + 1', data=df).fit()
print(phillips_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 df.inf   R-squared:                       0.053
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     2.616
Date:                Mon, 11 Dec 2023   Prob (F-statistic):              0.112
Time:                        18:37:13   Log-Likelihood:                -124.43
No. Observations:                  49   AIC:                             252.9
Df Residuals:                      47   BIC:                             256.6
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.4236      1.719      0.828      0.412      -2.035       4.882
unem           0.4676      0.289      1.617      0.112      -0.114       1.049
==============================================================================
Omnibus:                        8.905   Durbin-Watson:                   0.803
Prob(Omnibus):                  0.012   Jarque-Bera (JB):                8.336
Skew:                           0.979   Prob(JB):                       0.0155
Kurtosis:                       3.502   Cond. No.                         23.5
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


## Example 10.2. Effects of Inflation and Deficits on Interest Rates

# Regress i3 on the `inf` and `def` columns of the "intdef" data set.
# `def` is a Python keyword, so the formula reaches that column through
# `df[("def")]`; the frame therefore has to be bound to the name `df`.
df = dataWoo("intdef")
intdef_fit = smf.ols('i3 ~ df.inf + df[("def")] + 1', data=df).fit()
print(intdef_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                     i3   R-squared:                       0.602
Model:                            OLS   Adj. R-squared:                  0.587
Method:                 Least Squares   F-statistic:                     40.09
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           2.48e-11
Time:                        18:37:13   Log-Likelihood:                -112.16
No. Observations:                  56   AIC:                             230.3
Df Residuals:                      53   BIC:                             236.4
Df Model:                           2
Covariance Type:            nonrobust
===============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       1.7333      0.432      4.012      0.000       0.867       2.600
df.inf          0.6059      0.082      7.376      0.000       0.441       0.771
df[("def")]     0.5131      0.118      4.334      0.000       0.276       0.751
==============================================================================
Omnibus:                        0.260   Durbin-Watson:                   0.716
Prob(Omnibus):                  0.878   Jarque-Bera (JB):                0.015
Skew:                          -0.028   Prob(JB):                        0.992
Kurtosis:                       3.058   Cond. No.                         9.28
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


## Example 10.3. Puerto Rican Employment and the Minimum Wage

# Fit lprepop on lmincov and lusgnp using the "prminwge" data set.
df = dataWoo("prminwge")
prminwge_fit = smf.ols('lprepop ~ lmincov + lusgnp', data=df).fit()
print(prminwge_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                lprepop   R-squared:                       0.660
Model:                            OLS   Adj. R-squared:                  0.641
Method:                 Least Squares   F-statistic:                     34.04
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           6.17e-09
Time:                        18:37:13   Log-Likelihood:                 57.376
No. Observations:                  38   AIC:                            -108.8
Df Residuals:                      35   BIC:                            -103.8
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.0544      0.765     -1.378      0.177      -2.608       0.499
lmincov       -0.1544      0.065     -2.380      0.023      -0.286      -0.023
lusgnp        -0.0122      0.089     -0.138      0.891      -0.192       0.168
==============================================================================
Omnibus:                        0.079   Durbin-Watson:                   0.340
Prob(Omnibus):                  0.961   Jarque-Bera (JB):                0.084
Skew:                           0.073   Prob(JB):                        0.959
Kurtosis:                       2.822   Cond. No.                         676.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


## Example 10.4. Effects of Personal Exemption on Fertility Rates

# Two fertility models on the "fertil3" data set:
#   regf   - gfr on pe, ww2 and pill;
#   tsregf - the same model plus pe_1 and pe_2
#            (presumably lags of pe - confirm against the data set docs).
df = dataWoo("fertil3")
regf = smf.ols('gfr ~ pe + ww2 + pill', data=df).fit()
tsregf = smf.ols('gfr ~ pe + pe_1 + pe_2 + ww2 + pill', data=df).fit()

# Side-by-side comparison table. info_dict appends one extra row per entry
# (N and R2), each computed from the fitted results object.
# The call was previously left unclosed after the 'R2' entry.
print(summary_col([regf, tsregf], stars=True, float_format='%0.3f',
                  model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared)}))

====================================
Model I    Model II
(b/se)     (b/se)
------------------------------------
Intercept      98.682***  95.870***
(3.208)    (3.282)
R-squared      0.473      0.499
R-squared Adj. 0.450      0.459
pe             0.083***   0.073
(0.030)    (0.126)
pe_1                      -0.006
(0.156)
pe_2                      0.034
(0.126)
pill           -31.594*** -31.305***
(4.081)    (3.982)
ww2            -24.238*** -22.126**
(7.458)    (10.732)
N              72         70
R2             0.473      0.499
====================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

# Joint F test that the coefficients on pe, pe_1 and pe_2 are all zero
# in the second fertility model (tsregf).
hypotheses = '(pe = pe_1  = pe_2 = 0)'
f_test = tsregf.f_test(r_matrix=hypotheses)
print(f_test)

<F test: F=3.972964046978578, p=0.011652005303125972, df_denom=64, df_num=3>


## Example 10.5. Antidumping Filings and Chemical Imports

# Fit lchnimp on the chemical-index, gas, exchange-rate and filing-period
# regressors from the "barium" data set, then print the full summary.
barium_fit = smf.ols(
    'lchnimp ~lchempi + lgas + lrtwex + befile6 + affile6 + afdec6',
    data=dataWoo("barium"),
).fit()
print(barium_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                lchnimp   R-squared:                       0.305
Model:                            OLS   Adj. R-squared:                  0.271
Method:                 Least Squares   F-statistic:                     9.064
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           3.25e-08
Time:                        18:37:13   Log-Likelihood:                -114.79
No. Observations:                 131   AIC:                             243.6
Df Residuals:                     124   BIC:                             263.7
Df Model:                           6
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -17.8030     21.045     -0.846      0.399     -59.458      23.852
lchempi        3.1172      0.479      6.505      0.000       2.169       4.066
lgas           0.1964      0.907      0.217      0.829      -1.598       1.991
lrtwex         0.9830      0.400      2.457      0.015       0.191       1.775
befile6        0.0596      0.261      0.228      0.820      -0.457       0.576
affile6       -0.0324      0.264     -0.123      0.903      -0.556       0.491
afdec6        -0.5652      0.286     -1.978      0.050      -1.131       0.001
==============================================================================
Omnibus:                        9.160   Durbin-Watson:                   1.458
Prob(Omnibus):                  0.010   Jarque-Bera (JB):                9.978
Skew:                          -0.491   Prob(JB):                      0.00681
Kurtosis:                       3.930   Cond. No.                     9.62e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 9.62e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

# Exact percentage change implied by the afdec6 coefficient (-0.5652) in the
# log-dependent-variable regression above: 100 * (exp(b) - 1).
# A bare expression only shows its value in an interactive session, so the
# result is bound to a name and printed explicitly.
afdec6_pct_effect = 100 * (np.exp(-0.5652) - 1)
print(afdec6_pct_effect)

-43.17535139053659


## Example 10.6. Election Outcomes and Economic Performance

# Load the "fair" data set and keep only observations with year < 1996.
df = dataWoo("fair")
df = df[df['year'] < 1996]

# The interaction term uses `df.inf`, so the filtered frame must remain bound
# to the name `df` when the formula is evaluated.
fair_fit = smf.ols(
    'demvote ~ partyWH + incum + partyWH:gnews + partyWH:df.inf +1',
    data=df,
).fit()
print(fair_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                demvote   R-squared:                       0.663
Model:                            OLS   Adj. R-squared:                  0.573
Method:                 Least Squares   F-statistic:                     7.374
Date:                Mon, 11 Dec 2023   Prob (F-statistic):            0.00172
Time:                        18:37:13   Log-Likelihood:                 34.586
No. Observations:                  20   AIC:                            -59.17
Df Residuals:                      15   BIC:                            -54.19
Df Model:                           4
Covariance Type:            nonrobust
==================================================================================
coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.4811      0.012     39.228      0.000       0.455       0.507
partyWH           -0.0435      0.040     -1.075      0.300      -0.130       0.043
incum              0.0544      0.023      2.323      0.035       0.004       0.104
partyWH:gnews      0.0108      0.004      2.628      0.019       0.002       0.020
partyWH:df.inf    -0.0077      0.003     -2.365      0.032      -0.015      -0.001
==============================================================================
Omnibus:                        3.025   Durbin-Watson:                   2.171
Prob(Omnibus):                  0.220   Jarque-Bera (JB):                2.074
Skew:                           0.787   Prob(JB):                        0.355
Kurtosis:                       2.891   Cond. No.                         33.0
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


## Example 10.7. Housing Investment and Prices

# Housing investment on price, without (reghi) and with (reghit) the
# trend regressor t, using the "hseinv" data set.
df = dataWoo("hseinv")
reghi = smf.ols('linvpc ~ lprice', data=df).fit()
reghit = smf.ols('linvpc ~ lprice + t', data=df).fit()

# Side-by-side comparison table. info_dict appends one extra row per entry
# (N and R2), each computed from the fitted results object.
# The call was previously left unclosed after the 'R2' entry.
print(summary_col([reghi, reghit], stars=True, float_format='%0.3f',
                  model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared)}))

==================================
Model I   Model II
(b/se)    (b/se)
----------------------------------
Intercept      -0.550*** -0.913***
(0.043)   (0.136)
R-squared      0.208     0.341
R-squared Adj. 0.189     0.307
lprice         1.241***  -0.381
(0.382)   (0.679)
t                        0.010***
(0.004)
N              42        42
R2             0.208     0.341
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

## Example 10.8. Fertility Equation
# NOTE(review): heading restored - the file jumps from Example 10.7 to 10.9;
# confirm the title against the textbook.

# Fertility model with the trend regressor t (regt) and with both t and tsq
# (regtsq), using the "fertil3" data set.
df = dataWoo("fertil3")
regt = smf.ols('gfr ~ pe + ww2 + pill + t + 1', data=df).fit()
regtsq = smf.ols('gfr ~ pe + ww2 + pill + t + tsq', data=df).fit()

# Side-by-side comparison table. info_dict appends one extra row per entry
# (N and R2), each computed from the fitted results object.
# The call was previously left unclosed after the 'R2' entry.
print(summary_col([regt, regtsq], stars=True, float_format='%0.3f',
                  model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared)}))

====================================
Model I    Model II
(b/se)     (b/se)
------------------------------------
Intercept      111.769*** 124.092***
(3.358)    (4.361)
R-squared      0.662      0.727
R-squared Adj. 0.642      0.706
pe             0.279***   0.348***
(0.040)    (0.040)
pill           0.997      -10.120
(6.262)    (6.336)
t              -1.150***  -2.531***
(0.188)    (0.389)
tsq                       0.020***
(0.005)
ww2            -35.592*** -35.880***
(6.297)    (5.708)
N              72         72
R2             0.662      0.727
====================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


## Example 10.9. Puerto Rican Employment

# Same employment regression as Example 10.3 with the trend regressor t added.
prminwge_trend_fit = smf.ols(
    'lprepop ~ lmincov + lusgnp + t',
    data=dataWoo("prminwge"),
).fit()
print(prminwge_trend_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                lprepop   R-squared:                       0.847
Model:                            OLS   Adj. R-squared:                  0.834
Method:                 Least Squares   F-statistic:                     62.78
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           6.01e-14
Time:                        18:37:13   Log-Likelihood:                 72.532
No. Observations:                  38   AIC:                            -137.1
Df Residuals:                      34   BIC:                            -130.5
Df Model:                           3
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -8.6963      1.296     -6.711      0.000     -11.330      -6.063
lmincov       -0.1687      0.044     -3.813      0.001      -0.259      -0.079
lusgnp         1.0574      0.177      5.986      0.000       0.698       1.416
t             -0.0324      0.005     -6.442      0.000      -0.043      -0.022
==============================================================================
Omnibus:                        6.093   Durbin-Watson:                   0.908
Prob(Omnibus):                  0.048   Jarque-Bera (JB):                5.533
Skew:                           0.935   Prob(JB):                       0.0629
Kurtosis:                       3.006   Cond. No.                     4.99e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.99e+03. This might indicate that there are
strong multicollinearity or other numerical problems.


## Example 10.10. Housing Investment

# Housing investment on price and the trend regressor t ("hseinv" data set).
# `df` stays bound to this frame; the detrending step further down reuses it.
df = dataWoo("hseinv")
hseinv_fit = smf.ols('linvpc ~ lprice + t', data=df).fit()
print(hseinv_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 linvpc   R-squared:                       0.341
Model:                            OLS   Adj. R-squared:                  0.307
Method:                 Least Squares   F-statistic:                     10.08
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           0.000296
Time:                        18:37:13   Log-Likelihood:                 23.459
No. Observations:                  42   AIC:                            -40.92
Df Residuals:                      39   BIC:                            -35.71
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.9131      0.136     -6.733      0.000      -1.187      -0.639
lprice        -0.3810      0.679     -0.561      0.578      -1.754       0.992
t              0.0098      0.004      2.798      0.008       0.003       0.017
==============================================================================
Omnibus:                        4.879   Durbin-Watson:                   1.049
Prob(Omnibus):                  0.087   Jarque-Bera (JB):                3.978
Skew:                          -0.446   Prob(JB):                        0.137
Kurtosis:                       4.216   Cond. No.                         770.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Detrend linvpc: regress it on t alone and keep the residuals as `uhat`.
uhat_reg = smf.ols('linvpc ~ t', data=df).fit()
uhat = df.linvpc - uhat_reg.predict()

# `uhat` is not a column of df; the formula picks it up by name from the
# surrounding namespace, so the variable name must not change.
detrend = smf.ols('uhat ~ lprice + t', data=df).fit()
trend = smf.ols('linvpc ~ lprice + t', data=df).fit()

# Side-by-side comparison of the detrended and original dependent variable.
# info_dict appends one extra row per entry (N and R2).
# The call was previously left unclosed after the 'R2' entry.
print(summary_col([detrend, trend], stars=True, float_format='%0.3f',
                  model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared)}))

================================
Model I  Model II
(b/se)   (b/se)
--------------------------------
Intercept      -0.072  -0.913***
(0.136) (0.136)
lprice         -0.381  -0.381
(0.679) (0.679)
t              0.002   0.010***
(0.004) (0.004)
R-squared      0.008   0.341
R-squared Adj. -0.043  0.307
N              42      42
R2             0.008   0.341
================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


## Example 10.11. Effects of Antidumping Filings

# Antidumping regression on the "barium" data with the eleven month
# regressors feb..dec added. The formula is split across two adjacent
# literals, which Python joins into the same single string.
antid_month = smf.ols(
    'lchnimp ~ lchempi + lgas + lrtwex + befile6 + affile6 + afdec6 + '
    'feb + mar + apr + may + jun + jul + aug + sep + oct + nov + dec + 1',
    data=dataWoo("barium"),
).fit()
monthly_summary = antid_month.summary()
print(monthly_summary)

                            OLS Regression Results
==============================================================================
Dep. Variable:                lchnimp   R-squared:                       0.358
Model:                            OLS   Adj. R-squared:                  0.262
Method:                 Least Squares   F-statistic:                     3.712
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           1.28e-05
Time:                        18:37:14   Log-Likelihood:                -109.54
No. Observations:                 131   AIC:                             255.1
Df Residuals:                     113   BIC:                             306.8
Df Model:                          17
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     16.7788     32.429      0.517      0.606     -47.468      81.026
lchempi        3.2651      0.493      6.624      0.000       2.288       4.242
lgas          -1.2781      1.389     -0.920      0.359      -4.030       1.474
lrtwex         0.6630      0.471      1.407      0.162      -0.271       1.597
befile6        0.1397      0.267      0.524      0.602      -0.389       0.668
affile6        0.0126      0.279      0.045      0.964      -0.539       0.565
afdec6        -0.5213      0.302     -1.726      0.087      -1.120       0.077
feb           -0.4177      0.304     -1.372      0.173      -1.021       0.185
mar            0.0591      0.265      0.223      0.824      -0.465       0.584
apr           -0.4515      0.268     -1.682      0.095      -0.983       0.080
may            0.0333      0.269      0.124      0.902      -0.500       0.567
jun           -0.2063      0.269     -0.766      0.445      -0.740       0.327
jul            0.0038      0.279      0.014      0.989      -0.548       0.556
aug           -0.1571      0.278     -0.565      0.573      -0.708       0.394
sep           -0.1342      0.268     -0.501      0.617      -0.664       0.396
oct            0.0517      0.267      0.194      0.847      -0.477       0.580
nov           -0.2463      0.263     -0.937      0.351      -0.767       0.274
dec            0.1328      0.271      0.489      0.626      -0.405       0.671
==============================================================================
Omnibus:                        9.169   Durbin-Watson:                   1.325
Prob(Omnibus):                  0.010   Jarque-Bera (JB):                9.324
Skew:                          -0.540   Prob(JB):                      0.00945
Kurtosis:                       3.736   Cond. No.                     1.47e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.47e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

# Joint F test that all eleven month coefficients (feb..dec) are zero.
hypotheses = '(feb = mar = apr = may = jun = jul = aug = sep = oct = nov = dec= 0)'
f_test = antid_month.f_test(r_matrix=hypotheses)
print(f_test)

<F test: F=0.8559432867177492, p=0.585200849291337, df_denom=113, df_num=11>

# Same antidumping regression with the spr, sum and fall regressors in place
# of the month regressors. The formula is split across two adjacent literals,
# which Python joins into the same single string.
antid_season = smf.ols(
    'lchnimp ~ lchempi + lgas + lrtwex + befile6 + affile6 + afdec6 + '
    'spr + sum + fall + 1',
    data=dataWoo("barium"),
).fit()
seasonal_summary = antid_season.summary()
print(seasonal_summary)

                            OLS Regression Results
==============================================================================
Dep. Variable:                lchnimp   R-squared:                       0.310
Model:                            OLS   Adj. R-squared:                  0.258
Method:                 Least Squares   F-statistic:                     6.032
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           5.79e-07
Time:                        18:37:14   Log-Likelihood:                -114.33
No. Observations:                 131   AIC:                             248.7
Df Residuals:                     121   BIC:                             277.4
Df Model:                           9
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -26.5219     23.297     -1.138      0.257     -72.645      19.602
lchempi        3.0779      0.486      6.331      0.000       2.116       4.040
lgas           0.5651      1.000      0.565      0.573      -1.415       2.545
lrtwex         1.1015      0.425      2.594      0.011       0.261       1.942
befile6        0.0767      0.265      0.289      0.773      -0.448       0.601
affile6       -0.0833      0.273     -0.305      0.761      -0.623       0.457
afdec6        -0.6212      0.295     -2.103      0.038      -1.206      -0.036
spr           -0.0412      0.151     -0.273      0.786      -0.341       0.258
sum           -0.1519      0.169     -0.897      0.371      -0.487       0.183
fall          -0.0673      0.154     -0.436      0.664      -0.373       0.239
==============================================================================
Omnibus:                        8.751   Durbin-Watson:                   1.439
Prob(Omnibus):                  0.013   Jarque-Bera (JB):                9.596
Skew:                          -0.466   Prob(JB):                      0.00825
Kurtosis:                       3.943   Cond. No.                     1.06e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.06e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

# Joint F test that the spr, sum and fall coefficients are all zero.
hypotheses = '(spr = sum = fall= 0)'
f_test = antid_season.f_test(r_matrix=hypotheses)
print(f_test)

<F test: F=0.28224471031401444, p=0.8381333029196857, df_denom=121, df_num=3>