import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from linearmodels.iv import IV2SLS
from wooldridge import *
# 2SLS for the hours equation on the mroz data: lwage is the endogenous
# regressor, instrumented by exper and expersq; educ, age, kidslt6 and
# nwifeinc enter as exogenous controls.  Rows containing any missing value
# are dropped before estimation.
mroz_complete_cases = dataWoo("mroz").dropna()
hours_iv_model = IV2SLS.from_formula(
    'hours ~ 1 + [lwage ~ exper + expersq] + educ + age + kidslt6 + nwifeinc',
    data=mroz_complete_cases,
)
# cov_type='unadjusted' requests the homoskedastic covariance estimator.
hours_iv_fit = hours_iv_model.fit(cov_type='unadjusted')
print(hours_iv_fit)
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: hours R-squared: -2.0076 Estimator: IV-2SLS Adj. R-squared: -2.0433 No. Observations: 428 F-statistic: 17.450 Date: Sun, Apr 26 2020 P-value (F-stat) 0.0037 Time: 14:54:28 Distribution: chi2(5) Cov. Estimator: unadjusted Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 2225.7 570.52 3.9011 0.0001 1107.5 3343.9 educ -183.75 58.684 -3.1312 0.0017 -298.77 -68.733 age -7.8061 9.3120 -0.8383 0.4019 -26.057 10.445 kidslt6 -198.15 181.64 -1.0909 0.2753 -554.17 157.86 nwifeinc -10.170 6.5682 -1.5483 0.1215 -23.043 2.7039 lwage 1639.6 467.27 3.5088 0.0005 723.73 2555.4 ============================================================================== Endogenous: lwage Instruments: exper, expersq Unadjusted Covariance (Homoskedastic) Debiased: False
# 2SLS for the lwage equation on the mroz data: hours is the endogenous
# regressor, instrumented by age, kidslt6 and nwifeinc; educ, exper and
# expersq are the exogenous controls.  The data set is passed as loaded
# (no explicit dropna() in this statement).
mroz_raw = dataWoo("mroz")
lwage_iv_model = IV2SLS.from_formula(
    'lwage ~ 1 + educ + exper + expersq + [hours ~ age + kidslt6 + nwifeinc]',
    data=mroz_raw,
)
# Homoskedastic ("unadjusted") covariance estimator.
lwage_iv_fit = lwage_iv_model.fit(cov_type='unadjusted')
print(lwage_iv_fit)
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lwage R-squared: 0.1257 Estimator: IV-2SLS Adj. R-squared: 0.1174 No. Observations: 428 F-statistic: 77.012 Date: Sun, Apr 26 2020 P-value (F-stat) 0.0000 Time: 14:55:06 Distribution: chi2(4) Cov. Estimator: unadjusted Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept -0.6557 0.3358 -1.9527 0.0509 -1.3139 0.0024 educ 0.1103 0.0154 7.1488 0.0000 0.0801 0.1406 exper 0.0346 0.0194 1.7847 0.0743 -0.0034 0.0726 expersq -0.0007 0.0005 -1.5634 0.1179 -0.0016 0.0002 hours 0.0001 0.0003 0.4974 0.6189 -0.0004 0.0006 ============================================================================== Endogenous: hours Instruments: age, kidslt6, nwifeinc Unadjusted Covariance (Homoskedastic) Debiased: False
# Load the openness data set; `df` stays bound at module level because the
# statements that follow read it.
df = dataWoo("openness")

# OLS regression of open on lpcinc and lland (lland is the instrument used
# for open in the IV regression that follows).
open_on_land_fit = smf.ols('open ~ lpcinc + lland', data=df).fit()
print(open_on_land_fit.summary())
OLS Regression Results ============================================================================== Dep. Variable: open R-squared: 0.449 Model: OLS Adj. R-squared: 0.439 Method: Least Squares F-statistic: 45.17 Date: Sun, 26 Apr 2020 Prob (F-statistic): 4.45e-15 Time: 14:54:28 Log-Likelihood: -488.44 No. Observations: 114 AIC: 982.9 Df Residuals: 111 BIC: 991.1 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 117.0845 15.848 7.388 0.000 85.680 148.489 lpcinc 0.5465 1.493 0.366 0.715 -2.412 3.505 lland -7.5671 0.814 -9.294 0.000 -9.181 -5.954 ============================================================================== Omnibus: 56.815 Durbin-Watson: 2.147 Prob(Omnibus): 0.000 Jarque-Bera (JB): 197.784 Skew: 1.783 Prob(JB): 1.13e-43 Kurtosis: 8.377 Cond. No. 130. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# 2SLS of inflation on open (instrumented by lland) and lpcinc.
# The dependent variable is written as `df.inf`, i.e. it is looked up through
# the module-level name `df`, so that name must stay bound to the openness
# data here.  NOTE(review): presumably this avoids a clash on the bare name
# `inf` -- confirm before simplifying.
inflation_iv_model = IV2SLS.from_formula(
    'df.inf ~ [open ~ lland] + lpcinc + 1',
    data=df,
)
# fit() is called with its defaults (no cov_type argument).
inflation_iv_fit = inflation_iv_model.fit()
print(inflation_iv_fit)
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: df.inf R-squared: 0.0309 Estimator: IV-2SLS Adj. R-squared: 0.0134 No. Observations: 114 F-statistic: 5.1930 Date: Sun, Apr 26 2020 P-value (F-stat) 0.0745 Time: 14:54:28 Distribution: chi2(2) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 26.899 10.775 2.4964 0.0125 5.7802 48.018 lpcinc 0.3758 1.3603 0.2763 0.7823 -2.2903 3.0419 open -0.3375 0.1504 -2.2435 0.0249 -0.6323 -0.0427 ============================================================================== Endogenous: open Instruments: lland Robust Covariance (Heteroskedastic) Debiased: False
# Switch `df` to the consump data set.
df = dataWoo("consump")

# 2SLS of gc on gy and r3, both treated as endogenous and instrumented by
# gy_1, gc_1 and r3_1.  The fitted result is kept in `pi_reg` because its
# residuals are read by the statements that follow.
pi_model = IV2SLS.from_formula(
    'gc ~ 1 + [gy + r3 ~ gy_1 + gc_1 + r3_1]',
    data=df,
)
pi_reg = pi_model.fit()
print(pi_reg)
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: gc R-squared: 0.6779 Estimator: IV-2SLS Adj. R-squared: 0.6578 No. Observations: 35 F-statistic: 18.305 Date: Sun, Apr 26 2020 P-value (F-stat) 0.0001 Time: 14:54:28 Distribution: chi2(2) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 0.0081 0.0034 2.3694 0.0178 0.0014 0.0147 gy 0.5862 0.1371 4.2761 0.0000 0.3175 0.8549 r3 -0.0003 0.0009 -0.2961 0.7671 -0.0021 0.0015 ============================================================================== Endogenous: gy, r3 Instruments: gy_1, gc_1, r3_1 Robust Covariance (Heteroskedastic) Debiased: False
# Store the 2SLS residuals from `pi_reg` as a new column so that formulas can
# refer to them by name.
df['uhat'] = pi_reg.resids

# OLS of the residual on its own first lag; shift(1) is evaluated inside the
# formula, so the first observation has no lagged value.
resid_ar1_fit = smf.ols('uhat ~ uhat.shift(1)', data=df).fit()
print(resid_ar1_fit.summary())
OLS Regression Results ============================================================================== Dep. Variable: uhat R-squared: 0.011 Model: OLS Adj. R-squared: -0.020 Method: Least Squares F-statistic: 0.3409 Date: Sun, 26 Apr 2020 Prob (F-statistic): 0.563 Time: 14:54:28 Log-Likelihood: 119.56 No. Observations: 34 AIC: -235.1 Df Residuals: 32 BIC: -232.1 Df Model: 1 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [0.025 0.975] --------------------------------------------------------------------------------- Intercept 0.0001 0.001 0.098 0.923 -0.002 0.003 uhat.shift(1) -0.1050 0.180 -0.584 0.563 -0.471 0.261 ============================================================================== Omnibus: 1.696 Durbin-Watson: 1.948 Prob(Omnibus): 0.428 Jarque-Bera (JB): 1.081 Skew: -0.041 Prob(JB): 0.583 Kurtosis: 2.130 Cond. No. 142. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# AR(1) serial-correlation check after 2SLS: re-estimate the gc equation with
# the lagged residual added as a regressor.
#
# The lagged residual is predetermined, so it belongs OUTSIDE the bracket as
# an exogenous regressor (it then acts as its own instrument).  Only gy and r3
# are endogenous, instrumented by gy_1, gc_1 and r3_1.  Placing
# uhat.shift(1) inside the bracket would treat it as a third endogenous
# variable and spend the excluded instruments on it.  The t-statistic on
# uhat.shift(1) is the test statistic of interest.
#
# NOTE: the summary text recorded after this statement was generated with
# uhat.shift(1) listed as endogenous; re-run this statement to refresh it.
print(IV2SLS.from_formula('gc ~ 1 + uhat.shift(1) + [gy + r3 ~ gy_1 + gc_1 + r3_1]', data = df).fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: gc R-squared: 0.4543 Estimator: IV-2SLS Adj. R-squared: 0.3998 No. Observations: 34 F-statistic: 10.708 Date: Sun, Apr 26 2020 P-value (F-stat) 0.0134 Time: 14:54:28 Distribution: chi2(3) Cov. Estimator: robust Parameter Estimates ================================================================================= Parameter Std. Err. T-stat P-value Lower CI Upper CI --------------------------------------------------------------------------------- Intercept -0.0004 0.0076 -0.0548 0.9563 -0.0153 0.0144 gy 0.9863 0.3411 2.8919 0.0038 0.3178 1.6548 r3 -0.0004 0.0012 -0.3484 0.7276 -0.0028 0.0019 uhat.shift(1) -0.5995 0.4002 -1.4982 0.1341 -1.3839 0.1848 ================================================================================= Endogenous: gy, r3, uhat.shift(1) Instruments: gy_1, gc_1, r3_1 Robust Covariance (Heteroskedastic) Debiased: False
# Switch `df` to the prison data set.
df = dataWoo("prison")

# 2SLS of gcriv on gpris (instrumented by final1 and final2) plus the
# exogenous controls, using the homoskedastic ("unadjusted") covariance.
crime_iv_model = IV2SLS.from_formula(
    'gcriv ~ 1 + [gpris ~ final1 + final2] + gpolpc + gincpc + cunem + cblack + cmetro + cag0_14 + cag15_17 + cag18_24 + cag25_34',
    data=df,
)
crime_iv_fit = crime_iv_model.fit(cov_type='unadjusted')
print(crime_iv_fit)

# The same equation estimated by OLS, with gpris treated as an ordinary
# regressor, for comparison with the IV estimates above.
crime_ols_fit = smf.ols(
    'gcriv ~ gpris + gpolpc + gincpc + cunem + cblack + cmetro + cag0_14 + cag15_17 + cag18_24 + cag25_34',
    data=df,
).fit()
print(crime_ols_fit.summary())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: gcriv R-squared: -0.2447 Estimator: IV-2SLS Adj. R-squared: -0.2624 No. Observations: 714 F-statistic: 59.380 Date: Sun, Apr 26 2020 P-value (F-stat) 0.0000 Time: 14:54:29 Distribution: chi2(10) Cov. Estimator: unadjusted Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 0.0363 0.0242 1.5039 0.1326 -0.0110 0.0837 gpolpc 0.0735 0.0690 1.0650 0.2869 -0.0617 0.2087 gincpc 0.9259 0.1778 5.2060 0.0000 0.5773 1.2744 cunem 0.7299 0.3548 2.0570 0.0397 0.0344 1.4253 cblack -0.0147 0.0415 -0.3553 0.7224 -0.0960 0.0665 cmetro -1.1513 1.2634 -0.9113 0.3621 -3.6275 1.3249 cag0_14 3.1702 2.2861 1.3868 0.1655 -1.3104 7.6508 cag15_17 6.6609 4.3317 1.5377 0.1241 -1.8291 15.151 cag18_24 -0.9192 2.6477 -0.3472 0.7284 -6.1086 4.2701 cag25_34 -4.3695 2.0283 -2.1543 0.0312 -8.3448 -0.3941 gpris -0.9672 0.3497 -2.7661 0.0057 -1.6526 -0.2819 ============================================================================== Endogenous: gpris Instruments: final1, final2 Unadjusted Covariance (Homoskedastic) Debiased: False OLS Regression Results ============================================================================== Dep. Variable: gcriv R-squared: 0.106 Model: OLS Adj. R-squared: 0.093 Method: Least Squares F-statistic: 8.302 Date: Sun, 26 Apr 2020 Prob (F-statistic): 8.20e-13 Time: 14:54:29 Log-Likelihood: 758.05 No. Observations: 714 AIC: -1494. Df Residuals: 703 BIC: -1444. 
Df Model: 10 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -0.0051 0.014 -0.372 0.710 -0.032 0.022 gpris -0.1678 0.048 -3.483 0.001 -0.262 -0.073 gpolpc 0.0938 0.058 1.604 0.109 -0.021 0.209 gincpc 0.9603 0.151 6.343 0.000 0.663 1.258 cunem 0.4068 0.279 1.460 0.145 -0.140 0.954 cblack -0.0113 0.035 -0.318 0.751 -0.081 0.058 cmetro -0.3920 1.042 -0.376 0.707 -2.438 1.654 cag0_14 4.2932 1.908 2.250 0.025 0.546 8.040 cag15_17 12.8985 2.899 4.450 0.000 7.207 18.590 cag18_24 1.8146 2.025 0.896 0.370 -2.161 5.790 cag25_34 -2.5618 1.599 -1.602 0.110 -5.702 0.578 ============================================================================== Omnibus: 48.415 Durbin-Watson: 1.866 Prob(Omnibus): 0.000 Jarque-Bera (JB): 200.346 Skew: -0.054 Prob(JB): 3.13e-44 Kurtosis: 5.593 Cond. No. 1.07e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.07e+03. This might indicate that there are strong multicollinearity or other numerical problems.