import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from linearmodels.iv import IV2SLS
from wooldridge import *
# Load the MROZ data and print the OLS fit of lwage on an intercept and educ.
df = dataWoo('mroz')
wage_educ_ols = smf.ols(formula='lwage ~ 1 + educ', data=df)
print(wage_educ_ols.fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.118 Model: OLS Adj. R-squared: 0.116 Method: Least Squares F-statistic: 56.93 Date: Sat, 25 Apr 2020 Prob (F-statistic): 2.76e-13 Time: 19:45:13 Log-Likelihood: -441.26 No. Observations: 428 AIC: 886.5 Df Residuals: 426 BIC: 894.6 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -0.1852 0.185 -1.000 0.318 -0.549 0.179 educ 0.1086 0.014 7.545 0.000 0.080 0.137 ============================================================================== Omnibus: 91.833 Durbin-Watson: 1.985 Prob(Omnibus): 0.000 Jarque-Bera (JB): 303.790 Skew: -0.956 Prob(JB): 1.08e-66 Kurtosis: 6.658 Cond. No. 72.9 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# OLS of educ on an intercept and fatheduc, fitted on the current df.
educ_fatheduc_ols = smf.ols(formula='educ ~ 1 + fatheduc', data=df).fit()
print(educ_fatheduc_ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: educ R-squared: 0.196 Model: OLS Adj. R-squared: 0.195 Method: Least Squares F-statistic: 182.8 Date: Sat, 25 Apr 2020 Prob (F-statistic): 1.93e-37 Time: 19:45:13 Log-Likelihood: -1606.6 No. Observations: 753 AIC: 3217. Df Residuals: 751 BIC: 3226. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 9.7990 0.199 49.356 0.000 9.409 10.189 fatheduc 0.2824 0.021 13.521 0.000 0.241 0.323 ============================================================================== Omnibus: 12.639 Durbin-Watson: 1.944 Prob(Omnibus): 0.002 Jarque-Bera (JB): 21.413 Skew: -0.048 Prob(JB): 2.24e-05 Kurtosis: 3.820 Cond. No. 25.5 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Drop every row that contains a missing value, then estimate lwage on educ
# by 2SLS with fatheduc as the instrument for educ.
df = df.dropna()
iv_fatheduc = IV2SLS.from_formula('lwage ~ 1 + [educ ~ fatheduc]', data=df)
print(iv_fatheduc.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lwage R-squared: 0.0934 Estimator: IV-2SLS Adj. R-squared: 0.0913 No. Observations: 428 F-statistic: 2.5656 Date: Sat, Apr 25 2020 P-value (F-stat) 0.1092 Time: 19:45:13 Distribution: chi2(1) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 0.4411 0.4643 0.9501 0.3421 -0.4689 1.3511 educ 0.0592 0.0369 1.6017 0.1092 -0.0132 0.1316 ============================================================================== Endogenous: educ Instruments: fatheduc Robust Covariance (Heteroskedastic) Debiased: False
# Switch to the WAGE2 data and print the OLS fit of educ on sibs.
df = dataWoo("wage2")
educ_sibs_ols = smf.ols(formula='educ ~ 1 + sibs', data=df).fit()
print(educ_sibs_ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: educ R-squared: 0.057 Model: OLS Adj. R-squared: 0.056 Method: Least Squares F-statistic: 56.67 Date: Sat, 25 Apr 2020 Prob (F-statistic): 1.22e-13 Time: 19:45:13 Log-Likelihood: -2034.4 No. Observations: 935 AIC: 4073. Df Residuals: 933 BIC: 4083. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 14.1388 0.113 124.969 0.000 13.917 14.361 sibs -0.2279 0.030 -7.528 0.000 -0.287 -0.168 ============================================================================== Omnibus: 69.595 Durbin-Watson: 1.797 Prob(Omnibus): 0.000 Jarque-Bera (JB): 55.996 Skew: 0.512 Prob(JB): 6.93e-13 Kurtosis: 2.376 Cond. No. 6.33 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# 2SLS of lwage on educ, with sibs instrumenting educ.
iv_sibs = IV2SLS.from_formula('lwage ~ 1 + [educ ~ sibs]', data=df)
print(iv_sibs.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lwage R-squared: -0.0092 Estimator: IV-2SLS Adj. R-squared: -0.0103 No. Observations: 935 F-statistic: 24.850 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0000 Time: 19:45:14 Distribution: chi2(1) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 5.1300 0.3304 15.528 0.0000 4.4825 5.7776 educ 0.1224 0.0246 4.9850 0.0000 0.0743 0.1706 ============================================================================== Endogenous: educ Instruments: sibs Robust Covariance (Heteroskedastic) Debiased: False
# Switch to the BWGHT data and print the OLS fit of packs on cigprice.
df = dataWoo("bwght")
packs_cigprice_ols = smf.ols(formula='packs ~ 1 + cigprice', data=df).fit()
print(packs_cigprice_ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: packs R-squared: 0.000 Model: OLS Adj. R-squared: -0.001 Method: Least Squares F-statistic: 0.1305 Date: Sat, 25 Apr 2020 Prob (F-statistic): 0.718 Time: 19:45:14 Log-Likelihood: -291.47 No. Observations: 1388 AIC: 586.9 Df Residuals: 1386 BIC: 597.4 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.0674 0.103 0.658 0.511 -0.134 0.269 cigprice 0.0003 0.001 0.361 0.718 -0.001 0.002 ============================================================================== Omnibus: 1059.058 Durbin-Watson: 1.960 Prob(Omnibus): 0.000 Jarque-Bera (JB): 15873.651 Skew: 3.562 Prob(JB): 0.00 Kurtosis: 17.957 Cond. No. 1.67e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.67e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# 2SLS of lbwght on packs, with cigprice instrumenting packs.
iv_cigprice = IV2SLS.from_formula('lbwght ~ 1 + [packs ~ cigprice]', data=df)
print(iv_cigprice.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lbwght R-squared: -23.230 Estimator: IV-2SLS Adj. R-squared: -23.248 No. Observations: 1388 F-statistic: 0.1107 Date: Sat, Apr 25 2020 P-value (F-stat) 0.7394 Time: 19:45:14 Distribution: chi2(1) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 4.4481 0.9387 4.7388 0.0000 2.6084 6.2879 packs 2.9887 8.9832 0.3327 0.7394 -14.618 20.595 ============================================================================== Endogenous: packs Instruments: cigprice Robust Covariance (Heteroskedastic) Debiased: False
# Switch to the CARD data and print the OLS fit of educ on nearc4 plus the
# experience, race, location and region controls.
df = dataWoo("card")
educ_reduced_form = smf.ols(
    formula=(
        'educ ~ nearc4 + exper + expersq + black + smsa + south + smsa66 '
        '+ reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668'
    ),
    data=df,
).fit()
print(educ_reduced_form.summary())
OLS Regression Results ============================================================================== Dep. Variable: educ R-squared: 0.477 Model: OLS Adj. R-squared: 0.474 Method: Least Squares F-statistic: 182.1 Date: Sat, 25 Apr 2020 Prob (F-statistic): 0.00 Time: 19:45:14 Log-Likelihood: -6258.5 No. Observations: 3010 AIC: 1.255e+04 Df Residuals: 2994 BIC: 1.265e+04 Df Model: 15 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 16.8485 0.211 79.805 0.000 16.435 17.262 nearc4 0.3199 0.088 3.641 0.000 0.148 0.492 exper -0.4125 0.034 -12.241 0.000 -0.479 -0.346 expersq 0.0009 0.002 0.526 0.599 -0.002 0.004 black -0.9355 0.094 -9.981 0.000 -1.119 -0.752 smsa 0.4022 0.105 3.837 0.000 0.197 0.608 south -0.0516 0.135 -0.381 0.703 -0.317 0.214 smsa66 0.0255 0.106 0.241 0.810 -0.182 0.233 reg661 -0.2103 0.202 -1.039 0.299 -0.607 0.187 reg662 -0.2889 0.147 -1.961 0.050 -0.578 -1.05e-05 reg663 -0.2382 0.143 -1.670 0.095 -0.518 0.041 reg664 -0.0931 0.186 -0.501 0.617 -0.458 0.272 reg665 -0.4829 0.188 -2.566 0.010 -0.852 -0.114 reg666 -0.5131 0.210 -2.448 0.014 -0.924 -0.102 reg667 -0.4271 0.206 -2.077 0.038 -0.830 -0.024 reg668 0.3136 0.242 1.298 0.194 -0.160 0.787 ============================================================================== Omnibus: 15.758 Durbin-Watson: 1.762 Prob(Omnibus): 0.000 Jarque-Bera (JB): 15.971 Skew: 0.177 Prob(JB): 0.000340 Kurtosis: 2.954 Cond. No. 1.49e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.49e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# OLS of lwage on educ and the same set of controls, fitted on the current df.
lwage_card_ols = smf.ols(
    formula=(
        'lwage ~ educ + exper + expersq + black + smsa + south + smsa66 '
        '+ reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668'
    ),
    data=df,
).fit()
print(lwage_card_ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.300 Model: OLS Adj. R-squared: 0.296 Method: Least Squares F-statistic: 85.48 Date: Sat, 25 Apr 2020 Prob (F-statistic): 1.74e-218 Time: 19:45:14 Log-Likelihood: -1288.8 No. Observations: 3010 AIC: 2610. Df Residuals: 2994 BIC: 2706. Df Model: 15 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 4.7394 0.072 66.259 0.000 4.599 4.880 educ 0.0747 0.003 21.351 0.000 0.068 0.082 exper 0.0848 0.007 12.806 0.000 0.072 0.098 expersq -0.0023 0.000 -7.223 0.000 -0.003 -0.002 black -0.1990 0.018 -10.906 0.000 -0.235 -0.163 smsa 0.1364 0.020 6.785 0.000 0.097 0.176 south -0.1480 0.026 -5.695 0.000 -0.199 -0.097 smsa66 0.0262 0.019 1.349 0.177 -0.012 0.064 reg661 -0.1186 0.039 -3.054 0.002 -0.195 -0.042 reg662 -0.0222 0.028 -0.786 0.432 -0.078 0.033 reg663 0.0260 0.027 0.949 0.343 -0.028 0.080 reg664 -0.0635 0.036 -1.780 0.075 -0.133 0.006 reg665 0.0095 0.036 0.262 0.794 -0.061 0.080 reg666 0.0219 0.040 0.547 0.584 -0.057 0.101 reg667 -0.0006 0.039 -0.015 0.988 -0.078 0.077 reg668 -0.1750 0.046 -3.777 0.000 -0.266 -0.084 ============================================================================== Omnibus: 59.717 Durbin-Watson: 1.880 Prob(Omnibus): 0.000 Jarque-Bera (JB): 71.222 Skew: -0.282 Prob(JB): 3.42e-16 Kurtosis: 3.501 Cond. No. 1.59e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.59e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# 2SLS of lwage on the controls and educ, with nearc4 instrumenting educ.
iv_nearc4 = IV2SLS.from_formula(
    'lwage ~ 1 + exper + expersq + black + smsa + south + smsa66 '
    '+ reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 '
    '+ [educ ~ nearc4]',
    data=df,
)
print(iv_nearc4.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lwage R-squared: 0.2382 Estimator: IV-2SLS Adj. R-squared: 0.2343 No. Observations: 3010 F-statistic: 840.83 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0000 Time: 19:45:14 Distribution: chi2(15) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 3.7740 0.9174 4.1137 0.0000 1.9759 5.5720 exper 0.1083 0.0233 4.6376 0.0000 0.0625 0.1540 expersq -0.0023 0.0003 -6.7128 0.0000 -0.0030 -0.0017 black -0.1468 0.0524 -2.8031 0.0051 -0.2494 -0.0441 smsa 0.1118 0.0311 3.5995 0.0003 0.0509 0.1727 south -0.1447 0.0291 -4.9775 0.0000 -0.2016 -0.0877 smsa66 0.0185 0.0205 0.9035 0.3663 -0.0217 0.0587 reg661 -0.1078 0.0410 -2.6317 0.0085 -0.1881 -0.0275 reg662 -0.0070 0.0337 -0.2091 0.8344 -0.0731 0.0590 reg663 0.0404 0.0325 1.2437 0.2136 -0.0233 0.1042 reg664 -0.0579 0.0392 -1.4771 0.1397 -0.1348 0.0189 reg665 0.0385 0.0495 0.7774 0.4369 -0.0585 0.1354 reg666 0.0551 0.0521 1.0567 0.2906 -0.0471 0.1573 reg667 0.0268 0.0501 0.5340 0.5933 -0.0714 0.1250 reg668 -0.1909 0.0507 -3.7659 0.0002 -0.2902 -0.0915 educ 0.1315 0.0540 2.4353 0.0149 0.0257 0.2373 ============================================================================== Endogenous: educ Instruments: nearc4 Robust Covariance (Heteroskedastic) Debiased: False
# Reload MROZ, keep only rows without missing values, regress educ on exper,
# expersq, fatheduc and motheduc, and print the joint F test that the
# fatheduc and motheduc coefficients are both zero.
df = dataWoo("mroz").dropna()
mreg1 = smf.ols(
    formula='educ ~ exper + expersq + fatheduc + motheduc',
    data=df,
).fit()
hypotheses = '(fatheduc = motheduc = 0)'
f_test = mreg1.f_test(hypotheses)
print(f_test)
<F test: F=array([[55.40030043]]), p=4.268908724630835e-22, df_denom=423, df_num=2>
# 2SLS of lwage on exper, expersq and educ, with fatheduc and motheduc
# instrumenting educ.
iv_parents = IV2SLS.from_formula(
    'lwage ~ 1 + [educ~fatheduc + motheduc] + exper + expersq',
    data=df,
)
print(iv_parents.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lwage R-squared: 0.1357 Estimator: IV-2SLS Adj. R-squared: 0.1296 No. Observations: 428 F-statistic: 18.611 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0003 Time: 19:45:14 Distribution: chi2(3) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 0.0481 0.4278 0.1124 0.9105 -0.7903 0.8865 exper 0.0442 0.0155 2.8546 0.0043 0.0138 0.0745 expersq -0.0009 0.0004 -2.1001 0.0357 -0.0017 -5.997e-05 educ 0.0614 0.0332 1.8503 0.0643 -0.0036 0.1264 ============================================================================== Endogenous: educ Instruments: fatheduc, motheduc Robust Covariance (Heteroskedastic) Debiased: False
# 2SLS on a fresh copy of the WAGE2 data (df itself is left untouched):
# lwage on the listed controls and IQ, with KWW instrumenting IQ.
wage2_data = dataWoo("wage2")
iv_kww = IV2SLS.from_formula(
    'lwage ~ 1+ educ + exper + tenure + married + south + urban + black + [IQ ~ KWW]',
    data=wage2_data,
)
print(iv_kww.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lwage R-squared: 0.1900 Estimator: IV-2SLS Adj. R-squared: 0.1830 No. Observations: 935 F-statistic: 356.33 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0000 Time: 19:45:14 Distribution: chi2(8) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 4.5925 0.3501 13.117 0.0000 3.9063 5.2786 educ 0.0250 0.0187 1.3410 0.1799 -0.0116 0.0616 exper 0.0144 0.0034 4.2234 0.0000 0.0077 0.0211 tenure 0.0105 0.0028 3.7258 0.0002 0.0050 0.0160 married 0.2007 0.0404 4.9616 0.0000 0.1214 0.2800 south -0.0516 0.0339 -1.5201 0.1285 -0.1180 0.0149 urban 0.1767 0.0274 6.4470 0.0000 0.1230 0.2304 black -0.0226 0.0798 -0.2826 0.7775 -0.1790 0.1339 IQ 0.0130 0.0055 2.3835 0.0171 0.0023 0.0238 ============================================================================== Endogenous: IQ Instruments: KWW Robust Covariance (Heteroskedastic) Debiased: False
# Reload MROZ and keep the rows with inlf equal to 1.
df = dataWoo("mroz")
df = df.loc[df['inlf'] == 1]

# Residuals from regressing educ on exper, expersq, fatheduc and motheduc.
educ_reduced_form = smf.ols(
    formula='educ ~ exper + expersq + fatheduc + motheduc',
    data=df,
).fit()
v2 = educ_reduced_form.resid

# v2 is not a column of df; the formula picks it up from the enclosing scope,
# so this call has to stay at module level and the name must remain v2.
iv_with_resid = IV2SLS.from_formula(
    'lwage ~1 + [educ ~ fatheduc + motheduc] + exper + expersq + v2 ',
    data=df,
)
print(iv_with_resid.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: lwage R-squared: 0.1624 Estimator: IV-2SLS Adj. R-squared: 0.1544 No. Observations: 428 F-statistic: 87.093 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0000 Time: 19:45:14 Distribution: chi2(4) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept 0.0481 0.4196 0.1146 0.9087 -0.7744 0.8706 exper 0.0442 0.0150 2.9382 0.0033 0.0147 0.0736 expersq -0.0009 0.0004 -2.1777 0.0294 -0.0017 -8.988e-05 v2 0.0582 0.0362 1.6068 0.1081 -0.0128 0.1291 educ 0.0614 0.0325 1.8906 0.0587 -0.0023 0.1250 ============================================================================== Endogenous: educ Instruments: fatheduc, motheduc Robust Covariance (Heteroskedastic) Debiased: False
# Show the OLS coefficients of lwage on educ, exper and expersq.
print("The OLS estimate is ")
# A bare expression is echoed only by an interactive session; print the
# coefficient Series so it is also shown when the file runs as a script.
print(smf.ols('lwage ~ educ + exper + expersq', data=df).fit().params)
The OLS estimate is
Intercept -0.522041 educ 0.107490 exper 0.041567 expersq -0.000811 dtype: float64
# Reload MROZ without rows containing missing values, take the residuals of
# the 2SLS lwage equation (educ instrumented by fatheduc and motheduc) and
# regress them on exper, expersq, fatheduc and motheduc.
df = dataWoo("mroz").dropna()
iv_two_instruments = IV2SLS.from_formula(
    'lwage ~1 + [educ ~ fatheduc + motheduc] + exper + expersq ',
    data=df,
)
u1 = iv_two_instruments.fit().resids
# u1 is not a column of df; the formula resolves it from the enclosing scope.
wreg = smf.ols(
    formula='u1 ~ exper + expersq + fatheduc + motheduc',
    data=df,
).fit()
print(wreg.summary())
OLS Regression Results ============================================================================== Dep. Variable: u1 R-squared: 0.001 Model: OLS Adj. R-squared: -0.009 Method: Least Squares F-statistic: 0.09350 Date: Sat, 25 Apr 2020 Prob (F-statistic): 0.984 Time: 19:45:14 Log-Likelihood: -436.70 No. Observations: 428 AIC: 883.4 Df Residuals: 423 BIC: 903.7 Df Model: 4 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.0110 0.141 0.078 0.938 -0.267 0.289 exper -1.833e-05 0.013 -0.001 0.999 -0.026 0.026 expersq 7.341e-07 0.000 0.002 0.999 -0.001 0.001 fatheduc 0.0058 0.011 0.517 0.605 -0.016 0.028 motheduc -0.0066 0.012 -0.556 0.579 -0.030 0.017 ============================================================================== Omnibus: 68.934 Durbin-Watson: 1.947 Prob(Omnibus): 0.000 Jarque-Bera (JB): 246.940 Skew: -0.682 Prob(JB): 2.39e-54 Kurtosis: 6.462 Cond. No. 1.55e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.55e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Number of observations times R-squared of the auxiliary regression in wreg.
LM1 = wreg.nobs * wreg.rsquared
# A bare expression is echoed only by an interactive session; print the
# statistic so it is also shown when the file runs as a script.
print(LM1)
0.3780713419637767
# Same n * R-squared statistic with huseduc added as a third instrument:
# 2SLS residuals of the lwage equation are regressed on exper, expersq and
# the three instruments.
u2 = IV2SLS.from_formula(
    'lwage ~1 + [educ ~ fatheduc + motheduc + huseduc] + exper + expersq ',
    data=df,
).fit().resids
# u2 is not a column of df; the formula resolves it from the enclosing scope.
wreg2 = smf.ols(
    'u2 ~ exper + expersq + fatheduc + motheduc + huseduc',
    data=df,
).fit()
LM2 = wreg2.nobs * wreg2.rsquared
# A bare expression is echoed only by an interactive session; print the
# statistic so it is also shown when the file runs as a script.
print(LM2)
1.1150430012567591
# 2SLS coefficients with fatheduc, motheduc and huseduc instrumenting educ.
# Printed explicitly: a bare expression shows nothing when run as a script.
iv_three_instruments = IV2SLS.from_formula(
    'lwage ~1 + [educ ~ fatheduc + motheduc + huseduc] + exper + expersq ',
    data=df,
).fit()
print(iv_three_instruments.params)
Intercept -0.186857 exper 0.043097 expersq -0.000863 educ 0.080392 Name: parameter, dtype: float64
# 2SLS coefficients with fatheduc and motheduc instrumenting educ.
# Printed explicitly: a bare expression shows nothing when run as a script.
iv_two_instruments_fit = IV2SLS.from_formula(
    'lwage ~1 + [educ ~ fatheduc + motheduc] + exper + expersq ',
    data=df,
).fit()
print(iv_two_instruments_fit.params)
Intercept 0.048100 exper 0.044170 expersq -0.000899 educ 0.061397 Name: parameter, dtype: float64
# Switch to the FERTIL1 data and print two fits of kids on educ and the
# demographic, region and year controls: first 2SLS with meduc and feduc
# instrumenting educ, then plain OLS.
df = dataWoo("fertil1")
kids_iv = IV2SLS.from_formula(
    'kids ~ 1 + [educ ~ meduc + feduc] + age + agesq + black '
    '+ east + northcen + west + farm + othrural + town + smcity '
    '+ y74 + y76 + y78 + y80 + y82 + y84',
    data=df,
)
print(kids_iv.fit())
kids_ols = smf.ols(
    formula=(
        'kids ~ educ + age + agesq + black '
        '+ east + northcen + west + farm + othrural + town + smcity '
        '+ y74 + y76 + y78 + y80 + y82 + y84'
    ),
    data=df,
)
print(kids_ols.fit().summary())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: kids R-squared: 0.1281 Estimator: IV-2SLS Adj. R-squared: 0.1148 No. Observations: 1129 F-statistic: 150.13 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0000 Time: 19:45:15 Distribution: chi2(17) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept -7.2412 3.1890 -2.2707 0.0232 -13.491 -0.9910 age 0.5236 0.1395 3.7540 0.0002 0.2502 0.7969 agesq -0.0057 0.0016 -3.6113 0.0003 -0.0088 -0.0026 black 1.0730 0.1995 5.3789 0.0000 0.6820 1.4639 east 0.2286 0.1281 1.7838 0.0745 -0.0226 0.4797 northcen 0.3744 0.1172 3.1958 0.0014 0.1448 0.6040 west 0.2076 0.1617 1.2838 0.1992 -0.1094 0.5246 farm -0.0770 0.1490 -0.5168 0.6053 -0.3690 0.2150 othrural -0.1952 0.1857 -1.0515 0.2930 -0.5592 0.1687 town 0.0818 0.1276 0.6412 0.5214 -0.1682 0.3319 smcity 0.2125 0.1527 1.3912 0.1642 -0.0869 0.5119 y74 0.2721 0.1870 1.4556 0.1455 -0.0943 0.6386 y76 -0.0945 0.1991 -0.4748 0.6349 -0.4849 0.2958 y78 -0.0573 0.1967 -0.2911 0.7710 -0.4428 0.3282 y80 -0.0532 0.1949 -0.2732 0.7847 -0.4353 0.3288 y82 -0.4962 0.1899 -2.6127 0.0090 -0.8685 -0.1240 y84 -0.5214 0.1868 -2.7904 0.0053 -0.8876 -0.1552 educ -0.1527 0.0402 -3.7949 0.0001 -0.2316 -0.0739 ============================================================================== Endogenous: educ Instruments: meduc, feduc Robust Covariance (Heteroskedastic) Debiased: False OLS Regression Results ============================================================================== Dep. Variable: kids R-squared: 0.130 Model: OLS Adj. R-squared: 0.116 Method: Least Squares F-statistic: 9.723 Date: Sat, 25 Apr 2020 Prob (F-statistic): 2.42e-24 Time: 19:45:15 Log-Likelihood: -2091.2 No. Observations: 1129 AIC: 4218. 
Df Residuals: 1111 BIC: 4309. Df Model: 17 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -7.7425 3.052 -2.537 0.011 -13.730 -1.755 educ -0.1284 0.018 -6.999 0.000 -0.164 -0.092 age 0.5321 0.138 3.845 0.000 0.261 0.804 agesq -0.0058 0.002 -3.710 0.000 -0.009 -0.003 black 1.0757 0.174 6.198 0.000 0.735 1.416 east 0.2173 0.133 1.637 0.102 -0.043 0.478 northcen 0.3631 0.121 3.004 0.003 0.126 0.600 west 0.1976 0.167 1.184 0.237 -0.130 0.525 farm -0.0526 0.147 -0.357 0.721 -0.341 0.236 othrural -0.1629 0.175 -0.928 0.353 -0.507 0.181 town 0.0844 0.125 0.677 0.498 -0.160 0.329 smcity 0.2119 0.160 1.322 0.187 -0.103 0.526 y74 0.2682 0.173 1.553 0.121 -0.071 0.607 y76 -0.0974 0.179 -0.544 0.587 -0.449 0.254 y78 -0.0687 0.182 -0.378 0.706 -0.425 0.288 y80 -0.0713 0.183 -0.390 0.697 -0.430 0.287 y82 -0.5225 0.172 -3.030 0.003 -0.861 -0.184 y84 -0.5452 0.175 -3.124 0.002 -0.888 -0.203 ============================================================================== Omnibus: 9.775 Durbin-Watson: 2.011 Prob(Omnibus): 0.008 Jarque-Bera (JB): 9.966 Skew: 0.227 Prob(JB): 0.00685 Kurtosis: 2.920 Cond. No. 1.32e+05 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.32e+05. This might indicate that there are strong multicollinearity or other numerical problems.
# Endogeneity test for educ (regression-based Hausman test).
# The reduced form for educ has to contain every exogenous regressor of the
# kids equation in addition to the instruments meduc and feduc. Residuals
# from a regression on the instruments alone are not orthogonal to those
# regressors, so the t statistic on v2 would not be a valid test. This also
# matches how v2 is built for the lwage equation earlier in the file.
# NOTE: the captured output following this cell was produced with the old
# residuals and must be regenerated by re-running.
v2 = smf.ols(
    'educ ~ meduc + feduc + age + agesq + black + east + northcen + west '
    '+ farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84',
    data=df,
).fit().resid
# v2 is not a column of df; the formula resolves it from the enclosing scope.
print(IV2SLS.from_formula('kids ~ 1 + [educ ~ meduc + feduc] + age + agesq + black + east + northcen + west + farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84 + v2', data=df).fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: kids R-squared: 0.1299 Estimator: IV-2SLS Adj. R-squared: 0.1158 No. Observations: 1129 F-statistic: 177.40 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0000 Time: 19:45:15 Distribution: chi2(18) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept -7.4075 3.1199 -2.3743 0.0176 -13.522 -1.2926 age 0.5305 0.1380 3.8437 0.0001 0.2600 0.8011 agesq -0.0058 0.0016 -3.6977 0.0002 -0.0089 -0.0027 black 1.0615 0.2014 5.2709 0.0000 0.6668 1.4563 east 0.2208 0.1265 1.7451 0.0810 -0.0272 0.4688 northcen 0.3716 0.1164 3.1913 0.0014 0.1434 0.5998 west 0.2045 0.1614 1.2672 0.2051 -0.1118 0.5207 farm -0.0652 0.1459 -0.4469 0.6549 -0.3511 0.2207 othrural -0.1778 0.1803 -0.9862 0.3240 -0.5311 0.1755 town 0.0799 0.1279 0.6246 0.5322 -0.1708 0.3306 smcity 0.2100 0.1528 1.3738 0.1695 -0.0896 0.5096 y74 0.2719 0.1862 1.4605 0.1441 -0.0930 0.6369 y76 -0.0984 0.1982 -0.4966 0.6195 -0.4868 0.2900 y78 -0.0637 0.1959 -0.3253 0.7449 -0.4477 0.3202 y80 -0.0652 0.1926 -0.3384 0.7351 -0.4426 0.3123 y82 -0.5143 0.1865 -2.7580 0.0058 -0.8799 -0.1488 y84 -0.5346 0.1847 -2.8950 0.0038 -0.8965 -0.1727 v2 0.0292 0.0425 0.6858 0.4928 -0.0542 0.1125 educ -0.1508 0.0377 -3.9940 0.0001 -0.2247 -0.0768 ============================================================================== Endogenous: educ Instruments: meduc, feduc Robust Covariance (Heteroskedastic) Debiased: False
# Switch to the JTRAIN data, keep the rows with year equal to 1988 and print
# the OLS fit of chrsemp on cgrant.
df = dataWoo('jtrain')
df = df.loc[df['year'] == 1988]
chrsemp_ols = smf.ols(formula='chrsemp ~ cgrant + 1', data=df).fit()
print(chrsemp_ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: chrsemp R-squared: 0.392 Model: OLS Adj. R-squared: 0.387 Method: Least Squares F-statistic: 79.37 Date: Sat, 25 Apr 2020 Prob (F-statistic): 5.69e-15 Time: 19:45:15 Log-Likelihood: -515.77 No. Observations: 125 AIC: 1036. Df Residuals: 123 BIC: 1041. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.5093 1.558 0.327 0.744 -2.575 3.594 cgrant 27.8779 3.129 8.909 0.000 21.684 34.072 ============================================================================== Omnibus: 56.571 Durbin-Watson: 1.953 Prob(Omnibus): 0.000 Jarque-Bera (JB): 304.543 Skew: 1.437 Prob(JB): 7.40e-67 Kurtosis: 10.086 Cond. No. 2.49 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# 2SLS of clscrap on chrsemp, with cgrant instrumenting chrsemp.
iv_cgrant = IV2SLS.from_formula(
    formula='clscrap ~ 1 + [chrsemp ~ cgrant]',
    data=df,
)
print(iv_cgrant.fit())
IV-2SLS Estimation Summary ============================================================================== Dep. Variable: clscrap R-squared: 0.0159 Estimator: IV-2SLS Adj. R-squared: -0.0070 No. Observations: 45 F-statistic: 2.9434 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0862 Time: 19:45:24 Distribution: chi2(1) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept -0.0327 0.1104 -0.2958 0.7674 -0.2491 0.1838 chrsemp -0.0142 0.0082 -1.7156 0.0862 -0.0303 0.0020 ============================================================================== Endogenous: chrsemp Instruments: cgrant Robust Covariance (Heteroskedastic) Debiased: False
# Same equation with no instrumented regressor: clscrap on chrsemp, fitted
# through the IV2SLS interface.
no_instrument_fit = IV2SLS.from_formula(
    formula='clscrap ~ 1 + chrsemp',
    data=df,
).fit()
print(no_instrument_fit)
OLS Estimation Summary ============================================================================== Dep. Variable: clscrap R-squared: 0.0619 Estimator: OLS Adj. R-squared: 0.0401 No. Observations: 45 F-statistic: 12.322 Date: Sat, Apr 25 2020 P-value (F-stat) 0.0004 Time: 19:45:26 Distribution: chi2(1) Cov. Estimator: robust Parameter Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Intercept -0.1035 0.1026 -1.0093 0.3128 -0.3045 0.0975 chrsemp -0.0076 0.0022 -3.5103 0.0004 -0.0118 -0.0034 ==============================================================================