Python for Introductory Econometrics: Chap 15

# Python for Introductory Econometrics¶

## Chapter 15. Instrumental Variables Estimation and TSLS¶

#### https://www.solomonegash.com/¶

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss

import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

from linearmodels.iv import IV2SLS

from wooldridge import *


In [2]:
df = dataWoo('mroz')
print(smf.ols('lwage ~ 1 + educ', data=df).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.118
Method:                 Least Squares   F-statistic:                     56.93
Date:                Sat, 25 Apr 2020   Prob (F-statistic):           2.76e-13
Time:                        19:45:13   Log-Likelihood:                -441.26
No. Observations:                 428   AIC:                             886.5
Df Residuals:                     426   BIC:                             894.6
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.1852      0.185     -1.000      0.318      -0.549       0.179
educ           0.1086      0.014      7.545      0.000       0.080       0.137
==============================================================================
Omnibus:                       91.833   Durbin-Watson:                   1.985
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              303.790
Skew:                          -0.956   Prob(JB):                     1.08e-66
Kurtosis:                       6.658   Cond. No.                         72.9
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [3]:
print(smf.ols('educ ~ 1 + fatheduc', data =df).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                   educ   R-squared:                       0.196
Method:                 Least Squares   F-statistic:                     182.8
Date:                Sat, 25 Apr 2020   Prob (F-statistic):           1.93e-37
Time:                        19:45:13   Log-Likelihood:                -1606.6
No. Observations:                 753   AIC:                             3217.
Df Residuals:                     751   BIC:                             3226.
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      9.7990      0.199     49.356      0.000       9.409      10.189
fatheduc       0.2824      0.021     13.521      0.000       0.241       0.323
==============================================================================
Omnibus:                       12.639   Durbin-Watson:                   1.944
Prob(Omnibus):                  0.002   Jarque-Bera (JB):               21.413
Skew:                          -0.048   Prob(JB):                     2.24e-05
Kurtosis:                       3.820   Cond. No.                         25.5
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [4]:
df = df.dropna()
print(IV2SLS.from_formula('lwage ~ 1 + [educ ~ fatheduc]', data = df) .fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                  lwage   R-squared:                      0.0934
No. Observations:                 428   F-statistic:                    2.5656
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.1092
Time:                        19:45:13   Distribution:                  chi2(1)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.4411     0.4643     0.9501     0.3421     -0.4689      1.3511
educ           0.0592     0.0369     1.6017     0.1092     -0.0132      0.1316
==============================================================================

Endogenous: educ
Instruments: fatheduc
Robust Covariance (Heteroskedastic)
Debiased: False


In [5]:
df = dataWoo("wage2")
print(smf.ols('educ ~ 1 + sibs', data =df).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                   educ   R-squared:                       0.057
Method:                 Least Squares   F-statistic:                     56.67
Date:                Sat, 25 Apr 2020   Prob (F-statistic):           1.22e-13
Time:                        19:45:13   Log-Likelihood:                -2034.4
No. Observations:                 935   AIC:                             4073.
Df Residuals:                     933   BIC:                             4083.
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     14.1388      0.113    124.969      0.000      13.917      14.361
sibs          -0.2279      0.030     -7.528      0.000      -0.287      -0.168
==============================================================================
Omnibus:                       69.595   Durbin-Watson:                   1.797
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               55.996
Skew:                           0.512   Prob(JB):                     6.93e-13
Kurtosis:                       2.376   Cond. No.                         6.33
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [6]:
print(IV2SLS.from_formula('lwage ~ 1 + [educ ~ sibs]', data = df) .fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                  lwage   R-squared:                     -0.0092
No. Observations:                 935   F-statistic:                    24.850
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0000
Time:                        19:45:14   Distribution:                  chi2(1)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      5.1300     0.3304     15.528     0.0000      4.4825      5.7776
educ           0.1224     0.0246     4.9850     0.0000      0.0743      0.1706
==============================================================================

Endogenous: educ
Instruments: sibs
Robust Covariance (Heteroskedastic)
Debiased: False


### Example 15.3. Estimating the Effect of Smoking on Birth Weight¶

In [7]:
df = dataWoo("bwght")
print(smf.ols('packs ~ 1 + cigprice', data =df).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  packs   R-squared:                       0.000
Method:                 Least Squares   F-statistic:                    0.1305
Date:                Sat, 25 Apr 2020   Prob (F-statistic):              0.718
Time:                        19:45:14   Log-Likelihood:                -291.47
No. Observations:                1388   AIC:                             586.9
Df Residuals:                    1386   BIC:                             597.4
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0674      0.103      0.658      0.511      -0.134       0.269
cigprice       0.0003      0.001      0.361      0.718      -0.001       0.002
==============================================================================
Omnibus:                     1059.058   Durbin-Watson:                   1.960
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            15873.651
Skew:                           3.562   Prob(JB):                         0.00
Kurtosis:                      17.957   Cond. No.                     1.67e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.67e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

In [8]:
print(IV2SLS.from_formula('lbwght  ~ 1 + [packs ~ cigprice]', data = df) .fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                 lbwght   R-squared:                     -23.230
No. Observations:                1388   F-statistic:                    0.1107
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.7394
Time:                        19:45:14   Distribution:                  chi2(1)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      4.4481     0.9387     4.7388     0.0000      2.6084      6.2879
packs          2.9887     8.9832     0.3327     0.7394     -14.618      20.595
==============================================================================

Endogenous: packs
Instruments: cigprice
Robust Covariance (Heteroskedastic)
Debiased: False


### Example 15.4. Using College Proximity as an IV for Education¶

In [9]:
df = dataWoo("card")
print(smf.ols('educ ~ nearc4 + exper + expersq + black + smsa + south + smsa66 + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668', data =df).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                   educ   R-squared:                       0.477
Method:                 Least Squares   F-statistic:                     182.1
Date:                Sat, 25 Apr 2020   Prob (F-statistic):               0.00
Time:                        19:45:14   Log-Likelihood:                -6258.5
No. Observations:                3010   AIC:                         1.255e+04
Df Residuals:                    2994   BIC:                         1.265e+04
Df Model:                          15
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     16.8485      0.211     79.805      0.000      16.435      17.262
nearc4         0.3199      0.088      3.641      0.000       0.148       0.492
exper         -0.4125      0.034    -12.241      0.000      -0.479      -0.346
expersq        0.0009      0.002      0.526      0.599      -0.002       0.004
black         -0.9355      0.094     -9.981      0.000      -1.119      -0.752
smsa           0.4022      0.105      3.837      0.000       0.197       0.608
south         -0.0516      0.135     -0.381      0.703      -0.317       0.214
smsa66         0.0255      0.106      0.241      0.810      -0.182       0.233
reg661        -0.2103      0.202     -1.039      0.299      -0.607       0.187
reg662        -0.2889      0.147     -1.961      0.050      -0.578   -1.05e-05
reg663        -0.2382      0.143     -1.670      0.095      -0.518       0.041
reg664        -0.0931      0.186     -0.501      0.617      -0.458       0.272
reg665        -0.4829      0.188     -2.566      0.010      -0.852      -0.114
reg666        -0.5131      0.210     -2.448      0.014      -0.924      -0.102
reg667        -0.4271      0.206     -2.077      0.038      -0.830      -0.024
reg668         0.3136      0.242      1.298      0.194      -0.160       0.787
==============================================================================
Omnibus:                       15.758   Durbin-Watson:                   1.762
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               15.971
Skew:                           0.177   Prob(JB):                     0.000340
Kurtosis:                       2.954   Cond. No.                     1.49e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.49e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

In [10]:
print(smf.ols('lwage ~ educ + exper + expersq + black + smsa + south + smsa66 + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668', data =df).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.300
Method:                 Least Squares   F-statistic:                     85.48
Date:                Sat, 25 Apr 2020   Prob (F-statistic):          1.74e-218
Time:                        19:45:14   Log-Likelihood:                -1288.8
No. Observations:                3010   AIC:                             2610.
Df Residuals:                    2994   BIC:                             2706.
Df Model:                          15
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      4.7394      0.072     66.259      0.000       4.599       4.880
educ           0.0747      0.003     21.351      0.000       0.068       0.082
exper          0.0848      0.007     12.806      0.000       0.072       0.098
expersq       -0.0023      0.000     -7.223      0.000      -0.003      -0.002
black         -0.1990      0.018    -10.906      0.000      -0.235      -0.163
smsa           0.1364      0.020      6.785      0.000       0.097       0.176
south         -0.1480      0.026     -5.695      0.000      -0.199      -0.097
smsa66         0.0262      0.019      1.349      0.177      -0.012       0.064
reg661        -0.1186      0.039     -3.054      0.002      -0.195      -0.042
reg662        -0.0222      0.028     -0.786      0.432      -0.078       0.033
reg663         0.0260      0.027      0.949      0.343      -0.028       0.080
reg664        -0.0635      0.036     -1.780      0.075      -0.133       0.006
reg665         0.0095      0.036      0.262      0.794      -0.061       0.080
reg666         0.0219      0.040      0.547      0.584      -0.057       0.101
reg667        -0.0006      0.039     -0.015      0.988      -0.078       0.077
reg668        -0.1750      0.046     -3.777      0.000      -0.266      -0.084
==============================================================================
Omnibus:                       59.717   Durbin-Watson:                   1.880
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               71.222
Skew:                          -0.282   Prob(JB):                     3.42e-16
Kurtosis:                       3.501   Cond. No.                     1.59e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.59e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

In [11]:
print(IV2SLS.from_formula('lwage ~ 1 + exper + expersq + black + smsa + south + smsa66 + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + [educ ~ nearc4]', data=df).fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                  lwage   R-squared:                      0.2382
No. Observations:                3010   F-statistic:                    840.83
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0000
Time:                        19:45:14   Distribution:                 chi2(15)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      3.7740     0.9174     4.1137     0.0000      1.9759      5.5720
exper          0.1083     0.0233     4.6376     0.0000      0.0625      0.1540
expersq       -0.0023     0.0003    -6.7128     0.0000     -0.0030     -0.0017
black         -0.1468     0.0524    -2.8031     0.0051     -0.2494     -0.0441
smsa           0.1118     0.0311     3.5995     0.0003      0.0509      0.1727
south         -0.1447     0.0291    -4.9775     0.0000     -0.2016     -0.0877
smsa66         0.0185     0.0205     0.9035     0.3663     -0.0217      0.0587
reg661        -0.1078     0.0410    -2.6317     0.0085     -0.1881     -0.0275
reg662        -0.0070     0.0337    -0.2091     0.8344     -0.0731      0.0590
reg663         0.0404     0.0325     1.2437     0.2136     -0.0233      0.1042
reg664        -0.0579     0.0392    -1.4771     0.1397     -0.1348      0.0189
reg665         0.0385     0.0495     0.7774     0.4369     -0.0585      0.1354
reg666         0.0551     0.0521     1.0567     0.2906     -0.0471      0.1573
reg667         0.0268     0.0501     0.5340     0.5933     -0.0714      0.1250
reg668        -0.1909     0.0507    -3.7659     0.0002     -0.2902     -0.0915
educ           0.1315     0.0540     2.4353     0.0149      0.0257      0.2373
==============================================================================

Endogenous: educ
Instruments: nearc4
Robust Covariance (Heteroskedastic)
Debiased: False


In [12]:
df = dataWoo("mroz")
df = df.dropna()
mreg1 = smf.ols('educ ~ exper + expersq + fatheduc + motheduc', data=df).fit()
hypotheses = '(fatheduc = motheduc = 0)'
f_test = mreg1.f_test(hypotheses)
print(f_test)

<F test: F=array([[55.40030043]]), p=4.268908724630835e-22, df_denom=423, df_num=2>

In [13]:
print(IV2SLS.from_formula('lwage ~ 1 + [educ~fatheduc + motheduc] + exper + expersq', data=df).fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                  lwage   R-squared:                      0.1357
No. Observations:                 428   F-statistic:                    18.611
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0003
Time:                        19:45:14   Distribution:                  chi2(3)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.0481     0.4278     0.1124     0.9105     -0.7903      0.8865
exper          0.0442     0.0155     2.8546     0.0043      0.0138      0.0745
expersq       -0.0009     0.0004    -2.1001     0.0357     -0.0017  -5.997e-05
educ           0.0614     0.0332     1.8503     0.0643     -0.0036      0.1264
==============================================================================

Endogenous: educ
Instruments: fatheduc, motheduc
Robust Covariance (Heteroskedastic)
Debiased: False


### Example 15.6. Using Two Test Scores as Indicators of Ability¶

In [14]:
print(IV2SLS.from_formula('lwage ~ 1+ educ + exper + tenure + married + south + urban + black + [IQ ~ KWW]', data = dataWoo("wage2")).fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                  lwage   R-squared:                      0.1900
No. Observations:                 935   F-statistic:                    356.33
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0000
Time:                        19:45:14   Distribution:                  chi2(8)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      4.5925     0.3501     13.117     0.0000      3.9063      5.2786
educ           0.0250     0.0187     1.3410     0.1799     -0.0116      0.0616
exper          0.0144     0.0034     4.2234     0.0000      0.0077      0.0211
tenure         0.0105     0.0028     3.7258     0.0002      0.0050      0.0160
married        0.2007     0.0404     4.9616     0.0000      0.1214      0.2800
south         -0.0516     0.0339    -1.5201     0.1285     -0.1180      0.0149
urban          0.1767     0.0274     6.4470     0.0000      0.1230      0.2304
black         -0.0226     0.0798    -0.2826     0.7775     -0.1790      0.1339
IQ             0.0130     0.0055     2.3835     0.0171      0.0023      0.0238
==============================================================================

Endogenous: IQ
Instruments: KWW
Robust Covariance (Heteroskedastic)
Debiased: False


In [15]:
df = dataWoo("mroz")
df = df[(df['inlf']==1)]
v2 = smf.ols('educ ~ exper + expersq + fatheduc + motheduc', data=df).fit().resid
print(IV2SLS.from_formula('lwage ~1 + [educ ~ fatheduc + motheduc] + exper + expersq + v2 ', data = df).fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                  lwage   R-squared:                      0.1624
No. Observations:                 428   F-statistic:                    87.093
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0000
Time:                        19:45:14   Distribution:                  chi2(4)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.0481     0.4196     0.1146     0.9087     -0.7744      0.8706
exper          0.0442     0.0150     2.9382     0.0033      0.0147      0.0736
expersq       -0.0009     0.0004    -2.1777     0.0294     -0.0017  -8.988e-05
v2             0.0582     0.0362     1.6068     0.1081     -0.0128      0.1291
educ           0.0614     0.0325     1.8906     0.0587     -0.0023      0.1250
==============================================================================

Endogenous: educ
Instruments: fatheduc, motheduc
Robust Covariance (Heteroskedastic)
Debiased: False

In [16]:
print("The OLS estimate is ")
smf.ols('lwage ~ educ + exper + expersq', data=df).fit().params

The OLS estimate is

Out[16]:
Intercept   -0.522041
educ         0.107490
exper        0.041567
expersq     -0.000811
dtype: float64

In [17]:
df = dataWoo("mroz")
df = df.dropna()
u1 = (IV2SLS.from_formula('lwage ~1 + [educ ~ fatheduc + motheduc] + exper + expersq ', data = df).fit()).resids
wreg = smf.ols('u1 ~ exper + expersq + fatheduc + motheduc', data=df).fit()
print(wreg.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                     u1   R-squared:                       0.001
Method:                 Least Squares   F-statistic:                   0.09350
Date:                Sat, 25 Apr 2020   Prob (F-statistic):              0.984
Time:                        19:45:14   Log-Likelihood:                -436.70
No. Observations:                 428   AIC:                             883.4
Df Residuals:                     423   BIC:                             903.7
Df Model:                           4
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0110      0.141      0.078      0.938      -0.267       0.289
exper      -1.833e-05      0.013     -0.001      0.999      -0.026       0.026
expersq     7.341e-07      0.000      0.002      0.999      -0.001       0.001
fatheduc       0.0058      0.011      0.517      0.605      -0.016       0.028
motheduc      -0.0066      0.012     -0.556      0.579      -0.030       0.017
==============================================================================
Omnibus:                       68.934   Durbin-Watson:                   1.947
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              246.940
Skew:                          -0.682   Prob(JB):                     2.39e-54
Kurtosis:                       6.462   Cond. No.                     1.55e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.55e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

In [18]:
LM1 = wreg.nobs * wreg.rsquared
LM1

Out[18]:
0.3780713419637767
In [19]:
u2 = (IV2SLS.from_formula('lwage ~1 + [educ ~ fatheduc + motheduc + huseduc] + exper + expersq ', data = df).fit()).resids
wreg2 = smf.ols('u2 ~ exper + expersq + fatheduc + motheduc + huseduc', data=df).fit()
LM2 = wreg2.nobs * wreg2.rsquared
LM2

Out[19]:
1.1150430012567591
In [20]:
(IV2SLS.from_formula('lwage ~1 + [educ ~ fatheduc + motheduc + huseduc] + exper + expersq ', data = df).fit()).params

Out[20]:
Intercept   -0.186857
exper        0.043097
expersq     -0.000863
educ         0.080392
Name: parameter, dtype: float64
In [21]:
(IV2SLS.from_formula('lwage ~1 + [educ ~ fatheduc + motheduc] + exper + expersq ', data = df).fit()).params

Out[21]:
Intercept    0.048100
exper        0.044170
expersq     -0.000899
educ         0.061397
Name: parameter, dtype: float64

### Example 15.9. Effect of Education on Fertility¶

In [22]:
df = dataWoo("fertil1")
print(IV2SLS.from_formula('kids ~ 1 + [educ ~ meduc + feduc] + age + agesq + black + east + northcen + west + farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84', data=df).fit())
print(smf.ols('kids ~ educ + age + agesq + black + east + northcen + west + farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84', data=df).fit().summary())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                   kids   R-squared:                      0.1281
No. Observations:                1129   F-statistic:                    150.13
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0000
Time:                        19:45:15   Distribution:                 chi2(17)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -7.2412     3.1890    -2.2707     0.0232     -13.491     -0.9910
age            0.5236     0.1395     3.7540     0.0002      0.2502      0.7969
agesq         -0.0057     0.0016    -3.6113     0.0003     -0.0088     -0.0026
black          1.0730     0.1995     5.3789     0.0000      0.6820      1.4639
east           0.2286     0.1281     1.7838     0.0745     -0.0226      0.4797
northcen       0.3744     0.1172     3.1958     0.0014      0.1448      0.6040
west           0.2076     0.1617     1.2838     0.1992     -0.1094      0.5246
farm          -0.0770     0.1490    -0.5168     0.6053     -0.3690      0.2150
othrural      -0.1952     0.1857    -1.0515     0.2930     -0.5592      0.1687
town           0.0818     0.1276     0.6412     0.5214     -0.1682      0.3319
smcity         0.2125     0.1527     1.3912     0.1642     -0.0869      0.5119
y74            0.2721     0.1870     1.4556     0.1455     -0.0943      0.6386
y76           -0.0945     0.1991    -0.4748     0.6349     -0.4849      0.2958
y78           -0.0573     0.1967    -0.2911     0.7710     -0.4428      0.3282
y80           -0.0532     0.1949    -0.2732     0.7847     -0.4353      0.3288
y82           -0.4962     0.1899    -2.6127     0.0090     -0.8685     -0.1240
y84           -0.5214     0.1868    -2.7904     0.0053     -0.8876     -0.1552
educ          -0.1527     0.0402    -3.7949     0.0001     -0.2316     -0.0739
==============================================================================

Endogenous: educ
Instruments: meduc, feduc
Robust Covariance (Heteroskedastic)
Debiased: False
OLS Regression Results
==============================================================================
Dep. Variable:                   kids   R-squared:                       0.130
Method:                 Least Squares   F-statistic:                     9.723
Date:                Sat, 25 Apr 2020   Prob (F-statistic):           2.42e-24
Time:                        19:45:15   Log-Likelihood:                -2091.2
No. Observations:                1129   AIC:                             4218.
Df Residuals:                    1111   BIC:                             4309.
Df Model:                          17
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -7.7425      3.052     -2.537      0.011     -13.730      -1.755
educ          -0.1284      0.018     -6.999      0.000      -0.164      -0.092
age            0.5321      0.138      3.845      0.000       0.261       0.804
agesq         -0.0058      0.002     -3.710      0.000      -0.009      -0.003
black          1.0757      0.174      6.198      0.000       0.735       1.416
east           0.2173      0.133      1.637      0.102      -0.043       0.478
northcen       0.3631      0.121      3.004      0.003       0.126       0.600
west           0.1976      0.167      1.184      0.237      -0.130       0.525
farm          -0.0526      0.147     -0.357      0.721      -0.341       0.236
othrural      -0.1629      0.175     -0.928      0.353      -0.507       0.181
town           0.0844      0.125      0.677      0.498      -0.160       0.329
smcity         0.2119      0.160      1.322      0.187      -0.103       0.526
y74            0.2682      0.173      1.553      0.121      -0.071       0.607
y76           -0.0974      0.179     -0.544      0.587      -0.449       0.254
y78           -0.0687      0.182     -0.378      0.706      -0.425       0.288
y80           -0.0713      0.183     -0.390      0.697      -0.430       0.287
y82           -0.5225      0.172     -3.030      0.003      -0.861      -0.184
y84           -0.5452      0.175     -3.124      0.002      -0.888      -0.203
==============================================================================
Omnibus:                        9.775   Durbin-Watson:                   2.011
Prob(Omnibus):                  0.008   Jarque-Bera (JB):                9.966
Skew:                           0.227   Prob(JB):                      0.00685
Kurtosis:                       2.920   Cond. No.                     1.32e+05
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.32e+05. This might indicate that there are
strong multicollinearity or other numerical problems.

In [23]:
#Endogeneity
v2 = smf.ols('educ ~ meduc + feduc', data=df).fit().resid
print(IV2SLS.from_formula('kids ~ 1 + [educ ~ meduc + feduc] + age + agesq + black + east + northcen + west + farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84 + v2', data=df).fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                   kids   R-squared:                      0.1299
No. Observations:                1129   F-statistic:                    177.40
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0000
Time:                        19:45:15   Distribution:                 chi2(18)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -7.4075     3.1199    -2.3743     0.0176     -13.522     -1.2926
age            0.5305     0.1380     3.8437     0.0001      0.2600      0.8011
agesq         -0.0058     0.0016    -3.6977     0.0002     -0.0089     -0.0027
black          1.0615     0.2014     5.2709     0.0000      0.6668      1.4563
east           0.2208     0.1265     1.7451     0.0810     -0.0272      0.4688
northcen       0.3716     0.1164     3.1913     0.0014      0.1434      0.5998
west           0.2045     0.1614     1.2672     0.2051     -0.1118      0.5207
farm          -0.0652     0.1459    -0.4469     0.6549     -0.3511      0.2207
othrural      -0.1778     0.1803    -0.9862     0.3240     -0.5311      0.1755
town           0.0799     0.1279     0.6246     0.5322     -0.1708      0.3306
smcity         0.2100     0.1528     1.3738     0.1695     -0.0896      0.5096
y74            0.2719     0.1862     1.4605     0.1441     -0.0930      0.6369
y76           -0.0984     0.1982    -0.4966     0.6195     -0.4868      0.2900
y78           -0.0637     0.1959    -0.3253     0.7449     -0.4477      0.3202
y80           -0.0652     0.1926    -0.3384     0.7351     -0.4426      0.3123
y82           -0.5143     0.1865    -2.7580     0.0058     -0.8799     -0.1488
y84           -0.5346     0.1847    -2.8950     0.0038     -0.8965     -0.1727
v2             0.0292     0.0425     0.6858     0.4928     -0.0542      0.1125
educ          -0.1508     0.0377    -3.9940     0.0001     -0.2247     -0.0768
==============================================================================

Endogenous: educ
Instruments: meduc, feduc
Robust Covariance (Heteroskedastic)
Debiased: False


### Example 15.10. Job Training and Worker Productivity¶

In [24]:
df = dataWoo('jtrain')
df = df[(df['year']==1988)]
print(smf.ols(formula='chrsemp ~ cgrant + 1', data=df).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                chrsemp   R-squared:                       0.392
Method:                 Least Squares   F-statistic:                     79.37
Date:                Sat, 25 Apr 2020   Prob (F-statistic):           5.69e-15
Time:                        19:45:15   Log-Likelihood:                -515.77
No. Observations:                 125   AIC:                             1036.
Df Residuals:                     123   BIC:                             1041.
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5093      1.558      0.327      0.744      -2.575       3.594
cgrant        27.8779      3.129      8.909      0.000      21.684      34.072
==============================================================================
Omnibus:                       56.571   Durbin-Watson:                   1.953
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              304.543
Skew:                           1.437   Prob(JB):                     7.40e-67
Kurtosis:                      10.086   Cond. No.                         2.49
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [27]:
print(IV2SLS.from_formula(formula='clscrap  ~ 1 + [chrsemp ~ cgrant]', data=df).fit())

                          IV-2SLS Estimation Summary
==============================================================================
Dep. Variable:                clscrap   R-squared:                      0.0159
No. Observations:                  45   F-statistic:                    2.9434
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0862
Time:                        19:45:24   Distribution:                  chi2(1)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -0.0327     0.1104    -0.2958     0.7674     -0.2491      0.1838
chrsemp       -0.0142     0.0082    -1.7156     0.0862     -0.0303      0.0020
==============================================================================

Endogenous: chrsemp
Instruments: cgrant
Robust Covariance (Heteroskedastic)
Debiased: False

In [28]:
print(IV2SLS.from_formula(formula='clscrap  ~ 1 + chrsemp', data=df).fit())

                            OLS Estimation Summary
==============================================================================
Dep. Variable:                clscrap   R-squared:                      0.0619
No. Observations:                  45   F-statistic:                    12.322
Date:                Sat, Apr 25 2020   P-value (F-stat)                0.0004
Time:                        19:45:26   Distribution:                  chi2(1)
Cov. Estimator:                robust

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -0.1035     0.1026    -1.0093     0.3128     -0.3045      0.0975
chrsemp       -0.0076     0.0022    -3.5103     0.0004     -0.0118     -0.0034
==============================================================================