import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *
# --- crime1: narr86 regressions, with and without squared terms -------------
# Three fits are compared side by side:
#   crime_hetr_r : specification without pcnvsq / pt86sq / inc86sq
#   crime_hetr   : adds the three squared terms, default covariance
#   crime_robust : same formula as crime_hetr, fitted with cov_type='HC1'
df = dataWoo('crime1')

crime_hetr_r = smf.ols(
    'narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black + hispan + 1',
    data=df,
).fit()

# The quadratic specification is shared by the default and the HC1 fit.
_crime_quadratic_formula = 'narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black + hispan + pcnvsq + pt86sq + inc86sq + 1'
crime_hetr = smf.ols(_crime_quadratic_formula, data=df).fit()
crime_robust = smf.ols(_crime_quadratic_formula, data=df).fit(cov_type='HC1')

# Extra rows appended to the comparison table: sample size and R-squared.
_crime_table_rows = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
}
_crime_table = summary_col(
    [crime_hetr_r, crime_hetr, crime_robust],
    stars=True,
    float_format='%0.3f',
    model_names=['Hetrosced_r\n(b/se)', 'Hetrosced\n(b/se)', 'Robust\n(b/se)'],
    info_dict=_crime_table_rows,
)
print(_crime_table)
# --- hprice1: level price equation and a functional-form check --------------
# Fit price on lotsize, sqrft and bdrms, then refit with the squared and cubed
# fitted values added as regressors and F-test their joint significance.
df = dataWoo("hprice1")
hprice_reg = smf.ols('price ~ lotsize + sqrft + bdrms', data=df).fit()

# prhat2 / prhat3 are referenced by name inside the formula string below, so
# these variable names must not change.
# NOTE(review): they are not added to df here, so the formula presumably
# resolves them from this namespace — confirm.
_price_fitted = hprice_reg.predict()
prhat2 = _price_fitted ** 2
prhat3 = _price_fitted ** 3

hprice_reg_pol = smf.ols(
    'price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1 ',
    data=df,
).fit()

# Side-by-side table with N, R-squared and adjusted R-squared rows.
_hprice_table = summary_col(
    [hprice_reg, hprice_reg_pol],
    stars=True,
    float_format='%0.3f',
    model_names=['Linear\n(b/se)', 'Polynomial\n(b/se)'],
    info_dict={
        'N': lambda res: "{0:d}".format(int(res.nobs)),
        'R2': lambda res: "{:.3f}".format(res.rsquared),
        'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
    },
)
print(_hprice_table)

# Joint test that both added fitted-value terms have zero coefficients.
hypotheses = '(prhat2 = prhat3 = 0)'
f_test = hprice_reg_pol.f_test(hypotheses)
print(f_test)
# --- hprice1 (logs): same functional-form check on the log equation ---------
# Uses whatever `df` is bound to at this point in the script; the formulas
# require lprice, llotsize, lsqrft and bdrms columns.
lhprice_reg = smf.ols('lprice ~ llotsize + lsqrft + bdrms', data=df).fit()

# lprhat2 / lprhat3 are referenced by name inside the formula string below,
# so these variable names must not change.
_lprice_fitted = lhprice_reg.predict()
lprhat2 = _lprice_fitted ** 2
lprhat3 = _lprice_fitted ** 3

lhprice_reg_pol = smf.ols(
    'lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1 ',
    data=df,
).fit()

# Side-by-side table with N, R-squared and adjusted R-squared rows.
_lhprice_table = summary_col(
    [lhprice_reg, lhprice_reg_pol],
    stars=True,
    float_format='%0.3f',
    model_names=['Linear_L\n(b/se)', 'Polynomial_L\n(b/se)'],
    info_dict={
        'N': lambda res: "{0:d}".format(int(res.nobs)),
        'R2': lambda res: "{:.3f}".format(res.rsquared),
        'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
    },
)
print(_lhprice_table)

# Joint test that both added fitted-value terms have zero coefficients.
hypotheses = '(lprhat2 = lprhat3 = 0)'
f_test = lhprice_reg_pol.f_test(hypotheses)
print(f_test)
# --- wage2: lwage equation with and without IQ -------------------------------
# Three nested specifications:
#   IQA : base regressors only
#   IQB : base regressors + IQ
#   IQC : base regressors + IQ + the educ:IQ interaction
df = dataWoo("wage2")

# Common left-hand side and base regressors; each model appends its own tail
# so the resulting formula text matches the fully written-out version.
_lwage_base = 'lwage ~ educ + exper + tenure + married + south + urban + black'
IQA = smf.ols(_lwage_base + ' + 1', data=df).fit()
IQB = smf.ols(_lwage_base + ' + IQ + 1', data=df).fit()
IQC = smf.ols(_lwage_base + ' + IQ + educ:IQ + 1', data=df).fit()

# Side-by-side table with N, R-squared and adjusted R-squared rows.
_iq_table = summary_col(
    [IQA, IQB, IQC],
    stars=True,
    float_format='%0.3f',
    model_names=['IQA\n(b/se)', 'IQB\n(b/se)', 'IQC\n(b/se)'],
    info_dict={
        'N': lambda res: "{0:d}".format(int(res.nobs)),
        'R2': lambda res: "{:.3f}".format(res.rsquared),
        'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
    },
)
print(_iq_table)
# --- crime2: lcrmrte equation on the year == 87 rows -------------------------
#   crimeA : lcrmrte on unem and llawexpc
#   crimeB : crimeA plus lcrmrt_1
df = dataWoo("crime2")
df = df[df['year'] == 87]  # keep only the rows with year equal to 87

crimeA = smf.ols('lcrmrte ~ unem + llawexpc + 1', data=df).fit()
crimeB = smf.ols('lcrmrte ~ unem + llawexpc + lcrmrt_1 + 1', data=df).fit()

# Side-by-side table with N, R-squared and adjusted R-squared rows.
# The second column is labelled 'crimeB' so each column header names the
# model it actually reports (both columns were previously labelled 'crimeA').
print(summary_col(
    [crimeA, crimeB],
    stars=True,
    float_format='%0.3f',
    model_names=['crimeA\n(b/se)', 'crimeB\n(b/se)'],
    info_dict={
        'N': lambda x: "{0:d}".format(int(x.nobs)),
        'R2': lambda x: "{:.3f}".format(x.rsquared),
        'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj),
    },
))
# --- rdchem: rdintens equation, full sample vs. sales < 30000 ---------------
#   RD1 : all rows
#   RD2 : only rows whose sales value is below 30000
df = dataWoo("rdchem")

_rdintens_formula = 'rdintens ~ sales + profmarg + 1'
_rdchem_sales_below_30000 = df[df['sales'] < 30000]

RD1 = smf.ols(_rdintens_formula, data=df).fit()
RD2 = smf.ols(_rdintens_formula, data=_rdchem_sales_below_30000).fit()

# Side-by-side table with N, R-squared and adjusted R-squared rows.
_rd_table = summary_col(
    [RD1, RD2],
    stars=True,
    float_format='%0.3f',
    model_names=['RD1\n(b/se)', 'RD2\n(b/se)'],
    info_dict={
        'N': lambda res: "{0:d}".format(int(res.nobs)),
        'R2': lambda res: "{:.3f}".format(res.rsquared),
        'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
    },
)
print(_rd_table)
# --- rdchem (logs): lrd equation, full sample vs. sales < 30000 -------------
#   lRD1 : all rows
#   lRD2 : only rows whose sales value is below 30000
df = dataWoo("rdchem")

_lrd_formula = 'lrd ~ lsales + profmarg + 1'
_lrd_sales_below_30000 = df[df['sales'] < 30000]

lRD1 = smf.ols(_lrd_formula, data=df).fit()
lRD2 = smf.ols(_lrd_formula, data=_lrd_sales_below_30000).fit()

# Side-by-side table with N, R-squared and adjusted R-squared rows.
_lrd_table = summary_col(
    [lRD1, lRD2],
    stars=True,
    float_format='%0.3f',
    model_names=['RD1_Log\n(b/se)', 'RD2_Log\n(b/se)'],
    info_dict={
        'N': lambda res: "{0:d}".format(int(res.nobs)),
        'R2': lambda res: "{:.3f}".format(res.rsquared),
        'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
    },
)
print(_lrd_table)
# --- infmrt: infmort equation on the year == 1990 rows ----------------------
#   infant1 : all 1990 rows
#   infant2 : 1990 rows with DC equal to 0
df = dataWoo("infmrt")
df = df[df['year'] == 1990]

_infmrt_dc_zero = df[df['DC'] == 0]

infant1 = smf.ols('infmort ~ lpcinc + lphysic + lpopul + 1', data=df).fit()
infant2 = smf.ols('infmort ~ lpcinc + lphysic + lpopul + 1 ', data=_infmrt_dc_zero).fit()

# Side-by-side table with N, R-squared and adjusted R-squared rows.
_infmort_table = summary_col(
    [infant1, infant2],
    stars=True,
    float_format='%0.3f',
    model_names=['Infmort1\n(b/se)', 'Infmort2\n(b/se)'],
    info_dict={
        'N': lambda res: "{0:d}".format(int(res.nobs)),
        'R2': lambda res: "{:.3f}".format(res.rsquared),
        'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
    },
)
print(_infmort_table)