import numpy as np
import pandas as pd
import scipy as sp
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *
# --- Birth-weight regressions: first-half sub-sample vs. full sample ---------
# Load the 'bwght' dataset through the wooldridge helper.
df = dataWoo('bwght')

# Half of the number of non-missing 'cigs' observations (kept as a float, as
# before, so any later use of `half` sees the same value).
half = df['cigs'].count() / 2
half  # bare expression: only displayed in an interactive/notebook session

# Take the first `half` rows positionally. The slice length is derived from
# `half` instead of a hard-coded row count so it follows the data.
df2 = df.iloc[:int(half)]

# Same specification estimated on the sub-sample and on the full sample.
bwght_formula = 'lbwght ~ cigs + lfaminc + 1'
bwght_ols_half = smf.ols(formula=bwght_formula, data=df2).fit()
print(bwght_ols_half.summary())
bwght_ols = smf.ols(formula=bwght_formula, data=df).fit()
print(bwght_ols.summary())

# Side-by-side table of both fits, with observation count and R-squared rows.
print(
    summary_col(
        [bwght_ols_half, bwght_ols],
        stars=True,
        float_format='%0.3f',
        model_names=['bwght_ols_half', 'bwght_ols'],
        info_dict={
            'N': lambda x: "{0:d}".format(int(x.nobs)),
            'R2': lambda x: "{:.3f}".format(x.rsquared),
        },
    )
)
# --- Arrest regressions and LM test for dropping avgsen and tottime ----------
# Load the 'crime1' dataset through the wooldridge helper (rebinds `df`).
df = dataWoo('crime1')

# Unrestricted model: all five regressors.
crime_ols = smf.ols(
    formula='narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + 1',
    data=df,
).fit()
print(crime_ols.summary())

# Restricted model: avgsen and tottime left out.
crime_ols_r = smf.ols(formula='narr86 ~ pcnv + ptime86 + qemp86 + 1', data=df).fit()
# Residuals of the restricted fit (observed narr86 minus fitted values), taken
# from the results object rather than recomputed by hand.
resid = crime_ols_r.resid
print(crime_ols_r.summary())

# Built-in LM test of the restricted model against the unrestricted one.
print(" (LM, p, df) = ", crime_ols.compare_lm_test(crime_ols_r))

# Alternatively, build the statistic by hand: regress the restricted residuals
# on the full regressor set. `resid` is not a column of `df`; the formula
# resolves it from the surrounding namespace.
crime_resid = smf.ols(
    formula='resid ~ pcnv + avgsen + tottime + ptime86 + qemp86 + 1',
    data=df,
).fit()
print(crime_resid.summary())

# LM = N * R-squared of the auxiliary regression, computed from the fitted
# result instead of hard-coded, rounded numbers copied from the summary.
LM = crime_resid.nobs * crime_resid.rsquared
LM  # bare expression: only displayed in an interactive/notebook session