import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *
# Birth-weight data: three OLS fits that differ only in the outcome column
# (bwght vs. bwghtlbs) or in the smoking column (cigs vs. packs).
df = dataWoo('bwght')
bwght_ols1 = smf.ols('bwght ~ cigs + faminc + 1', data=df).fit()
bwght_ols2 = smf.ols('bwghtlbs ~ cigs + faminc + 1', data=df).fit()
bwght_ols3 = smf.ols('bwght ~ packs + faminc + 1', data=df).fit()

# Side-by-side coefficient table with significance stars, plus the number of
# observations and R-squared of each fit appended as extra rows.
print(
    summary_col(
        [bwght_ols1, bwght_ols2, bwght_ols3],
        float_format='%0.3f',
        stars=True,
        model_names=['bwght_ols1', 'bwght_ols2', 'bwght_ols3'],
        info_dict={
            'N': lambda res: str(int(res.nobs)),
            'R2': lambda res: f"{res.rsquared:.3f}",
        },
    )
)
# Housing-price regression on z-scored variables: every column is passed
# through scipy.stats.zscore before the model is fitted.
df = dataWoo('hprice2')
df1 = df[['price', 'nox', 'crime', 'rooms', 'dist', 'stratio']]

# One z-scored column per variable, named with a "z" prefix (zprice, znox, ...).
# Built in a single pass instead of six copy-pasted one-column frames that
# were then glued together with pd.concat.
df2 = pd.DataFrame({f"z{col}": ss.zscore(df1[col]) for col in df1.columns})

hprice_std = smf.ols(
    formula='zprice ~ znox + zcrime + zrooms + zdist + zstratio + 1',
    data=df2,
).fit()
# Fit and report once; the identical fit/print pair used to be repeated.
print(hprice_std.summary())
import math
# Works on whatever frame `df` currently holds (hprice2 in the lines above).
# Add the natural log of dist as a new column so the formula can use it.
df['ldist'] = np.log(df.dist)

# Full specification: lprice on lnox, ldist, rooms and stratio.
hprice_log = smf.ols(
    'lprice ~ lnox + ldist + rooms + stratio + 1',
    data=df,
).fit()
print(hprice_log.summary())

# Reduced specification: only lnox and rooms as regressors.
hprice_eq6_7 = smf.ols(
    'lprice ~ lnox + rooms + 1',
    data=df,
).fit()
print(hprice_eq6_7.summary())
# Wage data: wage regressed on exper and its square (expersq is read from the
# dataset as-is, it is not computed here).
df = dataWoo('wage1')
wage_exp = smf.ols(
    'wage ~ exper + expersq + 1',
    data=df,
).fit()
print(wage_exp.summary())
# Reload hprice2 and add the two derived regressors the formula needs:
# the log of dist and the square of rooms.
df = dataWoo('hprice2')
df['ldist'] = np.log(df.dist)
df['roomsq'] = df['rooms'] ** 2

# Log price model with a quadratic term in rooms.
hprice_roomsq = smf.ols(
    'lprice ~ lnox + ldist + rooms + roomsq + stratio + 1',
    data=df,
).fit()
print(hprice_roomsq.summary())
# Attendance data: standardized final-exam score regressed on attendance rate,
# prior GPA, ACT, their squares, and a priGPA x atndrte interaction.
df = dataWoo('attend')
df['priGPAsq'] = np.square(df['priGPA'])
df['ACTsq'] = np.square(df['ACT'])
df['priGPA_atndrte'] = df['priGPA'] * df['atndrte']

# The formula is long, so it is written as adjacent string literals inside
# parentheses.  A single-quoted string cannot span two physical lines; the
# previous layout was an unterminated literal that stopped the file parsing.
# NOTE: the result name keeps its historical spelling ("attned") so that any
# later reference to it keeps working.
attned_perf = smf.ols(
    formula=(
        'stndfnl ~ atndrte + priGPA + ACT + priGPAsq + ACTsq '
        '+ priGPA_atndrte + 1'
    ),
    data=df,
).fit()
print(attned_perf.summary())
# CEO salary data (ceosal1): one fit on the salary/sales columns and one on
# the lsalary/lsales columns, both with roe as a second regressor.
df = dataWoo('ceosal1')
salary_lin = smf.ols('salary ~ sales + roe + 1', data=df).fit()
salary_log = smf.ols('lsalary ~ lsales + roe + 1', data=df).fit()

# Combined table: starred coefficients, then N and R-squared for each fit.
print(
    summary_col(
        [salary_lin, salary_log],
        float_format='%0.3f',
        stars=True,
        model_names=['salary_lin', 'salary_log'],
        info_dict={
            'N': lambda res: str(int(res.nobs)),
            'R2': lambda res: f"{res.rsquared:.3f}",
        },
    )
)
# College GPA data: baseline model with a quadratic in hsize.
df = dataWoo('gpa2')
df['hsizesq'] = np.square(df['hsize'])
gpa_lin = smf.ols(
    formula='colgpa ~ sat + hsperc + hsize + hsizesq + 1',
    data=df,
).fit()
print(gpa_lin.summary())

# Re-centre the regressors so that zero corresponds to sat=1200, hsperc=30 and
# hsize=5; the squared term is rebuilt from the shifted hsize.
df['sat0'] = df['sat'] - 1200
df['hsize0'] = df['hsize'] - 5
df['hsperc0'] = df['hsperc'] - 30
df['hsize0sq'] = np.square(df['hsize0'])
gpa_predict = smf.ols(
    formula='colgpa ~ sat0 + hsperc0 + hsize0 + hsize0sq + 1',
    data=df,
).fit()
print(gpa_predict.summary())

# Side-by-side table.  The first column is gpa_lin; it used to carry the
# label 'salary_lin', a leftover from the CEO-salary table.
print(
    summary_col(
        [gpa_lin, gpa_predict],
        stars=True,
        float_format='%0.3f',
        model_names=['gpa_lin', 'gpa_predict'],
        info_dict={
            'N': lambda x: "{0:d}".format(int(x.nobs)),
            'R2': lambda x: "{:.3f}".format(x.rsquared),
        },
    )
)

# Show the baseline coefficients once more.  gpa_lin uses only columns that
# have not changed since it was fitted, so the summary is re-printed from the
# existing result instead of refitting the same model.
print(gpa_lin.summary())
# Hand-computed colgpa prediction at sat=1200, hsperc=30, hsize=5 (the same
# constants used to build sat0/hsperc0/hsize0 earlier in the file).
# NOTE(review): the numeric coefficients look like rounded estimates copied
# from the gpa_lin summary -- confirm against that output.
# Term order is kept as-is so the floating-point result does not change.
predicted_value = (
    1200 * .0015
    + 30 * -(.0139)
    + 5 * -(.0609)
    + 5 * 5 * .0055
    + 1.4927
)
# A bare expression is silently discarded when run as a script, so print it.
print(predicted_value)
# CEO salary data (ceosal2): fit the log-salary model first.
df = dataWoo('ceosal2')
ceo_step1 = smf.ols(
    formula='lsalary ~ lsales + lmktval + ceoten + 1',
    data=df,
).fit()
print(ceo_step1.summary())

# Residuals of the log model, exponentiated, and their sample mean.
# Bare expressions are discarded when run as a script, so results are printed.
uhat = df.lsalary - ceo_step1.predict()
ehat = np.exp(uhat)
print(ehat.mean())

# Exponentiated fitted values of the log model, used as the single regressor
# (no intercept) for salary in levels.
# NOTE(review): mhat is not a column of df; the formula presumably resolves it
# from the surrounding namespace -- confirm if this code is moved into a
# function.
mhat = np.exp(ceo_step1.predict())
ceo_step2 = smf.ols(formula='salary ~ mhat + 0', data=df).fit()
# Coefficient on mhat.  The original note cited "equation 46.44", which is
# likely a typo for 6.44 -- confirm against the text.
print(ceo_step2.params)
# Log-salary model on lsales, lmktval and ceoten, fitted on the current df.
ceo_step3 = smf.ols(
    formula='lsalary ~ lsales + lmktval + ceoten + 1',
    data=df,
).fit()
# Print the coefficients; a bare expression shows nothing when run as a script.
print(ceo_step3.params)
# Hand-computed linear predictor: constant + log(5000) and log(10000) terms
# + a term for the value 10.
# NOTE(review): the coefficients look like rounded ceo_step3 estimates for
# lsales, lmktval and ceoten respectively -- confirm against the printed params.
# Term order is kept as-is so the floating-point result does not change.
ceo_step3_pred = (
    4.5038
    + .1629 * np.log(5000)
    + .1092 * np.log(10000)
    + .0117 * 10
)
# A bare expression is silently discarded when run as a script, so print it.
print(ceo_step3_pred)
# Salary in levels regressed on mhat alone, with no intercept ("+ 0").
# NOTE(review): mhat is a module-level variable rather than a df column; the
# formula presumably picks it up from the surrounding namespace -- confirm.
ceo_step4 = smf.ols(formula='salary ~ mhat + 0', data=df).fit()
# Print the coefficient; a bare expression shows nothing when run as a script.
print(ceo_step4.params)
# Scale exp(7.013) by the factor 1.117.
# NOTE(review): 7.013 looks like the rounded log-scale prediction and 1.117
# the rounded coefficient on mhat -- confirm against the printed results.
ceo_step4_pred = 1.117 * np.exp(7.013)
# A bare expression is silently discarded when run as a script, so print it.
print(ceo_step4_pred)
# Pearson correlation between observed salary and mhat; the printed result
# holds the correlation coefficient and its p-value.
corr = ss.pearsonr(df['salary'], mhat)
print(corr)

# Salary in levels regressed on sales, mktval and ceoten.
ceo_sal = smf.ols(
    'salary ~ sales + mktval + ceoten + 1',
    data=df,
).fit()
print(ceo_sal.summary())