import numpy as np
import pandas as pd
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from wooldridge import dataWoo
dataWoo()
J.M. Wooldridge (2016) Introductory Econometrics: A Modern Approach,
Cengage Learning, 6th edition.

  401k 401ksubs admnrev affairs airfare alcohol apple approval athlet1 athlet2
  attend audit barium beauty benefits beveridge big9salary bwght bwght2 campus
  card catholic cement census2000 ceosal1 ceosal2 charity consump corn countymurders
  cps78_85 cps91 crime1 crime2 crime3 crime4 discrim driving earns econmath
  elem94_95 engin expendshares ezanders ezunem fair fertil1 fertil2 fertil3 fish
  fringe gpa1 gpa2 gpa3 happiness hprice1 hprice2 hprice3 hseinv htv
  infmrt injury intdef intqrt inven jtrain jtrain2 jtrain3 kielmc lawsch85
  loanapp lowbrth mathpnl meap00_01 meap01 meap93 meapsingle minwage mlb1 mroz
  murder nbasal nyse okun openness pension phillips pntsprd prison prminwge
  rdchem rdtelec recid rental return saving sleep75 slp75_81 smoke traffic1
  traffic2 twoyear volat vote1 vote2 voucher wage1 wage2 wagepan wageprc
  wine
df = dataWoo('gpa1')
dataWoo('gpa1', description=True)
name of dataset: gpa1
no of variables: 29
no of observations: 141

+----------+--------------------------------+
| variable | label                          |
+----------+--------------------------------+
| age      | in years                       |
| soph     | =1 if sophomore                |
| junior   | =1 if junior                   |
| senior   | =1 if senior                   |
| senior5  | =1 if fifth year senior        |
| male     | =1 if male                     |
| campus   | =1 if live on campus           |
| business | =1 if business major           |
| engineer | =1 if engineering major        |
| colGPA   | MSU GPA                        |
| hsGPA    | high school GPA                |
| ACT      | 'achievement' score            |
| job19    | =1 if job <= 19 hours          |
| job20    | =1 if job >= 20 hours          |
| drive    | =1 if drive to campus          |
| bike     | =1 if bicycle to campus        |
| walk     | =1 if walk to campus           |
| voluntr  | =1 if do volunteer work        |
| PC       | =1 of pers computer at sch     |
| greek    | =1 if fraternity or sorority   |
| car      | =1 if own car                  |
| siblings | =1 if have siblings            |
| bgfriend | =1 if boy- or girlfriend       |
| clubs    | =1 if belong to MSU club       |
| skipped  | avg lectures missed per week   |
| alcohol  | avg # days per week drink alc. |
| gradMI   | =1 if Michigan high school     |
| fathcoll | =1 if father college grad      |
| mothcoll | =1 if mother college grad      |
+----------+--------------------------------+

Christopher Lemmon, a former MSU undergraduate, collected these data from a
survey he took of MSU students in Fall 1994.
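A quick optional check, not part of the original output: summary statistics for the three variables used in the regressions that follow.
# Optional sketch: descriptive statistics for the variables used below.
print(df[['colGPA', 'hsGPA', 'ACT']].describe())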
gpa_multiple = smf.ols(formula='colGPA ~ hsGPA + ACT + 1', data=df).fit()
print(gpa_multiple.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                 colGPA   R-squared:                       0.176
Model:                            OLS   Adj. R-squared:                  0.164
Method:                 Least Squares   F-statistic:                     14.78
Date:                Thu, 09 Apr 2020   Prob (F-statistic):           1.53e-06
Time:                        00:06:48   Log-Likelihood:                -46.573
No. Observations:                 141   AIC:                             99.15
Df Residuals:                     138   BIC:                             108.0
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.2863      0.341      3.774      0.000       0.612       1.960
hsGPA          0.4535      0.096      4.733      0.000       0.264       0.643
ACT            0.0094      0.011      0.875      0.383      -0.012       0.031
==============================================================================
Omnibus:                        3.056   Durbin-Watson:                   1.885
Prob(Omnibus):                  0.217   Jarque-Bera (JB):                2.469
Skew:                           0.199   Prob(JB):                        0.291
Kurtosis:                       2.488   Cond. No.                         298.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
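The pieces of this table can also be pulled directly from the fitted results object; a minimal sketch using standard statsmodels attributes:
print(gpa_multiple.params)       # point estimates for Intercept, hsGPA, ACT
print(gpa_multiple.bse)          # standard errors
print(gpa_multiple.conf_int())   # 95% confidence intervals
print(gpa_multiple.rsquared, gpa_multiple.rsquared_adj)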
gpa_simple = smf.ols(formula='colGPA ~ ACT + 1', data=df).fit()
print(gpa_simple.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                 colGPA   R-squared:                       0.043
Model:                            OLS   Adj. R-squared:                  0.036
Method:                 Least Squares   F-statistic:                     6.207
Date:                Thu, 09 Apr 2020   Prob (F-statistic):             0.0139
Time:                        00:06:48   Log-Likelihood:                -57.177
No. Observations:                 141   AIC:                             118.4
Df Residuals:                     139   BIC:                             124.3
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.4030      0.264      9.095      0.000       1.881       2.925
ACT            0.0271      0.011      2.491      0.014       0.006       0.049
==============================================================================
Omnibus:                        3.174   Durbin-Watson:                   1.909
Prob(Omnibus):                  0.205   Jarque-Bera (JB):                2.774
Skew:                           0.248   Prob(JB):                        0.250
Kurtosis:                       2.525   Cond. No.                         209.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
from statsmodels.iolib.summary2 import summary_col
print(summary_col([gpa_multiple, gpa_simple], stars=True, float_format='%0.2f',
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.2f}".format(x.rsquared)}))
============================
          colGPA I colGPA II
----------------------------
ACT       0.01     0.03**
          (0.01)   (0.01)
Intercept 1.29***  2.40***
          (0.34)   (0.26)
hsGPA     0.45***
          (0.10)
N         141      141
R2        0.18     0.04
============================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
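The gap between the two ACT slopes follows the usual omitted-variable algebra: the simple-regression slope equals the multiple-regression slope plus the hsGPA coefficient times the slope from regressing hsGPA on ACT. A short sketch checking this identity (not part of the original notebook):
# Slope from the auxiliary regression of hsGPA on ACT
delta = smf.ols('hsGPA ~ ACT', data=df).fit().params['ACT']
implied = gpa_multiple.params['ACT'] + gpa_multiple.params['hsGPA'] * delta
print(implied, gpa_simple.params['ACT'])   # the two values should coincide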
df = dataWoo('wage1')
wage_multiple = smf.ols(formula='lwage ~ educ + exper + tenure + 1', data=df).fit()
print(wage_multiple.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.316
Model:                            OLS   Adj. R-squared:                  0.312
Method:                 Least Squares   F-statistic:                     80.39
Date:                Thu, 09 Apr 2020   Prob (F-statistic):           9.13e-43
Time:                        00:06:48   Log-Likelihood:                -313.55
No. Observations:                 526   AIC:                             635.1
Df Residuals:                     522   BIC:                             652.2
Df Model:                           3
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.2844      0.104      2.729      0.007       0.080       0.489
educ           0.0920      0.007     12.555      0.000       0.078       0.106
exper          0.0041      0.002      2.391      0.017       0.001       0.008
tenure         0.0221      0.003      7.133      0.000       0.016       0.028
==============================================================================
Omnibus:                       11.534   Durbin-Watson:                   1.769
Prob(Omnibus):                  0.003   Jarque-Bera (JB):               20.941
Skew:                           0.021   Prob(JB):                     2.84e-05
Kurtosis:                       3.977   Cond. No.                         135.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
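Because the dependent variable is log(wage), the educ coefficient is roughly the proportional wage change per additional year of schooling; a small sketch of the approximate and exact percentage effects:
b_educ = wage_multiple.params['educ']
print(100 * b_educ)                # approximate % effect, about 9.2
print(100 * (np.exp(b_educ) - 1))  # exact % effect, about 9.6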
df = dataWoo('401k')
pension_multiple = smf.ols(formula='prate ~ mrate + age + 1', data=df).fit()
print(pension_multiple.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                  prate   R-squared:                       0.092
Model:                            OLS   Adj. R-squared:                  0.091
Method:                 Least Squares   F-statistic:                     77.79
Date:                Thu, 09 Apr 2020   Prob (F-statistic):           6.67e-33
Time:                        00:06:48   Log-Likelihood:                -6422.3
No. Observations:                1534   AIC:                         1.285e+04
Df Residuals:                    1531   BIC:                         1.287e+04
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     80.1190      0.779    102.846      0.000      78.591      81.647
mrate          5.5213      0.526     10.499      0.000       4.490       6.553
age            0.2431      0.045      5.440      0.000       0.155       0.331
==============================================================================
Omnibus:                      375.579   Durbin-Watson:                   1.910
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              805.992
Skew:                          -1.387   Prob(JB):                    9.57e-176
Kurtosis:                       5.217   Cond. No.                         32.5
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
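The fitted model can also be used for prediction through the formula interface; a sketch with illustrative plan characteristics (the values here are assumptions, not taken from the text):
# Hypothetical plan: 100% match rate, 10 years old
new_plan = pd.DataFrame({'mrate': [1.0], 'age': [10]})
print(pension_multiple.predict(new_plan))   # predicted prate, about 88 percent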
df = dataWoo('gpa1')
gpa_multiple = smf.ols(formula='colGPA ~ hsGPA + ACT + 1', data=df).fit()
print(gpa_multiple.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                 colGPA   R-squared:                       0.176
Model:                            OLS   Adj. R-squared:                  0.164
Method:                 Least Squares   F-statistic:                     14.78
Date:                Thu, 09 Apr 2020   Prob (F-statistic):           1.53e-06
Time:                        00:06:49   Log-Likelihood:                -46.573
No. Observations:                 141   AIC:                             99.15
Df Residuals:                     138   BIC:                             108.0
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.2863      0.341      3.774      0.000       0.612       1.960
hsGPA          0.4535      0.096      4.733      0.000       0.264       0.643
ACT            0.0094      0.011      0.875      0.383      -0.012       0.031
==============================================================================
Omnibus:                        3.056   Durbin-Watson:                   1.885
Prob(Omnibus):                  0.217   Jarque-Bera (JB):                2.469
Skew:                           0.199   Prob(JB):                        0.291
Kurtosis:                       2.488   Cond. No.                         298.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
df = dataWoo('crime1')
crime_multiple = smf.ols(formula='narr86 ~ pcnv + ptime86 + qemp86 + 1', data=df).fit()
print(crime_multiple.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                 narr86   R-squared:                       0.041
Model:                            OLS   Adj. R-squared:                  0.040
Method:                 Least Squares   F-statistic:                     39.10
Date:                Thu, 09 Apr 2020   Prob (F-statistic):           9.91e-25
Time:                        00:06:49   Log-Likelihood:                -3394.7
No. Observations:                2725   AIC:                             6797.
Df Residuals:                    2721   BIC:                             6821.
Df Model:                           3
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.7118      0.033     21.565      0.000       0.647       0.776
pcnv          -0.1499      0.041     -3.669      0.000      -0.230      -0.070
ptime86       -0.0344      0.009     -4.007      0.000      -0.051      -0.018
qemp86        -0.1041      0.010    -10.023      0.000      -0.124      -0.084
==============================================================================
Omnibus:                     2394.860   Durbin-Watson:                   1.836
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           106169.153
Skew:                           4.002   Prob(JB):                         0.00
Kurtosis:                      32.513   Cond. No.                         8.27
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
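Reading the coefficients ceteris paribus, a small sketch of the predicted change in narr86 when the proportion of prior arrests leading to conviction rises by 0.5:
print(0.5 * crime_multiple.params['pcnv'])   # about -0.075 arrests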
crime_multiple_2 = smf.ols(formula='narr86 ~ avgsen + pcnv + ptime86 + qemp86 + 1', data=df).fit()
print(crime_multiple_2.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                 narr86   R-squared:                       0.042
Model:                            OLS   Adj. R-squared:                  0.041
Method:                 Least Squares   F-statistic:                     29.96
Date:                Thu, 09 Apr 2020   Prob (F-statistic):           2.01e-24
Time:                        00:06:49   Log-Likelihood:                -3393.5
No. Observations:                2725   AIC:                             6797.
Df Residuals:                    2720   BIC:                             6826.
Df Model:                           4
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.7068      0.033     21.319      0.000       0.642       0.772
avgsen         0.0074      0.005      1.572      0.116      -0.002       0.017
pcnv          -0.1508      0.041     -3.692      0.000      -0.231      -0.071
ptime86       -0.0374      0.009     -4.252      0.000      -0.055      -0.020
qemp86        -0.1033      0.010     -9.940      0.000      -0.124      -0.083
==============================================================================
Omnibus:                     2396.990   Durbin-Watson:                   1.837
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           106841.658
Skew:                           4.006   Prob(JB):                         0.00
Kurtosis:                      32.611   Cond. No.                         10.2
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(summary_col([crime_multiple, crime_multiple_2], stars=True, float_format='%0.2f',
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.2f}".format(x.rsquared)}))
============================
          narr86 I narr86 II
----------------------------
Intercept 0.71***  0.71***
          (0.03)   (0.03)
avgsen             0.01
                   (0.00)
pcnv      -0.15*** -0.15***
          (0.04)   (0.04)
ptime86   -0.03*** -0.04***
          (0.01)   (0.01)
qemp86    -0.10*** -0.10***
          (0.01)   (0.01)
N         2725     2725
R2        0.04     0.04
============================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
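Since the two crime models are nested, the contribution of avgsen can be tested formally; a sketch using the built-in F test for nested OLS models (with a single restriction the F statistic is just the square of the avgsen t statistic):
# Pass the restricted model to the unrestricted model's compare_f_test
f_val, p_val, df_diff = crime_multiple_2.compare_f_test(crime_multiple)
print(f_val, p_val, df_diff)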
df = dataWoo('wage1')
wage_simple = smf.ols(formula='lwage ~ educ + 1', data=df).fit()
print(wage_simple.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.186
Model:                            OLS   Adj. R-squared:                  0.184
Method:                 Least Squares   F-statistic:                     119.6
Date:                Thu, 09 Apr 2020   Prob (F-statistic):           3.27e-25
Time:                        00:06:49   Log-Likelihood:                -359.38
No. Observations:                 526   AIC:                             722.8
Df Residuals:                     524   BIC:                             731.3
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5838      0.097      5.998      0.000       0.393       0.775
educ           0.0827      0.008     10.935      0.000       0.068       0.098
==============================================================================
Omnibus:                       11.804   Durbin-Watson:                   1.801
Prob(Omnibus):                  0.003   Jarque-Bera (JB):               13.811
Skew:                           0.268   Prob(JB):                      0.00100
Kurtosis:                       3.586   Cond. No.                         60.2
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
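Two algebraic facts about OLS with an intercept are easy to verify here: the residuals sum to zero and have zero sample covariance with the regressor. A quick sketch:
resid = wage_simple.resid
print(resid.sum())                            # numerically zero
print(np.corrcoef(resid, df['educ'])[0, 1])   # numerically zero as well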