import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *
# Wage level regression with a gender dummy plus human-capital controls (wage1).
df = dataWoo('wage1')
spec = 'wage ~ female + educ + exper + tenure + 1'
wage_female = smf.ols(formula=spec, data=df).fit()
print(wage_female.summary())
# b1 measures the average wage difference between men and women who have the same level of educ, exper and tenure.
OLS Regression Results ============================================================================== Dep. Variable: wage R-squared: 0.364 Model: OLS Adj. R-squared: 0.359 Method: Least Squares F-statistic: 74.40 Date: Sat, 11 Apr 2020 Prob (F-statistic): 7.30e-50 Time: 20:21:46 Log-Likelihood: -1314.2 No. Observations: 526 AIC: 2638. Df Residuals: 521 BIC: 2660. Df Model: 4 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -1.5679 0.725 -2.164 0.031 -2.991 -0.145 female -1.8109 0.265 -6.838 0.000 -2.331 -1.291 educ 0.5715 0.049 11.584 0.000 0.475 0.668 exper 0.0254 0.012 2.195 0.029 0.003 0.048 tenure 0.1410 0.021 6.663 0.000 0.099 0.183 ============================================================================== Omnibus: 185.864 Durbin-Watson: 1.794 Prob(Omnibus): 0.000 Jarque-Bera (JB): 715.580 Skew: 1.589 Prob(JB): 4.11e-156 Kurtosis: 7.749 Cond. No. 141. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Raw gender gap: wage on the female dummy alone, no controls.
gap_model = smf.ols(formula='wage ~ female + 1', data=df)
wage_female = gap_model.fit()
print(wage_female.summary())
# b0 is the average wage for men in the sample.
OLS Regression Results ============================================================================== Dep. Variable: wage R-squared: 0.116 Model: OLS Adj. R-squared: 0.114 Method: Least Squares F-statistic: 68.54 Date: Sat, 11 Apr 2020 Prob (F-statistic): 1.04e-15 Time: 20:21:46 Log-Likelihood: -1400.7 No. Observations: 526 AIC: 2805. Df Residuals: 524 BIC: 2814. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 7.0995 0.210 33.806 0.000 6.687 7.512 female -2.5118 0.303 -8.279 0.000 -3.108 -1.916 ============================================================================== Omnibus: 223.488 Durbin-Watson: 1.818 Prob(Omnibus): 0.000 Jarque-Bera (JB): 929.998 Skew: 1.928 Prob(JB): 1.13e-202 Kurtosis: 8.250 Cond. No. 2.57 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Mean wage (and other covariates) by gender.
# fix: the CategoricalDtype import was never used and is dropped; the group
# means are printed explicitly so they show up when this runs as a plain
# script rather than as a notebook cell.
df = df.melt(['wage', 'educ','exper', 'tenure', 'female'])
print(df.groupby('female').mean())
wage | educ | exper | tenure | value | |
---|---|---|---|---|---|
female | |||||
0 | 7.099489 | 12.788321 | 17.558394 | 6.474453 | 32.003058 |
1 | 4.587659 | 12.317460 | 16.428571 | 3.615079 | 26.502281 |
# Effect of PC ownership on college GPA, controlling for ability (gpa1).
df = dataWoo('gpa1')
ur_spec = 'colGPA ~ PC + hsGPA + ACT + 1'
colGPA_ur = smf.ols(formula=ur_spec, data=df).fit()
print(colGPA_ur.summary())
OLS Regression Results ============================================================================== Dep. Variable: colGPA R-squared: 0.219 Model: OLS Adj. R-squared: 0.202 Method: Least Squares F-statistic: 12.83 Date: Sat, 11 Apr 2020 Prob (F-statistic): 1.93e-07 Time: 20:21:46 Log-Likelihood: -42.796 No. Observations: 141 AIC: 93.59 Df Residuals: 137 BIC: 105.4 Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 1.2635 0.333 3.793 0.000 0.605 1.922 PC 0.1573 0.057 2.746 0.007 0.044 0.271 hsGPA 0.4472 0.094 4.776 0.000 0.262 0.632 ACT 0.0087 0.011 0.822 0.413 -0.012 0.029 ============================================================================== Omnibus: 2.770 Durbin-Watson: 1.870 Prob(Omnibus): 0.250 Jarque-Bera (JB): 1.863 Skew: 0.016 Prob(JB): 0.394 Kurtosis: 2.438 Cond. No. 298. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Restricted model: PC only, no ability controls.
r_model = smf.ols(formula='colGPA ~ PC + 1', data=df)
colGPA_r = r_model.fit()
print(colGPA_r.summary())
OLS Regression Results ============================================================================== Dep. Variable: colGPA R-squared: 0.050 Model: OLS Adj. R-squared: 0.043 Method: Least Squares F-statistic: 7.314 Date: Sat, 11 Apr 2020 Prob (F-statistic): 0.00770 Time: 20:21:46 Log-Likelihood: -56.641 No. Observations: 141 AIC: 117.3 Df Residuals: 139 BIC: 123.2 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 2.9894 0.040 75.678 0.000 2.911 3.068 PC 0.1695 0.063 2.704 0.008 0.046 0.293 ============================================================================== Omnibus: 2.136 Durbin-Watson: 1.941 Prob(Omnibus): 0.344 Jarque-Bera (JB): 1.852 Skew: 0.160 Prob(JB): 0.396 Kurtosis: 2.539 Cond. No. 2.45 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Mean colGPA by PC ownership.
# fix: a bare expression is a no-op outside a notebook, so print the table.
df = df.melt(['colGPA','PC'])
print(df.groupby('PC').mean())
colGPA | value | |
---|---|---|
PC | ||
0 | 2.989412 | 2.256078 |
1 | 3.158929 | 2.250529 |
# Job-training grants and hours of training per employee (jtrain, 1988 only).
df = dataWoo('jtrain')
df = df.loc[df['year'] == 1988]
hrsemp_spec = 'hrsemp ~ grant + lsales + lemploy + 1'
jobb_reg = smf.ols(formula=hrsemp_spec, data=df).fit()
print(jobb_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: hrsemp R-squared: 0.237 Model: OLS Adj. R-squared: 0.214 Method: Least Squares F-statistic: 10.44 Date: Sat, 11 Apr 2020 Prob (F-statistic): 4.80e-06 Time: 20:21:46 Log-Likelihood: -482.29 No. Observations: 105 AIC: 972.6 Df Residuals: 101 BIC: 983.2 Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 46.6651 43.412 1.075 0.285 -39.453 132.783 grant 26.2545 5.592 4.695 0.000 15.162 37.347 lsales -0.9846 3.540 -0.278 0.781 -8.007 6.038 lemploy -6.0699 3.883 -1.563 0.121 -13.772 1.633 ============================================================================== Omnibus: 66.291 Durbin-Watson: 2.182 Prob(Omnibus): 0.000 Jarque-Bera (JB): 284.382 Skew: 2.191 Prob(JB): 1.77e-62 Kurtosis: 9.767 Cond. No. 285. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Log house-price model with a colonial-style dummy (hprice1).
df = dataWoo('hprice1')
# fix: identifier was misspelled 'hrpice_reg' (not referenced elsewhere).
hprice_reg = smf.ols(formula='lprice ~ llotsize + lsqrft + bdrms + colonial + 1', data=df).fit()
print(hprice_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lprice R-squared: 0.649 Model: OLS Adj. R-squared: 0.632 Method: Least Squares F-statistic: 38.38 Date: Sat, 11 Apr 2020 Prob (F-statistic): 3.74e-18 Time: 20:21:46 Log-Likelihood: 26.619 No. Observations: 88 AIC: -43.24 Df Residuals: 83 BIC: -30.85 Df Model: 4 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -1.3496 0.651 -2.073 0.041 -2.644 -0.055 llotsize 0.1678 0.038 4.395 0.000 0.092 0.244 lsqrft 0.7072 0.093 7.620 0.000 0.523 0.892 bdrms 0.0268 0.029 0.934 0.353 -0.030 0.084 colonial 0.0538 0.045 1.202 0.233 -0.035 0.143 ============================================================================== Omnibus: 13.728 Durbin-Watson: 2.077 Prob(Omnibus): 0.001 Jarque-Bera (JB): 50.828 Skew: -0.053 Prob(JB): 9.18e-12 Kurtosis: 6.722 Cond. No. 411. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Log-wage equation with quadratics in experience and tenure (wage1).
df = dataWoo('wage1')
quad_spec = 'lwage ~ female + educ + exper + expersq + tenure + tenursq + 1'
wage_reg = smf.ols(formula=quad_spec, data=df).fit()
print(wage_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.441 Model: OLS Adj. R-squared: 0.434 Method: Least Squares F-statistic: 68.18 Date: Sat, 11 Apr 2020 Prob (F-statistic): 2.11e-62 Time: 20:21:46 Log-Likelihood: -260.59 No. Observations: 526 AIC: 535.2 Df Residuals: 519 BIC: 565.0 Df Model: 6 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.4167 0.099 4.212 0.000 0.222 0.611 female -0.2965 0.036 -8.281 0.000 -0.367 -0.226 educ 0.0802 0.007 11.868 0.000 0.067 0.093 exper 0.0294 0.005 5.916 0.000 0.020 0.039 expersq -0.0006 0.000 -5.431 0.000 -0.001 -0.000 tenure 0.0317 0.007 4.633 0.000 0.018 0.045 tenursq -0.0006 0.000 -2.493 0.013 -0.001 -0.000 ============================================================================== Omnibus: 13.111 Durbin-Watson: 1.796 Prob(Omnibus): 0.001 Jarque-Bera (JB): 24.212 Skew: -0.085 Prob(JB): 5.53e-06 Kurtosis: 4.037 Cond. No. 4.49e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 4.49e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Convert the female coefficient (-0.2965 log points) into an exact
# proportional wage gap: exp(b) - 1, roughly -25.7%.
# fix: a bare expression is a no-op outside a notebook, so print the value.
level = np.exp(-.2965) - 1
print(level)
-0.2565843727358956
# Interaction of gender and marital status in the log-wage equation (wage1).
# fix: the original melted the frame before fitting, which replicates every
# observation once per value-variable (526 -> 7890 rows) and therefore
# mechanically shrinks the standard errors; fit on the raw sample instead.
df = dataWoo('wage1')
wage_reg = smf.ols('lwage ~ female * married + educ + exper + expersq + tenure + tenursq + 1', data=df).fit()
print(wage_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.461 Model: OLS Adj. R-squared: 0.460 Method: Least Squares F-statistic: 842.1 Date: Sat, 11 Apr 2020 Prob (F-statistic): 0.00 Time: 20:21:46 Log-Likelihood: -3764.3 No. Observations: 7890 AIC: 7547. Df Residuals: 7881 BIC: 7609. Df Model: 8 Covariance Type: nonrobust ================================================================================== coef std err t P>|t| [0.025 0.975] ---------------------------------------------------------------------------------- Intercept 0.3214 0.026 12.547 0.000 0.271 0.372 female -0.1104 0.014 -7.729 0.000 -0.138 -0.082 married 0.2127 0.014 15.000 0.000 0.185 0.240 female:married -0.3006 0.018 -16.353 0.000 -0.337 -0.265 educ 0.0789 0.002 46.022 0.000 0.076 0.082 exper 0.0268 0.001 19.958 0.000 0.024 0.029 expersq -0.0005 2.83e-05 -18.925 0.000 -0.001 -0.000 tenure 0.0291 0.002 16.795 0.000 0.026 0.032 tenursq -0.0005 5.92e-05 -9.002 0.000 -0.001 -0.000 ============================================================================== Omnibus: 203.516 Durbin-Watson: 1.786 Prob(Omnibus): 0.000 Jarque-Bera (JB): 482.736 Skew: -0.062 Prob(JB): 1.50e-105 Kurtosis: 4.205 Cond. No. 5.06e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 5.06e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Beauty premium for men: restrict the beauty sample to males (female == 0).
df = dataWoo('beauty')
df = df.loc[df['female'] == 0]
men_spec = 'lwage ~ belavg + abvavg + educ + exper + expersq + union + married + black + south + goodhlth + 1'
wage_reg = smf.ols(men_spec, data=df).fit()
print(wage_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.255 Model: OLS Adj. R-squared: 0.246 Method: Least Squares F-statistic: 27.82 Date: Sat, 11 Apr 2020 Prob (F-statistic): 5.64e-46 Time: 20:21:46 Log-Likelihood: -540.02 No. Observations: 824 AIC: 1102. Df Residuals: 813 BIC: 1154. Df Model: 10 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.4778 0.120 3.970 0.000 0.242 0.714 belavg -0.1647 0.053 -3.113 0.002 -0.269 -0.061 abvavg -0.0250 0.038 -0.662 0.508 -0.099 0.049 educ 0.0607 0.007 9.218 0.000 0.048 0.074 exper 0.0464 0.006 8.019 0.000 0.035 0.058 expersq -0.0007 0.000 -5.883 0.000 -0.001 -0.000 union 0.1485 0.036 4.099 0.000 0.077 0.220 married 0.0644 0.044 1.454 0.146 -0.022 0.151 black -0.2570 0.076 -3.397 0.001 -0.405 -0.108 south 0.0853 0.043 1.991 0.047 0.001 0.169 goodhlth 0.0011 0.070 0.016 0.987 -0.137 0.139 ============================================================================== Omnibus: 29.805 Durbin-Watson: 1.789 Prob(Omnibus): 0.000 Jarque-Bera (JB): 64.071 Skew: 0.180 Prob(JB): 1.22e-14 Kurtosis: 4.318 Cond. No. 6.27e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 6.27e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Beauty premium for women: same specification on the female subsample.
df = dataWoo('beauty')
df = df.loc[df['female'] == 1]
women_spec = 'lwage ~ belavg + abvavg + educ + exper + expersq + union + married + black + south + goodhlth + 1'
beauty_reg = smf.ols(women_spec, data=df).fit()
print(beauty_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.278 Model: OLS Adj. R-squared: 0.261 Method: Least Squares F-statistic: 16.40 Date: Sat, 11 Apr 2020 Prob (F-statistic): 4.31e-25 Time: 20:21:46 Log-Likelihood: -265.56 No. Observations: 436 AIC: 553.1 Df Residuals: 425 BIC: 598.0 Df Model: 10 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -0.0773 0.144 -0.536 0.592 -0.361 0.206 belavg -0.1142 0.066 -1.718 0.087 -0.245 0.016 abvavg 0.0686 0.049 1.398 0.163 -0.028 0.165 educ 0.0757 0.009 8.502 0.000 0.058 0.093 exper 0.0295 0.007 4.109 0.000 0.015 0.044 expersq -0.0005 0.000 -2.814 0.005 -0.001 -0.000 union 0.2927 0.054 5.431 0.000 0.187 0.399 married -0.0623 0.044 -1.404 0.161 -0.149 0.025 black 0.1438 0.069 2.087 0.038 0.008 0.279 south 0.0085 0.060 0.142 0.887 -0.109 0.126 goodhlth 0.1131 0.081 1.391 0.165 -0.047 0.273 ============================================================================== Omnibus: 33.272 Durbin-Watson: 1.868 Prob(Omnibus): 0.000 Jarque-Bera (JB): 111.518 Skew: 0.238 Prob(JB): 6.08e-25 Kurtosis: 5.432 Cond. No. 3.70e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 3.7e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Law-school rank dummies and starting salaries (lawsch85).
df = dataWoo('lawsch85')
# Build the missing dummy for schools ranked 61-100.
df['r61_100'] = 0
df.loc[(df['rank'] > 60) & (df['rank'] <= 100), 'r61_100'] = 1
rank_spec = 'lsalary ~ top10 + r11_25 + r26_40 + r41_60 + r61_100 + LSAT + GPA + llibvol + lcost + 1'
r_reg = smf.ols(formula=rank_spec, data=df).fit()
print(r_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lsalary R-squared: 0.911 Model: OLS Adj. R-squared: 0.905 Method: Least Squares F-statistic: 143.2 Date: Sat, 11 Apr 2020 Prob (F-statistic): 9.45e-62 Time: 20:21:46 Log-Likelihood: 146.45 No. Observations: 136 AIC: -272.9 Df Residuals: 126 BIC: -243.8 Df Model: 9 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 9.1653 0.411 22.277 0.000 8.351 9.979 top10 0.6996 0.053 13.078 0.000 0.594 0.805 r11_25 0.5935 0.039 15.049 0.000 0.515 0.672 r26_40 0.3751 0.034 11.005 0.000 0.308 0.443 r41_60 0.2628 0.028 9.399 0.000 0.207 0.318 r61_100 0.1316 0.021 6.254 0.000 0.090 0.173 LSAT 0.0057 0.003 1.858 0.066 -0.000 0.012 GPA 0.0137 0.074 0.185 0.854 -0.133 0.161 llibvol 0.0364 0.026 1.398 0.165 -0.015 0.088 lcost 0.0008 0.025 0.033 0.973 -0.049 0.051 ============================================================================== Omnibus: 9.419 Durbin-Watson: 1.926 Prob(Omnibus): 0.009 Jarque-Bera (JB): 20.478 Skew: 0.100 Prob(JB): 3.57e-05 Kurtosis: 4.890 Cond. No. 8.98e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 8.98e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Joint F-test: are the non-rank controls significant at all?
hypotheses = '(LSAT = GPA = llibvol = lcost = 0)'
print(r_reg.f_test(hypotheses))
<F test: F=array([[2.38531613]]), p=0.05470437645685717, df_denom=126, df_num=4>
# Exact salary premium implied by the top10 coefficient: exp(0.6996) - 1,
# i.e. about 101.3% higher salaries than unranked schools.
# fix: the name 'display' shadows IPython's display helper, and a bare
# expression is a no-op outside a notebook — rename and print instead.
top10_premium = np.exp(0.6996) - 1
print(top10_premium)
1.0129473674662273
# Does the return to education differ by gender? female*educ interaction.
df = dataWoo('wage1')
inter_spec = 'lwage ~ female*educ + exper + expersq + tenure + tenursq + 1'
wage_reg = smf.ols(inter_spec, data=df).fit()
print(wage_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.441 Model: OLS Adj. R-squared: 0.433 Method: Least Squares F-statistic: 58.37 Date: Sat, 11 Apr 2020 Prob (F-statistic): 1.67e-61 Time: 20:21:47 Log-Likelihood: -260.49 No. Observations: 526 AIC: 537.0 Df Residuals: 518 BIC: 571.1 Df Model: 7 Covariance Type: nonrobust =============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------- Intercept 0.3888 0.119 3.276 0.001 0.156 0.622 female -0.2268 0.168 -1.354 0.176 -0.556 0.102 educ 0.0824 0.008 9.725 0.000 0.066 0.099 female:educ -0.0056 0.013 -0.426 0.670 -0.031 0.020 exper 0.0293 0.005 5.886 0.000 0.020 0.039 expersq -0.0006 0.000 -5.398 0.000 -0.001 -0.000 tenure 0.0319 0.007 4.647 0.000 0.018 0.045 tenursq -0.0006 0.000 -2.509 0.012 -0.001 -0.000 ============================================================================== Omnibus: 13.302 Durbin-Watson: 1.795 Prob(Omnibus): 0.001 Jarque-Bera (JB): 24.887 Skew: -0.080 Prob(JB): 3.94e-06 Kurtosis: 4.053 Cond. No. 8.41e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 8.41e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Joint test: gender shifts neither the intercept nor the educ slope.
hypotheses = '(female:educ = female = 0)'
print(wage_reg.f_test(hypotheses))
<F test: F=array([[34.32554911]]), p=1.0023439572058104e-14, df_denom=518, df_num=2>
# Baseball salaries: race/ethnicity dummies interacted with city composition (mlb1).
df = dataWoo('mlb1')
df = df.loc[df['percblck'] != 0]
mlb_spec = 'lsalary ~ years + gamesyr + bavg + hrunsyr + rbisyr + runsyr + fldperc + allstar + black + hispan + black:percblck + hispan:perchisp + 1'
mlb1_reg = smf.ols(mlb_spec, data=df).fit()
print(mlb1_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lsalary R-squared: 0.638 Model: OLS Adj. R-squared: 0.624 Method: Least Squares F-statistic: 46.48 Date: Sat, 11 Apr 2020 Prob (F-statistic): 1.31e-62 Time: 20:21:47 Log-Likelihood: -350.12 No. Observations: 330 AIC: 726.2 Df Residuals: 317 BIC: 775.6 Df Model: 12 Covariance Type: nonrobust =================================================================================== coef std err t P>|t| [0.025 0.975] ----------------------------------------------------------------------------------- Intercept 10.3437 2.183 4.739 0.000 6.050 14.638 years 0.0673 0.013 5.224 0.000 0.042 0.093 gamesyr 0.0089 0.003 2.624 0.009 0.002 0.016 bavg 0.0009 0.002 0.625 0.533 -0.002 0.004 hrunsyr 0.0146 0.016 0.889 0.375 -0.018 0.047 rbisyr 0.0045 0.008 0.593 0.553 -0.010 0.019 runsyr 0.0072 0.005 1.577 0.116 -0.002 0.016 fldperc 0.0011 0.002 0.513 0.609 -0.003 0.005 allstar 0.0075 0.003 2.621 0.009 0.002 0.013 black -0.1980 0.125 -1.578 0.116 -0.445 0.049 hispan -0.1900 0.153 -1.241 0.215 -0.491 0.111 black:percblck 0.0125 0.005 2.509 0.013 0.003 0.022 hispan:perchisp 0.0201 0.010 2.051 0.041 0.001 0.039 ============================================================================== Omnibus: 1.828 Durbin-Watson: 1.341 Prob(Omnibus): 0.401 Jarque-Bera (JB): 1.709 Skew: 0.012 Prob(JB): 0.425 Kurtosis: 3.352 Cond. No. 5.65e+04 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 5.65e+04. This might indicate that there are strong multicollinearity or other numerical problems.
# Joint significance of all four race/ethnicity terms.
hypotheses = '(black = hispan = black:percblck = hispan:perchisp = 0)'
print(mlb1_reg.f_test(hypotheses))
<F test: F=array([[2.6478883]]), p=0.03347614735676116, df_denom=317, df_num=4>
# Restricted salary model: performance variables only, no race/ethnicity terms.
df = dataWoo('mlb1')
df = df.loc[df['percblck'] != 0]
restricted_spec = 'lsalary ~ years + gamesyr + bavg + hrunsyr + rbisyr + runsyr + fldperc + allstar + 1'
mlb1_reg_r = smf.ols(restricted_spec, data=df).fit()
print(mlb1_reg_r.summary())
OLS Regression Results ============================================================================== Dep. Variable: lsalary R-squared: 0.644 Model: OLS Adj. R-squared: 0.636 Method: Least Squares F-statistic: 77.94 Date: Sat, 11 Apr 2020 Prob (F-statistic): 1.35e-72 Time: 20:21:47 Log-Likelihood: -377.03 No. Observations: 353 AIC: 772.1 Df Residuals: 344 BIC: 806.9 Df Model: 8 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 10.3277 2.002 5.159 0.000 6.390 14.265 years 0.0629 0.012 5.146 0.000 0.039 0.087 gamesyr 0.0092 0.003 2.848 0.005 0.003 0.016 bavg 0.0004 0.001 0.345 0.730 -0.002 0.003 hrunsyr 0.0195 0.016 1.230 0.220 -0.012 0.051 rbisyr 0.0027 0.007 0.365 0.716 -0.012 0.017 runsyr 0.0096 0.004 2.229 0.026 0.001 0.018 fldperc 0.0012 0.002 0.600 0.549 -0.003 0.005 allstar 0.0069 0.003 2.468 0.014 0.001 0.012 ============================================================================== Omnibus: 2.170 Durbin-Watson: 1.300 Prob(Omnibus): 0.338 Jarque-Bera (JB): 2.071 Skew: 0.068 Prob(JB): 0.355 Kurtosis: 3.350 Cond. No. 5.36e+04 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 5.36e+04. This might indicate that there are strong multicollinearity or other numerical problems.
# Full gender-interaction model for cumulative GPA (gpa3, spring term only).
df = dataWoo('gpa3')
df = df.loc[df['spring'] == 1]
gpa3_spec = 'cumgpa ~ female*sat + hsperc + female:hsperc + tothrs + female:tothrs + 1'
gpa3_reg = smf.ols(gpa3_spec, data=df).fit()
print(gpa3_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: cumgpa R-squared: 0.406 Model: OLS Adj. R-squared: 0.394 Method: Least Squares F-statistic: 34.95 Date: Sat, 11 Apr 2020 Prob (F-statistic): 4.66e-37 Time: 20:21:47 Log-Likelihood: -237.26 No. Observations: 366 AIC: 490.5 Df Residuals: 358 BIC: 521.7 Df Model: 7 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [0.025 0.975] --------------------------------------------------------------------------------- Intercept 1.4808 0.207 7.142 0.000 1.073 1.889 female -0.3535 0.411 -0.861 0.390 -1.161 0.454 sat 0.0011 0.000 5.807 0.000 0.001 0.001 female:sat 0.0008 0.000 1.949 0.052 -6.88e-06 0.002 hsperc -0.0085 0.001 -6.167 0.000 -0.011 -0.006 female:hsperc -0.0005 0.003 -0.174 0.862 -0.007 0.006 tothrs 0.0023 0.001 2.718 0.007 0.001 0.004 female:tothrs -0.0001 0.002 -0.071 0.943 -0.003 0.003 ============================================================================== Omnibus: 6.858 Durbin-Watson: 1.832 Prob(Omnibus): 0.032 Jarque-Bera (JB): 9.054 Skew: -0.146 Prob(JB): 0.0108 Kurtosis: 3.713 Cond. No. 1.66e+04 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.66e+04. This might indicate that there are strong multicollinearity or other numerical problems.
# Chow-style test: do any of the slopes differ by gender?
hypotheses = '(female:tothrs = female:sat = female:hsperc = 0)'
print(gpa3_reg.f_test(hypotheses))
<F test: F=array([[1.53389781]]), p=0.2053733562814361, df_denom=358, df_num=3>
# Restricted model: gender shifts the intercept only.
df = dataWoo('gpa3')
df = df.loc[df['spring'] == 1]
gpa3_reg_r = smf.ols('cumgpa ~ female + sat + hsperc + tothrs + 1', data=df).fit()
print(gpa3_reg_r.summary())
OLS Regression Results ============================================================================== Dep. Variable: cumgpa R-squared: 0.398 Model: OLS Adj. R-squared: 0.392 Method: Least Squares F-statistic: 59.74 Date: Sat, 11 Apr 2020 Prob (F-statistic): 1.10e-38 Time: 20:21:47 Log-Likelihood: -239.59 No. Observations: 366 AIC: 489.2 Df Residuals: 361 BIC: 508.7 Df Model: 4 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 1.3285 0.180 7.388 0.000 0.975 1.682 female 0.3101 0.059 5.291 0.000 0.195 0.425 sat 0.0012 0.000 7.635 0.000 0.001 0.002 hsperc -0.0084 0.001 -6.839 0.000 -0.011 -0.006 tothrs 0.0025 0.001 3.379 0.001 0.001 0.004 ============================================================================== Omnibus: 5.200 Durbin-Watson: 1.860 Prob(Omnibus): 0.074 Jarque-Bera (JB): 6.107 Skew: -0.139 Prob(JB): 0.0472 Kurtosis: 3.568 Cond. No. 6.74e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 6.74e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Linear probability model of married women's labor-force participation (mroz).
df = dataWoo('mroz')
lpm_spec = 'inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1'
mroz_reg = smf.ols(lpm_spec, data=df).fit()
print(mroz_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: inlf R-squared: 0.264 Model: OLS Adj. R-squared: 0.257 Method: Least Squares F-statistic: 38.22 Date: Sat, 11 Apr 2020 Prob (F-statistic): 6.90e-46 Time: 20:21:47 Log-Likelihood: -423.89 No. Observations: 753 AIC: 863.8 Df Residuals: 745 BIC: 900.8 Df Model: 7 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.5855 0.154 3.798 0.000 0.283 0.888 nwifeinc -0.0034 0.001 -2.351 0.019 -0.006 -0.001 educ 0.0380 0.007 5.151 0.000 0.024 0.052 exper 0.0395 0.006 6.962 0.000 0.028 0.051 expersq -0.0006 0.000 -3.227 0.001 -0.001 -0.000 age -0.0161 0.002 -6.476 0.000 -0.021 -0.011 kidslt6 -0.2618 0.034 -7.814 0.000 -0.328 -0.196 kidsge6 0.0130 0.013 0.986 0.324 -0.013 0.039 ============================================================================== Omnibus: 169.137 Durbin-Watson: 0.494 Prob(Omnibus): 0.000 Jarque-Bera (JB): 36.741 Skew: -0.196 Prob(JB): 1.05e-08 Kurtosis: 1.991 Cond. No. 3.06e+03 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 3.06e+03. This might indicate that there are strong multicollinearity or other numerical problems.
# Linear probability model for being arrested at least once in 1986 (crime1).
df = dataWoo('crime1')
# Binary indicator: any 1986 arrest.
df['arr86'] = 0
df.loc[(df['narr86'] > 0), 'arr86'] = 1
# fix: reference the column by name in the formula; the original ' df.arr86'
# relied on patsy's environment lookup and mislabeled the dependent variable
# in the summary table.
crime_reg = smf.ols('arr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + 1', data=df).fit()
print(crime_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: df.arr86 R-squared: 0.047 Model: OLS Adj. R-squared: 0.046 Method: Least Squares F-statistic: 27.03 Date: Sat, 11 Apr 2020 Prob (F-statistic): 9.09e-27 Time: 20:21:47 Log-Likelihood: -1609.7 No. Observations: 2725 AIC: 3231. Df Residuals: 2719 BIC: 3267. Df Model: 5 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.4406 0.017 25.568 0.000 0.407 0.474 pcnv -0.1624 0.021 -7.649 0.000 -0.204 -0.121 avgsen 0.0061 0.006 0.947 0.344 -0.007 0.019 tottime -0.0023 0.005 -0.454 0.650 -0.012 0.007 ptime86 -0.0220 0.005 -4.739 0.000 -0.031 -0.013 qemp86 -0.0428 0.005 -7.925 0.000 -0.053 -0.032 ============================================================================== Omnibus: 766.502 Durbin-Watson: 1.581 Prob(Omnibus): 0.000 Jarque-Bera (JB): 474.581 Skew: 0.903 Prob(JB): 8.83e-104 Kurtosis: 2.043 Cond. No. 16.3 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Sentence-length variables: jointly insignificant.
hypotheses = '(avgsen = tottime = 0)'
print(crime_reg.f_test(hypotheses))
<F test: F=array([[1.05970044]]), p=0.346702695391494, df_denom=2719, df_num=2>
# Same LPM with race/ethnicity dummies added.
# fix: use the 'arr86' column name in the formula instead of ' df.arr86'
# (environment lookup; mislabels the dependent variable in the summary).
crime_reg_2 = smf.ols('arr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + black + hispan + 1', data=df).fit()
print(crime_reg_2.summary())
OLS Regression Results ============================================================================== Dep. Variable: df.arr86 R-squared: 0.068 Model: OLS Adj. R-squared: 0.066 Method: Least Squares F-statistic: 28.41 Date: Sat, 11 Apr 2020 Prob (F-statistic): 5.46e-38 Time: 20:21:47 Log-Likelihood: -1579.6 No. Observations: 2725 AIC: 3175. Df Residuals: 2717 BIC: 3222. Df Model: 7 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.3804 0.019 20.314 0.000 0.344 0.417 pcnv -0.1521 0.021 -7.219 0.000 -0.193 -0.111 avgsen 0.0046 0.006 0.723 0.470 -0.008 0.017 tottime -0.0026 0.005 -0.520 0.603 -0.012 0.007 ptime86 -0.0237 0.005 -5.157 0.000 -0.033 -0.015 qemp86 -0.0385 0.005 -7.123 0.000 -0.049 -0.028 black 0.1698 0.024 7.171 0.000 0.123 0.216 hispan 0.0962 0.021 4.644 0.000 0.056 0.137 ============================================================================== Omnibus: 631.338 Durbin-Watson: 1.571 Prob(Omnibus): 0.000 Jarque-Bera (JB): 443.423 Skew: 0.880 Prob(JB): 5.15e-97 Kurtosis: 2.102 Cond. No. 19.5 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# JTRAIN data, restricted to the 1988 cross-section: does receiving a job
# training grant reduce the log scrap rate, controlling for firm size
# (log sales, log employment)?
df = dataWoo('jtrain')
df = df.loc[df['year'] == 1988]
jobb_reg = smf.ols(formula=' lscrap ~ grant + lsales + lemploy + 1', data=df).fit()
print(jobb_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: lscrap R-squared: 0.072 Model: OLS Adj. R-squared: 0.011 Method: Least Squares F-statistic: 1.182 Date: Sat, 11 Apr 2020 Prob (F-statistic): 0.327 Time: 20:21:47 Log-Likelihood: -85.161 No. Observations: 50 AIC: 178.3 Df Residuals: 46 BIC: 186.0 Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 4.9868 4.656 1.071 0.290 -4.384 14.358 grant -0.0518 0.431 -0.120 0.905 -0.920 0.816 lsales -0.4548 0.373 -1.218 0.229 -1.206 0.297 lemploy 0.6394 0.365 1.751 0.087 -0.096 1.374 ============================================================================== Omnibus: 0.246 Durbin-Watson: 1.779 Prob(Omnibus): 0.884 Jarque-Bera (JB): 0.163 Skew: -0.132 Prob(JB): 0.922 Kurtosis: 2.905 Cond. No. 380. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# FERTIL2 data: number of children ever born regressed on the woman's age
# and years of education.
df = dataWoo('fertil2')
fert_reg = smf.ols(formula=' children ~ age + educ + 1', data=df).fit()
print(fert_reg.summary())
OLS Regression Results ============================================================================== Dep. Variable: children R-squared: 0.560 Model: OLS Adj. R-squared: 0.559 Method: Least Squares F-statistic: 2768. Date: Sat, 11 Apr 2020 Prob (F-statistic): 0.00 Time: 20:21:47 Log-Likelihood: -7881.7 No. Observations: 4361 AIC: 1.577e+04 Df Residuals: 4358 BIC: 1.579e+04 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -1.9967 0.094 -21.249 0.000 -2.181 -1.813 age 0.1748 0.003 64.631 0.000 0.170 0.180 educ -0.0899 0.006 -15.034 0.000 -0.102 -0.078 ============================================================================== Omnibus: 224.819 Durbin-Watson: 1.884 Prob(Omnibus): 0.000 Jarque-Bera (JB): 840.653 Skew: -0.066 Prob(JB): 2.85e-183 Kurtosis: 5.147 Cond. No. 123. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same fertility regression with an electricity dummy added. The dataset is
# reloaded so this step stands on its own even if `df` was changed in between.
df = dataWoo('fertil2')
fert_reg_2 = smf.ols(formula=' children ~ age + educ + electric + 1', data=df).fit()
print(fert_reg_2.summary())
OLS Regression Results ============================================================================== Dep. Variable: children R-squared: 0.562 Model: OLS Adj. R-squared: 0.562 Method: Least Squares F-statistic: 1863. Date: Sat, 11 Apr 2020 Prob (F-statistic): 0.00 Time: 20:21:47 Log-Likelihood: -7863.3 No. Observations: 4358 AIC: 1.573e+04 Df Residuals: 4354 BIC: 1.576e+04 Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -2.0711 0.095 -21.861 0.000 -2.257 -1.885 age 0.1770 0.003 64.855 0.000 0.172 0.182 educ -0.0788 0.006 -12.462 0.000 -0.091 -0.066 electric -0.3618 0.068 -5.317 0.000 -0.495 -0.228 ============================================================================== Omnibus: 224.823 Durbin-Watson: 1.890 Prob(Omnibus): 0.000 Jarque-Bera (JB): 841.796 Skew: -0.065 Prob(JB): 1.61e-183 Kurtosis: 5.149 Cond. No. 126. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.