6 Multiple Regression Analysis: Further Analysis
Also covered using Python and Stata
6.1 Table 6.1 Effects of Data Scaling
# Three fits on the bwght data, all controlling for faminc:
#   (1) bwght on cigs, (2) bwghtlbs on cigs, (3) bwght on packs.
bwght_ols1 <- lm(bwght ~ cigs + faminc + 1, data = bwght)
bwght_ols2 <- lm(bwghtlbs ~ cigs + faminc + 1, data = bwght)
bwght_ols3 <- lm(bwght ~ packs + faminc + 1, data = bwght)
# Print the three models side by side as a plain-text table.
stargazer(
  bwght_ols1, bwght_ols2, bwght_ols3,
  type = "text",
  align = TRUE
)
##
## ===============================================================
##                                       Dependent variable:      
##                                 -------------------------------
##                                   bwght    bwghtlbs    bwght   
##                                    (1)        (2)       (3)    
## ---------------------------------------------------------------
## cigs                            -0.463***  -0.029***           
##                                  (0.092)    (0.006)            
##                                                                
## packs                                                -9.268*** 
##                                                       (1.832)  
##                                                                
## faminc                           0.093***  0.006***   0.093*** 
##                                  (0.029)    (0.002)   (0.029)  
##                                                                
## Constant                        116.974*** 7.311***  116.974***
##                                  (1.049)    (0.066)   (1.049)  
##                                                                
## ---------------------------------------------------------------
## Observations                      1,388      1,388     1,388   
## R2                                0.030      0.030     0.030   
## Adjusted R2                       0.028      0.028     0.028   
## Residual Std. Error (df = 1385)   20.063     1.254     20.063  
## F Statistic (df = 2; 1385)      21.274***  21.274*** 21.274*** 
## ===============================================================
## Note:                               *p<0.1; **p<0.05; ***p<0.01
6.2 Example 6.1. Effects of pollution on housing prices
# Regress zprice on the other z-prefixed columns of hprice2std.
hprice_scores <- lm(
  zprice ~ znox + zcrime + zrooms + zdist + zstratio + 1,
  data = hprice2std
)
stargazer(hprice_scores, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               zprice           
## -----------------------------------------------
## znox                         -0.340***         
##                               (0.045)          
##                                                
## zcrime                       -0.143***         
##                               (0.031)          
##                                                
## zrooms                       0.514***          
##                               (0.030)          
##                                                
## zdist                        -0.235***         
##                               (0.043)          
##                                                
## zstratio                     -0.270***         
##                               (0.030)          
##                                                
## Constant                       0.000           
##                               (0.027)          
##                                                
## -----------------------------------------------
## Observations                    506            
## R2                             0.636           
## Adjusted R2                    0.632           
## Residual Std. Error      0.607 (df = 500)      
## F Statistic          174.473*** (df = 5; 500)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
6.3 Compare with the result in Example 4.5
# Log of distance, kept as a free-standing vector rather than a column of
# hprice2; the formula below refers to it by name.
ldist <- log(hprice2$dist)
hprice_mols <- lm(
  lprice ~ lnox + ldist + rooms + stratio + 1,
  data = hprice2
)
stargazer(hprice_mols, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               lprice           
## -----------------------------------------------
## lnox                         -0.954***         
##                               (0.117)          
##                                                
## ldist                        -0.134***         
##                               (0.043)          
##                                                
## rooms                        0.255***          
##                               (0.019)          
##                                                
## stratio                      -0.052***         
##                               (0.006)          
##                                                
## Constant                     11.084***         
##                               (0.318)          
##                                                
## -----------------------------------------------
## Observations                    506            
## R2                             0.584           
## Adjusted R2                    0.581           
## Residual Std. Error      0.265 (df = 501)      
## F Statistic          175.855*** (df = 4; 501)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
6.4 Equation (6.7)
# Two-regressor model: lprice on lnox and rooms.
hprice_scores3 <- lm(
  lprice ~ lnox + rooms + 1,
  data = hprice2
)
stargazer(hprice_scores3, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               lprice           
## -----------------------------------------------
## lnox                         -0.718***         
##                               (0.066)          
##                                                
## rooms                        0.306***          
##                               (0.019)          
##                                                
## Constant                     9.234***          
##                               (0.188)          
##                                                
## -----------------------------------------------
## Observations                    506            
## R2                             0.514           
## Adjusted R2                    0.512           
## Residual Std. Error      0.286 (df = 503)      
## F Statistic          265.689*** (df = 2; 503)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
6.5 Equation (6.12)
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                wage            
## -----------------------------------------------
## exper                        0.298***          
##                               (0.041)          
##                                                
## expersq                      -0.006***         
##                               (0.001)          
##                                                
## Constant                     3.725***          
##                               (0.346)          
##                                                
## -----------------------------------------------
## Observations                    526            
## R2                             0.093           
## Adjusted R2                    0.089           
## Residual Std. Error      3.524 (df = 523)      
## F Statistic           26.740*** (df = 2; 523)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
6.6 Example 6.2. Effects of pollution on housing prices
# Derived regressors: log distance and squared rooms.
ldist <- log(hprice2$dist)
roomsq <- hprice2$rooms^2
# Work on an augmented copy so hprice2 itself is left unmodified.
hprice2b <- cbind(hprice2, ldist, roomsq)
hprice_roomsq <- lm(
  lprice ~ lnox + ldist + rooms + roomsq + stratio + 1,
  data = hprice2b
)
stargazer(hprice_roomsq, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               lprice           
## -----------------------------------------------
## lnox                         -0.902***         
##                               (0.115)          
##                                                
## ldist                        -0.087**          
##                               (0.043)          
##                                                
## rooms                        -0.545***         
##                               (0.165)          
##                                                
## roomsq                       0.062***          
##                               (0.013)          
##                                                
## stratio                      -0.048***         
##                               (0.006)          
##                                                
## Constant                     13.385***         
##                               (0.566)          
##                                                
## -----------------------------------------------
## Observations                    506            
## R2                             0.603           
## Adjusted R2                    0.599           
## Residual Std. Error      0.259 (df = 500)      
## F Statistic          151.770*** (df = 5; 500)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
6.7 Example 6.3. Effects of attendance on final exam performance
# Squared terms and the priGPA x atndrte interaction used in the model below.
priGPAsq <- attend$priGPA^2
ACTsq <- attend$ACT^2
priGPA_atndrte <- attend$priGPA * attend$atndrte
# Assign by column name instead of cbind(): re-running this chunk overwrites
# the derived columns rather than appending duplicate copies to `attend`.
attend$priGPAsq <- priGPAsq
attend$ACTsq <- ACTsq
attend$priGPA_atndrte <- priGPA_atndrte
attned_mols <- lm(
  stndfnl ~ atndrte + priGPA + ACT + priGPAsq + ACTsq + priGPA_atndrte + 1,
  data = attend
)
stargazer(attned_mols, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               stndfnl          
## -----------------------------------------------
## atndrte                       -0.007           
##                               (0.010)          
##                                                
## priGPA                       -1.629***         
##                               (0.481)          
##                                                
## ACT                           -0.128           
##                               (0.098)          
##                                                
## priGPAsq                     0.296***          
##                               (0.101)          
##                                                
## ACTsq                         0.005**          
##                               (0.002)          
##                                                
## priGPA_atndrte                 0.006           
##                               (0.004)          
##                                                
## Constant                       2.050           
##                               (1.360)          
##                                                
## -----------------------------------------------
## Observations                    680            
## R2                             0.229           
## Adjusted R2                    0.222           
## Residual Std. Error      0.873 (df = 673)      
## F Statistic           33.250*** (df = 6; 673)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
6.8 Example 6.4. CEO compensation and firm performance
# Level specification (salary on sales) versus log specification
# (lsalary on lsales); both include roe.
salary_lin <- lm(salary ~ sales + roe + 1, data = ceosal1)
salary_log <- lm(lsalary ~ lsales + roe + 1, data = ceosal1)
stargazer(
  salary_lin, salary_log,
  type = "text",
  align = TRUE
)
##
## ===========================================================
##                                    Dependent variable:     
##                                ----------------------------
##                                    salary        lsalary   
##                                     (1)            (2)     
## -----------------------------------------------------------
## sales                              0.016*                  
##                                   (0.009)                  
##                                                            
## lsales                                          0.275***   
##                                                  (0.033)   
##                                                            
## roe                               19.631*       0.018***   
##                                   (11.077)       (0.004)   
##                                                            
## Constant                         830.631***     4.362***   
##                                  (223.905)       (0.294)   
##                                                            
## -----------------------------------------------------------
## Observations                        209            209     
## R2                                 0.029          0.282    
## Adjusted R2                        0.020          0.275    
## Residual Std. Error (df = 206)   1,358.728        0.482    
## F Statistic (df = 2; 206)         3.095**       40.452***  
## ===========================================================
## Note:                           *p<0.1; **p<0.05; ***p<0.01
6.9 Example 6.5. Confidence interval for predicted college GPA
# Squared high-school size, used as a regressor below.
hsizesq <- gpa2$hsize^2
# Assign by column name instead of cbind(): re-running this chunk (or starting
# from a gpa2 that already carries hsizesq) overwrites the column rather than
# appending a second, identically named one.
gpa2$hsizesq <- hsizesq
gpa_lin <- lm(
  colgpa ~ sat + hsperc + hsize + hsizesq + 1,
  data = gpa2
)
stargazer(gpa_lin, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               colgpa           
## -----------------------------------------------
## sat                          0.001***          
##                              (0.0001)          
##                                                
## hsperc                       -0.014***         
##                               (0.001)          
##                                                
## hsize                        -0.061***         
##                               (0.017)          
##                                                
## hsizesq                       0.005**          
##                               (0.002)          
##                                                
## Constant                     1.493***          
##                               (0.075)          
##                                                
## -----------------------------------------------
## Observations                   4,137           
## R2                             0.278           
## Adjusted R2                    0.277           
## Residual Std. Error      0.560 (df = 4132)     
## F Statistic          398.018*** (df = 4; 4132) 
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
Prediction
# Re-centre the regressors at sat = 1200, hsperc = 30, hsize = 5. With centred
# regressors the intercept of the fit equals the fitted colgpa at that point,
# so its standard error can be read straight from the regression table.
sat0 <- gpa2$sat - 1200
hsize0 <- gpa2$hsize - 5
hsperc0 <- gpa2$hsperc - 30
hsize0sq <- hsize0^2
colgpa <- gpa2$colgpa
# Build the data frame directly; the former data.frame(cbind(...)) first
# collapsed every column into a single-typed matrix for no benefit.
df <- data.frame(colgpa, sat0, hsperc0, hsize0, hsize0sq)
gpa_predict <- lm(colgpa ~ sat0 + hsperc0 + hsize0 + hsize0sq, data = df)
stargazer(gpa_predict, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               colgpa           
## -----------------------------------------------
## sat0                         0.001***          
##                              (0.0001)          
##                                                
## hsperc0                      -0.014***         
##                               (0.001)          
##                                                
## hsize0                        -0.006           
##                               (0.009)          
##                                                
## hsize0sq                      0.005**          
##                               (0.002)          
##                                                
## Constant                     2.700***          
##                               (0.020)          
##                                                
## -----------------------------------------------
## Observations                   4,137           
## R2                             0.278           
## Adjusted R2                    0.277           
## Residual Std. Error      0.560 (df = 4132)     
## F Statistic          398.018*** (df = 4; 4132) 
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
6.10 Example 6.6. Confidence Interval for Future College GPA
# Refit colgpa on sat, hsperc, hsize and hsizesq using gpa2.
gpa_lin <- lm(
  colgpa ~ sat + hsperc + hsize + hsizesq + 1,
  data = gpa2
)
stargazer(gpa_lin, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               colgpa           
## -----------------------------------------------
## sat                          0.001***          
##                              (0.0001)          
##                                                
## hsperc                       -0.014***         
##                               (0.001)          
##                                                
## hsize                        -0.061***         
##                               (0.017)          
##                                                
## hsizesq                       0.005**          
##                               (0.002)          
##                                                
## Constant                     1.493***          
##                               (0.075)          
##                                                
## -----------------------------------------------
## Observations                   4,137           
## R2                             0.278           
## Adjusted R2                    0.277           
## Residual Std. Error      0.560 (df = 4132)     
## F Statistic          398.018*** (df = 4; 4132) 
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# The assignment below used to be fused onto the end of the "## Note:" line
# above, which turned it into a comment and left `values` undefined.
# Single-row frame holding the regressor values to predict at.
values <- data.frame(sat = 1200, hsperc = 30, hsize = 5, hsizesq = 25)
# interval = "prediction" requests lower/upper bounds alongside the fit.
predict(gpa_lin, values, interval = "prediction")
##        fit      lwr      upr
## 1 2.700075 1.601749 3.798402
6.11 Example 6.7. Predicting CEO log(salary)
Step 1
# Log-salary regression on lsales, lmktval and ceoten from ceosal2.
ceo_step1 <- lm(
  lsalary ~ lsales + lmktval + ceoten + 1,
  data = ceosal2
)
stargazer(ceo_step1, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               lsalary          
## -----------------------------------------------
## lsales                       0.163***          
##                               (0.039)          
##                                                
## lmktval                       0.109**          
##                               (0.050)          
##                                                
## ceoten                        0.012**          
##                               (0.005)          
##                                                
## Constant                     4.504***          
##                               (0.257)          
##                                                
## -----------------------------------------------
## Observations                    177            
## R2                             0.318           
## Adjusted R2                    0.306           
## Residual Std. Error      0.505 (df = 173)      
## F Statistic           26.907*** (df = 3; 173)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
Step 2
## [1] 1.135661
# NOTE(review): the statement that printed the value above is not present here.
# The assignment below used to be fused onto that output line, which turned it
# into a comment and left `mhat` undefined for the regression that follows.
# Exponentiated fitted values from the Step 1 log-salary model.
mhat <- exp(predict(ceo_step1))
# Regress salary on mhat with no intercept (`+ 0`).
ceo_step2 <- lm(salary ~ mhat + 0, data = ceosal2)
stargazer(ceo_step2, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               salary           
## -----------------------------------------------
## mhat                         1.117***          
##                               (0.047)          
##                                                
## -----------------------------------------------
## Observations                    177            
## R2                             0.762           
## Adjusted R2                    0.760           
## Residual Std. Error     511.870 (df = 176)     
## F Statistic          562.392*** (df = 1; 176)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
Step 3
# Same specification as the Step 1 fit, refitted under its own name.
ceo_step3 <- lm(lsalary ~ lsales + lmktval + ceoten + 1, data = ceosal2)
# Fitted lsalary at sales = 5000, mktval = 10000, ceoten = 10, built from the
# coefficients in formula order (intercept, lsales, lmktval, ceoten).
# coef() replaces `$coef`, which only worked through partial matching of
# `$coefficients`. The result keeps the "(Intercept)" name of the first term.
ceo_step3_pred <- coef(ceo_step3)[1] +
  coef(ceo_step3)[2] * log(5000) +
  coef(ceo_step3)[3] * log(10000) +
  coef(ceo_step3)[4] * 10
ceo_step3_pred
## (Intercept)
##    7.014077
Step 4
# Through-the-origin regression of salary on mhat, fitted from free-standing
# vectors (no data= argument), so `salary` and `mhat` must exist in the
# calling environment.
salary <- ceosal2$salary
ceo_step4 <- lm(salary ~ mhat + 0)
# coef() replaces `$coef`, which only worked through partial matching of
# `$coefficients`.
# NOTE(review): 7.013 is hard-coded, while Step 3 prints 7.014077 -- confirm
# which value is intended before replacing it with ceo_step3_pred.
ceo_step4_pred <- coef(ceo_step4)[1] * exp(7.013)
ceo_step4_pred
##     mhat
## 1240.808
6.12 Example 6.8. Predicting CEO salary
## [1] 0.4930322
# NOTE(review): the statement that printed the value above is not present here.
# The assignment below used to be fused onto that output line, which turned it
# into a comment and left `salary_reg` undefined for the stargazer() call.
# Level regression of salary on sales, mktval and ceoten.
salary_reg <- lm(salary ~ sales + mktval + ceoten, data = ceosal2)
stargazer(salary_reg, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               salary           
## -----------------------------------------------
## sales                         0.019*           
##                               (0.010)          
##                                                
## mktval                        0.023**          
##                               (0.009)          
##                                                
## ceoten                       12.703**          
##                               (5.618)          
##                                                
## Constant                    613.436***         
##                              (65.237)          
##                                                
## -----------------------------------------------
## Observations                    177            
## R2                             0.201           
## Adjusted R2                    0.187           
## Residual Std. Error     529.671 (df = 173)     
## F Statistic           14.532*** (df = 3; 173)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01