Introductory Econometrics Using R

Also covered using Python and Stata

library(wooldridge)
library(psych)
library(stargazer)
library(car)

#### Table 6.1 Effects of Data Scaling

# Three regressions on the bwght data, printed side by side:
#   (1) bwght    on cigs  and faminc
#   (2) bwghtlbs on cigs  and faminc  (same regressors, rescaled outcome)
#   (3) bwght    on packs and faminc  (same outcome, rescaled smoking variable)
# The explicit "+ 1" only spells out the intercept that lm() includes anyway.
bwght_ols1 <- lm(bwght ~ cigs + faminc + 1, data = bwght)
bwght_ols2 <- lm(bwghtlbs ~ cigs + faminc + 1, data = bwght)
bwght_ols3 <- lm(bwght ~ packs + faminc + 1, data = bwght)
stargazer(
  bwght_ols1, bwght_ols2, bwght_ols3,
  type = "text",
  align = TRUE
)
##
## ===============================================================
##                                       Dependent variable:
##                                 -------------------------------
##                                   bwght    bwghtlbs    bwght
##                                    (1)        (2)       (3)
## ---------------------------------------------------------------
## cigs                            -0.463***  -0.029***
##                                  (0.092)    (0.006)
##
## packs                                                -9.268***
##                                                       (1.832)
##
## faminc                           0.093***  0.006***   0.093***
##                                  (0.029)    (0.002)   (0.029)
##
## Constant                        116.974*** 7.311***  116.974***
##                                  (1.049)    (0.066)   (1.049)
##
## ---------------------------------------------------------------
## Observations                      1,388      1,388     1,388
## R2                                0.030      0.030     0.030
## Adjusted R2                       0.028      0.028     0.028
## Residual Std. Error (df = 1385)   20.063     1.254     20.063
## F Statistic (df = 2; 1385)      21.274***  21.274*** 21.274***
## ===============================================================
## Note:                               *p<0.1; **p<0.05; ***p<0.01

#### Example 6.1. Effects of pollution on housing prices

# `hprice2std` is not created anywhere earlier in this script, so build it
# here: z-score (mean 0, sd 1) the outcome and each regressor from `hprice2`
# and prefix the column names with "z" to match the formula below.
# NOTE(review): assumes `hprice2` (wooldridge) has the columns listed in
# `std_vars` -- confirm against the package documentation.
std_vars <- c("price", "nox", "crime", "rooms", "dist", "stratio")
hprice2std <- as.data.frame(scale(hprice2[, std_vars]))
names(hprice2std) <- paste0("z", std_vars)

# Regression on standardized variables: each slope is the change in the
# outcome, in standard deviations, per one-standard-deviation change in the
# regressor.
hprice_scores <- lm(
  zprice ~ znox + zcrime + zrooms + zdist + zstratio + 1,
  data = hprice2std
)
stargazer(hprice_scores, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               zprice
## -----------------------------------------------
## znox                         -0.340***
##                               (0.045)
##
## zcrime                       -0.143***
##                               (0.031)
##
## zrooms                       0.514***
##                               (0.030)
##
## zdist                        -0.235***
##                               (0.043)
##
## zstratio                     -0.270***
##                               (0.030)
##
## Constant                       0.000
##                               (0.027)
##
## -----------------------------------------------
## Observations                    506
## R2                             0.636
## Residual Std. Error      0.607 (df = 500)
## F Statistic          174.473*** (df = 5; 500)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

#### Example 6.3. Effects of attendance on final exam performance

# Quadratic and interaction regressors for the attendance model.
# `priGPAsq` was used below but never defined in the original text; it is
# defined here as the square of priGPA, by analogy with `ACTsq`.
priGPAsq <- attend$priGPA^2
ACTsq <- attend$ACT^2
priGPA_atndrte <- attend$priGPA * attend$atndrte
attend <- cbind(attend, priGPAsq, ACTsq, priGPA_atndrte)
attned_mols <- lm(
  stndfnl ~ atndrte + priGPA + ACT + priGPAsq + ACTsq + priGPA_atndrte + 1,
  data = attend
)
stargazer(attned_mols, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               stndfnl
## -----------------------------------------------
## atndrte                       -0.007
##                               (0.010)
##
## priGPA                       -1.629***
##                               (0.481)
##
## ACT                           -0.128
##                               (0.098)
##
## priGPAsq                     0.296***
##                               (0.101)
##
## ACTsq                         0.005**
##                               (0.002)
##
## priGPA_atndrte                 0.006
##                               (0.004)
##
## Constant                       2.050
##                               (1.360)
##
## -----------------------------------------------
## Observations                    680
## R2                             0.229
## Adjusted R2                    0.222
## Residual Std. Error      0.873 (df = 673)
## F Statistic           33.250*** (df = 6; 673)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

#### Example 6.4. CEO compensation and firm performance

# Level-level and log-log salary equations on the ceosal1 data, side by side.
salary_lin <- lm(salary ~ sales + roe + 1, data = ceosal1)
salary_log <- lm(lsalary ~ lsales + roe + 1, data = ceosal1)
stargazer(salary_lin, salary_log, type = "text", align = TRUE)
##
## ===========================================================
##                                     Dependent variable:
##                                 ----------------------------
##                                    salary         lsalary
##                                      (1)            (2)
## -----------------------------------------------------------
## sales                              0.016*
##                                    (0.009)
##
## lsales                                           0.275***
##                                                   (0.033)
##
## roe                                19.631*       0.018***
##                                   (11.077)        (0.004)
##
## Constant                         830.631***      4.362***
##                                   (223.905)       (0.294)
##
## -----------------------------------------------------------
## Observations                         209            209
## R2                                  0.029          0.282
## Adjusted R2                         0.020          0.275
## Residual Std. Error (df = 206)    1,358.728        0.482
## F Statistic (df = 2; 206)          3.095**       40.452***
## ===========================================================
## Note:                           *p<0.1; **p<0.05; ***p<0.01

#### Example 6.5. Confidence interval for predicted college GPA

# Squared high-school size, used as a regressor in the colgpa model below.
hsizesq <- gpa2$hsize^2
# Attach the squared-size column to gpa2, then fit the college GPA equation
# with a quadratic in hsize and print the coefficient table.
gpa2 <- cbind(gpa2, hsizesq)
gpa_lin <- lm(
  colgpa ~ sat + hsperc + hsize + hsizesq + 1,
  data = gpa2
)
stargazer(gpa_lin, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               colgpa
## -----------------------------------------------
## sat                          0.001***
##                              (0.0001)
##
## hsperc                       -0.014***
##                               (0.001)
##
## hsize                        -0.061***
##                               (0.017)
##
## hsizesq                       0.005**
##                               (0.002)
##
## Constant                     1.493***
##                               (0.075)
##
## -----------------------------------------------
## Observations                   4,137
## R2                             0.278
## Residual Std. Error      0.560 (df = 4132)
## F Statistic          398.018*** (df = 4; 4132)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

Prediction

# Re-centre every regressor at the point of interest (sat = 1200,
# hsperc = 30, hsize = 5). With centred regressors the intercept of the
# auxiliary regression is the fitted colgpa at that point (compare the
# Constant below with the `fit` value printed in Example 6.6), and the
# intercept's standard error is the standard error of that prediction.
sat0 <- gpa2$sat - 1200
hsize0 <- gpa2$hsize - 5
hsperc0 <- gpa2$hsperc - 30
hsize0sq <- hsize0^2  # square of the centred size, not centred hsizesq
colgpa <- gpa2$colgpa

# Collect the outcome and centred regressors in one data frame for lm().
df <- data.frame(colgpa, sat0, hsperc0, hsize0, hsize0sq)
gpa_predict <- lm(colgpa ~ sat0 + hsperc0 + hsize0 + hsize0sq, data = df)
stargazer(gpa_predict, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               colgpa
## -----------------------------------------------
## sat0                         0.001***
##                              (0.0001)
##
## hsperc0                      -0.014***
##                               (0.001)
##
## hsize0                        -0.006
##                               (0.009)
##
## hsize0sq                      0.005**
##                               (0.002)
##
## Constant                     2.700***
##                               (0.020)
##
## -----------------------------------------------
## Observations                   4,137
## R2                             0.278
## Residual Std. Error      0.560 (df = 4132)
## F Statistic          398.018*** (df = 4; 4132)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

#### Example 6.6. Confidence Interval for Future College GPA

# Refit the college GPA equation (same specification as in Example 6.5) so
# the model object is available for predict() further down.
gpa_lin <- lm(
  colgpa ~ sat + hsperc + hsize + hsizesq + 1,
  data = gpa2
)
stargazer(gpa_lin, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               colgpa
## -----------------------------------------------
## sat                          0.001***
##                              (0.0001)
##
## hsperc                       -0.014***
##                               (0.001)
##
## hsize                        -0.061***
##                               (0.017)
##
## hsizesq                       0.005**
##                               (0.002)
##
## Constant                     1.493***
##                               (0.075)
##
## -----------------------------------------------
## Observations                   4,137
## R2                             0.278
## Residual Std. Error      0.560 (df = 4132)
## F Statistic          398.018*** (df = 4; 4132)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# One-row data frame holding the regressor values to predict at; hsizesq must
# be supplied explicitly because it is a separate column in the fitted model.
values <- data.frame(
  sat = 1200,
  hsperc = 30,
  hsize = 5,
  hsizesq = 25
)
# interval = "prediction" returns the point prediction with lower and upper
# bounds for a new individual outcome (columns fit, lwr, upr).
predict(gpa_lin, newdata = values, interval = "prediction")
##        fit      lwr      upr
## 1 2.700075 1.601749 3.798402

#### Example 6.7. Predicting CEO log(salary)

Step 1

# Step 1: regress log salary on log sales, log market value and CEO tenure.
# The fitted values and residuals of this model feed Step 2.
ceo_step1 <- lm(
  lsalary ~ lsales + lmktval + ceoten + 1,
  data = ceosal2
)
stargazer(ceo_step1, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               lsalary
## -----------------------------------------------
## lsales                       0.163***
##                               (0.039)
##
## lmktval                       0.109**
##                               (0.050)
##
## ceoten                        0.012**
##                               (0.005)
##
## Constant                     4.504***
##                               (0.257)
##
## -----------------------------------------------
## Observations                    177
## R2                             0.318
## Residual Std. Error      0.505 (df = 173)
## F Statistic           26.907*** (df = 3; 173)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

Step 2

# Exponentiated Step 1 residuals; their sample mean is one estimate of the
# factor needed to scale exp(fitted log salary) back to the salary level.
euhat <- exp(residuals(ceo_step1))
mean(euhat)
## [1] 1.135661
# Exponentiated Step 1 fitted values, then a regression through the origin
# ("+ 0" drops the intercept) of salary on them: the single slope is an
# alternative estimate of the same scale factor.
mhat <- exp(predict(ceo_step1))
ceo_step2 <- lm(
  salary ~ mhat + 0,
  data = ceosal2
)
stargazer(ceo_step2, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               salary
## -----------------------------------------------
## mhat                         1.117***
##                               (0.047)
##
## -----------------------------------------------
## Observations                    177
## R2                             0.762
## Residual Std. Error     511.870 (df = 176)
## F Statistic          562.392*** (df = 1; 176)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

Step 3

# Step 3: refit the log-salary equation and evaluate it by hand at
# lsales = log(5000), lmktval = log(10000), ceoten = 10. Coefficients are
# taken by position: 1 = intercept, 2 = lsales, 3 = lmktval, 4 = ceoten.
ceo_step3 <- lm(lsalary ~ lsales + lmktval + ceoten + 1, data = ceosal2)
ceo_step3_pred <- coef(ceo_step3)[1] +
  coef(ceo_step3)[2] * log(5000) +
  coef(ceo_step3)[3] * log(10000) +
  coef(ceo_step3)[4] * 10
ceo_step3_pred
## (Intercept)
##    7.014077

Step 4

# Step 4: convert the predicted log salary into a predicted salary level.
# Pull salary out as a free-standing vector so it can be regressed on the
# global `mhat` (exponentiated Step 1 fitted values) without a data argument.
salary <- ceosal2$salary
# Regression through the origin; the slope on mhat is the scale factor.
ceo_step4 <- lm(salary ~ mhat + 0)
# NOTE(review): 7.013 is hard-coded and is close to, but not identical to,
# ceo_step3_pred (7.014077) printed in Step 3. It is kept unchanged so the
# result printed below is reproduced -- confirm which value is intended.
ceo_step4_pred <- coef(ceo_step4)[1] * exp(7.013)
ceo_step4_pred
##     mhat
## 1240.808

#### Example 6.8. Predicting CEO salary

# Correlation between actual salary and the exponentiated fitted values from
# the log model (both are global vectors created in Example 6.7).
cor(x = mhat, y = salary)
## [1] 0.4930322
# Level-level benchmark: salary on sales, market value and tenure in levels,
# for comparison with the log model.
salary_reg <- lm(
  salary ~ sales + mktval + ceoten,
  data = ceosal2
)
stargazer(salary_reg, type = "text", align = TRUE)
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               salary
## -----------------------------------------------
## sales                         0.019*
##                               (0.010)
##
## mktval                        0.023**
##                               (0.009)
##
## ceoten                       12.703**
##                               (5.618)
##
## Constant                    613.436***
##                              (65.237)
##
## -----------------------------------------------
## Observations                    177
## R2                             0.201
## Note:               *p<0.1; **p<0.05; ***p<0.01