Introductory Econometrics Using R

Also covered using Python and Stata

library(wooldridge)
library(stargazer)
library(lmtest)
library(car)
options(width=120)

#### Example 8.1. Log wage equation with Heteroskedasticity

marrmale <- (wage1$female ==0 & wage1$married == 1)
marrfem <- (wage1$female ==1 & wage1$married == 1)
singfem <- (wage1$female ==1 & wage1$married == 0)
singmale <- (wage1$female ==0 & wage1$married == 0)
wage1c <- cbind(wage1, marrmale, marrfem, singfem, singmale)
wage_hetr <- lm(lwage ~ marrmale + marrfem + singfem + educ  + exper + expersq + tenure + tenursq + 1, data=wage1c)
yhat <- predict(wage_hetr)
residuals <- resid(wage_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")

wage_robust <- coeftest(wage_hetr, vcov = hccm )
stargazer(wage_hetr, wage_robust, no.space=TRUE, type="text")
##
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                              lwage
##                               OLS           coefficient
##                                                test
##                               (1)               (2)
## -------------------------------------------------------
## marrmale                   0.213***          0.213***
##                             (0.055)           (0.058)
## marrfem                    -0.198***         -0.198***
##                             (0.058)           (0.060)
## singfem                    -0.110**           -0.110*
##                             (0.056)           (0.058)
## educ                       0.079***          0.079***
##                             (0.007)           (0.008)
## exper                      0.027***          0.027***
##                             (0.005)           (0.005)
## expersq                    -0.001***         -0.001***
##                            (0.0001)          (0.0001)
## tenure                     0.029***          0.029***
##                             (0.007)           (0.007)
## tenursq                    -0.001**           -0.001*
##                            (0.0002)          (0.0003)
## Constant                   0.321***          0.321***
##                             (0.100)           (0.112)
## -------------------------------------------------------
## Observations                  526
## R2                           0.461
## Residual Std. Error    0.393 (df = 517)
## F Statistic         55.246*** (df = 8; 517)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

gpa3b <- subset(gpa3, gpa3$spring==1) gpa_hetr <- lm(cumgpa ~ sat + hsperc + tothrs + female + black + white + 1, data=gpa3b) yhat <- predict(gpa_hetr) residuals <- resid(gpa_hetr) plot(yhat, residuals, xlab="fitted values", ylab="residuals") gpa_robust <- coeftest(gpa_hetr, vcov = hccm) stargazer(gpa_hetr, gpa_robust, no.space=TRUE, type="text") ## ## ======================================================= ## Dependent variable: ## ----------------------------------- ## cumgpa ## OLS coefficient ## test ## (1) (2) ## ------------------------------------------------------- ## sat 0.001*** 0.001*** ## (0.0002) (0.0002) ## hsperc -0.009*** -0.009*** ## (0.001) (0.001) ## tothrs 0.003*** 0.003*** ## (0.001) (0.001) ## female 0.303*** 0.303*** ## (0.059) (0.060) ## black -0.128 -0.128 ## (0.147) (0.128) ## white -0.059 -0.059 ## (0.141) (0.120) ## Constant 1.470*** 1.470*** ## (0.230) (0.229) ## ------------------------------------------------------- ## Observations 366 ## R2 0.401 ## Adjusted R2 0.391 ## Residual Std. Error 0.469 (df = 359) ## F Statistic 39.982*** (df = 6; 359) ## ======================================================= ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Example 8.3. Heteroskedasticity-Robust LM Statistic avgsensq <- (crime1$avgsen)**2
df <- cbind(crime1, avgsensq)
crime_hetr <- lm(narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
yhat <- predict(crime_hetr)
residuals <- resid(crime_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")

crime_robust <- coeftest(crime_hetr, vcov = hccm)
stargazer(crime_hetr, crime_robust, no.space=TRUE, type="text")
##
## ========================================================
##                             Dependent variable:
##                     ------------------------------------
##                              narr86
##                               OLS            coefficient
##                                                 test
##                               (1)                (2)
## --------------------------------------------------------
## pcnv                       -0.136***          -0.136***
##                             (0.040)            (0.034)
## avgsen                       0.018*            0.018*
##                             (0.010)            (0.010)
## avgsensq                    -0.001*           -0.001**
##                             (0.0003)          (0.0002)
## ptime86                    -0.039***          -0.039***
##                             (0.009)            (0.006)
## qemp86                     -0.051***          -0.051***
##                             (0.014)            (0.014)
## inc86                      -0.001***          -0.001***
##                             (0.0003)          (0.0002)
## black                       0.325***          0.325***
##                             (0.045)            (0.059)
## hispan                      0.193***          0.193***
##                             (0.040)            (0.040)
## Constant                    0.567***          0.567***
##                             (0.036)            (0.040)
## --------------------------------------------------------
## Observations                 2,725
## R2                           0.073
## Residual Std. Error    0.828 (df = 2716)
## F Statistic         26.655*** (df = 8; 2716)
## ========================================================
## Note:                        *p<0.1; **p<0.05; ***p<0.01
##### LM Statistic - not-robust (See Section 5-2)
crime_hetr_r <- lm(narr86 ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)

residuals <- resid(crime_hetr_r)

resid_reg <- lm(residuals ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
stargazer(crime_hetr_r, resid_reg, no.space=TRUE, type="text")
##
## =================================================================
##                                  Dependent variable:
##                     ---------------------------------------------
##                              narr86               residuals
##                               (1)                    (2)
## -----------------------------------------------------------------
## pcnv                       -0.132***                -0.003
##                             (0.040)                (0.040)
## avgsen                                              0.018*
##                                                    (0.010)
## avgsensq                                           -0.001*
##                                                    (0.0003)
## ptime86                    -0.038***                -0.002
##                             (0.008)                (0.009)
## qemp86                     -0.051***                0.0005
##                             (0.014)                (0.014)
## inc86                      -0.001***               0.00001
##                             (0.0003)               (0.0003)
## black                       0.330***                -0.005
##                             (0.045)                (0.045)
## hispan                      0.195***                -0.002
##                             (0.040)                (0.040)
## Constant                    0.570***                -0.003
##                             (0.036)                (0.036)
## -----------------------------------------------------------------
## Observations                 2,725                  2,725
## R2                           0.072                  0.001
## Residual Std. Error    0.829 (df = 2718)      0.828 (df = 2716)
## F Statistic         34.946*** (df = 6; 2718) 0.432 (df = 8; 2716)
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(resid_reg)*summary(resid_reg)$r.squared LM ## [1] 3.462601 ##### LM Statistic - robust avgsen_reg <- lm(avgsen ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df) resid_avg <- resid(avgsen_reg)*resid(crime_hetr_r) avgsensq_reg <- lm(avgsensq ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df) resid_avgsq <- resid(avgsensq_reg)*resid(crime_hetr_r) one <- (df$avgsen>=0)
df2 <- cbind(resid_avg, resid_avgsq, as.data.frame(one))
lm_robust <- lm(one ~ resid_avg + resid_avgsq + 0, data=df2)
stargazer(lm_robust, no.space=TRUE, type="text")
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                                 one
## -----------------------------------------------
## resid_avg                     0.028**
##                               (0.014)
## resid_avgsq                   -0.001*
##                               (0.001)
## -----------------------------------------------
## Observations                   2,725
## R2                             0.001
## Residual Std. Error      1.000 (df = 2723)
## F Statistic            2.000 (df = 2; 2723)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(lm_robust)*summary(lm_robust)$r.squared LM ## [1] 3.997085 #### Example 8.4. Heteroskedasticity in Housing Price Equations hprice_hetr <- lm(price ~ lotsize + sqrft + bdrms + 1, data=hprice1) uhat_sq <- resid(hprice_hetr)**2 uhat_sq_reg <- lm(uhat_sq ~ lotsize + sqrft + bdrms + 1, data=hprice1) stargazer(uhat_sq_reg, no.space=TRUE, type="text") ## ## =============================================== ## Dependent variable: ## --------------------------- ## uhat_sq ## ----------------------------------------------- ## lotsize 0.202*** ## (0.071) ## sqrft 1.691 ## (1.464) ## bdrms 1,041.760 ## (996.381) ## Constant -5,522.795* ## (3,259.478) ## ----------------------------------------------- ## Observations 88 ## R2 0.160 ## Adjusted R2 0.130 ## Residual Std. Error 6,616.646 (df = 84) ## F Statistic 5.339*** (df = 3; 84) ## =============================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 LM <- nobs(uhat_sq_reg)*summary(uhat_sq_reg)$r.squared
LM
## [1] 14.09239
hprice_hetr_log <- lm(lprice ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
uhat_sq_log <- resid(hprice_hetr_log)**2
uhat_sq_reg_log <- lm(uhat_sq_log ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
stargazer(uhat_sq_reg_log, no.space=TRUE, type="text")
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                             uhat_sq_log
## -----------------------------------------------
## llotsize                      -0.007
##                               (0.015)
## lsqrft                        -0.063*
##                               (0.037)
## bdrms                          0.017
##                               (0.011)
## Constant                      0.510*
##                               (0.258)
## -----------------------------------------------
## Observations                    88
## R2                             0.048
## Residual Std. Error       0.073 (df = 84)
## F Statistic             1.412 (df = 3; 84)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(uhat_sq_reg_log)*summary(uhat_sq_reg_log)$r.squared LM ## [1] 4.223248 #### Example 8.5. Special Form of the White Test yhat <- predict(hprice_hetr_log) yhat_sq <- yhat**2 special_reg <- lm(uhat_sq_log ~ yhat + yhat_sq + 1, data=hprice1) stargazer(special_reg, no.space=TRUE, type="text") ## ## =============================================== ## Dependent variable: ## --------------------------- ## uhat_sq_log ## ----------------------------------------------- ## yhat -1.709 ## (1.163) ## yhat_sq 0.145 ## (0.101) ## Constant 5.047 ## (3.345) ## ----------------------------------------------- ## Observations 88 ## R2 0.039 ## Adjusted R2 0.017 ## Residual Std. Error 0.073 (df = 85) ## F Statistic 1.733 (df = 2; 85) ## =============================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 LM <- nobs(special_reg)*summary(special_reg)$r.squared
LM
## [1] 3.447286

#### Example 8.6. Financial Wealth Equation (& Table 8.2)

k401ksubs <- subset(k401ksubs, k401ksubs$fsize==1) age25sq <- (k401ksubs$age - 25)**2

OLS1 <- lm(nettfa ~ inc + 1, data=k401ksubs)
OLS1R <- coeftest(OLS1, vcov = hccm)
OLS2 <- lm(nettfa ~ inc + age25sq + male + e401k + 1, data=k401ksubs)
OLS2R <- coeftest(OLS2, vcov = hccm)

w <- 1/k401ksubsinc WLS1 <- lm(nettfa ~ inc + 1, weights=w, data=k401ksubs) WLS2 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs) WLS3 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs) # Table 8.2 stargazer(OLS1R, OLS2R, WLS1, WLS2, WLS3, type="text", column.labels=c("OLS1R", "OLS2R", "WLS1", "WLS2", "WLS3"), keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, align = TRUE) ## ## ================================================================== ## Dependent variable: ## ----------------------------------------------------- ## nettfa ## coefficient OLS ## test ## OLS1R OLS2R WLS1 WLS2 WLS3 ## (1) (2) (3) (4) (5) ## ------------------------------------------------------------------ ## inc 0.821*** 0.771*** 0.787*** 0.740*** 0.740*** ## (0.104) (0.100) (0.063) (0.064) (0.064) ## age25sq 0.025*** 0.018*** 0.018*** ## (0.004) (0.002) (0.002) ## male 2.478 1.841 1.841 ## (2.065) (1.564) (1.564) ## e401k 6.886*** 5.188*** 5.188*** ## (2.292) (1.703) (1.703) ## Constant -10.571*** -20.985*** -9.581*** -16.703*** -16.703*** ## (2.551) (3.520) (1.653) (1.958) (1.958) ## ------------------------------------------------------------------ ## Observations 2,017 2,017 2,017 ## R2 0.071 0.112 0.112 ## Adjusted R2 0.070 0.110 0.110 ## ================================================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Example 8.7. Demand for Cigarettes OLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke) stargazer(OLS_smk, no.space=TRUE, single.row = TRUE, type="text") ## ## =============================================== ## Dependent variable: ## --------------------------- ## cigs ## ----------------------------------------------- ## lincome 0.880 (0.728) ## lcigpric -0.751 (5.773) ## educ -0.501*** (0.167) ## age 0.771*** (0.160) ## agesq -0.009*** (0.002) ## restaurn -2.825** (1.112) ## Constant -3.640 (24.079) ## ----------------------------------------------- ## Observations 807 ## R2 0.053 ## Adjusted R2 0.046 ## Residual Std. Error 13.405 (df = 800) ## F Statistic 7.423*** (df = 6; 800) ## =============================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 luhat_sq <- log(resid(OLS_smk)**2) luhat_sq_reg <- lm(luhat_sq ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke) luhat_sq_hat <- exp(predict(luhat_sq_reg)) w = 1/luhat_sq_hat GLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, weights = w, data=smoke) stargazer(OLS_smk, GLS_smk, column.labels=c("OLS","GLS"), no.space=TRUE, type="text") ## ## =========================================================== ## Dependent variable: ## ---------------------------- ## cigs ## OLS GLS ## (1) (2) ## ----------------------------------------------------------- ## lincome 0.880 1.295*** ## (0.728) (0.437) ## lcigpric -0.751 -2.940 ## (5.773) (4.460) ## educ -0.501*** -0.463*** ## (0.167) (0.120) ## age 0.771*** 0.482*** ## (0.160) (0.097) ## agesq -0.009*** -0.006*** ## (0.002) (0.001) ## restaurn -2.825** -3.461*** ## (1.112) (0.796) ## Constant -3.640 5.635 ## (24.079) (17.803) ## ----------------------------------------------------------- ## Observations 807 807 ## R2 0.053 0.113 ## Adjusted R2 0.046 0.107 ## Residual Std. Error (df = 800) 13.405 1.579 ## F Statistic (df = 6; 800) 7.423*** 17.055*** ## =========================================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Example 8.8. Labor Force Participation of Married Women mroz_hetr <- lm(inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1, data=mroz) mroz_robust <- coeftest(mroz_hetr, vcov = hccm) stargazer(mroz_hetr, mroz_robust, column.labels=c("Herosced.","Robust"), no.space=TRUE, type="text") ## ## ======================================================= ## Dependent variable: ## ----------------------------------- ## inlf ## OLS coefficient ## test ## Herosced. Robust ## (1) (2) ## ------------------------------------------------------- ## nwifeinc -0.003** -0.003** ## (0.001) (0.002) ## educ 0.038*** 0.038*** ## (0.007) (0.007) ## exper 0.039*** 0.039*** ## (0.006) (0.006) ## expersq -0.001*** -0.001*** ## (0.0002) (0.0002) ## age -0.016*** -0.016*** ## (0.002) (0.002) ## kidslt6 -0.262*** -0.262*** ## (0.034) (0.032) ## kidsge6 0.013 0.013 ## (0.013) (0.014) ## Constant 0.586*** 0.586*** ## (0.154) (0.154) ## ------------------------------------------------------- ## Observations 753 ## R2 0.264 ## Adjusted R2 0.257 ## Residual Std. Error 0.427 (df = 745) ## F Statistic 38.218*** (df = 7; 745) ## ======================================================= ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Example 8.9. Determinants of Personal Computer Ownership parcoll <- (gpa1fathcoll==1 | gpa1\$mothcoll==1)
gpa1 <- cbind(gpa1, parcoll)
gpa_hetr <- lm(PC ~ hsGPA + ACT + parcoll  + 1, data=gpa1)
gpa_robust <- coeftest(gpa_hetr, vcov = hccm)

w <- 1/(predict(gpa_hetr) - (predict(gpa_hetr)**2))
gpa_gls = lm(PC ~ hsGPA + ACT + parcoll  + 1, weights = w, data=gpa1)

stargazer(gpa_hetr, gpa_robust, gpa_gls, column.labels=c("Herosced.","Robust", "GLS"), no.space=TRUE, type="text")
##
## ============================================================
##                                     Dependent variable:
##                                -----------------------------
##                                   PC                   PC
##                                   OLS    coefficient   OLS
##                                             test
##                                Herosced.   Robust      GLS
##                                   (1)        (2)       (3)
## ------------------------------------------------------------
## hsGPA                            0.065      0.065     0.033
##                                 (0.137)    (0.148)   (0.130)
## ACT                              0.001      0.001     0.004
##                                 (0.015)    (0.017)   (0.015)
## parcoll                         0.221**    0.221**   0.215**
##                                 (0.093)    (0.090)   (0.086)
## Constant                        -0.0004    -0.0004    0.026
##                                 (0.491)    (0.512)   (0.477)
## ------------------------------------------------------------
## Observations                      141                  141
## R2                               0.042                0.046
## Note:                            *p<0.1; **p<0.05; ***p<0.01