8 Heteroscedasticity

Also covered using Python and Stata

``````rm(list = ls())
library(wooldridge)
library(stargazer)
library(lmtest)
library(car)
options(width=120)``````

8.1 Example 8.1. Log wage equation with Heteroskedasticity

``````marrmale <- (wage1\$female ==0 & wage1\$married == 1)
marrfem <- (wage1\$female ==1 & wage1\$married == 1)
singfem <- (wage1\$female ==1 & wage1\$married == 0)
singmale <- (wage1\$female ==0 & wage1\$married == 0)
wage1c <- cbind(wage1, marrmale, marrfem, singfem, singmale)
wage_hetr <- lm(lwage ~ marrmale + marrfem + singfem + educ  + exper + expersq + tenure + tenursq + 1, data=wage1c)
yhat <- predict(wage_hetr)
residuals <- resid(wage_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")``````

``````wage_robust <- coeftest(wage_hetr, vcov = hccm )
stargazer(wage_hetr, wage_robust, no.space=TRUE, type="text")``````
``````##
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                              lwage
##                               OLS           coefficient
##                                                test
##                               (1)               (2)
## -------------------------------------------------------
## marrmale                   0.213***          0.213***
##                             (0.055)           (0.058)
## marrfem                    -0.198***         -0.198***
##                             (0.058)           (0.060)
## singfem                    -0.110**           -0.110*
##                             (0.056)           (0.058)
## educ                       0.079***          0.079***
##                             (0.007)           (0.008)
## exper                      0.027***          0.027***
##                             (0.005)           (0.005)
## expersq                    -0.001***         -0.001***
##                            (0.0001)          (0.0001)
## tenure                     0.029***          0.029***
##                             (0.007)           (0.007)
## tenursq                    -0.001**           -0.001*
##                            (0.0002)          (0.0003)
## Constant                   0.321***          0.321***
##                             (0.100)           (0.112)
## -------------------------------------------------------
## Observations                  526
## R2                           0.461
## Residual Std. Error    0.393 (df = 517)
## F Statistic         55.246*** (df = 8; 517)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01``````

8.2 Example 8.2. Heteroskedasticity-Robust F Statistic

``````gpa3b <- subset(gpa3, gpa3\$spring==1)
gpa_hetr <- lm(cumgpa  ~ sat + hsperc + tothrs + female + black + white + 1, data=gpa3b)
yhat <- predict(gpa_hetr)
residuals <- resid(gpa_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")``````

``````gpa_robust <- coeftest(gpa_hetr, vcov = hccm)
stargazer(gpa_hetr, gpa_robust, no.space=TRUE, type="text")``````
``````##
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                             cumgpa
##                               OLS           coefficient
##                                                test
##                               (1)               (2)
## -------------------------------------------------------
## sat                        0.001***          0.001***
##                            (0.0002)          (0.0002)
## hsperc                     -0.009***         -0.009***
##                             (0.001)           (0.001)
## tothrs                     0.003***          0.003***
##                             (0.001)           (0.001)
## female                     0.303***          0.303***
##                             (0.059)           (0.060)
## black                       -0.128            -0.128
##                             (0.147)           (0.128)
## white                       -0.059            -0.059
##                             (0.141)           (0.120)
## Constant                   1.470***          1.470***
##                             (0.230)           (0.229)
## -------------------------------------------------------
## Observations                  366
## R2                           0.401
## Residual Std. Error    0.469 (df = 359)
## F Statistic         39.982*** (df = 6; 359)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01``````

8.3 Example 8.3. Heteroskedasticity-Robust LM Statistic

``````avgsensq <- (crime1\$avgsen)**2
df <- cbind(crime1, avgsensq)
crime_hetr <- lm(narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
yhat <- predict(crime_hetr)
residuals <- resid(crime_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")``````

``````crime_robust <- coeftest(crime_hetr, vcov = hccm)
stargazer(crime_hetr, crime_robust, no.space=TRUE, type="text")``````
``````##
## ========================================================
##                             Dependent variable:
##                     ------------------------------------
##                              narr86
##                               OLS            coefficient
##                                                 test
##                               (1)                (2)
## --------------------------------------------------------
## pcnv                       -0.136***          -0.136***
##                             (0.040)            (0.034)
## avgsen                       0.018*            0.018*
##                             (0.010)            (0.010)
## avgsensq                    -0.001*           -0.001**
##                             (0.0003)          (0.0002)
## ptime86                    -0.039***          -0.039***
##                             (0.009)            (0.006)
## qemp86                     -0.051***          -0.051***
##                             (0.014)            (0.014)
## inc86                      -0.001***          -0.001***
##                             (0.0003)          (0.0002)
## black                       0.325***          0.325***
##                             (0.045)            (0.059)
## hispan                      0.193***          0.193***
##                             (0.040)            (0.040)
## Constant                    0.567***          0.567***
##                             (0.036)            (0.040)
## --------------------------------------------------------
## Observations                 2,725
## R2                           0.073
## Residual Std. Error    0.828 (df = 2716)
## F Statistic         26.655*** (df = 8; 2716)
## ========================================================
## Note:                        *p<0.1; **p<0.05; ***p<0.01``````

8.3.1 LM Statistic - not-robust (See Section 5-2)

``````crime_hetr_r <- lm(narr86 ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)

residuals <- resid(crime_hetr_r)

resid_reg <- lm(residuals ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
stargazer(crime_hetr_r, resid_reg, no.space=TRUE, type="text")``````
``````##
## =================================================================
##                                  Dependent variable:
##                     ---------------------------------------------
##                              narr86               residuals
##                               (1)                    (2)
## -----------------------------------------------------------------
## pcnv                       -0.132***                -0.003
##                             (0.040)                (0.040)
## avgsen                                              0.018*
##                                                    (0.010)
## avgsensq                                           -0.001*
##                                                    (0.0003)
## ptime86                    -0.038***                -0.002
##                             (0.008)                (0.009)
## qemp86                     -0.051***                0.0005
##                             (0.014)                (0.014)
## inc86                      -0.001***               0.00001
##                             (0.0003)               (0.0003)
## black                       0.330***                -0.005
##                             (0.045)                (0.045)
## hispan                      0.195***                -0.002
##                             (0.040)                (0.040)
## Constant                    0.570***                -0.003
##                             (0.036)                (0.036)
## -----------------------------------------------------------------
## Observations                 2,725                  2,725
## R2                           0.072                  0.001
## Residual Std. Error    0.829 (df = 2718)      0.828 (df = 2716)
## F Statistic         34.946*** (df = 6; 2718) 0.432 (df = 8; 2716)
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01``````
``````LM <- nobs(resid_reg)*summary(resid_reg)\$r.squared
LM``````
``## [1] 3.462601``

8.3.2 LM Statistic - robust

``````avgsen_reg <- lm(avgsen ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
resid_avg <- resid(avgsen_reg)*resid(crime_hetr_r)
avgsensq_reg <- lm(avgsensq ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
resid_avgsq <- resid(avgsensq_reg)*resid(crime_hetr_r)

one <- (df\$avgsen>=0)
df2 <- cbind(resid_avg, resid_avgsq, as.data.frame(one))
lm_robust <- lm(one ~ resid_avg + resid_avgsq + 0, data=df2)
stargazer(lm_robust, no.space=TRUE, type="text")``````
``````##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                                 one
## -----------------------------------------------
## resid_avg                     0.028**
##                               (0.014)
## resid_avgsq                   -0.001*
##                               (0.001)
## -----------------------------------------------
## Observations                   2,725
## R2                             0.001
## Residual Std. Error      1.000 (df = 2723)
## F Statistic            2.000 (df = 2; 2723)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01``````
``````LM <- nobs(lm_robust)*summary(lm_robust)\$r.squared
LM``````
``## [1] 3.997085``

8.4 Example 8.4. Heteroskedasticity in Housing Price Equations

``````hprice_hetr <- lm(price ~ lotsize + sqrft + bdrms + 1, data=hprice1)
uhat_sq <- resid(hprice_hetr)**2
uhat_sq_reg <- lm(uhat_sq ~ lotsize + sqrft + bdrms + 1, data=hprice1)
stargazer(uhat_sq_reg, no.space=TRUE, type="text")``````
``````##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               uhat_sq
## -----------------------------------------------
## lotsize                      0.202***
##                               (0.071)
## sqrft                          1.691
##                               (1.464)
## bdrms                        1,041.760
##                              (996.381)
## Constant                    -5,522.795*
##                             (3,259.478)
## -----------------------------------------------
## Observations                    88
## R2                             0.160
## Residual Std. Error     6,616.646 (df = 84)
## F Statistic            5.339*** (df = 3; 84)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01``````
``````LM <- nobs(uhat_sq_reg)*summary(uhat_sq_reg)\$r.squared
LM``````
``## [1] 14.09239``
``````hprice_hetr_log <- lm(lprice ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
uhat_sq_log <- resid(hprice_hetr_log)**2
uhat_sq_reg_log <- lm(uhat_sq_log ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
stargazer(uhat_sq_reg_log, no.space=TRUE, type="text")``````
``````##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                             uhat_sq_log
## -----------------------------------------------
## llotsize                      -0.007
##                               (0.015)
## lsqrft                        -0.063*
##                               (0.037)
## bdrms                          0.017
##                               (0.011)
## Constant                      0.510*
##                               (0.258)
## -----------------------------------------------
## Observations                    88
## R2                             0.048
## Residual Std. Error       0.073 (df = 84)
## F Statistic             1.412 (df = 3; 84)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01``````
``````LM <- nobs(uhat_sq_reg_log)*summary(uhat_sq_reg_log)\$r.squared
LM``````
``## [1] 4.223248``

8.5 Example 8.5. Special Form of the White Test

``````yhat <- predict(hprice_hetr_log)
yhat_sq <- yhat**2
special_reg <- lm(uhat_sq_log ~ yhat + yhat_sq + 1, data=hprice1)
stargazer(special_reg, no.space=TRUE, type="text")``````
``````##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                             uhat_sq_log
## -----------------------------------------------
## yhat                          -1.709
##                               (1.163)
## yhat_sq                        0.145
##                               (0.101)
## Constant                       5.047
##                               (3.345)
## -----------------------------------------------
## Observations                    88
## R2                             0.039
## Residual Std. Error       0.073 (df = 85)
## F Statistic             1.733 (df = 2; 85)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01``````
``````LM <- nobs(special_reg)*summary(special_reg)\$r.squared
LM``````
``## [1] 3.447286``

8.6 Example 8.6. Financial Wealth Equation (& Table 8.2)

``````k401ksubs <- subset(k401ksubs, k401ksubs\$fsize==1)
age25sq <- (k401ksubs\$age - 25)**2

OLS1 <- lm(nettfa ~ inc + 1, data=k401ksubs)
OLS1R <- coeftest(OLS1, vcov = hccm)
OLS2 <- lm(nettfa ~ inc + age25sq + male + e401k + 1, data=k401ksubs)
OLS2R <- coeftest(OLS2, vcov = hccm)

w <- 1/k401ksubs\$inc
WLS1 <- lm(nettfa ~ inc + 1, weights=w, data=k401ksubs)
WLS2 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs)
WLS3 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs) # Table 8.2

stargazer(OLS1R, OLS2R, WLS1, WLS2, WLS3, type="text", column.labels=c("OLS1R", "OLS2R", "WLS1", "WLS2", "WLS3"), keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, align = TRUE)``````
``````##
## ==================================================================
##                               Dependent variable:
##              -----------------------------------------------------
##                                                nettfa
##                   coefficient                    OLS
##                      test
##                OLS1R      OLS2R      WLS1       WLS2       WLS3
##                 (1)        (2)        (3)       (4)        (5)
## ------------------------------------------------------------------
## inc           0.821***   0.771***  0.787***   0.740***   0.740***
##               (0.104)    (0.100)    (0.063)   (0.064)    (0.064)
## age25sq                  0.025***             0.018***   0.018***
##                          (0.004)              (0.002)    (0.002)
## male                      2.478                1.841      1.841
##                          (2.065)              (1.564)    (1.564)
## e401k                    6.886***             5.188***   5.188***
##                          (2.292)              (1.703)    (1.703)
## Constant     -10.571*** -20.985*** -9.581*** -16.703*** -16.703***
##               (2.551)    (3.520)    (1.653)   (1.958)    (1.958)
## ------------------------------------------------------------------
## Observations                         2,017     2,017      2,017
## R2                                   0.071     0.112      0.112
## Adjusted R2                          0.070     0.110      0.110
## ==================================================================
## Note:                                  *p<0.1; **p<0.05; ***p<0.01``````

8.7 Example 8.7. Demand for Cigarettes

``````OLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke)
stargazer(OLS_smk, no.space=TRUE, single.row = TRUE,  type="text")``````
``````##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                                cigs
## -----------------------------------------------
## lincome                    0.880 (0.728)
## lcigpric                  -0.751 (5.773)
## educ                     -0.501*** (0.167)
## age                      0.771*** (0.160)
## agesq                    -0.009*** (0.002)
## restaurn                 -2.825** (1.112)
## Constant                  -3.640 (24.079)
## -----------------------------------------------
## Observations                    807
## R2                             0.053
## Residual Std. Error      13.405 (df = 800)
## F Statistic           7.423*** (df = 6; 800)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01``````
``````luhat_sq <- log(resid(OLS_smk)**2)
luhat_sq_reg <- lm(luhat_sq ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke)
luhat_sq_hat <- exp(predict(luhat_sq_reg))
w = 1/luhat_sq_hat
GLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, weights = w, data=smoke)

stargazer(OLS_smk, GLS_smk, column.labels=c("OLS","GLS"), no.space=TRUE, type="text")``````
``````##
## ===========================================================
##                                    Dependent variable:
##                                ----------------------------
##                                            cigs
##                                     OLS            GLS
##                                     (1)            (2)
## -----------------------------------------------------------
## lincome                            0.880        1.295***
##                                   (0.728)        (0.437)
## lcigpric                           -0.751        -2.940
##                                   (5.773)        (4.460)
## educ                             -0.501***      -0.463***
##                                   (0.167)        (0.120)
## age                               0.771***      0.482***
##                                   (0.160)        (0.097)
## agesq                            -0.009***      -0.006***
##                                   (0.002)        (0.001)
## restaurn                          -2.825**      -3.461***
##                                   (1.112)        (0.796)
## Constant                           -3.640         5.635
##                                   (24.079)      (17.803)
## -----------------------------------------------------------
## Observations                        807            807
## R2                                 0.053          0.113
## Residual Std. Error (df = 800)     13.405         1.579
## F Statistic (df = 6; 800)         7.423***      17.055***
## ===========================================================
## Note:                           *p<0.1; **p<0.05; ***p<0.01``````

8.8 Example 8.8. Labor Force Participation of Married Women

``````mroz_hetr <- lm(inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1, data=mroz)
mroz_robust <- coeftest(mroz_hetr, vcov = hccm)
stargazer(mroz_hetr, mroz_robust, column.labels=c("Herosced.","Robust"), no.space=TRUE, type="text")``````
``````##
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                              inlf
##                               OLS           coefficient
##                                                test
##                            Herosced.          Robust
##                               (1)               (2)
## -------------------------------------------------------
## nwifeinc                   -0.003**          -0.003**
##                             (0.001)           (0.002)
## educ                       0.038***          0.038***
##                             (0.007)           (0.007)
## exper                      0.039***          0.039***
##                             (0.006)           (0.006)
## expersq                    -0.001***         -0.001***
##                            (0.0002)          (0.0002)
## age                        -0.016***         -0.016***
##                             (0.002)           (0.002)
## kidslt6                    -0.262***         -0.262***
##                             (0.034)           (0.032)
## kidsge6                      0.013             0.013
##                             (0.013)           (0.014)
## Constant                   0.586***          0.586***
##                             (0.154)           (0.154)
## -------------------------------------------------------
## Observations                  753
## R2                           0.264
## Residual Std. Error    0.427 (df = 745)
## F Statistic         38.218*** (df = 7; 745)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01``````

8.9 Example 8.9. Determinants of Personal Computer Ownership

``````parcoll <- (gpa1\$fathcoll==1 | gpa1\$mothcoll==1)
gpa1 <- cbind(gpa1, parcoll)
gpa_hetr <- lm(PC ~ hsGPA + ACT + parcoll  + 1, data=gpa1)
gpa_robust <- coeftest(gpa_hetr, vcov = hccm)

w <- 1/(predict(gpa_hetr) - (predict(gpa_hetr)**2))
gpa_gls = lm(PC ~ hsGPA + ACT + parcoll  + 1, weights = w, data=gpa1)

stargazer(gpa_hetr, gpa_robust, gpa_gls, column.labels=c("Herosced.","Robust", "GLS"), no.space=TRUE, type="text")``````
``````##
## ============================================================
##                                     Dependent variable:
##                                -----------------------------
##                                   PC                   PC
##                                   OLS    coefficient   OLS
##                                             test
##                                Herosced.   Robust      GLS
##                                   (1)        (2)       (3)
## ------------------------------------------------------------
## hsGPA                            0.065      0.065     0.033
##                                 (0.137)    (0.148)   (0.130)
## ACT                              0.001      0.001     0.004
##                                 (0.015)    (0.017)   (0.015)
## parcoll                         0.221**    0.221**   0.215**
##                                 (0.093)    (0.090)   (0.086)
## Constant                        -0.0004    -0.0004    0.026
##                                 (0.491)    (0.512)   (0.477)
## ------------------------------------------------------------
## Observations                      141                  141
## R2                               0.042                0.046