Also covered using Python and Stata
library(wooldridge)
library(stargazer)
library(lmtest)
library(car)
options(width=120)
marrmale <- (wage1$female ==0 & wage1$married == 1)
marrfem <- (wage1$female ==1 & wage1$married == 1)
singfem <- (wage1$female ==1 & wage1$married == 0)
singmale <- (wage1$female ==0 & wage1$married == 0)
wage1c <- cbind(wage1, marrmale, marrfem, singfem, singmale)
wage_hetr <- lm(lwage ~ marrmale + marrfem + singfem + educ + exper + expersq + tenure + tenursq + 1, data=wage1c)
yhat <- predict(wage_hetr)
residuals <- resid(wage_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")
wage_robust <- coeftest(wage_hetr, vcov = hccm )
stargazer(wage_hetr, wage_robust, no.space=TRUE, type="text")
##
## =======================================================
## Dependent variable:
## -----------------------------------
## lwage
## OLS coefficient
## test
## (1) (2)
## -------------------------------------------------------
## marrmale 0.213*** 0.213***
## (0.055) (0.058)
## marrfem -0.198*** -0.198***
## (0.058) (0.060)
## singfem -0.110** -0.110*
## (0.056) (0.058)
## educ 0.079*** 0.079***
## (0.007) (0.008)
## exper 0.027*** 0.027***
## (0.005) (0.005)
## expersq -0.001*** -0.001***
## (0.0001) (0.0001)
## tenure 0.029*** 0.029***
## (0.007) (0.007)
## tenursq -0.001** -0.001*
## (0.0002) (0.0003)
## Constant 0.321*** 0.321***
## (0.100) (0.112)
## -------------------------------------------------------
## Observations 526
## R2 0.461
## Adjusted R2 0.453
## Residual Std. Error 0.393 (df = 517)
## F Statistic 55.246*** (df = 8; 517)
## =======================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
gpa3b <- subset(gpa3, gpa3$spring==1)
gpa_hetr <- lm(cumgpa ~ sat + hsperc + tothrs + female + black + white + 1, data=gpa3b)
yhat <- predict(gpa_hetr)
residuals <- resid(gpa_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")
gpa_robust <- coeftest(gpa_hetr, vcov = hccm)
stargazer(gpa_hetr, gpa_robust, no.space=TRUE, type="text")
##
## =======================================================
## Dependent variable:
## -----------------------------------
## cumgpa
## OLS coefficient
## test
## (1) (2)
## -------------------------------------------------------
## sat 0.001*** 0.001***
## (0.0002) (0.0002)
## hsperc -0.009*** -0.009***
## (0.001) (0.001)
## tothrs 0.003*** 0.003***
## (0.001) (0.001)
## female 0.303*** 0.303***
## (0.059) (0.060)
## black -0.128 -0.128
## (0.147) (0.128)
## white -0.059 -0.059
## (0.141) (0.120)
## Constant 1.470*** 1.470***
## (0.230) (0.229)
## -------------------------------------------------------
## Observations 366
## R2 0.401
## Adjusted R2 0.391
## Residual Std. Error 0.469 (df = 359)
## F Statistic 39.982*** (df = 6; 359)
## =======================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
avgsensq <- (crime1$avgsen)**2
df <- cbind(crime1, avgsensq)
crime_hetr <- lm(narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
yhat <- predict(crime_hetr)
residuals <- resid(crime_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")
crime_robust <- coeftest(crime_hetr, vcov = hccm)
stargazer(crime_hetr, crime_robust, no.space=TRUE, type="text")
##
## ========================================================
## Dependent variable:
## ------------------------------------
## narr86
## OLS coefficient
## test
## (1) (2)
## --------------------------------------------------------
## pcnv -0.136*** -0.136***
## (0.040) (0.034)
## avgsen 0.018* 0.018*
## (0.010) (0.010)
## avgsensq -0.001* -0.001**
## (0.0003) (0.0002)
## ptime86 -0.039*** -0.039***
## (0.009) (0.006)
## qemp86 -0.051*** -0.051***
## (0.014) (0.014)
## inc86 -0.001*** -0.001***
## (0.0003) (0.0002)
## black 0.325*** 0.325***
## (0.045) (0.059)
## hispan 0.193*** 0.193***
## (0.040) (0.040)
## Constant 0.567*** 0.567***
## (0.036) (0.040)
## --------------------------------------------------------
## Observations 2,725
## R2 0.073
## Adjusted R2 0.070
## Residual Std. Error 0.828 (df = 2716)
## F Statistic 26.655*** (df = 8; 2716)
## ========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
crime_hetr_r <- lm(narr86 ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
residuals <- resid(crime_hetr_r)
resid_reg <- lm(residuals ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
stargazer(crime_hetr_r, resid_reg, no.space=TRUE, type="text")
##
## =================================================================
## Dependent variable:
## ---------------------------------------------
## narr86 residuals
## (1) (2)
## -----------------------------------------------------------------
## pcnv -0.132*** -0.003
## (0.040) (0.040)
## avgsen 0.018*
## (0.010)
## avgsensq -0.001*
## (0.0003)
## ptime86 -0.038*** -0.002
## (0.008) (0.009)
## qemp86 -0.051*** 0.0005
## (0.014) (0.014)
## inc86 -0.001*** 0.00001
## (0.0003) (0.0003)
## black 0.330*** -0.005
## (0.045) (0.045)
## hispan 0.195*** -0.002
## (0.040) (0.040)
## Constant 0.570*** -0.003
## (0.036) (0.036)
## -----------------------------------------------------------------
## Observations 2,725 2,725
## R2 0.072 0.001
## Adjusted R2 0.070 -0.002
## Residual Std. Error 0.829 (df = 2718) 0.828 (df = 2716)
## F Statistic 34.946*** (df = 6; 2718) 0.432 (df = 8; 2716)
## =================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(resid_reg)*summary(resid_reg)$r.squared
LM
## [1] 3.462601
avgsen_reg <- lm(avgsen ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
resid_avg <- resid(avgsen_reg)*resid(crime_hetr_r)
avgsensq_reg <- lm(avgsensq ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
resid_avgsq <- resid(avgsensq_reg)*resid(crime_hetr_r)
one <- (df$avgsen>=0)
df2 <- cbind(resid_avg, resid_avgsq, as.data.frame(one))
lm_robust <- lm(one ~ resid_avg + resid_avgsq + 0, data=df2)
stargazer(lm_robust, no.space=TRUE, type="text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## one
## -----------------------------------------------
## resid_avg 0.028**
## (0.014)
## resid_avgsq -0.001*
## (0.001)
## -----------------------------------------------
## Observations 2,725
## R2 0.001
## Adjusted R2 0.001
## Residual Std. Error 1.000 (df = 2723)
## F Statistic 2.000 (df = 2; 2723)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(lm_robust)*summary(lm_robust)$r.squared
LM
## [1] 3.997085
hprice_hetr <- lm(price ~ lotsize + sqrft + bdrms + 1, data=hprice1)
uhat_sq <- resid(hprice_hetr)**2
uhat_sq_reg <- lm(uhat_sq ~ lotsize + sqrft + bdrms + 1, data=hprice1)
stargazer(uhat_sq_reg, no.space=TRUE, type="text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## uhat_sq
## -----------------------------------------------
## lotsize 0.202***
## (0.071)
## sqrft 1.691
## (1.464)
## bdrms 1,041.760
## (996.381)
## Constant -5,522.795*
## (3,259.478)
## -----------------------------------------------
## Observations 88
## R2 0.160
## Adjusted R2 0.130
## Residual Std. Error 6,616.646 (df = 84)
## F Statistic 5.339*** (df = 3; 84)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(uhat_sq_reg)*summary(uhat_sq_reg)$r.squared
LM
## [1] 14.09239
hprice_hetr_log <- lm(lprice ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
uhat_sq_log <- resid(hprice_hetr_log)**2
uhat_sq_reg_log <- lm(uhat_sq_log ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
stargazer(uhat_sq_reg_log, no.space=TRUE, type="text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## uhat_sq_log
## -----------------------------------------------
## llotsize -0.007
## (0.015)
## lsqrft -0.063*
## (0.037)
## bdrms 0.017
## (0.011)
## Constant 0.510*
## (0.258)
## -----------------------------------------------
## Observations 88
## R2 0.048
## Adjusted R2 0.014
## Residual Std. Error 0.073 (df = 84)
## F Statistic 1.412 (df = 3; 84)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(uhat_sq_reg_log)*summary(uhat_sq_reg_log)$r.squared
LM
## [1] 4.223248
yhat <- predict(hprice_hetr_log)
yhat_sq <- yhat**2
special_reg <- lm(uhat_sq_log ~ yhat + yhat_sq + 1, data=hprice1)
stargazer(special_reg, no.space=TRUE, type="text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## uhat_sq_log
## -----------------------------------------------
## yhat -1.709
## (1.163)
## yhat_sq 0.145
## (0.101)
## Constant 5.047
## (3.345)
## -----------------------------------------------
## Observations 88
## R2 0.039
## Adjusted R2 0.017
## Residual Std. Error 0.073 (df = 85)
## F Statistic 1.733 (df = 2; 85)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(special_reg)*summary(special_reg)$r.squared
LM
## [1] 3.447286
k401ksubs <- subset(k401ksubs, k401ksubs$fsize==1)
age25sq <- (k401ksubs$age - 25)**2
OLS1 <- lm(nettfa ~ inc + 1, data=k401ksubs)
OLS1R <- coeftest(OLS1, vcov = hccm)
OLS2 <- lm(nettfa ~ inc + age25sq + male + e401k + 1, data=k401ksubs)
OLS2R <- coeftest(OLS2, vcov = hccm)
w <- 1/k401ksubs$inc
WLS1 <- lm(nettfa ~ inc + 1, weights=w, data=k401ksubs)
WLS2 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs)
WLS3 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs) # Table 8.2
stargazer(OLS1R, OLS2R, WLS1, WLS2, WLS3, type="text", column.labels=c("OLS1R", "OLS2R", "WLS1", "WLS2", "WLS3"), keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, align = TRUE)
##
## ==================================================================
## Dependent variable:
## -----------------------------------------------------
## nettfa
## coefficient OLS
## test
## OLS1R OLS2R WLS1 WLS2 WLS3
## (1) (2) (3) (4) (5)
## ------------------------------------------------------------------
## inc 0.821*** 0.771*** 0.787*** 0.740*** 0.740***
## (0.104) (0.100) (0.063) (0.064) (0.064)
## age25sq 0.025*** 0.018*** 0.018***
## (0.004) (0.002) (0.002)
## male 2.478 1.841 1.841
## (2.065) (1.564) (1.564)
## e401k 6.886*** 5.188*** 5.188***
## (2.292) (1.703) (1.703)
## Constant -10.571*** -20.985*** -9.581*** -16.703*** -16.703***
## (2.551) (3.520) (1.653) (1.958) (1.958)
## ------------------------------------------------------------------
## Observations 2,017 2,017 2,017
## R2 0.071 0.112 0.112
## Adjusted R2 0.070 0.110 0.110
## ==================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
OLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke)
stargazer(OLS_smk, no.space=TRUE, single.row = TRUE, type="text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## cigs
## -----------------------------------------------
## lincome 0.880 (0.728)
## lcigpric -0.751 (5.773)
## educ -0.501*** (0.167)
## age 0.771*** (0.160)
## agesq -0.009*** (0.002)
## restaurn -2.825** (1.112)
## Constant -3.640 (24.079)
## -----------------------------------------------
## Observations 807
## R2 0.053
## Adjusted R2 0.046
## Residual Std. Error 13.405 (df = 800)
## F Statistic 7.423*** (df = 6; 800)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
luhat_sq <- log(resid(OLS_smk)**2)
luhat_sq_reg <- lm(luhat_sq ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke)
luhat_sq_hat <- exp(predict(luhat_sq_reg))
w = 1/luhat_sq_hat
GLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, weights = w, data=smoke)
stargazer(OLS_smk, GLS_smk, column.labels=c("OLS","GLS"), no.space=TRUE, type="text")
##
## ===========================================================
## Dependent variable:
## ----------------------------
## cigs
## OLS GLS
## (1) (2)
## -----------------------------------------------------------
## lincome 0.880 1.295***
## (0.728) (0.437)
## lcigpric -0.751 -2.940
## (5.773) (4.460)
## educ -0.501*** -0.463***
## (0.167) (0.120)
## age 0.771*** 0.482***
## (0.160) (0.097)
## agesq -0.009*** -0.006***
## (0.002) (0.001)
## restaurn -2.825** -3.461***
## (1.112) (0.796)
## Constant -3.640 5.635
## (24.079) (17.803)
## -----------------------------------------------------------
## Observations 807 807
## R2 0.053 0.113
## Adjusted R2 0.046 0.107
## Residual Std. Error (df = 800) 13.405 1.579
## F Statistic (df = 6; 800) 7.423*** 17.055***
## ===========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
mroz_hetr <- lm(inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1, data=mroz)
mroz_robust <- coeftest(mroz_hetr, vcov = hccm)
stargazer(mroz_hetr, mroz_robust, column.labels=c("Herosced.","Robust"), no.space=TRUE, type="text")
##
## =======================================================
## Dependent variable:
## -----------------------------------
## inlf
## OLS coefficient
## test
## Herosced. Robust
## (1) (2)
## -------------------------------------------------------
## nwifeinc -0.003** -0.003**
## (0.001) (0.002)
## educ 0.038*** 0.038***
## (0.007) (0.007)
## exper 0.039*** 0.039***
## (0.006) (0.006)
## expersq -0.001*** -0.001***
## (0.0002) (0.0002)
## age -0.016*** -0.016***
## (0.002) (0.002)
## kidslt6 -0.262*** -0.262***
## (0.034) (0.032)
## kidsge6 0.013 0.013
## (0.013) (0.014)
## Constant 0.586*** 0.586***
## (0.154) (0.154)
## -------------------------------------------------------
## Observations 753
## R2 0.264
## Adjusted R2 0.257
## Residual Std. Error 0.427 (df = 745)
## F Statistic 38.218*** (df = 7; 745)
## =======================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
parcoll <- (gpa1$fathcoll==1 | gpa1$mothcoll==1)
gpa1 <- cbind(gpa1, parcoll)
gpa_hetr <- lm(PC ~ hsGPA + ACT + parcoll + 1, data=gpa1)
gpa_robust <- coeftest(gpa_hetr, vcov = hccm)
w <- 1/(predict(gpa_hetr) - (predict(gpa_hetr)**2))
gpa_gls = lm(PC ~ hsGPA + ACT + parcoll + 1, weights = w, data=gpa1)
stargazer(gpa_hetr, gpa_robust, gpa_gls, column.labels=c("Herosced.","Robust", "GLS"), no.space=TRUE, type="text")
##
## ============================================================
## Dependent variable:
## -----------------------------
## PC PC
## OLS coefficient OLS
## test
## Herosced. Robust GLS
## (1) (2) (3)
## ------------------------------------------------------------
## hsGPA 0.065 0.065 0.033
## (0.137) (0.148) (0.130)
## ACT 0.001 0.001 0.004
## (0.015) (0.017) (0.015)
## parcoll 0.221** 0.221** 0.215**
## (0.093) (0.090) (0.086)
## Constant -0.0004 -0.0004 0.026
## (0.491) (0.512) (0.477)
## ------------------------------------------------------------
## Observations 141 141
## R2 0.042 0.046
## Adjusted R2 0.021 0.026
## Residual Std. Error (df = 137) 0.486 1.016
## F Statistic (df = 3; 137) 1.979 2.224*
## ============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01