3.4 Illustration: Explaining House Prices Load libraries
library(haven)
library(stargazer)
library(dplyr)
library(MASS)
library(AER)
OLS results hedonic price function
df <- read_dta("Data/housing.dta")
OLS1 = lm(log(price)~log(lotsize) + bedrooms + bathrms + airco,data=df)
summary(OLS1)
##
## Call:
## lm(formula = log(price) ~ log(lotsize) + bedrooms + bathrms +
## airco, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81782 -0.15562 0.00778 0.16468 0.84143
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.09378 0.23155 30.636 < 2e-16 ***
## log(lotsize) 0.40042 0.02781 14.397 < 2e-16 ***
## bedrooms 0.07770 0.01549 5.017 7.11e-07 ***
## bathrms 0.21583 0.02300 9.386 < 2e-16 ***
## airco 0.21167 0.02372 8.923 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2456 on 541 degrees of freedom
## Multiple R-squared: 0.5674, Adjusted R-squared: 0.5642
## F-statistic: 177.4 on 4 and 541 DF, p-value: < 2.2e-16
OLS results hedonic price function, extended model
OLS2 = lm(log(price)~ log(lotsize) + bedrooms + bathrms + airco + driveway +
recroom + fullbase + gashw + garagepl + prefarea + stories, data=df)
summary(OLS2)
##
## Call:
## lm(formula = log(price) ~ log(lotsize) + bedrooms + bathrms +
## airco + driveway + recroom + fullbase + gashw + garagepl +
## prefarea + stories, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68355 -0.12247 0.00802 0.12780 0.67564
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.74509 0.21634 35.801 < 2e-16 ***
## log(lotsize) 0.30313 0.02669 11.356 < 2e-16 ***
## bedrooms 0.03440 0.01427 2.410 0.016294 *
## bathrms 0.16576 0.02033 8.154 2.52e-15 ***
## airco 0.16642 0.02134 7.799 3.29e-14 ***
## driveway 0.11020 0.02823 3.904 0.000107 ***
## recroom 0.05797 0.02605 2.225 0.026482 *
## fullbase 0.10449 0.02169 4.817 1.90e-06 ***
## gashw 0.17902 0.04389 4.079 5.22e-05 ***
## garagepl 0.04795 0.01148 4.178 3.43e-05 ***
## prefarea 0.13185 0.02267 5.816 1.04e-08 ***
## stories 0.09169 0.01261 7.268 1.30e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2104 on 534 degrees of freedom
## Multiple R-squared: 0.6865, Adjusted R-squared: 0.6801
## F-statistic: 106.3 on 11 and 534 DF, p-value: < 2.2e-16
linearHypothesis(OLS2, c("driveway = 0", "recroom =0","fullbase = 0", "gashw =0","garagepl = 0", "prefarea =0", "stories=0"))
## Linear hypothesis test
##
## Hypothesis:
## driveway = 0
## recroom = 0
## fullbase = 0
## gashw = 0
## garagepl = 0
## prefarea = 0
## stories = 0
##
## Model 1: restricted model
## Model 2: log(price) ~ log(lotsize) + bedrooms + bathrms + airco + driveway +
## recroom + fullbase + gashw + garagepl + prefarea + stories
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 541 32.622
## 2 534 23.638 7 8.9839 28.993 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
OLS results hedonic price function, linear model
summary(OLS3 <- lm(price~lotsize + bedrooms + bathrms + airco + driveway +
recroom + fullbase + gashw + garagepl + prefarea + stories,data=df))
##
## Call:
## lm(formula = price ~ lotsize + bedrooms + bathrms + airco + driveway +
## recroom + fullbase + gashw + garagepl + prefarea + stories,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41389 -9307 -591 7353 74875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4038.3504 3409.4713 -1.184 0.236762
## lotsize 3.5463 0.3503 10.124 < 2e-16 ***
## bedrooms 1832.0035 1047.0002 1.750 0.080733 .
## bathrms 14335.5585 1489.9209 9.622 < 2e-16 ***
## airco 12632.8904 1555.0211 8.124 3.15e-15 ***
## driveway 6687.7789 2045.2458 3.270 0.001145 **
## recroom 4511.2838 1899.9577 2.374 0.017929 *
## fullbase 5452.3855 1588.0239 3.433 0.000642 ***
## gashw 12831.4063 3217.5971 3.988 7.60e-05 ***
## garagepl 4244.8290 840.5442 5.050 6.07e-07 ***
## prefarea 9369.5132 1669.0907 5.614 3.19e-08 ***
## stories 6556.9457 925.2899 7.086 4.37e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15420 on 534 degrees of freedom
## Multiple R-squared: 0.6731, Adjusted R-squared: 0.6664
## F-statistic: 99.97 on 11 and 534 DF, p-value: < 2.2e-16
df <- read_dta("Data/predictsp5.dta")
#library(dplyr)
df <- df %>%
mutate(bm1 = lag(b_m, n = 1),
dfr1 = lag(dfr, n = 1),
dfy1 = lag(dfy, n = 1),
logdp1 = lag(logdp, n = 1),
logdy1 = lag(logdy, n = 1),
logep1 = lag(logep, n = 1),
infl2 = lag(infl, n = 2),
ltr1 = lag(ltr, n = 1),
lty1 = lag(lty, n = 1),
tms1 = lag(tms, n = 1))
summary(Full <- lm(exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 + infl2 + ltr1 + lty1 + tms1 + winter,
data= df, subset = yyyymm<="200312" ))
##
## Call:
## lm(formula = exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 +
## infl2 + ltr1 + lty1 + tms1 + winter, data = df, subset = yyyymm <=
## "200312")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.212034 -0.023642 0.000755 0.026591 0.158504
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.201192 0.062923 3.197 0.001456 **
## bm1 -0.036863 0.017917 -2.057 0.040054 *
## dfr1 0.174267 0.168065 1.037 0.300177
## dfy1 1.705778 0.687145 2.482 0.013307 *
## logdp1 0.051822 0.042463 1.220 0.222768
## logdy1 -0.055053 0.040887 -1.346 0.178627
## logep1 0.037715 0.012417 3.037 0.002485 **
## infl2 -0.360527 0.647676 -0.557 0.577965
## ltr1 0.176099 0.075575 2.330 0.020113 *
## lty1 -0.349943 0.097892 -3.575 0.000377 ***
## tms1 0.414244 0.149456 2.772 0.005741 **
## winter 0.009545 0.003262 2.927 0.003549 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0409 on 634 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.07494, Adjusted R-squared: 0.05889
## F-statistic: 4.669 on 11 and 634 DF, p-value: 6.99e-07
Stepwise regression: excludes regressors with t-ratio smaller than 1.96
df2 <- subset(df, df$yyyymm<="200312")
summary(Stepwise <- lm(exret ~ bm1 + dfy1 + logep1 + lty1 + tms1 + winter, data= df2 ))
##
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + lty1 + tms1 + winter,
## data = df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.220090 -0.024466 0.000701 0.026150 0.167360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.208080 0.053876 3.862 0.000124 ***
## bm1 -0.041395 0.015614 -2.651 0.008219 **
## dfy1 2.012768 0.653421 3.080 0.002156 **
## logep1 0.035245 0.008954 3.936 9.18e-05 ***
## lty1 -0.366410 0.086869 -4.218 2.82e-05 ***
## tms1 0.401002 0.134763 2.976 0.003034 **
## winter 0.009089 0.003238 2.807 0.005156 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04093 on 640 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.06538, Adjusted R-squared: 0.05662
## F-statistic: 7.462 on 6 and 640 DF, p-value: 9.683e-08
Max adjusted R-square / Min AIC
library("leaps")
leaps <- regsubsets(
exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 + infl2 + ltr1 + lty1 + tms1 + winter,
data= df2, nbest=10)
par(mar=c(1, 2.1, 1.1, 1))
plot(leaps,scale="adjr2")
summary(minAIC <- lm(exret~bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 + winter, data=df2))
##
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 +
## winter, data = df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.214873 -0.023876 0.001484 0.026213 0.163967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.201568 0.053865 3.742 0.000199 ***
## bm1 -0.038220 0.015666 -2.440 0.014970 *
## dfy1 1.737098 0.667310 2.603 0.009452 **
## logep1 0.034227 0.008950 3.824 0.000144 ***
## ltr1 0.122555 0.063155 1.941 0.052755 .
## lty1 -0.350817 0.087053 -4.030 6.25e-05 ***
## tms1 0.417569 0.134743 3.099 0.002027 **
## winter 0.009189 0.003232 2.844 0.004603 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04084 on 639 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.07086, Adjusted R-squared: 0.06068
## F-statistic: 6.961 on 7 and 639 DF, p-value: 5.234e-08
Alternatively,
#library(MASS)
df <- na.omit(df)
AIC <- stepAIC(Full, direction = "both", trace = F)
summary(AIC)
##
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 +
## winter, data = df, subset = yyyymm <= "200312")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.214809 -0.023822 0.001438 0.026190 0.163974
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.203662 0.054140 3.762 0.000184 ***
## bm1 -0.038576 0.015700 -2.457 0.014272 *
## dfy1 1.747346 0.668210 2.615 0.009135 **
## logep1 0.034574 0.008996 3.843 0.000133 ***
## ltr1 0.122084 0.063207 1.931 0.053865 .
## lty1 -0.353789 0.087410 -4.047 5.81e-05 ***
## tms1 0.418881 0.134869 3.106 0.001982 **
## winter 0.009236 0.003236 2.854 0.004451 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04086 on 638 degrees of freedom
## Multiple R-squared: 0.07098, Adjusted R-squared: 0.06078
## F-statistic: 6.963 on 7 and 638 DF, p-value: 5.21e-08
Min BIC
par(mar=c(1, 2.1, 1.1, 1))
plot(leaps, scale="bic")
summary(minBIC <- lm(exret ~ logep1 + ltr1 + lty1 + tms1 + winter, data=df2))
##
## Call:
## lm(formula = exret ~ logep1 + ltr1 + lty1 + tms1 + winter, data = df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.213682 -0.024862 0.000898 0.026814 0.154582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.094022 0.024134 3.896 0.000108 ***
## logep1 0.016817 0.004437 3.790 0.000165 ***
## ltr1 0.158186 0.062029 2.550 0.010998 *
## lty1 -0.212101 0.062482 -3.395 0.000730 ***
## tms1 0.512621 0.130553 3.927 9.55e-05 ***
## winter 0.010111 0.003232 3.128 0.001838 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04105 on 641 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.05816, Adjusted R-squared: 0.05082
## F-statistic: 7.917 on 5 and 641 DF, p-value: 2.993e-07
stargazer(Full, minAIC, minBIC, Stepwise, no.space=TRUE,
type="text", keep.stat = c("ser", "N", "rsq", "adj.rsq" ),
column.labels = c("Full", "Min AIC", "Min BIC", "Stepwise"),
title ="Table 3.4 Forecasting equation S&P 500 excess returns")
##
## Table 3.4 Forecasting equation S&P 500 excess returns
## =======================================================================================
## Dependent variable:
## -------------------------------------------------------------------
## exret
## Full Min AIC Min BIC Stepwise
## (1) (2) (3) (4)
## ---------------------------------------------------------------------------------------
## bm1 -0.037** -0.038** -0.041***
## (0.018) (0.016) (0.016)
## dfr1 0.174
## (0.168)
## dfy1 1.706** 1.737*** 2.013***
## (0.687) (0.667) (0.653)
## logdp1 0.052
## (0.042)
## logdy1 -0.055
## (0.041)
## logep1 0.038*** 0.034*** 0.017*** 0.035***
## (0.012) (0.009) (0.004) (0.009)
## infl2 -0.361
## (0.648)
## ltr1 0.176** 0.123* 0.158**
## (0.076) (0.063) (0.062)
## lty1 -0.350*** -0.351*** -0.212*** -0.366***
## (0.098) (0.087) (0.062) (0.087)
## tms1 0.414*** 0.418*** 0.513*** 0.401***
## (0.149) (0.135) (0.131) (0.135)
## winter 0.010*** 0.009*** 0.010*** 0.009***
## (0.003) (0.003) (0.003) (0.003)
## Constant 0.201*** 0.202*** 0.094*** 0.208***
## (0.063) (0.054) (0.024) (0.054)
## ---------------------------------------------------------------------------------------
## Observations 646 647 647 647
## R2 0.075 0.071 0.058 0.065
## Adjusted R2 0.059 0.061 0.051 0.057
## Residual Std. Error 0.041 (df = 634) 0.041 (df = 639) 0.041 (df = 641) 0.041 (df = 640)
## =======================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Out-of-sample forecasting performance Jan 2004-Dec 2013
Forcast
df3 <- subset(df, df$yyyymm>"200312")
attach(df3)
Full_frc = cbind(1, bm1, dfr1, dfy1, logdp1, logdy1, logep1, infl2, ltr1, lty1, tms1, winter)%*%Full$coef
minAIC_frc <- cbind(1, bm1, dfy1, logep1, ltr1, lty1, tms1, winter)%*%minAIC$coef
minBIC_frc <- cbind(1, logep1, ltr1, lty1, tms1, winter)%*%minBIC$coef
Stepwise_frc <- cbind(1, bm1, dfy1, logep1, lty1, tms1, winter)%*%Stepwise$coef
Forecast performance: Full model
Full_frc_er <- Full_frc - exret
Full_rmse <- sqrt(sum(Full_frc_er^2)/length(Full_frc_er))*100
Full_mad <- sum(abs(Full_frc_er))/length(Full_frc_er)*100
Full_r2os = (1-sum(Full_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
Full_r2ps <- cor(Full_frc, exret, method="pearson")*100
Hit <- data.frame(forecast <- ifelse(Full_frc>0,1,0),
actual=ifelse(df3$exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
Full_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100
Full_forecast_perf <- round(c(Full_rmse, Full_mad, Full_r2os, Full_r2ps, Full_hit_rate),3)
Full_forecast_perf
## [1] 4.800 3.416 -31.412 0.040 58.333
Forecast performance: Max R-sqaure / Min AIC model
minAIC_frc_er <- minAIC_frc - exret
minAIC_rmse <- sqrt(sum(minAIC_frc_er^2)/length(minAIC_frc_er))*100
minAIC_mad <- sum(abs(minAIC_frc_er))/length(minAIC_frc_er)*100
minAIC_r2os = (1-sum(minAIC_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
minAIC_r2ps <- cor(minAIC_frc, exret, method="pearson")*100
Hit <- data.frame(forecast <- ifelse(minAIC_frc>0,1,0),
actual=ifelse(df3$exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
minAIC_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100
minAIC_forecast_perf <- round(c(minAIC_rmse, minAIC_mad, minAIC_r2os, minAIC_r2ps, minAIC_hit_rate),3)
minAIC_forecast_perf #In percentage
## [1] 4.726 3.339 -27.345 -0.535 59.167
Forecast performance: Min BIC model
minBIC_frc_er <- minBIC_frc - exret
minBIC_rmse <- sqrt(sum(minBIC_frc_er^2)/length(minBIC_frc_er))*100
minBIC_mad <- sum(abs(minBIC_frc_er))/length(minBIC_frc_er)*100
minBIC_r2os = (1-sum(minBIC_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
minBIC_r2ps <- cor(minBIC_frc, exret, method="pearson")*100
Hit <- data.frame(forecast <- ifelse(minBIC_frc>0,1,0),
actual=ifelse(exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
minBIC_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100
minBIC_forecast_perf <- round(c(minBIC_rmse, minBIC_mad, minBIC_r2os, minBIC_r2ps, minBIC_hit_rate),3)
minBIC_forecast_perf
## [1] 4.306 3.107 -5.725 7.522 56.667
Stepwise_frc_er <- Stepwise_frc - exret
Stepwise_rmse <- sqrt(sum(Stepwise_frc_er^2)/length(Stepwise_frc_er))*100
Stepwise_mad <- sum(abs(Stepwise_frc_er))/length(Stepwise_frc_er)*100
Stepwise_r2os = (1-sum(Stepwise_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
Stepwise_r2ps <- cor(Stepwise_frc, exret, method="pearson")*100
Hit <- data.frame(forecast <- ifelse(Stepwise_frc>0,1,0),
actual=ifelse(df3$exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
Stepwise_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100
Stepwise_forecast_perf <- round(c(Stepwise_rmse, Stepwise_mad, Stepwise_r2os, Stepwise_r2ps, Stepwise_hit_rate),3)
Stepwise_forecast_perf
## [1] 4.787 3.402 -30.685 -3.309 60.000
Table 3.5 final
forecast_perf = cbind(Full_forecast_perf, minBIC_forecast_perf, minBIC_forecast_perf, Stepwise_forecast_perf)
colnames(forecast_perf)=c("Full","Min AIC","Min BIC", "Stepwise")
rownames(forecast_perf)=c("RMSE","MAD","R2os","R2ps","HitRate")
forecast_perf
## Full Min AIC Min BIC Stepwise
## RMSE 4.800 4.306 4.306 4.787
## MAD 3.416 3.107 3.107 3.402
## R2os -31.412 -5.725 -5.725 -30.685
## R2ps 0.040 7.522 7.522 -3.309
## HitRate 58.333 56.667 56.667 60.000
detach(df3)
Summary statistics, 1472 individuals
df <- read_dta("Data/Bwages.dta")
df2 <- as.data.frame(cbind(wage=df$wage, educ=df$educ, exper=df$exper, male=df$male))
ag <- aggregate(. ~ male, df2, function(x) c(mean = mean(x), sd = sd(x)))
table3.2 <- do.call("data.frame", ag)
round(table3.2, 2)
## male wage.mean wage.sd educ.mean educ.sd exper.mean exper.sd
## 1 0 10.26 3.81 3.59 1.09 15.20 9.70
## 2 1 11.56 4.75 3.24 1.26 18.52 10.25
OLS results specification 1
summary(OLS1<-lm(wage ~ male + educ + exper, data=df))
##
## Call:
## lm(formula = wage ~ male + educ + exper, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5294 -1.9686 -0.3124 1.5679 30.7015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.213692 0.386895 0.552 0.581
## male 1.346144 0.192736 6.984 4.32e-12 ***
## educ 1.986090 0.080640 24.629 < 2e-16 ***
## exper 0.192275 0.009583 20.064 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.548 on 1468 degrees of freedom
## Multiple R-squared: 0.3656, Adjusted R-squared: 0.3643
## F-statistic: 282 on 3 and 1468 DF, p-value: < 2.2e-16
OLS results specification 2
summary(OLS2<-lm(wage ~ male + educ + exper + I(exper^2),data=df))
##
## Call:
## lm(formula = wage ~ male + educ + exper + I(exper^2), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.7246 -1.9519 -0.3107 1.5117 30.5951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8924851 0.4329127 -2.062 0.0394 *
## male 1.3336934 0.1908668 6.988 4.23e-12 ***
## educ 1.9881267 0.0798526 24.897 < 2e-16 ***
## exper 0.3579993 0.0316566 11.309 < 2e-16 ***
## I(exper^2) -0.0043692 0.0007962 -5.487 4.80e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.514 on 1467 degrees of freedom
## Multiple R-squared: 0.3783, Adjusted R-squared: 0.3766
## F-statistic: 223.2 on 4 and 1467 DF, p-value: < 2.2e-16
Residual <- resid(OLS2)
Fitted <- predict(OLS2)
par(mar=c(2, 2.1, 1.1, 1))
plot(Fitted, Residual, xlim = c(0, 20), ylim = c(-20, 40),
xlab="Fitted value", ylab="Residual", cex.main=0.8,
main = "Figure 3.1 Residuals versus fitted values, linear model")
OLS results specification 3 and the F-test
summary(OLS3 <- lm(lnwage ~ male + lneduc + lnexper + I(lnexper^2), data=df))
##
## Call:
## lm(formula = lnwage ~ male + lneduc + lnexper + I(lnexper^2),
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75085 -0.15921 0.00618 0.17145 1.10533
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.26271 0.06634 19.033 < 2e-16 ***
## male 0.11794 0.01557 7.574 6.35e-14 ***
## lneduc 0.44218 0.01819 24.306 < 2e-16 ***
## lnexper 0.10982 0.05438 2.019 0.0436 *
## I(lnexper^2) 0.02601 0.01148 2.266 0.0236 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2862 on 1467 degrees of freedom
## Multiple R-squared: 0.3783, Adjusted R-squared: 0.3766
## F-statistic: 223.1 on 4 and 1467 DF, p-value: < 2.2e-16
linearHypothesis(OLS3, c("lnexper = 0", "I(lnexper^2) =0"))
## Linear hypothesis test
##
## Hypothesis:
## lnexper = 0
## I(lnexper^2) = 0
##
## Model 1: restricted model
## Model 2: lnwage ~ male + lneduc + lnexper + I(lnexper^2)
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1469 158.57
## 2 1467 120.20 2 38.362 234.09 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual <- resid(OLS3)
Fitted <- predict(OLS3)
par(mar=c(2, 3.1, 1.1, 2.1))
plot(Fitted, Residual, xlim = c(1, 3), ylim = c(-2, 2),
main = "Figure 3.1 Residuals against fitted values, loglinear model",
cex.main=0.8)
OLS results specification 4
summary(OLS4 <- lm(lnwage ~ male + lneduc + lnexper, data=df))
##
## Call:
## lm(formula = lnwage ~ male + lneduc + lnexper, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.70477 -0.15862 0.00485 0.17366 1.11815
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.14473 0.04118 27.798 < 2e-16 ***
## male 0.12008 0.01556 7.715 2.22e-14 ***
## lneduc 0.43662 0.01805 24.188 < 2e-16 ***
## lnexper 0.23065 0.01073 21.488 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2867 on 1468 degrees of freedom
## Multiple R-squared: 0.3761, Adjusted R-squared: 0.3748
## F-statistic: 295 on 3 and 1468 DF, p-value: < 2.2e-16
OLS results specification 5 and the F-test
summary(OLS5 <- lm(lnwage ~ male + factor(educ) + lnexper, data=df))
##
## Call:
## lm(formula = lnwage ~ male + factor(educ) + lnexper, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65548 -0.15138 0.01324 0.16998 1.11684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.27189 0.04483 28.369 < 2e-16 ***
## male 0.11762 0.01546 7.610 4.88e-14 ***
## factor(educ)2 0.14364 0.03336 4.306 1.77e-05 ***
## factor(educ)3 0.30487 0.03202 9.521 < 2e-16 ***
## factor(educ)4 0.47428 0.03301 14.366 < 2e-16 ***
## factor(educ)5 0.63910 0.03322 19.237 < 2e-16 ***
## lnexper 0.23022 0.01056 21.804 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.282 on 1465 degrees of freedom
## Multiple R-squared: 0.3976, Adjusted R-squared: 0.3951
## F-statistic: 161.1 on 6 and 1465 DF, p-value: < 2.2e-16
anova(OLS4,OLS5)
## Analysis of Variance Table
##
## Model 1: lnwage ~ male + lneduc + lnexper
## Model 2: lnwage ~ male + factor(educ) + lnexper
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1468 120.62
## 2 1465 116.47 3 4.1563 17.427 4.047e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
OLS results specification 6 and the F-test
summary(OLS6 <- lm(lnwage ~ male*factor(educ) + lnexper + lnexper*male, data=df))
##
## Call:
## lm(formula = lnwage ~ male * factor(educ) + lnexper + lnexper *
## male, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.63955 -0.15328 0.01225 0.16647 1.11698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.21584 0.07768 15.652 < 2e-16 ***
## male 0.15375 0.09522 1.615 0.106595
## factor(educ)2 0.22411 0.06758 3.316 0.000935 ***
## factor(educ)3 0.43319 0.06323 6.851 1.08e-11 ***
## factor(educ)4 0.60191 0.06280 9.585 < 2e-16 ***
## factor(educ)5 0.75491 0.06467 11.673 < 2e-16 ***
## lnexper 0.20744 0.01655 12.535 < 2e-16 ***
## male:factor(educ)2 -0.09651 0.07770 -1.242 0.214381
## male:factor(educ)3 -0.16677 0.07340 -2.272 0.023215 *
## male:factor(educ)4 -0.17236 0.07440 -2.317 0.020663 *
## male:factor(educ)5 -0.14616 0.07551 -1.935 0.053123 .
## male:lnexper 0.04063 0.02149 1.891 0.058875 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2811 on 1460 degrees of freedom
## Multiple R-squared: 0.4032, Adjusted R-squared: 0.3988
## F-statistic: 89.69 on 11 and 1460 DF, p-value: < 2.2e-16
anova(OLS5, OLS6)
## Analysis of Variance Table
##
## Model 1: lnwage ~ male + factor(educ) + lnexper
## Model 2: lnwage ~ male * factor(educ) + lnexper + lnexper * male
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1465 116.47
## 2 1460 115.37 5 1.0957 2.7732 0.01683 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
OLS results specification 7
summary(OLS7 <- lm(lnwage ~ male + lnexper*factor(educ), data=df))
##
## Call:
## lm(formula = lnwage ~ male + lnexper * factor(educ), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.63623 -0.15046 0.00831 0.16713 1.12415
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.48891 0.21203 7.022 3.34e-12 ***
## male 0.11597 0.01548 7.493 1.16e-13 ***
## lnexper 0.16312 0.06539 2.494 0.0127 *
## factor(educ)2 0.06727 0.22628 0.297 0.7663
## factor(educ)3 0.13525 0.21889 0.618 0.5367
## factor(educ)4 0.20495 0.21946 0.934 0.3505
## factor(educ)5 0.34130 0.21808 1.565 0.1178
## lnexper:factor(educ)2 0.01933 0.07049 0.274 0.7839
## lnexper:factor(educ)3 0.04988 0.06821 0.731 0.4647
## lnexper:factor(educ)4 0.08784 0.06877 1.277 0.2017
## lnexper:factor(educ)5 0.09996 0.06822 1.465 0.1430
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2815 on 1461 degrees of freedom
## Multiple R-squared: 0.4012, Adjusted R-squared: 0.3971
## F-statistic: 97.9 on 10 and 1461 DF, p-value: < 2.2e-16