# 3 Interpreting and Comparing Regression Models

## 3.1 Table 3.1

3.4 Illustration: Explaining House Prices Load libraries

``````library(haven)
library(stargazer)
library(dplyr)
library(MASS)
library(AER)
rm(list = ls())``````

OLS results hedonic price function

``````df <- read_dta("Data/housing.dta")
OLS1 = lm(log(price)~log(lotsize) + bedrooms + bathrms + airco,data=df)
summary(OLS1)``````
``````##
## Call:
## lm(formula = log(price) ~ log(lotsize) + bedrooms + bathrms +
##     airco, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.81782 -0.15562  0.00778  0.16468  0.84143
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)   7.09378    0.23155  30.636  < 2e-16 ***
## log(lotsize)  0.40042    0.02781  14.397  < 2e-16 ***
## bedrooms      0.07770    0.01549   5.017 7.11e-07 ***
## bathrms       0.21583    0.02300   9.386  < 2e-16 ***
## airco         0.21167    0.02372   8.923  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2456 on 541 degrees of freedom
## Multiple R-squared:  0.5674, Adjusted R-squared:  0.5642
## F-statistic: 177.4 on 4 and 541 DF,  p-value: < 2.2e-16``````

## 3.2 Table 3.2

OLS results hedonic price function, extended model

``````OLS2 = lm(log(price)~ log(lotsize) + bedrooms + bathrms + airco + driveway +
recroom + fullbase + gashw + garagepl + prefarea + stories, data=df)
summary(OLS2)``````
``````##
## Call:
## lm(formula = log(price) ~ log(lotsize) + bedrooms + bathrms +
##     airco + driveway + recroom + fullbase + gashw + garagepl +
##     prefarea + stories, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.68355 -0.12247  0.00802  0.12780  0.67564
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)   7.74509    0.21634  35.801  < 2e-16 ***
## log(lotsize)  0.30313    0.02669  11.356  < 2e-16 ***
## bedrooms      0.03440    0.01427   2.410 0.016294 *
## bathrms       0.16576    0.02033   8.154 2.52e-15 ***
## airco         0.16642    0.02134   7.799 3.29e-14 ***
## driveway      0.11020    0.02823   3.904 0.000107 ***
## recroom       0.05797    0.02605   2.225 0.026482 *
## fullbase      0.10449    0.02169   4.817 1.90e-06 ***
## gashw         0.17902    0.04389   4.079 5.22e-05 ***
## garagepl      0.04795    0.01148   4.178 3.43e-05 ***
## prefarea      0.13185    0.02267   5.816 1.04e-08 ***
## stories       0.09169    0.01261   7.268 1.30e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2104 on 534 degrees of freedom
## Multiple R-squared:  0.6865, Adjusted R-squared:  0.6801
## F-statistic: 106.3 on 11 and 534 DF,  p-value: < 2.2e-16``````
``linearHypothesis(OLS2, c("driveway = 0", "recroom =0","fullbase = 0", "gashw =0","garagepl = 0", "prefarea =0", "stories=0"))``
``````## Linear hypothesis test
##
## Hypothesis:
## driveway = 0
## recroom = 0
## fullbase = 0
## gashw = 0
## garagepl = 0
## prefarea = 0
## stories = 0
##
## Model 1: restricted model
## Model 2: log(price) ~ log(lotsize) + bedrooms + bathrms + airco + driveway +
##     recroom + fullbase + gashw + garagepl + prefarea + stories
##
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)
## 1    541 32.622
## 2    534 23.638  7    8.9839 28.993 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1``````

## 3.3 Table 3.3

OLS results hedonic price function, linear model

``````summary(OLS3 <- lm(price~lotsize + bedrooms + bathrms + airco +  driveway +
recroom + fullbase + gashw + garagepl + prefarea + stories,data=df))``````
``````##
## Call:
## lm(formula = price ~ lotsize + bedrooms + bathrms + airco + driveway +
##     recroom + fullbase + gashw + garagepl + prefarea + stories,
##     data = df)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -41389  -9307   -591   7353  74875
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4038.3504  3409.4713  -1.184 0.236762
## lotsize         3.5463     0.3503  10.124  < 2e-16 ***
## bedrooms     1832.0035  1047.0002   1.750 0.080733 .
## bathrms     14335.5585  1489.9209   9.622  < 2e-16 ***
## airco       12632.8904  1555.0211   8.124 3.15e-15 ***
## driveway     6687.7789  2045.2458   3.270 0.001145 **
## recroom      4511.2838  1899.9577   2.374 0.017929 *
## fullbase     5452.3855  1588.0239   3.433 0.000642 ***
## gashw       12831.4063  3217.5971   3.988 7.60e-05 ***
## garagepl     4244.8290   840.5442   5.050 6.07e-07 ***
## prefarea     9369.5132  1669.0907   5.614 3.19e-08 ***
## stories      6556.9457   925.2899   7.086 4.37e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15420 on 534 degrees of freedom
## Multiple R-squared:  0.6731, Adjusted R-squared:  0.6664
## F-statistic: 99.97 on 11 and 534 DF,  p-value: < 2.2e-16``````

## 3.4 Table 3.4

``````df <- read_dta("Data/predictsp5.dta")
#library(dplyr)
df <- df %>%
mutate(bm1 = lag(b_m, n = 1),
dfr1 = lag(dfr, n = 1),
dfy1 = lag(dfy, n = 1),
logdp1 = lag(logdp, n = 1),
logdy1 = lag(logdy, n = 1),
logep1 = lag(logep, n = 1),
infl2 = lag(infl, n = 2),
ltr1 = lag(ltr, n = 1),
lty1 = lag(lty, n = 1),
tms1 = lag(tms, n = 1))
summary(Full <- lm(exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 + infl2 + ltr1 + lty1 + tms1 + winter,
data= df, subset = yyyymm<="200312" ))``````
``````##
## Call:
## lm(formula = exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 +
##     infl2 + ltr1 + lty1 + tms1 + winter, data = df, subset = yyyymm <=
##     "200312")
##
## Residuals:
##       Min        1Q    Median        3Q       Max
## -0.212034 -0.023642  0.000755  0.026591  0.158504
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.201192   0.062923   3.197 0.001456 **
## bm1         -0.036863   0.017917  -2.057 0.040054 *
## dfr1         0.174267   0.168065   1.037 0.300177
## dfy1         1.705778   0.687145   2.482 0.013307 *
## logdp1       0.051822   0.042463   1.220 0.222768
## logdy1      -0.055053   0.040887  -1.346 0.178627
## logep1       0.037715   0.012417   3.037 0.002485 **
## infl2       -0.360527   0.647676  -0.557 0.577965
## ltr1         0.176099   0.075575   2.330 0.020113 *
## lty1        -0.349943   0.097892  -3.575 0.000377 ***
## tms1         0.414244   0.149456   2.772 0.005741 **
## winter       0.009545   0.003262   2.927 0.003549 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0409 on 634 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.07494,    Adjusted R-squared:  0.05889
## F-statistic: 4.669 on 11 and 634 DF,  p-value: 6.99e-07``````

Stepwise regression: excludes regressors with t-ratio smaller than 1.96

``````df2 <- subset(df, df\$yyyymm<="200312")
summary(Stepwise <- lm(exret ~ bm1 +  dfy1 + logep1 + lty1 + tms1 + winter, data= df2 ))``````
``````##
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + lty1 + tms1 + winter,
##     data = df2)
##
## Residuals:
##       Min        1Q    Median        3Q       Max
## -0.220090 -0.024466  0.000701  0.026150  0.167360
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.208080   0.053876   3.862 0.000124 ***
## bm1         -0.041395   0.015614  -2.651 0.008219 **
## dfy1         2.012768   0.653421   3.080 0.002156 **
## logep1       0.035245   0.008954   3.936 9.18e-05 ***
## lty1        -0.366410   0.086869  -4.218 2.82e-05 ***
## tms1         0.401002   0.134763   2.976 0.003034 **
## winter       0.009089   0.003238   2.807 0.005156 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04093 on 640 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.06538,    Adjusted R-squared:  0.05662
## F-statistic: 7.462 on 6 and 640 DF,  p-value: 9.683e-08``````

Max adjusted R-square / Min AIC

``````library("leaps")
leaps <- regsubsets(
exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 + infl2 + ltr1 + lty1 + tms1 + winter,
data= df2, nbest=10)
par(mar=c(1, 2.1, 1.1, 1))

``summary(minAIC <- lm(exret~bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 + winter, data=df2))``
``````##
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 +
##     winter, data = df2)
##
## Residuals:
##       Min        1Q    Median        3Q       Max
## -0.214873 -0.023876  0.001484  0.026213  0.163967
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.201568   0.053865   3.742 0.000199 ***
## bm1         -0.038220   0.015666  -2.440 0.014970 *
## dfy1         1.737098   0.667310   2.603 0.009452 **
## logep1       0.034227   0.008950   3.824 0.000144 ***
## ltr1         0.122555   0.063155   1.941 0.052755 .
## lty1        -0.350817   0.087053  -4.030 6.25e-05 ***
## tms1         0.417569   0.134743   3.099 0.002027 **
## winter       0.009189   0.003232   2.844 0.004603 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04084 on 639 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.07086,    Adjusted R-squared:  0.06068
## F-statistic: 6.961 on 7 and 639 DF,  p-value: 5.234e-08``````

Alternatively,

``````#library(MASS)
df <- na.omit(df)
AIC <- stepAIC(Full, direction = "both", trace = F)
summary(AIC)``````
``````##
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 +
##     winter, data = df, subset = yyyymm <= "200312")
##
## Residuals:
##       Min        1Q    Median        3Q       Max
## -0.214809 -0.023822  0.001438  0.026190  0.163974
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.203662   0.054140   3.762 0.000184 ***
## bm1         -0.038576   0.015700  -2.457 0.014272 *
## dfy1         1.747346   0.668210   2.615 0.009135 **
## logep1       0.034574   0.008996   3.843 0.000133 ***
## ltr1         0.122084   0.063207   1.931 0.053865 .
## lty1        -0.353789   0.087410  -4.047 5.81e-05 ***
## tms1         0.418881   0.134869   3.106 0.001982 **
## winter       0.009236   0.003236   2.854 0.004451 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04086 on 638 degrees of freedom
## Multiple R-squared:  0.07098,    Adjusted R-squared:  0.06078
## F-statistic: 6.963 on 7 and 638 DF,  p-value: 5.21e-08``````

Min BIC

``````par(mar=c(1, 2.1, 1.1, 1))
plot(leaps, scale="bic")``````

``summary(minBIC <- lm(exret ~ logep1 + ltr1 + lty1 + tms1 + winter, data=df2))``
``````##
## Call:
## lm(formula = exret ~ logep1 + ltr1 + lty1 + tms1 + winter, data = df2)
##
## Residuals:
##       Min        1Q    Median        3Q       Max
## -0.213682 -0.024862  0.000898  0.026814  0.154582
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.094022   0.024134   3.896 0.000108 ***
## logep1       0.016817   0.004437   3.790 0.000165 ***
## ltr1         0.158186   0.062029   2.550 0.010998 *
## lty1        -0.212101   0.062482  -3.395 0.000730 ***
## tms1         0.512621   0.130553   3.927 9.55e-05 ***
## winter       0.010111   0.003232   3.128 0.001838 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04105 on 641 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.05816,    Adjusted R-squared:  0.05082
## F-statistic: 7.917 on 5 and 641 DF,  p-value: 2.993e-07``````

## 3.5 Table 3.4 Cont’d

``````stargazer(Full, minAIC, minBIC, Stepwise, no.space=TRUE,
type="text", keep.stat = c("ser", "N", "rsq",  "adj.rsq" ),
column.labels = c("Full", "Min AIC", "Min BIC", "Stepwise"),
title ="Table 3.4 Forecasting equation S&P 500 excess returns")``````
``````##
## Table 3.4 Forecasting equation S&P 500 excess returns
## =======================================================================================
##                                             Dependent variable:
##                     -------------------------------------------------------------------
##                                                    exret
##                           Full           Min AIC          Min BIC          Stepwise
##                           (1)              (2)              (3)              (4)
## ---------------------------------------------------------------------------------------
## bm1                     -0.037**         -0.038**                         -0.041***
##                         (0.018)          (0.016)                           (0.016)
## dfr1                     0.174
##                         (0.168)
## dfy1                    1.706**          1.737***                          2.013***
##                         (0.687)          (0.667)                           (0.653)
## logdp1                   0.052
##                         (0.042)
## logdy1                   -0.055
##                         (0.041)
## logep1                  0.038***         0.034***         0.017***         0.035***
##                         (0.012)          (0.009)          (0.004)          (0.009)
## infl2                    -0.361
##                         (0.648)
## ltr1                    0.176**           0.123*          0.158**
##                         (0.076)          (0.063)          (0.062)
## lty1                   -0.350***        -0.351***        -0.212***        -0.366***
##                         (0.098)          (0.087)          (0.062)          (0.087)
## tms1                    0.414***         0.418***         0.513***         0.401***
##                         (0.149)          (0.135)          (0.131)          (0.135)
## winter                  0.010***         0.009***         0.010***         0.009***
##                         (0.003)          (0.003)          (0.003)          (0.003)
## Constant                0.201***         0.202***         0.094***         0.208***
##                         (0.063)          (0.054)          (0.024)          (0.054)
## ---------------------------------------------------------------------------------------
## Observations              646              647              647              647
## R2                       0.075            0.071            0.058            0.065
## Adjusted R2              0.059            0.061            0.051            0.057
## Residual Std. Error 0.041 (df = 634) 0.041 (df = 639) 0.041 (df = 641) 0.041 (df = 640)
## =======================================================================================
## Note:                                                       *p<0.1; **p<0.05; ***p<0.01``````

## 3.6 Table 3.5

Out-of-sample forecasting performance Jan 2004-Dec 2013

Forcast

``````df3 <- subset(df, df\$yyyymm>"200312")
attach(df3)
Full_frc = cbind(1, bm1, dfr1, dfy1, logdp1, logdy1, logep1, infl2, ltr1, lty1, tms1, winter)%*%Full\$coef
minAIC_frc <- cbind(1, bm1, dfy1, logep1, ltr1, lty1, tms1, winter)%*%minAIC\$coef
minBIC_frc <- cbind(1, logep1, ltr1, lty1, tms1, winter)%*%minBIC\$coef
Stepwise_frc <- cbind(1, bm1, dfy1, logep1, lty1, tms1, winter)%*%Stepwise\$coef``````

Forecast performance: Full model

``````Full_frc_er <- Full_frc - exret
Full_rmse <- sqrt(sum(Full_frc_er^2)/length(Full_frc_er))*100
Full_r2os = (1-sum(Full_frc_er^2)/sum((mean(df2\$exret)-df3\$exret)^2) )*100
Full_r2ps <- cor(Full_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(Full_frc>0,1,0),
actual=ifelse(df3\$exret>0,1,0))
Hit_matrix <- xtabs(~Hit\$forecast + Hit\$actual)
Full_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

Full_forecast_perf <- round(c(Full_rmse, Full_mad, Full_r2os, Full_r2ps, Full_hit_rate),3)
Full_forecast_perf``````
``## [1]   4.800   3.416 -31.412   0.040  58.333``

Forecast performance: Max R-sqaure / Min AIC model

``````minAIC_frc_er <- minAIC_frc - exret
minAIC_rmse <- sqrt(sum(minAIC_frc_er^2)/length(minAIC_frc_er))*100
minAIC_r2os = (1-sum(minAIC_frc_er^2)/sum((mean(df2\$exret)-df3\$exret)^2) )*100
minAIC_r2ps <- cor(minAIC_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(minAIC_frc>0,1,0),
actual=ifelse(df3\$exret>0,1,0))
Hit_matrix <- xtabs(~Hit\$forecast + Hit\$actual)
minAIC_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

minAIC_forecast_perf <- round(c(minAIC_rmse, minAIC_mad, minAIC_r2os, minAIC_r2ps, minAIC_hit_rate),3)
minAIC_forecast_perf #In percentage ``````
``## [1]   4.726   3.339 -27.345  -0.535  59.167``

Forecast performance: Min BIC model

``````minBIC_frc_er <- minBIC_frc - exret
minBIC_rmse <- sqrt(sum(minBIC_frc_er^2)/length(minBIC_frc_er))*100
minBIC_r2os = (1-sum(minBIC_frc_er^2)/sum((mean(df2\$exret)-df3\$exret)^2) )*100
minBIC_r2ps <- cor(minBIC_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(minBIC_frc>0,1,0),
actual=ifelse(exret>0,1,0))
Hit_matrix <- xtabs(~Hit\$forecast + Hit\$actual)
minBIC_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

minBIC_forecast_perf <- round(c(minBIC_rmse, minBIC_mad, minBIC_r2os, minBIC_r2ps, minBIC_hit_rate),3)
minBIC_forecast_perf``````
``## [1]  4.306  3.107 -5.725  7.522 56.667``
``````Stepwise_frc_er <- Stepwise_frc - exret
Stepwise_rmse <- sqrt(sum(Stepwise_frc_er^2)/length(Stepwise_frc_er))*100
Stepwise_r2os = (1-sum(Stepwise_frc_er^2)/sum((mean(df2\$exret)-df3\$exret)^2) )*100
Stepwise_r2ps <- cor(Stepwise_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(Stepwise_frc>0,1,0),
actual=ifelse(df3\$exret>0,1,0))
Hit_matrix <- xtabs(~Hit\$forecast + Hit\$actual)
Stepwise_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

Stepwise_forecast_perf <- round(c(Stepwise_rmse, Stepwise_mad, Stepwise_r2os, Stepwise_r2ps, Stepwise_hit_rate),3)
Stepwise_forecast_perf``````
``## [1]   4.787   3.402 -30.685  -3.309  60.000``

Table 3.5 final

``````forecast_perf = cbind(Full_forecast_perf, minBIC_forecast_perf, minBIC_forecast_perf, Stepwise_forecast_perf)
colnames(forecast_perf)=c("Full","Min AIC","Min BIC", "Stepwise")
forecast_perf``````
``````##            Full Min AIC Min BIC Stepwise
## RMSE      4.800   4.306   4.306    4.787
## MAD       3.416   3.107   3.107    3.402
## R2os    -31.412  -5.725  -5.725  -30.685
## R2ps      0.040   7.522   7.522   -3.309
## HitRate  58.333  56.667  56.667   60.000``````
``detach(df3)``

## 3.7 Table 3.6

Summary statistics, 1472 individuals

``````df <- read_dta("Data/Bwages.dta")

df2 <- as.data.frame(cbind(wage=df\$wage, educ=df\$educ, exper=df\$exper, male=df\$male))
ag <- aggregate(. ~ male, df2, function(x) c(mean = mean(x), sd = sd(x)))
table3.2 <- do.call("data.frame", ag)
round(table3.2, 2)``````
``````##   male wage.mean wage.sd educ.mean educ.sd exper.mean exper.sd
## 1    0     10.26    3.81      3.59    1.09      15.20     9.70
## 2    1     11.56    4.75      3.24    1.26      18.52    10.25``````

## 3.8 Table 3.7

OLS results specification 1

``summary(OLS1<-lm(wage ~ male + educ + exper, data=df))``
``````##
## Call:
## lm(formula = wage ~ male + educ + exper, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -13.5294  -1.9686  -0.3124   1.5679  30.7015
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.213692   0.386895   0.552    0.581
## male        1.346144   0.192736   6.984 4.32e-12 ***
## educ        1.986090   0.080640  24.629  < 2e-16 ***
## exper       0.192275   0.009583  20.064  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.548 on 1468 degrees of freedom
## Multiple R-squared:  0.3656, Adjusted R-squared:  0.3643
## F-statistic:   282 on 3 and 1468 DF,  p-value: < 2.2e-16``````

## 3.9 Table 3.8

OLS results specification 2

``summary(OLS2<-lm(wage ~ male + educ + exper + I(exper^2),data=df))``
``````##
## Call:
## lm(formula = wage ~ male + educ + exper + I(exper^2), data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -12.7246  -1.9519  -0.3107   1.5117  30.5951
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8924851  0.4329127  -2.062   0.0394 *
## male         1.3336934  0.1908668   6.988 4.23e-12 ***
## educ         1.9881267  0.0798526  24.897  < 2e-16 ***
## exper        0.3579993  0.0316566  11.309  < 2e-16 ***
## I(exper^2)  -0.0043692  0.0007962  -5.487 4.80e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.514 on 1467 degrees of freedom
## Multiple R-squared:  0.3783, Adjusted R-squared:  0.3766
## F-statistic: 223.2 on 4 and 1467 DF,  p-value: < 2.2e-16``````

## 3.10 Figure 3.1

``````Residual <- resid(OLS2)
Fitted <- predict(OLS2)
par(mar=c(2, 2.1, 1.1, 1))
plot(Fitted, Residual, xlim = c(0, 20), ylim = c(-20, 40),
xlab="Fitted value", ylab="Residual", cex.main=0.8,
main = "Figure 3.1 Residuals versus fitted values, linear model")``````

## 3.11 Table 3.9

OLS results specification 3 and the F-test

``summary(OLS3 <- lm(lnwage ~ male + lneduc + lnexper + I(lnexper^2), data=df))``
``````##
## Call:
## lm(formula = lnwage ~ male + lneduc + lnexper + I(lnexper^2),
##     data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.75085 -0.15921  0.00618  0.17145  1.10533
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)   1.26271    0.06634  19.033  < 2e-16 ***
## male          0.11794    0.01557   7.574 6.35e-14 ***
## lneduc        0.44218    0.01819  24.306  < 2e-16 ***
## lnexper       0.10982    0.05438   2.019   0.0436 *
## I(lnexper^2)  0.02601    0.01148   2.266   0.0236 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2862 on 1467 degrees of freedom
## Multiple R-squared:  0.3783, Adjusted R-squared:  0.3766
## F-statistic: 223.1 on 4 and 1467 DF,  p-value: < 2.2e-16``````
``linearHypothesis(OLS3, c("lnexper = 0", "I(lnexper^2) =0"))``
``````## Linear hypothesis test
##
## Hypothesis:
## lnexper = 0
## I(lnexper^2) = 0
##
## Model 1: restricted model
## Model 2: lnwage ~ male + lneduc + lnexper + I(lnexper^2)
##
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)
## 1   1469 158.57
## 2   1467 120.20  2    38.362 234.09 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1``````

## 3.12 Figure 3.2

``````Residual <- resid(OLS3)
Fitted <- predict(OLS3)
par(mar=c(2, 3.1, 1.1, 2.1))
plot(Fitted, Residual, xlim = c(1, 3), ylim = c(-2, 2),
main = "Figure 3.1 Residuals against fitted values, loglinear model",
cex.main=0.8)``````

## 3.13 Table 3.10

OLS results specification 4

``summary(OLS4 <- lm(lnwage ~ male + lneduc + lnexper, data=df))``
``````##
## Call:
## lm(formula = lnwage ~ male + lneduc + lnexper, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.70477 -0.15862  0.00485  0.17366  1.11815
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  1.14473    0.04118  27.798  < 2e-16 ***
## male         0.12008    0.01556   7.715 2.22e-14 ***
## lneduc       0.43662    0.01805  24.188  < 2e-16 ***
## lnexper      0.23065    0.01073  21.488  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2867 on 1468 degrees of freedom
## Multiple R-squared:  0.3761, Adjusted R-squared:  0.3748
## F-statistic:   295 on 3 and 1468 DF,  p-value: < 2.2e-16``````

## 3.14 Table 3.11

OLS results specification 5 and the F-test

``summary(OLS5 <- lm(lnwage ~ male + factor(educ) + lnexper, data=df))``
``````##
## Call:
## lm(formula = lnwage ~ male + factor(educ) + lnexper, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.65548 -0.15138  0.01324  0.16998  1.11684
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)    1.27189    0.04483  28.369  < 2e-16 ***
## male           0.11762    0.01546   7.610 4.88e-14 ***
## factor(educ)2  0.14364    0.03336   4.306 1.77e-05 ***
## factor(educ)3  0.30487    0.03202   9.521  < 2e-16 ***
## factor(educ)4  0.47428    0.03301  14.366  < 2e-16 ***
## factor(educ)5  0.63910    0.03322  19.237  < 2e-16 ***
## lnexper        0.23022    0.01056  21.804  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.282 on 1465 degrees of freedom
## Multiple R-squared:  0.3976, Adjusted R-squared:  0.3951
## F-statistic: 161.1 on 6 and 1465 DF,  p-value: < 2.2e-16``````
``anova(OLS4,OLS5)``
``````## Analysis of Variance Table
##
## Model 1: lnwage ~ male + lneduc + lnexper
## Model 2: lnwage ~ male + factor(educ) + lnexper
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)
## 1   1468 120.62
## 2   1465 116.47  3    4.1563 17.427 4.047e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1``````

## 3.15 Table 3.12

OLS results specification 6 and the F-test

``summary(OLS6 <- lm(lnwage ~ male*factor(educ) + lnexper + lnexper*male, data=df))``
``````##
## Call:
## lm(formula = lnwage ~ male * factor(educ) + lnexper + lnexper *
##     male, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.63955 -0.15328  0.01225  0.16647  1.11698
##
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)         1.21584    0.07768  15.652  < 2e-16 ***
## male                0.15375    0.09522   1.615 0.106595
## factor(educ)2       0.22411    0.06758   3.316 0.000935 ***
## factor(educ)3       0.43319    0.06323   6.851 1.08e-11 ***
## factor(educ)4       0.60191    0.06280   9.585  < 2e-16 ***
## factor(educ)5       0.75491    0.06467  11.673  < 2e-16 ***
## lnexper             0.20744    0.01655  12.535  < 2e-16 ***
## male:factor(educ)2 -0.09651    0.07770  -1.242 0.214381
## male:factor(educ)3 -0.16677    0.07340  -2.272 0.023215 *
## male:factor(educ)4 -0.17236    0.07440  -2.317 0.020663 *
## male:factor(educ)5 -0.14616    0.07551  -1.935 0.053123 .
## male:lnexper        0.04063    0.02149   1.891 0.058875 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2811 on 1460 degrees of freedom
## Multiple R-squared:  0.4032, Adjusted R-squared:  0.3988
## F-statistic: 89.69 on 11 and 1460 DF,  p-value: < 2.2e-16``````
``anova(OLS5, OLS6)``
``````## Analysis of Variance Table
##
## Model 1: lnwage ~ male + factor(educ) + lnexper
## Model 2: lnwage ~ male * factor(educ) + lnexper + lnexper * male
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)
## 1   1465 116.47
## 2   1460 115.37  5    1.0957 2.7732 0.01683 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1``````

## 3.16 Table 3.13

OLS results specification 7

``summary(OLS7 <- lm(lnwage ~ male + lnexper*factor(educ), data=df))``
``````##
## Call:
## lm(formula = lnwage ~ male + lnexper * factor(educ), data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.63623 -0.15046  0.00831  0.16713  1.12415
##
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)            1.48891    0.21203   7.022 3.34e-12 ***
## male                   0.11597    0.01548   7.493 1.16e-13 ***
## lnexper                0.16312    0.06539   2.494   0.0127 *
## factor(educ)2          0.06727    0.22628   0.297   0.7663
## factor(educ)3          0.13525    0.21889   0.618   0.5367
## factor(educ)4          0.20495    0.21946   0.934   0.3505
## factor(educ)5          0.34130    0.21808   1.565   0.1178
## lnexper:factor(educ)2  0.01933    0.07049   0.274   0.7839
## lnexper:factor(educ)3  0.04988    0.06821   0.731   0.4647
## lnexper:factor(educ)4  0.08784    0.06877   1.277   0.2017
## lnexper:factor(educ)5  0.09996    0.06822   1.465   0.1430
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2815 on 1461 degrees of freedom
## Multiple R-squared:  0.4012, Adjusted R-squared:  0.3971
## F-statistic:  97.9 on 10 and 1461 DF,  p-value: < 2.2e-16``````