Verbeek5. Modern Econometrics Using R

Chapter 3. Interpreting and Comparing Regression Models

Table 3.1

3.4 Illustration: Explaining House Prices Load libraries

library(haven)
library(stargazer)
library(dplyr)
library(MASS)
library(AER)

OLS results hedonic price function

df <- read_dta("Data/housing.dta")
OLS1 = lm(log(price)~log(lotsize) + bedrooms + bathrms + airco,data=df)
summary(OLS1)

## 
## Call:
## lm(formula = log(price) ~ log(lotsize) + bedrooms + bathrms + 
##     airco, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81782 -0.15562  0.00778  0.16468  0.84143 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.09378    0.23155  30.636  < 2e-16 ***
## log(lotsize)  0.40042    0.02781  14.397  < 2e-16 ***
## bedrooms      0.07770    0.01549   5.017 7.11e-07 ***
## bathrms       0.21583    0.02300   9.386  < 2e-16 ***
## airco         0.21167    0.02372   8.923  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2456 on 541 degrees of freedom
## Multiple R-squared:  0.5674, Adjusted R-squared:  0.5642 
## F-statistic: 177.4 on 4 and 541 DF,  p-value: < 2.2e-16

HOME | Back to top

Table 3.2

OLS results hedonic price function, extended model

OLS2 = lm(log(price)~ log(lotsize) + bedrooms + bathrms + airco + driveway + 
            recroom + fullbase + gashw + garagepl + prefarea + stories, data=df)
summary(OLS2)

## 
## Call:
## lm(formula = log(price) ~ log(lotsize) + bedrooms + bathrms + 
##     airco + driveway + recroom + fullbase + gashw + garagepl + 
##     prefarea + stories, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68355 -0.12247  0.00802  0.12780  0.67564 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.74509    0.21634  35.801  < 2e-16 ***
## log(lotsize)  0.30313    0.02669  11.356  < 2e-16 ***
## bedrooms      0.03440    0.01427   2.410 0.016294 *  
## bathrms       0.16576    0.02033   8.154 2.52e-15 ***
## airco         0.16642    0.02134   7.799 3.29e-14 ***
## driveway      0.11020    0.02823   3.904 0.000107 ***
## recroom       0.05797    0.02605   2.225 0.026482 *  
## fullbase      0.10449    0.02169   4.817 1.90e-06 ***
## gashw         0.17902    0.04389   4.079 5.22e-05 ***
## garagepl      0.04795    0.01148   4.178 3.43e-05 ***
## prefarea      0.13185    0.02267   5.816 1.04e-08 ***
## stories       0.09169    0.01261   7.268 1.30e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2104 on 534 degrees of freedom
## Multiple R-squared:  0.6865, Adjusted R-squared:  0.6801 
## F-statistic: 106.3 on 11 and 534 DF,  p-value: < 2.2e-16

linearHypothesis(OLS2, c("driveway = 0", "recroom =0","fullbase = 0", "gashw =0","garagepl = 0", "prefarea =0", "stories=0"))

## Linear hypothesis test
## 
## Hypothesis:
## driveway = 0
## recroom = 0
## fullbase = 0
## gashw = 0
## garagepl = 0
## prefarea = 0
## stories = 0
## 
## Model 1: restricted model
## Model 2: log(price) ~ log(lotsize) + bedrooms + bathrms + airco + driveway + 
##     recroom + fullbase + gashw + garagepl + prefarea + stories
## 
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    541 32.622                                  
## 2    534 23.638  7    8.9839 28.993 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HOME | Back to top

Table 3.3

OLS results hedonic price function, linear model

summary(OLS3 <- lm(price~lotsize + bedrooms + bathrms + airco +  driveway + 
           recroom + fullbase + gashw + garagepl + prefarea + stories,data=df))

## 
## Call:
## lm(formula = price ~ lotsize + bedrooms + bathrms + airco + driveway + 
##     recroom + fullbase + gashw + garagepl + prefarea + stories, 
##     data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -41389  -9307   -591   7353  74875 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4038.3504  3409.4713  -1.184 0.236762    
## lotsize         3.5463     0.3503  10.124  < 2e-16 ***
## bedrooms     1832.0035  1047.0002   1.750 0.080733 .  
## bathrms     14335.5585  1489.9209   9.622  < 2e-16 ***
## airco       12632.8904  1555.0211   8.124 3.15e-15 ***
## driveway     6687.7789  2045.2458   3.270 0.001145 ** 
## recroom      4511.2838  1899.9577   2.374 0.017929 *  
## fullbase     5452.3855  1588.0239   3.433 0.000642 ***
## gashw       12831.4063  3217.5971   3.988 7.60e-05 ***
## garagepl     4244.8290   840.5442   5.050 6.07e-07 ***
## prefarea     9369.5132  1669.0907   5.614 3.19e-08 ***
## stories      6556.9457   925.2899   7.086 4.37e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15420 on 534 degrees of freedom
## Multiple R-squared:  0.6731, Adjusted R-squared:  0.6664 
## F-statistic: 99.97 on 11 and 534 DF,  p-value: < 2.2e-16

HOME | Back to top

Table 3.4

df <- read_dta("Data/predictsp5.dta")
#library(dplyr)
df <- df %>%
  mutate(bm1 = lag(b_m, n = 1),
         dfr1 = lag(dfr, n = 1),
         dfy1 = lag(dfy, n = 1),
         logdp1 = lag(logdp, n = 1),
         logdy1 = lag(logdy, n = 1),
         logep1 = lag(logep, n = 1),
         infl2 = lag(infl, n = 2),
         ltr1 = lag(ltr, n = 1),
         lty1 = lag(lty, n = 1),
         tms1 = lag(tms, n = 1))
summary(Full <- lm(exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 + infl2 + ltr1 + lty1 + tms1 + winter,
           data= df, subset = yyyymm<="200312" ))

## 
## Call:
## lm(formula = exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 + 
##     infl2 + ltr1 + lty1 + tms1 + winter, data = df, subset = yyyymm <= 
##     "200312")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.212034 -0.023642  0.000755  0.026591  0.158504 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.201192   0.062923   3.197 0.001456 ** 
## bm1         -0.036863   0.017917  -2.057 0.040054 *  
## dfr1         0.174267   0.168065   1.037 0.300177    
## dfy1         1.705778   0.687145   2.482 0.013307 *  
## logdp1       0.051822   0.042463   1.220 0.222768    
## logdy1      -0.055053   0.040887  -1.346 0.178627    
## logep1       0.037715   0.012417   3.037 0.002485 ** 
## infl2       -0.360527   0.647676  -0.557 0.577965    
## ltr1         0.176099   0.075575   2.330 0.020113 *  
## lty1        -0.349943   0.097892  -3.575 0.000377 ***
## tms1         0.414244   0.149456   2.772 0.005741 ** 
## winter       0.009545   0.003262   2.927 0.003549 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0409 on 634 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.07494,    Adjusted R-squared:  0.05889 
## F-statistic: 4.669 on 11 and 634 DF,  p-value: 6.99e-07

Stepwise regression: excludes regressors with t-ratio smaller than 1.96

df2 <- subset(df, df$yyyymm<="200312")
summary(Stepwise <- lm(exret ~ bm1 +  dfy1 + logep1 + lty1 + tms1 + winter, data= df2 ))

## 
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + lty1 + tms1 + winter, 
##     data = df2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.220090 -0.024466  0.000701  0.026150  0.167360 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.208080   0.053876   3.862 0.000124 ***
## bm1         -0.041395   0.015614  -2.651 0.008219 ** 
## dfy1         2.012768   0.653421   3.080 0.002156 ** 
## logep1       0.035245   0.008954   3.936 9.18e-05 ***
## lty1        -0.366410   0.086869  -4.218 2.82e-05 ***
## tms1         0.401002   0.134763   2.976 0.003034 ** 
## winter       0.009089   0.003238   2.807 0.005156 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04093 on 640 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.06538,    Adjusted R-squared:  0.05662 
## F-statistic: 7.462 on 6 and 640 DF,  p-value: 9.683e-08

Max adjusted R-square / Min AIC

library("leaps")
leaps <- regsubsets(
  exret ~ bm1 + dfr1 + dfy1 + logdp1 + logdy1 + logep1 + infl2 + ltr1 + lty1 + tms1 + winter, 
  data= df2, nbest=10)
par(mar=c(1, 2.1, 1.1, 1))
plot(leaps,scale="adjr2")

summary(minAIC <- lm(exret~bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 + winter, data=df2))

## 
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 + 
##     winter, data = df2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.214873 -0.023876  0.001484  0.026213  0.163967 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.201568   0.053865   3.742 0.000199 ***
## bm1         -0.038220   0.015666  -2.440 0.014970 *  
## dfy1         1.737098   0.667310   2.603 0.009452 ** 
## logep1       0.034227   0.008950   3.824 0.000144 ***
## ltr1         0.122555   0.063155   1.941 0.052755 .  
## lty1        -0.350817   0.087053  -4.030 6.25e-05 ***
## tms1         0.417569   0.134743   3.099 0.002027 ** 
## winter       0.009189   0.003232   2.844 0.004603 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04084 on 639 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.07086,    Adjusted R-squared:  0.06068 
## F-statistic: 6.961 on 7 and 639 DF,  p-value: 5.234e-08

Alternatively,

#library(MASS)
df <- na.omit(df)
AIC <- stepAIC(Full, direction = "both", trace = F)
summary(AIC)

## 
## Call:
## lm(formula = exret ~ bm1 + dfy1 + logep1 + ltr1 + lty1 + tms1 + 
##     winter, data = df, subset = yyyymm <= "200312")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.214809 -0.023822  0.001438  0.026190  0.163974 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.203662   0.054140   3.762 0.000184 ***
## bm1         -0.038576   0.015700  -2.457 0.014272 *  
## dfy1         1.747346   0.668210   2.615 0.009135 ** 
## logep1       0.034574   0.008996   3.843 0.000133 ***
## ltr1         0.122084   0.063207   1.931 0.053865 .  
## lty1        -0.353789   0.087410  -4.047 5.81e-05 ***
## tms1         0.418881   0.134869   3.106 0.001982 ** 
## winter       0.009236   0.003236   2.854 0.004451 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04086 on 638 degrees of freedom
## Multiple R-squared:  0.07098,    Adjusted R-squared:  0.06078 
## F-statistic: 6.963 on 7 and 638 DF,  p-value: 5.21e-08

Min BIC

par(mar=c(1, 2.1, 1.1, 1))
plot(leaps, scale="bic")

summary(minBIC <- lm(exret ~ logep1 + ltr1 + lty1 + tms1 + winter, data=df2))

## 
## Call:
## lm(formula = exret ~ logep1 + ltr1 + lty1 + tms1 + winter, data = df2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.213682 -0.024862  0.000898  0.026814  0.154582 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.094022   0.024134   3.896 0.000108 ***
## logep1       0.016817   0.004437   3.790 0.000165 ***
## ltr1         0.158186   0.062029   2.550 0.010998 *  
## lty1        -0.212101   0.062482  -3.395 0.000730 ***
## tms1         0.512621   0.130553   3.927 9.55e-05 ***
## winter       0.010111   0.003232   3.128 0.001838 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04105 on 641 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.05816,    Adjusted R-squared:  0.05082 
## F-statistic: 7.917 on 5 and 641 DF,  p-value: 2.993e-07

HOME | Back to top

Table 3.4 Cont’d

stargazer(Full, minAIC, minBIC, Stepwise, no.space=TRUE, 
          type="text", keep.stat = c("ser", "N", "rsq",  "adj.rsq" ), 
          column.labels = c("Full", "Min AIC", "Min BIC", "Stepwise"),
           title ="Table 3.4 Forecasting equation S&P 500 excess returns")

## 
## Table 3.4 Forecasting equation S&P 500 excess returns
## =======================================================================================
##                                             Dependent variable:                        
##                     -------------------------------------------------------------------
##                                                    exret                               
##                           Full           Min AIC          Min BIC          Stepwise    
##                           (1)              (2)              (3)              (4)       
## ---------------------------------------------------------------------------------------
## bm1                     -0.037**         -0.038**                         -0.041***    
##                         (0.018)          (0.016)                           (0.016)     
## dfr1                     0.174                                                         
##                         (0.168)                                                        
## dfy1                    1.706**          1.737***                          2.013***    
##                         (0.687)          (0.667)                           (0.653)     
## logdp1                   0.052                                                         
##                         (0.042)                                                        
## logdy1                   -0.055                                                        
##                         (0.041)                                                        
## logep1                  0.038***         0.034***         0.017***         0.035***    
##                         (0.012)          (0.009)          (0.004)          (0.009)     
## infl2                    -0.361                                                        
##                         (0.648)                                                        
## ltr1                    0.176**           0.123*          0.158**                      
##                         (0.076)          (0.063)          (0.062)                      
## lty1                   -0.350***        -0.351***        -0.212***        -0.366***    
##                         (0.098)          (0.087)          (0.062)          (0.087)     
## tms1                    0.414***         0.418***         0.513***         0.401***    
##                         (0.149)          (0.135)          (0.131)          (0.135)     
## winter                  0.010***         0.009***         0.010***         0.009***    
##                         (0.003)          (0.003)          (0.003)          (0.003)     
## Constant                0.201***         0.202***         0.094***         0.208***    
##                         (0.063)          (0.054)          (0.024)          (0.054)     
## ---------------------------------------------------------------------------------------
## Observations              646              647              647              647       
## R2                       0.075            0.071            0.058            0.065      
## Adjusted R2              0.059            0.061            0.051            0.057      
## Residual Std. Error 0.041 (df = 634) 0.041 (df = 639) 0.041 (df = 641) 0.041 (df = 640)
## =======================================================================================
## Note:                                                       *p<0.1; **p<0.05; ***p<0.01

Table 3.5

Out-of-sample forecasting performance Jan 2004-Dec 2013

Forcast

df3 <- subset(df, df$yyyymm>"200312")
attach(df3)
Full_frc = cbind(1, bm1, dfr1, dfy1, logdp1, logdy1, logep1, infl2, ltr1, lty1, tms1, winter)%*%Full$coef
minAIC_frc <- cbind(1, bm1, dfy1, logep1, ltr1, lty1, tms1, winter)%*%minAIC$coef
minBIC_frc <- cbind(1, logep1, ltr1, lty1, tms1, winter)%*%minBIC$coef
Stepwise_frc <- cbind(1, bm1, dfy1, logep1, lty1, tms1, winter)%*%Stepwise$coef

Forecast performance: Full model

Full_frc_er <- Full_frc - exret
Full_rmse <- sqrt(sum(Full_frc_er^2)/length(Full_frc_er))*100
Full_mad <- sum(abs(Full_frc_er))/length(Full_frc_er)*100
Full_r2os = (1-sum(Full_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
Full_r2ps <- cor(Full_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(Full_frc>0,1,0), 
                  actual=ifelse(df3$exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
Full_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

Full_forecast_perf <- round(c(Full_rmse, Full_mad, Full_r2os, Full_r2ps, Full_hit_rate),3)
Full_forecast_perf

## [1]   4.800   3.416 -31.412   0.040  58.333

Forecast performance: Max R-sqaure / Min AIC model

minAIC_frc_er <- minAIC_frc - exret
minAIC_rmse <- sqrt(sum(minAIC_frc_er^2)/length(minAIC_frc_er))*100
minAIC_mad <- sum(abs(minAIC_frc_er))/length(minAIC_frc_er)*100
minAIC_r2os = (1-sum(minAIC_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
minAIC_r2ps <- cor(minAIC_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(minAIC_frc>0,1,0), 
                  actual=ifelse(df3$exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
minAIC_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

minAIC_forecast_perf <- round(c(minAIC_rmse, minAIC_mad, minAIC_r2os, minAIC_r2ps, minAIC_hit_rate),3)
minAIC_forecast_perf #In percentage

## [1]   4.726   3.339 -27.345  -0.535  59.167

Forecast performance: Min BIC model

minBIC_frc_er <- minBIC_frc - exret
minBIC_rmse <- sqrt(sum(minBIC_frc_er^2)/length(minBIC_frc_er))*100
minBIC_mad <- sum(abs(minBIC_frc_er))/length(minBIC_frc_er)*100
minBIC_r2os = (1-sum(minBIC_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
minBIC_r2ps <- cor(minBIC_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(minBIC_frc>0,1,0), 
                  actual=ifelse(exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
minBIC_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

minBIC_forecast_perf <- round(c(minBIC_rmse, minBIC_mad, minBIC_r2os, minBIC_r2ps, minBIC_hit_rate),3)
minBIC_forecast_perf

## [1]  4.306  3.107 -5.725  7.522 56.667

Stepwise_frc_er <- Stepwise_frc - exret
Stepwise_rmse <- sqrt(sum(Stepwise_frc_er^2)/length(Stepwise_frc_er))*100
Stepwise_mad <- sum(abs(Stepwise_frc_er))/length(Stepwise_frc_er)*100
Stepwise_r2os = (1-sum(Stepwise_frc_er^2)/sum((mean(df2$exret)-df3$exret)^2) )*100
Stepwise_r2ps <- cor(Stepwise_frc, exret, method="pearson")*100

Hit <- data.frame(forecast <- ifelse(Stepwise_frc>0,1,0), 
                  actual=ifelse(df3$exret>0,1,0))
Hit_matrix <- xtabs(~Hit$forecast + Hit$actual)
Stepwise_hit_rate <- (sum(diag(Hit_matrix))/nrow(Hit))*100

Stepwise_forecast_perf <- round(c(Stepwise_rmse, Stepwise_mad, Stepwise_r2os, Stepwise_r2ps, Stepwise_hit_rate),3)
Stepwise_forecast_perf

## [1]   4.787   3.402 -30.685  -3.309  60.000

Table 3.5 final

forecast_perf = cbind(Full_forecast_perf, minBIC_forecast_perf, minBIC_forecast_perf, Stepwise_forecast_perf)
colnames(forecast_perf)=c("Full","Min AIC","Min BIC", "Stepwise")
rownames(forecast_perf)=c("RMSE","MAD","R2os","R2ps","HitRate")
forecast_perf

##            Full Min AIC Min BIC Stepwise
## RMSE      4.800   4.306   4.306    4.787
## MAD       3.416   3.107   3.107    3.402
## R2os    -31.412  -5.725  -5.725  -30.685
## R2ps      0.040   7.522   7.522   -3.309
## HitRate  58.333  56.667  56.667   60.000

detach(df3)

HOME | Back to top

Table 3.6

Summary statistics, 1472 individuals

df <- read_dta("Data/Bwages.dta")

df2 <- as.data.frame(cbind(wage=df$wage, educ=df$educ, exper=df$exper, male=df$male))
ag <- aggregate(. ~ male, df2, function(x) c(mean = mean(x), sd = sd(x)))
table3.2 <- do.call("data.frame", ag)
round(table3.2, 2)

##   male wage.mean wage.sd educ.mean educ.sd exper.mean exper.sd
## 1    0     10.26    3.81      3.59    1.09      15.20     9.70
## 2    1     11.56    4.75      3.24    1.26      18.52    10.25

Table 3.7

OLS results specification 1

summary(OLS1<-lm(wage ~ male + educ + exper, data=df))

## 
## Call:
## lm(formula = wage ~ male + educ + exper, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5294  -1.9686  -0.3124   1.5679  30.7015 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.213692   0.386895   0.552    0.581    
## male        1.346144   0.192736   6.984 4.32e-12 ***
## educ        1.986090   0.080640  24.629  < 2e-16 ***
## exper       0.192275   0.009583  20.064  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.548 on 1468 degrees of freedom
## Multiple R-squared:  0.3656, Adjusted R-squared:  0.3643 
## F-statistic:   282 on 3 and 1468 DF,  p-value: < 2.2e-16

Table 3.8

OLS results specification 2

summary(OLS2<-lm(wage ~ male + educ + exper + I(exper^2),data=df))

## 
## Call:
## lm(formula = wage ~ male + educ + exper + I(exper^2), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.7246  -1.9519  -0.3107   1.5117  30.5951 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.8924851  0.4329127  -2.062   0.0394 *  
## male         1.3336934  0.1908668   6.988 4.23e-12 ***
## educ         1.9881267  0.0798526  24.897  < 2e-16 ***
## exper        0.3579993  0.0316566  11.309  < 2e-16 ***
## I(exper^2)  -0.0043692  0.0007962  -5.487 4.80e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.514 on 1467 degrees of freedom
## Multiple R-squared:  0.3783, Adjusted R-squared:  0.3766 
## F-statistic: 223.2 on 4 and 1467 DF,  p-value: < 2.2e-16

Figure 3.1

Residual <- resid(OLS2)
Fitted <- predict(OLS2)
par(mar=c(2, 2.1, 1.1, 1))
plot(Fitted, Residual, xlim = c(0, 20), ylim = c(-20, 40), 
     xlab="Fitted value", ylab="Residual", cex.main=0.8,  
     main = "Figure 3.1 Residuals versus fitted values, linear model")

Table 3.9

OLS results specification 3 and the F-test

summary(OLS3 <- lm(lnwage ~ male + lneduc + lnexper + I(lnexper^2), data=df))

## 
## Call:
## lm(formula = lnwage ~ male + lneduc + lnexper + I(lnexper^2), 
##     data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.75085 -0.15921  0.00618  0.17145  1.10533 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.26271    0.06634  19.033  < 2e-16 ***
## male          0.11794    0.01557   7.574 6.35e-14 ***
## lneduc        0.44218    0.01819  24.306  < 2e-16 ***
## lnexper       0.10982    0.05438   2.019   0.0436 *  
## I(lnexper^2)  0.02601    0.01148   2.266   0.0236 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2862 on 1467 degrees of freedom
## Multiple R-squared:  0.3783, Adjusted R-squared:  0.3766 
## F-statistic: 223.1 on 4 and 1467 DF,  p-value: < 2.2e-16

linearHypothesis(OLS3, c("lnexper = 0", "I(lnexper^2) =0"))

## Linear hypothesis test
## 
## Hypothesis:
## lnexper = 0
## I(lnexper^2) = 0
## 
## Model 1: restricted model
## Model 2: lnwage ~ male + lneduc + lnexper + I(lnexper^2)
## 
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1   1469 158.57                                  
## 2   1467 120.20  2    38.362 234.09 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HOME | Back to top

Figure 3.2

Residual <- resid(OLS3)
Fitted <- predict(OLS3)
par(mar=c(2, 3.1, 1.1, 2.1))
plot(Fitted, Residual, xlim = c(1, 3), ylim = c(-2, 2), 
     main = "Figure 3.1 Residuals against fitted values, loglinear model",
     cex.main=0.8)

Table 3.10

OLS results specification 4

summary(OLS4 <- lm(lnwage ~ male + lneduc + lnexper, data=df))

## 
## Call:
## lm(formula = lnwage ~ male + lneduc + lnexper, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.70477 -0.15862  0.00485  0.17366  1.11815 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.14473    0.04118  27.798  < 2e-16 ***
## male         0.12008    0.01556   7.715 2.22e-14 ***
## lneduc       0.43662    0.01805  24.188  < 2e-16 ***
## lnexper      0.23065    0.01073  21.488  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2867 on 1468 degrees of freedom
## Multiple R-squared:  0.3761, Adjusted R-squared:  0.3748 
## F-statistic:   295 on 3 and 1468 DF,  p-value: < 2.2e-16

Table 3.11

OLS results specification 5 and the F-test

summary(OLS5 <- lm(lnwage ~ male + factor(educ) + lnexper, data=df))

## 
## Call:
## lm(formula = lnwage ~ male + factor(educ) + lnexper, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65548 -0.15138  0.01324  0.16998  1.11684 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.27189    0.04483  28.369  < 2e-16 ***
## male           0.11762    0.01546   7.610 4.88e-14 ***
## factor(educ)2  0.14364    0.03336   4.306 1.77e-05 ***
## factor(educ)3  0.30487    0.03202   9.521  < 2e-16 ***
## factor(educ)4  0.47428    0.03301  14.366  < 2e-16 ***
## factor(educ)5  0.63910    0.03322  19.237  < 2e-16 ***
## lnexper        0.23022    0.01056  21.804  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.282 on 1465 degrees of freedom
## Multiple R-squared:  0.3976, Adjusted R-squared:  0.3951 
## F-statistic: 161.1 on 6 and 1465 DF,  p-value: < 2.2e-16

anova(OLS4,OLS5)

## Analysis of Variance Table
## 
## Model 1: lnwage ~ male + lneduc + lnexper
## Model 2: lnwage ~ male + factor(educ) + lnexper
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1   1468 120.62                                  
## 2   1465 116.47  3    4.1563 17.427 4.047e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HOME | Back to top

Table 3.12

OLS results specification 6 and the F-test

summary(OLS6 <- lm(lnwage ~ male*factor(educ) + lnexper + lnexper*male, data=df))

## 
## Call:
## lm(formula = lnwage ~ male * factor(educ) + lnexper + lnexper * 
##     male, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.63955 -0.15328  0.01225  0.16647  1.11698 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.21584    0.07768  15.652  < 2e-16 ***
## male                0.15375    0.09522   1.615 0.106595    
## factor(educ)2       0.22411    0.06758   3.316 0.000935 ***
## factor(educ)3       0.43319    0.06323   6.851 1.08e-11 ***
## factor(educ)4       0.60191    0.06280   9.585  < 2e-16 ***
## factor(educ)5       0.75491    0.06467  11.673  < 2e-16 ***
## lnexper             0.20744    0.01655  12.535  < 2e-16 ***
## male:factor(educ)2 -0.09651    0.07770  -1.242 0.214381    
## male:factor(educ)3 -0.16677    0.07340  -2.272 0.023215 *  
## male:factor(educ)4 -0.17236    0.07440  -2.317 0.020663 *  
## male:factor(educ)5 -0.14616    0.07551  -1.935 0.053123 .  
## male:lnexper        0.04063    0.02149   1.891 0.058875 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2811 on 1460 degrees of freedom
## Multiple R-squared:  0.4032, Adjusted R-squared:  0.3988 
## F-statistic: 89.69 on 11 and 1460 DF,  p-value: < 2.2e-16

anova(OLS5, OLS6)

## Analysis of Variance Table
## 
## Model 1: lnwage ~ male + factor(educ) + lnexper
## Model 2: lnwage ~ male * factor(educ) + lnexper + lnexper * male
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1   1465 116.47                              
## 2   1460 115.37  5    1.0957 2.7732 0.01683 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HOME | Back to top

Table 3.13

OLS results specification 7

summary(OLS7 <- lm(lnwage ~ male + lnexper*factor(educ), data=df))

## 
## Call:
## lm(formula = lnwage ~ male + lnexper * factor(educ), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.63623 -0.15046  0.00831  0.16713  1.12415 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            1.48891    0.21203   7.022 3.34e-12 ***
## male                   0.11597    0.01548   7.493 1.16e-13 ***
## lnexper                0.16312    0.06539   2.494   0.0127 *  
## factor(educ)2          0.06727    0.22628   0.297   0.7663    
## factor(educ)3          0.13525    0.21889   0.618   0.5367    
## factor(educ)4          0.20495    0.21946   0.934   0.3505    
## factor(educ)5          0.34130    0.21808   1.565   0.1178    
## lnexper:factor(educ)2  0.01933    0.07049   0.274   0.7839    
## lnexper:factor(educ)3  0.04988    0.06821   0.731   0.4647    
## lnexper:factor(educ)4  0.08784    0.06877   1.277   0.2017    
## lnexper:factor(educ)5  0.09996    0.06822   1.465   0.1430    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2815 on 1461 degrees of freedom
## Multiple R-squared:  0.4012, Adjusted R-squared:  0.3971 
## F-statistic:  97.9 on 10 and 1461 DF,  p-value: < 2.2e-16

HOME | Back to top

Verbeek 5ed. Modern Econometrics Using R

Solomon Negash

May 20, 2020

Chapter 3. Interpreting and Comparing Regression Models

Table 3.1

Table 3.2

Table 3.3

Table 3.4

Table 3.4 Cont’d

Table 3.5

Table 3.6

Table 3.7

Table 3.8

Figure 3.1

Table 3.9

Figure 3.2

Table 3.10

Table 3.11

Table 3.12

Table 3.13