Introductory Econometrics Using R

Also covered using Python and Stata

library(wooldridge)
library(stargazer)
library(AER)
library(dynlm)

#### Example 18.1. Housing Investment and Residential Price Inflation

tshseinv <- ts(hseinv)
reg1 <- dynlm(linvpc ~ t, data = tshseinv)
stargazer(reg1, no.space=TRUE, type="text")
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                               linvpc
## -----------------------------------------------
## t                            0.008***
##                               (0.002)
## Constant                     -0.841***
##                               (0.045)
## -----------------------------------------------
## Observations                    42
## R2                             0.335
## Residual Std. Error       0.142 (df = 40)
## F Statistic           20.190*** (df = 1; 40)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
u <- resid(reg1)
RationalDL <- dynlm( u ~ gprice + L(u, 1) +  L(gprice, 1), data = tshseinv)
GeometricDL <- dynlm( u ~ gprice + L(u, 1), data = tshseinv)

stargazer(GeometricDL, RationalDL, title="Table 18.1 Distributed Lag Models for Housing Investment: log(invpc)", column.labels=c("GeometricDL", "RationalDL"), no.space=TRUE, type="text")
##
## Table 18.1 Distributed Lag Models for Housing Investment: log(invpc)
## =================================================================
##                                  Dependent variable:
##                     ---------------------------------------------
##                                           u
##                          GeometricDL             RationalDL
##                              (1)                    (2)
## -----------------------------------------------------------------
## gprice                     3.095***               3.256***
##                            (0.933)                (0.970)
## L(u, 1)                    0.340**                0.547***
##                            (0.132)                (0.152)
## L(gprice, 1)                                     -2.936***
##                                                   (0.973)
## Constant                    -0.010                 0.006
##                            (0.018)                (0.017)
## -----------------------------------------------------------------
## Observations                  41                     40
## R2                          0.407                  0.542
## Residual Std. Error    0.111 (df = 38)        0.100 (df = 36)
## F Statistic         13.022*** (df = 2; 38) 14.200*** (df = 3; 36)
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01

#### Example 18.2. Unit Root Test for Three-Month T-Bill Rates

tsintqrt <- ts(intqrt)
uroot1 <- dynlm(cr3 ~ L(r3, 1), data = tsintqrt)
rho <- 1 + uroot1$coef[2] rho ## L(r3, 1) ## 0.9092894 uroot2 <- dynlm(r3 ~ L(r3, 1), data = tsintqrt) stargazer(uroot1, uroot2, no.space=TRUE, type="text") ## ## =========================================================== ## Dependent variable: ## ---------------------------- ## cr3 r3 ## (1) (2) ## ----------------------------------------------------------- ## L(r3, 1) -0.091** 0.909*** ## (0.037) (0.037) ## Constant 0.625** 0.625** ## (0.261) (0.261) ## ----------------------------------------------------------- ## Observations 123 123 ## R2 0.048 0.836 ## Adjusted R2 0.040 0.834 ## Residual Std. Error (df = 121) 1.228 1.228 ## F Statistic (df = 1; 121) 6.116** 614.595*** ## =========================================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Example 18.3. Unit Root Test for Annual U.S. Inflation tsphillips <- ts(subset(phillips, phillips$year<1997))

uroot3 <- dynlm(cinf ~ inf_1 + L(cinf, 1) , data = tsphillips)
rho1 <- 1 + uroot3$coef[2] rho1 ## inf_1 ## 0.6896748 reg2 <- dynlm(cinf ~ L(inf,1), data = tsphillips) rho2 <- 1 + reg2$coef[2]
rho2
## L(inf, 1)
## 0.6652586
stargazer(uroot3, reg2, no.space=TRUE, type="text")
##
## ==============================================================
##                                Dependent variable:
##                     ------------------------------------------
##                                        cinf
##                             (1)                   (2)
## --------------------------------------------------------------
## inf_1                    -0.310***
##                           (0.103)
## L(cinf, 1)                 0.138
##                           (0.126)
## L(inf, 1)                                      -0.335***
##                                                 (0.107)
## Constant                  1.361**               1.277**
##                           (0.517)               (0.558)
## --------------------------------------------------------------
## Observations                 47                   48
## R2                         0.172                 0.175
## Residual Std. Error   2.050 (df = 44)       2.356 (df = 46)
## F Statistic         4.568** (df = 2; 44) 9.790*** (df = 1; 46)
## ==============================================================
## Note:                              *p<0.1; **p<0.05; ***p<0.01

#### Example 18.4. Unit Root in the Log of U.S. Real Gross Domestic Product

tsinven <- ts(inven)
gdp_reg <- dynlm(ggdp ~ trend(tsinven) + L(log(gdp), 1) + L(ggdp, 1), data = tsinven)
rho_g1 <- 1 + gdp_reg$coef[3] rho_g1 ## L(log(gdp), 1) ## 0.7903792 gdp_reg2 <- dynlm(ggdp ~ L(log(gdp), 1) + L(ggdp, 1), data = tsinven) rho_g2 <- 1 + gdp_reg2$coef[2]
rho_g2
## L(log(gdp), 1)
##      0.9773125
stargazer(gdp_reg, gdp_reg2, no.space=TRUE, type="text")
##
## ============================================================
##                               Dependent variable:
##                     ----------------------------------------
##                                       ggdp
##                             (1)                  (2)
## ------------------------------------------------------------
## trend(tsinven)            0.006**
##                           (0.003)
## L(log(gdp), 1)            -0.210**             -0.023*
##                           (0.087)              (0.012)
## L(ggdp, 1)                 0.264                0.167
##                           (0.165)              (0.168)
## Constant                  1.651**              0.215**
##                           (0.666)              (0.100)
## ------------------------------------------------------------
## Observations                 35                  35
## R2                         0.268                0.156
## Residual Std. Error   0.020 (df = 31)      0.021 (df = 32)
## F Statistic         3.783** (df = 3; 31) 2.959* (df = 2; 32)
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01

#### Example 18.5. Cointegration between Fertility and Personal Exemption

tsfertil3 <- ts(fertil3)
freg1 <- dynlm(gfr ~ t + pe, data=tsfertil3 )
summary(freg1)
##
## Time series regression with "ts" data:
## Start = 1, End = 72
##
## Call:
## dynlm(formula = gfr ~ t + pe, data = tsfertil3)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -38.659  -9.934   1.841  11.027  22.882
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109.93016    3.47526  31.632  < 2e-16 ***
## t            -0.90519    0.10899  -8.305 5.53e-12 ***
## pe            0.18666    0.03463   5.391 9.23e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.2 on 69 degrees of freedom
## Multiple R-squared:  0.5002, Adjusted R-squared:  0.4857
## F-statistic: 34.53 on 2 and 69 DF,  p-value: 4.064e-11
uf <- resid(freg1)

freg2 <- dynlm(cgfr ~ cpe, data=tsfertil3 ) # Regression in levels
summary(freg2)
##
## Time series regression with "ts" data:
## Start = 2, End = 72
##
## Call:
## dynlm(formula = cgfr ~ cpe, data = tsfertil3)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -7.980 -2.552 -0.377  1.866 14.854
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.78478    0.50204  -1.563    0.123
## cpe         -0.04268    0.02837  -1.504    0.137
##
## Residual standard error: 4.221 on 69 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.03176,    Adjusted R-squared:  0.01773
## F-statistic: 2.263 on 1 and 69 DF,  p-value: 0.137
##### Regression in levels with a single lag & time trend, manually
cu <- uf - lag(uf, -1)
freg3 <- dynlm(cu ~ L(u, 1) + L(cu, 1) + t, data=tsfertil3)
summary(freg3)
##
## Time series regression with "ts" data:
## Start = 3, End = 43
##
## Call:
## dynlm(formula = cu ~ L(u, 1) + L(cu, 1) + t, data = tsfertil3)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -25.139  -2.752  -1.105   1.881  24.343
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.18791    2.49481  -1.278    0.209
## L(u, 1)     -3.18030    8.13220  -0.391    0.698
## L(cu, 1)     0.09839    0.16400   0.600    0.552
## t            0.13609    0.09699   1.403    0.169
##
## Residual standard error: 7.145 on 37 degrees of freedom
## Multiple R-squared:  0.07913,    Adjusted R-squared:  0.004463
## F-statistic:  1.06 on 3 and 37 DF,  p-value: 0.3779
##### First difference regression, with two lags (equation 11.27)
freg4 <- dynlm(cgfr ~ cpe + L(cpe, 1) +  L(cpe, 2), data = tsfertil3)
summary(freg4)
##
## Time series regression with "ts" data:
## Start = 4, End = 72
##
## Call:
## dynlm(formula = cgfr ~ cpe + L(cpe, 1) + L(cpe, 2), data = tsfertil3)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -9.8307 -2.1842 -0.1912  1.8442 11.4506
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.96368    0.46776  -2.060  0.04339 *
## cpe         -0.03620    0.02677  -1.352  0.18101
## L(cpe, 1)   -0.01397    0.02755  -0.507  0.61385
## L(cpe, 2)    0.10999    0.02688   4.092  0.00012 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.859 on 65 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2325, Adjusted R-squared:  0.1971
## F-statistic: 6.563 on 3 and 65 DF,  p-value: 0.0006054

#### Example 18.6. Cointegrating Parameter for Interest Rates

tsintqrt <- ts(intqrt)
coreg1 <- dynlm(r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3, -1) + L(cr3, -2), data = tsintqrt)
summary(coreg1)
##
## Time series regression with "ts" data:
## Start = 4, End = 122
##
## Call:
## dynlm(formula = r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3,
##     -1) + L(cr3, -2), data = tsintqrt)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.95221 -0.13700 -0.02553  0.11817  0.99889
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.065146   0.056952   1.144  0.25512
## r3           1.038171   0.008077 128.529  < 2e-16 ***
## cr3         -0.053123   0.019441  -2.733  0.00730 **
## L(cr3, 1)   -0.061137   0.019043  -3.210  0.00173 **
## L(cr3, 2)   -0.043778   0.018903  -2.316  0.02239 *
## L(cr3, -1)  -0.003572   0.019122  -0.187  0.85215
## L(cr3, -2)   0.012366   0.018970   0.652  0.51582
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2455 on 112 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.9942, Adjusted R-squared:  0.9938
## F-statistic:  3176 on 6 and 112 DF,  p-value: < 2.2e-16
#Test serial correlation
uc <- resid(coreg1)
coreg2 <- dynlm(r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3, -1) + L(cr3, -2) + L(uc, 1), data = tsintqrt)
summary(coreg2)
##
## Time series regression with "ts" data:
## Start = 5, End = 122
##
## Call:
## dynlm(formula = r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3,
##     -1) + L(cr3, -2) + L(uc, 1), data = tsintqrt)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.94525 -0.12513 -0.03182  0.11479  1.01592
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.067599   0.057711   1.171  0.24399
## r3           1.037809   0.008159 127.198  < 2e-16 ***
## cr3         -0.052997   0.019539  -2.712  0.00776 **
## L(cr3, 1)   -0.060406   0.019224  -3.142  0.00215 **
## L(cr3, 2)   -0.043814   0.018991  -2.307  0.02292 *
## L(cr3, -1)  -0.003852   0.019213  -0.200  0.84148
## L(cr3, -2)   0.010436   0.019203   0.543  0.58792
## L(uc, 1)     0.103944   0.096191   1.081  0.28224
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2464 on 110 degrees of freedom
## Multiple R-squared:  0.9942, Adjusted R-squared:  0.9938
## F-statistic:  2692 on 7 and 110 DF,  p-value: < 2.2e-16
summary(dynlm(uc ~ L(uc, 1)))
##
## Time series regression with "ts" data:
## Start = 5, End = 122
##
## Call:
## dynlm(formula = uc ~ L(uc, 1))
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -0.9384 -0.1248 -0.0324  0.1139  1.0145
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.415e-05  2.209e-02   0.002    0.998
## L(uc, 1)    1.032e-01  9.340e-02   1.105    0.272
##
## Residual standard error: 0.24 on 116 degrees of freedom
## Multiple R-squared:  0.01041,    Adjusted R-squared:  0.00188
## F-statistic:  1.22 on 1 and 116 DF,  p-value: 0.2716
# Compare with Simple OLS
summary(lm(r6 ~ r3, data = intqrt))
##
## Call:
## lm(formula = r6 ~ r3, data = intqrt)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -1.1379 -0.1583 -0.0251  0.1155  1.1803
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.135374   0.054867   2.467    0.015 *
## r3          1.025899   0.007709 133.081   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2584 on 122 degrees of freedom
## Multiple R-squared:  0.9932, Adjusted R-squared:  0.9931
## F-statistic: 1.771e+04 on 1 and 122 DF,  p-value: < 2.2e-16

#### Example 18.7. Error Correction Model for Holding Yields

tsintqrt <- ts(intqrt)
erreg <- dynlm(chy6 ~ L(chy3, 1) + L(hy6hy3_1, 1), data = tsintqrt)
stargazer(erreg, no.space=TRUE, type="text")
##
## ===============================================
##                         Dependent variable:
##                     ---------------------------
##                                chy6
## -----------------------------------------------
## L(chy3, 1)                   1.218***
##                               (0.264)
## L(hy6hy3_1, 1)               -0.840***
##                               (0.244)
## Constant                      0.090**
##                               (0.043)
## -----------------------------------------------
## Observations                    122
## R2                             0.790
## Residual Std. Error      0.340 (df = 119)
## F Statistic          223.789*** (df = 2; 119)
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

#### Example 18.8. Forecasting the U.S. Unemployment Rate

tsphillips <- ts(phillips)
reg1 <- lm(unem ~ unem_1, data = tsphillips)
forreg1 <- dynlm(unem ~ unem_1 + inf_1, data = tsphillips)

forcast97 <- forreg1$coef[1] + forreg1$coef[2]*5.4 + forreg1$coef[3]*3 forcast97 # Forcast of unemp for 1997 ## (Intercept) ## 5.352627 unem_1f <- phillips$unem_1 -  5.4
inf_1f <- phillips$inf_1 - 3 phillips2 <- (cbind (phillips, unem_1f, inf_1f)) reg2 <- lm(unem ~ unem_1f + inf_1f, data = phillips2) reg2 ## ## Call: ## lm(formula = unem ~ unem_1f + inf_1f, data = phillips2) ## ## Coefficients: ## (Intercept) unem_1f inf_1f ## 5.3526 0.6495 0.1830 stargazer(reg1, forreg1, reg2, keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, type="text") ## ## ========================================== ## Dependent variable: ## ----------------------------- ## unem ## OLS dynamic OLS ## linear ## (1) (2) (3) ## ------------------------------------------ ## unem_1 0.742*** 0.649*** ## (0.089) (0.078) ## inf_1 0.183*** ## (0.039) ## unem_1f 0.649*** ## (0.078) ## inf_1f 0.183*** ## (0.039) ## Constant 1.490*** 1.296*** 5.353*** ## (0.520) (0.441) (0.119) ## ------------------------------------------ ## Observations 55 55 55 ## R2 0.566 0.696 0.696 ## Adjusted R2 0.558 0.685 0.685 ## ========================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Example 18.9. Out-of-Sample Comparisons of Unemployment Forecasts phillips2 <- subset(phillips, phillips$year<1997)
oreg <- lm(unem ~ unem_1, data = phillips2)
RMSE <- function(error) { sqrt(mean(error^2))}
MSE <- function(error) { mean(abs(error))}
RMSE(oreg$residuals) ## [1] 1.026491 MSE(oreg$residuals)
## [1] 0.8129182
summary(abs(resid(oreg)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
## 0.09675 0.29792 0.72397 0.81292 1.05564 2.82708
oreg2 <- lm(unem ~ unem_1 + inf_1, data = phillips2)
RMSE(oreg2$residuals) ## [1] 0.8549463 MSE(oreg2$residuals)
## [1] 0.6493123
summary(abs(resid(oreg2)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##  0.0124  0.2533  0.5500  0.6493  0.8548  2.1730
stargazer(oreg, oreg2, keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, type="text")
##
## =========================================
##                  Dependent variable:
##              ----------------------------
##                          unem
##                   (1)            (2)
## -----------------------------------------
## unem_1          0.732***      0.647***
##                 (0.097)        (0.084)
## inf_1                         0.184***
##                                (0.041)
## Constant        1.572***       1.304**
##                 (0.577)        (0.490)
## -----------------------------------------
## Observations       48            48
## R2               0.554          0.691
## Note:         *p<0.1; **p<0.05; ***p<0.01
phillips2 <- subset(phillips, phillips$year<1997) ifreg <- lm(inf ~ inf_1 , data = phillips2) ifreg2 <- lm(inf ~ inf_1 + unem_1, data = phillips2) stargazer(ifreg, ifreg2, keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, type="text") ## ## ========================================= ## Dependent variable: ## ---------------------------- ## inf ## (1) (2) ## ----------------------------------------- ## inf_1 0.665*** 0.659*** ## (0.107) (0.111) ## unem_1 0.057 ## (0.226) ## Constant 1.277** 0.974 ## (0.558) (1.320) ## ----------------------------------------- ## Observations 48 48 ## R2 0.457 0.457 ## Adjusted R2 0.445 0.433 ## ========================================= ## Note: *p<0.1; **p<0.05; ***p<0.01 inf96 <- subset(phillips2$inf,phillips2$year==1996) inf97 <- ifreg$coef[1] + ifreg$coef[2]*inf96 mean(inf97) ## [1] 3.272426 oreg2 <- lm(unem ~ unem_1 + inf_1, data = phillips2) unem96 <- subset(phillips2$unem,phillips2$year==1996) unem97 <- oreg2$coef[1] + oreg2$coef[2]*unem96 + oreg2$coef[3]*inf96
unem98 <- (oreg2$coef[1] + oreg2$coef[2]*unem97 + oreg2\$coef[3]*inf97)
mean(unem98)
## [1] 5.365136