9. CHAPTER 09. MODELS WITH NATURAL LOGARITHMS#

SET UP

# Packages used throughout this chapter
library(foreign) # read.dta() for opening Stata .dta files
library(psych)   # describe() for a fuller summary of descriptive statistics
library(repr)    # repr.plot.* options that control notebook plot dimensions

# Session-wide display settings:
#   repr.plot.width / repr.plot.height - plot size in inches
#   width                              - character width of printed output
options(repr.plot.width = 12, repr.plot.height = 6, width = 150)

9.1. 9.4 EXAMPLE: EARNINGS AND EDUCATION#

# Read the earnings dataset from its Stata file.
df <- read.dta("Dataset/AED_EARNINGS.DTA")
# Put the columns of df on the search path so they can be referred to by bare
# name (earnings, education, ...) in the cells that follow.
attach(df)
# Descriptive statistics for every column, then the first rows of the data.
print(describe(df))
print(head(df))
          vars   n     mean       sd median  trimmed      mad  min    max  range  skew kurtosis      se
earnings     1 171 41412.69 25527.05  36000 38052.70 17049.90 1050 172000 170950  1.70     4.23 1952.10
education    2 171    14.43     2.74     14    14.45     2.97    3     20     17 -0.45     1.16    0.21
age          3 171    30.00     0.00     30    30.00     0.00   30     30      0   NaN      NaN    0.00
gender       4 171     0.00     0.00      0     0.00     0.00    0      0      0   NaN      NaN    0.00
  earnings education age gender
1    25000        14  30      0
2    40000        12  30      0
3    25000        13  30      0
4    38000        13  30      0
5    28800        12  30      0
6    31000        16  30      0

Create variables and add to data frame

# Natural logarithms of earnings and education. Each is kept as a
# free-standing vector and also stored as a column of df
# (lnearn first, then lneduc).
lnearn <- log(earnings)
lneduc <- log(education)
df$lnearn <- lnearn
df$lneduc <- lneduc

9.1.1. Table 9.2#

# Columns reported in Table 9.2: each variable in levels and in natural logs.
table92vars <- c("earnings", "lnearn", "education", "lneduc")
# Descriptive statistics restricted to those columns, in that order.
table92 <- df[table92vars]
print(describe(table92))
          vars   n     mean       sd   median  trimmed      mad     min       max    range  skew kurtosis      se
earnings     1 171 41412.69 25527.05 36000.00 38052.70 17049.90 1050.00 172000.00 170950.0  1.70     4.23 1952.10
lnearn       2 171    10.46     0.62    10.49    10.47     0.54    6.96     12.06      5.1 -0.90     4.74    0.05
education    3 171    14.43     2.74    14.00    14.45     2.97    3.00     20.00     17.0 -0.45     1.16    0.21
lneduc       4 171     2.65     0.22     2.64     2.66     0.20    1.10      3.00      1.9 -2.42    13.27    0.02

9.1.2. Table 9.3#

Linear model

# summ() from jtools prints the formatted regression tables in this chapter.
library(jtools)
# Linear model: earnings in levels regressed on education in levels.
ols.linear <- lm(earnings ~ education)
summ(ols.linear, digits = 3)
MODEL INFO:
Observations: 171
Dependent Variable: earnings
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 68.857, p = 0.000
R² = 0.289
Adj. R² = 0.285 

Standard errors:OLS
----------------------------------------------------------
                          Est.       S.E.   t val.       p
----------------- ------------ ---------- -------- -------
(Intercept)         -31055.915   8887.835   -3.494   0.001
education             5021.123    605.101    8.298   0.000
----------------------------------------------------------

Log-linear Model

# Log-linear model: log earnings regressed on education in levels.
# Fit and store the model first, then print the regression summary.
ols.loglin <- lm(lnearn ~ education)
summ(ols.loglin, digits = 3)
MODEL INFO:
Observations: 171
Dependent Variable: lnearn
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 84.743, p = 0.000
R² = 0.334
Adj. R² = 0.330 

Standard errors:OLS
--------------------------------------------------
                     Est.    S.E.   t val.       p
----------------- ------- ------- -------- -------
(Intercept)         8.561   0.210   40.825   0.000
education           0.131   0.014    9.206   0.000
--------------------------------------------------

Log-log Model

# Log-log model: log earnings regressed on log education.
# Fit and store the model first, then print the regression summary.
ols.loglog <- lm(lnearn ~ lneduc)
summ(ols.loglog, digits = 3)
MODEL INFO:
Observations: 171
Dependent Variable: lnearn
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 67.668, p = 0.000
R² = 0.286
Adj. R² = 0.282 

Standard errors:OLS
--------------------------------------------------
                     Est.    S.E.   t val.       p
----------------- ------- ------- -------- -------
(Intercept)         6.543   0.478   13.700   0.000
lneduc              1.478   0.180    8.226   0.000
--------------------------------------------------

Linear-log Model

# Linear-log model: earnings in levels regressed on log education.
# Fit and store the model first, then print the regression summary.
ols.linlog <- lm(earnings ~ lneduc)
summ(ols.linlog, digits = 3)
MODEL INFO:
Observations: 171
Dependent Variable: earnings
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 50.570, p = 0.000
R² = 0.230
Adj. R² = 0.226 

Standard errors:OLS
------------------------------------------------------------
                           Est.        S.E.   t val.       p
----------------- ------------- ----------- -------- -------
(Intercept)         -102767.278   20347.617   -5.051   0.000
lneduc                54452.483    7657.259    7.111   0.000
------------------------------------------------------------

9.1.3. Figure 9.1#

# Figure 9.1: two side-by-side scatterplots, each with its fitted OLS line.
par(mfrow = c(1, 2))

# Left panel: earnings in levels against education, with the linear-model fit.
plot(
  x = education, y = earnings,
  pch = 19, main = "Linear Model",
  xlab = "Years of completed schooling",
  ylab = "Annual earnings(in dollars)"
)
abline(ols.linear)
# Legend entries pair a point-only symbol ("Actual") with a line-only symbol
# ("Fitted"); the -1 values switch off the line or the point for that entry.
legend(
  x = 5, y = 150000,
  legend = c("Actual", "Fitted"),
  lty = c(-1, 1), pch = c(19, -1), bty = "o"
)

# Right panel: log earnings against education, with the log-linear-model fit.
plot(
  x = education, y = lnearn,
  pch = 19, main = "Log-linear Model",
  xlab = "Years of completed schooling",
  ylab = "Log annual earnings"
)
abline(ols.loglin)
legend(
  x = 5, y = 12,
  legend = c("Actual", "Fitted"),
  lty = c(-1, 1), pch = c(19, -1), bty = "o"
)
_images/bafdcc1c161fb5e9cab1283e97aff5441a6d5215350ae6b5e1be12c2f4b5a135.png

9.2. 9.5 FURTHER USES OF THE NATURAL LOGARITHM#

9.2.1. Table 9.5#

# Start the S&P 500 example from a clean slate.
# rm() only clears objects in the workspace; it does not touch the search path.
rm(list = ls())
# Any data frame previously attached under the name "df" is therefore still
# attached, and its columns would keep resolving by bare name. Detach every
# such entry (re-running this cell would otherwise stack one more each time)
# before attaching the new dataset.
while ("df" %in% search()) detach("df")

# Read the S&P 500 index dataset from its Stata file and expose its columns
# (year, sp500, lnsp500) by bare name for the cells that follow.
df <- read.dta(file = "Dataset/AED_SP500INDEX.DTA")
attach(df)
# Descriptive statistics for every column, then the first rows of the data.
print(describe(df))
print(head(df))
        vars  n    mean     sd  median trimmed    mad     min     max   range skew kurtosis    se
year       1 93 1973.00  26.99 1973.00 1973.00  34.10 1927.00 2019.00   92.00 0.00    -1.24  2.80
sp500      2 93  473.66 710.75   96.47  325.68 123.34    6.92 3230.78 3223.86 1.77     2.58 73.70
lnsp500    3 93    4.82   1.80    4.57    4.79   2.34    1.93    8.08    6.15 0.16    -1.27  0.19
  year sp500  lnsp500
1 1927 17.66 2.871302
2 1928 24.35 3.192532
3 1929 21.45 3.065725
4 1930 15.34 2.730464
5 1931  8.12 2.094330
6 1932  6.92 1.934416

Fit a linear trend to the natural logarithm of the index. Its predictions are used below to draw the exponential growth curve in the graph in levels.

# Linear time trend for the log of the index: lnsp500 regressed on year.
# Fit and store the model first, then print the regression summary.
ols.logs <- lm(lnsp500 ~ year)
summ(ols.logs)
# In-sample predictions of the log index, used for both panels of Figure 9.2.
plnsp500 <- predict(ols.logs)
MODEL INFO:
Observations: 93
Dependent Variable: lnsp500
Type: OLS linear regression 

MODEL FIT:
F(1,91) = 2070.52, p = 0.00
R² = 0.96
Adj. R² = 0.96 

Standard errors:OLS
--------------------------------------------------
                       Est.   S.E.   t val.      p
----------------- --------- ------ -------- ------
(Intercept)         -124.09   2.83   -43.80   0.00
year                   0.07   0.00    45.50   0.00
--------------------------------------------------

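Correct for retransformation bias (see Chapter 15.6). This first requires \(rmse^2\), the square of the root mean squared error. The code here uses the knowledge that \(k = 2\) (an intercept plus one regressor), so the residual sum of squares is divided by \(n - 2\).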

# Residual sum of squares of the log-trend regression, as a plain number
# (crossprod() returns a 1 x 1 matrix).
ResSS <- as.numeric(crossprod(ols.logs$residuals))
# Mean squared error: ResSS over n - k, with k = 2 hard-coded.
MSE <- ResSS / (length(ols.logs$residuals) - 2)
# Display the root mean squared error.
sqrt(MSE)
# Prediction in levels: exponentiate the predicted log and multiply by
# exp(MSE / 2) to correct for retransformation bias.
psp500 <- exp(plnsp500) * exp(MSE / 2)
# Display the mean squared error used in the correction.
MSE
0.371732757916085
0.138185243307899

9.2.2. Figure 9.2#

# Figure 9.2: the S&P 500 index with its fitted trend, in levels (left panel)
# and in natural logarithms (right panel).
par(mfrow = c(1, 2))

# Left panel: actual index (solid line) and the predicted index in levels
# (dashed line), i.e. the retransformed log-trend prediction psp500.
plot(year, sp500, xlab = "Year", ylab = "S&P 500 Index", type = "l", lty = 1,
     main = "Exponential trend in levels")
lines(year, psp500, lty = 2)

# Right panel: log of the index (solid line) and its fitted linear trend
# (dashed line). The y-axis is on the log scale, so it is labelled as such.
plot(year, lnsp500, xlab = "Year", ylab = "ln(S&P 500 Index)", type = "l",
     lty = 1, main = "Linear trend in natural logarithms")
lines(year, plnsp500, lty = 2)
_images/3a7feaee1efec411a56e89f8ab766b3af799c328a44e5ea636b136c5a7ae3ab7.png