CHAPTER 09. MODELS WITH NATURAL LOGARITHMS

9. CHAPTER 09. MODELS WITH NATURAL LOGARITHMS

SET UP

library(foreign) # to open Stata .dta files
library(psych) # for a better summary of descriptive statistics
library(repr) # for adjustable plot dimensions when combining graphs
# Plot dimensions (in inches) and a wider character width for printed output
options(repr.plot.width = 12, repr.plot.height = 6, width = 150)

9.1. 9.4 EXAMPLE: EARNINGS AND EDUCATION

# Load the earnings data set (Stata format). attach() places its columns on
# the search path; the cells that follow refer to them by bare name.
df <- read.dta("Dataset/AED_EARNINGS.DTA")
attach(df)
# Descriptive statistics, then the first six rows
print(describe(df))
print(head(df, n = 6L))

          vars   n     mean       sd median  trimmed      mad  min    max  range  skew kurtosis      se
earnings     1 171 41412.69 25527.05  36000 38052.70 17049.90 1050 172000 170950  1.70     4.23 1952.10
education    2 171    14.43     2.74     14    14.45     2.97    3     20     17 -0.45     1.16    0.21
age          3 171    30.00     0.00     30    30.00     0.00   30     30      0   NaN      NaN    0.00
gender       4 171     0.00     0.00      0     0.00     0.00    0      0      0   NaN      NaN    0.00

  earnings education age gender
  25000        14  30      0
  40000        12  30      0
  25000        13  30      0
  38000        13  30      0
  28800        12  30      0
  31000        16  30      0

Create variables and add to data frame

# Natural logs of earnings and education, kept both as free-standing vectors
# (the model formulas below use them by name) and as columns of df.
lnearn <- log(earnings)
lneduc <- log(education)
df[["lnearn"]] <- lnearn
df[["lneduc"]] <- lneduc

9.1.1. Table 9.2

# Summary statistics for the level and log versions of both variables
table92vars <- c("earnings", "lnearn", "education", "lneduc")
print(describe(df[, table92vars]))

          vars   n     mean       sd   median  trimmed      mad     min       max    range  skew kurtosis      se
earnings     1 171 41412.69 25527.05 36000.00 38052.70 17049.90 1050.00 172000.00 170950.0  1.70     4.23 1952.10
lnearn       2 171    10.46     0.62    10.49    10.47     0.54    6.96     12.06      5.1 -0.90     4.74    0.05
education    3 171    14.43     2.74    14.00    14.45     2.97    3.00     20.00     17.0 -0.45     1.16    0.21
lneduc       4 171     2.65     0.22     2.64     2.66     0.20    1.10      3.00      1.9 -2.42    13.27    0.02

9.1.2. Table 9.3

Linear model

# Levels model: earnings regressed on years of education
library(jtools) # loaded here for summ()
ols.linear <- lm(earnings ~ education)
summ(ols.linear, digits = 3)

MODEL INFO:
Observations: 171
Dependent Variable: earnings
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 68.857, p = 0.000
R² = 0.289
Adj. R² = 0.285 

Standard errors:OLS
----------------------------------------------------------
                          Est.       S.E.   t val.       p
----------------- ------------ ---------- -------- -------
(Intercept)         -31055.915   8887.835   -3.494   0.001
education             5021.123    605.101    8.298   0.000
----------------------------------------------------------

Log-linear Model

# Log-linear model: log earnings regressed on years of education
ols.loglin <- lm(lnearn ~ education)
summ(ols.loglin, digits = 3)

MODEL INFO:
Observations: 171
Dependent Variable: lnearn
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 84.743, p = 0.000
R² = 0.334
Adj. R² = 0.330 

Standard errors:OLS
--------------------------------------------------
                     Est.    S.E.   t val.       p
----------------- ------- ------- -------- -------
(Intercept)         8.561   0.210   40.825   0.000
education           0.131   0.014    9.206   0.000
--------------------------------------------------

Log-log Model

# Log-log model: log earnings regressed on log education
ols.loglog <- lm(lnearn ~ lneduc)
summ(ols.loglog, digits = 3)

MODEL INFO:
Observations: 171
Dependent Variable: lnearn
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 67.668, p = 0.000
R² = 0.286
Adj. R² = 0.282 

Standard errors:OLS
--------------------------------------------------
                     Est.    S.E.   t val.       p
----------------- ------- ------- -------- -------
(Intercept)         6.543   0.478   13.700   0.000
lneduc              1.478   0.180    8.226   0.000
--------------------------------------------------

Linear-log Model

# Linear-log model: earnings in levels regressed on log education
ols.linlog <- lm(earnings ~ lneduc)
summ(ols.linlog, digits = 3)

MODEL INFO:
Observations: 171
Dependent Variable: earnings
Type: OLS linear regression 

MODEL FIT:
F(1,169) = 50.570, p = 0.000
R² = 0.230
Adj. R² = 0.226 

Standard errors:OLS
------------------------------------------------------------
                           Est.        S.E.   t val.       p
----------------- ------------- ----------- -------- -------
(Intercept)         -102767.278   20347.617   -5.051   0.000
lneduc                54452.483    7657.259    7.111   0.000
------------------------------------------------------------

9.1.3. Figure 9.1

# Two side-by-side scatter plots, each with its fitted OLS line
par(mfrow = c(1, 2))

# Left panel: earnings in levels against education
plot(
  x = education, y = earnings, pch = 19, main = "Linear Model",
  xlab = "Years of completed schooling",
  ylab = "Annual earnings(in dollars)"
)
abline(ols.linear)
# The -1 entries are meant to leave "Actual" without a line and "Fitted"
# without a plotting symbol
legend(
  x = 5, y = 150000, legend = c("Actual", "Fitted"),
  lty = c(-1, 1), pch = c(19, -1), bty = "o"
)

# Right panel: log earnings against education
plot(
  x = education, y = lnearn, pch = 19, main = "Log-linear Model",
  xlab = "Years of completed schooling",
  ylab = "Log annual earnings"
)
abline(ols.loglin)
legend(
  x = 5, y = 12, legend = c("Actual", "Fitted"),
  lty = c(-1, 1), pch = c(19, -1), bty = "o"
)

_images/bafdcc1c161fb5e9cab1283e97aff5441a6d5215350ae6b5e1be12c2f4b5a135.png

9.2. 9.5 FURTHER USES OF THE NATURAL LOGARITHM

9.2.1. Table 9.5

# Start the S&P 500 example from a clean workspace.
rm(list = ls())
# rm() clears objects but not the search path, so a data frame attached
# earlier under the name "df" would stay attached, and re-running this cell
# would stack further copies. Detach any such entries before attaching anew.
while ("df" %in% search()) {
  detach("df", character.only = TRUE)
}
# Load the S&P 500 index data (Stata format) and expose its columns by name
df <- read.dta("Dataset/AED_SP500INDEX.DTA")
attach(df)
# Descriptive statistics, then the first six rows
print(describe(df))
print(head(df))

        vars  n    mean     sd  median trimmed    mad     min     max   range skew kurtosis    se
year       1 93 1973.00  26.99 1973.00 1973.00  34.10 1927.00 2019.00   92.00 0.00    -1.24  2.80
sp500      2 93  473.66 710.75   96.47  325.68 123.34    6.92 3230.78 3223.86 1.77     2.58 73.70
lnsp500    3 93    4.82   1.80    4.57    4.79   2.34    1.93    8.08    6.15 0.16    -1.27  0.19

  year sp500  lnsp500
1927 17.66 2.871302
1928 24.35 3.192532
1929 21.45 3.065725
1930 15.34 2.730464
1931  8.12 2.094330
1932  6.92 1.934416

Fit a linear trend to the log of the index; its fitted values are then used to predict exponential growth in the graph in levels

# Linear time trend in the log of the index
ols.logs <- lm(lnsp500 ~ year)
summ(ols.logs)
# Fitted log values from the trend regression
plnsp500 <- predict(ols.logs)

MODEL INFO:
Observations: 93
Dependent Variable: lnsp500
Type: OLS linear regression 

MODEL FIT:
F(1,91) = 2070.52, p = 0.00
R² = 0.96
Adj. R² = 0.96 

Standard errors:OLS
--------------------------------------------------
                       Est.   S.E.   t val.      p
----------------- --------- ------ -------- ------
(Intercept)         -124.09   2.83   -43.80   0.00
year                   0.07   0.00    45.50   0.00
--------------------------------------------------

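Correct for retransformation bias (see Chapter 15.6): the prediction in levels is the exponential of the fitted log value multiplied by \(\exp(rmse^2/2)\). This first requires \(rmse^2\), the square of the root mean squared error, which is the residual sum of squares divided by \(n - k\). Here \(k = 2\) (intercept and slope).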

# Retransformation-bias correction for predictions in levels.
# Residual sum of squares from the log-trend regression
ResSS <- c(crossprod(ols.logs$residuals))
# Mean squared error = ResSS / (n - k). df.residual() reads n - k from the
# fitted model, so the number of coefficients is not hard-coded.
MSE <- ResSS / df.residual(ols.logs)
sqrt(MSE) # root mean squared error (auto-printed)
# Scale the naive prediction exp(fitted log value) by exp(MSE / 2)
psp500 <- exp(plnsp500) * exp(MSE / 2)
MSE # squared rmse (auto-printed)

0.371732757916085

0.138185243307899

9.2.2. Figure 9.2

# Actual series (solid) and fitted trend (dashed), side by side
par(mfrow = c(1, 2))

# Left panel: index in levels with the bias-corrected exponential trend
plot(year, sp500, type = "l", lty = 1, xlab = "Year", ylab = "S&P 500 Index",
     main = "Exponential trend in levels")
lines(year, psp500, lty = 2)

# Right panel: log of the index with the fitted linear trend. The y-axis
# label names the logged series, because this panel plots lnsp500, not sp500.
plot(year, lnsp500, type = "l", lty = 1, xlab = "Year",
     ylab = "Natural logarithm of S&P 500 Index",
     main = "Linear trend in natural logarithms")
lines(year, plnsp500, lty = 2)

_images/3a7feaee1efec411a56e89f8ab766b3af799c328a44e5ea636b136c5a7ae3ab7.png