Greene, Econometric Analysis, 8th ed.

Chapter 2. The Linear Regression Model

Example 2.1

library(AER)
library(stargazer)

Keynes’s Consumption Function (p. 13)

# Example 2.1: read Table F2-1 and draw the scatter plot used for Figure 2.1.
# read.csv() already returns a data frame; the BOM-aware encoding keeps a
# leading byte-order mark out of the first column name.
# NOTE(review): C and X are presumably consumption and disposable income --
# confirm against the data file.
df <- read.csv(
  "data/TableF2-1.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)

plot(
  C ~ X,
  data = df,
  main = "FIGURE 2.1 Consumption Data, 1940-1950.",
  xlim = c(235, 375),
  ylim = c(225, 350),
  pch = 19,
  col = "blue"
)

# Regress C on X using every row of the table, then print the fit summary.
fm <- lm(C ~ X, data = df)
summary(fm)
##
## Call:
## lm(formula = C ~ X, data = df)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -35.347 -26.440   9.068  20.000  31.642
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  51.8951    80.8440   0.642   0.5369
## X             0.6848     0.2488   2.753   0.0224 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.59 on 9 degrees of freedom
## Multiple R-squared:  0.4571, Adjusted R-squared:  0.3968
## F-statistic: 7.579 on 1 and 9 DF,  p-value: 0.02237
# Overlay the full-sample fitted line (fm) on the open scatter plot in red.
abline(fm, col = "red")

# Refit the same regression on the rows where W equals 1 and print the summary.
# NOTE(review): W is presumably the war-years indicator -- confirm against the
# data file.
fm2 <- lm(C ~ X, data = subset(df, W == 1))
summary(fm2)
##
## Call:
## lm(formula = C ~ X, data = subset(df, W == 1))
##
## Residuals:
##       3       4       5       6
## -0.4671 -2.2905 -6.0845  8.8420
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -78.8388   131.2014  -0.601    0.609
## X             0.9853     0.3929   2.507    0.129
##
## Residual standard error: 7.767 on 2 degrees of freedom
## Multiple R-squared:  0.7587, Adjusted R-squared:  0.638
## F-statistic: 6.287 on 1 and 2 DF,  p-value: 0.129
# Add the fitted line for the W == 1 subsample (fm2) to the plot in orange.
abline(fm2, col = "orange")

# Refit the same regression on the rows where W equals 0 and print the summary.
fm3 <- lm(C ~ X, data = subset(df, W == 0))
summary(fm3)
##
## Call:
## lm(formula = C ~ X, data = subset(df, W == 0))
##
## Residuals:
##         1         2         7         8         9        10        11
##   4.50494 -14.76444  -4.12361  11.11312  -0.09504  10.75802  -7.39299
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.90735   31.64252   0.503 0.636516
## X            0.85306    0.09894   8.622 0.000347 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.48 on 5 degrees of freedom
## Multiple R-squared:  0.937,  Adjusted R-squared:  0.9244
## F-statistic: 74.33 on 1 and 5 DF,  p-value: 0.0003465
# Add the fitted line for the W == 0 subsample (fm3); it uses the same orange
# as the fm2 line.
abline(fm3, col = "orange")

# Label every point with its YEAR value, placed above the point (pos = 3) in
# italic (font = 3), slightly smaller than the default text size.
text(
  C ~ X,
  data = df,
  labels = YEAR,
  pch = 19,
  cex = .9,
  pos = 3,
  font = 3
)

Example 2.2

Earnings and Education

# Example 2.2: read Table F5-1 and keep only the rows with LFP equal to 1.
# NOTE(review): LFP is presumably a labor-force-participation flag, and
# WHRS * WW presumably hours worked times wage -- confirm against the data file.
PSID76 <- read.csv("data/TableF5-1.csv", fileEncoding = "UTF-8-BOM", header = TRUE)
df <- subset(PSID76, LFP == 1)

# Dependent variable: log of the product WHRS * WW.
earning <- log(df$WHRS * df$WW)

# Three nested specifications: WE only; WE and WA; WE, WA and WA squared.
onevar <- lm(earning ~ WE, data = df)
twovar <- lm(earning ~ WE + WA, data = df)
threevar <- lm(earning ~ WE + WA + I(WA^2), data = df)

# Print the three fits side by side as a plain-text table.
stargazer(onevar, twovar, threevar, no.space = TRUE, type = "text")
##
## ======================================================================================
##                                            Dependent variable:
##                     ------------------------------------------------------------------
##                                                  earning
##                              (1)                   (2)                   (3)
## --------------------------------------------------------------------------------------
## WE                         0.064**              0.066***               0.067***
##                            (0.025)               (0.025)               (0.025)
## WA                                               0.013*                 0.158*
##                                                  (0.008)               (0.082)
## I(WA2)                                                                 -0.002*
##                                                                        (0.001)
## Constant                  7.252***              6.692***               3.672**
##                            (0.328)               (0.467)               (1.768)
## --------------------------------------------------------------------------------------
## Observations                 428                   428                   428
## R2                          0.014                 0.021                 0.028
## Adjusted R2                 0.012                 0.016                 0.021
## Residual Std. Error   1.203 (df = 426)      1.200 (df = 425)       1.197 (df = 424)
## F Statistic         6.239** (df = 1; 426) 4.550** (df = 2; 425) 4.094*** (df = 3; 424)
## ======================================================================================
## Note:                                                      *p<0.1; **p<0.05; ***p<0.01

# Example 2.3
#
# The U.S. Gasoline Market

# Read Table F2-2; this replaces the Example 2.2 data frame held in `df`.
df <- read.csv("data/TableF2-2.csv", fileEncoding = "UTF-8-BOM", header = TRUE)

# Dependent variable: log of GASEXP / (POP * GASP).
# NOTE(review): presumably per-capita gasoline consumption (expenditure divided
# by price and population) -- confirm against the data file.
lnGPP <- log(df$GASEXP / (df$POP * df$GASP))

# Log-log regression on income and the three price series; the trailing `+ 1`
# only makes the intercept explicit.
summary(lm(lnGPP ~ log(INCOME) + log(GASP) + log(PNC) + log(PUC) + 1, data=df))
##
## Call:
## lm(formula = lnGPP ~ log(INCOME) + log(GASP) + log(PNC) + log(PUC) +
##     1, data = df)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.13831 -0.02835  0.01303  0.03372  0.07431
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -21.21109    0.75322 -28.160   <2e-16 ***
## log(INCOME)   1.09587    0.07771  14.102   <2e-16 ***
## log(GASP)    -0.02121    0.04377  -0.485   0.6303
## log(PNC)     -0.37361    0.15707  -2.379   0.0215 *
## log(PUC)      0.02003    0.10330   0.194   0.8471
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05071 on 47 degrees of freedom
## Multiple R-squared:  0.9584, Adjusted R-squared:  0.9549
## F-statistic:   271 on 4 and 47 DF,  p-value: < 2.2e-16

Example 2.7

Nonzero Conditional Mean of the Disturbances

# Example 2.7: simulate data whose true conditional mean is quadratic in x and
# fit a straight line to it, so the linear fit leaves a disturbance whose mean
# depends on x.
set.seed(123)  # fixed seed so the figure is reproducible

# Regressor: absolute values of 1000 standard-normal draws (non-negative).
x <- abs(rnorm(1000))

# One standard-normal disturbance per observation. The count is passed
# explicitly as length(x): rnorm(x, ...) only worked because x has more than
# one element, in which case rnorm() uses its length as the number of draws.
eps <- rnorm(length(x), mean = 0, sd = 1)

# Data-generating process: quadratic in x plus noise.
y <- 25 + 5 * x + 5 * x^2 + eps

# Fit a model that is linear in x (omits the squared term).
fm <- lm(y ~ x)

# Scatter of the simulated data with the fitted straight line in red.
plot(x, y, pch = 16, cex = 0.4, main = "FIGURE 2.2")
abline(fm, col = "red")