2 An Introduction to Linear Regression
2.2 2.1.3 Example
Load Libraries
Individual Wages
##
## Call:
## lm(formula = wage ~ male, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.160 -2.102 -0.554 1.487 33.496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.14692 0.08122 63.37 <2e-16 ***
## male 1.16610 0.11224 10.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.217 on 3292 degrees of freedom
## Multiple R-squared: 0.03175, Adjusted R-squared: 0.03145
## F-statistic: 107.9 on 1 and 3292 DF, p-value: < 2.2e-16
2.3 Table 2.1
and 2.3.3 Example: Individual Wages (Continued)
# Render the wage-equation OLS results as a text table (Table 2.1).
stargazer(
  OLS1,
  type = "text",
  single.row = TRUE,
  no.space = TRUE,
  title = "Table 2.1 OLS results wage equation"
)
##
## Table 2.1 OLS results wage equation
## ===============================================
## Dependent variable:
## ---------------------------
## wage
## -----------------------------------------------
## male 1.166*** (0.112)
## Constant 5.147*** (0.081)
## -----------------------------------------------
## Observations 3,294
## R2 0.032
## Adjusted R2 0.031
## Residual Std. Error 3.217 (df = 3292)
## F Statistic 107.934*** (df = 1; 3292)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
2.4 2.5.2 Example
Individual Wages (Continued); Confidence interval
# Report the wage equation with 95% confidence intervals in place of
# standard errors, keeping only N, R-squared, and the SER.
stargazer(
  OLS1,
  type = "text",
  ci = TRUE,
  ci.level = 0.95,
  single.row = TRUE,
  no.space = TRUE,
  keep.stat = c("n", "rsq", "ser")
)
##
## ===============================================
## Dependent variable:
## ---------------------------
## wage
## -----------------------------------------------
## male 1.166*** (0.946, 1.386)
## Constant 5.147*** (4.988, 5.306)
## -----------------------------------------------
## Observations 3,294
## R2 0.032
## Residual Std. Error 3.217 (df = 3292)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
2.5 Table 2.2
##
## Call:
## lm(formula = wage ~ male + school + exper, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.654 -1.967 -0.457 1.444 34.194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.38002 0.46498 -7.269 4.50e-13 ***
## male 1.34437 0.10768 12.485 < 2e-16 ***
## school 0.63880 0.03280 19.478 < 2e-16 ***
## exper 0.12483 0.02376 5.253 1.59e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 3290 degrees of freedom
## Multiple R-squared: 0.1326, Adjusted R-squared: 0.1318
## F-statistic: 167.6 on 3 and 3290 DF, p-value: < 2.2e-16
F-test
## Linear hypothesis test
##
## Hypothesis:
## school = 0
## exper = 0
##
## Model 1: restricted model
## Model 2: wage ~ male + school + exper
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3292 34077
## 2 3290 30528 2 3549 191.24 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2.7 Table 2.3
CAPM regression (without intercept)
# Table 2.3: CAPM regressions for three industry portfolios, intercept
# suppressed via "0 +" so each slope is the portfolio's market beta.
df <- read_dta("Data/Capm5.dta")
Food         <- lm(foodrf  ~ 0 + rmrf, data = df)
Durables     <- lm(durblrf ~ 0 + rmrf, data = df)
Construction <- lm(cnstrrf ~ 0 + rmrf, data = df)
stargazer(
  Food, Durables, Construction,
  type = "text",
  column.labels = c("Food", "Durables", "Construction"),
  keep.stat = c("N", "rsq", "ser"),
  no.space = TRUE,
  title = "Table 2.3 CAPM regressions (without intercept)"
)
##
## Table 2.3 CAPM regressions (without intercept)
## =============================================================
## Dependent variable:
## ------------------------------
## foodrf durblrf cnstrrf
## Food Durables Construction
## (1) (2) (3)
## -------------------------------------------------------------
## rmrf 0.755*** 1.066*** 1.174***
## (0.025) (0.027) (0.025)
## -------------------------------------------------------------
## Observations 660 660 660
## R2 0.590 0.706 0.774
## Residual Std. Error (df = 659) 2.812 3.072 2.831
## =============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
2.8 Table 2.4
CAPM regression (with intercept)
# Table 2.4: CAPM regressions with an intercept.  The original mixed
# three equivalent formula spellings ("1 + rmrf", "rmrf + 1", "rmrf");
# the intercept is included by default, so one consistent notation is
# used here -- the fitted models are identical.
df <- read_dta("Data/Capm5.dta")
Food         <- lm(foodrf  ~ rmrf, data = df)
Durables     <- lm(durblrf ~ rmrf, data = df)
Construction <- lm(cnstrrf ~ rmrf, data = df)
stargazer(
  Food, Durables, Construction,
  type = "text",
  column.labels = c("Food", "Durables", "Construction"),
  keep.stat = c("N", "rsq", "ser"),
  no.space = TRUE,
  title = "Table 2.4 CAPM regressions (with intercept)"
)
##
## Table 2.4 CAPM regressions (with intercept)
## =============================================================
## Dependent variable:
## ------------------------------
## foodrf durblrf cnstrrf
## Food Durables Construction
## (1) (2) (3)
## -------------------------------------------------------------
## rmrf 0.747*** 1.069*** 1.174***
## (0.025) (0.027) (0.025)
## Constant 0.320*** -0.120 -0.027
## (0.110) (0.120) (0.111)
## -------------------------------------------------------------
## Observations 660 660 660
## R2 0.585 0.705 0.772
## Residual Std. Error (df = 658) 2.796 3.072 2.833
## =============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
2.9 Table 2.5
CAPM regressions (with intercept and January dummy)
# Table 2.5: CAPM regressions with an intercept and a January dummy.
# Formula notation is normalized ("1 +" / "+ 1" dropped -- the
# intercept is included by default), and the over-long stargazer call
# is broken across lines; the fitted models are unchanged.
df <- read_dta("Data/Capm5.dta")
Food         <- lm(foodrf  ~ rmrf + jan, data = df)
Durables     <- lm(durblrf ~ rmrf + jan, data = df)
Construction <- lm(cnstrrf ~ rmrf + jan, data = df)
stargazer(
  Food, Durables, Construction,
  type = "text",
  column.labels = c("Food", "Durables", "Construction"),
  covariate.labels = c("excess market return", "January dummy"),
  keep.stat = c("N", "rsq", "ser"),
  no.space = TRUE,
  title = "Table 2.5 CAPM regressions (with intercept and January dummy)"
)
##
## Table 2.5 CAPM regressions (with intercept and January dummy)
## =============================================================
## Dependent variable:
## ------------------------------
## foodrf durblrf cnstrrf
## Food Durables Construction
## (1) (2) (3)
## -------------------------------------------------------------
## excess market return 0.749*** 1.069*** 1.173***
## (0.024) (0.027) (0.025)
## January dummy -0.971** 0.081 0.605
## (0.393) (0.433) (0.399)
## Constant 0.400*** -0.126 -0.077
## (0.114) (0.126) (0.116)
## -------------------------------------------------------------
## Observations 660 660 660
## R2 0.589 0.705 0.773
## Residual Std. Error (df = 657) 2.786 3.074 2.831
## =============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
2.10 Table 2.6
CAPM regression (with intercept) Madoff’s returns
##
## Call:
## lm(formula = fslrf ~ rmrf, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.34773 -0.48005 -0.08337 0.38865 2.97276
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50495 0.04570 11.049 < 2e-16 ***
## rmrf 0.04089 0.01072 3.813 0.00018 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6658 on 213 degrees of freedom
## Multiple R-squared: 0.06388, Adjusted R-squared: 0.05949
## F-statistic: 14.54 on 1 and 213 DF, p-value: 0.0001801
2.11 Table 2.7
Alternative specifications with dummy variables
# Table 2.7: the same male/female wage contrast expressed three ways --
# a male dummy, a female dummy, and both dummies with no intercept.
# Fixes: `<-` for assignment (was `=`) and a stray ")" that had crept
# into the table title.
df <- read_dta("Data/Wages1.dta")
# female is the complement of the 0/1 male indicator
df$female <- ifelse(df$male == 0, 1, 0)
OLSm <- lm(wage ~ male, data = df)
OLSf <- lm(wage ~ female, data = df)
OLS  <- lm(wage ~ 0 + male + female, data = df)
stargazer(
  OLSm, OLSf, OLS,
  type = "text",
  keep.stat = c("rsq"),
  column.labels = c("OLSm", "OLSf", "OLS"),
  title = "Table 2.7 Alternative specifications with dummy variables"
)
##
## Table 2.7 Alternative specifications with dummy variables)
## ======================================
## Dependent variable:
## -----------------------------
## wage
## OLSm OLSf OLS
## (1) (2) (3)
## --------------------------------------
## male 1.166*** 6.313***
## (0.112) (0.077)
##
## female -1.166*** 5.147***
## (0.112) (0.081)
##
## Constant 5.147*** 6.313***
## (0.081) (0.077)
##
## --------------------------------------
## R2 0.032 0.032 0.764
## ======================================
## Note: *p<0.1; **p<0.05; ***p<0.01
2.12 Figure 2.3
The impact of estimating with and without an outlying observation
# Figure 2.3: leverage of a single outlying observation.  Any point with
# x >= 5.9 is pulled down to y = 0.5 and the OLS fit on the clean data
# (red) is compared with the fit on the distorted data (blue).
set.seed(123)                      # reproducible draws
x   <- runif(n = 50, min = 0, max = 6)
# rnorm's first argument is the number of draws.  The original passed
# the vector x, which only works because rnorm(n) uses length(n) when
# length(n) > 1 -- make that intent explicit (identical RNG stream).
eps <- rnorm(n = length(x), mean = 0, sd = 1.0)
y   <- x + 0.5 + eps               # true line: intercept 0.5, slope 1
y1  <- ifelse(x < 5.9, y, 0.5)     # force extreme point(s) to 0.5
fm  <- lm(y ~ x)                   # fit without the distortion
fm1 <- lm(y1 ~ x)                  # fit with the outlying observation
plot(x, y1, pch = 16, cex = .4, ylab = "",
     main = "Figure 2.3 The impact of estimating with and without an outlying observation",
     cex.main = 0.8)
abline(fm, col = "red")            # clean-data fit
abline(fm1, col = "blue")          # outlier-distorted fit