7. CHAPTER 7. STATISTICAL INFERENCE FOR BIVARIATE REGRESSION#

SET UP

library(foreign) # to open stata.dta files
library(psych) # for better sammary of descriptive statistics
library(repr) # to combine graphs with adjustable plot dimensions
options(repr.plot.width = 12, repr.plot.height = 6) # Plot dimensions (in inches)
options(width = 150) # To increase character width of printed output

7.1. 7.1 EXAMPLE: HOUSE PRICE AND SIZE#

rm(list=ls())  
df = read.dta(file = "Dataset/AED_HOUSE.DTA")
attach(df)
print(df_desc<-describe(df))
          vars  n      mean       sd median   trimmed      mad    min    max  range  skew kurtosis      se
price        1 29 253910.34 37390.71 244000 249296.00 22239.00 204000 375000 171000  1.48     2.23 6943.28
size         2 29   1882.76   398.27   1800   1836.00   296.52   1400   3300   1900  1.64     3.29   73.96
bedrooms     3 29      3.79     0.68      4      3.72     0.00      3      6      3  0.92     1.89    0.13
bathrooms    4 29      2.21     0.34      2      2.16     0.00      2      3      1  1.28     0.23    0.06
lotsize      5 29      2.14     0.69      2      2.16     0.00      1      3      2 -0.17    -1.00    0.13
age          6 29     36.41     7.12     35     36.20     5.93     23     51     28  0.44    -0.75    1.32
monthsold    7 29      5.97     1.68      6      6.04     1.48      3      8      5 -0.47    -1.03    0.31
list         8 29 257824.14 40860.26 245000 252364.00 23721.60 199900 386000 186100  1.65     2.70 7587.56
print(head(df))
   price size bedrooms bathrooms lotsize age monthsold   list
1 204000 1400        3       2.0       1  31         7 199900
2 212000 1600        3       3.0       2  33         5 212000
3 213000 1800        3       2.0       2  51         4 219900
4 220000 1600        3       2.0       1  49         4 229000
5 224500 2100        4       2.5       2  47         6 224500
6 229000 1700        4       2.5       2  35         3 229500

7.1.1. Table 7.1#

ols <- lm(price ~ size)
summary(ols)
Call:
lm(formula = price ~ size)

Residuals:
   Min     1Q Median     3Q    Max 
-45436 -16936   1949  17818  47932 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 115017.28   21489.36   5.352 1.18e-05 ***
size            73.77      11.17   6.601 4.41e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 23550 on 27 degrees of freedom
Multiple R-squared:  0.6175,	Adjusted R-squared:  0.6033 
F-statistic: 43.58 on 1 and 27 DF,  p-value: 4.409e-07

For confidence interval use confint funcion in MASS package

library(MASS) #install.packages("MASS") 
confint(ols)
A matrix: 2 × 2 of type dbl
2.5 % 97.5 %
(Intercept) 70924.75827 159109.80695
size 50.84202 96.70006
# install.packages("jtools")   # For nicer regression output 
library(jtools) 
summ(ols, digits=3) 
# With confidence interval
summ(ols, digits=3, confint = TRUE) 
MODEL INFO:
Observations: 29
Dependent Variable: price
Type: OLS linear regression 

MODEL FIT:
F(1,27) = 43.580, p = 0.000
R² = 0.617
Adj. R² = 0.603 

Standard errors:OLS
-----------------------------------------------------------
                          Est.        S.E.   t val.       p
----------------- ------------ ----------- -------- -------
(Intercept)         115017.283   21489.360    5.352   0.000
size                    73.771      11.175    6.601   0.000
-----------------------------------------------------------
MODEL INFO:
Observations: 29
Dependent Variable: price
Type: OLS linear regression 

MODEL FIT:
F(1,27) = 43.580, p = 0.000
R² = 0.617
Adj. R² = 0.603 

Standard errors:OLS
------------------------------------------------------------------------
                          Est.        2.5%        97.5%   t val.       p
----------------- ------------ ----------- ------------ -------- -------
(Intercept)         115017.283   70924.758   159109.807    5.352   0.000
size                    73.771      50.842       96.700    6.601   0.000
------------------------------------------------------------------------

7.2. 7.3 CONFIDENCE INTERVALS#

coef=summary(ols)$coefficients["size",1]
stderror=summary(ols)$coefficients["size",2]
N = length(price)
cbind(coef,stderror,N)
coef + c(-1,1)*stderror*qt(0.975, N-2)
A matrix: 1 × 3 of type dbl
coef stderror N
73.77104 11.17491 29
  1. 50.8420168604068
  2. 96.7000638849348

Example with artifical data

x = c(1, 2, 3, 4, 5)
y = c(1, 2, 2, 2, 3)
olsxy = lm(y ~ x)
summ(olsxy)
coef=summary(olsxy)$coefficients["x",1]
sterror=summary(olsxy)$coefficients["x",2]
N = length("x")
cbind(coef,sterror,N)
coef + c(-1,1)*stderror*qt(0.975, N-2)
MODEL INFO:
Observations: 5
Dependent Variable: y
Type: OLS linear regression 

MODEL FIT:
F(1,3) = 12.00, p = 0.04
R² = 0.80
Adj. R² = 0.73 

Standard errors:OLS
-----------------------------------------------
                    Est.   S.E.   t val.      p
----------------- ------ ------ -------- ------
(Intercept)         0.80   0.38     2.09   0.13
x                   0.40   0.12     3.46   0.04
-----------------------------------------------
A matrix: 1 × 3 of type dbl
coef sterror N
0.4 0.1154701 1
Warning message in qt(0.975, N - 2):
"NaNs produced"
  1. NaN
  2. NaN

7.3. 7.4 TESTS OF STATISTICAL SIGNIFICNACE#

rm(list=ls()) 
df = read.dta(file = "Dataset/AED_HOUSE.DTA")
attach(df)
print(describe(df))
print(head(df))
The following objects are masked from df (pos = 5):

    age, bathrooms, bedrooms, list, lotsize, monthsold, price, size
          vars  n      mean       sd median   trimmed      mad    min    max  range  skew kurtosis      se
price        1 29 253910.34 37390.71 244000 249296.00 22239.00 204000 375000 171000  1.48     2.23 6943.28
size         2 29   1882.76   398.27   1800   1836.00   296.52   1400   3300   1900  1.64     3.29   73.96
bedrooms     3 29      3.79     0.68      4      3.72     0.00      3      6      3  0.92     1.89    0.13
bathrooms    4 29      2.21     0.34      2      2.16     0.00      2      3      1  1.28     0.23    0.06
lotsize      5 29      2.14     0.69      2      2.16     0.00      1      3      2 -0.17    -1.00    0.13
age          6 29     36.41     7.12     35     36.20     5.93     23     51     28  0.44    -0.75    1.32
monthsold    7 29      5.97     1.68      6      6.04     1.48      3      8      5 -0.47    -1.03    0.31
list         8 29 257824.14 40860.26 245000 252364.00 23721.60 199900 386000 186100  1.65     2.70 7587.56
   price size bedrooms bathrooms lotsize age monthsold   list
1 204000 1400        3       2.0       1  31         7 199900
2 212000 1600        3       3.0       2  33         5 212000
3 213000 1800        3       2.0       2  51         4 219900
4 220000 1600        3       2.0       1  49         4 229000
5 224500 2100        4       2.5       2  47         6 224500
6 229000 1700        4       2.5       2  35         3 229500
ols <- lm(price ~ size)
summary(ols)
Call:
lm(formula = price ~ size)

Residuals:
   Min     1Q Median     3Q    Max 
-45436 -16936   1949  17818  47932 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 115017.28   21489.36   5.352 1.18e-05 ***
size            73.77      11.17   6.601 4.41e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 23550 on 27 degrees of freedom
Multiple R-squared:  0.6175,	Adjusted R-squared:  0.6033 
F-statistic: 43.58 on 1 and 27 DF,  p-value: 4.409e-07

7.4. 7.5 TWO-SIDED HYPOTHESIS TESTS#

summ(ols <- lm(price ~ size), digits=3)
MODEL INFO:
Observations: 29
Dependent Variable: price
Type: OLS linear regression 

MODEL FIT:
F(1,27) = 43.580, p = 0.000
R² = 0.617
Adj. R² = 0.603 

Standard errors:OLS
-----------------------------------------------------------
                          Est.        S.E.   t val.       p
----------------- ------------ ----------- -------- -------
(Intercept)         115017.283   21489.360    5.352   0.000
size                    73.771      11.175    6.601   0.000
-----------------------------------------------------------
#robvar <- vcovHC(ols, type = "HC1")
suppressPackageStartupMessages(library(lmtest)) # Use coeftest and coefci for individual tests and CI's in this package 
coeftest(ols)
coefci(ols)
t test of coefficients:

              Estimate Std. Error t value  Pr(>|t|)    
(Intercept) 115017.283  21489.360  5.3523 1.184e-05 ***
size            73.771     11.175  6.6015 4.409e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A matrix: 2 × 2 of type dbl
2.5 % 97.5 %
(Intercept) 70924.75827 159109.80695
size 50.84202 96.70006

Two-sided Test that slope coefficient of size = 90 manually

coef=summary(ols)$coefficients["size",1]
sterror=summary(ols)$coefficients["size",2]
cbind(coef,sterror)
N = length(size)
t = (coef - 90)/sterror
p = 2*(1-pt(abs(t),N-2))
tcrit = qt(.975,N-2)
print("t statistic, p-value, critical value")
cbind(t, p, tcrit)
A matrix: 1 × 2 of type dbl
coef sterror
73.77104 11.17491
[1] "t statistic, p-value, critical value"
A matrix: 1 × 3 of type dbl
t p tcrit
-1.452267 0.1579501 2.051831

Two-sided Test that slope = 90 using linearHypothesis in car

suppressPackageStartupMessages(library(car))
print(linearHypothesis(ols,c("size=90")))
Linear hypothesis test:
size = 90

Model 1: restricted model
Model 2: price ~ size

  Res.Df        RSS Df  Sum of Sq      F Pr(>F)
1     28 1.6145e+10                            
2     27 1.4975e+10  1 1169766621 2.1091  0.158

7.5. 7.6 ONE-SIDED HYPOTHESIS TESTS#

Use previous test

Halve the two-sided p-value provided

t>0 for an upper one-tailed alternative

t<0 for a lower one-tailed alternative

7.6. 7.7 ROBUST STANDARD ERRORS#

# Heteroskedastic robust
summ(ols <- lm(price ~ size), digit=3)
MODEL INFO:
Observations: 29
Dependent Variable: price
Type: OLS linear regression 

MODEL FIT:
F(1,27) = 43.580, p = 0.000
R² = 0.617
Adj. R² = 0.603 

Standard errors:OLS
-----------------------------------------------------------
                          Est.        S.E.   t val.       p
----------------- ------------ ----------- -------- -------
(Intercept)         115017.283   21489.360    5.352   0.000
size                    73.771      11.175    6.601   0.000
-----------------------------------------------------------
library(sandwich)
robvar <- vcovHC(ols, type = "HC1")
coefci(ols, vcov = robvar)
coeftest(ols, vcov = robvar)
A matrix: 2 × 2 of type dbl
2.5 % 97.5 %
(Intercept) 73367.78128 156666.7839
size 50.52448 97.0176
t test of coefficients:

              Estimate Std. Error t value  Pr(>|t|)    
(Intercept) 115017.283  20298.704  5.6662 5.120e-06 ***
size            73.771     11.330  6.5113 5.565e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1