14. CHAPTER 14. REGRESSION WITH INDICATOR VARIABLES#
SET UP
library(foreign) # to open stata.dta files
library(psych) # for better sammary of descriptive statistics
library(repr) # to combine graphs with adjustable plot dimensions
options(repr.plot.width = 12, repr.plot.height = 6) # Plot dimensions (in inches)
options(width = 150) # To increase character width of printed output
14.1. 14.1 EXAMPLE: EARNINGS, GENDER, EDUCATION and TYPE OF WORKER#
Load Libraries
Show code cell source
pkgs <- c("car", "lmtest", "sandwich","stargazer","dplyr")
invisible(lapply(pkgs, function(pkg) suppressPackageStartupMessages(library(pkg, character.only = TRUE))))
14.1.1. Table 14.1#
df <- read.dta("Dataset/AED_EARNINGS_COMPLETE.DTA")
print(describe(df[, c("earnings", "gender", "education", "genderbyeduc", "age",
"genderbyage", "hours", "genderbyhours", "dself", "dprivate", "dgovt")]))
vars n mean sd median trimmed mad min max range skew kurtosis se
earnings 1 872 56368.69 51516.05 44200 47580.66 24907.68 4000 504000 500000 4.17 25.48 1744.55
gender 2 872 0.43 0.50 0 0.42 0.00 0 1 1 0.27 -1.93 0.02
education 3 872 13.85 2.88 13 13.87 1.48 0 20 20 -1.04 4.67 0.10
genderbyeduc 4 872 6.08 7.17 0 5.45 0.00 0 20 20 0.42 -1.65 0.24
age 5 872 43.31 10.68 44 43.22 13.34 25 65 40 0.04 -1.03 0.36
genderbyage 6 872 19.04 22.87 0 16.59 0.00 0 65 65 0.53 -1.43 0.77
hours 7 872 44.34 8.50 40 42.66 0.00 35 99 64 2.32 6.65 0.29
genderbyhours 8 872 18.56 21.76 0 16.56 0.00 0 80 80 0.45 -1.42 0.74
dself 9 872 0.09 0.29 0 0.00 0.00 0 1 1 2.85 6.12 0.01
dprivate 10 872 0.76 0.43 1 0.83 0.00 0 1 1 -1.22 -0.52 0.01
dgovt 11 872 0.15 0.36 0 0.06 0.00 0 1 1 1.97 1.87 0.01
14.2. 14.2 REGRESSION ON JUST A SINGLE INDICATOR VARIABLE#
14.2.1. Table 14.2 Summary statistics by gender#
df %>%
group_by(gender) %>% #female=1
summarise(across(earnings, list(n = ~sum(!is.na(.)), mean = mean, sd = sd, min = min, max = max)))
gender | earnings_n | earnings_mean | earnings_sd | earnings_min | earnings_max |
---|---|---|---|---|---|
<int> | <int> | <dbl> | <dbl> | <dbl> | <dbl> |
0 | 494 | 63476.32 | 61713.21 | 5000 | 504000 |
1 | 378 | 47079.89 | 31596.72 | 4000 | 322000 |
OLS for difference in means - heteroskedastic-robust standard errors
lm1 <- lm(earnings ~ gender, data = df)
print(coeftest(lm1, vcov = vcovHC(lm1, type = "HC1")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 63476.3 2777.0 22.8580 < 2.2e-16 ***
gender -16396.4 3217.4 -5.0961 4.254e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t.test(earnings ~ gender, data = df, var.equal = FALSE)
lm(earnings ~ gender, data = df)
t.test(earnings ~ gender, data = df, var.equal = TRUE)
Welch Two Sample t-test
data: earnings by gender
t = 5.0964, df = 770.41, p-value = 4.359e-07
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
10080.81 22712.04
sample estimates:
mean in group 0 mean in group 1
63476.32 47079.89
Call:
lm(formula = earnings ~ gender, data = df)
Coefficients:
(Intercept) gender
63476 -16396
Two Sample t-test
data: earnings by gender
t = 4.7139, df = 870, p-value = 2.828e-06
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
9569.628 23223.220
sample estimates:
mean in group 0 mean in group 1
63476.32 47079.89
14.3. 14.3 REGRESSION ON A SINGLE INDICATOR VARIABLE AND ADDITIONAL REGRESSORS#
mod1 <- lm(earnings ~ gender, data = df)
coeftest(mod1, vcov = vcovHC(mod1, type = "HC1"))
print(linearHypothesis(mod1, "gender"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 63476.3 2777.0 22.8580 < 2.2e-16 ***
gender -16396.4 3217.4 -5.0961 4.254e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
gender = 0
Model 1: restricted model
Model 2: earnings ~ gender
Res.Df RSS Df Sum of Sq F Pr(>F)
1 871 2.3116e+12
2 870 2.2540e+12 1 5.7571e+10 22.221 2.828e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod2 <- lm(earnings ~ gender + education, data = df)
coeftest(mod2, vcov = vcovHC(mod2, type = "HC1"))
print(linearHypothesis(mod2, "gender"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -17551.62 7993.55 -2.1957 0.02838 *
gender -18258.09 3136.14 -5.8218 8.186e-09 ***
education 5907.29 654.08 9.0315 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
gender = 0
Model 1: restricted model
Model 2: earnings ~ gender + education
Res.Df RSS Df Sum of Sq F Pr(>F)
1 870 2.0731e+12
2 869 2.0019e+12 1 7.1176e+10 30.897 3.62e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod3 <- lm(earnings ~ gender + education + genderbyeduc, data = df)
coeftest(mod3, vcov = vcovHC(mod3, type = "HC1"))
print(linearHypothesis(mod3, c("gender", "genderbyeduc")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -31451.32 11846.01 -2.6550 0.008076 **
gender 20218.80 15355.18 1.3167 0.188273
education 6920.64 946.14 7.3146 5.871e-13 ***
genderbyeduc -2764.89 1168.32 -2.3665 0.018174 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
gender = 0
genderbyeduc = 0
Model 1: restricted model
Model 2: earnings ~ gender + education + genderbyeduc
Res.Df RSS Df Sum of Sq F Pr(>F)
1 870 2.0731e+12
2 868 1.9891e+12 2 8.3998e+10 18.328 1.599e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod4 <- lm(earnings ~ gender + education + genderbyeduc + age + hours, data = df)
coeftest(mod4, vcov = vcovHC(mod4, type = "HC1"))
print(linearHypothesis(mod4, c("gender", "genderbyeduc")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -107343.62 22224.38 -4.8300 1.614e-06 ***
gender 19021.71 14994.36 1.2686 0.2049278
education 6416.80 886.12 7.2414 9.806e-13 ***
genderbyeduc -2456.29 1123.17 -2.1869 0.0290153 *
age 525.99 145.01 3.6274 0.0003031 ***
hours 1324.51 352.55 3.7570 0.0001835 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
gender = 0
genderbyeduc = 0
Model 1: restricted model
Model 2: earnings ~ gender + education + genderbyeduc + age + hours
Res.Df RSS Df Sum of Sq F Pr(>F)
1 868 1.9121e+12
2 866 1.8542e+12 2 5.7938e+10 13.53 1.637e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod5 <- lm(earnings ~ gender + genderbyeduc + education + age + hours + genderbyage + genderbyhours, data = df)
coeftest(mod5, vcov = vcovHC(mod5, type = "HC1"))
print(linearHypothesis(mod5, c("gender", "genderbyeduc", "genderbyage", "genderbyhours")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -120431.568 28847.148 -4.1748 3.284e-05 ***
gender 57128.997 31917.145 1.7899 0.073818 .
genderbyeduc -2123.511 1096.425 -1.9368 0.053100 .
education 6314.673 878.615 7.1871 1.432e-12 ***
age 549.475 227.683 2.4133 0.016014 *
hours 1620.814 503.869 3.2167 0.001345 **
genderbyage -49.333 270.258 -0.1825 0.855203
genderbyhours -929.575 554.247 -1.6772 0.093868 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
gender = 0
genderbyeduc = 0
genderbyage = 0
genderbyhours = 0
Model 1: restricted model
Model 2: earnings ~ gender + genderbyeduc + education + age + hours +
genderbyage + genderbyhours
Res.Df RSS Df Sum of Sq F Pr(>F)
1 868 1.9121e+12
2 864 1.8428e+12 4 6.937e+10 8.1312 1.946e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(mod1, mod2, mod3, mod4, mod5, type = "text",
se = list(sqrt(diag(vcovHC(mod1, type = "HC1"))),
sqrt(diag(vcovHC(mod2, type = "HC1"))),
sqrt(diag(vcovHC(mod3, type = "HC1"))),
sqrt(diag(vcovHC(mod4, type = "HC1"))),
sqrt(diag(vcovHC(mod5, type = "HC1")))),
keep.stat = c("n", "rsq", "adj.rsq", "f"),
report = "vcst", digits = 0)
=================================================================================================================
Dependent variable:
---------------------------------------------------------------------------------------------------
earnings
(1) (2) (3) (4) (5)
-----------------------------------------------------------------------------------------------------------------
gender -16,396 -18,258 20,219 19,022 57,129
(3,217) (3,136) (15,355) (14,994) (31,917)
t = -5 t = -6 t = 1 t = 1 t = 2
education 5,907 6,921 6,417 6,315
(654) (946) (886) (879)
t = 9 t = 7 t = 7 t = 7
genderbyeduc -2,765 -2,456 -2,124
(1,168) (1,123) (1,096)
t = -2 t = -2 t = -2
age 526 549
(145) (228)
t = 4 t = 2
hours 1,325 1,621
(353) (504)
t = 4 t = 3
genderbyage -49
(270)
t = -0
genderbyhours -930
(554)
t = -2
Constant 63,476 -17,552 -31,451 -107,344 -120,432
(2,777) (7,994) (11,846) (22,224) (28,847)
t = 23 t = -2 t = -3 t = -5 t = -4
-----------------------------------------------------------------------------------------------------------------
Observations 872 872 872 872 872
R2 0 0 0 0 0
Adjusted R2 0 0 0 0 0
F Statistic 22*** (df = 1; 870) 67*** (df = 2; 869) 47*** (df = 3; 868) 43*** (df = 5; 866) 31*** (df = 7; 864)
=================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
lm_joint <- lm(earnings ~ gender + genderbyeduc + education + age + hours + genderbyage + genderbyhours, data = df)
lm_female <- lm(earnings ~ gender + education + age + hours, data = df[df$gender == 1, ])
lm_male <- lm(earnings ~ gender + education + age + hours, data = df[df$gender == 0, ])
stargazer(lm_female, lm_male, lm_joint, type = "text",
se = list(sqrt(diag(vcovHC(mod1, type = "HC1"))),
sqrt(diag(vcovHC(mod2, type = "HC1"))),
sqrt(diag(vcovHC(mod3, type = "HC1"))),
sqrt(diag(vcovHC(mod4, type = "HC1"))),
sqrt(diag(vcovHC(mod5, type = "HC1")))),
keep.stat = c("n", "rsq", "adj.rsq", "f"),
report = "vcst", digits = 0)
=========================================================================
Dependent variable:
-----------------------------------------------------------
earnings
(1) (2) (3)
-------------------------------------------------------------------------
gender 57,129
(3,217) (3,136) (15,355)
t = 4
genderbyeduc -2,124
(1,168)
t = -2
education 4,191 6,315 6,315
(654) (946)
t = 10 t = 7
age 500 549 549
hours 691 1,621 1,621
genderbyage -49
genderbyhours -930
Constant -63,303 -120,432 -120,432
(2,777) (7,994) (11,846)
t = -23 t = -15 t = -10
-------------------------------------------------------------------------
Observations 378 494 872
R2 0 0 0
Adjusted R2 0 0 0
F Statistic 26*** (df = 3; 374) 37*** (df = 3; 490) 31*** (df = 7; 864)
=========================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
14.4. 14.4 REGRESSION ON SETS OF INDICATOR VARIABLES#
mod6 <- lm(earnings ~ dself + dprivate + dgovt - 1, data = df)
coeftest(mod6, vcov = vcovHC(mod6, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
dself 72306.3 9636.9 7.5031 1.541e-13 ***
dprivate 54521.3 1897.5 28.7331 < 2.2e-16 ***
dgovt 56105.4 2824.6 19.8629 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mean(df$earnings[df$dself == 1], na.rm = TRUE)
mean(df$earnings[df$dprivate == 1], na.rm = TRUE)
mean(df$earnings[df$dgovt == 1], na.rm = TRUE)
72306.329113924
54521.2684766214
56105.3846153846
mod7 <- lm(earnings ~ age + education, data = df)
coeftest(mod7, vcov = vcovHC(mod7, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -46875.36 11306.33 -4.1459 3.716e-05 ***
age 525.00 151.39 3.4679 0.0005503 ***
education 5811.37 641.53 9.0586 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod8 <- lm(earnings ~ age + education + dprivate + dgovt, data = df)
coeftest(mod8, vcov = vcovHC(mod8, type = "HC1"))
print(linearHypothesis(mod8, c("dprivate", "dgovt")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -30150.74 13139.58 -2.2946 0.021991 *
age 487.60 149.65 3.2583 0.001164 **
education 5865.19 652.22 8.9927 < 2.2e-16 ***
dprivate -17097.85 9341.98 -1.8302 0.067561 .
dgovt -19122.92 9585.61 -1.9950 0.046360 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
dprivate = 0
dgovt = 0
Model 1: restricted model
Model 2: earnings ~ age + education + dprivate + dgovt
Res.Df RSS Df Sum of Sq F Pr(>F)
1 869 2.0457e+12
2 867 2.0236e+12 2 2.2108e+10 4.7358 0.009003 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod9 <- lm(earnings ~ age + education + dself + dgovt, data = df)
coeftest(mod9, vcov = vcovHC(mod9, type = "HC1"))
print(linearHypothesis(mod9, c("dself", "dgovt")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -47248.58 11377.07 -4.1530 3.606e-05 ***
age 487.60 149.65 3.2583 0.001164 **
education 5865.19 652.22 8.9927 < 2.2e-16 ***
dself 17097.85 9341.98 1.8302 0.067561 .
dgovt -2025.08 3098.98 -0.6535 0.513630
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
dself = 0
dgovt = 0
Model 1: restricted model
Model 2: earnings ~ age + education + dself + dgovt
Res.Df RSS Df Sum of Sq F Pr(>F)
1 869 2.0457e+12
2 867 2.0236e+12 2 2.2108e+10 4.7358 0.009003 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod10 <- lm(earnings ~ age + education + dself + dprivate, data = df)
coeftest(mod10, vcov = vcovHC(mod10, type = "HC1"))
print(linearHypothesis(mod10, c("dself", "dprivate")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -49273.66 12312.08 -4.0021 6.816e-05 ***
age 487.60 149.65 3.2583 0.001164 **
education 5865.19 652.22 8.9927 < 2.2e-16 ***
dself 19122.92 9585.61 1.9950 0.046360 *
dprivate 2025.08 3098.98 0.6535 0.513630
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
dself = 0
dprivate = 0
Model 1: restricted model
Model 2: earnings ~ age + education + dself + dprivate
Res.Df RSS Df Sum of Sq F Pr(>F)
1 869 2.0457e+12
2 867 2.0236e+12 2 2.2108e+10 4.7358 0.009003 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod11 <- lm(earnings ~ age + education + dself + dprivate + dgovt - 1, data = df)
coeftest(mod11, vcov = vcovHC(mod11, type = "HC1"))
print(linearHypothesis(mod11, c("dself = dprivate", "dself = dgovt")))
print(linearHypothesis(mod11, c("dself = dprivate", "dprivate = dgovt")))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
age 487.60 149.65 3.2583 0.001164 **
education 5865.19 652.22 8.9927 < 2.2e-16 ***
dself -30150.74 13139.58 -2.2946 0.021991 *
dprivate -47248.58 11377.07 -4.1530 3.606e-05 ***
dgovt -49273.66 12312.08 -4.0021 6.816e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
dself - dprivate = 0
dself - dgovt = 0
Model 1: restricted model
Model 2: earnings ~ age + education + dself + dprivate + dgovt - 1
Res.Df RSS Df Sum of Sq F Pr(>F)
1 869 2.0457e+12
2 867 2.0236e+12 2 2.2108e+10 4.7358 0.009003 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear hypothesis test:
dself - dprivate = 0
dprivate - dgovt = 0
Model 1: restricted model
Model 2: earnings ~ age + education + dself + dprivate + dgovt - 1
Res.Df RSS Df Sum of Sq F Pr(>F)
1 869 2.0457e+12
2 867 2.0236e+12 2 2.2108e+10 4.7358 0.009003 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(mod7, mod8, mod9, mod10, mod11, type = "text",
keep = c("age", "education", "dself", "dprivate", "dgovt"),
se = list(sqrt(diag(vcovHC(mod7, type = "HC1"))),
sqrt(diag(vcovHC(mod8, type = "HC1"))),
sqrt(diag(vcovHC(mod9, type = "HC1"))),
sqrt(diag(vcovHC(mod10, type = "HC1"))),
sqrt(diag(vcovHC(mod11, type = "HC1")))),
keep.stat = c("n", "rsq", "adj.rsq", "f"),
report = "vcst", digits = 0)
=================================================================================================================
Dependent variable:
----------------------------------------------------------------------------------------------------
earnings
(1) (2) (3) (4) (5)
-----------------------------------------------------------------------------------------------------------------
age 525 488 488 488 488
(151) (150) (150) (150) (150)
t = 3 t = 3 t = 3 t = 3 t = 3
education 5,811 5,865 5,865 5,865 5,865
(642) (652) (652) (652) (652)
t = 9 t = 9 t = 9 t = 9 t = 9
dprivate -17,098 2,025 -47,249
(9,342) (3,099) (11,377)
t = -2 t = 1 t = -4
dself 17,098 19,123 -30,151
(9,342) (9,586) (13,140)
t = 2 t = 2 t = -2
dgovt -19,123 -2,025 -49,274
(9,586) (3,099) (12,312)
t = -2 t = -1 t = -4
-----------------------------------------------------------------------------------------------------------------
Observations 872 872 872 872 872
R2 0 0 0 0 1
Adjusted R2 0 0 0 0 1
F Statistic 56*** (df = 2; 869) 31*** (df = 4; 867) 31*** (df = 4; 867) 31*** (df = 4; 867) 262*** (df = 5; 867)
=================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
mod12 <- lm(earnings ~ dprivate + dgovt, data = df)
coeftest(mod12, vcov = vcovHC(mod12, type = "HC1"))
mod13 <- lm(earnings ~ dself + dprivate + dgovt - 1, data = df)
coeftest(mod13, vcov = vcovHC(mod13, type = "HC1"))
stargazer(mod12, mod13, type = "text",
keep = c("dself", "dprivate", "dgovt", "(Intercept)"),
se = list(sqrt(diag(vcovHC(mod12, type = "HC1"))),
sqrt(diag(vcovHC(mod13, type = "HC1")))),
keep.stat = c("n", "rsq", "adj.rsq", "f"),
report = "vcst", digits = 0)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 72306.3 9636.9 7.5031 1.541e-13 ***
dprivate -17785.1 9821.9 -1.8108 0.07052 .
dgovt -16200.9 10042.3 -1.6133 0.10705
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
dself 72306.3 9636.9 7.5031 1.541e-13 ***
dprivate 54521.3 1897.5 28.7331 < 2.2e-16 ***
dgovt 56105.4 2824.6 19.8629 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
===================================================
Dependent variable:
--------------------------------------
earnings
(1) (2)
---------------------------------------------------
dself 72,306
(9,637)
t = 8
dprivate -17,785 54,521
(9,822) (1,898)
t = -2 t = 29
dgovt -16,201 56,105
(10,042) (2,825)
t = -2 t = 20
---------------------------------------------------
Observations 872 872
R2 0 1
Adjusted R2 0 1
F Statistic 4** (df = 2; 869) 353*** (df = 3; 869)
===================================================
Note: *p<0.1; **p<0.05; ***p<0.01
df$typeworker <- 1 * df$dself + 2 * df$dprivate + 3 * df$dgovt
print(aggregate(earnings ~ typeworker, data = df, FUN = summary))
print(anova(lm(earnings ~ factor(typeworker), data = df)))
typeworker earnings.Min. earnings.1st Qu. earnings.Median earnings.Mean earnings.3rd Qu. earnings.Max.
1 1 4000.00 20000.00 45000.00 72306.33 91000.00 504000.00
2 2 4300.00 29550.00 42000.00 54521.27 61500.00 498000.00
3 3 10500.00 31200.00 50000.00 56105.38 71500.00 180000.00
Analysis of Variance Table
Response: earnings
Df Sum Sq Mean Sq F value Pr(>F)
factor(typeworker) 2 2.2338e+10 1.1169e+10 4.2399 0.01471 *
Residuals 869 2.2892e+12 2.6343e+09
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1