# 20 Stratified Sampling and Cluster Sampling

Also available in Stata and Python versions

## 20.1 Example 20.3

library(AER)
library(haven)
library(plm)
library(stargazer)

Cluster Correlation in Teacher Compensation

# Pooled OLS benchmark: regress lavgsal on bs, lstaff, lenroll and lunch,
# ignoring any group structure. `df` must already be in scope; it is not
# created in the visible part of this file.
POLS <- plm(
  lavgsal ~ bs + lstaff + lenroll + lunch,
  data = df,
  model = "pooling"
)
summary(POLS)
## Pooling Model
##
## Call:
## plm(formula = lavgsal ~ bs + lstaff + lenroll + lunch, data = df,
##     model = "pooling")
##
## Unbalanced Panel: n = 537, T = 1-162, N = 1848
##
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max.
## -1.2604665 -0.1097606 -0.0084917  0.1036848  0.5938342
##
## Coefficients:
##                Estimate  Std. Error  t-value  Pr(>|t|)
## (Intercept) 13.72361494  0.11210950 122.4126 < 2.2e-16 ***
## bs          -0.17743963  0.12196910  -1.4548 0.1458972
## lstaff      -0.69070249  0.01845982 -37.4165 < 2.2e-16 ***
## lenroll     -0.02924060  0.00849973  -3.4402 0.0005943 ***
## lunch       -0.00084709  0.00016249  -5.2131 2.065e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares:    100.18
## Residual Sum of Squares: 51.833
## R-Squared:      0.48261
## F-statistic: 429.778 on 4 and 1843 DF, p-value: < 2.22e-16
# Re-test the pooled OLS coefficients with a robust covariance matrix from
# vcovHC(): type "HC0" with cluster = "group" (clustering on the panel's
# group index). Point estimates are unchanged; only standard errors,
# t values and p-values differ from summary(POLS).
# The argument is spelled `vcov.` (coeftest's actual formal name); the shorter
# `vcov` only worked through partial argument matching.
POLSr <- coeftest(
  POLS,
  vcov. = vcovHC(POLS, type = "HC0", cluster = "group")
)
POLSr
##
## t test of coefficients:
##
##                Estimate  Std. Error  t value Pr(>|t|)
## (Intercept) 13.72361494  0.25577473  53.6551   <2e-16 ***
## bs          -0.17743963  0.25909851  -0.6848   0.4935
## lstaff      -0.69070249  0.03522512 -19.6082   <2e-16 ***
## lenroll     -0.02924060  0.02568956  -1.1382   0.2552
## lunch       -0.00084709  0.00056977  -1.4867   0.1373
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Random effects estimate of the same equation as the pooled model.
# Two argument problems in the earlier call are fixed here:
#   * plm()'s argument is `effect`, not `effects`; the misspelled name was
#     passed through `...` instead of setting the effect type.
#   * an unnamed `grouping(distid)` was matched positionally to `subset`
#     (visible in the echoed call, which shows `subset = grouping(distid)`).
# The grouping now comes from the index of `df`, exactly as in the pooling
# and within fits of this section.
# NOTE(review): the captured output that follows was produced by the earlier
# call and still echoes it; the same observations should be selected, but
# re-run this chunk to refresh the printed results.
RE <- plm(
  lavgsal ~ bs + lstaff + lenroll + lunch,
  data = df,
  model = "random",
  effect = "individual"
)
summary(RE)
## Oneway (individual) effect Random Effect Model
##    (Swamy-Arora's transformation)
##
## Call:
## plm(formula = lavgsal ~ bs + lstaff + lenroll + lunch, data = df,
##     subset = grouping(distid), model = "random", effects = "individual")
##
## Unbalanced Panel: n = 537, T = 1-162, N = 1848
##
## Effects:
##                    var  std.dev share
## idiosyncratic 0.009993 0.099966 0.353
## individual    0.018295 0.135257 0.647
## theta:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##  0.4056  0.6075  0.7111  0.6928  0.8126  0.9420
##
## Residuals:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
## -1.15324 -0.04593  0.01183  0.00772  0.06531  0.48351
##
## Coefficients:
##                Estimate  Std. Error  z-value  Pr(>|z|)
## (Intercept) 13.37652699  0.09727090 137.5183 < 2.2e-16 ***
## bs          -0.38776347  0.11181785  -3.4678 0.0005247 ***
## lstaff      -0.61740149  0.01525485 -40.4725 < 2.2e-16 ***
## lenroll     -0.02649717  0.00756230  -3.5039 0.0004586 ***
## lunch        0.00031924  0.00017921   1.7814 0.0748505 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares:    5102.8
## Residual Sum of Squares: 18.225
## R-Squared:      0.99653
## Chisq: 1913.5 on 4 DF, p-value: < 2.22e-16
# Coefficient tests for the random effects fit using the robust covariance
# from vcovHC() (type "HC0", cluster = "group"). Estimates match summary(RE);
# only the standard errors and the resulting tests change.
# `vcov.` is written in full rather than relying on partial matching of
# `vcov` to coeftest's `vcov.` argument.
REr <- coeftest(
  RE,
  vcov. = vcovHC(RE, type = "HC0", cluster = "group")
)
REr
##
## t test of coefficients:
##
##                Estimate  Std. Error  t value Pr(>|t|)
## (Intercept) 13.37652699  0.19853627  67.3757  < 2e-16 ***
## bs          -0.38776347  0.15099621  -2.5680  0.01031 *
## lstaff      -0.61740149  0.03676321 -16.7940  < 2e-16 ***
## lenroll     -0.02649717  0.01149101  -2.3059  0.02123 *
## lunch        0.00031924  0.00019447   1.6416  0.10084
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fixed effects (within) estimate of the same equation; the within
# transformation removes the intercept, so only slope coefficients are
# reported in the summary.
FE <- plm(
  lavgsal ~ bs + lstaff + lenroll + lunch,
  data = df,
  model = "within"
)
summary(FE)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lavgsal ~ bs + lstaff + lenroll + lunch, data = df,
##     model = "within")
##
## Unbalanced Panel: n = 537, T = 1-162, N = 1848
##
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max.
## -1.138102 -0.035492  0.000000  0.038447  0.393448
##
## Coefficients:
##            Estimate  Std. Error  t-value  Pr(>|t|)
## bs      -0.49484486  0.13303902  -3.7195  0.000208 ***
## lstaff  -0.62189008  0.01675652 -37.1133 < 2.2e-16 ***
## lenroll -0.05150631  0.00940037  -5.4792 5.121e-08 ***
## lunch    0.00051379  0.00020878   2.4609  0.013987 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares:    28.932
## Residual Sum of Squares: 13.061
## R-Squared:      0.54856
## F-statistic: 397.048 on 4 and 1307 DF, p-value: < 2.22e-16
# Coefficient tests for the within fit using the robust covariance from
# vcovHC() (type "HC0", cluster = "group"). Estimates match summary(FE);
# only the standard errors and the resulting tests change.
# `vcov.` is written in full rather than relying on partial matching of
# `vcov` to coeftest's `vcov.` argument.
FEr <- coeftest(
  FE,
  vcov. = vcovHC(FE, type = "HC0", cluster = "group")
)
FEr
##
## t test of coefficients:
##
##            Estimate  Std. Error  t value  Pr(>|t|)
## bs      -0.49484486  0.19334145  -2.5594   0.01060 *
## lstaff  -0.62189008  0.04309424 -14.4309 < 2.2e-16 ***
## lenroll -0.05150631  0.01306237  -3.9431  8.47e-05 ***
## lunch    0.00051379  0.00021229   2.4202   0.01565 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Side-by-side text table: each fitted model is followed by its
# robust-covariance coefficient test, so conventional and robust standard
# errors sit in adjacent columns. Only the observation count and R-squared
# are kept as summary statistics.
stargazer(
  POLS, POLSr, RE, REr, FE, FEr,
  type = "text",
  no.space = TRUE,
  keep.stat = c("N", "rsq"),
  column.labels = c("POLS", "POLSr", "RE", "REr", "FE", "FEr"),
  title = "Table 20.1 Salary-Benefits Trade-off for Michigan Teachers"
)
##
## Table 20.1 Salary-Benefits Trade-off for Michigan Teachers
## ==============================================================================
##                                     Dependent variable:
##              -----------------------------------------------------------------
##               lavgsal               lavgsal               lavgsal
##                panel   coefficient   panel   coefficient   panel   coefficient
##               linear      test      linear      test      linear      test
##                POLS       POLSr       RE         REr        FE         FEr
##                 (1)        (2)        (3)        (4)        (5)        (6)
## ------------------------------------------------------------------------------
## bs            -0.177     -0.177    -0.388***  -0.388**   -0.495***  -0.495**
##               (0.122)    (0.259)    (0.112)    (0.151)    (0.133)    (0.193)
## lstaff       -0.691***  -0.691***  -0.617***  -0.617***  -0.622***  -0.622***
##               (0.018)    (0.035)    (0.015)    (0.037)    (0.017)    (0.043)
## lenroll      -0.029***   -0.029    -0.026***  -0.026**   -0.052***  -0.052***
##               (0.008)    (0.026)    (0.008)    (0.011)    (0.009)    (0.013)
## lunch        -0.001***   -0.001     0.0003*    0.0003     0.001**    0.001**
##              (0.0002)    (0.001)   (0.0002)   (0.0002)   (0.0002)   (0.0002)
## Constant     13.724***  13.724***  13.377***  13.377***
##               (0.112)    (0.256)    (0.097)    (0.199)
## ------------------------------------------------------------------------------
## Observations   1,848                 1,848                 1,848
## R2             0.483                 0.997                 0.549
## ==============================================================================
## Note:                                              *p<0.1; **p<0.05; ***p<0.01

## 20.2 Example 20.4

# Declare the panel structure explicitly: schid is the group index and year
# the time index.
# NOTE(review): this model uses math4, lavgrexp and lenrol, which do not
# appear in the earlier salary regressions, so `df` is presumably a different
# dataset loaded outside the visible part of this file -- confirm.
df <- pdata.frame(df, index = c("schid", "year"))

# Within (fixed effects) regression of math4 on lavgrexp, lunch and lenrol,
# with year dummies entered through factor(year). This reassigns `FE`,
# replacing the within fit from the previous example.
FE <- plm(
  math4 ~ lavgrexp + lunch + lenrol + factor(year),
  data = df,
  model = "within"
)
summary(FE)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = math4 ~ lavgrexp + lunch + lenrol + factor(year),
##     data = df, model = "within")
##
## Unbalanced Panel: n = 1683, T = 3-5, N = 7150
##
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max.
## -53.483863  -5.826143   0.090337   5.911614  55.305855
##
## Coefficients:
##                   Estimate Std. Error t-value  Pr(>|t|)
## lavgrexp          6.288376   2.098685  2.9963  0.002745 **
## lunch            -0.021507   0.031219 -0.6889  0.490900
## lenrol           -2.038461   1.791604 -1.1378  0.255260
## factor(year)1995 11.619204   0.554523 20.9535 < 2.2e-16 ***
## factor(year)1996 13.055607   0.663095 19.6889 < 2.2e-16 ***
## factor(year)1997 10.147710   0.702407 14.4471 < 2.2e-16 ***
## factor(year)1998 23.414040   0.718724 32.5772 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares:    1094500
## Residual Sum of Squares: 700280
## R-Squared:      0.36019
## F-statistic: 439.11 on 7 and 5460 DF, p-value: < 2.22e-16
# Coefficient tests for the within fit with the robust covariance from
# vcovHC() (type "HC0", cluster = "group"); with the index declared as
# c("schid", "year"), the group dimension is schid.
# `vcov.` is written in full rather than relying on partial matching of
# `vcov` to coeftest's `vcov.` argument.
FEr_schid <- coeftest(
  FE,
  vcov. = vcovHC(FE, type = "HC0", cluster = "group")
)
FEr_schid
##
## t test of coefficients:
##
##                   Estimate Std. Error t value  Pr(>|t|)
## lavgrexp          6.288376   2.429404  2.5884  0.009667 **
## lunch            -0.021507   0.039042 -0.5509  0.581747
## lenrol           -2.038461   1.787686 -1.1403  0.254220
## factor(year)1995 11.619204   0.535425 21.7009 < 2.2e-16 ***
## factor(year)1996 13.055607   0.690538 18.9064 < 2.2e-16 ***
## factor(year)1997 10.147710   0.732055 13.8619 < 2.2e-16 ***
## factor(year)1998 23.414040   0.766352 30.5526 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-column text table: the within fit with its conventional standard
# errors next to the same coefficients tested with the robust covariance.
# Only the observation count and R-squared are kept as summary statistics.
stargazer(
  FE, FEr_schid,
  type = "text",
  no.space = TRUE,
  keep.stat = c("N", "rsq"),
  column.labels = c("FE", "FEr"),
  title = "Table 20.2 Fixed Effects Estimation of Spending on Test Pass Rates"
)
##
## Table 20.2 Fixed Effects Estimation of Spending on Test Pass Rates
## =============================================
##                      Dependent variable:
##                  ----------------------------
##                      math4
##                      panel       coefficient
##                      linear         test
##                        FE            FEr
##                       (1)            (2)
## ---------------------------------------------
## lavgrexp            6.288***      6.288***
##                     (2.099)        (2.429)
## lunch                -0.022        -0.022
##                     (0.031)        (0.039)
## lenrol               -2.038        -2.038
##                     (1.792)        (1.788)
## factor(year)1995   11.619***      11.619***
##                     (0.555)        (0.535)
## factor(year)1996   13.056***      13.056***
##                     (0.663)        (0.691)
## factor(year)1997   10.148***      10.148***
##                     (0.702)        (0.732)
## factor(year)1998   23.414***      23.414***
##                     (0.719)        (0.766)
## ---------------------------------------------
## Observations         7,150
## R2                   0.360
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01