II Econometric Analysis Using R

### Chapter 19 - Censored Data, Sample Selection, and Attrition

Also available in Stata and Python versions

#### Example 19.6

library(wooldridge)
library(stargazer)
library(AER)
library(mfx)
library(sampleSelection)

Wage Offer Equation for Married Women

# Baseline wage equation: OLS of lwage on educ, exper and expersq (mroz data).
# The fit is kept in OLS so it can be reused in the comparison table later on.
OLS <- lm(
  lwage ~ educ + exper + expersq,
  data = mroz
)
summary(OLS)
##
## Call:
## lm(formula = lwage ~ educ + exper + expersq, data = mroz)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -3.08404 -0.30627  0.04952  0.37498  2.37115
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5220406  0.1986321  -2.628  0.00890 **
## educ         0.1074896  0.0141465   7.598 1.94e-13 ***
## exper        0.0415665  0.0131752   3.155  0.00172 **
## expersq     -0.0008112  0.0003932  -2.063  0.03974 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6664 on 424 degrees of freedom
##   (325 observations deleted due to missingness)
## Multiple R-squared:  0.1568, Adjusted R-squared:  0.1509
## F-statistic: 26.29 on 3 and 424 DF,  p-value: 1.302e-15
# Heckman two-step ("2step") sample-selection fit.
#   first formula  : selection equation for inlf
#   second formula : outcome equation for lwage (educ, exper, expersq only)
Heckman <- selection(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  lwage ~ educ + exper + expersq,
  data = mroz,
  method = "2step"
)
summary(Heckman)
## --------------------------------------------
## Tobit 2 model (sample selection model)
## 2-step Heckman / heckit estimation
## 753 observations (325 censored and 428 observed)
## 15 free parameters (df = 739)
## Probit selection equation:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.270077   0.508593   0.531  0.59556
## nwifeinc    -0.012024   0.004840  -2.484  0.01320 *
## educ         0.130905   0.025254   5.183 2.81e-07 ***
## exper        0.123348   0.018716   6.590 8.34e-11 ***
## expersq     -0.001887   0.000600  -3.145  0.00173 **
## age         -0.052853   0.008477  -6.235 7.61e-10 ***
## kidslt6     -0.868328   0.118522  -7.326 6.21e-13 ***
## kidsge6      0.036005   0.043477   0.828  0.40786
## Outcome equation:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5781032  0.3050062  -1.895  0.05843 .
## educ         0.1090655  0.0155230   7.026 4.83e-12 ***
## exper        0.0438873  0.0162611   2.699  0.00712 **
## expersq     -0.0008591  0.0004389  -1.957  0.05068 .
##    Error terms:
##               Estimate Std. Error t value Pr(>|t|)
## invMillsRatio  0.03226    0.13362   0.241    0.809
## sigma          0.66363         NA      NA       NA
## rho            0.04861         NA      NA       NA
## --------------------------------------------
# Heckman two-step fit in which the outcome equation for lwage uses the same
# regressors as the selection equation for inlf (no excluded variables).
HeckmanFull <- selection(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  lwage ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz,
  method = "2step"
)
summary(HeckmanFull)
## --------------------------------------------
## Tobit 2 model (sample selection model)
## 2-step Heckman / heckit estimation
## 753 observations (325 censored and 428 observed)
## 19 free parameters (df = 735)
## Probit selection equation:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.270077   0.508593   0.531  0.59556
## nwifeinc    -0.012024   0.004840  -2.484  0.01320 *
## educ         0.130905   0.025254   5.183 2.82e-07 ***
## exper        0.123348   0.018716   6.590 8.37e-11 ***
## expersq     -0.001887   0.000600  -3.145  0.00173 **
## age         -0.052853   0.008477  -6.235 7.63e-10 ***
## kidslt6     -0.868328   0.118522  -7.326 6.24e-13 ***
## kidsge6      0.036005   0.043477   0.828  0.40786
## Outcome equation:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5602852  0.4587672  -1.221 0.222370
## nwifeinc     0.0038434  0.0044919   0.856 0.392492
## educ         0.1187171  0.0340507   3.486 0.000518 ***
## exper        0.0598358  0.0336730   1.777 0.075987 .
## expersq     -0.0010523  0.0006381  -1.649 0.099566 .
## age         -0.0111580  0.0134792  -0.828 0.408054
## kidslt6     -0.1880451  0.2308275  -0.815 0.415533
## kidsge6     -0.0122255  0.0296063  -0.413 0.679775
##    Error terms:
##               Estimate Std. Error t value Pr(>|t|)
## invMillsRatio   0.2885     0.4636   0.622    0.534
## sigma           0.6896         NA      NA       NA
## rho             0.4183         NA      NA       NA
## --------------------------------------------
# OLS wage equation with the full regressor list (adds nwifeinc, age, kidslt6
# and kidsge6 to the baseline specification).
OLSfull <- lm(
  lwage ~ educ + exper + expersq + nwifeinc + age + kidslt6 + kidsge6,
  data = mroz
)
summary(OLSfull)
##
## Call:
## lm(formula = lwage ~ educ + exper + expersq + nwifeinc + age +
##     kidslt6 + kidsge6, data = mroz)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -3.0827 -0.3160  0.0532  0.3810  2.3551
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3579972  0.3182963  -1.125  0.26135
## educ         0.0998844  0.0150975   6.616 1.13e-10 ***
## exper        0.0407097  0.0133723   3.044  0.00248 **
## expersq     -0.0007473  0.0004018  -1.860  0.06358 .
## nwifeinc     0.0056942  0.0033195   1.715  0.08701 .
## age         -0.0035204  0.0054145  -0.650  0.51593
## kidslt6     -0.0558725  0.0886034  -0.631  0.52865
## kidsge6     -0.0176484  0.0278910  -0.633  0.52723
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6667 on 420 degrees of freedom
##   (325 observations deleted due to missingness)
## Multiple R-squared:  0.1641, Adjusted R-squared:  0.1502
## F-statistic: 11.78 on 7 and 420 DF,  p-value: 1.023e-13
# Text table placing the two OLS fits and the two Heckman fits side by side.
stargazer(
  OLS, Heckman, HeckmanFull, OLSfull,
  no.space = TRUE,
  type = "text",
  title = "Table 19.1 Wage Offer Equation for Married Women"
)
##
## Table 19.1 Wage Offer Equation for Married Women
## ===============================================================================================
##                                                 Dependent variable:
##                     ---------------------------------------------------------------------------
##                                                        lwage
##                               OLS                    selection                    OLS
##                               (1)                (2)           (3)                (4)
## -----------------------------------------------------------------------------------------------
## nwifeinc                                                      0.004             0.006*
##                                                              (0.004)            (0.003)
## educ                       0.107***           0.109***      0.119***           0.100***
##                             (0.014)            (0.016)       (0.034)            (0.015)
## exper                      0.042***           0.044***       0.060*            0.041***
##                             (0.013)            (0.016)       (0.034)            (0.013)
## expersq                    -0.001**            -0.001*       -0.001*            -0.001*
##                            (0.0004)           (0.0004)       (0.001)           (0.0004)
## age                                                          -0.011             -0.004
##                                                              (0.013)            (0.005)
## kidslt6                                                      -0.188             -0.056
##                                                              (0.231)            (0.089)
## kidsge6                                                      -0.012             -0.018
##                                                              (0.030)            (0.028)
## Constant                   -0.522***           -0.578*       -0.560             -0.358
##                             (0.199)            (0.305)       (0.459)            (0.318)
## -----------------------------------------------------------------------------------------------
## Observations                  428                753           753                428
## R2                           0.157                                               0.164
## rho                                             0.049         0.418
## Inverse Mills Ratio                         0.032 (0.134) 0.288 (0.464)
## Residual Std. Error    0.666 (df = 424)                                    0.667 (df = 420)
## F Statistic         26.286*** (df = 3; 424)                             11.779*** (df = 7; 420)
## ===============================================================================================
## Note:                                                               *p<0.1; **p<0.05; ***p<0.01

#### Example 19.7

Education Endogenous and Sample Selection

# Probit for labour-force participation (inlf) on the exogenous variables,
# including motheduc, fatheduc and huseduc; educ itself is not a regressor.
Probit <- glm(
  inlf ~ exper + expersq + nwifeinc + age + kidslt6 + kidsge6 +
    motheduc + fatheduc + huseduc,
  data = mroz,
  family = binomial(link = "probit")
)
summary(Probit)
##
## Call:
## glm(formula = inlf ~ exper + expersq + nwifeinc + age + kidslt6 +
##     kidsge6 + motheduc + fatheduc + huseduc, family = binomial(link = "probit"),
##     data = mroz)
##
## Deviance Residuals:
##     Min       1Q   Median       3Q      Max
## -2.2193  -0.9413   0.4550   0.8604   2.2870
##
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  1.1466683  0.4916633   2.332  0.01969 *
## exper        0.1285092  0.0185409   6.931 4.17e-12 ***
## expersq     -0.0019474  0.0005935  -3.281  0.00103 **
## nwifeinc    -0.0074295  0.0050157  -1.481  0.13854
## age         -0.0527656  0.0085094  -6.201 5.62e-10 ***
## kidslt6     -0.8149250  0.1163696  -7.003 2.51e-12 ***
## kidsge6      0.0241509  0.0436677   0.553  0.58022
## motheduc     0.0295320  0.0186856   1.580  0.11400
## fatheduc     0.0133487  0.0178497   0.748  0.45456
## huseduc      0.0161394  0.0197305   0.818  0.41336
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 1029.75  on 752  degrees of freedom
## Residual deviance:  822.64  on 743  degrees of freedom
## AIC: 842.64
##
## Number of Fisher Scoring iterations: 4
# Joint test that the coefficients on motheduc, fatheduc and huseduc are all
# zero in the probit above.
linearHypothesis(
  Probit,
  c(
    "motheduc = 0",
    "fatheduc =0",
    "huseduc =0"
  )
)
## Linear hypothesis test
##
## Hypothesis:
## motheduc = 0
## fatheduc = 0
## huseduc = 0
##
## Model 1: restricted model
## Model 2: inlf ~ exper + expersq + nwifeinc + age + kidslt6 + kidsge6 +
##     motheduc + fatheduc + huseduc
##
##   Res.Df Df  Chisq Pr(>Chisq)
## 1    746
## 2    743  3 8.0245    0.04551 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Inverse Mills ratio, dnorm(.) / pnorm(.), evaluated at predict(Probit).
# The prediction is computed once inside local() so no extra global is created.
IMR <- local({
  index <- predict(Probit)
  dnorm(index) / pnorm(index)
})

# IV wage equation including IMR as a regressor. educ appears among the
# regressors but not after the `|`, so it is the instrumented variable.
wIMR <- ivreg(
  lwage ~ educ + exper + expersq + IMR |
    exper + expersq + IMR + nwifeinc + age + kidslt6 + kidsge6 +
      motheduc + fatheduc + huseduc,
  data = mroz
)

# Same IV specification without the IMR term.
woutIMR <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + nwifeinc + age + kidslt6 + kidsge6 +
      motheduc + fatheduc + huseduc,
  data = mroz
)

# Compare the two IV fits in a text table.
stargazer(
  wIMR, woutIMR,
  no.space = TRUE,
  type = "text"
)
##
## =====================================================
##                            Dependent variable:
##                     ---------------------------------
##                                   lwage
##                           (1)              (2)
## -----------------------------------------------------
## educ                    0.088***         0.087***
##                         (0.021)          (0.021)
## exper                   0.046***         0.043***
##                         (0.017)          (0.013)
## expersq                 -0.001**         -0.001**
##                         (0.0004)         (0.0004)
## IMR                      0.040
##                         (0.133)
## Constant                 -0.325           -0.270
##                         (0.333)          (0.280)
## -----------------------------------------------------
## Observations              428              428
## R2                       0.153            0.153
## Residual Std. Error 0.669 (df = 423) 0.668 (df = 424)
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01
# IV wage equation with IMR, using a shorter instrument list: only exper,
# expersq, IMR and the three education variables appear after the `|`.
wIMR2 <- ivreg(
  lwage ~ educ + exper + expersq + IMR |
    exper + expersq + IMR + motheduc + fatheduc + huseduc,
  data = mroz
)

# Same shorter instrument list, without the IMR term.
woutIMR2 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz
)

# Compare the two fits in a text table.
stargazer(
  wIMR2, woutIMR2,
  no.space = TRUE,
  type = "text"
)
##
## =====================================================
##                            Dependent variable:
##                     ---------------------------------
##                                   lwage
##                           (1)              (2)
## -----------------------------------------------------
## educ                    0.081***         0.080***
##                         (0.022)          (0.022)
## exper                   0.046***         0.043***
##                         (0.017)          (0.013)
## expersq                 -0.001**         -0.001**
##                         (0.0004)         (0.0004)
## IMR                      0.036
##                         (0.134)
## Constant                 -0.234           -0.187
##                         (0.337)          (0.285)
## -----------------------------------------------------
## Observations              428              428
## R2                       0.150            0.150
## Residual Std. Error 0.670 (df = 423) 0.669 (df = 424)
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01

#### Example 19.8

Wage Offer Equation for Married Women

# Tobit with left-censoring at 0, fitted to inlf on the exogenous variables.
# NOTE(review): inlf is the probit response in Example 19.7 and looks like a
# 0/1 participation indicator. A Tobit selection step would normally use a
# censored continuous response (e.g. hours worked) -- confirm the intended
# response before relying on the residuals taken from this fit.
Tobit <- tobit(
  inlf ~ nwifeinc + motheduc + fatheduc + huseduc + exper + expersq + age +
    kidslt6 + kidsge6,
  data = mroz,
  left = 0
)
summary(Tobit)
##
## Call:
## tobit(formula = inlf ~ nwifeinc + motheduc + fatheduc + huseduc +
##     exper + expersq + age + kidslt6 + kidsge6, left = 0, data = mroz)
##
## Observations:
##          Total  Left-censored     Uncensored Right-censored
##            753            325            428              0
##
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.7832485  0.2644459   2.962  0.00306 **
## nwifeinc    -0.0042396  0.0027476  -1.543  0.12283
## motheduc     0.0134540  0.0101337   1.328  0.18429
## fatheduc     0.0082528  0.0096746   0.853  0.39364
## huseduc      0.0112281  0.0105784   1.061  0.28850
## exper        0.0779554  0.0104014   7.495 6.64e-14 ***
## expersq     -0.0012600  0.0003276  -3.847  0.00012 ***
## age         -0.0287968  0.0045577  -6.318 2.65e-10 ***
## kidslt6     -0.4729478  0.0657462  -7.194 6.31e-13 ***
## kidsge6      0.0216831  0.0233501   0.929  0.35309
## Log(scale)  -0.3687450  0.0386299  -9.546  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale: 0.6916
##
## Gaussian distribution
## Number of Newton-Raphson Iterations: 4
## Log-likelihood: -690.1 on 11 Df
## Wald-statistic: 201.6 on 9 Df, p-value: < 2.22e-16
# Residuals from the Tobit fit; used below as an extra regressor.
v2 <- resid(Tobit)

# Heckman two-step fit with v2 added to the outcome equation.
# Note: this rebinds the name Heckman, replacing the Example 19.6 fit.
Heckman <- selection(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  lwage ~ educ + exper + expersq + v2,
  data = mroz,
  method = "2step"
)

# OLS wage equation augmented with v2.
OLSr <- lm(
  lwage ~ educ + exper + expersq + v2,
  data = mroz
)

# OLS wage equation with the full regressor list and no v2 term.
OLSur <- lm(
  lwage ~ educ + exper + expersq + nwifeinc + age + kidslt6 + kidsge6,
  data = mroz
)

# Compare the three fits in a text table.
stargazer(
  Heckman, OLSr, OLSur,
  no.space = TRUE,
  type = "text"
)
##
## ==================================================================================
##                                          Dependent variable:
##                     --------------------------------------------------------------
##                                                 lwage
##                       selection                          OLS
##                          (1)                 (2)                     (3)
## ----------------------------------------------------------------------------------
## educ                   0.103***           0.108***                0.100***
##                        (0.020)             (0.014)                 (0.015)
## exper                  0.046***           0.046***                0.041***
##                        (0.017)             (0.017)                 (0.013)
## expersq                -0.001**           -0.001**                 -0.001*
##                        (0.0005)           (0.0005)                (0.0004)
## v2                      0.163               0.053
##                        (0.325)             (0.128)
## nwifeinc                                                           0.006*
##                                                                    (0.003)
## age                                                                -0.004
##                                                                    (0.005)
## kidslt6                                                            -0.056
##                                                                    (0.089)
## kidsge6                                                            -0.018
##                                                                    (0.028)
## Constant                -0.508            -0.589**                 -0.358
##                        (0.336)             (0.255)                 (0.318)
## ----------------------------------------------------------------------------------
## Observations             753                 428                     428
## R2                                          0.157                   0.164
## Note:                                                  *p<0.1; **p<0.05; ***p<0.01