II Econometric Analysis Using R

Chapter 19 - Censored Data, Sample Selection, and Attrition

Also available in Stata and Python versions

Example 19.6

Load libraries

library(wooldridge)
library(stargazer)
library(AER)
library(mfx)
library(sampleSelection)

Wage Offer Equation for Married Women

# Baseline log-wage regression. lwage is missing for some rows, which lm()
# drops (see the "deleted due to missingness" line in the output).
OLS <- lm(lwage ~ educ + exper + expersq, data = mroz)
summary(OLS)
## 
## Call:
## lm(formula = lwage ~ educ + exper + expersq, data = mroz)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.08404 -0.30627  0.04952  0.37498  2.37115 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.5220406  0.1986321  -2.628  0.00890 ** 
## educ         0.1074896  0.0141465   7.598 1.94e-13 ***
## exper        0.0415665  0.0131752   3.155  0.00172 ** 
## expersq     -0.0008112  0.0003932  -2.063  0.03974 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6664 on 424 degrees of freedom
##   (325 observations deleted due to missingness)
## Multiple R-squared:  0.1568, Adjusted R-squared:  0.1509 
## F-statistic: 26.29 on 3 and 424 DF,  p-value: 1.302e-15
# Two-step Heckman (heckit) fit: the first formula is the participation
# (inlf) selection equation, the second is the log-wage outcome equation.
Heckman <- selection(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  lwage ~ educ + exper + expersq,
  data = mroz,
  method = "2step"
)
summary(Heckman)
## --------------------------------------------
## Tobit 2 model (sample selection model)
## 2-step Heckman / heckit estimation
## 753 observations (325 censored and 428 observed)
## 15 free parameters (df = 739)
## Probit selection equation:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.270077   0.508593   0.531  0.59556    
## nwifeinc    -0.012024   0.004840  -2.484  0.01320 *  
## educ         0.130905   0.025254   5.183 2.81e-07 ***
## exper        0.123348   0.018716   6.590 8.34e-11 ***
## expersq     -0.001887   0.000600  -3.145  0.00173 ** 
## age         -0.052853   0.008477  -6.235 7.61e-10 ***
## kidslt6     -0.868328   0.118522  -7.326 6.21e-13 ***
## kidsge6      0.036005   0.043477   0.828  0.40786    
## Outcome equation:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.5781032  0.3050062  -1.895  0.05843 .  
## educ         0.1090655  0.0155230   7.026 4.83e-12 ***
## exper        0.0438873  0.0162611   2.699  0.00712 ** 
## expersq     -0.0008591  0.0004389  -1.957  0.05068 .  
## Multiple R-Squared:0.1569,   Adjusted R-Squared:0.149
##    Error terms:
##               Estimate Std. Error t value Pr(>|t|)
## invMillsRatio  0.03226    0.13362   0.241    0.809
## sigma          0.66363         NA      NA       NA
## rho            0.04861         NA      NA       NA
## --------------------------------------------
# Two-step Heckman fit in which the outcome equation uses the same regressors
# as the selection equation (no variable is excluded from the wage equation).
HeckmanFull <- selection(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  lwage ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz,
  method = "2step"
)
summary(HeckmanFull)
## --------------------------------------------
## Tobit 2 model (sample selection model)
## 2-step Heckman / heckit estimation
## 753 observations (325 censored and 428 observed)
## 19 free parameters (df = 735)
## Probit selection equation:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.270077   0.508593   0.531  0.59556    
## nwifeinc    -0.012024   0.004840  -2.484  0.01320 *  
## educ         0.130905   0.025254   5.183 2.82e-07 ***
## exper        0.123348   0.018716   6.590 8.37e-11 ***
## expersq     -0.001887   0.000600  -3.145  0.00173 ** 
## age         -0.052853   0.008477  -6.235 7.63e-10 ***
## kidslt6     -0.868328   0.118522  -7.326 6.24e-13 ***
## kidsge6      0.036005   0.043477   0.828  0.40786    
## Outcome equation:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.5602852  0.4587672  -1.221 0.222370    
## nwifeinc     0.0038434  0.0044919   0.856 0.392492    
## educ         0.1187171  0.0340507   3.486 0.000518 ***
## exper        0.0598358  0.0336730   1.777 0.075987 .  
## expersq     -0.0010523  0.0006381  -1.649 0.099566 .  
## age         -0.0111580  0.0134792  -0.828 0.408054    
## kidslt6     -0.1880451  0.2308275  -0.815 0.415533    
## kidsge6     -0.0122255  0.0296063  -0.413 0.679775    
## Multiple R-Squared:0.1649,   Adjusted R-Squared:0.1489
##    Error terms:
##               Estimate Std. Error t value Pr(>|t|)
## invMillsRatio   0.2885     0.4636   0.622    0.534
## sigma           0.6896         NA      NA       NA
## rho             0.4183         NA      NA       NA
## --------------------------------------------
# OLS log-wage regression on the full regressor list, for comparison with
# HeckmanFull.
OLSfull <- lm(
  lwage ~ educ + exper + expersq + nwifeinc + age + kidslt6 + kidsge6,
  data = mroz
)
summary(OLSfull)
## 
## Call:
## lm(formula = lwage ~ educ + exper + expersq + nwifeinc + age + 
##     kidslt6 + kidsge6, data = mroz)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0827 -0.3160  0.0532  0.3810  2.3551 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.3579972  0.3182963  -1.125  0.26135    
## educ         0.0998844  0.0150975   6.616 1.13e-10 ***
## exper        0.0407097  0.0133723   3.044  0.00248 ** 
## expersq     -0.0007473  0.0004018  -1.860  0.06358 .  
## nwifeinc     0.0056942  0.0033195   1.715  0.08701 .  
## age         -0.0035204  0.0054145  -0.650  0.51593    
## kidslt6     -0.0558725  0.0886034  -0.631  0.52865    
## kidsge6     -0.0176484  0.0278910  -0.633  0.52723    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6667 on 420 degrees of freedom
##   (325 observations deleted due to missingness)
## Multiple R-squared:  0.1641, Adjusted R-squared:  0.1502 
## F-statistic: 11.78 on 7 and 420 DF,  p-value: 1.023e-13
# Side-by-side text table of the four wage-equation fits.
stargazer(
  OLS, Heckman, HeckmanFull, OLSfull,
  no.space = TRUE,
  type = "text",
  title = "Table 19.1 Wage Offer Equation for Married Women"
)
## 
## Table 19.1 Wage Offer Equation for Married Women
## ===============================================================================================
##                                                 Dependent variable:                            
##                     ---------------------------------------------------------------------------
##                                                        lwage                                   
##                               OLS                    selection                    OLS          
##                               (1)                (2)           (3)                (4)          
## -----------------------------------------------------------------------------------------------
## nwifeinc                                                      0.004             0.006*         
##                                                              (0.004)            (0.003)        
## educ                       0.107***           0.109***      0.119***           0.100***        
##                             (0.014)            (0.016)       (0.034)            (0.015)        
## exper                      0.042***           0.044***       0.060*            0.041***        
##                             (0.013)            (0.016)       (0.034)            (0.013)        
## expersq                    -0.001**            -0.001*       -0.001*            -0.001*        
##                            (0.0004)           (0.0004)       (0.001)           (0.0004)        
## age                                                          -0.011             -0.004         
##                                                              (0.013)            (0.005)        
## kidslt6                                                      -0.188             -0.056         
##                                                              (0.231)            (0.089)        
## kidsge6                                                      -0.012             -0.018         
##                                                              (0.030)            (0.028)        
## Constant                   -0.522***           -0.578*       -0.560             -0.358         
##                             (0.199)            (0.305)       (0.459)            (0.318)        
## -----------------------------------------------------------------------------------------------
## Observations                  428                753           753                428          
## R2                           0.157                                               0.164         
## Adjusted R2                  0.151                                               0.150         
## rho                                             0.049         0.418                            
## Inverse Mills Ratio                         0.032 (0.134) 0.288 (0.464)                        
## Residual Std. Error    0.666 (df = 424)                                    0.667 (df = 420)    
## F Statistic         26.286*** (df = 3; 424)                             11.779*** (df = 7; 420)
## ===============================================================================================
## Note:                                                               *p<0.1; **p<0.05; ***p<0.01

HOME | Back to top

Example 19.7

Education Endogenous and Sample Selection

# Participation probit on all exogenous variables, including the three
# education variables of relatives (motheduc, fatheduc, huseduc).
Probit <- glm(
  inlf ~ exper + expersq + nwifeinc + age + kidslt6 + kidsge6 +
    motheduc + fatheduc + huseduc,
  data = mroz,
  family = binomial(link = "probit")
)
summary(Probit)
## 
## Call:
## glm(formula = inlf ~ exper + expersq + nwifeinc + age + kidslt6 + 
##     kidsge6 + motheduc + fatheduc + huseduc, family = binomial(link = "probit"), 
##     data = mroz)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2193  -0.9413   0.4550   0.8604   2.2870  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.1466683  0.4916633   2.332  0.01969 *  
## exper        0.1285092  0.0185409   6.931 4.17e-12 ***
## expersq     -0.0019474  0.0005935  -3.281  0.00103 ** 
## nwifeinc    -0.0074295  0.0050157  -1.481  0.13854    
## age         -0.0527656  0.0085094  -6.201 5.62e-10 ***
## kidslt6     -0.8149250  0.1163696  -7.003 2.51e-12 ***
## kidsge6      0.0241509  0.0436677   0.553  0.58022    
## motheduc     0.0295320  0.0186856   1.580  0.11400    
## fatheduc     0.0133487  0.0178497   0.748  0.45456    
## huseduc      0.0161394  0.0197305   0.818  0.41336    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1029.75  on 752  degrees of freedom
## Residual deviance:  822.64  on 743  degrees of freedom
## AIC: 842.64
## 
## Number of Fisher Scoring iterations: 4
# Joint chi-square test that motheduc, fatheduc and huseduc all have zero
# coefficients in the participation probit.
linearHypothesis(
  Probit,
  c("motheduc = 0", "fatheduc =0", "huseduc =0")
)
## Linear hypothesis test
## 
## Hypothesis:
## motheduc = 0
## fatheduc = 0
## huseduc = 0
## 
## Model 1: restricted model
## Model 2: inlf ~ exper + expersq + nwifeinc + age + kidslt6 + kidsge6 + 
##     motheduc + fatheduc + huseduc
## 
##   Res.Df Df  Chisq Pr(>Chisq)  
## 1    746                       
## 2    743  3 8.0245    0.04551 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Inverse Mills ratio evaluated at the probit linear index
# (predict() on a glm returns the linear predictor by default).
IMR <- local({
  index <- predict(Probit)
  dnorm(index) / pnorm(index)
})

# 2SLS wage equation with IMR as an extra regressor. educ does not appear
# after the `|`, so it is the regressor being instrumented.
wIMR <- ivreg(
  lwage ~ educ + exper + expersq + IMR |
    exper + expersq + IMR + nwifeinc + age + kidslt6 + kidsge6 +
      motheduc + fatheduc + huseduc,
  data = mroz
)

# Same specification without the IMR term.
woutIMR <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + nwifeinc + age + kidslt6 + kidsge6 +
      motheduc + fatheduc + huseduc,
  data = mroz
)

stargazer(wIMR, woutIMR, no.space = TRUE, type = "text")
## 
## =====================================================
##                            Dependent variable:       
##                     ---------------------------------
##                                   lwage              
##                           (1)              (2)       
## -----------------------------------------------------
## educ                    0.088***         0.087***    
##                         (0.021)          (0.021)     
## exper                   0.046***         0.043***    
##                         (0.017)          (0.013)     
## expersq                 -0.001**         -0.001**    
##                         (0.0004)         (0.0004)    
## IMR                      0.040                       
##                         (0.133)                      
## Constant                 -0.325           -0.270     
##                         (0.333)          (0.280)     
## -----------------------------------------------------
## Observations              428              428       
## R2                       0.153            0.153      
## Adjusted R2              0.145            0.147      
## Residual Std. Error 0.669 (df = 423) 0.668 (df = 424)
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01
# Repeat the 2SLS fits with a shorter instrument list: nwifeinc, age, kidslt6
# and kidsge6 are no longer included after the `|`.
wIMR2 <- ivreg(
  lwage ~ educ + exper + expersq + IMR |
    exper + expersq + IMR + motheduc + fatheduc + huseduc,
  data = mroz
)

# Same reduced instrument list, without the IMR term.
woutIMR2 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz
)

stargazer(wIMR2, woutIMR2, no.space = TRUE, type = "text")
## 
## =====================================================
##                            Dependent variable:       
##                     ---------------------------------
##                                   lwage              
##                           (1)              (2)       
## -----------------------------------------------------
## educ                    0.081***         0.080***    
##                         (0.022)          (0.022)     
## exper                   0.046***         0.043***    
##                         (0.017)          (0.013)     
## expersq                 -0.001**         -0.001**    
##                         (0.0004)         (0.0004)    
## IMR                      0.036                       
##                         (0.134)                      
## Constant                 -0.234           -0.187     
##                         (0.337)          (0.285)     
## -----------------------------------------------------
## Observations              428              428       
## R2                       0.150            0.150      
## Adjusted R2              0.142            0.144      
## Residual Std. Error 0.670 (df = 423) 0.669 (df = 424)
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01

HOME | Back to top

Example 19.8

Wage Offer Equation for Married Women

# First stage for Example 19.8: a Tobit (AER::tobit, left-censored at 0) fitted
# on all 753 observations; its residuals are saved below as v2.
# NOTE(review): the response here is the binary indicator `inlf`. A Tobit
# selection equation presumably calls for a corner-solution variable such as
# `hours` -- TODO confirm against the textbook. If the response is changed, the
# printed output below and every later result involving v2 must be regenerated.
summary(Tobit <- tobit(inlf ~  nwifeinc + motheduc + fatheduc + huseduc + exper + expersq + age + kidslt6 + kidsge6 ,  data=mroz, left = 0 ))
## 
## Call:
## tobit(formula = inlf ~ nwifeinc + motheduc + fatheduc + huseduc + 
##     exper + expersq + age + kidslt6 + kidsge6, left = 0, data = mroz)
## 
## Observations:
##          Total  Left-censored     Uncensored Right-censored 
##            753            325            428              0 
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.7832485  0.2644459   2.962  0.00306 ** 
## nwifeinc    -0.0042396  0.0027476  -1.543  0.12283    
## motheduc     0.0134540  0.0101337   1.328  0.18429    
## fatheduc     0.0082528  0.0096746   0.853  0.39364    
## huseduc      0.0112281  0.0105784   1.061  0.28850    
## exper        0.0779554  0.0104014   7.495 6.64e-14 ***
## expersq     -0.0012600  0.0003276  -3.847  0.00012 ***
## age         -0.0287968  0.0045577  -6.318 2.65e-10 ***
## kidslt6     -0.4729478  0.0657462  -7.194 6.31e-13 ***
## kidsge6      0.0216831  0.0233501   0.929  0.35309    
## Log(scale)  -0.3687450  0.0386299  -9.546  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale: 0.6916 
## 
## Gaussian distribution
## Number of Newton-Raphson Iterations: 4 
## Log-likelihood: -690.1 on 11 Df
## Wald-statistic: 201.6 on 9 Df, p-value: < 2.22e-16
# Residuals from the Tobit fit; used as the regressor v2 in the models below.
v2 <- residuals(Tobit)

# Two-step Heckman fit whose outcome equation also includes v2.
# Note: this reassigns the name Heckman used in Example 19.6.
Heckman <- selection(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  lwage ~ educ + exper + expersq + v2,
  data = mroz,
  method = "2step"
)

# Wage equation augmented with the first-stage residual v2. v2 is not a column
# of mroz; lm() picks it up from the calling environment.
OLSr <- lm(lwage ~ educ + exper + expersq + v2, data = mroz)

# Wage equation on the full regressor list, without v2. The stray unary plus
# before nwifeinc ("expersq + +nwifeinc") has been removed; the fitted model
# is unchanged.
OLSur <- lm(
  lwage ~ educ + exper + expersq + nwifeinc + age + kidslt6 + kidsge6,
  data = mroz
)

# Text table comparing the Heckman fit with the two OLS fits.
stargazer(
  Heckman, OLSr, OLSur,
  no.space = TRUE,
  type = "text"
)
## 
## ==================================================================================
##                                          Dependent variable:                      
##                     --------------------------------------------------------------
##                                                 lwage                             
##                       selection                          OLS                      
##                          (1)                 (2)                     (3)          
## ----------------------------------------------------------------------------------
## educ                   0.103***           0.108***                0.100***        
##                        (0.020)             (0.014)                 (0.015)        
## exper                  0.046***           0.046***                0.041***        
##                        (0.017)             (0.017)                 (0.013)        
## expersq                -0.001**           -0.001**                 -0.001*        
##                        (0.0005)           (0.0005)                (0.0004)        
## v2                      0.163               0.053                                 
##                        (0.325)             (0.128)                                
## nwifeinc                                                           0.006*         
##                                                                    (0.003)        
## age                                                                -0.004         
##                                                                    (0.005)        
## kidslt6                                                            -0.056         
##                                                                    (0.089)        
## kidsge6                                                            -0.018         
##                                                                    (0.028)        
## Constant                -0.508            -0.589**                 -0.358         
##                        (0.336)             (0.255)                 (0.318)        
## ----------------------------------------------------------------------------------
## Observations             753                 428                     428          
## R2                                          0.157                   0.164         
## Adjusted R2                                 0.149                   0.150         
## rho                     -0.186                                                    
## Inverse Mills Ratio -0.125 (0.340)                                                
## Residual Std. Error                   0.667 (df = 423)        0.667 (df = 420)    
## F Statistic                        19.720*** (df = 4; 423) 11.779*** (df = 7; 420)
## ==================================================================================
## Note:                                                  *p<0.1; **p<0.05; ***p<0.01

HOME | Back to top