# Introductory Econometrics Using R

# Also covered using Python and Stata

library(wooldridge)
library(stargazer)
library(AER)
library(mfx)
library(censReg)
library(sampleSelection)

#### Example 17.1. Married Women’s Labor Force Participation

# Labor force participation (inlf) on the mroz data. The same set of
# regressors is fitted three ways: a linear probability model (OLS), a logit,
# and a probit.
mOLS <- lm(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz
)
mLogit <- glm(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz,
  family = binomial(link = "logit")
)
mProbit <- glm(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz,
  family = binomial(link = "probit")
)

# Side-by-side text table; the number of observations is the only summary
# statistic kept below the coefficients.
stargazer(
  mOLS, mLogit, mProbit,
  title = "Table 17.1 LPM, Logit, and Probit Estimates of Labor Force Participation: (inlf)",
  type = "text",
  no.space = TRUE,
  keep.stat = c("n")
)
##
## Table 17.1 LPM, Logit, and Probit Estimates of Labor Force Participation: (inlf)
## ==========================================
##                   Dependent variable:
##              -----------------------------
##                          inlf
##                 OLS    logistic   probit
##                 (1)       (2)       (3)
## ------------------------------------------
## nwifeinc     -0.003**  -0.021**  -0.012**
##               (0.001)   (0.008)   (0.005)
## educ         0.038***  0.221***  0.131***
##               (0.007)   (0.043)   (0.025)
## exper        0.039***  0.206***  0.123***
##               (0.006)   (0.032)   (0.019)
## expersq      -0.001*** -0.003*** -0.002***
##              (0.0002)   (0.001)   (0.001)
## age          -0.016*** -0.088*** -0.053***
##               (0.002)   (0.015)   (0.008)
## kidslt6      -0.262*** -1.443*** -0.868***
##               (0.034)   (0.204)   (0.118)
## kidsge6        0.013     0.060     0.036
##               (0.013)   (0.075)   (0.044)
## Constant     0.586***    0.425     0.270
##               (0.154)   (0.860)   (0.508)
## ------------------------------------------
## Observations    753       753       753
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
# Logit marginal effects. With atmean = FALSE, mfx reports average partial
# effects (averaged over the sample) rather than effects evaluated at the
# mean of the regressors.
meLogit <- logitmfx(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz,
  atmean = FALSE
)
meLogit
## Call:
## logitmfx(formula = inlf ~ nwifeinc + educ + exper + expersq +
##     age + kidslt6 + kidsge6, data = mroz, atmean = FALSE)
##
## Marginal Effects:
##                dF/dx   Std. Err.       z     P>|z|
## nwifeinc -0.00381181  0.00153898 -2.4769  0.013255 *
## educ      0.03949652  0.00846811  4.6641 3.099e-06 ***
## exper     0.03676411  0.00655577  5.6079 2.048e-08 ***
## expersq  -0.00056326  0.00018795 -2.9968  0.002728 **
## age      -0.01571936  0.00293269 -5.3600 8.320e-08 ***
## kidslt6  -0.25775366  0.04263493 -6.0456 1.489e-09 ***
## kidsge6   0.01073482  0.01339130  0.8016  0.422769
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Probit marginal effects. With atmean = FALSE, mfx reports average partial
# effects (averaged over the sample) rather than effects evaluated at the
# mean of the regressors.
meProbit <- probitmfx(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz,
  atmean = FALSE
)
meProbit
## Call:
## probitmfx(formula = inlf ~ nwifeinc + educ + exper + expersq +
##     age + kidslt6 + kidsge6, data = mroz, atmean = FALSE)
##
## Marginal Effects:
##                dF/dx   Std. Err.       z     P>|z|
## nwifeinc -0.00361618  0.00146972 -2.4604  0.013876 *
## educ      0.03937009  0.00726571  5.4186 6.006e-08 ***
## exper     0.03709734  0.00516823  7.1780 7.076e-13 ***
## expersq  -0.00056755  0.00017708 -3.2050  0.001351 **
## age      -0.01589566  0.00235868 -6.7392 1.592e-11 ***
## kidslt6  -0.26115346  0.03190239 -8.1860 2.700e-16 ***
## kidsge6   0.01082889  0.01322413  0.8189  0.412859
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#### Example 17.2. Married Women’s Annual Labor Supply

# Annual hours worked on the mroz data: plain OLS next to a Tobit fit.
# censReg() is called with its default censoring limits (left-censoring at
# zero, no upper limit).
sOLS <- lm(
  hours ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz
)
sTobit <- censReg(
  hours ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz
)

# Text table comparing the two fits.
stargazer(
  sOLS, sTobit,
  type = "text",
  no.space = TRUE,
  title = "Table 17.3 OLS and Tobit Estimation of Annual Hours Worked: (hours)"
)
##
## Table 17.3 OLS and Tobit Estimation of Annual Hours Worked: (hours)
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                                    hours
##                               OLS            censored
##                                             regression
##                               (1)               (2)
## -------------------------------------------------------
## nwifeinc                    -3.447           -8.814**
##                             (2.544)           (4.459)
## educ                       28.761**          80.646***
##                            (12.955)          (21.583)
## exper                      65.673***        131.564***
##                             (9.963)          (17.279)
## expersq                    -0.700**          -1.864***
##                             (0.325)           (0.538)
## age                       -30.512***        -54.405***
##                             (4.364)           (7.419)
## kidslt6                   -442.090***       -894.022***
##                            (58.847)          (111.878)
## kidsge6                     -32.779           -16.218
##                            (23.176)          (38.641)
## logSigma                                     7.023***
##                                               (0.037)
## Constant                 1,330.482***        965.305**
##                            (270.785)         (446.436)
## -------------------------------------------------------
## Observations                  753               753
## R2                           0.266
## Log Likelihood                              -3,819.095
## Akaike Inf. Crit.                            7,656.189
## Bayesian Inf. Crit.                          7,697.806
## Residual Std. Error   750.179 (df = 745)
## F Statistic         38.495*** (df = 7; 745)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01
# Marginal effects of each regressor on expected hours implied by the Tobit
# fit (censReg's margEff() evaluates them at the regressor means by default),
# printed with standard errors and t tests.
summary(
  margEff(sTobit)
)
##          Marg. Eff. Std. Error t value  Pr(>|t|)
## nwifeinc   -5.32644    2.69073 -1.9796 0.0481217 *
## educ       48.73409   12.96341  3.7594 0.0001837 ***
## exper      79.50423   10.30497  7.7151 3.886e-14 ***
## expersq    -1.12651    0.32326 -3.4848 0.0005213 ***
## age       -32.87692    4.45770 -7.3753 4.383e-13 ***
## kidslt6  -540.25683   66.62393 -8.1091 2.220e-15 ***
## kidsge6    -9.80053   23.36134 -0.4195 0.6749580
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#### Example 17.3. Poisson Regression for Number of Arrests

# Number of arrests in 1986 (narr86) on the crime1 data: a linear model by
# OLS next to a Poisson regression with the same regressors.
pOLS <- lm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black +
    hispan + born60,
  data = crime1
)
pPoisson <- glm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black +
    hispan + born60,
  data = crime1,
  family = poisson
)

# Text table comparing the two fits.
stargazer(
  pOLS, pPoisson,
  type = "text",
  no.space = TRUE,
  title = "Table 17.5 Determinants of Number of Arrests for Young Men: (narr86)"
)
##
## Table 17.5 Determinants of Number of Arrests for Young Men: (narr86)
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                                   narr86
##                               OLS             Poisson
##                               (1)               (2)
## -------------------------------------------------------
## pcnv                       -0.132***         -0.402***
##                             (0.040)           (0.085)
## avgsen                       -0.011            -0.024
##                             (0.012)           (0.020)
## tottime                      0.012             0.024*
##                             (0.009)           (0.015)
## ptime86                    -0.041***         -0.099***
##                             (0.009)           (0.021)
## qemp86                     -0.051***           -0.038
##                             (0.014)           (0.029)
## inc86                      -0.001***         -0.008***
##                             (0.0003)          (0.001)
## black                       0.327***          0.661***
##                             (0.045)           (0.074)
## hispan                      0.194***          0.500***
##                             (0.040)           (0.074)
## born60                       -0.022            -0.051
##                             (0.033)           (0.064)
## Constant                    0.577***         -0.600***
##                             (0.038)           (0.067)
## -------------------------------------------------------
## Observations                 2,725             2,725
## R2                           0.072
## Log Likelihood                               -2,248.761
## Akaike Inf. Crit.                            4,517.522
## Residual Std. Error    0.829 (df = 2715)
## F Statistic         23.572*** (df = 9; 2715)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

#### Example 17.4. Duration of Recidivism

# Censored normal regression for ldurat (log duration until rearrest) on the
# recid data.
#
# In recid, cens equals 1 when the duration is right-censored, whereas the
# second argument of Surv() must flag an *observed* event (1/TRUE = event,
# 0/FALSE = censored). Passing cens directly swapped censored and completed
# spells: the fit ran out of Newton-Raphson iterations and reported a model
# log-likelihood far below the intercept-only one. The indicator is therefore
# flipped with cens == 0, so TRUE marks an uncensored duration.
#
# Surv() and survreg() come from the survival package, which library(AER)
# attaches as a dependency.
scnreg <- survreg(
  Surv(ldurat, cens == 0, type = "right") ~
    workprg + priors + tserved + felon + alcohol + drugs + black + married +
    educ + age,
  data = recid,
  dist = "gaussian"
)
summary(scnreg)
## NOTE(review): the output previously recorded here was produced by the
## miscoded censoring indicator (a non-converged fit) and has been removed.
## Re-run this chunk to regenerate it; the estimates should then line up with
## the textbook's censored regression table for this example -- TODO confirm.

#### Example 17.5. Wage Offer Equation for Married Women

# Log wage offer equation on the mroz data: OLS next to a Heckman two-step
# (heckit) sample-selection fit. The first formula passed to selection() is
# the participation (inlf) equation, the second is the wage equation.
# The result keeps the name `selection`, which matches the function name; R
# skips non-function objects when resolving a call, so selection() still
# works afterwards.
ols <- lm(
  lwage ~ educ + exper + expersq,
  data = mroz
)
selection <- selection(
  inlf ~ educ + exper + expersq + nwifeinc + age + kidslt6 + kidsge6,
  lwage ~ educ + exper + expersq,
  data = mroz,
  method = "2step"
)

# Text table comparing the two fits.
stargazer(
  ols, selection,
  type = "text",
  no.space = TRUE,
  title = "Table 17.7 Wage Offer Equation for Married Women: (lwage)"
)
##
## Table 17.7 Wage Offer Equation for Married Women: (lwage)
## =========================================================
##                              Dependent variable:
##                     -------------------------------------
##                                     lwage
##                               OLS             selection
##                               (1)                (2)
## ---------------------------------------------------------
## educ                       0.107***           0.109***
##                             (0.014)            (0.016)
## exper                      0.042***           0.044***
##                             (0.013)            (0.016)
## expersq                    -0.001**            -0.001*
##                            (0.0004)           (0.0004)
## Constant                   -0.522***           -0.578*
##                             (0.199)            (0.305)
## ---------------------------------------------------------
## Observations                  428                753
## R2                           0.157
## rho                                             0.049
## Inverse Mills Ratio                         0.032 (0.134)
## Residual Std. Error    0.666 (df = 424)
## F Statistic         26.286*** (df = 3; 424)
## =========================================================
## Note:                         *p<0.1; **p<0.05; ***p<0.01
# Full report for the two-step selection fit: the probit participation
# equation followed by the wage (outcome) equation.
summary(
  selection
)
## --------------------------------------------
## Tobit 2 model (sample selection model)
## 2-step Heckman / heckit estimation
## 753 observations (325 censored and 428 observed)
## 15 free parameters (df = 739)
## Probit selection equation:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.270077   0.508593   0.531  0.59556
## educ         0.130905   0.025254   5.183 2.81e-07 ***
## exper        0.123348   0.018716   6.590 8.34e-11 ***
## expersq     -0.001887   0.000600  -3.145  0.00173 **
## nwifeinc    -0.012024   0.004840  -2.484  0.01320 *
## age         -0.052853   0.008477  -6.235 7.61e-10 ***
## kidslt6     -0.868328   0.118522  -7.326 6.21e-13 ***
## kidsge6      0.036005   0.043477   0.828  0.40786
## Outcome equation:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5781032  0.3050062  -1.895  0.05843 .
## educ         0.1090655  0.0155230   7.026 4.83e-12 ***
## exper        0.0438873  0.0162611   2.699  0.00712 **
## expersq     -0.0008591  0.0004389  -1.957  0.05068 .
## --------------------------------------------