# Introductory Econometrics Using R
#
# The same examples are also covered using Python and Stata.

library(wooldridge)
library(stargazer)
library(sandwich)
library(lmtest)
library(car)
options(width=120)

#### Example 9.1. Economic Model of Crime

# Restricted specification: narr86 on the linear regressors only.
crime_hetr_r <- lm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black +
    hispan + 1,
  data = crime1
)

# Full specification: the same regressors plus the squared terms
# pcnvsq, pt86sq and inc86sq.
crime_hetr <- lm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black +
    hispan + pcnvsq + pt86sq + inc86sq + 1,
  data = crime1
)

# Coefficient tests for the full specification using an HC1
# heteroskedasticity-consistent covariance matrix; point estimates are the
# same as crime_hetr, only the standard errors change.
crime_robust <- coeftest(crime_hetr, vcov. = vcovHC(crime_hetr, type = "HC1"))

# Side-by-side text table of the three sets of results.
stargazer(
  crime_hetr_r, crime_hetr, crime_robust,
  column.labels = c("Hetrosc1.", "Hetrosc2", "Robust"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =============================================
##                    Dependent variable:
##              --------------------------------
##                     narr86
##                      OLS          coefficient
##                                      test
##              Hetrosc1.  Hetrosc2    Robust
##                 (1)       (2)         (3)
## ---------------------------------------------
## pcnv         -0.133***  0.553***   0.553***
##               (0.040)   (0.154)     (0.170)
## avgsen        -0.011     -0.017     -0.017
##               (0.012)   (0.012)     (0.014)
## tottime        0.012     0.012       0.012
##               (0.009)   (0.009)     (0.013)
## ptime86      -0.041***  0.287***   0.287***
##               (0.009)   (0.044)     (0.069)
## qemp86       -0.051***   -0.014     -0.014
##               (0.014)   (0.017)     (0.017)
## inc86        -0.001*** -0.003***   -0.003***
##              (0.0003)   (0.001)     (0.001)
## black        0.327***   0.292***   0.292***
##               (0.045)   (0.045)     (0.058)
## hispan       0.194***   0.164***   0.164***
##               (0.040)   (0.039)     (0.040)
## pcnvsq                 -0.730***   -0.730***
##                         (0.156)     (0.172)
## pt86sq                 -0.030***   -0.030***
##                         (0.004)     (0.006)
## inc86sq                0.00001*** 0.00001***
##                        (0.00000)   (0.00000)
## Constant     0.569***   0.505***   0.505***
##               (0.036)   (0.037)     (0.039)
## ---------------------------------------------
## Observations   2,725     2,725
## R2             0.072     0.103
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01

#### Example 9.2. Housing Price Equation

# Level-form housing price equation.
hprice_reg <- lm(price ~ lotsize + sqrft + bdrms, data = hprice1)

# Square and cube of the fitted prices. These live in the global environment
# (not in hprice1), so the augmented regression below finds them through the
# formula's environment.
prhat2 <- predict(hprice_reg)^2
prhat3 <- predict(hprice_reg)^3

# Augmented regression: original regressors plus the fitted-value powers.
hprice_reg_pol <- lm(
  price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1,
  data = hprice1
)

stargazer(
  hprice_reg, hprice_reg_pol,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
##                  Dependent variable:
##              ----------------------------
##                         price
##                   (1)            (2)
## -----------------------------------------
## lotsize         0.002***       0.0002
##                 (0.001)        (0.005)
## sqrft           0.123***        0.018
##                 (0.013)        (0.299)
## bdrms            13.853         2.175
##                 (9.010)       (33.888)
## prhat2                         0.0004
##                                (0.007)
## prhat3                         0.00000
##                               (0.00001)
## Constant        -21.770        166.097
##                 (29.475)      (317.433)
## -----------------------------------------
## Observations       88            88
## R2               0.672          0.706
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01
# Joint F test that both fitted-value powers have zero coefficients in the
# augmented level-form regression.
linearHypothesis(
  hprice_reg_pol,
  hypothesis.matrix = c("prhat2 = 0", "prhat3 =0")
)
## Linear hypothesis test
##
## Hypothesis:
## prhat2 = 0
## prhat3 = 0
##
## Model 1: restricted model
## Model 2: price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1
##
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)
## 1     84 300724
## 2     82 269984  2     30740 4.6682 0.01202 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

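# Log form: the same procedure (powers of the fitted values plus a joint F test)
# applied to the log-log housing price equation.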

# Log-form housing price equation.
lhprice_reg <- lm(lprice ~ llotsize + lsqrft + bdrms, data = hprice1)

# Square and cube of the fitted log prices. As with the level form, these are
# global vectors that the augmented regression picks up from the formula's
# environment rather than from hprice1.
lprhat2 <- predict(lhprice_reg)^2
lprhat3 <- predict(lhprice_reg)^3

# Augmented regression: original regressors plus the fitted-value powers.
lhprice_reg_pol <- lm(
  lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1,
  data = hprice1
)

stargazer(
  lhprice_reg, lhprice_reg_pol,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
##                  Dependent variable:
##              ----------------------------
##                         lprice
##                   (1)            (2)
## -----------------------------------------
## llotsize        0.168***       -4.181
##                 (0.038)       (12.595)
## lsqrft          0.700***       -17.349
##                 (0.093)       (52.490)
## bdrms            0.037         -0.925
##                 (0.028)        (2.770)
## lprhat2                         3.910
##                               (13.014)
## lprhat3                        -0.193
##                                (0.752)
## Constant        -1.297**       87.886
##                 (0.651)       (240.974)
## -----------------------------------------
## Observations       88            88
## R2               0.643          0.664
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01
# Joint F test that both fitted-value powers have zero coefficients in the
# augmented log-form regression.
linearHypothesis(
  lhprice_reg_pol,
  hypothesis.matrix = c("lprhat2 = 0", "lprhat3 =0")
)
## Linear hypothesis test
##
## Hypothesis:
## lprhat2 = 0
## lprhat3 = 0
##
## Model 1: restricted model
## Model 2: lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1
##
##   Res.Df    RSS Df Sum of Sq     F  Pr(>F)
## 1     84 2.8626
## 2     82 2.6940  2   0.16854 2.565 0.08308 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#### Example 9.3. IQ as a Proxy for Ability

# IQA: log wage equation without IQ.
IQA <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + 1,
  data = wage2
)

# IQB: adds IQ as an additional regressor.
IQB <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ + 1,
  data = wage2
)

# IQC: additionally includes the educ-by-IQ interaction term.
IQC <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ +
    educ:IQ + 1,
  data = wage2
)

# Compare the three specifications in one text table.
stargazer(
  IQA, IQB, IQC,
  column.labels = c("IQA", "IQB", "IQC"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ==========================================
##                   Dependent variable:
##              -----------------------------
##                          lwage
##                 IQA       IQB       IQC
##                 (1)       (2)       (3)
## ------------------------------------------
## educ         0.065***  0.054***    0.018
##               (0.006)   (0.007)   (0.041)
## exper        0.014***  0.014***  0.014***
##               (0.003)   (0.003)   (0.003)
## tenure       0.012***  0.011***  0.011***
##               (0.002)   (0.002)   (0.002)
## married      0.199***  0.200***  0.201***
##               (0.039)   (0.039)   (0.039)
## south        -0.091*** -0.080*** -0.080***
##               (0.026)   (0.026)   (0.026)
## urban        0.184***  0.182***  0.184***
##               (0.027)   (0.027)   (0.027)
## black        -0.188*** -0.143*** -0.147***
##               (0.038)   (0.039)   (0.040)
## IQ                     0.004***   -0.001
##                         (0.001)   (0.005)
## educ:IQ                           0.0003
##                                  (0.0004)
## Constant     5.395***  5.176***  5.648***
##               (0.113)   (0.128)   (0.546)
## ------------------------------------------
## Observations    935       935       935
## R2             0.253     0.263     0.263
## Adjusted R2    0.247     0.256     0.256
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

#### Example 9.4. City Crime Rates

# Keep only the rows of crime2 with year == 87.
crime2b <- subset(crime2, crime2$year == 87)

# crimeA: log crime rate on unem and llawexpc.
crimeA <- lm(lcrmrte ~ unem + llawexpc + 1, data = crime2b)

# crimeB: same regressors plus lcrmrt_1 (presumably the lagged log crime
# rate; confirm against the crime2 data documentation).
crimeB <- lm(lcrmrte ~ unem + llawexpc + lcrmrt_1 + 1, data = crime2b)

stargazer(
  crimeA, crimeB,
  column.labels = c("crimeA", "crimeB"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
##                  Dependent variable:
##              ----------------------------
##                         lcrmrte
##                  crimeA         crimeB
##                   (1)            (2)
## -----------------------------------------
## unem            -0.029          0.009
##                 (0.032)        (0.020)
## llawexpc         0.203         -0.140
##                 (0.173)        (0.109)
## lcrmrt_1                      1.194***
##                                (0.132)
## Constant        3.343**         0.076
##                 (1.251)        (0.821)
## -----------------------------------------
## Observations       46            46
## R2               0.057          0.680
## Adjusted R2      0.013          0.657
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

#### Example 9.8. R&D Intensity and Firm Size

# RD1: R&D intensity on sales and profit margin, all firms in rdchem.
RD1 <- lm(rdintens ~ sales + profmarg + 1, data = rdchem)

# RD2: same model restricted to firms with sales below 30000, which drops
# one observation (32 -> 31 in the table that follows).
RD2 <- lm(
  rdintens ~ sales + profmarg + 1,
  data = subset(rdchem, rdchem$sales < 30000)
)

stargazer(
  RD1, RD2,
  column.labels = c("RD1", "RD2"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
##                  Dependent variable:
##              ----------------------------
##                        rdintens
##                   RD1            RD2
##                   (1)            (2)
## -----------------------------------------
## sales            0.0001       0.0002**
##                (0.00004)      (0.0001)
## profmarg         0.045          0.048
##                 (0.046)        (0.044)
## Constant        2.625***      2.297***
##                 (0.586)        (0.592)
## -----------------------------------------
## Observations       32            31
## R2               0.076          0.173
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

#### Example 9.9. R&D Intensity

# lRD1: log R&D spending on log sales and profit margin, all firms in rdchem.
lRD1 <- lm(lrd ~ lsales + profmarg + 1, data = rdchem)

# lRD2: same model restricted to firms with sales below 30000.
lRD2 <- lm(
  lrd ~ lsales + profmarg + 1,
  data = subset(rdchem, rdchem$sales < 30000)
)

stargazer(
  lRD1, lRD2,
  column.labels = c("RD1logfn", "RD2logfn"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
##                  Dependent variable:
##              ----------------------------
##                           lrd
##                 RD1logfn      RD2logfn
##                   (1)            (2)
## -----------------------------------------
## lsales          1.084***      1.088***
##                 (0.060)        (0.067)
## profmarg         0.022          0.022
##                 (0.013)        (0.013)
## Constant       -4.378***      -4.404***
##                 (0.468)        (0.511)
## -----------------------------------------
## Observations       32            31
## R2               0.918          0.904
## Adjusted R2      0.912          0.897
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

#### Example 9.10. State Infant Mortality Rates

# infant1: all rows of infmrt with year == 1990.
infant1 <- lm(
  infmort ~ lpcinc + lphysic + lpopul + 1,
  data = subset(infmrt, infmrt$year == 1990)
)

# infant2: same year, additionally keeping only rows with DC == 0, which
# drops one observation (51 -> 50 in the table that follows).
infant2 <- lm(
  infmort ~ lpcinc + lphysic + lpopul + 1,
  data = subset(infmrt, infmrt$year == 1990 & infmrt$DC == 0)
)

stargazer(
  infant1, infant2,
  column.labels = c("Infmort1", "Infmort2"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
##                  Dependent variable:
##              ----------------------------
##                        infmort
##                 Infmort1      Infmort2
##                   (1)            (2)
## -----------------------------------------
## lpcinc          -4.685*        -0.567
##                 (2.604)        (1.641)
## lphysic         4.153***      -2.742**
##                 (1.513)        (1.191)
## lpopul           -0.088       0.629***
##                 (0.287)        (0.191)
## Constant         33.859        23.955*
##                 (20.428)      (12.419)
## -----------------------------------------
## Observations       51            50
## R2               0.139          0.273
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01