9 More on Specification and Data Issues
Also covered using Python and Stata
library(wooldridge)
library(stargazer)
library(sandwich)
library(lmtest)
library(car)
# Deliberately no rm(list = ls()) here: it would delete every object in the
# caller's workspace while leaving attached packages and options untouched,
# so it destroys the user's data without giving a truly clean session.
# Run this file in a fresh R session to start from a clean state.
9.1 Example 9.1. Economic Model of Crime
# Example 9.1: regress narr86 on the baseline regressors, then add the squared
# terms pcnvsq, pt86sq and inc86sq, and finally re-test the larger model with
# heteroskedasticity-robust standard errors.

# Restricted model: no squared terms (`+ 1` spells out the default intercept).
crime_hetr_r <- lm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black +
    hispan + 1,
  data = crime1
)

# Unrestricted model: same regressors plus the three squared terms.
crime_hetr <- lm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 + black +
    hispan + pcnvsq + pt86sq + inc86sq + 1,
  data = crime1
)

# Same coefficients as crime_hetr, tested with the HC1 covariance estimator.
# coeftest()'s argument is `vcov.` (with a trailing dot); it is named in full
# here instead of relying on partial argument matching of `vcov`.
crime_robust <- coeftest(crime_hetr, vcov. = vcovHC(crime_hetr, type = "HC1"))

# Side-by-side text table of the three sets of estimates.
stargazer(
  crime_hetr_r, crime_hetr, crime_robust,
  column.labels = c("Hetrosc1.", "Hetrosc2", "Robust"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =============================================
## Dependent variable:
## --------------------------------
## narr86
## OLS coefficient
## test
## Hetrosc1. Hetrosc2 Robust
## (1) (2) (3)
## ---------------------------------------------
## pcnv -0.133*** 0.553*** 0.553***
## (0.040) (0.154) (0.170)
## avgsen -0.011 -0.017 -0.017
## (0.012) (0.012) (0.014)
## tottime 0.012 0.012 0.012
## (0.009) (0.009) (0.013)
## ptime86 -0.041*** 0.287*** 0.287***
## (0.009) (0.044) (0.069)
## qemp86 -0.051*** -0.014 -0.014
## (0.014) (0.017) (0.017)
## inc86 -0.001*** -0.003*** -0.003***
## (0.0003) (0.001) (0.001)
## black 0.327*** 0.292*** 0.292***
## (0.045) (0.045) (0.058)
## hispan 0.194*** 0.164*** 0.164***
## (0.040) (0.039) (0.040)
## pcnvsq -0.730*** -0.730***
## (0.156) (0.172)
## pt86sq -0.030*** -0.030***
## (0.004) (0.006)
## inc86sq 0.00001*** 0.00001***
## (0.00000) (0.00000)
## Constant 0.569*** 0.505*** 0.505***
## (0.036) (0.037) (0.039)
## ---------------------------------------------
## Observations 2,725 2,725
## R2 0.072 0.103
## Adjusted R2 0.070 0.100
## =============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
9.2 Example 9.2. Housing Price Equation
# Example 9.2 (levels): RESET-style check of the housing price equation.
hprice_reg <- lm(price ~ lotsize + sqrft + bdrms, data = hprice1)

# Squared and cubed fitted values from the base model. They live in the
# workspace rather than in hprice1; lm() finds them through the formula's
# environment when they are not columns of `data`.
prhat2 <- predict(hprice_reg)^2
prhat3 <- predict(hprice_reg)^3

# Augmented model: base regressors plus the two powers of the fitted values.
hprice_reg_pol <- lm(
  price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1,
  data = hprice1
)

stargazer(
  hprice_reg, hprice_reg_pol,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)

# Joint F test that both added terms have zero coefficients. This is the call
# that produces the "Linear hypothesis test" table shown after the regression
# table.
linearHypothesis(hprice_reg_pol, c("prhat2 = 0", "prhat3 = 0"))
##
## =========================================
## Dependent variable:
## ----------------------------
## price
## (1) (2)
## -----------------------------------------
## lotsize 0.002*** 0.0002
## (0.001) (0.005)
## sqrft 0.123*** 0.018
## (0.013) (0.299)
## bdrms 13.853 2.175
## (9.010) (33.888)
## prhat2 0.0004
## (0.007)
## prhat3 0.00000
## (0.00001)
## Constant -21.770 166.097
## (29.475) (317.433)
## -----------------------------------------
## Observations 88 88
## R2 0.672 0.706
## Adjusted R2 0.661 0.688
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Linear hypothesis test
##
## Hypothesis:
## prhat2 = 0
## prhat3 = 0
##
## Model 1: restricted model
## Model 2: price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 84 300724
## 2 82 269984 2 30740 4.6682 0.01202 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Log form: the same specification check applied to the log-log housing price equation
# Example 9.2 (logs): RESET-style check of the log housing price equation.
lhprice_reg <- lm(lprice ~ llotsize + lsqrft + bdrms, data = hprice1)

# Squared and cubed fitted values from the log model. They live in the
# workspace rather than in hprice1; lm() finds them through the formula's
# environment when they are not columns of `data`.
lprhat2 <- predict(lhprice_reg)^2
lprhat3 <- predict(lhprice_reg)^3

# Augmented model: base regressors plus the two powers of the fitted values.
lhprice_reg_pol <- lm(
  lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1,
  data = hprice1
)

stargazer(
  lhprice_reg, lhprice_reg_pol,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)

# Joint F test that both added terms have zero coefficients. This is the call
# that produces the "Linear hypothesis test" table shown after the regression
# table.
linearHypothesis(lhprice_reg_pol, c("lprhat2 = 0", "lprhat3 = 0"))
##
## =========================================
## Dependent variable:
## ----------------------------
## lprice
## (1) (2)
## -----------------------------------------
## llotsize 0.168*** -4.181
## (0.038) (12.595)
## lsqrft 0.700*** -17.349
## (0.093) (52.490)
## bdrms 0.037 -0.925
## (0.028) (2.770)
## lprhat2 3.910
## (13.014)
## lprhat3 -0.193
## (0.752)
## Constant -1.297** 87.886
## (0.651) (240.974)
## -----------------------------------------
## Observations 88 88
## R2 0.643 0.664
## Adjusted R2 0.630 0.643
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Linear hypothesis test
##
## Hypothesis:
## lprhat2 = 0
## lprhat3 = 0
##
## Model 1: restricted model
## Model 2: lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 84 2.8626
## 2 82 2.6940 2 0.16854 2.565 0.08308 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
9.3 Example 9.3. IQ as a Proxy for Ability
# Example 9.3: three lwage regressions on wage2 that differ only in how IQ
# enters the equation.

# IQA: IQ left out.
IQA <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + 1,
  data = wage2
)

# IQB: IQ added as an extra regressor.
IQB <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ + 1,
  data = wage2
)

# IQC: IQ plus the educ:IQ interaction term.
IQC <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ +
    educ:IQ + 1,
  data = wage2
)

# Text table comparing the three specifications.
stargazer(
  IQA, IQB, IQC,
  column.labels = c("IQA", "IQB", "IQC"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ==========================================
## Dependent variable:
## -----------------------------
## lwage
## IQA IQB IQC
## (1) (2) (3)
## ------------------------------------------
## educ 0.065*** 0.054*** 0.018
## (0.006) (0.007) (0.041)
## exper 0.014*** 0.014*** 0.014***
## (0.003) (0.003) (0.003)
## tenure 0.012*** 0.011*** 0.011***
## (0.002) (0.002) (0.002)
## married 0.199*** 0.200*** 0.201***
## (0.039) (0.039) (0.039)
## south -0.091*** -0.080*** -0.080***
## (0.026) (0.026) (0.026)
## urban 0.184*** 0.182*** 0.184***
## (0.027) (0.027) (0.027)
## black -0.188*** -0.143*** -0.147***
## (0.038) (0.039) (0.040)
## IQ 0.004*** -0.001
## (0.001) (0.005)
## educ:IQ 0.0003
## (0.0004)
## Constant 5.395*** 5.176*** 5.648***
## (0.113) (0.128) (0.546)
## ------------------------------------------
## Observations 935 935 935
## R2 0.253 0.263 0.263
## Adjusted R2 0.247 0.256 0.256
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
9.4 Example 9.4. City Crime Rates
# Example 9.4: lcrmrte regressions on the rows of crime2 with year equal to 87.
crime2b <- subset(crime2, year == 87)

# crimeA: unem and llawexpc only.
crimeA <- lm(
  lcrmrte ~ unem + llawexpc + 1,
  data = crime2b
)

# crimeB: adds lcrmrt_1 as a further regressor.
crimeB <- lm(
  lcrmrte ~ unem + llawexpc + lcrmrt_1 + 1,
  data = crime2b
)

# Text table comparing the two specifications.
stargazer(
  crimeA, crimeB,
  column.labels = c("crimeA", "crimeB"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
## Dependent variable:
## ----------------------------
## lcrmrte
## crimeA crimeB
## (1) (2)
## -----------------------------------------
## unem -0.029 0.009
## (0.032) (0.020)
## llawexpc 0.203 -0.140
## (0.173) (0.109)
## lcrmrt_1 1.194***
## (0.132)
## Constant 3.343** 0.076
## (1.251) (0.821)
## -----------------------------------------
## Observations 46 46
## R2 0.057 0.680
## Adjusted R2 0.013 0.657
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
9.5 Example 9.8. R&D Intensity and Firm Size
# Example 9.8: rdintens on sales and profmarg, fitted on all of rdchem and
# again on the rows with sales below 30000.

# Rows of rdchem kept for the second fit.
rdchem_small <- subset(rdchem, sales < 30000)

# RD1: full data set.
RD1 <- lm(
  rdintens ~ sales + profmarg + 1,
  data = rdchem
)

# RD2: same equation on the restricted rows.
RD2 <- lm(
  rdintens ~ sales + profmarg + 1,
  data = rdchem_small
)

# Text table comparing the two fits.
stargazer(
  RD1, RD2,
  column.labels = c("RD1", "RD2"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
## Dependent variable:
## ----------------------------
## rdintens
## RD1 RD2
## (1) (2)
## -----------------------------------------
## sales 0.0001 0.0002**
## (0.00004) (0.0001)
## profmarg 0.045 0.048
## (0.046) (0.044)
## Constant 2.625*** 2.297***
## (0.586) (0.592)
## -----------------------------------------
## Observations 32 31
## R2 0.076 0.173
## Adjusted R2 0.012 0.114
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
9.6 Example 9.9. R&D Intensity
# Example 9.9: lrd on lsales and profmarg, fitted on all of rdchem and again
# on the rows with sales below 30000.

# Rows of rdchem kept for the second fit.
rdchem_small <- subset(rdchem, sales < 30000)

# lRD1: full data set.
lRD1 <- lm(
  lrd ~ lsales + profmarg + 1,
  data = rdchem
)

# lRD2: same equation on the restricted rows.
lRD2 <- lm(
  lrd ~ lsales + profmarg + 1,
  data = rdchem_small
)

# Text table comparing the two fits.
stargazer(
  lRD1, lRD2,
  column.labels = c("RD1logfn", "RD2logfn"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
## Dependent variable:
## ----------------------------
## lrd
## RD1logfn RD2logfn
## (1) (2)
## -----------------------------------------
## lsales 1.084*** 1.088***
## (0.060) (0.067)
## profmarg 0.022 0.022
## (0.013) (0.013)
## Constant -4.378*** -4.404***
## (0.468) (0.511)
## -----------------------------------------
## Observations 32 31
## R2 0.918 0.904
## Adjusted R2 0.912 0.897
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
9.7 Example 9.10. State Infant Mortality Rates
# Example 9.10: infmort on lpcinc, lphysic and lpopul for the 1990 rows of
# infmrt, with and without the rows where DC is non-zero.

# 1990 rows, and the 1990 rows that also have DC equal to 0.
infmrt_1990 <- subset(infmrt, year == 1990)
infmrt_1990_no_dc <- subset(infmrt_1990, DC == 0)

# infant1: all 1990 rows.
infant1 <- lm(
  infmort ~ lpcinc + lphysic + lpopul + 1,
  data = infmrt_1990
)

# infant2: same equation restricted to DC == 0.
infant2 <- lm(
  infmort ~ lpcinc + lphysic + lpopul + 1,
  data = infmrt_1990_no_dc
)

# Text table comparing the two fits.
stargazer(
  infant1, infant2,
  column.labels = c("Infmort1", "Infmort2"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## =========================================
## Dependent variable:
## ----------------------------
## infmort
## Infmort1 Infmort2
## (1) (2)
## -----------------------------------------
## lpcinc -4.685* -0.567
## (2.604) (1.641)
## lphysic 4.153*** -2.742**
## (1.513) (1.191)
## lpopul -0.088 0.629***
## (0.287) (0.191)
## Constant 33.859 23.955*
## (20.428) (12.419)
## -----------------------------------------
## Observations 51 50
## R2 0.139 0.273
## Adjusted R2 0.084 0.226
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
.