9 More on Specification and Data Issues

Also covered using Python and Stata

# Packages used by the examples below.
# wooldridge: presumably the source of the example data sets referenced via
# `data =` (crime1, hprice1, wage2, crime2, rdchem, infmrt) -- none of them
# is created in this script.
library(wooldridge)
# stargazer: stargazer() prints the side-by-side regression tables.
library(stargazer)
# sandwich: vcovHC() supplies the heteroskedasticity-robust covariance matrix.
library(sandwich)
# lmtest: coeftest() re-tests coefficients with a supplied covariance matrix.
library(lmtest)
# car: linearHypothesis() runs the joint F tests.
library(car)
# NOTE(review): this removes every object from the current environment before
# the examples run. It does not unload packages or reset options, and it will
# wipe the workspace of anyone who source()s this script -- consider dropping.
rm(list = ls())

9.1 Example 9.1. Economic Model of Crime

# Example 9.1: models of narr86, first without and then with the
# squared terms pcnvsq, pt86sq and inc86sq.
crime_hetr_r <- lm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 +
    black + hispan + 1,
  data = crime1
)
crime_hetr <- lm(
  narr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + inc86 +
    black + hispan + pcnvsq + pt86sq + inc86sq + 1,
  data = crime1
)

# Same coefficients as crime_hetr, re-tested with the HC1
# heteroskedasticity-consistent covariance matrix.
crime_robust <- coeftest(
  crime_hetr,
  vcov. = vcovHC(crime_hetr, type = "HC1")
)

# Columns: restricted model, quadratic model, quadratic model with
# robust standard errors.
stargazer(
  crime_hetr_r, crime_hetr, crime_robust,
  column.labels = c("Hetrosc1.", "Hetrosc2", "Robust"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## =============================================
##                    Dependent variable:       
##              --------------------------------
##                     narr86                   
##                      OLS          coefficient
##                                      test    
##              Hetrosc1.  Hetrosc2    Robust   
##                 (1)       (2)         (3)    
## ---------------------------------------------
## pcnv         -0.133***  0.553***   0.553***  
##               (0.040)   (0.154)     (0.170)  
## avgsen        -0.011     -0.017     -0.017   
##               (0.012)   (0.012)     (0.014)  
## tottime        0.012     0.012       0.012   
##               (0.009)   (0.009)     (0.013)  
## ptime86      -0.041***  0.287***   0.287***  
##               (0.009)   (0.044)     (0.069)  
## qemp86       -0.051***   -0.014     -0.014   
##               (0.014)   (0.017)     (0.017)  
## inc86        -0.001*** -0.003***   -0.003*** 
##              (0.0003)   (0.001)     (0.001)  
## black        0.327***   0.292***   0.292***  
##               (0.045)   (0.045)     (0.058)  
## hispan       0.194***   0.164***   0.164***  
##               (0.040)   (0.039)     (0.040)  
## pcnvsq                 -0.730***   -0.730*** 
##                         (0.156)     (0.172)  
## pt86sq                 -0.030***   -0.030*** 
##                         (0.004)     (0.006)  
## inc86sq                0.00001*** 0.00001*** 
##                        (0.00000)   (0.00000) 
## Constant     0.569***   0.505***   0.505***  
##               (0.036)   (0.037)     (0.039)  
## ---------------------------------------------
## Observations   2,725     2,725               
## R2             0.072     0.103               
## Adjusted R2    0.070     0.100               
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01

9.2 Example 9.2. Housing Price Equation

# Example 9.2 (levels): baseline housing price regression.
hprice_reg <- lm(price ~ lotsize + sqrft + bdrms, data = hprice1)

# Squared and cubed in-sample predictions from the baseline model. They live
# in the global environment (not in hprice1), so lm() below picks them up
# through the formula's environment.
prhat2 <- predict(hprice_reg)^2
prhat3 <- predict(hprice_reg)^3

# Augmented regression: baseline regressors plus the two powers.
hprice_reg_pol <- lm(
  price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1,
  data = hprice1
)

stargazer(
  hprice_reg, hprice_reg_pol,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                         price            
##                   (1)            (2)     
## -----------------------------------------
## lotsize         0.002***       0.0002    
##                 (0.001)        (0.005)   
## sqrft           0.123***        0.018    
##                 (0.013)        (0.299)   
## bdrms            13.853         2.175    
##                 (9.010)       (33.888)   
## prhat2                         0.0004    
##                                (0.007)   
## prhat3                         0.00000   
##                               (0.00001)  
## Constant        -21.770        166.097   
##                 (29.475)      (317.433)  
## -----------------------------------------
## Observations       88            88      
## R2               0.672          0.706    
## Adjusted R2      0.661          0.688    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01
# Joint F test that the coefficients on prhat2 and prhat3 are both zero.
linearHypothesis(
  hprice_reg_pol,
  hypothesis.matrix = c("prhat2 = 0", "prhat3 =0")
)
## Linear hypothesis test
## 
## Hypothesis:
## prhat2 = 0
## prhat3 = 0
## 
## Model 1: restricted model
## Model 2: price ~ lotsize + sqrft + bdrms + prhat2 + prhat3 + 1
## 
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     84 300724                              
## 2     82 269984  2     30740 4.6682 0.01202 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log form (the same procedure, with lprice as the dependent variable)

# Example 9.2 (logs): baseline regression with lprice as the outcome.
lhprice_reg <- lm(lprice ~ llotsize + lsqrft + bdrms, data = hprice1)

# Squared and cubed in-sample predictions from the baseline model. They live
# in the global environment (not in hprice1), so lm() below picks them up
# through the formula's environment.
lprhat2 <- predict(lhprice_reg)^2
lprhat3 <- predict(lhprice_reg)^3

# Augmented regression: baseline regressors plus the two powers.
lhprice_reg_pol <- lm(
  lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1,
  data = hprice1
)

stargazer(
  lhprice_reg, lhprice_reg_pol,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                         lprice           
##                   (1)            (2)     
## -----------------------------------------
## llotsize        0.168***       -4.181    
##                 (0.038)       (12.595)   
## lsqrft          0.700***       -17.349   
##                 (0.093)       (52.490)   
## bdrms            0.037         -0.925    
##                 (0.028)        (2.770)   
## lprhat2                         3.910    
##                               (13.014)   
## lprhat3                        -0.193    
##                                (0.752)   
## Constant        -1.297**       87.886    
##                 (0.651)       (240.974)  
## -----------------------------------------
## Observations       88            88      
## R2               0.643          0.664    
## Adjusted R2      0.630          0.643    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01
# Joint F test that the coefficients on lprhat2 and lprhat3 are both zero.
linearHypothesis(
  lhprice_reg_pol,
  hypothesis.matrix = c("lprhat2 = 0", "lprhat3 =0")
)
## Linear hypothesis test
## 
## Hypothesis:
## lprhat2 = 0
## lprhat3 = 0
## 
## Model 1: restricted model
## Model 2: lprice ~ llotsize + lsqrft + bdrms + lprhat2 + lprhat3 + 1
## 
##   Res.Df    RSS Df Sum of Sq     F  Pr(>F)  
## 1     84 2.8626                             
## 2     82 2.6940  2   0.16854 2.565 0.08308 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9.3 Example 9.3. IQ as a Proxy for Ability

# Example 9.3: three lwage regressions on wage2.
# IQA: controls only (no IQ).
IQA <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + 1,
  data = wage2
)
# IQB: adds IQ.
IQB <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ + 1,
  data = wage2
)
# IQC: adds IQ and the educ-by-IQ interaction.
IQC <- lm(
  lwage ~ educ + exper + tenure + married + south + urban + black + IQ +
    educ:IQ + 1,
  data = wage2
)

stargazer(
  IQA, IQB, IQC,
  column.labels = c("IQA", "IQB", "IQC"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                          lwage            
##                 IQA       IQB       IQC   
##                 (1)       (2)       (3)   
## ------------------------------------------
## educ         0.065***  0.054***    0.018  
##               (0.006)   (0.007)   (0.041) 
## exper        0.014***  0.014***  0.014*** 
##               (0.003)   (0.003)   (0.003) 
## tenure       0.012***  0.011***  0.011*** 
##               (0.002)   (0.002)   (0.002) 
## married      0.199***  0.200***  0.201*** 
##               (0.039)   (0.039)   (0.039) 
## south        -0.091*** -0.080*** -0.080***
##               (0.026)   (0.026)   (0.026) 
## urban        0.184***  0.182***  0.184*** 
##               (0.027)   (0.027)   (0.027) 
## black        -0.188*** -0.143*** -0.147***
##               (0.038)   (0.039)   (0.040) 
## IQ                     0.004***   -0.001  
##                         (0.001)   (0.005) 
## educ:IQ                           0.0003  
##                                  (0.0004) 
## Constant     5.395***  5.176***  5.648*** 
##               (0.113)   (0.128)   (0.546) 
## ------------------------------------------
## Observations    935       935       935   
## R2             0.253     0.263     0.263  
## Adjusted R2    0.247     0.256     0.256  
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

9.4 Example 9.4. City Crime Rates

# Example 9.4: keep only the rows of crime2 with year equal to 87.
crime2b <- subset(crime2, subset = year == 87)

# crimeA omits the lcrmrt_1 regressor; crimeB includes it.
crimeA <- lm(lcrmrte ~ unem + llawexpc + 1, data = crime2b)
crimeB <- lm(lcrmrte ~ unem + llawexpc + lcrmrt_1 + 1, data = crime2b)

stargazer(
  crimeA, crimeB,
  column.labels = c("crimeA", "crimeB"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                        lcrmrte           
##                  crimeA        crimeB    
##                   (1)            (2)     
## -----------------------------------------
## unem             -0.029         0.009    
##                 (0.032)        (0.020)   
## llawexpc         0.203         -0.140    
##                 (0.173)        (0.109)   
## lcrmrt_1                      1.194***   
##                                (0.132)   
## Constant        3.343**         0.076    
##                 (1.251)        (0.821)   
## -----------------------------------------
## Observations       46            46      
## R2               0.057          0.680    
## Adjusted R2      0.013          0.657    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

9.5 Example 9.8. R&D Intensity and Firm Size

# Example 9.8: the same regression on all rows of rdchem (RD1) and on the
# rows with sales below 30000 only (RD2).
RD1 <- lm(rdintens ~ sales + profmarg + 1, data = rdchem)
RD2 <- lm(
  rdintens ~ sales + profmarg + 1,
  data = subset(rdchem, subset = sales < 30000)
)

stargazer(
  RD1, RD2,
  column.labels = c("RD1", "RD2"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                        rdintens          
##                   RD1            RD2     
##                   (1)            (2)     
## -----------------------------------------
## sales            0.0001       0.0002**   
##                (0.00004)      (0.0001)   
## profmarg         0.045          0.048    
##                 (0.046)        (0.044)   
## Constant        2.625***      2.297***   
##                 (0.586)        (0.592)   
## -----------------------------------------
## Observations       32            31      
## R2               0.076          0.173    
## Adjusted R2      0.012          0.114    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

9.6 Example 9.9. R&D Intensity

# Example 9.9: regression of lrd on lsales and profmarg, on all rows of
# rdchem (lRD1) and on the rows with sales below 30000 only (lRD2).
lRD1 <- lm(lrd ~ lsales + profmarg + 1, data = rdchem)
lRD2 <- lm(
  lrd ~ lsales + profmarg + 1,
  data = subset(rdchem, subset = sales < 30000)
)

stargazer(
  lRD1, lRD2,
  column.labels = c("RD1logfn", "RD2logfn"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                          lrd             
##                 RD1logfn      RD2logfn   
##                   (1)            (2)     
## -----------------------------------------
## lsales          1.084***      1.088***   
##                 (0.060)        (0.067)   
## profmarg         0.022          0.022    
##                 (0.013)        (0.013)   
## Constant       -4.378***      -4.404***  
##                 (0.468)        (0.511)   
## -----------------------------------------
## Observations       32            31      
## R2               0.918          0.904    
## Adjusted R2      0.912          0.897    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

9.7 Example 9.10. State Infant Mortality Rates

# Example 9.10: infmort regression on the 1990 rows of infmrt (infant1), and
# again after also dropping the rows where DC is nonzero (infant2).
infant1 <- lm(
  infmort ~ lpcinc + lphysic + lpopul + 1,
  data = subset(infmrt, subset = year == 1990)
)
infant2 <- lm(
  infmort ~ lpcinc + lphysic + lpopul + 1,
  data = subset(infmrt, subset = year == 1990 & DC == 0)
)

stargazer(
  infant1, infant2,
  column.labels = c("Infmort1", "Infmort2"),
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                        infmort           
##                 Infmort1      Infmort2   
##                   (1)            (2)     
## -----------------------------------------
## lpcinc          -4.685*        -0.567    
##                 (2.604)        (1.641)   
## lphysic         4.153***      -2.742**   
##                 (1.513)        (1.191)   
## lpopul           -0.088       0.629***   
##                 (0.287)        (0.191)   
## Constant         33.859        23.955*   
##                 (20.428)      (12.419)   
## -----------------------------------------
## Observations       51            50      
## R2               0.139          0.273    
## Adjusted R2      0.084          0.226    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

.