Python for Introductory Econometrics: Chap 14

# Python for Introductory Econometrics¶

## Chapter 14. Advanced Panel Data Methods¶

#### https://www.solomonegash.com/¶

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as ss

import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

from wooldridge import *

# Panel estimators used throughout the chapter. Imported after the wildcard
# import above so that it cannot shadow any of these names.
from linearmodels import PanelOLS, PooledOLS, RandomEffects
from linearmodels.panel import compare


### Example 14.1. Effect of Job Training on Firm Scrap Rates¶

In [2]:
# Load the job-training panel and index it by firm (entity) and year (time),
# the two-level index the panel estimators expect.
# set_index is used without inplace=True: the in-place form returns None, so
# wrapping its result in pd.DataFrame(...) only produced an unused empty frame.
df = dataWoo("jtrain").set_index(['fcode', 'year'])

# Fixed-effects (within) regression of log scrap rate on year dummies,
# the grant indicator and its one-period lag.
fe1 = PanelOLS.from_formula('lscrap ~1 + d88 + d89 + grant + grant_1 + EntityEffects', data=df).fit()
print(fe1)

                          PanelOLS Estimation Summary
================================================================================
Dep. Variable:                 lscrap   R-squared:                        0.2010
Estimator:                   PanelOLS   R-squared (Between):             -0.0177
No. Observations:                 162   R-squared (Within):               0.2010
Date:                Fri, Apr 24 2020   R-squared (Overall):              0.0021
Time:                        21:36:45   Log-likelihood                   -80.946
F-statistic:                      6.5426
Entities:                          54   P-value                           0.0001
Avg Obs:                       3.0000   Distribution:                   F(4,104)
Min Obs:                       3.0000
Max Obs:                       3.0000   F-statistic (robust):             6.5426
P-value                           0.0001
Time periods:                       3   Distribution:                   F(4,104)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.5974     0.0677     8.8202     0.0000      0.4631      0.7318
d88           -0.0802     0.1095    -0.7327     0.4654     -0.2973      0.1369
d89           -0.2472     0.1332    -1.8556     0.0663     -0.5114      0.0170
grant         -0.2523     0.1506    -1.6751     0.0969     -0.5510      0.0464
grant_1       -0.4216     0.2102    -2.0057     0.0475     -0.8384     -0.0048
==============================================================================

F-test for Poolability: 24.661
P-value: 0.0000
Distribution: F(53,104)

Included effects: Entity

In [3]:
# Same fixed-effects specification as the previous cell, without the lagged
# grant indicator (grant_1).
fe2_spec = PanelOLS.from_formula(
    'lscrap ~1 + d88 + d89 + grant + EntityEffects',
    data=df,
)
fe2 = fe2_spec.fit()
print(fe2)

                          PanelOLS Estimation Summary
================================================================================
Dep. Variable:                 lscrap   R-squared:                        0.1701
Estimator:                   PanelOLS   R-squared (Between):             -0.0028
No. Observations:                 162   R-squared (Within):               0.1701
Date:                Fri, Apr 24 2020   R-squared (Overall):              0.0129
Time:                        21:36:45   Log-likelihood                   -84.020
F-statistic:                      7.1760
Entities:                          54   P-value                           0.0002
Avg Obs:                       3.0000   Distribution:                   F(3,105)
Min Obs:                       3.0000
Max Obs:                       3.0000   F-statistic (robust):             7.1760
P-value                           0.0002
Time periods:                       3   Distribution:                   F(3,105)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.5974     0.0687     8.6960     0.0000      0.4612      0.7337
d88           -0.1401     0.1068    -1.3110     0.1927     -0.3519      0.0718
d89           -0.4270     0.0999    -4.2732     0.0000     -0.6252     -0.2289
grant         -0.0822     0.1263    -0.6511     0.5164     -0.3326      0.1681
==============================================================================

F-test for Poolability: 23.900
P-value: 0.0000
Distribution: F(53,105)

Included effects: Entity


In [4]:
# NOTE(review): this cell has no heading of its own; it sits between Examples
# 14.1 and 14.3 and is presumably Example 14.2 -- confirm and add a heading.
df = dataWoo("wagepan")

# Keep a categorical copy of the year column before it moves into the index,
# so the formula can expand it into year dummies and year:educ interactions.
year = pd.Categorical(df.year)

# Index by worker (nr) and year. Done without inplace=True: the in-place form
# returns None, so the old `dfp` wrapper was just an unused empty DataFrame.
df = df.set_index(['nr', 'year'])
df['year'] = year

# Fixed-effects regression; drop_absorbed=True is passed because `educ`
# enters via year*educ alongside the entity effects.
fe1 = PanelOLS.from_formula(
    'lwage ~ 1 + union + married + year*educ + EntityEffects',
    data=df,
    drop_absorbed=True,
).fit()
print(fe1)

                          PanelOLS Estimation Summary
================================================================================
Dep. Variable:                  lwage   R-squared:                        0.1708
Estimator:                   PanelOLS   R-squared (Between):              0.0905
No. Observations:                4360   R-squared (Within):               0.1708
Date:                Fri, Apr 24 2020   R-squared (Overall):              0.1277
Time:                        21:36:46   Log-likelihood                   -1350.7
F-statistic:                      48.907
Entities:                         545   P-value                           0.0000
Avg Obs:                       8.0000   Distribution:                 F(16,3799)
Min Obs:                       8.0000
Max Obs:                       8.0000   F-statistic (robust):             48.907
P-value                           0.0000
Time periods:                       8   Distribution:                 F(16,3799)
Avg Obs:                       545.00
Min Obs:                       545.00
Max Obs:                       545.00

Parameter Estimates
=====================================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------------
Intercept             1.3625     0.0162     83.903     0.0000      1.3306      1.3943
year[T.1981]         -0.0224     0.1459    -0.1537     0.8779     -0.3084      0.2636
year[T.1982]         -0.0058     0.1459    -0.0395     0.9685     -0.2917      0.2802
year[T.1983]          0.0104     0.1459     0.0715     0.9430     -0.2755      0.2964
year[T.1984]          0.0844     0.1459     0.5785     0.5630     -0.2016      0.3703
year[T.1985]          0.0497     0.1459     0.3409     0.7332     -0.2362      0.3357
year[T.1986]          0.0656     0.1459     0.4497     0.6530     -0.2204      0.3516
year[T.1987]          0.0904     0.1459     0.6201     0.5352     -0.1955      0.3764
union                 0.0830     0.0194     4.2671     0.0000      0.0449      0.1211
married               0.0548     0.0184     2.9773     0.0029      0.0187      0.0909
year[T.1981]:educ     0.0116     0.0123     0.9448     0.3448     -0.0125      0.0356
year[T.1982]:educ     0.0148     0.0123     1.2061     0.2279     -0.0093      0.0388
year[T.1983]:educ     0.0171     0.0123     1.3959     0.1628     -0.0069      0.0412
year[T.1984]:educ     0.0166     0.0123     1.3521     0.1764     -0.0075      0.0406
year[T.1985]:educ     0.0237     0.0123     1.9316     0.0535     -0.0004      0.0478
year[T.1986]:educ     0.0274     0.0123     2.2334     0.0256      0.0033      0.0515
year[T.1987]:educ     0.0304     0.0123     2.4798     0.0132      0.0064      0.0545
=====================================================================================

F-test for Poolability: 8.0932
P-value: 0.0000
Distribution: F(544,3799)

Included effects: Entity


### Example 14.3. Effect of Job Training on Firm Scrap Rates¶

In [5]:
# Reload the job-training panel, indexed by firm (entity) and year (time).
# set_index is used without inplace=True: the in-place form returns None, so
# the former `dfp = pd.DataFrame(...)` wrapper was an unused empty frame.
df = dataWoo("jtrain").set_index(['fcode', 'year'])

# Fixed-effects regression of log scrap rate, now also controlling for
# log sales and log employment.
fe1 = PanelOLS.from_formula(
    'lscrap ~1 + d88 + d89 + grant + grant_1 + lsales + lemploy + EntityEffects',
    data=df,
).fit()
print(fe1)

                          PanelOLS Estimation Summary
================================================================================
Dep. Variable:                 lscrap   R-squared:                        0.2131
Estimator:                   PanelOLS   R-squared (Between):             -0.0797
No. Observations:                 148   R-squared (Within):               0.2131
Date:                Fri, Apr 24 2020   R-squared (Overall):             -0.0494
Time:                        21:36:46   Log-likelihood                   -68.887
F-statistic:                      4.1063
Entities:                          51   P-value                           0.0011
Avg Obs:                       2.9020   Distribution:                    F(6,91)
Min Obs:                       1.0000
Max Obs:                       3.0000   F-statistic (robust):             4.1063
P-value                           0.0011
Time periods:                       3   Distribution:                    F(6,91)
Avg Obs:                       49.333
Min Obs:                       47.000
Max Obs:                       51.000

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      2.1155     3.1084     0.6806     0.4979     -4.0590      8.2900
d88           -0.0040     0.1195    -0.0331     0.9736     -0.2414      0.2335
d89           -0.1322     0.1537    -0.8601     0.3920     -0.4375      0.1731
grant         -0.2968     0.1571    -1.8891     0.0621     -0.6088      0.0153
grant_1       -0.5356     0.2242    -2.3888     0.0190     -0.9809     -0.0902
lsales        -0.0869     0.2597    -0.3345     0.7388     -0.6027      0.4290
lemploy       -0.0764     0.3503    -0.2180     0.8279     -0.7722      0.6194
==============================================================================

F-test for Poolability: 20.748
P-value: 0.0000
Distribution: F(50,91)

Included effects: Entity


### Example 14.4. A Wage Equation Using Panel Data¶

In [6]:
df = dataWoo("wagepan")

# Keep a categorical copy of the year column before it moves into the index,
# so the formulas below can expand it into year dummies.
year = pd.Categorical(df.year)

# Index by worker (nr) and year. Done without inplace=True: the in-place form
# returns None, so the old `dfp` wrapper was just an unused empty DataFrame.
df = df.set_index(['nr', 'year'])
df['year'] = year

# One specification shared by all three estimators; the fixed-effects model
# only appends the entity effects term.
wage_formula = 'lwage ~ 1 + educ + black + hisp + exper + expersq + married + union + year'

# drop_absorbed=True is passed to the fixed-effects fit only; in the printed
# comparison the FE column has no estimates for educ, black, hisp and exper.
FE = PanelOLS.from_formula(wage_formula + ' + EntityEffects', data=df, drop_absorbed=True).fit()
POLS = PooledOLS.from_formula(wage_formula, data=df).fit()
RE = RandomEffects.from_formula(wage_formula, data=df).fit()

# Side-by-side table of the pooled OLS, random-effects and fixed-effects fits.
print(compare({'Pooled':POLS, 'RE':RE, 'FE':FE}))

                            Model Comparison
=======================================================================
Pooled                RE             FE
-----------------------------------------------------------------------
Dep. Variable                    lwage             lwage          lwage
Estimator                    PooledOLS     RandomEffects       PanelOLS
No. Observations                  4360              4360           4360
R-squared                       0.1893            0.1806         0.1806
R-Squared (Within)              0.1692            0.1799         0.1806
R-Squared (Between)             0.2066            0.1853        -0.0052
R-Squared (Overall)             0.1893            0.1828         0.0807
F-statistic                     72.459            68.409         83.851
P-value (F-stat)                0.0000            0.0000         0.0000
=====================     ============   ===============   ============
Intercept                       0.0921            0.0234         1.4260
(1.1761)          (0.1546)       (77.748)
year[T.1981]                    0.0583            0.0404         0.1512
(1.9214)          (1.6362)       (6.8883)
year[T.1982]                    0.0628            0.0309         0.2530
(1.8900)          (0.9519)       (10.360)
year[T.1983]                    0.0620            0.0202         0.3544
(1.6915)          (0.4840)       (12.121)
year[T.1984]                    0.0905            0.0430         0.4901
(2.2566)          (0.8350)       (13.529)
year[T.1985]                    0.1092            0.0577         0.6175
(2.5200)          (0.9383)       (13.648)
year[T.1986]                    0.1420            0.0918         0.7655
(3.0580)          (1.2834)       (13.638)
year[T.1987]                    0.1738            0.1348         0.9250
(3.5165)          (1.6504)       (13.450)
educ                            0.0913            0.0919
(17.442)          (8.5744)
black                          -0.1392           -0.1394
(-5.9049)         (-2.9054)
hisp                            0.0160            0.0217
(0.7703)          (0.5078)
exper                           0.0672            0.1058
(4.9095)          (6.8706)
expersq                        -0.0024           -0.0047        -0.0052
(-2.9413)         (-6.8623)      (-7.3612)
married                         0.1083            0.0638         0.0467
(6.8997)          (3.8035)       (2.5494)
union                           0.1825            0.1059         0.0800
(10.635)          (5.9289)       (4.1430)
======================= ============== ================= ==============
Effects                                                          Entity
-----------------------------------------------------------------------

T-stats reported in parentheses