PSID1976 {AER} | R Documentation |
Cross-section data originating from the 1976 Panel Study of Income Dynamics (PSID), based on data for the previous year, 1975.
data("PSID1976")
A data frame containing 753 observations on 22 variables.
wage > 0
or hours > 0
.)wage
. This variable is currently kept for
consistency with earlier releases. It will be removed in the near future.)participation == "yes"
, then select only those
women with non-zero wage. Only 325 women work in 1975 and have a non-zero wage in 1976.

This data set is also known as the Mroz (1987) data.
Warning: typical applications using these data employ the variable `wage` (aka `earnings`) as the dependent variable. The variable `repwage` is the reported wage in a 1976 interview, named RPWG by Greene (2003).
Online complements to Greene (2003). Table F4.1.
http://pages.stern.nyu.edu/~wgreene/Text/tables/tablelist5.htm
Greene, W.H. (2003). Econometric Analysis, 5th edition. Upper Saddle River, NJ: Prentice Hall.
McCullough, B.D. (2004). Some Details of Nonlinear Estimation. In: Altman, M., Gill, J., and McDonald, M.P.: Numerical Issues in Statistical Computing for the Social Scientist. Hoboken, NJ: John Wiley, Ch. 8, 199–218.
Mroz, T.A. (1987). The Sensitivity of an Empirical Model of Married Women's Hours of Work to Economic and Statistical Assumptions. Econometrica, 55, 765–799.
Wooldridge, J.M. (2002). Econometric Analysis of Cross-Section and Panel Data. Cambridge, MA: MIT Press.
# Data and derived columns ----

data("PSID1976")

# "yes" when the household has at least one child (young or old), else "no".
PSID1976$kids <- factor(
  (PSID1976$youngkids + PSID1976$oldkids) > 0,
  levels = c(FALSE, TRUE),
  labels = c("no", "yes")
)

# Family income net of hours * wage, divided by 1000.
PSID1976$nwincome <-
  (PSID1976$fincome - PSID1976$hours * PSID1976$wage) / 1000

# Numeric copy of the participation factor: integer codes shifted down by one.
PSID1976$partnum <- as.numeric(PSID1976$participation) - 1


# Greene (2003) ----

# Example 4.1, Table 4.2 (reproduced in Example 7.1, Table 7.1)
gr_lm <- lm(
  log(hours * wage) ~ age + I(age^2) + education + kids,
  data = PSID1976,
  subset = participation == "yes"
)
summary(gr_lm)
vcov(gr_lm)

# Example 4.5
summary(gr_lm)
# ... or equivalently, compare against the intercept-only fit
gr_lm1 <- lm(
  log(hours * wage) ~ 1,
  data = PSID1976,
  subset = participation == "yes"
)
anova(gr_lm1, gr_lm)

# Example 21.4, p. 681
gr_probit1 <- glm(
  participation ~ age + I(age^2) + fincome + education + kids,
  data = PSID1976,
  family = binomial(link = "probit")
)
gr_probit2 <- glm(
  participation ~ age + I(age^2) + fincome + education,
  data = PSID1976,
  family = binomial(link = "probit")
)
gr_probit3 <- glm(
  participation ~ kids/(age + I(age^2) + fincome + education),
  data = PSID1976,
  family = binomial(link = "probit")
)

# LR test of all coefficients
lrtest(gr_probit1)

# Chow-type test
lrtest(gr_probit2, gr_probit3)
# equivalently:
anova(gr_probit2, gr_probit3, test = "Chisq")

# Table 21.3
summary(gr_probit1)

# Example 22.8, Table 22.7, p. 786
library("sampleSelection")
gr_2step <- selection(
  participation ~ age + I(age^2) + fincome + education + kids,
  wage ~ experience + I(experience^2) + education + city,
  data = PSID1976,
  method = "2step"
)
gr_ml <- selection(
  participation ~ age + I(age^2) + fincome + education + kids,
  wage ~ experience + I(experience^2) + education + city,
  data = PSID1976,
  method = "ml"
)
gr_ols <- lm(
  wage ~ experience + I(experience^2) + education + city,
  data = PSID1976,
  subset = participation == "yes"
)
# NOTE: ML estimates agree with Greene, 5e errata.
# Standard errors are based on the Hessian (here), while Greene has BHHH/OPG.
# Wooldridge (2002) ----

# Table 15.1, p. 468: lm() on partnum, then logit and probit on participation
wl_lpm <- lm(
  partnum ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  data = PSID1976
)
wl_logit <- glm(
  participation ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  family = binomial,
  data = PSID1976
)
wl_probit <- glm(
  participation ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  family = binomial(link = "probit"),
  data = PSID1976
)
# (same as Altman et al.)

# Convenience functions

# One minus the ratio of the model's log-likelihood to the log-likelihood of
# the same model refitted with an intercept only.
pseudoR2 <- function(obj) {
  ll_model <- logLik(obj)
  ll_null <- logLik(update(obj, . ~ 1))
  1 - as.vector(ll_model / ll_null)
}

# One minus the diagonal share of the cross-table of the model response
# against the rounded fitted values.
misclass <- function(obj) {
  observed <- model.response(model.frame(obj))
  predicted <- round(fitted(obj))
  shares <- prop.table(table(observed, predicted))
  1 - sum(diag(shares))
}

coeftest(wl_logit)
logLik(wl_logit)
misclass(wl_logit)
pseudoR2(wl_logit)

coeftest(wl_probit)
logLik(wl_probit)
misclass(wl_probit)
pseudoR2(wl_probit)

# Table 16.2, p. 528: the same formula fitted by lm() and by tobit()
form <- hours ~ nwincome + education + experience + I(experience^2) +
  age + youngkids + oldkids
wl_ols <- lm(form, data = PSID1976)
wl_tobit <- tobit(form, data = PSID1976)
summary(wl_ols)
summary(wl_tobit)


# McCullough (2004) ----

# p. 203
mc_probit <- glm(
  participation ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  family = binomial(link = "probit"),
  data = PSID1976
)
mc_tobit <- tobit(
  hours ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  data = PSID1976
)
coeftest(mc_probit)
coeftest(mc_tobit)
# Same coefficient table, with vcovOPG supplied as the covariance estimator
coeftest(mc_tobit, vcov = vcovOPG)