Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Psid all year ind var #53

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions R/build.panel.r
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ build.panel <- function(datadir=NULL,fam.vars,ind.vars=NULL,heads.only=FALSE,cur
flog.info('psidR: currently working on data for year %d',years[iy])


# keeping only relevant columns from individual file
# keeping only relevant columns from individual file
# subset only if requested.
curr <- ids[list(years[iy])]
if (years[iy] == 1968){
Expand All @@ -391,7 +391,7 @@ build.panel <- function(datadir=NULL,fam.vars,ind.vars=NULL,heads.only=FALSE,cur

# issue https://github.com/floswald/psidR/issues/4
# ------------------------------------------------
# check for NA in ind.vars: these are years when a certain variable isn not available in the individual index file.
# check for NA in ind.vars: these are years when a certain variable is not available in the individual index file.
# adjust for first year (1968) when `sequence` was not available
ind.notnas <- NULL
if (any(is.na(ind.vars.yr))){
Expand Down Expand Up @@ -786,11 +786,15 @@ build.psid <- function(datadr="~/datasets/psid/",small=TRUE){
#' getNamesPSID("ER17013", cwf, years = NULL)
#' getNamesPSID("ER17013", cwf, years = c(2005, 2007, 2009))
getNamesPSID <- function(aname, cwf, years = NULL,file = NULL){

# find the given var name in cwf
myvar <- which(cwf == aname, arr.ind=TRUE)
## variables that begin with Y

# find the year cols in cwf (those with names that begin with Y)
ynames.all <- grep("^Y", colnames(cwf))
ynames.labs <- grep("^Y", colnames(cwf),value = TRUE)

# retrieve the year-specific var names from cwf
if (is.null(years)){
yearkeep <- ynames.all
} else {
Expand All @@ -801,6 +805,35 @@ getNamesPSID <- function(aname, cwf, years = NULL,file = NULL){
}
ovalue <- transpose(cwf[myvar[1], yearkeep, drop = FALSE])
od = data.frame(year = ynames.labs, variable = ovalue$V1)

# a quick and dirty fix here
#--------------------------------------------------------------------------------------------------
# The psid-cross-year-index crosswalk (cwf) has an issue with all-year individual-level vars
# (e.g. SEX OF INDIVIDUAL, ER32000): even though they are in fact included in every wave of the
# survey, the crosswalk lists them only in the column corresponding to the most recent year.
#
# Since the psid-cross-year-index crosswalk is what we use for reference here, calling this
# function with one of these all-year vars would return some empty cells.
#
# As a workaround, we check whether the given var is in the following list of all-year
# individual-level vars and, if so, set the variable name for every year to the given var itself.
#--------------------------------------------------------------------------------------------------
all_year_ind_var_li <- c('ER31987', 'ER31988', 'ER31989', 'ER31990', 'ER31991', 'ER31992',
'ER31993', 'ER31994', 'ER31995', 'ER31996', 'ER31997', 'ER32000',
'ER32001', 'ER32002', 'ER32003', 'ER32004', 'ER32005', 'ER32006',
'ER32007', 'ER32008', 'ER32009', 'ER32010', 'ER32011', 'ER32012',
'ER32013', 'ER32014', 'ER32015', 'ER32016', 'ER32017', 'ER32018',
'ER32019', 'ER32020', 'ER32021', 'ER32022', 'ER32023', 'ER32024',
'ER32025', 'ER32026', 'ER32027', 'ER32028', 'ER32029', 'ER32030',
'ER32031', 'ER32032', 'ER32033', 'ER32034', 'ER32035', 'ER32036',
'ER32037', 'ER32038', 'ER32039', 'ER32040', 'ER32041', 'ER32042',
'ER32043', 'ER32044', 'ER32045', 'ER32046', 'ER32047', 'ER32048',
'ER32049', 'ER32050', 'ER32051')
if (is.element(aname, all_year_ind_var_li)){
od$variable = aname
}

# write to specified file
if (!is.null(file)){
write.table(od,file = file,row.names = FALSE)
}
Expand Down