From 67dfc8ff97b4566e3b2036f9cc92285d6af51ae9 Mon Sep 17 00:00:00 2001 From: youran wu Date: Thu, 5 May 2022 21:42:13 -0400 Subject: [PATCH 1/2] more comments --- R/build.panel.r | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/build.panel.r b/R/build.panel.r index 11ab5dc..7ba3024 100644 --- a/R/build.panel.r +++ b/R/build.panel.r @@ -367,7 +367,7 @@ build.panel <- function(datadir=NULL,fam.vars,ind.vars=NULL,heads.only=FALSE,cur flog.info('psidR: currently working on data for year %d',years[iy]) - # keeping only relevant columns from individual file + # keeping only relevant columns from individual file # subset only if requested. curr <- ids[list(years[iy])] if (years[iy] == 1968){ @@ -391,7 +391,7 @@ build.panel <- function(datadir=NULL,fam.vars,ind.vars=NULL,heads.only=FALSE,cur # issue https://github.com/floswald/psidR/issues/4 # ------------------------------------------------ - # check for NA in ind.vars: these are years when a certain variable isn not available in the individual index file. + # check for NA in ind.vars: these are years when a certain variable is not available in the individual index file. 
# adjust for first year (1968) when `sequence` was not available ind.notnas <- NULL if (any(is.na(ind.vars.yr))){ @@ -786,11 +786,15 @@ build.psid <- function(datadr="~/datasets/psid/",small=TRUE){ #' getNamesPSID("ER17013", cwf, years = NULL) #' getNamesPSID("ER17013", cwf, years = c(2005, 2007, 2009)) getNamesPSID <- function(aname, cwf, years = NULL,file = NULL){ + + # find the given var name in cwf myvar <- which(cwf == aname, arr.ind=TRUE) - ## variables that begin with Y + + # find the year cols in cwf (those with names that begin with Y) ynames.all <- grep("^Y", colnames(cwf)) ynames.labs <- grep("^Y", colnames(cwf),value = TRUE) + # retrieve the year-specific var names from cwf if (is.null(years)){ yearkeep <- ynames.all } else { From 2fa24730a2cf47fb87a60e6ee8f4974ff9ee0400 Mon Sep 17 00:00:00 2001 From: youran wu Date: Thu, 5 May 2022 21:42:46 -0400 Subject: [PATCH 2/2] fixed the all-year individual-level var issue --- R/build.panel.r | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/R/build.panel.r b/R/build.panel.r index 7ba3024..bac0123 100644 --- a/R/build.panel.r +++ b/R/build.panel.r @@ -805,6 +805,35 @@ getNamesPSID <- function(aname, cwf, years = NULL,file = NULL){ } ovalue <- transpose(cwf[myvar[1], yearkeep, drop = FALSE]) od = data.frame(year = ynames.labs, variable = ovalue$V1) + + # a quick and dirty fix here + #-------------------------------------------------------------------------------------------------- + # The psid-cross-year-index crosswalk (cwf) has an issue with those all-year-individual-level vars + # (e.g. SEX OF INDIVIDUAL ER32000), even though they are in fact included in every wave of the survey, in + # this crosswalk, they are only put in the column corresponding with the most recent year. + # + # Since the psid-cross-year-index crosswalk is what we use for reference here, when we call this + # function with these all year vars, we would get some empty cells. 
+ # + # As a solution, we would check if the given var is in the following all-year-individual-level var list, + # and fill in the blanks if needed. + #-------------------------------------------------------------------------------------------------- + all_year_ind_var_li <- c('ER31987', 'ER31988', 'ER31989', 'ER31990', 'ER31991', 'ER31992', + 'ER31993', 'ER31994', 'ER31995', 'ER31996', 'ER31997', 'ER32000', + 'ER32001', 'ER32002', 'ER32003', 'ER32004', 'ER32005', 'ER32006', + 'ER32007', 'ER32008', 'ER32009', 'ER32010', 'ER32011', 'ER32012', + 'ER32013', 'ER32014', 'ER32015', 'ER32016', 'ER32017', 'ER32018', + 'ER32019', 'ER32020', 'ER32021', 'ER32022', 'ER32023', 'ER32024', + 'ER32025', 'ER32026', 'ER32027', 'ER32028', 'ER32029', 'ER32030', + 'ER32031', 'ER32032', 'ER32033', 'ER32034', 'ER32035', 'ER32036', + 'ER32037', 'ER32038', 'ER32039', 'ER32040', 'ER32041', 'ER32042', + 'ER32043', 'ER32044', 'ER32045', 'ER32046', 'ER32047', 'ER32048', + 'ER32049', 'ER32050', 'ER32051') + if (is.element(aname, all_year_ind_var_li)){ + od$variable = aname + } + + # write to specified file if (!is.null(file)){ write.table(od,file = file,row.names = FALSE) }