diff --git a/src/fields_processing.jl b/src/fields_processing.jl index 37db178..e890074 100644 --- a/src/fields_processing.jl +++ b/src/fields_processing.jl @@ -88,6 +88,7 @@ process_custom(dataset, fields_entry) = """ function process_ordinal(dataset, fields_entry) field = only_one_field(fields_entry["fields"]) + output_df = DataFrame() for phenotype_entry in fields_entry["phenotypes"] operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] if operation == "first" @@ -99,11 +100,12 @@ function process_ordinal(dataset, fields_entry) output[index] = val end end - return DataFrame([Symbol(phenotype_entry["name"]) => output]) + output_df[!, Symbol(phenotype_entry["name"])] = output else throw(ArgumentError("Only `first` operation supported for now.")) end end + return output_df end """ @@ -111,6 +113,7 @@ end """ function process_continuous(dataset, fields_entry) field = only_one_field(fields_entry["fields"]) + output_df = DataFrame() for phenotype_entry in fields_entry["phenotypes"] operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] if operation == "first" @@ -122,11 +125,12 @@ function process_continuous(dataset, fields_entry) output[index] = val end end - return DataFrame([Symbol(phenotype_entry["name"]) => output]) + output_df[!, Symbol(phenotype_entry["name"])] = output else throw(ArgumentError("Only `first` operation supported for now.")) end end + return output_df end """ @@ -134,16 +138,16 @@ end """ function process_integer(dataset, fields_entry) field = only_one_field(fields_entry["fields"]) + output_df = DataFrame() for phenotype_entry in fields_entry["phenotypes"] operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] if operation == "first" - column = dataset[!, Symbol(field, "-0.0")] - return DataFrame([Symbol(phenotype_entry["name"]) => column]) + output_df[!, Symbol(phenotype_entry["name"])] = dataset[!, Symbol(field, "-0.0")] else throw(ArgumentError("Only `first` operation supported for now.")) end end - + return output_df end """ @@ -151,6 +155,7 @@ end """ function process_categorical(dataset, fields_entry) field = only_one_field(fields_entry["fields"]) + output_df = DataFrame() for phenotype_entry in fields_entry["phenotypes"] operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] if operation == "first" @@ -170,12 +175,13 @@ function process_categorical(dataset, fields_entry) codings_output[index] = output[index] ∈ codings end end - return DataFrame([Symbol(phenotype_entry["name"]) => codings_output]) + output_df[!, Symbol(phenotype_entry["name"])] = codings_output else - return DataFrame([Symbol(phenotype_entry["name"]) => output]) + output_df[!, Symbol(phenotype_entry["name"])] = output end else throw(ArgumentError("Only `first` operation supported for now.")) end end + return output_df end diff --git a/test/config/config_with_custom_fields.yaml b/test/config/config_with_custom_fields.yaml index b37a78b..7d052f2 100644 --- a/test/config/config_with_custom_fields.yaml +++ b/test/config/config_with_custom_fields.yaml @@ -62,7 +62,10 @@ traits: - fields: 21000 phenotypes: - - name: ethnicity + - name: ethnicity_1001 + codings: [1001] + - name: ethnicity_3002 + codings: [3002] - fields: 22001 phenotypes: diff --git a/test/datasets_extraction.jl b/test/datasets_extraction.jl index 9e699e7..29fcaed 100644 --- a/test/datasets_extraction.jl +++ b/test/datasets_extraction.jl @@ -28,7 +28,7 @@ end filter_and_extract(parsed_args) traits = CSV.read(parsed_args["out"], DataFrame) - @test size(traits) == (10, 23) + @test size(traits) == (10, 24) # Custom fields are simply pushed forward @test traits.dummyfield == [2, 2, 2, 2, 2, 2, 2, 2, 2, 2] @@ -82,7 +82,8 @@ end [79.5, 82.61, 81.0, 78.8, 83.16, 73.7, 81.72, 84.0, 75.34, missing] ) - @test traits[!, "ethnicity"] == [1001, 2, 3002, 6, 1001, 1001, 1001, 1001, 1001, 4001] + @test traits[!, "ethnicity_1001"] == [1, 0, 0, 0, 1, 1, 1, 1, 1, 0] + @test traits[!, "ethnicity_3002"] == [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] test_column_with_missing( traits[!, "genetic sex"],