Merge pull request #6 from TARGENE/fix_type_issues

update testset
TARGENE · May 8, 2023 · 3e83250 · 3e83250
2 parents d32d5ac + 2d1635c
commit 3e83250
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 10 deletions.
diff --git a/src/fields_processing.jl b/src/fields_processing.jl
@@ -88,6 +88,7 @@ process_custom(dataset, fields_entry) =
 """
 function process_ordinal(dataset, fields_entry)
     field = only_one_field(fields_entry["fields"])
+    output_df = DataFrame()
     for phenotype_entry in fields_entry["phenotypes"]
         operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] 
         if operation == "first"
@@ -99,18 +100,20 @@ function process_ordinal(dataset, fields_entry)
                     output[index] = val
                 end
             end
-            return DataFrame([Symbol(phenotype_entry["name"]) => output])
+            output_df[!, Symbol(phenotype_entry["name"])] = output
         else
             throw(ArgumentError("Only `first` operation supported for now."))
         end
     end
+    return output_df
 end
 
 """
     process_continuous(dataset, fields_entry)
 """
 function process_continuous(dataset, fields_entry)
     field = only_one_field(fields_entry["fields"])
+    output_df = DataFrame()
     for phenotype_entry in fields_entry["phenotypes"]
         operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] 
         if operation == "first"
@@ -122,35 +125,37 @@ function process_continuous(dataset, fields_entry)
                     output[index] = val
                 end
             end
-            return DataFrame([Symbol(phenotype_entry["name"]) => output])
+            output_df[!, Symbol(phenotype_entry["name"])] = output
         else
             throw(ArgumentError("Only `first` operation supported for now."))
         end
     end
+    return output_df
 end
 
 """
     process_integer(dataset, fields_entry)
 """
 function process_integer(dataset, fields_entry)
     field = only_one_field(fields_entry["fields"])
+    output_df = DataFrame()
     for phenotype_entry in fields_entry["phenotypes"]
         operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] 
         if operation == "first"
-            column = dataset[!, Symbol(field, "-0.0")]
-            return DataFrame([Symbol(phenotype_entry["name"]) => column])
+            output_df[!, Symbol(phenotype_entry["name"])] = dataset[!, Symbol(field, "-0.0")]
         else
             throw(ArgumentError("Only `first` operation supported for now."))
         end
     end
-
+    return output_df
 end
 
 """
     process_categorical(dataset, fields_entry)
 """
 function process_categorical(dataset, fields_entry)
     field = only_one_field(fields_entry["fields"])
+    output_df = DataFrame()
     for phenotype_entry in fields_entry["phenotypes"]
         operation = !haskey(phenotype_entry, "operation") ? "first" : phenotype_entry["operation"] 
         if operation == "first"
@@ -170,12 +175,13 @@ function process_categorical(dataset, fields_entry)
                         codings_output[index] = output[index] ∈ codings
                     end
                 end
-                return DataFrame([Symbol(phenotype_entry["name"]) => codings_output])
+                output_df[!, Symbol(phenotype_entry["name"])] = codings_output
             else
-                return DataFrame([Symbol(phenotype_entry["name"]) => output])
+                output_df[!, Symbol(phenotype_entry["name"])] = output
             end
         else
             throw(ArgumentError("Only `first` operation supported for now."))
         end
     end
+    return output_df
 end
diff --git a/test/config/config_with_custom_fields.yaml b/test/config/config_with_custom_fields.yaml
@@ -62,7 +62,10 @@ traits:
 
   - fields: 21000
     phenotypes:
-      - name: ethnicity
+      - name: ethnicity_1001
+        codings: [1001]
+      - name: ethnicity_3002
+        codings: [3002]
 
   - fields: 22001
     phenotypes:

diff --git a/test/datasets_extraction.jl b/test/datasets_extraction.jl
@@ -28,7 +28,7 @@ end
 
     filter_and_extract(parsed_args)
     traits = CSV.read(parsed_args["out"], DataFrame)
-    @test size(traits) == (10, 23)
+    @test size(traits) == (10, 24)
 
     # Custom fields are simply pushed forward
     @test traits.dummyfield == [2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
@@ -82,7 +82,8 @@ end
         [79.5, 82.61, 81.0, 78.8, 83.16, 73.7, 81.72, 84.0, 75.34, missing]
     )
 
-    @test traits[!, "ethnicity"] == [1001, 2, 3002, 6, 1001, 1001, 1001, 1001, 1001, 4001]
+    @test traits[!, "ethnicity_1001"] == [1, 0, 0, 0, 1, 1, 1, 1, 1, 0]
+    @test traits[!, "ethnicity_3002"] == [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
 
     test_column_with_missing(
         traits[!, "genetic sex"],