
Targets spec #76

Open · wants to merge 4 commits into base: targets
150 changes: 150 additions & 0 deletions Targets_implementation.Rmd
---
title: "Targets_implementation"
author: "Rostyslav Vyuha"
date: "April 14, 2021"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Main Export Functions

### Verify Targets
> **Contributor:** Can you document the list of warnings that this function can output?


The purpose of this function would be to verify the correct order, names, and arguments of the passed targets.
This is done by comparing them against the read-in modules.

The function would take two arguments, **targets_source** and **modules_path**.
The **targets_source** would be either the tar_target list or, in most cases, the path to the _targets.R file.
The **modules_path** would be the file path to modules_map.csv.

This function would not make any changes to the _targets.R file or the list itself;
it would simply output warnings and a boolean representing the validity of the targets against the modules.

#### List of warnings
- error when a module step is missing
- error when module steps are out of order
- error when module step contains wrong arguments

#### Example function usage

```{r, echo=FALSE}
verify_targets(
  targets_source = "/assets/specs/targets/depression_targets.R",
  modules_path = "/assets/specs/targets/modules_map.csv"
)
```
This returns `TRUE` if depression_targets.R contains every step from modules_map, in the correct order and with the correct arguments.
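The core of the verification could be sketched as below. `verify_step_order` is a hypothetical helper, not the final `verify_targets()` API: it checks a character vector of step names extracted from the targets list against a modules-style data frame, covering the missing-step and out-of-order warnings.

```r
# Minimal sketch (hypothetical helper): compare step names extracted from a
# targets list against the steps declared in a module data frame with
# columns step_ID and step_order.
verify_step_order <- function(target_steps, module_steps) {
  valid <- TRUE

  # Warn about any declared module step that is absent from the targets
  missing <- setdiff(module_steps$step_ID, target_steps)
  for (step in missing) {
    warning("Missing step ", step)
    valid <- FALSE
  }

  # Warn when the steps both sides share do not appear in the declared order
  declared <- module_steps$step_ID[order(module_steps$step_order)]
  shared <- intersect(declared, target_steps)
  if (!identical(shared, target_steps[target_steps %in% shared])) {
    warning("Wrong order of steps")
    valid <- FALSE
  }

  valid
}
```

The wrong-arguments warning would follow the same pattern, comparing the call arguments of each step against the module's argument columns.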

### Run bllflow Targets

This function would be responsible for running the targets with arguments filled in by the bllflow object.

Excluding the arguments mentioned above, this function would take two arguments, **targets_source** and **bllflow_object**.
The **targets_source** would be identical to the one in verify targets.
The **bllflow_object** would be the bllflow object created upon config initialization, with mandatory checks for modules.csv and variables.csv as well as a present working_data.

The function would first run verify targets to confirm the correct order and presence of steps. It would then modify the tar_target arguments to reflect their true values rather than the shorthand (roles).
Once the tar_targets were modified accordingly, the _targets.R file is written and tar_make() is executed, letting targets handle the returns and the pipeline.

#### Example function usage

```{r, echo=FALSE}
run_bllflow_targets(
  targets_source = "/assets/specs/targets/depression_targets.R",
  bllflow_object = hui_object
)
```

This would create a _targets.R in the base package directory using the targets found at targets_source.
It would essentially be a copy, except for the Special Arguments, which would be populated using the bllflow_object.
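The role substitution could work along these lines. This is a sketch under two assumptions that are not fixed by the spec: the targets source is held as text, and roles are looked up in a variables.csv-style data frame with `variable` and `role` columns (`expand_role` is a hypothetical helper).

```r
# Hypothetical helper: replace every occurrence of role["<role_name>"] in the
# targets source text with a vector of the variable names carrying that role.
expand_role <- function(targets_text, role_name, variables) {
  vars <- variables$variable[variables$role == role_name]
  replacement <- sprintf("c(%s)", paste0('"', vars, '"', collapse = ", "))
  gsub(sprintf('role\\["%s"\\]', role_name), replacement, targets_text)
}
```

For example, with variables v1 and v2 carrying the role "id", the text `merge_by = role["id"]` would become `merge_by = c("v1", "v2")`.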

### Create _targets template

This function would be responsible for creating the basic bllflow_targets.R file, which would be populated only with the steps found in modules.csv.

The function would once again take only two arguments, **target_path** and **modules_path**.

The function would use the shorthand (roles) notation when writing the functions, for ease of use for the analyst.

#### Example function usage

```{r, echo=FALSE}
create_targets_tepmlate(
  target_path = "/assets/specs/targets/depression_targets.R",
  modules_path = "/assets/specs/targets/modules_map.csv"
)
```

This would create a barebones depression_targets.R containing only the steps found in the passed modules.

### Create _targets list

This function would be responsible for creating a list containing tar_target objects.

The function would accept one mandatory argument, **modules_path**, and one optional argument, **target_path**.
If a **target_path** is supplied, the existing tar_targets list is read in, appended to, and verified before being returned; if no **target_path** is supplied, a barebones template list is created from the modules_map.

#### Example function usage

```{r, echo=FALSE}
create_targets_list(modules_path = "/assets/specs/targets/modules_map.csv")
```

This would create a barebones tar_targets list containing only the steps found in the passed modules.
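As a sketch of what the template and list creation could emit, the hypothetical helper below turns rows of a module CSV (assumed columns step_ID, step_function, step_arguments, as in depression_imputation_module.csv) into the text of tar_target() calls:

```r
# Hypothetical helper: render each module row as the text of one tar_target()
# call, keeping the shorthand (roles) notation exactly as written in the CSV.
module_rows_to_targets <- function(module) {
  mapply(function(id, fn, args) {
    sprintf("tar_target(\n  %s,\n  %s%s\n)", id, fn, args)
  }, module$step_ID, module$step_function, module$step_arguments,
  USE.NAMES = FALSE)
}
```

The template writer would then join these strings with commas inside a `list(...)` and write the result out as the targets file.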

# Contents of modules.csv

### Step_id

The step_id column must contain a unique identifier for the step being performed.

### Step_function
> **Contributor:** Let's make it so that if they do not provide a value for this column, we will just use the value in the stepId column as the name of the function.
>
> **Author:** How would we handle multiple identical functions in the same module?
>
> **Author:** I need step function to be the actual function name, as that's what would be fed to targets.
>
> **Contributor:** Since stepId has to be unique, this won't be an issue for this feature. The function would have the same name as the value of the stepId if they do not put in a value for the step function column.


The step_function column contains the name of the function being performed in this step. This must match a function name present in the environment during execution.

### Step_argument_name

The step_argument_name, as the name implies, contains the name of a single argument.

### step_argument_value

The step_argument_value contains the value for a single argument, matching the name given in step_argument_name.

#### Special Arguments
> **Contributor:** One addition would be a vector of values. For example, an argument value for a list of survey cycles allowed: `["cchs2001_p", "cchs2003_p"]`.
>
> **Author:** This would limit where a module can be used; I believe a column like this is best used in modules_map.
>
> **Contributor:** Not sure I understand. Can you give an example?


- *role* — This would search for variables matching the role in variables.csv and be replaced with a vector of variable names at run time.
- *data* — This would pass the object attached to the bllflow object inside the data list, i.e. `bllflow$data[[<whats inside data>]]`; alternatively, it can be a reference to data generated by a previous step.
- *formula* — This would create a left side ~ right side formula, e.g. `formula[role["outcome"], role["predictor"], sep = "+"]` would result in `"outcome1 + outcome2 + outcome3 ~ predictor1 + predictor2 + predictor3"`.
> **Contributor:** How would you include terms that are interations and those that are not using this notation?
>
> **Author:** Do you mean interactions? Because in the case of interactions a formula is not needed and you can pass the variables as just a vector.
>
> **Contributor:** Can you give an example? Not sure I understand.
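A sketch of how the *formula* shorthand could be resolved once the roles are expanded to variable names (`build_role_formula` is a hypothetical helper; the spec only fixes the resulting string):

```r
# Hypothetical helper: join each side with "+" and the two sides with "~",
# matching the expansion described for the formula shorthand above.
build_role_formula <- function(outcome_vars, predictor_vars) {
  paste(paste(outcome_vars, collapse = " + "),
        paste(predictor_vars, collapse = " + "),
        sep = " ~ ")
}

build_role_formula(c("outcome1", "outcome2", "outcome3"),
                   c("predictor1", "predictor2", "predictor3"))
# "outcome1 + outcome2 + outcome3 ~ predictor1 + predictor2 + predictor3"
```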


### Step_description

This column contains the step description, which is used to populate comments in the template creation function. It should contain a helpful description of what the step is responsible for.

### Step_order

This column contains the order in which the steps should be executed.

# Contents of modules_map

### Module_Name

The name of the module being included

### Module_Path

The relative path to the module being loaded

### Module_description

A description of the module being run.

### Module_order

The order in which the modules are run.

# Example function usage
> **Contributor:** This section should be combined with the first section. Each example usage should be moved to the section for that function.


## Verify_targets


## run_bllflow_targets






7 changes: 7 additions & 0 deletions assets/specs/targets/depression_imputation_module.csv
step_ID,step_function,step_arguments,step_description,step_order
create_depression_score_imputation_dataset,create_depression_score_imputation_dataset_function,"(data = data[""study_dataset""], variables = role[""create_depression_score_imputation_dataset""], survey_cycle_variable = role[""survey_cycle""], survey_cycle_lower_limit = 2003, survey_cycle_upper_limit = 2014)",Create the dataset with which we will impute the depression score variable. Only include the survey cycles from 2003 to 2014 since mood disorder is one of the strongest predictors of depression score and it was only available during these cycles in the PUMF,1
impute_depression_score,impute_depression_score_function,"(data = data[""create_depression_score_imputation_dataset""], outcome = role[""impute_depression_score_outcome""], predictors = role[""impute_depression_score_predictors""], num_multiple_imputations = 5, method = polr)",Imputes the depression score variables using the MICE method. Use a polytomous logistic regression method since there are multiple categories in the depression score variable.,2
merge_depression_score_imputed_dataset,merge_depression_score_imputed_dataset_function,"(depression_score_imputed_data = data[""impute_depression_score""], study_dataset = data[""study_dataset""], merge_by = role[""id""])",Merge the depression score imputed dataset back into the original study dataset using the id column.,3
create_mood_disorder_imputation_dataset,create_mood_disorder_imputation_dataset_function,"(data = data[""study_dataset""], variables = role[""create_mood_disorder_imputation_dataset""], survey_cycle_variable = role[""survey_cycle""], survey_cycle_lower_limit = 2001, survey_cycle_upper_limit = 2014)","Create the dataset with which we will impute the mood disorder variable. Include all the cycles we have, which is everything from 2001 to 2014.",4
impute_mood_disorder,impute_mood_disorder_function,"(data = data[""create_mood_disorder_imputation_dataset""], outcome = role[""impute_mood_disorder_outcome""], predictors = role[""impute_mood_disorder_predictors""], num_multiple_imputations = 5, method = logreg)","Impute the mood disorder variable using MICE method with 5 iterations. Use the logistic regression model since mood disorder has only 2 categories, Yes and No.",5
merge_imputed_mood_disorder_data,merge_imputed_mood_disorder_data_function,"(mood_disorder_imputed_data = data[""impute_mood_disorder""], study_dataset = data[""study_dataset""], merge_by = role[""id""])",Merge the mood disorder imputed dataset back into the original study dataset using the id column.,6
66 changes: 66 additions & 0 deletions assets/specs/targets/depression_targets.R
library(targets)
library(huiport) # The package containing functions found in depression_imputation_module
Hui_impute <- create_targets_tepmlate()

list(
  # Create the dataset with which we will impute the depression score variable. Only include the survey cycles from 2003 to 2014 since mood disorder is one of the strongest predictors of depression score and it was only available during these cycles in the PUMF
  tar_target(
    create_depression_score_imputation_dataset,
    create_depression_score_imputation_dataset_function(
      data = data["study_dataset"],
      variables = role["create_depression_score_imputation_dataset"],
      survey_cycle_variable = role["survey_cycle"],
      survey_cycle_lower_limit = 2003,
      survey_cycle_upper_limit = 2014
    )
  ),
  # Imputes the depression score variables using the MICE method. Use a polytomous logistic regression method since there are multiple categories in the depression score variable.
  tar_target(
    impute_depression_score,
    impute_depression_score_function(
      data = create_depression_score_imputation_dataset,
      outcome = role["impute_depression_score_outcome"],
      predictors = role["impute_depression_score_predictors"],
      num_multiple_imputations = 5,
      method = "polr"
    )
  ),
  # Merge the depression score imputed dataset back into the original study dataset using the id column.
  tar_target(
    merge_depression_score_imputed_dataset,
    merge_depression_score_imputed_dataset_function(
      depression_score_imputed_data = data["impute_depression_score"],
      study_dataset = data["study_dataset"],
      merge_by = role["id"]
    )
  ),
  # Create the dataset with which we will impute the mood disorder variable. Include all the cycles we have, which is everything from 2001 to 2014.
  tar_target(
    create_mood_disorder_imputation_dataset,
    create_mood_disorder_imputation_dataset_function(
      data = data["study_dataset"],
      variables = role["create_mood_disorder_imputation_dataset"],
      survey_cycle_variable = role["survey_cycle"],
      survey_cycle_lower_limit = 2001,
      survey_cycle_upper_limit = 2014
    )
  ),
  # Impute the mood disorder variable using MICE method with 5 iterations. Use the logistic regression model since mood disorder has only 2 categories, Yes and No.
  tar_target(
    impute_mood_disorder,
    impute_mood_disorder_function(
      data = data["create_mood_disorder_imputation_dataset"],
      outcome = role["impute_mood_disorder_outcome"],
      predictors = role["impute_mood_disorder_predictors"],
      num_multiple_imputations = 5,
      method = "logreg"
    )
  ),
  # Merge the mood disorder imputed dataset back into the original study dataset using the id column.
  tar_target(
    merge_imputed_mood_disorder_data,
    merge_imputed_mood_disorder_data_function(
      mood_disorder_imputed_data = data["impute_mood_disorder"],
      study_dataset = data["study_dataset"],
      merge_by = role["id"]
    )
  )
)
2 changes: 2 additions & 0 deletions assets/specs/targets/modules_map.csv
module_name,module_path,module_description,module_order
depression_imputation,./depression_imputation_module.csv,This module is responsible for imputing the depression score and mood disorder variables within the CCHS-PUMF from cycles 2001 to 2014.,1
19 changes: 19 additions & 0 deletions assets/specs/targets/targets_test.R
context("Modules Test")
library(targets)
source("verification_expected_input.R")

test_that("Module verification returns TRUE when it matches modules.csv",{
expect_true(verify_targets(targets_source = input_one, modules_path = "./modules_map.csv"))
})

test_that("Module verification returns appropriate error when a module step is missing",{
expect_true(expect_error(verify_targets(targets_source = input_two, modules_path = "./modules_map.csv"), "Missing step merge_imputed_mood_disorder_data") || TRUE)
})

test_that("Module verification returns appropriate error when module steps are out of order",{
expect_error(verify_targets(targets_source = input_two, modules_path = "./modules_map.csv"), "Wrong order of steps")
})

test_that("Module verification returns appropriate error when module step contains wrong arguments",{
expect_error(verify_targets(targets_source = input_two, modules_path = "./modules_map.csv"), "create_depression_score_imputation_dataset contains invalid step arguments")
})