From cb5fc372464aaeedb9f91145232e5085fb34a0c5 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Mon, 30 Sep 2024 12:49:09 -0400
Subject: [PATCH 01/23] study_participants: nest survival fields under survival_filters

---
 config/es_indices_ccdi_model.yml | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 5823613..ff190be 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -13,8 +13,17 @@ Indices:
         type: keyword
       sex_at_birth:
         type: keyword
-      last_known_survival_status:
-        type: keyword
+      survival_filters:
+        type: nested
+        properties:
+          last_known_survival_status:
+            type: keyword
+          age_at_event_free_survival_status:
+            type: integer
+          event_free_survival_status:
+            type: keyword
+          first_event:
+            type: keyword
       sample_diagnosis_file_filters:
         type: nested
         properties:
@@ -326,15 +335,24 @@ Indices:
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-            WITH p, sample_diagnosis_file_filter, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, st, stf, stp
+            WITH p, sample_diagnosis_file_filter, 
+            COLLECT({COLLECT(
+                  DISTINCT 
+                  CASE 
+                    WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                    ELSE su.last_known_survival_status
+                  END
+                  ) AS last_known_survival_status,
+                  COLLECT(DISTINCT su.event_free_survival_status ) as event_free_survival_status,
+                  COLLECT(DISTINCT su.first_event ) as first_event) }
+            as survival_filters, st, stf, stp
             RETURN DISTINCT
               p.id as id,
               p.id as pid,
               p.participant_id as participant_id,
               apoc.text.split(p.race, ';') as race,
               p.sex_at_birth as sex_at_birth,
-              case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,
+              survival_filters as survival_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
               st.study_id as study_id,
               st.dbgap_accession as dbgap_accession,

From 3c5912a615d4488dac0a177383690791b17d6940 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Mon, 30 Sep 2024 13:00:38 -0400
Subject: [PATCH 02/23] study_participants: build survival_filters in two WITH steps

---
 config/es_indices_ccdi_model.yml | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index ff190be..9d6f784 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -335,17 +335,20 @@ Indices:
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-            WITH p, sample_diagnosis_file_filter, 
-            COLLECT({COLLECT(
-                  DISTINCT 
-                  CASE 
-                    WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                    ELSE su.last_known_survival_status
-                  END
-                  ) AS last_known_survival_status,
-                  COLLECT(DISTINCT su.event_free_survival_status ) as event_free_survival_status,
-                  COLLECT(DISTINCT su.first_event ) as first_event) }
-            as survival_filters, st, stf, stp
+WITH p, sample_diagnosis_file_filter, 
+  COLLECT(DISTINCT CASE 
+      WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+      ELSE su.last_known_survival_status
+  END) AS last_known_survival_status,
+  COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+  COLLECT(DISTINCT su.first_event) as first_event,
+  COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status,  st, stf, stp
+WITH p, sample_diagnosis_file_filter, 
+  COLLECT({last_known_survival_status: last_known_survival_status, 
+   event_free_survival_status: event_free_survival_status, 
+   first_event: first_event,
+   age_at_event_free_survival_status: age_at_event_free_survival_status} )AS survival_filters,
+  st, stf, stp
             RETURN DISTINCT
               p.id as id,
               p.id as pid,

From 1f2c0f376725ba9d82ad7da4682f183628fc768b Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Mon, 30 Sep 2024 13:29:17 -0400
Subject: [PATCH 03/23] study_participants: add null survival_filters to the two other queries

---
 config/es_indices_ccdi_model.yml | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 9d6f784..8599ae7 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -373,7 +373,6 @@ WITH p, sample_diagnosis_file_filter,
             null as participant_id,
             null as race,
             null as sex_at_birth,
-            null as last_known_survival_status,
             COLLECT(DISTINCT {
                 sample_anatomic_site: null,
                 participant_age_at_collection: null,
@@ -394,6 +393,12 @@ WITH p, sample_diagnosis_file_filter,
                 library_source_molecule: null,
                 library_strategy: null
             }) AS sample_diagnosis_file_filters,
+            COLLECT(DISTINCT {
+                last_known_survival_status: null,
+                age_at_event_free_survival_status: null,
+                event_free_survival_status: null,
+                first_event: null,
+            }) AS survival_filters,
             st.study_id as study_id,
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
@@ -414,7 +419,12 @@ WITH p, sample_diagnosis_file_filter,
             null as participant_id,
             null as race,
             null as sex_at_birth,
-            null as last_known_survival_status,
+            COLLECT(DISTINCT {
+                null as last_known_survival_status,
+                null as age_at_event_free_survival_status,
+                null as event_free_survival_status,
+                null as first_event,
+            }) AS survival_filters,
             COLLECT(DISTINCT {
                 sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
                 participant_age_at_collection: sm.participant_age_at_collection,

From e9bbfac9f7326955bc8af30be0d92d40b320764e Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Mon, 30 Sep 2024 13:53:40 -0400
Subject: [PATCH 04/23] study_participants: make survival_filters a single map instead of a list

---
 config/es_indices_ccdi_model.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 8599ae7..be7a950 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -344,10 +344,10 @@ WITH p, sample_diagnosis_file_filter,
   COLLECT(DISTINCT su.first_event) as first_event,
   COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status,  st, stf, stp
 WITH p, sample_diagnosis_file_filter, 
-  COLLECT({last_known_survival_status: last_known_survival_status, 
+ {last_known_survival_status: last_known_survival_status, 
    event_free_survival_status: event_free_survival_status, 
    first_event: first_event,
-   age_at_event_free_survival_status: age_at_event_free_survival_status} )AS survival_filters,
+   age_at_event_free_survival_status: age_at_event_free_survival_status}  AS survival_filters,
   st, stf, stp
             RETURN DISTINCT
               p.id as id,

From bc479df8d36c6623b810969decc92a1a4b73b9d7 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Tue, 1 Oct 2024 10:42:42 -0400
Subject: [PATCH 05/23] study_participants: re-indent survival WITH clauses and collect survival_filters again

---
 config/es_indices_ccdi_model.yml | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index be7a950..9f03098 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -335,20 +335,18 @@ Indices:
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-WITH p, sample_diagnosis_file_filter, 
-  COLLECT(DISTINCT CASE 
-      WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-      ELSE su.last_known_survival_status
-  END) AS last_known_survival_status,
-  COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-  COLLECT(DISTINCT su.first_event) as first_event,
-  COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status,  st, stf, stp
-WITH p, sample_diagnosis_file_filter, 
- {last_known_survival_status: last_known_survival_status, 
-   event_free_survival_status: event_free_survival_status, 
-   first_event: first_event,
-   age_at_event_free_survival_status: age_at_event_free_survival_status}  AS survival_filters,
-  st, stf, stp
+            WITH p, sample_diagnosis_file_filter,  st, stf, stp,
+            COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
+            WITH p, sample_diagnosis_file_filter, st, stf, stp,  COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
             RETURN DISTINCT
               p.id as id,
               p.id as pid,

From 538a10a710a58b3d3b2d59d23152bed889bdde00 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Tue, 1 Oct 2024 10:54:44 -0400
Subject: [PATCH 06/23] update participants query

---
 config/es_indices_ccdi_model.yml | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 9f03098..5faa995 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -480,8 +480,17 @@ Indices:
         type: keyword
       alternate_participant_id:
         type: keyword
-      last_known_survival_status:
-        type: keyword
+      survival_filters:
+        type: nested
+        properties:
+          last_known_survival_status:
+            type: keyword
+          age_at_event_free_survival_status:
+            type: integer
+          event_free_survival_status:
+            type: keyword
+          first_event:
+            type: keyword
       sample_diagnosis_file_filters:
         type: nested
         properties:
@@ -802,7 +811,17 @@ Indices:
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-            WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, file, st, stf, stp
+            WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, file, st, stf, stp
+                        WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, file, st, stf, stp
             RETURN DISTINCT
               p.id as id,
               p.participant_id as participant_id,
@@ -810,8 +829,7 @@ Indices:
               p.race as race_str,
               p.sex_at_birth as sex_at_birth,
               apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
-              case when 'Dead' in vital_status then ['Dead']
-                    else vital_status end as last_known_survival_status,
+              survival_filters as survival_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
               st.study_id as study_id,
               st.dbgap_accession as dbgap_accession,

From be35c030baa28be966b3d7484c9285f94ca47579 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Tue, 1 Oct 2024 14:25:28 -0400
Subject: [PATCH 07/23] update diagnosis query

---
 config/es_indices_ccdi_model.yml | 65 ++++++++++++++++++++++++++------
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 5faa995..fe1369c 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -883,8 +883,17 @@ Indices:
         type: keyword
       study_name:
         type: keyword
-      last_known_survival_status:
-        type: keyword
+      survival_filters:
+        type: nested
+        properties:
+          last_known_survival_status:
+            type: keyword
+          age_at_event_free_survival_status:
+            type: integer
+          event_free_survival_status:
+            type: keyword
+          first_event:
+            type: keyword
       sample_file_filters:
         type: nested
         properties:
@@ -1051,7 +1060,17 @@ Indices:
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, st, stf, stp, dg
+          WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, st, stf, stp, dg
+          WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp, dg
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -1074,8 +1093,7 @@ Indices:
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
             st.study_name as study_name,
-            case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,       
+            survival_filters as survival_filters,    
             apoc.coll.union(cell_line_pdx_file_filters, general_file_filters) + participant_clinical_measure_file_filters + participant_radiology_file_filters AS sample_file_filters,
             COUNT(DISTINCT file.id) as file_count,
             COLLECT(DISTINCT file.id) as files
@@ -1147,7 +1165,17 @@ Indices:
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, st, stf, stp
+          WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT CASE 
+              WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+              ELSE su.last_known_survival_status
+          END) AS last_known_survival_status,
+          COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+          COLLECT(DISTINCT su.first_event) as first_event,
+          COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, st, stf, stp
+          WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+          event_free_survival_status: event_free_survival_status, 
+          first_event: first_event,
+          age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -1170,8 +1198,7 @@ Indices:
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
             st.study_name as study_name,
-            case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,       
+            survival_filters as survival_filters,      
             sample_file_filter AS sample_file_filters,
             COUNT(DISTINCT file.id) as file_count,
             COLLECT(DISTINCT file.id) as files
@@ -1223,7 +1250,17 @@ Indices:
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, st, stf, stp
+          WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, st, stf, stp
+          WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -1246,8 +1283,7 @@ Indices:
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
             st.study_name as study_name,
-            case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,       
+            survival_filters as survival_filters,      
             sample_file_filter AS sample_file_filters,
             size(files) as file_count,
             files as files
@@ -1310,7 +1346,12 @@ Indices:
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
             st.study_name as study_name,
-            null as last_known_survival_status,       
+            COLLECT(DISTINCT {
+                last_known_survival_status: null,
+                age_at_event_free_survival_status: null,
+                event_free_survival_status: null,
+                first_event: null,
+            }) AS survival_filters,   
             sample_file_filter AS sample_file_filters,
             COUNT(DISTINCT file.id) as file_count,
             COLLECT(DISTINCT file.id) as files

From 17707b8ed7b08daf643e29c7a191719bd098117c Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Tue, 8 Oct 2024 10:34:57 -0400
Subject: [PATCH 08/23] add survivals index and bump bento submodule

---
 bento                            |   2 +-
 config/es_indices_ccdi_model.yml | 420 ++++++++++++++++++++++++++++++-
 2 files changed, 411 insertions(+), 11 deletions(-)

diff --git a/bento b/bento
index d644aac..1fda519 160000
--- a/bento
+++ b/bento
@@ -1 +1 @@
-Subproject commit d644aac1198ad56b9dc2a7e95f8173f6eae271e6
+Subproject commit 1fda5197855eabb4884d89231a34550d36bb606d
diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index fe1369c..5551d89 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -838,6 +838,381 @@ Indices:
               COUNT(DISTINCT file.id) as file_count,
               COLLECT(DISTINCT file.id) as files
 
+  - index_name: survivals
+    type: neo4j
+    mapping:
+      id:
+        type: keyword
+      participant_id:
+        type: keyword
+        normalizer: lowercase
+      race:
+        type: keyword
+      race_str:
+        type: keyword
+      sex_at_birth:
+        type: keyword
+      alternate_participant_id:
+        type: keyword
+      survival_filters:
+        type: nested
+        properties:
+          last_known_survival_status:
+            type: keyword
+          age_at_event_free_survival_status:
+            type: integer
+          event_free_survival_status:
+            type: keyword
+          first_event:
+            type: keyword
+      sample_diagnosis_file_filters:
+        type: nested
+        properties:
+          sample_anatomic_site:
+            type: keyword
+          participant_age_at_collection:
+            type: integer
+          sample_tumor_status:
+            type: keyword
+          tumor_classification:
+            type: keyword
+          age_at_diagnosis:
+            type: integer
+          diagnosis_anatomic_site:
+            type: keyword
+          disease_phase:
+            type: keyword
+          diagnosis_classification_system:
+            type: keyword
+          diagnosis_basis:
+            type: keyword
+          tumor_grade_source:
+            type: keyword  
+          tumor_stage_source:
+            type: keyword              
+          diagnosis:
+            type: keyword
+          assay_method:
+            type: keyword
+          file_type:
+            type: keyword
+          library_selection:
+            type: keyword
+          library_source_material:
+            type: keyword
+          library_source_molecule:
+            type: keyword
+          library_strategy:
+            type: keyword
+      study_id:
+        type: keyword
+      dbgap_accession:
+        type: keyword
+      study_acronym:
+        type: keyword
+      study_name:
+        type: keyword
+      file_count:
+        type: integer
+      files:
+        type: text
+        fields:
+          keyword:
+            type: keyword
+    # Cypher query will be used to retrieve data from Neo4j, and index into Elasticsearch
+    cypher_queries:
+      - query: |
+          MATCH (p:participant)
+          optional match (p)<--(sm:sample)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with distinct p, sm, file
+          with p, collect(DISTINCT {
+                      sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                      participant_age_at_collection: sm.participant_age_at_collection,
+                      sample_tumor_status: sm.sample_tumor_status,
+                      tumor_classification: sm.tumor_classification,
+                      assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                      file_type: file.file_type,
+                      library_source_material: null,
+                      library_source_molecule: null,
+                      library_strategy: null
+              }) as sample_clinical_radiology_file_filter
+          optional match (p)<--(sm:sample)<--(file)
+          where (file: sequencing_file or file: methylation_array_file or file: pathology_file or file: cytogenomic_file)
+          with p, sample_clinical_radiology_file_filter, collect(DISTINCT {
+                      sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                      participant_age_at_collection: sm.participant_age_at_collection,
+                      sample_tumor_status: sm.sample_tumor_status,
+                      tumor_classification: sm.tumor_classification,
+                      assay_method: CASE LABELS(file)[0]
+                                              WHEN 'sequencing_file' THEN 'Sequencing'
+                                              WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                              WHEN 'pathology_file' THEN 'Pathology imaging'
+                                              WHEN 'methylation_array_file' THEN 'Methylation array'
+                                              ELSE null END,
+                      file_type: file.file_type,
+                      library_selection: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_selection
+                                    ELSE null END,
+                      library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                      library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                      library_strategy: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_strategy
+                                    ELSE null END
+              }) as sample_sequencing_cytogenomic_pathology_methylation_file_filter
+            with p, apoc.coll.union(sample_clinical_radiology_file_filter, sample_sequencing_cytogenomic_pathology_methylation_file_filter) as sample_file_filters
+            optional match (p)<--(dg:diagnosis)
+            with p, sample_file_filters, dg
+            unwind sample_file_filters as sample_file_filter
+            with p, collect(apoc.map.merge(sample_file_filter, {
+                age_at_diagnosis: dg.age_at_diagnosis,
+                diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                disease_phase: dg.disease_phase,
+                diagnosis_classification_system: dg.diagnosis_classification_system,
+                diagnosis_basis: dg.diagnosis_basis, 
+                tumor_grade_source: dg.tumor_grade_source,
+                tumor_stage_source: dg.tumor_stage_source,          
+                diagnosis: dg.diagnosis
+              })) as sample_diagnosis_file_filter
+            optional match (p)<--(sm:sample)<--(dg:diagnosis)
+            optional match (sm)<--(file)
+            where (file: sequencing_file or file: methylation_array_file or file: pathology_file or file: cytogenomic_file)
+            with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                    participant_age_at_collection: sm.participant_age_at_collection,
+                    sample_tumor_status: sm.sample_tumor_status,
+                    tumor_classification: sm.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_1
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_1) as sample_diagnosis_file_filters
+            optional match (p)<--(sm:sample)<--(dg:diagnosis)
+            optional match (p)<--(file)
+            where (file: clinical_measure_file or file: radiology_file)
+            with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                    participant_age_at_collection: sm.participant_age_at_collection,
+                    sample_tumor_status: sm.sample_tumor_status,
+                    tumor_classification: sm.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_2
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_2) as sample_diagnosis_file_filter
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(file)
+          WHERE (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
+          optional Match (sm1)<--(dg:diagnosis)
+          with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm1.anatomic_site, ';'),
+                    participant_age_at_collection: sm1.participant_age_at_collection,
+                    sample_tumor_status: sm1.sample_tumor_status,
+                    tumor_classification: sm1.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_1
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_1) as sample_diagnosis_file_filters
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(file)
+          WHERE (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
+          optional Match (sm2)<--(dg:diagnosis)
+          with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm2.anatomic_site, ';'),
+                    participant_age_at_collection: sm2.participant_age_at_collection,
+                    sample_tumor_status: sm2.sample_tumor_status,
+                    tumor_classification: sm2.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_2
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_2) as sample_diagnosis_file_filter
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm1)<--(dg:diagnosis)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm1.anatomic_site, ';'),
+                    participant_age_at_collection: sm1.participant_age_at_collection,
+                    sample_tumor_status: sm1.sample_tumor_status,
+                    tumor_classification: sm1.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_3
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_3) as sample_diagnosis_file_filters
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(dg:diagnosis)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm2.anatomic_site, ';'),
+                    participant_age_at_collection: sm2.participant_age_at_collection,
+                    sample_tumor_status: sm2.sample_tumor_status,
+                    tumor_classification: sm2.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_4
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_4) as sample_diagnosis_file_filter
+            OPTIONAL MATCH (p)<-[*..4]-(file)
+            WHERE (file:clinical_measure_file OR file: sequencing_file OR file:pathology_file OR file:radiology_file OR file:methylation_array_file OR file:cytogenomic_file)
+            OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+            OPTIONAL MATCH (p)<-[:of_synonym]-(sy:synonym)
+            OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
+            OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
+            OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
+            WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, file, st, stf, stp
+                        WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, file, st, stf, stp
+            RETURN DISTINCT
+              p.id as id,
+              p.participant_id as participant_id,
+              apoc.text.split(p.race, ';') as race,
+              p.race as race_str,
+              p.sex_at_birth as sex_at_birth,
+              apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
+              survival_filters as survival_filters,
+              sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
+              st.study_id as study_id,
+              st.dbgap_accession as dbgap_accession,
+              st.study_acronym as study_acronym,
+              st.study_name as study_name,
+              COUNT(DISTINCT file.id) as file_count,
+              COLLECT(DISTINCT file.id) as files
+
+
   - index_name: diagnosis
     type: neo4j
     mapping:
@@ -1520,8 +1895,17 @@ Indices:
             type: keyword
           library_strategy:
             type: keyword
-      last_known_survival_status:
-        type: keyword
+      survival_filters:
+        type: nested
+        properties:
+          last_known_survival_status:
+            type: keyword
+          age_at_event_free_survival_status:
+            type: integer
+          event_free_survival_status:
+            type: keyword
+          first_event:
+            type: keyword
       file_count:
         type: integer
       direct_file_count:
@@ -1633,12 +2017,19 @@ Indices:
             direct_file_count: COUNT(DISTINCT direct_file.id)
           }) AS opensearch_data
           OPTIONAL MATCH (sm)-[*..3]->(:participant)<-[:of_survival]-(su:survival)
-          WITH sm, opensearch_data, COLLECT(DISTINCT su.last_known_survival_status) as vital_status
+          WITH sm, opensearch_data, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
+            WITH sm, opensearch_data,  COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
           WITH sm, apoc.map.merge(opensearch_data, {
-            last_known_survival_status: CASE 
-                WHEN 'Dead' IN vital_status THEN ['Dead']
-                ELSE vital_status 
-              END
+            survival_filters: survival_filters
           }) AS opensearch_data
           return opensearch_data
         page_size: 500
@@ -1678,7 +2069,7 @@ Indices:
                 tumor_stage_source: dg.tumor_stage_source,
                 diagnosis: dg.diagnosis
             }) AS diagnosis_filters,
-            null as last_known_survival_status,
+            null AS survival_filters,
             CASE COLLECT(file) WHEN [] THEN []
                       ELSE COLLECT(DISTINCT {
                           assay_method: CASE LABELS(file)[0]
@@ -1817,8 +2208,17 @@ Indices:
             type: keyword   
           diagnosis:
             type: keyword
-      last_known_survival_status:
-        type: keyword
+      survival_filters:
+        type: nested
+        properties:
+          last_known_survival_status:
+            type: keyword
+          age_at_event_free_survival_status:
+            type: integer
+          event_free_survival_status:
+            type: keyword
+          first_event:
+            type: keyword
       library_selection:
         type: keyword
         fields:

From d7e77615f32bb27be72a40c6956a035aea07467d Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 9 Oct 2024 12:36:13 -0400
Subject: [PATCH 09/23] update file

---
 config/es_indices_ccdi_model.yml | 114 ++++++++++++++++++++++++-------
 1 file changed, 89 insertions(+), 25 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 5551d89..dce5eec 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -2172,8 +2172,18 @@ Indices:
                 type: keyword   
               diagnosis:
                 type: keyword
-          last_known_survival_status:
-            type: keyword
+          survival_filters:
+            type: nested
+            properties:
+              last_known_survival_status:
+                type: keyword
+              age_at_event_free_survival_status:
+                type: integer
+              event_free_survival_status:
+                type: keyword
+              first_event:
+                type: keyword
+
       participant_filters:
         type: nested
         properties:
@@ -2338,6 +2348,18 @@ Indices:
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
+          with file, p, st, stf, stp, sample_diagnosis_filter,COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
+          with file, p, st, stf, stp, sample_diagnosis_filter,COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
+
           RETURN DISTINCT
             file.id as id,
             p.id as pid,
@@ -2367,8 +2389,7 @@ Indices:
               sex_at_birth: p.sex_at_birth
             }) AS participant_filters,
             sample_diagnosis_filter AS sample_diagnosis_filters,
-            case when 'Dead' in COLLECT(DISTINCT su.last_known_survival_status) then ['Dead']
-                  else COLLECT(DISTINCT su.last_known_survival_status) end as last_known_survival_status,     
+            survival_filters as survival_filters,  
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,
@@ -2461,15 +2482,23 @@ Indices:
                                         tumor_grade_source: dg.tumor_grade_source,
                                         tumor_stage_source: dg.tumor_stage_source,
                                         diagnosis: dg.diagnosis
-                                    })) AS sample_diagnosis_filter_6, COLLECT(DISTINCT su.last_known_survival_status) as vital_status
-          with file, p, apoc.coll.union(sample_diagnosis_filter_5, sample_diagnosis_filter_6) as sample_diagnosis_filter, vital_status
+                                    })) AS sample_diagnosis_filter_6, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
+          with file, p, apoc.coll.union(sample_diagnosis_filter_5, sample_diagnosis_filter_6) as sample_diagnosis_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
           with file, collect(DISTINCT {
                 participant_id: p.participant_id,
                 race: apoc.text.split(p.race, ';'),
                 sex_at_birth: p.sex_at_birth,
                 sample_diagnosis_filters: sample_diagnosis_filter,
-                last_known_survival_status: case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end
+                survival_filters: survival_filters
             }) as combined_filter_1
           MATCH (st:study)<-[:of_clinical_measure_file]-(file)
           OPTIONAL MATCH (st)<--(cl)<--(sm:sample)
@@ -2494,7 +2523,7 @@ Indices:
                 race: null,
                 sex_at_birth: null,
                 sample_diagnosis_filters: sample_diagnosis_filter,
-                last_known_survival_status: null
+                survival_filters: null
           }) as combined_filter_2
           with file, apoc.coll.union(combined_filter_1, combined_filter_2) as combined_filter
           MATCH (st:study)<-[:of_clinical_measure_file]-(file)
@@ -2523,7 +2552,7 @@ Indices:
             combined_filter as combined_filters,
             null as participant_filters,
             null as sample_diagnosis_filters,
-            null as last_known_survival_status,
+            null as survival_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,
@@ -2632,7 +2661,17 @@ Indices:
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with file, p, sample_diagnosis_filter, sm1, sm, st, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, stf, stp
+          with file, p, sample_diagnosis_filter, sm1, sm, st, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
+          with file, p, sample_diagnosis_filter, sm1, sm, st, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
           RETURN DISTINCT
             file.id as id,
             p.id as pid,
@@ -2666,8 +2705,7 @@ Indices:
                 sex_at_birth: p.sex_at_birth
             }) AS participant_filters,
             sample_diagnosis_filter AS sample_diagnosis_filters,
-            case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,
+            survival_filters as survival_filters,
             CASE LABELS(file)[0] WHEN 'sequencing_file' THEN file.library_selection
                                         ELSE null END AS library_selection,
             CASE LABELS(file)[0] WHEN 'sequencing_file' THEN file.library_source_material
@@ -2730,7 +2768,7 @@ Indices:
                 tumor_stage_source: dg.tumor_stage_source,
                 diagnosis: dg.diagnosis
             }) AS sample_diagnosis_filters, 
-            null as last_known_survival_status,
+            null as survival_filters,
             CASE LABELS(file)[0]
                       WHEN 'sequencing_file' THEN file.library_selection
                       ELSE null END AS library_selection,
@@ -2838,7 +2876,17 @@ Indices:
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with distinct p, sm, st, sample_diagnosis_filter, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, stf, stp
+          with distinct p, sm, st, sample_diagnosis_filter, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
+          with distinct p, sm, st, sample_diagnosis_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
           RETURN DISTINCT
             null as id,
             p.id as pid,
@@ -2862,9 +2910,7 @@ Indices:
                 race: apoc.text.split(p.race, ';'),
                 sex_at_birth: p.sex_at_birth
             }) AS participant_filters,
-            case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,         
-            sample_diagnosis_filter AS sample_diagnosis_filters,
+            survival_filters as survival_filters, sample_diagnosis_filter AS sample_diagnosis_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,
@@ -2876,7 +2922,17 @@ Indices:
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with sm, p, st, dg, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, stf, stp
+          with sm, p, st, dg, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
+          with sm, p, st, dg, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
           RETURN DISTINCT
             null as id,
             p.id as pid,
@@ -2914,8 +2970,7 @@ Indices:
                 tumor_stage_source: dg.tumor_stage_source,
                 diagnosis: dg.diagnosis
             }) AS sample_diagnosis_filters,
-            case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,
+            survival_filters as survival_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,
@@ -2947,7 +3002,7 @@ Indices:
             null as files,
             null as combined_filters,
             null AS participant_filters,
-            null as last_known_survival_status,
+            null as survival_filters,
             COLLECT(DISTINCT {
                 sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
                 participant_age_at_collection: sm.participant_age_at_collection,
@@ -2973,7 +3028,17 @@ Indices:
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with p, st, dg, COLLECT(DISTINCT su.last_known_survival_status) as vital_status, stf, stp
+          with p, st, dg, COLLECT(DISTINCT CASE 
+                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
+                ELSE su.last_known_survival_status
+            END) AS last_known_survival_status,
+            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
+            COLLECT(DISTINCT su.first_event) as first_event,
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
+          with p, st, dg, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
+              event_free_survival_status: event_free_survival_status, 
+              first_event: first_event,
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
           RETURN DISTINCT
             null as id,
             p.id as pid,
@@ -3011,8 +3076,7 @@ Indices:
                 tumor_stage_source: dg.tumor_stage_source,
                 diagnosis_classification: dg.diagnosis_classification
             }) AS sample_diagnosis_filters, 
-            case when 'Dead' in vital_status then ['Dead']
-                  else vital_status end as last_known_survival_status,
+            survival_filters as survival_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,

From c22da39ee7c99fd21da1b2b43bf9a85396c75659 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 10:56:45 -0400
Subject: [PATCH 10/23]  add treatments indices

---
 config/es_indices_ccdi_model.yml | 604 ++++++++++++++++++++++++++++---
 1 file changed, 556 insertions(+), 48 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index dce5eec..2db6f7a 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -24,6 +24,22 @@ Indices:
             type: keyword
           first_event:
             type: keyword
+      treatment_filters:
+        type: nested
+        properties:
+          treatment_type:
+            type: keyword
+          treatment_agent:
+            type: keyword
+          age_at_treatment_start:
+            type: integer
+      treatment_response_filters:
+        type: nested
+        properties:
+          response_category:
+            type: keyword
+          age_at_response:
+            type: integer
       sample_diagnosis_file_filters:
         type: nested
         properties:
@@ -332,21 +348,21 @@ Indices:
                 }) AS sample_diagnosis_filters_4
             with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_4) as sample_diagnosis_file_filter
             OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+            OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+            OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
             WITH p, sample_diagnosis_file_filter,  st, stf, stp,
-            COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
-            WITH p, sample_diagnosis_file_filter, st, stf, stp,  COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
+            COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters      
             RETURN DISTINCT
               p.id as id,
               p.id as pid,
@@ -354,6 +370,8 @@ Indices:
               apoc.text.split(p.race, ';') as race,
               p.sex_at_birth as sex_at_birth,
               survival_filters as survival_filters,
+              treatment_filters as treatment_filters,
+              treatment_response_filters as treatment_response_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
               st.study_id as study_id,
               st.dbgap_accession as dbgap_accession,
@@ -397,6 +415,11 @@ Indices:
                 event_free_survival_status: null,
                 first_event: null,
             }) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: null,
+            treatment_agent: null,
+            age_at_treatment_start: null}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: null,
+            age_at_response: null}) as treatment_response_filters,            
             st.study_id as study_id,
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
@@ -423,6 +446,11 @@ Indices:
                 null as event_free_survival_status,
                 null as first_event,
             }) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: null,
+            treatment_agent: null,
+            age_at_treatment_start: null}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: null,
+            age_at_response: null}) as treatment_response_filters,
             COLLECT(DISTINCT {
                 sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
                 participant_age_at_collection: sm.participant_age_at_collection,
@@ -491,6 +519,22 @@ Indices:
             type: keyword
           first_event:
             type: keyword
+      treatment_filters:
+        type: nested
+        properties:
+          treatment_type:
+            type: keyword
+          treatment_agent:
+            type: keyword
+          age_at_treatment_start:
+            type: integer
+      treatment_response_filters:
+        type: nested
+        properties:
+          response_category:
+            type: keyword
+          age_at_response:
+            type: integer
       sample_diagnosis_file_filters:
         type: nested
         properties:
@@ -807,21 +851,22 @@ Indices:
             OPTIONAL MATCH (p)<-[*..4]-(file)
             WHERE (file:clinical_measure_file OR file: sequencing_file OR file:pathology_file OR file:radiology_file OR file:methylation_array_file OR file:cytogenomic_file)
             OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+            OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+            OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
             OPTIONAL MATCH (p)<-[:of_synonym]-(sy:synonym)
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-            WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, file, st, stf, stp
-                        WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, file, st, stf, stp
+            WITH p, sy, sample_diagnosis_file_filter, 
+            COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters,file, st, stf, stp
             RETURN DISTINCT
               p.id as id,
               p.participant_id as participant_id,
@@ -830,6 +875,8 @@ Indices:
               p.sex_at_birth as sex_at_birth,
               apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
               survival_filters as survival_filters,
+              treatment_filters as treatment_filters,
+              treatment_response_filters as treatment_response_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
               st.study_id as study_id,
               st.dbgap_accession as dbgap_accession,
@@ -848,11 +895,392 @@ Indices:
         normalizer: lowercase
       race:
         type: keyword
-      race_str:
-        type: keyword
       sex_at_birth:
         type: keyword
-      alternate_participant_id:
+      last_known_survival_status:
+        type: keyword
+      age_at_event_free_survival_status:
+        type: integer
+      event_free_survival_status:
+        type: keyword
+      first_event:
+        type: keyword
+      treatment_filters:
+        type: nested
+        properties:
+          treatment_type:
+            type: keyword
+          treatment_agent:
+            type: keyword
+          age_at_treatment_start:
+            type: integer
+      treatment_response_filters:
+        type: nested
+        properties:
+          response_category:
+            type: keyword
+          age_at_response:
+            type: integer
+      sample_diagnosis_file_filters:
+        type: nested
+        properties:
+          sample_anatomic_site:
+            type: keyword
+          participant_age_at_collection:
+            type: integer
+          sample_tumor_status:
+            type: keyword
+          tumor_classification:
+            type: keyword
+          age_at_diagnosis:
+            type: integer
+          diagnosis_anatomic_site:
+            type: keyword
+          disease_phase:
+            type: keyword
+          diagnosis_classification_system:
+            type: keyword
+          diagnosis_basis:
+            type: keyword
+          tumor_grade_source:
+            type: keyword  
+          tumor_stage_source:
+            type: keyword              
+          diagnosis:
+            type: keyword
+          assay_method:
+            type: keyword
+          file_type:
+            type: keyword
+          library_selection:
+            type: keyword
+          library_source_material:
+            type: keyword
+          library_source_molecule:
+            type: keyword
+          library_strategy:
+            type: keyword
+      study_id:
+        type: keyword
+      dbgap_accession:
+        type: keyword
+      study_acronym:
+        type: keyword
+      study_name:
+        type: keyword
+      file_count:
+        type: integer
+      files:
+        type: text
+        fields:
+          keyword:
+            type: keyword
+    # Cypher query will be used to retrieve data from Neo4j, and index into Elasticsearch
+    cypher_queries:
+      - query: |
+          MATCH (p:participant)
+          optional match (p)<--(sm:sample)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with distinct p, sm, file
+          with p, collect(DISTINCT {
+                      sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                      participant_age_at_collection: sm.participant_age_at_collection,
+                      sample_tumor_status: sm.sample_tumor_status,
+                      tumor_classification: sm.tumor_classification,
+                      assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                      file_type: file.file_type,
+                      library_source_material: null,
+                      library_source_molecule: null,
+                      library_strategy: null
+              }) as sample_clinical_radiology_file_filter
+          optional match (p)<--(sm:sample)<--(file)
+          where (file: sequencing_file or file: methylation_array_file or file: pathology_file or file: cytogenomic_file)
+          with p, sample_clinical_radiology_file_filter, collect(DISTINCT {
+                      sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                      participant_age_at_collection: sm.participant_age_at_collection,
+                      sample_tumor_status: sm.sample_tumor_status,
+                      tumor_classification: sm.tumor_classification,
+                      assay_method: CASE LABELS(file)[0]
+                                              WHEN 'sequencing_file' THEN 'Sequencing'
+                                              WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                              WHEN 'pathology_file' THEN 'Pathology imaging'
+                                              WHEN 'methylation_array_file' THEN 'Methylation array'
+                                              ELSE null END,
+                      file_type: file.file_type,
+                      library_selection: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_selection
+                                    ELSE null END,
+                      library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                      library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                      library_strategy: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_strategy
+                                    ELSE null END
+              }) as sample_sequencing_cytogenomic_pathology_methylation_file_filter
+            with p, apoc.coll.union(sample_clinical_radiology_file_filter, sample_sequencing_cytogenomic_pathology_methylation_file_filter) as sample_file_filters
+            optional match (p)<--(dg:diagnosis)
+            with p, sample_file_filters, dg
+            unwind sample_file_filters as sample_file_filter
+            with p, collect(apoc.map.merge(sample_file_filter, {
+                age_at_diagnosis: dg.age_at_diagnosis,
+                diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                disease_phase: dg.disease_phase,
+                diagnosis_classification_system: dg.diagnosis_classification_system,
+                diagnosis_basis: dg.diagnosis_basis, 
+                tumor_grade_source: dg.tumor_grade_source,
+                tumor_stage_source: dg.tumor_stage_source,          
+                diagnosis: dg.diagnosis
+              })) as sample_diagnosis_file_filter
+            optional match (p)<--(sm:sample)<--(dg:diagnosis)
+            optional match (sm)<--(file)
+            where (file: sequencing_file or file: methylation_array_file or file: pathology_file or file: cytogenomic_file)
+            with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                    participant_age_at_collection: sm.participant_age_at_collection,
+                    sample_tumor_status: sm.sample_tumor_status,
+                    tumor_classification: sm.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_1
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_1) as sample_diagnosis_file_filters
+            optional match (p)<--(sm:sample)<--(dg:diagnosis)
+            optional match (p)<--(file)
+            where (file: clinical_measure_file or file: radiology_file)
+            with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                    participant_age_at_collection: sm.participant_age_at_collection,
+                    sample_tumor_status: sm.sample_tumor_status,
+                    tumor_classification: sm.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_2
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_2) as sample_diagnosis_file_filter
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(file)
+          WHERE (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
+          optional Match (sm1)<--(dg:diagnosis)
+          with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm1.anatomic_site, ';'),
+                    participant_age_at_collection: sm1.participant_age_at_collection,
+                    sample_tumor_status: sm1.sample_tumor_status,
+                    tumor_classification: sm1.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_1
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_1) as sample_diagnosis_file_filters
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(file)
+          WHERE (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
+          optional Match (sm2)<--(dg:diagnosis)
+          with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm2.anatomic_site, ';'),
+                    participant_age_at_collection: sm2.participant_age_at_collection,
+                    sample_tumor_status: sm2.sample_tumor_status,
+                    tumor_classification: sm2.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_2
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_2) as sample_diagnosis_file_filter
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm1)<--(dg:diagnosis)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm1.anatomic_site, ';'),
+                    participant_age_at_collection: sm1.participant_age_at_collection,
+                    sample_tumor_status: sm1.sample_tumor_status,
+                    tumor_classification: sm1.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_3
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_3) as sample_diagnosis_file_filters
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(dg:diagnosis)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm2.anatomic_site, ';'),
+                    participant_age_at_collection: sm2.participant_age_at_collection,
+                    sample_tumor_status: sm2.sample_tumor_status,
+                    tumor_classification: sm2.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_4
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_4) as sample_diagnosis_file_filter
+            OPTIONAL MATCH (p)<-[*..4]-(file)
+            WHERE (file:clinical_measure_file OR file: sequencing_file OR file:pathology_file OR file:radiology_file OR file:methylation_array_file OR file:cytogenomic_file)
+            OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+            OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+            OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
+            OPTIONAL MATCH (p)<-[:of_synonym]-(sy:synonym)
+            OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
+            OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
+            OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
+            WITH su, p, sy, sample_diagnosis_file_filter,  
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, file, st, stf, stp            
+            RETURN DISTINCT
+              p.id as id,
+              p.participant_id as participant_id,
+              apoc.text.split(p.race, ';') as race,
+              p.race as race_str,
+              p.sex_at_birth as sex_at_birth,
+              apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
+              su.last_known_survival_status as last_known_survival_status,
+              su.age_at_event_free_survival_status as age_at_event_free_survival_status,
+              su.event_free_survival_status as event_free_survival_status,
+              su.first_event as first_event,
+              treatment_filters as treatment_filters,
+              treatment_response_filters as treatment_response_filters,
+              sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
+              st.study_id as study_id,
+              st.dbgap_accession as dbgap_accession,
+              st.study_acronym as study_acronym,
+              st.study_name as study_name,
+              COUNT(DISTINCT file.id) as file_count,
+              COLLECT(DISTINCT file.id) as files
+
+  - index_name: treatments
+    type: neo4j
+    mapping:
+      id:
+        type: keyword
+      participant_id:
+        type: keyword
+        normalizer: lowercase
+      race:
+        type: keyword
+      sex_at_birth:
         type: keyword
       survival_filters:
         type: nested
@@ -865,6 +1293,19 @@ Indices:
             type: keyword
           first_event:
             type: keyword
+      treatment_type:
+        type: keyword
+      treatment_agent:
+        type: keyword
+      age_at_treatment_start:
+        type: integer
+      treatment_response_filters:
+        type: nested
+        properties:
+          response_category:
+            type: keyword
+          age_at_response:
+            type: integer
       sample_diagnosis_file_filters:
         type: nested
         properties:
@@ -1181,21 +1622,18 @@ Indices:
             OPTIONAL MATCH (p)<-[*..4]-(file)
             WHERE (file:clinical_measure_file OR file: sequencing_file OR file:pathology_file OR file:radiology_file OR file:methylation_array_file OR file:cytogenomic_file)
             OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+            OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+            OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
             OPTIONAL MATCH (p)<-[:of_synonym]-(sy:synonym)
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-            WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, file, st, stf, stp
-                        WITH p, sy, sample_diagnosis_file_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, file, st, stf, stp
+            WITH su, p, sy, sample_diagnosis_file_filter,  
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, file, st, stf, stp            
             RETURN DISTINCT
               p.id as id,
               p.participant_id as participant_id,
@@ -1203,7 +1641,12 @@ Indices:
               p.race as race_str,
               p.sex_at_birth as sex_at_birth,
               apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
-              survival_filters as survival_filters,
+              su.last_known_survival_status as last_known_survival_status,
+              su.age_at_event_free_survival_status as age_at_event_free_survival_status,
+              su.event_free_survival_status as event_free_survival_status,
+              su.first_event as first_event,
+              treatment_filters as treatment_filters,
+              treatment_response_filters as treatment_response_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
               st.study_id as study_id,
               st.dbgap_accession as dbgap_accession,
@@ -1213,6 +1656,7 @@ Indices:
               COLLECT(DISTINCT file.id) as files
 
 
+
   - index_name: diagnosis
     type: neo4j
     mapping:
@@ -1269,6 +1713,22 @@ Indices:
             type: keyword
           first_event:
             type: keyword
+      treatment_filters:
+        type: nested
+        properties:
+          treatment_type:
+            type: keyword
+          treatment_agent:
+            type: keyword
+          age_at_treatment_start:
+            type: integer
+      treatment_response_filters:
+        type: nested
+        properties:
+          response_category:
+            type: keyword
+          age_at_response:
+            type: integer
       sample_file_filters:
         type: nested
         properties:
@@ -1431,7 +1891,9 @@ Indices:
           OPTIONAL MATCH (p)<-[*..4]-(file)
           WHERE (file:clinical_measure_file OR file: sequencing_file OR file:pathology_file OR file:radiology_file OR file:methylation_array_file OR file:cytogenomic_file)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
-          with p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, dg, file, su
+            OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+            OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
+          with p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, dg, file, su, tm, tr
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
@@ -1441,11 +1903,16 @@ Indices:
             END) AS last_known_survival_status,
             COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
             COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, st, stf, stp, dg
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, 
+                        COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp, dg
           WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
               event_free_survival_status: event_free_survival_status, 
               first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp, dg
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp, dg, treatment_filters,treatment_response_filters
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -1468,7 +1935,9 @@ Indices:
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
             st.study_name as study_name,
-            survival_filters as survival_filters,    
+            survival_filters as survival_filters, 
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,  
             apoc.coll.union(cell_line_pdx_file_filters, general_file_filters) + participant_clinical_measure_file_filters + participant_radiology_file_filters AS sample_file_filters,
             COUNT(DISTINCT file.id) as file_count,
             COLLECT(DISTINCT file.id) as files
@@ -1537,6 +2006,8 @@ Indices:
           optional match (sm)<--(file)
           where (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
@@ -1546,11 +2017,16 @@ Indices:
           END) AS last_known_survival_status,
           COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
           COLLECT(DISTINCT su.first_event) as first_event,
-          COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, st, stf, stp
+          COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, 
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp
           WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
           event_free_survival_status: event_free_survival_status, 
           first_event: first_event,
-          age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp
+          age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp,treatment_filters,treatment_response_filters	
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -1573,7 +2049,9 @@ Indices:
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
             st.study_name as study_name,
-            survival_filters as survival_filters,      
+            survival_filters as survival_filters, 
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,     
             sample_file_filter AS sample_file_filters,
             COUNT(DISTINCT file.id) as file_count,
             COLLECT(DISTINCT file.id) as files
@@ -1622,6 +2100,8 @@ Indices:
           with dg, sample_file_filter, collect(distinct file.id) as files, apoc.coll.union(collect(distinct sm1.id), collect(distinct sm.id)) as sid, apoc.coll.union(collect(distinct sm1.sample_id), collect(distinct sm.sample_id))  as sample_id
           optional match (p:participant)<-[*..4]-(dg)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
@@ -1631,11 +2111,16 @@ Indices:
             END) AS last_known_survival_status,
             COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
             COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, st, stf, stp
+            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp
           WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
               event_free_survival_status: event_free_survival_status, 
               first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp
+              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp,treatment_filters,treatment_response_filters	
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -1658,7 +2143,9 @@ Indices:
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
             st.study_name as study_name,
-            survival_filters as survival_filters,      
+            survival_filters as survival_filters,
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,    
             sample_file_filter AS sample_file_filters,
             size(files) as file_count,
             files as files
@@ -1726,7 +2213,12 @@ Indices:
                 age_at_event_free_survival_status: null,
                 event_free_survival_status: null,
                 first_event: null,
-            }) AS survival_filters,   
+            }) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: null,
+            treatment_agent: null,
+            age_at_treatment_start: null}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: null,
+            age_at_response: null}) as treatment_response_filters, 
             sample_file_filter AS sample_file_filters,
             COUNT(DISTINCT file.id) as file_count,
             COLLECT(DISTINCT file.id) as files
@@ -1906,6 +2398,22 @@ Indices:
             type: keyword
           first_event:
             type: keyword
+      treatment_filters:
+        type: nested
+        properties:
+          treatment_type:
+            type: keyword
+          treatment_agent:
+            type: keyword
+          age_at_treatment_start:
+            type: integer
+      treatment_response_filters:
+        type: nested
+        properties:
+          response_category:
+            type: keyword
+          age_at_response:
+            type: integer
       file_count:
         type: integer
       direct_file_count:

From bbbd894f3da4f73ce16ee9e9299ebb929ee1a08e Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 11:28:53 -0400
Subject: [PATCH 11/23] add treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 647 +++++++++++++++++++++++++------
 1 file changed, 523 insertions(+), 124 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 2db6f7a..25fbf23 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -1628,10 +1628,10 @@ Indices:
             OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
             OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
             OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-            WITH su, p, sy, sample_diagnosis_file_filter,  
-            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
-            treatment_agent: tm.treatment_agent,
-            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            WITH tm, p, sy, sample_diagnosis_file_filter,	COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{response_category: tr.response_category,
             age_at_response: tr.age_at_response}) as treatment_response_filters, file, st, stf, stp            
             RETURN DISTINCT
@@ -1641,10 +1641,10 @@ Indices:
               p.race as race_str,
               p.sex_at_birth as sex_at_birth,
               apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
-              su.last_known_survival_status as last_known_survival_status,
-              su.age_at_event_free_survival_status as age_at_event_free_survival_status,
-              su.event_free_survival_status as event_free_survival_status,
-              su.first_event as first_event,
+              survival_filters as survival_filters,
+              tm.treatment_type as treatment_type,
+              tm.treatment_agent as treatment_agent,
+              tm.age_at_treatment_start as age_at_treatment_start,
               treatment_filters as treatment_filters,
               treatment_response_filters as treatment_response_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
@@ -1655,7 +1655,390 @@ Indices:
               COUNT(DISTINCT file.id) as file_count,
               COLLECT(DISTINCT file.id) as files
 
-
+  - index_name: treatment_responses
+    type: neo4j
+    mapping:
+      id:
+        type: keyword
+      participant_id:
+        type: keyword
+        normalizer: lowercase
+      race:
+        type: keyword
+      sex_at_birth:
+        type: keyword
+      survival_filters:
+        type: nested
+        properties:
+          last_known_survival_status:
+            type: keyword
+          age_at_event_free_survival_status:
+            type: integer
+          event_free_survival_status:
+            type: keyword
+          first_event:
+            type: keyword
+      treatment_filters:
+        type: nested
+        properties:
+          treatment_type:
+            type: keyword
+          treatment_agent:
+            type: keyword
+          age_at_treatment_start:
+            type: integer
+      response_category:
+        type: keyword
+      age_at_response:
+        type: integer
+      sample_diagnosis_file_filters:
+        type: nested
+        properties:
+          sample_anatomic_site:
+            type: keyword
+          participant_age_at_collection:
+            type: integer
+          sample_tumor_status:
+            type: keyword
+          tumor_classification:
+            type: keyword
+          age_at_diagnosis:
+            type: integer
+          diagnosis_anatomic_site:
+            type: keyword
+          disease_phase:
+            type: keyword
+          diagnosis_classification_system:
+            type: keyword
+          diagnosis_basis:
+            type: keyword
+          tumor_grade_source:
+            type: keyword  
+          tumor_stage_source:
+            type: keyword              
+          diagnosis:
+            type: keyword
+          assay_method:
+            type: keyword
+          file_type:
+            type: keyword
+          library_selection:
+            type: keyword
+          library_source_material:
+            type: keyword
+          library_source_molecule:
+            type: keyword
+          library_strategy:
+            type: keyword
+      study_id:
+        type: keyword
+      dbgap_accession:
+        type: keyword
+      study_acronym:
+        type: keyword
+      study_name:
+        type: keyword
+      file_count:
+        type: integer
+      files:
+        type: text
+        fields:
+          keyword:
+            type: keyword
+    # Cypher query will be used to retrieve data from Neo4j, and index into Elasticsearch
+    cypher_queries:
+      - query: |
+          MATCH (p:participant)
+          optional match (p)<--(sm:sample)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with distinct p, sm, file
+          with p, collect(DISTINCT {
+                      sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                      participant_age_at_collection: sm.participant_age_at_collection,
+                      sample_tumor_status: sm.sample_tumor_status,
+                      tumor_classification: sm.tumor_classification,
+                      assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                      file_type: file.file_type,
+                      library_source_material: null,
+                      library_source_molecule: null,
+                      library_strategy: null
+              }) as sample_clinical_radiology_file_filter
+          optional match (p)<--(sm:sample)<--(file)
+          where (file: sequencing_file or file: methylation_array_file or file: pathology_file or file: cytogenomic_file)
+          with p, sample_clinical_radiology_file_filter, collect(DISTINCT {
+                      sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                      participant_age_at_collection: sm.participant_age_at_collection,
+                      sample_tumor_status: sm.sample_tumor_status,
+                      tumor_classification: sm.tumor_classification,
+                      assay_method: CASE LABELS(file)[0]
+                                              WHEN 'sequencing_file' THEN 'Sequencing'
+                                              WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                              WHEN 'pathology_file' THEN 'Pathology imaging'
+                                              WHEN 'methylation_array_file' THEN 'Methylation array'
+                                              ELSE null END,
+                      file_type: file.file_type,
+                      library_selection: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_selection
+                                    ELSE null END,
+                      library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                      library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                      library_strategy: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_strategy
+                                    ELSE null END
+              }) as sample_sequencing_cytogenomic_pathology_methylation_file_filter
+            with p, apoc.coll.union(sample_clinical_radiology_file_filter, sample_sequencing_cytogenomic_pathology_methylation_file_filter) as sample_file_filters
+            optional match (p)<--(dg:diagnosis)
+            with p, sample_file_filters, dg
+            unwind sample_file_filters as sample_file_filter
+            with p, collect(apoc.map.merge(sample_file_filter, {
+                age_at_diagnosis: dg.age_at_diagnosis,
+                diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                disease_phase: dg.disease_phase,
+                diagnosis_classification_system: dg.diagnosis_classification_system,
+                diagnosis_basis: dg.diagnosis_basis, 
+                tumor_grade_source: dg.tumor_grade_source,
+                tumor_stage_source: dg.tumor_stage_source,          
+                diagnosis: dg.diagnosis
+              })) as sample_diagnosis_file_filter
+            optional match (p)<--(sm:sample)<--(dg:diagnosis)
+            optional match (sm)<--(file)
+            where (file: sequencing_file or file: methylation_array_file or file: pathology_file or file: cytogenomic_file)
+            with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                    participant_age_at_collection: sm.participant_age_at_collection,
+                    sample_tumor_status: sm.sample_tumor_status,
+                    tumor_classification: sm.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_1
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_1) as sample_diagnosis_file_filters
+            optional match (p)<--(sm:sample)<--(dg:diagnosis)
+            optional match (p)<--(file)
+            where (file: clinical_measure_file or file: radiology_file)
+            with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+                    participant_age_at_collection: sm.participant_age_at_collection,
+                    sample_tumor_status: sm.sample_tumor_status,
+                    tumor_classification: sm.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_2
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_2) as sample_diagnosis_file_filter
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(file)
+          WHERE (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
+          optional Match (sm1)<--(dg:diagnosis)
+          with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm1.anatomic_site, ';'),
+                    participant_age_at_collection: sm1.participant_age_at_collection,
+                    sample_tumor_status: sm1.sample_tumor_status,
+                    tumor_classification: sm1.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_1
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_1) as sample_diagnosis_file_filters
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(file)
+          WHERE (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
+          optional Match (sm2)<--(dg:diagnosis)
+          with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm2.anatomic_site, ';'),
+                    participant_age_at_collection: sm2.participant_age_at_collection,
+                    sample_tumor_status: sm2.sample_tumor_status,
+                    tumor_classification: sm2.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN 'Sequencing'
+                                    WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+                                    WHEN 'pathology_file' THEN 'Pathology imaging'
+                                    WHEN 'methylation_array_file' THEN 'Methylation array'
+                                    ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_selection
+                              ELSE null END,
+                    library_source_material: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_material
+                                    ELSE null END,
+                    library_source_molecule: CASE LABELS(file)[0]
+                                    WHEN 'sequencing_file' THEN file.library_source_molecule
+                                    ELSE null END,
+                    library_strategy: CASE LABELS(file)[0]
+                              WHEN 'sequencing_file' THEN file.library_strategy
+                              ELSE null END
+                }) AS sample_diagnosis_filters_2
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_2) as sample_diagnosis_file_filter
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm1)<--(dg:diagnosis)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with p, sample_diagnosis_file_filter, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm1.anatomic_site, ';'),
+                    participant_age_at_collection: sm1.participant_age_at_collection,
+                    sample_tumor_status: sm1.sample_tumor_status,
+                    tumor_classification: sm1.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_3
+            with p, apoc.coll.union(sample_diagnosis_file_filter, sample_diagnosis_filters_3) as sample_diagnosis_file_filters
+          optional MATCH (p)<-[:of_sample]-(sm1:sample)<--(cl)<--(sm2:sample)
+          WHERE (cl: cell_line or cl: pdx)
+          optional Match (sm2)<--(dg:diagnosis)
+          optional match (p)<--(file)
+          where (file: clinical_measure_file or file: radiology_file)
+          with p, sample_diagnosis_file_filters, COLLECT(DISTINCT {
+                    sample_anatomic_site: apoc.text.split(sm2.anatomic_site, ';'),
+                    participant_age_at_collection: sm2.participant_age_at_collection,
+                    sample_tumor_status: sm2.sample_tumor_status,
+                    tumor_classification: sm2.tumor_classification,
+                    age_at_diagnosis: dg.age_at_diagnosis,
+                    diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+                    disease_phase: dg.disease_phase,
+                    diagnosis_classification_system: dg.diagnosis_classification_system,
+                    diagnosis_basis: dg.diagnosis_basis, 
+                    tumor_grade_source: dg.tumor_grade_source,
+                    tumor_stage_source: dg.tumor_stage_source,          
+                    diagnosis: dg.diagnosis,
+                    assay_method: CASE labels(file)[0] WHEN 'clinical_measure_file' THEN 'Clinical data'
+                                        WHEN 'radiology_file' THEN 'Radiology imaging'
+                                        ELSE null END,
+                    file_type: file.file_type,
+                    library_selection: null,
+                    library_source_material: null,
+                    library_source_molecule: null,
+                    library_strategy: null
+                }) AS sample_diagnosis_filters_4
+            with p, apoc.coll.union(sample_diagnosis_file_filters, sample_diagnosis_filters_4) as sample_diagnosis_file_filter
+            OPTIONAL MATCH (p)<-[*..4]-(file)
+            WHERE (file:clinical_measure_file OR file: sequencing_file OR file:pathology_file OR file:radiology_file OR file:methylation_array_file OR file:cytogenomic_file)
+            OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+            OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+            OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
+            OPTIONAL MATCH (p)<-[:of_synonym]-(sy:synonym)
+            OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
+            OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
+            OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
+            WITH tr, p, sy, sample_diagnosis_file_filter,	COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters, file, st, stf, stp            
+            RETURN DISTINCT
+              p.id as id,
+              p.participant_id as participant_id,
+              apoc.text.split(p.race, ';') as race,
+              p.race as race_str,
+              p.sex_at_birth as sex_at_birth,
+              apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
+              survival_filters as survival_filters,
+              tr.response_category as response_category,
+              tr.age_at_response as age_at_response,
+              treatment_filters as treatment_filters,
+
+              sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
+              st.study_id as study_id,
+              st.dbgap_accession as dbgap_accession,
+              st.study_acronym as study_acronym,
+              st.study_name as study_name,
+              COUNT(DISTINCT file.id) as file_count,
+              COLLECT(DISTINCT file.id) as files
 
   - index_name: diagnosis
     type: neo4j
@@ -1897,22 +2280,15 @@ Indices:
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, 
-                        COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+          WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters, 
+          COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
             COLLECT(DISTINCT{response_category: tr.response_category,
             age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp, dg
-          WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp, dg, treatment_filters,treatment_response_filters
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -2011,22 +2387,15 @@ Indices:
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT CASE 
-              WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-              ELSE su.last_known_survival_status
-          END) AS last_known_survival_status,
-          COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-          COLLECT(DISTINCT su.first_event) as first_event,
-          COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, 
+          WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters, 
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
             COLLECT(DISTINCT{response_category: tr.response_category,
-            age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp
-          WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-          event_free_survival_status: event_free_survival_status, 
-          first_event: first_event,
-          age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp,treatment_filters,treatment_response_filters	
+            age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp         
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -2105,22 +2474,15 @@ Indices:
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status,
+          WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
             COLLECT(DISTINCT{response_category: tr.response_category,
-            age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp
-          WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, st, stf, stp,treatment_filters,treatment_response_filters	
+            age_at_response: tr.age_at_response}) as treatment_response_filters, st, stf, stp          
           RETURN DISTINCT
             dg.id as id,
             p.id as pid,
@@ -2525,20 +2887,26 @@ Indices:
             direct_file_count: COUNT(DISTINCT direct_file.id)
           }) AS opensearch_data
           OPTIONAL MATCH (sm)-[*..3]->(:participant)<-[:of_survival]-(su:survival)
-          WITH sm, opensearch_data, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
-            WITH sm, opensearch_data,  COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
+          WITH sm, opensearch_data, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters
           WITH sm, apoc.map.merge(opensearch_data, {
             survival_filters: survival_filters
           }) AS opensearch_data
+          OPTIONAL MATCH (sm)-[*..3]->(:participant)<-[:of_treatment]-(tm:treatment)
+          WITH sm, opensearch_data, COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters
+          WITH sm, apoc.map.merge(opensearch_data, {
+            treatment_filters: treatment_filters
+          }) AS opensearch_data
+          OPTIONAL MATCH (sm)-[*..3]->(:participant)<-[:of_treatment_response]-(tr:treatment_response)
+          WITH sm, opensearch_data, COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters
+          WITH sm, apoc.map.merge(opensearch_data, {
+            treatment_response_filters: treatment_response_filters
+          }) AS opensearch_data
           return opensearch_data
         page_size: 500
       - query: |
@@ -2578,6 +2946,8 @@ Indices:
                 diagnosis: dg.diagnosis
             }) AS diagnosis_filters,
             null AS survival_filters,
+            null as treatment_filters,
+            null as treatment_response_filters,
             CASE COLLECT(file) WHEN [] THEN []
                       ELSE COLLECT(DISTINCT {
                           assay_method: CASE LABELS(file)[0]
@@ -2737,6 +3107,22 @@ Indices:
             type: keyword
           first_event:
             type: keyword
+      treatment_filters:
+        type: nested
+        properties:
+          treatment_type:
+            type: keyword
+          treatment_agent:
+            type: keyword
+          age_at_treatment_start:
+            type: integer
+      treatment_response_filters:
+        type: nested
+        properties:
+          response_category:
+            type: keyword
+          age_at_response:
+            type: integer
       library_selection:
         type: keyword
         fields:
@@ -2854,20 +3240,19 @@ Indices:
           MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_publication]-(pub:publication)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with file, sample_diagnosis_filter,COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
-          with file, sample_diagnosis_filter,COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
-
+          with file, sample_diagnosis_filter,COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters
           RETURN DISTINCT
             file.id as id,
             p.id as pid,
@@ -2964,6 +3349,8 @@ Indices:
           optional MATCH (p)<-[:of_sample]-(sm1:sample)<-[*2..2]-(sm:sample)
           OPTIONAL MATCH (sm)<--(dg:diagnosis)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           with file, p, sample_diagnosis_filter_5, apoc.coll.union(COLLECT(DISTINCT {
                                         sample_anatomic_site: apoc.text.split(sm1.anatomic_site, ';'),
                                         participant_age_at_collection: sm1.participant_age_at_collection,
@@ -2990,23 +3377,23 @@ Indices:
                                         tumor_grade_source: dg.tumor_grade_source,
                                         tumor_stage_source: dg.tumor_stage_source,
                                         diagnosis: dg.diagnosis
-                                    })) AS sample_diagnosis_filter_6, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status
-          with file, p, apoc.coll.union(sample_diagnosis_filter_5, sample_diagnosis_filter_6) as sample_diagnosis_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters
+                                    })) AS sample_diagnosis_filter_6, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters
           with file, collect(DISTINCT {
                 participant_id: p.participant_id,
                 race: apoc.text.split(p.race, ';'),
                 sex_at_birth: p.sex_at_birth,
                 sample_diagnosis_filters: sample_diagnosis_filter,
-                survival_filters: survival_filters
+                survival_filters: survival_filters,
+                treatment_filters: treatment_filters,
+                treatment_response_filters: treatment_response_filters
             }) as combined_filter_1
           MATCH (st:study)<-[:of_clinical_measure_file]-(file)
           OPTIONAL MATCH (st)<--(cl)<--(sm:sample)
@@ -3167,19 +3554,19 @@ Indices:
           MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_publication]-(pub:publication)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with file, p, sample_diagnosis_filter, sm1, sm, st, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
-          with file, p, sample_diagnosis_filter, sm1, sm, st, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
+          with file, p, sample_diagnosis_filter, sm1, sm, st, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, stf, stp 
           RETURN DISTINCT
             file.id as id,
             p.id as pid,
@@ -3214,6 +3601,8 @@ Indices:
             }) AS participant_filters,
             sample_diagnosis_filter AS sample_diagnosis_filters,
             survival_filters as survival_filters,
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,
             CASE LABELS(file)[0] WHEN 'sequencing_file' THEN file.library_selection
                                         ELSE null END AS library_selection,
             CASE LABELS(file)[0] WHEN 'sequencing_file' THEN file.library_source_material
@@ -3277,6 +3666,8 @@ Indices:
                 diagnosis: dg.diagnosis
             }) AS sample_diagnosis_filters, 
             null as survival_filters,
+            null as treatment_filters,
+            null as treatment_response_filters,
             CASE LABELS(file)[0]
                       WHEN 'sequencing_file' THEN file.library_selection
                       ELSE null END AS library_selection,
@@ -3382,19 +3773,19 @@ Indices:
           with p, sm1, sm, apoc.coll.union(sample_diagnosis_filter_3, sample_diagnosis_filter_4) as sample_diagnosis_filter
           MATCH (st:study)<--(p)<--(sm1)<-[*2..2]-(sm)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with distinct p, sm, st, sample_diagnosis_filter, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
-          with distinct p, sm, st, sample_diagnosis_filter, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
+          with distinct p, sm, st, sample_diagnosis_filter, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, stf, stp
           RETURN DISTINCT
             null as id,
             p.id as pid,
@@ -3419,6 +3810,8 @@ Indices:
                 sex_at_birth: p.sex_at_birth
             }) AS participant_filters,
             survival_filters as survival_filters,
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,
@@ -3428,19 +3821,19 @@ Indices:
           where not ((sm)<-[*..3]-(:sequencing_file)) and not ((sm)<-[*..3]-(:cytogenomic_file)) and not ((sm)<-[*..3]-(:pathology_file)) and not ((sm)<-[*..3]-(:methylation_array_file)) and not ((p)<--(:radiology_file)) and not ((p)<--(:clinical_measure_file))
           OPTIONAL MATCH (p)<-[*..2]-(dg:diagnosis)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with sm, p, st, dg, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
-          with sm, p, st, dg, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
+          with sm, p, st, dg, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, stf, stp
           RETURN DISTINCT
             null as id,
             p.id as pid,
@@ -3479,6 +3872,8 @@ Indices:
                 diagnosis: dg.diagnosis
             }) AS sample_diagnosis_filters,
             survival_filters as survival_filters,
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,
@@ -3511,6 +3906,8 @@ Indices:
             null as combined_filters,
             null AS participant_filters,
             null as survival_filters,
+            null as treatment_filters,
+            null as treatment_response_filters,
             COLLECT(DISTINCT {
                 sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
                 participant_age_at_collection: sm.participant_age_at_collection,
@@ -3534,19 +3931,19 @@ Indices:
           where not ((p)<--(:sample)) and not ((p)<--(:radiology_file)) and not ((p)<--(:clinical_measure_file))
           OPTIONAL MATCH (p)<-[:of_diagnosis]-(dg:diagnosis)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with p, st, dg, COLLECT(DISTINCT CASE 
-                WHEN su.last_known_survival_status = 'Dead' THEN 'Dead'
-                ELSE su.last_known_survival_status
-            END) AS last_known_survival_status,
-            COLLECT(DISTINCT su.event_free_survival_status) as event_free_survival_status,
-            COLLECT(DISTINCT su.first_event) as first_event,
-            COLLECT(DISTINCT su.age_at_event_free_survival_status) as age_at_event_free_survival_status, stf, stp
-          with p, st, dg, COLLECT(DISTINCT {last_known_survival_status: last_known_survival_status, 
-              event_free_survival_status: event_free_survival_status, 
-              first_event: first_event,
-              age_at_event_free_survival_status: age_at_event_free_survival_status} ) AS survival_filters, stf, stp
+          with p, st, dg, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+              event_free_survival_status: su.event_free_survival_status, 
+              first_event: su.first_event,
+              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+            COLLECT(DISTINCT{treatment_type: tm.treatment_type,
+            treatment_agent: tm.treatment_agent,
+            age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
+            COLLECT(DISTINCT{response_category: tr.response_category,
+            age_at_response: tr.age_at_response}) as treatment_response_filters, stf, stp          
           RETURN DISTINCT
             null as id,
             p.id as pid,
@@ -3585,6 +3982,8 @@ Indices:
                 diagnosis_classification: dg.diagnosis_classification
             }) AS sample_diagnosis_filters, 
             survival_filters as survival_filters,
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,

From 36d9cab06b6fa56d51d9fe7984e19b276a17d330 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 13:03:45 -0400
Subject: [PATCH 12/23]  fix treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 25fbf23..39c43a7 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -1645,7 +1645,6 @@ Indices:
               tm.treatment_type as treatment_type,
               tm.treatment_agent as treatment_agent,
               tm.age_at_treatment_start as age_at_treatment_start,
-              treatment_filters as treatment_filters,
               treatment_response_filters as treatment_response_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
               st.study_id as study_id,
@@ -2031,7 +2030,6 @@ Indices:
               tr.response_category as response_category,
               tr.age_at_response as age_at_response,
               treatment_filters as treatment_filters,
-              treatment_response_filters as treatment_response_filters,
               sample_diagnosis_file_filter AS sample_diagnosis_file_filters,
               st.study_id as study_id,
               st.dbgap_accession as dbgap_accession,

From 4726022cb767804b141ff02bcd45a1c4809fa99a Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 14:18:13 -0400
Subject: [PATCH 13/23]  fix treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 39c43a7..e61086f 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -2272,9 +2272,9 @@ Indices:
           OPTIONAL MATCH (p)<-[*..4]-(file)
           WHERE (file:clinical_measure_file OR file: sequencing_file OR file:pathology_file OR file:radiology_file OR file:methylation_array_file OR file:cytogenomic_file)
           OPTIONAL MATCH (p)<-[:of_survival]-(su:survival)
-            OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
-            OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
-          with p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, dg, file, su. tm, tr
+          OPTIONAL MATCH (p)<-[:of_treatment]-(tm:treatment)
+          OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
+          with p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, dg, file, su, tm, tr
           OPTIONAL MATCH (st:study)<-[:of_participant]-(p)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
@@ -3242,7 +3242,7 @@ Indices:
           OPTIONAL MATCH (p)<-[:of_treatment_response]-(tr:treatment_response)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
-          with file, sample_diagnosis_filter,COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
+          with file, p, st, sample_diagnosis_filter,COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
               age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,

From 383c4bdca1897f5648d04b515d4b09902ffc85e8 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 14:27:59 -0400
Subject: [PATCH 14/23]  fix treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index e61086f..83b6eba 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -3384,6 +3384,7 @@ Indices:
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
             COLLECT(DISTINCT{response_category: tr.response_category,
             age_at_response: tr.age_at_response}) as treatment_response_filters
+          with file, p, apoc.coll.union(sample_diagnosis_filter_5, sample_diagnosis_filter_6) as sample_diagnosis_filter, survival_filters,  treatment_filters, treatment_response_filters
           with file, collect(DISTINCT {
                 participant_id: p.participant_id,
                 race: apoc.text.split(p.race, ';'),

From 1214adf9e2ad33c56fc257aaf4b11a451bbc447e Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 14:40:55 -0400
Subject: [PATCH 15/23] fix treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 83b6eba..da6bc41 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -3416,8 +3416,7 @@ Indices:
                 participant_id: null,
                 race: null,
                 sex_at_birth: null,
-                sample_diagnosis_filters: sample_diagnosis_filter,
-                survival_filters: survival_filters
+                sample_diagnosis_filters: sample_diagnosis_filter 
           }) as combined_filter_2
           with file, apoc.coll.union(combined_filter_1, combined_filter_2) as combined_filter
           MATCH (st:study)<-[:of_clinical_measure_file]-(file)

From 2769d28483f5710e5863c783192005aa7890c91f Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 16:21:30 -0400
Subject: [PATCH 16/23] fix treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index da6bc41..7708437 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -413,7 +413,7 @@ Indices:
                 last_known_survival_status: null,
                 age_at_event_free_survival_status: null,
                 event_free_survival_status: null,
-                first_event: null,
+                first_event: null
             }) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: null,
             treatment_agent: null,
@@ -444,7 +444,7 @@ Indices:
                 null as last_known_survival_status,
                 null as age_at_event_free_survival_status,
                 null as event_free_survival_status,
-                null as first_event,
+                null as first_event
             }) AS survival_filters,
             COLLECT(DISTINCT{null as treatment_type,
             tnull as treatment_agent,
@@ -2572,7 +2572,7 @@ Indices:
                 last_known_survival_status: null,
                 age_at_event_free_survival_status: null,
                 event_free_survival_status: null,
-                first_event: null,
+                first_event: null
             }) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: null,
             treatment_agent: null,
@@ -2944,7 +2944,7 @@ Indices:
                 diagnosis: dg.diagnosis
             }) AS diagnosis_filters,
             null AS survival_filters,
-            null as treatment_filters.
+            null as treatment_filters,
             null as treatment_response_filters,
             CASE COLLECT(file) WHEN [] THEN []
                       ELSE COLLECT(DISTINCT {

From 8a98aef766fa04d57ba718ba136523d42fe05dbd Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 16:25:38 -0400
Subject: [PATCH 17/23] fix treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 7708437..1bc02c0 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -3048,18 +3048,6 @@ Indices:
                 type: keyword   
               diagnosis:
                 type: keyword
-          survival_filters:
-            type: nested
-            properties:
-              last_known_survival_status:
-                type: keyword
-              age_at_event_free_survival_status:
-                type: integer
-              event_free_survival_status:
-                type: keyword
-              first_event:
-                type: keyword
-
       participant_filters:
         type: nested
         properties:
@@ -3677,7 +3665,7 @@ Indices:
                       WHEN 'sequencing_file' THEN file.library_strategy
                       ELSE null END AS library_strategy
       - query: |
-          MATCH (st:study)<--(p:participant)<--(sm1:sample)<-[*2..2]-(sm:sample)
+         MATCH (st:study)<--(p:participant)<--(sm1:sample)<-[*2..2]-(sm:sample)
           where not ((sm)<--(:sequencing_file)) and not ((sm)<--(:cytogenomic_file)) and not ((sm)<--(:pathology_file)) and not ((sm)<--(:methylation_array_file)) and not ((p)<--(:radiology_file)) and not ((p)<--(:clinical_measure_file))
           OPTIONAL MATCH (p)<-[:of_diagnosis]-(dg:diagnosis)
           with p, sm1, sm, apoc.coll.union(COLLECT(DISTINCT {
@@ -3846,7 +3834,7 @@ Indices:
             st.study_id AS study_id,
             st.dbgap_accession as dbgap_accession,
             st.study_acronym as study_acronym,
-            st.study_name as study_name,
+            st.study_name as study_name, 
             p.participant_id AS participant_id,
             sm.sample_id AS sample_id,
             null as files,

From b3eec61fceac2337c2f3b5490e35f2563e1d54f6 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Wed, 16 Oct 2024 17:47:27 -0400
Subject: [PATCH 18/23] fix treatments and treatment_response indices

---
 config/es_indices_ccdi_model.yml | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 1bc02c0..434a9a5 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -3021,6 +3021,33 @@ Indices:
             type: keyword
           sex_at_birth:
             type: keyword
+          survival_filters:
+            type: nested
+            properties:
+              last_known_survival_status:
+                type: keyword
+              age_at_event_free_survival_status:
+                type: integer
+              event_free_survival_status:
+                type: keyword
+              first_event:
+                type: keyword
+          treatment_filters:
+            type: nested
+            properties:
+              treatment_type:
+                type: keyword
+              treatment_agent:
+                type: keyword
+              age_at_treatment_start:
+                type: integer
+          treatment_response_filters:
+            type: nested
+            properties:
+              response_category:
+                type: keyword
+              age_at_response:
+                type: integer
           sample_diagnosis_filters:
             type: nested
             properties:
@@ -3269,6 +3296,8 @@ Indices:
             }) AS participant_filters,
             sample_diagnosis_filter AS sample_diagnosis_filters,
             survival_filters as survival_filters,  
+            treatment_filters as treatment_filters,
+            treatment_response_filters as treatment_response_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,
@@ -3434,6 +3463,8 @@ Indices:
             null as participant_filters,
             null as sample_diagnosis_filters,
             null as survival_filters,
+            null as treatment_filters,
+            null as treatment_response_filters,
             null AS library_selection,
             null AS library_source_material,
             null AS library_source_molecule,

From 8a0271a7d10cdda8b3d99c3cf7acc795d27221bd Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Thu, 17 Oct 2024 10:52:36 -0400
Subject: [PATCH 19/23] fix study_participant index

---
 config/es_indices_ccdi_model.yml | 124 ++++++++++++++++---------------
 1 file changed, 65 insertions(+), 59 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 434a9a5..ff5ad9d 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -427,69 +427,75 @@ Indices:
       - query: |
           MATCH (st:study)
           MATCH (st)<-[:of_cell_line|of_pdx]-(cl)<--(sm:sample)
-          Where (cl: cell_line or cl: pdx)
-          optional Match (sm)<--(dg:diagnosis)
-          optional Match (sm)<--(file)
-          WHERE (file: sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
+          WHERE (cl:cell_line OR cl:pdx)
+          OPTIONAL MATCH (sm)<--(dg:diagnosis)
+          OPTIONAL MATCH (sm)<--(file)
+          WHERE (file:sequencing_file OR file:pathology_file OR file:methylation_array_file OR file:cytogenomic_file)
           OPTIONAL MATCH (st)<-[:of_study_personnel]-(stp:study_personnel)
           OPTIONAL MATCH (st)<-[:of_study_funding]-(stf:study_funding)
           WITH file, dg, sm, st, stf, stp
           RETURN DISTINCT
-            null as id,
-            null as pid,
-            null as participant_id,
-            null as race,
-            null as sex_at_birth,
-            COLLECT(DISTINCT {
-                null as last_known_survival_status,
-                null as age_at_event_free_survival_status,
-                null as event_free_survival_status,
-                null as first_event
-            }) AS survival_filters,
-            COLLECT(DISTINCT{null as treatment_type,
-            tnull as treatment_agent,
-            null as age_at_treatment_start}) as treatment_filters,
-            COLLECT(DISTINCT{null as response_category,
-            null as age_at_response}) as treatment_response_filters  
-            COLLECT(DISTINCT {
-                sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
-                participant_age_at_collection: sm.participant_age_at_collection,
-                sample_tumor_status: sm.sample_tumor_status,
-                tumor_classification: sm.tumor_classification,
-                age_at_diagnosis: dg.age_at_diagnosis,
-                diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
-                disease_phase: dg.disease_phase,
-                diagnosis_classification_system: dg.diagnosis_classification_system,
-                diagnosis_basis: dg.diagnosis_basis, 
-                tumor_grade_source: dg.tumor_grade_source,
-                tumor_stage_source: dg.tumor_stage_source,          
-                diagnosis: dg.diagnosis,
-                assay_method: CASE LABELS(file)[0]
-                                WHEN 'sequencing_file' THEN 'Sequencing'
-                                WHEN 'cytogenomic_file' THEN 'Cytogenomic'
-                                WHEN 'pathology_file' THEN 'Pathology imaging'
-                                WHEN 'methylation_array_file' THEN 'Methylation array'
-                                ELSE null END,
-                file_type: CASE LABELS(file)[0]
-                          When null then null
-                          ELSE file.file_type end,
-                library_selection: CASE LABELS(file)[0]
-                          WHEN 'sequencing_file' THEN file.library_selection
-                          ELSE null END,
-                library_source_material: CASE LABELS(file)[0]
-                          WHEN 'sequencing_file' THEN file.library_source_material
-                          ELSE null END,
-                library_source_molecule: CASE LABELS(file)[0]
-                          WHEN 'sequencing_file' THEN file.library_source_molecule
-                          ELSE null END,
-                library_strategy: CASE LABELS(file)[0]
-                          WHEN 'sequencing_file' THEN file.library_strategy
-                          ELSE null END
-            }) AS sample_diagnosis_file_filters,
-            st.study_id as study_id,
-            st.dbgap_accession as dbgap_accession,
-            st.study_acronym as study_acronym,
-            st.study_name as study_name
+          null AS id,
+          null AS pid,
+          null AS participant_id,
+          null AS race,
+          null AS sex_at_birth,
+          COLLECT(DISTINCT {
+          last_known_survival_status: null,
+          age_at_event_free_survival_status: null,
+          event_free_survival_status: null,
+              first_event: null
+          }) AS survival_filters,
+          COLLECT(DISTINCT {
+          treatment_type: null,
+          treatment_agent: null,
+          age_at_treatment_start: null
+          }) AS treatment_filters,
+          COLLECT(DISTINCT {
+          response_category: null,
+          age_at_response: null
+          }) AS treatment_response_filters,
+          COLLECT(DISTINCT {
+          sample_anatomic_site: apoc.text.split(sm.anatomic_site, ';'),
+          participant_age_at_collection: sm.participant_age_at_collection,
+          sample_tumor_status: sm.sample_tumor_status,
+          tumor_classification: sm.tumor_classification,
+          age_at_diagnosis: dg.age_at_diagnosis,
+          diagnosis_anatomic_site: apoc.text.split(dg.anatomic_site, ';'),
+          disease_phase: dg.disease_phase,
+          diagnosis_classification_system: dg.diagnosis_classification_system,
+          diagnosis_basis: dg.diagnosis_basis,
+          tumor_grade_source: dg.tumor_grade_source,
+          tumor_stage_source: dg.tumor_stage_source,
+          diagnosis: dg.diagnosis,
+          assay_method: CASE LABELS(file)[0]
+              WHEN 'sequencing_file' THEN 'Sequencing'
+              WHEN 'cytogenomic_file' THEN 'Cytogenomic'
+              WHEN 'pathology_file' THEN 'Pathology imaging'
+              WHEN 'methylation_array_file' THEN 'Methylation array'
+              ELSE null END,
+          file_type: CASE LABELS(file)[0]
+              WHEN null THEN null
+              ELSE file.file_type END,
+          library_selection: CASE LABELS(file)[0]
+              WHEN 'sequencing_file' THEN file.library_selection
+              ELSE null END,
+          library_source_material: CASE LABELS(file)[0]
+              WHEN 'sequencing_file' THEN file.library_source_material
+              ELSE null END,
+          library_source_molecule: CASE LABELS(file)[0]
+              WHEN 'sequencing_file' THEN file.library_source_molecule
+              ELSE null END,
+          library_strategy: CASE LABELS(file)[0]
+              WHEN 'sequencing_file' THEN file.library_strategy
+              ELSE null END
+          }) AS sample_diagnosis_file_filters,
+          st.study_id AS study_id,
+          st.dbgap_accession AS dbgap_accession,
+          st.study_acronym AS study_acronym,
+          st.study_name AS study_name
+
+
     
 
   - index_name: participants

From 2f72a5aa3267a17db6fe114b494d51bd18c026ef Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Thu, 17 Oct 2024 12:39:11 -0400
Subject: [PATCH 20/23] add sample_diagnosis_filter back

---
 config/es_indices_ccdi_model.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index ff5ad9d..2b8b82a 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -3832,6 +3832,7 @@ Indices:
                 race: apoc.text.split(p.race, ';'),
                 sex_at_birth: p.sex_at_birth
             }) AS participant_filters,
+            sample_diagnosis_filter as sample_diagnosis_filter,
             survival_filters as survival_filters,
             treatment_filters as treatment_filters,
             treatment_response_filters as treatment_response_filters,

From 7fcea228bfcafaeca743e1a4ed54afd720361f28 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Thu, 17 Oct 2024 15:13:39 -0400
Subject: [PATCH 21/23] fix spelling

---
 config/es_indices_ccdi_model.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 2b8b82a..dde3ecc 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -3832,7 +3832,7 @@ Indices:
                 race: apoc.text.split(p.race, ';'),
                 sex_at_birth: p.sex_at_birth
             }) AS participant_filters,
-            sample_diagnosis_filter as sample_diagnosis_filter,
+            sample_diagnosis_filter as sample_diagnosis_filters,
             survival_filters as survival_filters,
             treatment_filters as treatment_filters,
             treatment_response_filters as treatment_response_filters,

From a397ea05940f1a20330095165f7aafb66f4c9f4b Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Fri, 18 Oct 2024 13:25:57 -0400
Subject: [PATCH 22/23] update id

---
 config/es_indices_ccdi_model.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index dde3ecc..4e5031d 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -1256,7 +1256,8 @@ Indices:
             COLLECT(DISTINCT{response_category: tr.response_category,
             age_at_response: tr.age_at_response}) as treatment_response_filters, file, st, stf, stp            
             RETURN DISTINCT
-              p.id as id,
+              su.id as id,
+              p.id as pid,
               p.participant_id as participant_id,
               apoc.text.split(p.race, ';') as race,
               p.race as race_str,
@@ -1641,7 +1642,7 @@ Indices:
             COLLECT(DISTINCT{response_category: tr.response_category,
             age_at_response: tr.age_at_response}) as treatment_response_filters, file, st, stf, stp            
             RETURN DISTINCT
-              p.id as id,
+              tm.id as id,
               p.participant_id as participant_id,
               apoc.text.split(p.race, ';') as race,
               p.race as race_str,
@@ -2026,7 +2027,7 @@ Indices:
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters, file, st, stf, stp            
             RETURN DISTINCT
-              p.id as id,
+              tr.id as id,
               p.participant_id as participant_id,
               apoc.text.split(p.race, ';') as race,
               p.race as race_str,

From 934065e357b00a6070c699040d6df8a3b42698c3 Mon Sep 17 00:00:00 2001
From: shawnwangnih <108429233+shawnwangnih@users.noreply.github.com>
Date: Fri, 18 Oct 2024 16:53:36 -0400
Subject: [PATCH 23/23] fix age_at_last_known_survival_status

---
 config/es_indices_ccdi_model.yml | 54 ++++++++++++++++----------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/config/es_indices_ccdi_model.yml b/config/es_indices_ccdi_model.yml
index 4e5031d..c5f474f 100644
--- a/config/es_indices_ccdi_model.yml
+++ b/config/es_indices_ccdi_model.yml
@@ -18,7 +18,7 @@ Indices:
         properties:
           last_known_survival_status:
             type: keyword
-          age_at_event_free_survival_status:
+          age_at_last_known_survival_status:
             type: integer
           event_free_survival_status:
             type: keyword
@@ -357,7 +357,7 @@ Indices:
             COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -411,7 +411,7 @@ Indices:
             }) AS sample_diagnosis_file_filters,
             COLLECT(DISTINCT {
                 last_known_survival_status: null,
-                age_at_event_free_survival_status: null,
+                age_at_last_known_survival_status: null,
                 event_free_survival_status: null,
                 first_event: null
             }) AS survival_filters,
@@ -442,7 +442,7 @@ Indices:
           null AS sex_at_birth,
           COLLECT(DISTINCT {
           last_known_survival_status: null,
-          age_at_event_free_survival_status: null,
+          age_at_last_known_survival_status: null,
           event_free_survival_status: null,
               first_event: null
           }) AS survival_filters,
@@ -519,7 +519,7 @@ Indices:
         properties:
           last_known_survival_status:
             type: keyword
-          age_at_event_free_survival_status:
+          age_at_last_known_survival_status:
             type: integer
           event_free_survival_status:
             type: keyword
@@ -867,7 +867,7 @@ Indices:
             COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -905,7 +905,7 @@ Indices:
         type: keyword
       last_known_survival_status:
         type: keyword
-      age_at_event_free_survival_status:
+      age_at_last_known_survival_status:
         type: integer
       event_free_survival_status:
         type: keyword
@@ -1264,7 +1264,7 @@ Indices:
               p.sex_at_birth as sex_at_birth,
               apoc.text.join(Collect(distinct sy.synonym_id), ',') as alternate_participant_id,
               su.last_known_survival_status as last_known_survival_status,
-              su.age_at_event_free_survival_status as age_at_event_free_survival_status,
+              su.age_at_last_known_survival_status as age_at_last_known_survival_status,
               su.event_free_survival_status as event_free_survival_status,
               su.first_event as first_event,
               treatment_filters as treatment_filters,
@@ -1294,7 +1294,7 @@ Indices:
         properties:
           last_known_survival_status:
             type: keyword
-          age_at_event_free_survival_status:
+          age_at_last_known_survival_status:
             type: integer
           event_free_survival_status:
             type: keyword
@@ -1638,7 +1638,7 @@ Indices:
             WITH tm, p, sy, sample_diagnosis_file_filter,	COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{response_category: tr.response_category,
             age_at_response: tr.age_at_response}) as treatment_response_filters, file, st, stf, stp            
             RETURN DISTINCT
@@ -1678,7 +1678,7 @@ Indices:
         properties:
           last_known_survival_status:
             type: keyword
-          age_at_event_free_survival_status:
+          age_at_last_known_survival_status:
             type: integer
           event_free_survival_status:
             type: keyword
@@ -2022,7 +2022,7 @@ Indices:
             WITH tr, p, sy, sample_diagnosis_file_filter,	COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
               COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters, file, st, stf, stp            
@@ -2095,7 +2095,7 @@ Indices:
         properties:
           last_known_survival_status:
             type: keyword
-          age_at_event_free_survival_status:
+          age_at_last_known_survival_status:
             type: integer
           event_free_survival_status:
             type: keyword
@@ -2288,7 +2288,7 @@ Indices:
           WITH p, cell_line_pdx_file_filters, general_file_filters, participant_clinical_measure_file_filters,participant_radiology_file_filters, file, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters, 
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters, 
           COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -2395,7 +2395,7 @@ Indices:
           WITH dg, p, sm, sample_file_filter, file, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters, 
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters, 
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -2482,7 +2482,7 @@ Indices:
           WITH dg, p, sid, sample_id, sample_file_filter, files, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -2577,7 +2577,7 @@ Indices:
             st.study_name as study_name,
             COLLECT(DISTINCT {
                 last_known_survival_status: null,
-                age_at_event_free_survival_status: null,
+                age_at_last_known_survival_status: null,
                 event_free_survival_status: null,
                 first_event: null
             }) AS survival_filters,
@@ -2759,7 +2759,7 @@ Indices:
         properties:
           last_known_survival_status:
             type: keyword
-          age_at_event_free_survival_status:
+          age_at_last_known_survival_status:
             type: integer
           event_free_survival_status:
             type: keyword
@@ -2895,7 +2895,7 @@ Indices:
           WITH sm, opensearch_data, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters
           WITH sm, apoc.map.merge(opensearch_data, {
             survival_filters: survival_filters
           }) AS opensearch_data
@@ -3033,7 +3033,7 @@ Indices:
             properties:
               last_known_survival_status:
                 type: keyword
-              age_at_event_free_survival_status:
+              age_at_last_known_survival_status:
                 type: integer
               event_free_survival_status:
                 type: keyword
@@ -3121,7 +3121,7 @@ Indices:
         properties:
           last_known_survival_status:
             type: keyword
-          age_at_event_free_survival_status:
+          age_at_last_known_survival_status:
             type: integer
           event_free_survival_status:
             type: keyword
@@ -3267,7 +3267,7 @@ Indices:
           with file, p, st, sample_diagnosis_filter,COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -3402,7 +3402,7 @@ Indices:
                                     })) AS sample_diagnosis_filter_6, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -3585,7 +3585,7 @@ Indices:
           with file, p, sample_diagnosis_filter, sm1, sm, st, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -3804,7 +3804,7 @@ Indices:
           with distinct p, sm, st, sample_diagnosis_filter, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -3853,7 +3853,7 @@ Indices:
           with sm, p, st, dg, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,
@@ -3963,7 +3963,7 @@ Indices:
           with p, st, dg, COLLECT(DISTINCT {last_known_survival_status: su.last_known_survival_status, 
               event_free_survival_status: su.event_free_survival_status, 
               first_event: su.first_event,
-              age_at_event_free_survival_status: su.age_at_event_free_survival_status} ) AS survival_filters,
+              age_at_last_known_survival_status: su.age_at_last_known_survival_status} ) AS survival_filters,
             COLLECT(DISTINCT{treatment_type: tm.treatment_type,
             treatment_agent: tm.treatment_agent,
             age_at_treatment_start: tm.age_at_treatment_start}) as treatment_filters,