From c788215ae7de720a08704e9a19dd5a3e983df7f0 Mon Sep 17 00:00:00 2001 From: Sigfried Gold Date: Fri, 1 Dec 2023 12:22:08 -0500 Subject: [PATCH] Added more abbreviated form of codeset counts to help with n3c comparison. --- backend/db/ddl-10-codeset_counts.jinja.sql | 26 +++++- backend/db/ddl-11-all_csets.jinja.sql | 1 + backend/db/ddl-6-cset_members_items.jinja.sql | 30 +++++-- .../db/ddl-9-members_items_summary.jinja.sql | 7 +- backend/routes/db.py | 88 +++++++++++-------- frontend/src/components/N3CRecommended.jsx | 6 +- 6 files changed, 106 insertions(+), 52 deletions(-) diff --git a/backend/db/ddl-10-codeset_counts.jinja.sql b/backend/db/ddl-10-codeset_counts.jinja.sql index 9248ed1a4..5f55f0323 100644 --- a/backend/db/ddl-10-codeset_counts.jinja.sql +++ b/backend/db/ddl-10-codeset_counts.jinja.sql @@ -2,6 +2,28 @@ DROP TABLE IF EXISTS {{schema}}codeset_counts{{optional_suffix}} CASCADE; CREATE TABLE {{schema}}codeset_counts{{optional_suffix}} AS -SELECT codeset_id, JSON_OBJECT_AGG(grp, cnt) AS counts FROM {{schema}}members_items_summary GROUP BY codeset_id; +WITH m1 AS ( + SELECT m1.codeset_id, json_object_agg(m1.grp, m1.cnt) AS counts + FROM {{schema}}members_items_summary m1 + GROUP BY codeset_id +), m2 AS ( + SELECT codeset_id, json_object_agg(flags, cnt) AS flag_cnts + FROM ( + SELECT codeset_id, flags, cnt + FROM {{schema}}members_items_summary + WHERE length(flags) > 0 + /* do we care about the items with no flags? 
+ UNION + SELECT codeset_id, 'No flags' AS flags, SUM(cnt) AS cnt + FROM {{schema}}members_items_summary + WHERE grp LIKE '%no flags' + GROUP BY codeset_id + */ + ) nf + GROUP BY codeset_id +) +SELECT m1.*, m2.flag_cnts +FROM m1 +LEFT JOIN m2 ON m1.codeset_id = m2.codeset_id; -CREATE INDEX csc1{{optional_index_suffix}} ON {{schema}}codeset_counts{{optional_suffix}}(codeset_id); +CREATE INDEX csc1{{optional_index_suffix}} ON {{schema}}codeset_counts{{optional_suffix}}(codeset_id); \ No newline at end of file diff --git a/backend/db/ddl-11-all_csets.jinja.sql b/backend/db/ddl-11-all_csets.jinja.sql index 7ea79df3d..b621c36fe 100644 --- a/backend/db/ddl-11-all_csets.jinja.sql +++ b/backend/db/ddl-11-all_csets.jinja.sql @@ -55,6 +55,7 @@ WITH ac AS (SELECT DISTINCT cs.codeset_id, ) SELECT ac.*, cscnt.counts, + cscnt.flag_cnts, CAST(cscnt.counts->>'Members' as int) as concepts, rcon.name AS container_creator, rver.name AS codeset_creator diff --git a/backend/db/ddl-6-cset_members_items.jinja.sql b/backend/db/ddl-6-cset_members_items.jinja.sql index ada23f883..d2d8f4715 100644 --- a/backend/db/ddl-6-cset_members_items.jinja.sql +++ b/backend/db/ddl-6-cset_members_items.jinja.sql @@ -7,6 +7,7 @@ SELECT COALESCE(csm.concept_id, item.concept_id) AS concept_id, csm.codeset_id IS NOT NULL AS csm, item.codeset_id IS NOT NULL AS item, + item.flags, array_to_string(array_remove(ARRAY[ CASE WHEN item."isExcluded" THEN 'isExcluded' ELSE NULL END, CASE WHEN item."includeDescendants" THEN 'includeDescendants' ELSE NULL END, @@ -15,15 +16,30 @@ SELECT item."isExcluded", item."includeDescendants", item."includeMapped" -FROM {{schema}}concept_set_members csm -FULL OUTER JOIN {{schema}}concept_set_version_item item -ON csm.codeset_id = item.codeset_id - AND csm.concept_id = item.concept_id +FROM {{schema}}concept_set_members{{optional_suffix}} csm +FULL OUTER JOIN ( + SELECT + codeset_id, + concept_id, + "isExcluded", + "includeDescendants", + "includeMapped", + array_to_string( 
ARRAY[ + CASE WHEN bool_or("includeDescendants") THEN 'D' END, + CASE WHEN bool_or("includeMapped") THEN 'M' END, + CASE WHEN bool_or("isExcluded") THEN 'X' END + ]::text[], '' + ) AS flags + FROM {{schema}}concept_set_version_item{{optional_suffix}} + GROUP BY 1,2,3,4,5 +) AS item ON csm.codeset_id = item.codeset_id + AND csm.concept_id = item.concept_id WHERE csm.codeset_id IS NOT NULL OR item.codeset_id IS NOT NULL; -CREATE INDEX csmi_idx1{{optional_index_suffix}} ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id); +CREATE INDEX {{optional_index_suffix}}csmi_idx1 ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id); -CREATE INDEX csmi_idx2{{optional_index_suffix}} ON {{schema}}cset_members_items{{optional_suffix}}(concept_id); +CREATE INDEX {{optional_index_suffix}}csmi_idx2 ON {{schema}}cset_members_items{{optional_suffix}}(concept_id); -CREATE INDEX csmi_idx3{{optional_index_suffix}} ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id, concept_id); \ No newline at end of file +CREATE INDEX {{optional_index_suffix}}csmi_idx3 ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id, concept_id); diff --git a/backend/db/ddl-9-members_items_summary.jinja.sql b/backend/db/ddl-9-members_items_summary.jinja.sql index 5805d2c23..7cd472747 100644 --- a/backend/db/ddl-9-members_items_summary.jinja.sql +++ b/backend/db/ddl-9-members_items_summary.jinja.sql @@ -16,12 +16,13 @@ SELECT CASE WHEN LENGTH(item_flags) > 0 THEN item_flags ELSE 'no flags' END ELSE '' END AS grp, + flags, COUNT(*) AS cnt FROM {{schema}}cset_members_items -GROUP by 1,2 +GROUP by 1,2,3 UNION -SELECT codeset_id, 'Members' AS grp, SUM(CASE WHEN csm THEN 1 ELSE 0 END) AS cnt FROM {{schema}}cset_members_items GROUP by 1,2 +SELECT codeset_id, 'Members' AS grp, NULL, SUM(CASE WHEN csm THEN 1 ELSE 0 END) AS cnt FROM {{schema}}cset_members_items GROUP by 1,2 UNION -SELECT codeset_id, 'Expression items' AS grp, SUM(CASE WHEN item THEN 1 ELSE 0 END) AS cnt FROM 
{{schema}}cset_members_items GROUP by 1,2; +SELECT codeset_id, 'Expression items' AS grp, NULL, SUM(CASE WHEN item THEN 1 ELSE 0 END) AS cnt FROM {{schema}}cset_members_items GROUP by 1,2; CREATE INDEX mis1{{optional_index_suffix}} ON {{schema}}members_items_summary{{optional_suffix}}(codeset_id); diff --git a/backend/routes/db.py b/backend/routes/db.py index a7386f141..7fa9cfd20 100644 --- a/backend/routes/db.py +++ b/backend/routes/db.py @@ -725,48 +725,62 @@ def n3c_comparison_rpt(): return rpt +def get_comparison_rpt(con, codeset_id_1: int, codeset_id_2: int) -> Dict[str, Union[str, None]]: + cset_1 = get_csets([codeset_id_1])[0] + cset_2 = get_csets([codeset_id_2])[0] + + cset_1_only = sql_query(con, """ + SELECT 'removed ' || concept_id || ' ' || concept_name AS diff FROM ( + SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :codeset_id_1 + EXCEPT + SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id + ) x + """, {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2}) + # orig_only = [dict(r) for r in orig_only] + cset_1_only = [dict(r)['diff'] for r in cset_1_only] + + cset_2_only = sql_query(con, """ + SELECT 'added ' || concept_id || ' ' || concept_name AS diff FROM ( + SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id + EXCEPT + SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :codeset_id_1 + ) x + """, {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2}) + # cset_2_only = [dict(r) for r in cset_2_only] + cset_2_only = [dict(r)['diff'] for r in cset_2_only] + + diffs = cset_1_only + cset_2_only + + flag_cnts_1 = ', flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_1['flag_cnts'].items()]) if cset_1['flag_cnts'] else '' + flag_cnts_2 = ', flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_2['flag_cnts'].items()]) if cset_2['flag_cnts'] else '' + + rpt = { + 'name': cset_1['concept_set_name'], 
+ 'cset_1': f"{cset_1['codeset_id']} v{cset_1['version']}, vocab {cset_1['omop_vocab_version']}; {cset_1['distinct_person_cnt']} pts, {cset_1['concepts']} concepts{flag_cnts_1}", + 'cset_2': f"{cset_2['codeset_id']} v{cset_2['version']}, vocab {cset_2['omop_vocab_version']}; {cset_2['distinct_person_cnt']} pts, {cset_2['concepts']} concepts{flag_cnts_2}", + 'author': cset_1['codeset_creator'], + 'cset_1_codeset_id': codeset_id_1, + # 'cset_1_version': cset_1['version'], + 'cset_2_codeset_id': codeset_id_2, + # 'cset_2_version': cset_2['version'], + # 'cset_1_only': cset_1_only, + # 'cset_2_only': cset_2_only, + 'diffs': diffs, + } + return rpt + + def generate_n3c_comparison_rpt(): with get_db_connection() as con: pairs = sql_query(con, "SELECT orig_codeset_id, new_codeset_id FROM public.codeset_comparison") + i = 1 for pair in pairs: pair = list(dict(pair).values()) - csets = get_csets(pair) - - orig_only = sql_query(con, """ - SELECT 'removed ' || concept_id || ' ' || concept_name AS diff FROM ( - SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :orig_codeset_id - EXCEPT - SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :new_codeset_id - ) x - """, {'orig_codeset_id': pair[0], 'new_codeset_id': pair[1]}) - # orig_only = [dict(r) for r in orig_only] - orig_only = [dict(r)['diff'] for r in orig_only] - - new_only = sql_query(con, """ - SELECT 'added ' || concept_id || ' ' || concept_name AS diff FROM ( - SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :new_codeset_id - EXCEPT - SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :orig_codeset_id - ) x - """, {'orig_codeset_id': pair[0], 'new_codeset_id': pair[1]}) - # new_only = [dict(r) for r in new_only] - new_only = [dict(r)['diff'] for r in new_only] - - diffs = orig_only + new_only - - rpt = { - 'name': csets[0]['concept_set_name'], - 'orig': f"{csets[0]['codeset_id']} v{csets[0]['version']}, vocab 
{csets[0]['omop_vocab_version']}", - 'new': f"{csets[1]['codeset_id']} v{csets[1]['version']}, vocab {csets[1]['omop_vocab_version']}", - 'author': csets[0]['codeset_creator'], - 'orig_codeset_id': pair[0], - # 'orig_version': csets[0]['version'], - 'new_codeset_id': pair[1], - # 'new_version': csets[1]['version'], - # 'orig_only': orig_only, - # 'new_only': new_only, - 'diffs': diffs, - } + print(f"Processing {str(pair)} {i} of {len(pairs)}") + i += 1 + + rpt = get_comparison_rpt(con, *pair) + run_sql(con, """ UPDATE public.codeset_comparison SET rpt = :rpt diff --git a/frontend/src/components/N3CRecommended.jsx b/frontend/src/components/N3CRecommended.jsx index e45d4f561..f70c86b34 100644 --- a/frontend/src/components/N3CRecommended.jsx +++ b/frontend/src/components/N3CRecommended.jsx @@ -112,11 +112,11 @@ export const N3CComparisonRpt = () => { columns = [ {grow: 4, sortable: true, name: "Name", selector: row => row.name}, {grow: 2, sortable: true, name: "Author", selector: row => row.author}, - {grow: 3, sortable: true, name: "Orig", selector: row => row.orig}, - {grow: 3, sortable: true, name: "New", selector: row => row.new}, + {grow: 3, sortable: true, name: "Orig", selector: row => row.cset_1, wrap: true}, + {grow: 3, sortable: true, name: "New", selector: row => row.cset_2, wrap: true}, {grow: 2, name: "Compare", selector: row => (