Skip to content

Commit

Permalink
Added more abbreviated form of codeset counts to help with n3c
Browse files Browse the repository at this point in the history
comparison.
  • Loading branch information
Sigfried committed Dec 1, 2023
1 parent dda8f27 commit c788215
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 52 deletions.
26 changes: 24 additions & 2 deletions backend/db/ddl-10-codeset_counts.jinja.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,28 @@
DROP TABLE IF EXISTS {{schema}}codeset_counts{{optional_suffix}} CASCADE;

CREATE TABLE {{schema}}codeset_counts{{optional_suffix}} AS
SELECT codeset_id, JSON_OBJECT_AGG(grp, cnt) AS counts FROM {{schema}}members_items_summary GROUP BY codeset_id;
-- Builds one row per codeset with two JSON objects:
--   counts    : {grp: cnt}   for every summary group of the codeset
--   flag_cnts : {flags: cnt} for every non-empty abbreviated flag string
--               (NULL when the codeset has no flagged rows, via the LEFT JOIN)
WITH grp_counts AS (
    SELECT mis.codeset_id, json_object_agg(mis.grp, mis.cnt) AS counts
    FROM {{schema}}members_items_summary mis
    GROUP BY mis.codeset_id
),
-- members_items_summary is grouped by (codeset_id, grp, flags), so the same
-- flags value can occur in more than one row of a codeset. Sum those rows
-- first; otherwise json_object_agg would emit the same key several times and
-- a JSON consumer that keeps only the last duplicate would under-report.
flag_totals AS (
    SELECT codeset_id, flags, CAST(SUM(cnt) AS bigint) AS cnt
    FROM {{schema}}members_items_summary
    WHERE length(flags) > 0   -- also drops rows whose flags is NULL
    GROUP BY codeset_id, flags
    /* do we care about the items with no flags?
    UNION
    SELECT codeset_id, 'No flags' AS flags, SUM(cnt) AS cnt
    FROM {{schema}}members_items_summary
    WHERE grp LIKE '%no flags'
    GROUP BY codeset_id
    */
),
flag_counts AS (
    SELECT codeset_id, json_object_agg(flags, cnt) AS flag_cnts
    FROM flag_totals
    GROUP BY codeset_id
)
SELECT gc.codeset_id, gc.counts, fc.flag_cnts
FROM grp_counts gc
LEFT JOIN flag_counts fc ON gc.codeset_id = fc.codeset_id;

CREATE INDEX csc1{{optional_index_suffix}} ON {{schema}}codeset_counts{{optional_suffix}}(codeset_id);
CREATE INDEX csc1{{optional_index_suffix}} ON {{schema}}codeset_counts{{optional_suffix}}(codeset_id);
1 change: 1 addition & 0 deletions backend/db/ddl-11-all_csets.jinja.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ WITH ac AS (SELECT DISTINCT cs.codeset_id,
)
SELECT ac.*,
cscnt.counts,
cscnt.flag_cnts,
CAST(cscnt.counts->>'Members' as int) as concepts,
rcon.name AS container_creator,
rver.name AS codeset_creator
Expand Down
30 changes: 23 additions & 7 deletions backend/db/ddl-6-cset_members_items.jinja.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ SELECT
COALESCE(csm.concept_id, item.concept_id) AS concept_id,
csm.codeset_id IS NOT NULL AS csm,
item.codeset_id IS NOT NULL AS item,
item.flags,
array_to_string(array_remove(ARRAY[
CASE WHEN item."isExcluded" THEN 'isExcluded' ELSE NULL END,
CASE WHEN item."includeDescendants" THEN 'includeDescendants' ELSE NULL END,
Expand All @@ -15,15 +16,30 @@ SELECT
item."isExcluded",
item."includeDescendants",
item."includeMapped"
FROM {{schema}}concept_set_members csm
FULL OUTER JOIN {{schema}}concept_set_version_item item
ON csm.codeset_id = item.codeset_id
AND csm.concept_id = item.concept_id
FROM {{schema}}concept_set_members{{optional_suffix}} csm
FULL OUTER JOIN (
SELECT
codeset_id,
concept_id,
"isExcluded",
"includeDescendants",
"includeMapped",

This comment has been minimized.

Copy link
@joeflack4

joeflack4 Dec 1, 2023

Member

D M X eh? Seems good, doing this upstream.

array_to_string(
ARRAY[
CASE WHEN bool_or("includeDescendants") THEN 'D' END,
CASE WHEN bool_or("includeMapped") THEN 'M' END,
CASE WHEN bool_or("isExcluded") THEN 'X' END
]::text[], ''
) AS flags
FROM {{schema}}concept_set_version_item{{optional_suffix}}
GROUP BY 1,2,3,4,5
) AS item ON csm.codeset_id = item.codeset_id
AND csm.concept_id = item.concept_id
WHERE csm.codeset_id IS NOT NULL
OR item.codeset_id IS NOT NULL;

CREATE INDEX csmi_idx1{{optional_index_suffix}} ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id);
CREATE INDEX {{optional_index_suffix}}csmi_idx1 ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id);

CREATE INDEX csmi_idx2{{optional_index_suffix}} ON {{schema}}cset_members_items{{optional_suffix}}(concept_id);
CREATE INDEX {{optional_index_suffix}}csmi_idx2 ON {{schema}}cset_members_items{{optional_suffix}}(concept_id);

CREATE INDEX csmi_idx3{{optional_index_suffix}} ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id, concept_id);
CREATE INDEX {{optional_index_suffix}}csmi_idx3 ON {{schema}}cset_members_items{{optional_suffix}}(codeset_id, concept_id);
7 changes: 4 additions & 3 deletions backend/db/ddl-9-members_items_summary.jinja.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ SELECT
CASE WHEN LENGTH(item_flags) > 0 THEN item_flags ELSE 'no flags' END
ELSE '' END
AS grp,
flags,
COUNT(*) AS cnt
FROM {{schema}}cset_members_items
GROUP by 1,2
GROUP by 1,2,3
UNION
SELECT codeset_id, 'Members' AS grp, SUM(CASE WHEN csm THEN 1 ELSE 0 END) AS cnt FROM {{schema}}cset_members_items GROUP by 1,2
SELECT codeset_id, 'Members' AS grp, NULL, SUM(CASE WHEN csm THEN 1 ELSE 0 END) AS cnt FROM {{schema}}cset_members_items GROUP by 1,2
UNION
SELECT codeset_id, 'Expression items' AS grp, SUM(CASE WHEN item THEN 1 ELSE 0 END) AS cnt FROM {{schema}}cset_members_items GROUP by 1,2;
SELECT codeset_id, 'Expression items' AS grp, NULL, SUM(CASE WHEN item THEN 1 ELSE 0 END) AS cnt FROM {{schema}}cset_members_items GROUP by 1,2;

CREATE INDEX mis1{{optional_index_suffix}} ON {{schema}}members_items_summary{{optional_suffix}}(codeset_id);
88 changes: 51 additions & 37 deletions backend/routes/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,48 +725,62 @@ def n3c_comparison_rpt():
return rpt


def get_comparison_rpt(con, codeset_id_1: int, codeset_id_2: int) -> Dict[str, Union[str, None]]:

This comment has been minimized.

Copy link
@joeflack4

joeflack4 Dec 1, 2023

Member

@Sigfried Curious as to how/where this is used.
Does rpt stand for "report"?

# Fetch the metadata record for each codeset; [0] takes the first element of
# what get_csets returns for the single requested id.
cset_1 = get_csets([codeset_id_1])[0]
cset_2 = get_csets([codeset_id_2])[0]

# (concept_id, concept_name) pairs that are members of codeset 1 but not of
# codeset 2, each rendered as a 'removed <concept_id> <concept_name>' string.
cset_1_only = sql_query(con, """
SELECT 'removed ' || concept_id || ' ' || concept_name AS diff FROM (
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :codeset_id_1
EXCEPT
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id
) x
""", {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2})
# orig_only = [dict(r) for r in orig_only]
# Reduce each result row to its 'diff' string.
cset_1_only = [dict(r)['diff'] for r in cset_1_only]

# The reverse direction: pairs in codeset 2 but not in codeset 1, rendered as
# 'added <concept_id> <concept_name>'.
cset_2_only = sql_query(con, """
SELECT 'added ' || concept_id || ' ' || concept_name AS diff FROM (
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id
EXCEPT
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :codeset_id_1
) x
""", {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2})
# cset_2_only = [dict(r) for r in cset_2_only]
# Reduce each result row to its 'diff' string.
cset_2_only = [dict(r)['diff'] for r in cset_2_only]

# Removals are listed first, then additions.
diffs = cset_1_only + cset_2_only

# Optional ', flags: <k>: <v>, ...' suffix for the summary strings below;
# an empty string when the codeset's flag_cnts is None or empty.
flag_cnts_1 = ', flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_1['flag_cnts'].items()]) if cset_1['flag_cnts'] else ''
flag_cnts_2 = ', flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_2['flag_cnts'].items()]) if cset_2['flag_cnts'] else ''

# Report payload. 'name' and 'author' are taken from codeset 1 only.
# NOTE(review): 'diffs' is a list and the *_codeset_id values are the int
# arguments, which the declared Dict[str, Union[str, None]] return type does
# not cover -- confirm whether the annotation should be widened.
rpt = {
'name': cset_1['concept_set_name'],
'cset_1': f"{cset_1['codeset_id']} v{cset_1['version']}, vocab {cset_1['omop_vocab_version']}; {cset_1['distinct_person_cnt']} pts, {cset_1['concepts']} concepts{flag_cnts_1}",
'cset_2': f"{cset_2['codeset_id']} v{cset_2['version']}, vocab {cset_2['omop_vocab_version']}; {cset_2['distinct_person_cnt']} pts, {cset_2['concepts']} concepts{flag_cnts_2}",
'author': cset_1['codeset_creator'],
'cset_1_codeset_id': codeset_id_1,
# 'cset_1_version': cset_1['version'],
'cset_2_codeset_id': codeset_id_2,
# 'cset_2_version': cset_2['version'],
# 'cset_1_only': cset_1_only,
# 'cset_2_only': cset_2_only,
'diffs': diffs,
}
return rpt


def generate_n3c_comparison_rpt():
with get_db_connection() as con:
pairs = sql_query(con, "SELECT orig_codeset_id, new_codeset_id FROM public.codeset_comparison")
i = 1
for pair in pairs:
pair = list(dict(pair).values())
csets = get_csets(pair)

orig_only = sql_query(con, """
SELECT 'removed ' || concept_id || ' ' || concept_name AS diff FROM (
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :orig_codeset_id
EXCEPT
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :new_codeset_id
) x
""", {'orig_codeset_id': pair[0], 'new_codeset_id': pair[1]})
# orig_only = [dict(r) for r in orig_only]
orig_only = [dict(r)['diff'] for r in orig_only]

new_only = sql_query(con, """
SELECT 'added ' || concept_id || ' ' || concept_name AS diff FROM (
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :new_codeset_id
EXCEPT
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :orig_codeset_id
) x
""", {'orig_codeset_id': pair[0], 'new_codeset_id': pair[1]})
# new_only = [dict(r) for r in new_only]
new_only = [dict(r)['diff'] for r in new_only]

diffs = orig_only + new_only

rpt = {
'name': csets[0]['concept_set_name'],
'orig': f"{csets[0]['codeset_id']} v{csets[0]['version']}, vocab {csets[0]['omop_vocab_version']}",
'new': f"{csets[1]['codeset_id']} v{csets[1]['version']}, vocab {csets[1]['omop_vocab_version']}",
'author': csets[0]['codeset_creator'],
'orig_codeset_id': pair[0],
# 'orig_version': csets[0]['version'],
'new_codeset_id': pair[1],
# 'new_version': csets[1]['version'],
# 'orig_only': orig_only,
# 'new_only': new_only,
'diffs': diffs,
}
print(f"Processing {str(pair)} {i} of {len(pairs)}")
i += 1

rpt = get_comparison_rpt(con, *pair)

run_sql(con, """
UPDATE public.codeset_comparison
SET rpt = :rpt
Expand Down
6 changes: 3 additions & 3 deletions frontend/src/components/N3CRecommended.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ export const N3CComparisonRpt = () => {
columns = [
{grow: 4, sortable: true, name: "Name", selector: row => row.name},
{grow: 2, sortable: true, name: "Author", selector: row => row.author},
{grow: 3, sortable: true, name: "Orig", selector: row => row.orig},
{grow: 3, sortable: true, name: "New", selector: row => row.new},
{grow: 3, sortable: true, name: "Orig", selector: row => row.cset_1, wrap: true},
{grow: 3, sortable: true, name: "New", selector: row => row.cset_2, wrap: true},
{grow: 2, name: "Compare", selector: row => (
<Button
to={`/cset-comparison?codeset_ids=${row.orig_codeset_id}&codeset_ids=${row.new_codeset_id}`}
to={`/cset-comparison?codeset_ids=${row.cset_1_codeset_id}&codeset_ids=${row.cset_2_codeset_id}`}
component={Link}
style={{margin: '7px', textTransform: 'none'}}
>
Expand Down

0 comments on commit c788215

Please sign in to comment.