diff --git a/.run/backend.run.xml b/.run/backend.run.xml
index 4d8187fa5..0b97d12aa 100644
--- a/.run/backend.run.xml
+++ b/.run/backend.run.xml
@@ -5,6 +5,9 @@
+
+
+
diff --git a/backend/api_logger.py b/backend/api_logger.py
index d688ccd3b..df10c9148 100644
--- a/backend/api_logger.py
+++ b/backend/api_logger.py
@@ -42,7 +42,7 @@ async def start_rpt(self, request: Request, params: Dict):
if len(v) > 20:
# change any params with len > 20 to just log the len
rpt_params[k + '_len'] = len(v)
- elif k == 'codeset_ids' or k == 'id':
+ elif k in ['codeset_ids', 'id', ]:
# put codeset_ids in a separate column (is this going to be helpful?)
codeset_ids = v
diff --git a/backend/db/ddl-19-apirun_groups.jinja.sql b/backend/db/ddl-19-apirun_groups.jinja.sql
index ca55c1176..e7f3d0265 100644
--- a/backend/db/ddl-19-apirun_groups.jinja.sql
+++ b/backend/db/ddl-19-apirun_groups.jinja.sql
@@ -25,8 +25,8 @@ WITH RankedGroups AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY api_call_group_id ORDER BY timestamp::timestamp DESC) AS rn
- FROM
- public.api_runs
+ FROM public.api_runs
+ WHERE api_call_group_id IS NOT NULL
)
SELECT
host,
diff --git a/backend/db/ddl-20-concept_graph.jinja.sql b/backend/db/ddl-20-concept_graph.jinja.sql
index b17f07331..e838de172 100644
--- a/backend/db/ddl-20-concept_graph.jinja.sql
+++ b/backend/db/ddl-20-concept_graph.jinja.sql
@@ -1,14 +1,25 @@
DROP TABLE IF EXISTS concept_graph CASCADE;
CREATE TABLE IF NOT EXISTS concept_graph AS (
- SELECT ancestor_concept_id AS source_id, descendant_concept_id AS target_id
+ SELECT ancestor_concept_id AS source_id,
+ -- 'Child of' AS relationship_id,
+ descendant_concept_id AS target_id
FROM concept_ancestor
WHERE min_levels_of_separation = 1
+ /*
UNION
- SELECT concept_id_1, concept_id_2
+ SELECT concept_id_1,
+ relationship_id,
+ concept_id_2
FROM concept_relationship
- WHERE relationship_id = 'Is a'
+ WHERE relationship_id IN ('Is a', 'Replaces')
+ */
);
+
+CREATE INDEX cg_idx1 ON concept_graph(source_id);
+
+CREATE INDEX cg_idx2 ON concept_graph(target_id);
+
/*
# load_csv(con, 'relationship', 'dataset', schema='n3c')
# rels = sql_query(con, f"""
@@ -38,7 +49,4 @@ CREATE TABLE IF NOT EXISTS concept_graph AS (
) y
"""
*/
-
-CREATE INDEX cg_idx1 ON concept_graph(source_id);
-
-CREATE INDEX cg_idx2 ON concept_graph(target_id);
\ No newline at end of file
+ SELECT 1; -- in case ending on a comment breaks the ddl parser
\ No newline at end of file
diff --git a/backend/routes/graph.py b/backend/routes/graph.py
index 1bc00fe6e..2606f5bbc 100644
--- a/backend/routes/graph.py
+++ b/backend/routes/graph.py
@@ -5,9 +5,13 @@
from typing import List, Union, Tuple #, Dict, Set
from collections import defaultdict
-
+from itertools import combinations
import networkx
+# from igraph import Graph
+import pydot
+from networkx.drawing.nx_pydot import to_pydot, from_pydot
+
from fastapi import APIRouter, Query, Request
# from fastapi.responses import JSONResponse
# from fastapi.responses import Response
@@ -18,9 +22,9 @@
from sqlalchemy.sql import text
from backend.config import CONFIG
-from backend.db.utils import sql_query, get_db_connection, SCHEMA
+from backend.db.utils import sql_query, get_db_connection, SCHEMA, sql_in
from backend.api_logger import Api_logger
-from backend.utils import pdump, get_timer, commify
+from backend.utils import pdump, get_timer, commify, powerset
VERBOSE = True
PROJECT_DIR = Path(os.path.dirname(__file__)).parent.parent
@@ -38,25 +42,55 @@ def subgraph():
@router.get("/indented-concept-list")
-async def indented_concept_list(request: Request, id: List[int] = Query(...)): # id is a list of concept ids
- return await indented_concept_list_post(request=request, id=id)
+async def indented_concept_list(request: Request, codeset_ids: List[int] = Query(...),
+ extra_concept_ids: Union[List[int], None] = []) -> List:
+ return await indented_concept_list_post(
+ request=request, codeset_ids=codeset_ids, extra_concept_ids=extra_concept_ids)
@router.post("/indented-concept-list")
-async def indented_concept_list_post(request: Request, id: Union[List[int], None] = None) -> List:
+async def indented_concept_list_post(request: Request, codeset_ids: List[int],
+ extra_concept_ids: Union[List[int], None] = []) -> List:
rpt = Api_logger()
- await rpt.start_rpt(request, params={'concept_ids': id})
+ await rpt.start_rpt(request, params={
+ 'codeset_ids': codeset_ids, 'extra_concept_ids': extra_concept_ids})
try:
- paths = all_paths(REL_GRAPH, id)
+ with get_db_connection() as con:
+ csmi = sql_query(
+ con,
+ f"""
+ SELECT *
+ FROM {SCHEMA}.cset_members_items
+ WHERE codeset_id {sql_in(codeset_ids)}
+ """
+ )
+ concept_ids = set([c['concept_id'] for c in csmi])
+ concept_ids.update(extra_concept_ids)
+
+ nodes_in_graph = set()
+ orphans = set()
+ for cid in concept_ids:
+ if cid in REL_GRAPH:
+ nodes_in_graph.add(cid)
+ else:
+ orphans.add(cid)
+
+ preferred_concept_ids = set([c['concept_id']
+ for c in csmi
+ if c['item'] and not c['concept_id'] in orphans])
+
+ paths = all_paths(REL_GRAPH, nodes_in_graph, preferred_concept_ids)
tree = paths_as_indented_tree(paths)
+ for o in orphans:
+ tree.append((0, o))
+
await rpt.finish(rows=len(tree))
except Exception as e:
await rpt.log_error(e)
raise e
return tree
-
@router.get("/concept-graph")
async def concept_graph(request: Request, id: List[int] = Query(...)): # id is a list of concept ids
return await concept_graph_post(request=request, id=id)
@@ -69,8 +103,12 @@ async def concept_graph_post(request: Request, id: Union[List[int], None] = None
try:
sg, filled_gaps = fill_in_gaps(REL_GRAPH, id, return_missing=True)
- layout = nx.kamada_kawai_layout(sg)
- layout = {k: list(v) for k, v in layout.items()}
+ P = to_pydot(sg)
+ layout = from_pydot(P)
+ # layout = {k: list(v) for k, v in _layout.items()} # networkx doesn't seem to have sugiyama
+ # g = Graph.from_networkx(sg)
+ # _layout = g.layout_sugiyama()
+ # layout = {v["_nx_name"]: _layout[idx] for idx, v in enumerate(g.vs)}
await rpt.finish(rows=len(sg))
except Exception as e:
await rpt.log_error(e)
@@ -78,36 +116,231 @@ async def concept_graph_post(request: Request, id: Union[List[int], None] = None
return {'edges': list(sg.edges), 'layout': layout, 'filled_gaps': filled_gaps}
-def all_paths(g: networkx.DiGraph, nodes: List[int]) -> List[List[int]]:
+def from_pydot_layout(g):
+ pass
+# def find_nearest_common_ancestor(G, nodes):
+# all_ancestors = [set(nx.ancestors(G, node)) for node in nodes]
+# common_ancestors = set.intersection(*all_ancestors)
+#
+# # Find the lowest common ancestor
+# lowest_common_ancestor = None
+# for ancestor in common_ancestors:
+# if all(ancestor in ancestors or ancestor == node for node, ancestors in zip(nodes, all_ancestors)):
+# if lowest_common_ancestor is None or not ancestor in nx.ancestors(G, lowest_common_ancestor):
+# lowest_common_ancestor = ancestor
+#
+# return lowest_common_ancestor
+#
+#
+# def connect_roots(G, target_nodes):
+# # Find the nearest common ancestor
+# nca = find_nearest_common_ancestor(G, target_nodes)
+#
+# # Create a subgraph including the paths from the nearest common ancestor to the target nodes
+# edges_to_include = set()
+# for node in target_nodes:
+# path = nx.shortest_path(G, nca, node)
+# edges_to_include.update([tuple(l) for l in zip(path, path[1:])])
+#
+# SG = G.edge_subgraph(edges_to_include).copy()
+# return SG
+
+
+def connect_nodes(G, target_nodes, preferred_nodes=[]):
+ """
+ Connects all nodes in target_nodes to the nearest common ancestor.
+ preferred nodes are the version item nodes with includeDescendants checked.
+ They should be a subset of target_nodes
+    Besides those, only item members not descended from one of those need
+    to be connected, but there shouldn't be any (unless their connection was
+    lost in vocabulary updates).
+
+ """
+
+ nodes_to_connect = set(preferred_nodes) # gets smaller as nodes are connected by ancestors
+ target_nodes = set(target_nodes)
+ if nodes_to_connect.difference(target_nodes):
+ raise Exception(f"preferred_nodes should be a subset of target_nodes")
+ nodes_connected = set() # eventually should include all target_nodes
+    ancestors_to_add = set() # ancestors, and the nodes on paths to them, that will be included in the final subgraph
+ weird = []
+ nodes_already_connected = set()
+ for a, b in combinations(nodes_to_connect, 2):
+ if a == b:
+ continue
+ try:
+ if nx.has_path(G, a, b):
+ weird.append((a, b))
+ nodes_to_connect.discard(b)
+ nodes_already_connected.add(b)
+ elif nx.has_path(G, b, a):
+ weird.append((b, a))
+ nodes_to_connect.discard(a)
+ nodes_already_connected.add(a)
+ except nx.NetworkXNoPath:
+ continue
+
+ unrooted_children = get_unrooted_children(
+ G, nodes_to_connect, target_nodes.difference(nodes_to_connect))
+ if (unrooted_children):
+ # print("wasn't expecting to find unrooted children") # except if vocab changes disconnected them from any other nodes
+ nodes_to_connect.update(unrooted_children)
+
+ combo_sizes = list(range(len(nodes_to_connect), 1, -1))
+ if not combo_sizes: # one node is ancestor to all the others
+ return G.subgraph(target_nodes)
+
+ everything_is_connected = False
+ for set_size in combo_sizes:
+ for combo in combinations(nodes_to_connect, set_size):
+ common_ancestor, path_nodes = get_best_common_ancestor(G, combo)
+ if not common_ancestor:
+ continue
+
+ nodes_connected.update(combo)
+ ancestors_to_add.add(common_ancestor)
+ ancestors_to_add.update(path_nodes)
+ nodes_to_connect -= set(combo)
+ nodes_to_connect -= path_nodes
+
+ if nodes_to_connect.difference(unrooted_children):
+ # not done yet
+ if target_nodes.difference(nodes_connected): # sanity check
+ continue
+ else:
+ raise Exception("wasn't expecting that!")
+ else:
+ if target_nodes.difference(nodes_already_connected).difference(nodes_connected):
+ # sanity check
+ raise Exception("wasn't expecting that!")
+ else:
+ everything_is_connected = True
+ # raise Exception("something went wrong in connect_nodes")
+ if everything_is_connected:
+ break
+
+ all_nodes = target_nodes.union(ancestors_to_add)
+ sg = G.subgraph(all_nodes)
+ return sg
+
+
+def get_best_common_ancestor(G, nodes):
+ all_ancestors = [set(nx.ancestors(G, node)) for node in nodes]
+ common_ancestors = set.intersection(*all_ancestors)
+
+ if not common_ancestors:
+ return None, None
+
+ path_nodes = set()
+
+ if len(common_ancestors) == 1:
+ common_ancestor = common_ancestors.pop()
+ for node in nodes:
+ path = nx.shortest_path(G, common_ancestor, node)
+ path_nodes.update(path[1: -1])
+
+ elif len(common_ancestors) > 1:
+ max_distances = {}
+ for ca in common_ancestors:
+ for node in nodes:
+ path = nx.shortest_path(G, ca, node)
+ path_nodes.update(path[1: -1])
+ max_distances[ca] = max([len(path) - 1 for tn in nodes])
+
+ min_distance = min(max_distances.values())
+ min_distance_ancestors = [node for node, dist in max_distances.items() if dist == min_distance]
+ if len(min_distance_ancestors) == 1:
+ common_ancestor = min_distance_ancestors[0]
+ else:
+ raise Exception(f"can't choose best ancestor from {str(min_distance_ancestors)} for {str(nodes)}")
+ else:
+ raise Exception(f"get_best_ancestor broken for {str(nodes)}")
+
+ return common_ancestor, path_nodes
+
+
+def get_paths_to_roots(G, node):
+    # NOTE(review): despite the name, this returns only the shortest parent->node path (each such path has length 2), not paths to roots -- confirm intent
+ paths = []
+ for parent in G.predecessors(node):
+ paths.append(nx.shortest_path(G, parent, node))
+
+ shortest_path = min(paths, key=len)
+ return shortest_path
+
+
+def get_unrooted_children(G, roots, children):
+ unrooted = []
+ for child in children:
+ rooted = False
+ for root in roots:
+ try:
+ if nx.shortest_path(G, root, child):
+ rooted = True
+ break
+ except nx.NetworkXNoPath:
+ continue
+ if not rooted:
+ unrooted.append(child)
+ return unrooted
+
+
+# test code for the above:
+# G = nx.DiGraph([('a','b'), ('a','c'), ('b','d'), ('b','e'), ('c','f'), ('2', 'c'), ('1', '2'), ('1', 'a')])
+# target_nodes = ['d', 'e', 'f']
+# assert connect_roots(G, target_nodes).edges == nx.DiGraph([('a','b'), ('a','c'), ('b','d'), ('b','e'), ('c','f')]).edges
+# print(list(connect_roots(G, target_nodes).edges))
+
+def all_paths(g: networkx.DiGraph, nodes: set, preferred_nodes: set = set()) -> List[List[int]]:
"""
Creates a subgraph from g using nodes.
+ Fills in gaps (connect_nodes)
Identifies root and leaf nodes in this subgraph.
Generates all simple paths from each root to each leaf in the original graph g.
Returns the list of all such paths.
"""
- sg = g.subgraph(nodes)
+    # sg = g.subgraph(nodes) # this way gives a view of g, which is frozen
+ # sg = nx.DiGraph(g.subgraph(nodes)) #.copy()) # Creates an independent copy of the subgraph
+ sg = g.subgraph(nodes).copy() # Creates an independent copy of the subgraph
+
+ nodes = set(nodes)
+ sg = connect_nodes(g, nodes, preferred_nodes)
roots = [node for node, degree in sg.in_degree() if degree == 0]
leaves = [node for node, degree in sg.out_degree() if degree == 0]
+
paths = []
- missing_nodes = set() # nodes needed to traverse all paths but not present in nodes list
- paths_with_missing_nodes = []
- descendants_of_missing = set()
paths_node_is_in = defaultdict(list)
+ # already filling in missing in connect_nodes, so no need to do it here anymore
+ # missing_nodes = set() # nodes needed to traverse all paths but not present in nodes list
+ # all_nodes = set(nodes)
+ # paths_with_missing_nodes = []
+ # descendants_of_missing = set()
for root in roots:
for leaf in leaves:
- _paths = list(nx.all_simple_paths(g, root, leaf))
+ # TODO: fix here, it can get really slow, like with http://127.0.0.1:8000/indented-concept-list?codeset_ids=417730759&codeset_ids=423850600&codeset_ids=966671711&codeset_ids=577774492
+ _paths = list(nx.all_simple_paths(sg, root, leaf))
for path in _paths:
# if len(path) > 1: # do i need this? don't think so; it might hide solitary nodes
paths.append(path)
for node in path:
if path not in paths_node_is_in[node]:
paths_node_is_in[node].append(path)
- if node not in nodes:
- missing_nodes.add(node)
- if not path in paths_with_missing_nodes:
- paths_with_missing_nodes.append(path)
- for d in path[path.index(node) + 1:]:
- descendants_of_missing.add(d)
+ # if node not in nodes:
+ # missing_nodes.add(node)
+ # all_nodes.add(node)
+ # if not path in paths_with_missing_nodes:
+ # paths_with_missing_nodes.append(path)
+ # for d in path[path.index(node) + 1:]:
+ # descendants_of_missing.add(d)
+
+ # assert missing_nodes == all_nodes.difference(nodes)
+ # all_nodes_in_paths = set()
+ # for path in paths:
+ # for node in path:
+ # all_nodes_in_paths.add(node)
+ #
+ # if all_nodes.difference(all_nodes_in_paths) != set(): # if there are nodes that are not in any path
+
# if a path contains a missing node and all its descendants
# show up in other paths, the path is not needed
@@ -116,21 +349,22 @@ def all_paths(g: networkx.DiGraph, nodes: List[int]) -> List[List[int]]:
# TODO: figure out if the concept_relationship edges are really needed
# TODO: test that this code is doing what it should (and code above too, while you're at it)
- descendants_of_missing = descendants_of_missing.difference(missing_nodes) # in case missing have missing descendants
-
- for path_with_missing in paths_with_missing_nodes:
- # for each path with missing nodes, check if their descendants show up in other paths
- nodes_to_check = set(path_with_missing).intersection(descendants_of_missing)
- # we have to make sure that every descendant node in this path appears in other paths
- nodes_not_elsewhere = []
- for node in nodes_to_check:
- if not [p for p in paths_node_is_in[node] if p not in paths_with_missing_nodes]:
- # this node does not appear in any non-missing-node paths
- nodes_not_elsewhere.append(node)
- break;
- if not nodes_not_elsewhere:
- # every node appears elsewhere; safe to remove
- paths.remove(path_with_missing)
+ # might still need some of this to pick up orphans or not sure what, but commenting out for now
+ # descendants_of_missing = descendants_of_missing.difference(missing_nodes) # in case missing have missing descendants
+ #
+ # for path_with_missing in paths_with_missing_nodes:
+ # # for each path with missing nodes, check if their descendants show up in other paths
+ # nodes_to_check = set(path_with_missing).intersection(descendants_of_missing)
+ # # we have to make sure that every descendant node in this path appears in other paths
+ # nodes_not_elsewhere = []
+ # for node in nodes_to_check:
+ # if not [p for p in paths_node_is_in[node] if p not in paths_with_missing_nodes]:
+ # # this node does not appear in any non-missing-node paths
+ # nodes_not_elsewhere.append(node)
+ # break;
+ # if not nodes_not_elsewhere:
+ # # every node appears elsewhere; safe to remove
+ # paths.remove(path_with_missing)
return paths
@@ -356,48 +590,56 @@ def create_rel_graphs(save_to_pickle: bool):
timer('get edge records')
edge_generator = generate_graph_edges()
- timer('make graph')
G = nx.DiGraph()
+ if save_to_pickle:
+ msg = 'loading and pickling'
+ pickle_file = open(GRAPH_PATH, 'ab')
+ else:
+ msg = 'loading'
+ pickle_file = None
+
+ timer(msg)
+ rownum = 0
+ chunk_size = 10000
+ msg = msg.replace('ing', 'ed')
+ edges = []
+ chunks_loaded = 0
for source, target in edge_generator:
- G.add_edge(source, target)
+ edges.append((source, target))
+ rownum += 1
+ if rownum >= chunk_size:
+ chunks_loaded += 1
+ G.add_edges_from(edges)
+ edges = []
+ if chunks_loaded % 100 == 0:
+ timer(f'{commify(chunks_loaded * chunk_size)} rows {msg}')
+ rownum = 0
- # edges = [tuple(e.values()) for e in rels]
- # G = nx.from_edgelist(edges, nx.DiGraph)
if save_to_pickle:
- timer(f'write pickle for G with {len(G.nodes)} nodes')
- # nx.write_gpickle(G, GRAPH_PATH) # networkx 4 doesn't have its own pickle
- with open(GRAPH_PATH, 'wb') as f:
- pickle.dump(G, f, pickle.HIGHEST_PROTOCOL)
- # timer('make undirected version')
- # Gu = G.to_undirected()
- # if save_to_pickle:
- # timer('write pickle for that')
- # # nx.write_gpickle(Gu, GRAPH_UNDIRECTED_PATH)
- # with open(GRAPH_UNDIRECTED_PATH, 'wb') as f:
- # pickle.dump(Gu, f, pickle.HIGHEST_PROTOCOL)
+ timer('saving to pickle')
+ pickle.dump(G, pickle_file) # , pickle.HIGHEST_PROTOCOL
+
timer('done')
return G # , Gu
def load_relationship_graph(save_if_not_exists=True):
timer = get_timer('./load_relationship_graph')
- G = None
timer(f'loading {GRAPH_PATH}')
if os.path.isfile(GRAPH_PATH):
- # G = nx.read_gpickle(GRAPH_PATH)
- with open(GRAPH_PATH, 'rb') as f:
- G = pickle.load(f)
- # if G and os.path.isfile(GRAPH_UNDIRECTED_PATH):
- # timer(f'loaded {commify(len(G.nodes))}; loading {GRAPH_UNDIRECTED_PATH}')
- # # Gu = nx.read_gpickle(GRAPH_UNDIRECTED_PATH)
- # with open(GRAPH_UNDIRECTED_PATH, 'rb') as f:
- # Gu = pickle.load(f)
- # timer(f'loaded {commify(len(Gu.nodes))}')
+ G = nx.DiGraph()
+ with open(GRAPH_PATH, 'rb') as pickle_file:
+ G = pickle.load(pickle_file)
+ # while True:
+ # try:
+ # chunk = pickle.load(pickle_file)
+ # G.add_edges_from(chunk)
+ # except EOFError:
+ # break # End of file reached
else:
- # G, Gu = create_rel_graphs(save_if_not_exists)
G = create_rel_graphs(save_if_not_exists)
timer('done')
- return G # , Gu
+ return G
LOAD_FROM_PICKLE = False
diff --git a/backend/utils.py b/backend/utils.py
index cb7bedb57..543eaed9b 100644
--- a/backend/utils.py
+++ b/backend/utils.py
@@ -1,5 +1,6 @@
"""Backend utilities"""
import datetime
+from itertools import chain, combinations
from functools import wraps, reduce
import json
import operator
@@ -16,6 +17,12 @@
from backend.config import CONFIG
+def powerset(iterable):
+ """powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"""
+ s = list(iterable)
+ return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
+
+
def commify(n):
"""Those dirty commies
░░░░░░░░░░▀▀▀██████▄▄▄░░░░░░░░░░
diff --git a/frontend/src/components/CsetComparisonPage.jsx b/frontend/src/components/CsetComparisonPage.jsx
index b7c62f5bd..a276bf9ca 100644
--- a/frontend/src/components/CsetComparisonPage.jsx
+++ b/frontend/src/components/CsetComparisonPage.jsx
@@ -60,15 +60,17 @@ function CsetComparisonPage() {
const concept_ids_by_codeset_id = await dataGetter.fetchAndCacheItems(dataGetter.apiCalls.concept_ids_by_codeset_id, codeset_ids);
let concept_ids = union(flatten(Object.values(concept_ids_by_codeset_id)));
- // have to get indentedCids, which might contain more concept_ids after filling gaps
- const indentedCids = await dataGetter.fetchAndCacheItems(dataGetter.apiCalls.indented_concept_list, concept_ids, );
- // indentedCids = [[, ], ...]
- concept_ids = union(concept_ids/*.map(String)*/, indentedCids.map(d => d[1])).sort();
-
if (!isEmpty(newCset)) {
concept_ids = union(concept_ids, Object.values(newCset.definitions).map(d => d.concept_id));
}
+ // have to get indentedCids, which might contain more concept_ids after filling gaps
+ const extra_concept_ids = []; // not collecting these yet
+ const indentedCids = await dataGetter.fetchAndCacheItems(
+ dataGetter.apiCalls.indented_concept_list, { codeset_ids, extra_concept_ids });
+ // indentedCids = [[, ], ...]
+ concept_ids = union(concept_ids/*.map(String)*/, indentedCids.map(d => d[1])).sort();
+
promises.push(dataGetter.fetchAndCacheItems(dataGetter.apiCalls.concepts, concept_ids));
let [ csmi, selected_csets, conceptLookup, ] = await Promise.all(promises);
@@ -94,8 +96,8 @@ function CsetComparisonPage() {
const concepts = Object.values(conceptLookup);
- const conceptsCids = concepts.map(d => d.concept_id + '').sort();
- console.assert(intersection(conceptsCids, concept_ids.map(String)).length === concept_ids.length,
+ const conceptsCids = concepts.map(d => d.concept_id).sort();
+ console.assert(intersection(conceptsCids, concept_ids).length === concept_ids.length,
"%o", {concepts, conceptsCids, concept_ids});
const currentUserId = (await whoami).id;
diff --git a/frontend/src/state/DataGetter.jsx b/frontend/src/state/DataGetter.jsx
index 9cc513a4f..9f28bb9c5 100644
--- a/frontend/src/state/DataGetter.jsx
+++ b/frontend/src/state/DataGetter.jsx
@@ -226,7 +226,7 @@ class DataGetter {
// formatResultsFunc: edges => edges.map(edge => edge.map(String)), // might need this!!
},
indented_concept_list: { // expects paramList of concept_ids
- expectedParams: [], // concept_ids
+ expectedParams: {}, // codeset_ids plus extra concept_ids if any requested
api: 'indented-concept-list',
apiGetParamName: 'id',
makeQueryString: concept_ids => createSearchParams({id: concept_ids}),
@@ -315,8 +315,8 @@ class DataGetter {
async fetchAndCacheItems(apiDef, params) {
if (typeof(apiDef.expectedParams) !== typeof(params)) {
// apiDef.expectedParams, for now, can be undefined (all_csets) or
- // array (everything else). In future might have occasion to handle
- // objects or strings
+ // array (everything else).
+      // for indented_concept_list: { codeset_ids: [], extra_concept_ids: [] }
throw new Error("passed wrong type");
}
@@ -324,6 +324,18 @@ class DataGetter {
const dataCache = this.dataCache;
+    if (apiDef.api === 'indented-concept-list') { // indented_concept_list: { codeset_ids: [], extra_concept_ids: [] }
+ const {codeset_ids, extra_concept_ids} = params;
+ let cacheKey = codeset_ids.join(',') + ';' + extra_concept_ids.join(',');
+
+ let data = dataCache.cacheGet([apiDef.cacheSlice, cacheKey]);
+ if (isEmpty(data)) {
+ data = await this.axiosCall(apiDef.api, {...apiDef, data: params, backend: true, });
+ dataCache.cachePut([apiDef.cacheSlice, cacheKey], data);
+ }
+ return data;
+
+ }
if (typeof(apiDef.expectedParams) === 'undefined') {
// handle no-param calls (all_csets, whoami) here; get from cache or fetch and cache
let data = dataCache.cacheGet([apiDef.cacheSlice]);
diff --git a/frontend/tests/README.md b/frontend/tests/README.md
index b4daf60d0..f4fba2e73 100644
--- a/frontend/tests/README.md
+++ b/frontend/tests/README.md
@@ -11,13 +11,20 @@ what optimization is being performed and tested. Can take multiple values delimi
|---------------------|------------------------------------------------------|:------------:|:--------:|
| `as-is` or no value | Use code without attempting any special optimization | yes | yes |
| `no-cache` | Disables caching -- in DataCache.cachePut | yes | yes |
-| `no-rxext-from-api` | RxNorm Extension codes left out of backend API results. See [below](#No-RxNorm-Extension-codes)| no | no |
+| `no-rxext-from-api` | RxNorm Extension codes left out of backend API results. See [below](#no-rxnorm-extension-codes)| no | no |
## Notes
### No RxNorm Extension codes
-This could be implemented by excluding these codes from `concept_set_members`, `concept_set_version_item`, and maybe even `concept`, `concept_relationship`, and `concept_ancestor` and all downstream derived tables. But that would make it impossible to tell user how many RxNorm Extension codes have been omitted.
+1. This could be implemented by excluding these codes from `concept_set_members`, `concept_set_version_item`,
+ and maybe even `concept`, `concept_relationship`, and `concept_ancestor` and all downstream derived tables.
+   But that would make it impossible to tell the user how many RxNorm Extension codes have been omitted.
-It could be done by eliminating the codes as a last step of each api call that returns `concept_id`s, but that
-would actually increase api processing time and only save time/memory in the frontend receiving and processing results.
\ No newline at end of file
+
+2. It could be done by eliminating the codes as a last step of each api call that returns `concept_id`s,
+ but that would actually increase api processing time and only save time/memory in the frontend
+ receiving and processing results.
+
+3. What I'm going to try is #1, but into a special schema (n3c_no_rxnorm), just to see how much it helps
+ with the antibiotics test, which currently crashes.
diff --git a/frontend/tests/large-cset-handling.test.js b/frontend/tests/large-cset-handling.test.js
index e5ff11d27..fdc6efa46 100644
--- a/frontend/tests/large-cset-handling.test.js
+++ b/frontend/tests/large-cset-handling.test.js
@@ -8,7 +8,6 @@
(c) Command?: make a command that sets the URL in a file and then import that into the playwright test
(d) playwright.config.js? - (didn't work; kinda makes sense since that url doesn't get passed)
*/
-// @ts-check
import {selectedConfigs, deploymentConfigs} from "./setup-test-environments";
import {parse} from 'csv-parse/sync';
@@ -18,14 +17,25 @@ const { PerformanceObserver, performance } = require('node:perf_hooks');
const experiment = 'no_cache';
-const configsToRun = 'local'; // only run these tests in local for now
-// const configsToRun = selectedConfigs; // uncomment to run on dev or prod
+// const configsToRun = 'local'; // only run these tests in local for now
+const configsToRun = selectedConfigs; // uncomment to run on dev or prod
/* setUp ---------------------------------------------------------------------------------------------------------------
test.beforeAll(async () => {
test.setTimeout(10000); // 10 seconds
}); */
+/*
+| test_type | test_name | expected result | codeset_ids |
+|---------------------|------------------------|-----------------|------------------------------------------------------------------|
+| many small | neurological | fast | 1000002657, 241882304, 464777695, 488007883, 1000087163 |
+| single 2000 | autoimmune 1 | not bad | 101398605, |
+| mixed 6000 to 21000 | Sulfonylureas | | 417730759, 423850600, 966671711, 577774492 |
+| mixed 30 to 3000 | autoimmune 2 | | 101398605, 947369784, 287650725, 283328624, 115052941 |
+| single 30000 | antibiotics 1 | | 909552172 |
+| many 5000 | many-5000-what-is-this | | 295817643, 613313946, 613313946, 781483910, 986994148, 671755133 |
+| single small | single-small-again | | 1000002363 |
+ */
const tests_csv = `
testType,testName,codeset_ids
single small,single-small,1000002363
@@ -65,12 +75,13 @@ async function getMem(page, prefix, fields) {
return mem;
}
-for (const csets_test of tests) {
- let {testType, testName, codeset_ids} = csets_test;
- codeset_ids = codeset_ids.split(',');
- for (const envName in configsToRun) {
- const appUrl = deploymentConfigs[envName];
+for (const envName in configsToRun) {
+ const appUrl = deploymentConfigs[envName];
+ for (const csets_test of tests) {
+ let {testType, testName, codeset_ids} = csets_test;
+ codeset_ids = codeset_ids.split(',');
test(testName, async({page, browser, context}, testInfo) => {
+ testInfo.attach('started', {body: `${testName} on ${envName}`})
console.log(`running ${testName} on ${envName}`);
page.setDefaultTimeout(120000);
/* if (testName === 'single-small-second-time') {
diff --git a/frontend/tests/setup-test-environments.js b/frontend/tests/setup-test-environments.js
index c19eab080..fbec2128f 100644
--- a/frontend/tests/setup-test-environments.js
+++ b/frontend/tests/setup-test-environments.js
@@ -13,7 +13,6 @@ playwright test`, where dev is short for 'development' and prod is short for 'p
(c) Command?: make a command that sets the URL in a file and then import that into the playwright test
(d) playwright.config.js? - (didn't work; kinda makes sense since that url doesn't get passed)
*/
-// @ts-check
import {DEPLOYMENT} from "../src/env";
const { test, expect } = require('@playwright/test');
@@ -62,7 +61,6 @@ function getSelectedConfigs(estr) {
}
export let selectedConfigs = {};
envsString = 'local';
- */
export class ReportStash {
data = {
@@ -79,6 +77,7 @@ export class ReportStash {
report = {...firstCols, ...lastCols};
}
}
+*/
/*
export function logTestResult(result) {
diff --git a/frontend/tests/test-reporter.js b/frontend/tests/test-reporter.js
index 538f00d99..f2a72603b 100644
--- a/frontend/tests/test-reporter.js
+++ b/frontend/tests/test-reporter.js
@@ -102,11 +102,12 @@ class MyReporter {
onStdOut(chunk, test, result) {
console.log(`${test} output: ${chunk}`);
}
- */
onStdErr(chunk, test, result) {
+ let c = typeof(chunk) === 'string' ? chunk : JSON.stringify(chunk);
console.log(`${test} error: ${chunk}`);
- debugger;
+ // debugger;
}
+ */
}
module.exports = MyReporter;
\ No newline at end of file