diff --git a/src/metakb/database.py b/src/metakb/database.py index bbbe4aee..47bc729e 100644 --- a/src/metakb/database.py +++ b/src/metakb/database.py @@ -76,7 +76,7 @@ def _get_credentials( "coding_constraint": "CREATE CONSTRAINT coding_constraint IF NOT EXISTS FOR (c:Coding) REQUIRE (c.code, c.label, c.system) IS UNIQUE;", "gene_id_constraint": "CREATE CONSTRAINT gene_id_constraint IF NOT EXISTS FOR (n:Gene) REQUIRE n.id IS UNIQUE;", "disease_id_constraint": "CREATE CONSTRAINT disease_id_constraint IF NOT EXISTS FOR (n:Disease) REQUIRE n.id IS UNIQUE;", - "therapeuticprocedure_id_constraint": "CREATE CONSTRAINT therapeuticprocedure_id_constraint IF NOT EXISTS FOR (n:TherapeuticProcedure) REQUIRE n.id IS UNIQUE;", + "therapy_id_constraint": "CREATE CONSTRAINT therapy_id_constraint IF NOT EXISTS FOR (n:Therapy) REQUIRE n.id IS UNIQUE;", "variation_id_constraint": "CREATE CONSTRAINT variation_id_constraint IF NOT EXISTS FOR (n:Variation) REQUIRE n.id IS UNIQUE;", "categoricalvariant_id_constraint": "CREATE CONSTRAINT categoricalvariant_id_constraint IF NOT EXISTS FOR (n:CategoricalVariant) REQUIRE n.id IS UNIQUE;", "variantgroup_id_constraint": "CREATE CONSTRAINT variantgroup_id_constraint IF NOT EXISTS FOR (n:VariantGroup) REQUIRE n.id IS UNIQUE;", diff --git a/src/metakb/load_data.py b/src/metakb/load_data.py index baf5b203..3afef352 100644 --- a/src/metakb/load_data.py +++ b/src/metakb/load_data.py @@ -8,6 +8,7 @@ from metakb.database import get_driver from metakb.normalizers import VICC_NORMALIZER_DATA, ViccDiseaseNormalizerData +from metakb.transformers.base import TherapyType _logger = logging.getLogger(__name__) @@ -78,10 +79,8 @@ def _add_method(tx: ManagedTransaction, method: dict, ids_in_stmts: set[str]) -> is_reported_in = method.get("reportedIn") if is_reported_in: # Method's documents are unique and do not currently have IDs - # They also only have one document - document = is_reported_in[0] - _add_document(tx, document, ids_in_stmts) - doc_doi = 
document["doi"] + _add_document(tx, is_reported_in, ids_in_stmts) + doc_doi = is_reported_in["doi"] query += f""" MERGE (d:Document {{ doi:'{doc_doi}' }}) MERGE (m) -[:IS_REPORTED_IN] -> (d) @@ -105,16 +104,13 @@ def _add_gene_or_disease( obj = obj_in.copy() - obj_type = obj["type"] + obj_type = obj["conceptType"] if obj_type not in {"Gene", "Disease"}: msg = f"Invalid object type: {obj_type}" raise TypeError(msg) - obj_keys = [ - _create_parameterized_query( - obj, ("id", "label", "description", "alternativeLabels", "type") - ) - ] + obj["conceptType"] = obj_type + obj_keys = [_create_parameterized_query(obj, ("id", "label", "conceptType"))] _add_mappings_and_exts_to_obj(obj, obj_keys) obj_keys = ", ".join(obj_keys) @@ -126,72 +122,73 @@ def _add_gene_or_disease( tx.run(query, **obj) -def _add_therapeutic_procedure( +def _add_therapy_or_group( tx: ManagedTransaction, - therapeutic_procedure: dict, + therapy_in: dict, ids_in_stmts: set[str], ) -> None: - """Add therapeutic procedure node and its relationships + """Add therapy or therapy group node and its relationships :param tx: Transaction object provided to transaction functions - :param therapeutic_procedure: Therapeutic procedure CDM object + :param therapy: Therapy Mappable Concept or Therapy Group object :param ids_in_stmts: IDs found in statements - :raises TypeError: When therapeutic procedure type is invalid + :raises TypeError: When therapy type is invalid """ - if therapeutic_procedure["id"] not in ids_in_stmts: + if therapy_in["id"] not in ids_in_stmts: return - tp = therapeutic_procedure.copy() + therapy = therapy_in.copy() + concept_type = therapy.get("conceptType") + group_type = therapy.get("groupType", {}).get("label") - tp_type = tp["type"] - if tp_type == "TherapeuticAgent": - _add_therapeutic_agent(tx, tp) - elif tp_type in {"CombinationTherapy", "TherapeuticSubstituteGroup"}: - keys = [_create_parameterized_query(tp, ("id", "type"))] + if concept_type: + _add_therapy(tx, therapy) + elif 
group_type in TherapyType.__members__.values(): + therapy["groupType"] = group_type + keys = [_create_parameterized_query(therapy, ("id", "groupType"))] - _add_mappings_and_exts_to_obj(tp, keys) + _add_mappings_and_exts_to_obj(therapy, keys) keys = ", ".join(keys) - query = f"MERGE (tp:{tp_type}:TherapeuticProcedure {{ {keys} }})" - tx.run(query, **tp) + query = f"MERGE (tg:{group_type}:Therapy {{ {keys} }})" + tx.run(query, **therapy) - tas = tp["components"] if tp_type == "CombinationTherapy" else tp["substitutes"] - for ta in tas: - _add_therapeutic_agent(tx, ta) + for ta in therapy["therapies"]: + _add_therapy(tx, ta) query = f""" - MERGE (tp:{tp_type}:TherapeuticProcedure {{id: '{tp['id']}'}}) - MERGE (ta:TherapeuticAgent:TherapeuticProcedure {{id: '{ta['id']}'}}) + MERGE (tg:{group_type}:Therapy {{id: '{therapy['id']}'}}) + MERGE (t:Therapy {{id: '{ta['id']}'}}) """ - if tp_type == "CombinationTherapy": - query += "MERGE (tp) -[:HAS_COMPONENTS] -> (ta)" + if group_type == TherapyType.COMBINATION_THERAPY: + query += "MERGE (tg) -[:HAS_COMPONENTS] -> (t)" else: - query += "MERGE (tp) -[:HAS_SUBSTITUTES] -> (ta)" + query += "MERGE (tg) -[:HAS_SUBSTITUTES] -> (t)" tx.run(query) else: - msg = f"Invalid therapeutic procedure type: {tp_type}" + msg = f"Therapy `conceptType` not provided and invalid `groupType` provided: {group_type}" raise TypeError(msg) -def _add_therapeutic_agent(tx: ManagedTransaction, therapeutic_agent: dict) -> None: +def _add_therapy(tx: ManagedTransaction, therapeutic_agent: dict) -> None: """Add therapeutic agent node and its relationships :param tx: Transaction object provided to transaction functions :param therapeutic_agent: Therapeutic Agent CDM object """ - ta = therapeutic_agent.copy() + therapy = therapeutic_agent.copy() nonnull_keys = [ - _create_parameterized_query(ta, ("id", "label", "alternativeLabels", "type")) + _create_parameterized_query(therapy, ("id", "label", "conceptType")) ] - _add_mappings_and_exts_to_obj(ta, 
nonnull_keys) + _add_mappings_and_exts_to_obj(therapy, nonnull_keys) nonnull_keys = ", ".join(nonnull_keys) query = f""" - MERGE (ta:TherapeuticAgent:TherapeuticProcedure {{ {nonnull_keys} }}) + MERGE (t:Therapy {{ {nonnull_keys} }}) """ - tx.run(query, **ta) + tx.run(query, **therapy) def _add_location(tx: ManagedTransaction, location_in: dict) -> None: @@ -243,6 +240,12 @@ def _add_variation(tx: ManagedTransaction, variation_in: dict) -> None: v["state"] = json.dumps(state) v_keys.append("v.state=$state") + for ext in v.get("extensions") or []: + key = ext["name"] + if key == "mane_genes": + v[key] = json.dumps(ext["value"]) + v_keys.append(f"v.{key}=${key}") + v_keys = ", ".join(v_keys) query = f""" @@ -278,15 +281,13 @@ def _add_categorical_variant( cv = categorical_variant_in.copy() mp_nonnull_keys = [ - _create_parameterized_query( - cv, ("id", "label", "description", "alternativeLabels", "type") - ) + _create_parameterized_query(cv, ("id", "label", "description", "type")) ] _add_mappings_and_exts_to_obj(cv, mp_nonnull_keys) mp_keys = ", ".join(mp_nonnull_keys) - defining_context = cv["constraints"][0]["definingContext"] + defining_context = cv["constraints"][0]["allele"] _add_variation(tx, defining_context) dc_type = defining_context["type"] @@ -301,10 +302,10 @@ def _add_categorical_variant( query = f""" {members_match} - MERGE (dc:Variation:{dc_type} {{ id: '{defining_context['id']}' }}) - MERGE (dc) -[:HAS_LOCATION] -> (loc) + MERGE (cv:Variation:{dc_type} {{ id: '{defining_context['id']}' }}) + MERGE (cv) -[:HAS_LOCATION] -> (loc) MERGE (v:Variation:{cv['type']} {{ {mp_keys} }}) - MERGE (v) -[:HAS_DEFINING_CONTEXT] -> (dc) + MERGE (v) -[:HAS_DEFINING_CONTEXT] -> (cv) {members_relation} """ tx.run(query, **cv) @@ -374,11 +375,11 @@ def _add_obj_id_to_set(obj: dict, ids_set: set[str]) -> None: for obj in [ statement.get("specifiedBy"), # method statement.get("reportedIn"), - statement.get("subjectVariant"), - statement.get("objectTherapeutic"), - 
statement.get("objectCondition"), - statement.get("conditionQualifier"), - statement.get("geneContextQualifier"), + statement.get("proposition", {}).get("subjectVariant"), + statement.get("proposition", {}).get("objectTherapeutic"), + statement.get("proposition", {}).get("objectCondition"), + statement.get("proposition", {}).get("conditionQualifier"), + statement.get("proposition", {}).get("geneContextQualifier"), ]: if obj: if isinstance(obj, list): @@ -399,7 +400,7 @@ def _add_statement(tx: ManagedTransaction, statement_in: dict) -> None: statement = statement_in.copy() statement_type = statement["type"] statement_keys = _create_parameterized_query( - statement, ("id", "description", "direction", "predicate", "type") + statement, ("id", "description", "direction", "type") ) match_line = "" @@ -412,12 +413,21 @@ def _add_statement(tx: ManagedTransaction, statement_in: dict) -> None: match_line += f"MERGE ({name} {{ id: '{ri_doc_id}'}})\n" rel_line += f"MERGE (s) -[:IS_REPORTED_IN] -> ({name})\n" - allele_origin = statement.get("alleleOriginQualifier") + proposition = statement["proposition"] + statement["propositionType"] = proposition["type"] + match_line += "SET s.propositionType=$propositionType\n" + + allele_origin = proposition.get("alleleOriginQualifier") if allele_origin: - statement["alleleOriginQualifier"] = allele_origin + statement["alleleOriginQualifier"] = allele_origin["label"] match_line += "SET s.alleleOriginQualifier=$alleleOriginQualifier\n" - gene_context_id = statement.get("geneContextQualifier", {}).get("id") + predicate = proposition.get("predicate") + if predicate: + statement["predicate"] = predicate + match_line += "SET s.predicate=$predicate\n" + + gene_context_id = proposition.get("geneContextQualifier", {}).get("id") if gene_context_id: match_line += f"MERGE (g:Gene {{id: '{gene_context_id}'}})\n" rel_line += "MERGE (s) -[:HAS_GENE_CONTEXT] -> (g)\n" @@ -428,7 +438,11 @@ def _add_statement(tx: ManagedTransaction, statement_in: dict) -> 
None: coding = statement.get("strength") if coding: - coding_key_fields = ("code", "label", "system") + coding["url"] = next( + ext["value"] for ext in coding["extensions"] if ext["name"] == "url" + ) + + coding_key_fields = ("primaryCode", "label", "url") coding_keys = _create_parameterized_query( coding, coding_key_fields, entity_param_prefix="coding_" @@ -441,23 +455,25 @@ def _add_statement(tx: ManagedTransaction, statement_in: dict) -> None: match_line += f"MERGE (c:Coding {{ {coding_keys} }})\n" rel_line += "MERGE (s) -[:HAS_STRENGTH] -> (c)\n" - variant_id = statement["subjectVariant"]["id"] + variant_id = proposition["subjectVariant"]["id"] match_line += f"MERGE (v:Variation {{ id: '{variant_id}' }})\n" rel_line += "MERGE (s) -[:HAS_VARIANT] -> (v)\n" - therapeutic = statement.get("objectTherapeutic") + therapeutic = proposition.get("objectTherapeutic") if therapeutic: therapeutic_id = therapeutic["id"] - match_line += f"MERGE (t:TherapeuticProcedure {{ id: '{therapeutic_id}' }})\n" + match_line += f"MERGE (t:Therapy {{ id: '{therapeutic_id}' }})\n" rel_line += "MERGE (s) -[:HAS_THERAPEUTIC] -> (t)\n" - tumor_type = statement.get("conditionQualifier") or statement.get("objectCondition") + tumor_type = proposition.get("conditionQualifier") or proposition.get( + "objectCondition" + ) tumor_type_id = tumor_type["id"] match_line += f"MERGE (tt:Condition {{ id: '{tumor_type_id}' }})\n" rel_line += "MERGE (s) -[:HAS_TUMOR_TYPE] -> (tt)\n" query = f""" - MERGE (s:{statement_type}:StudyStatement:Statement {{ {statement_keys} }}) + MERGE (s:{statement_type}:StudyStatement {{ {statement_keys} }}) {match_line} {rel_line} """ @@ -493,8 +509,8 @@ def add_transformed_data(driver: Driver, data: dict) -> None: for obj in data.get(obj_type, []): session.execute_write(_add_gene_or_disease, obj, ids_in_stmts) - for tp in data.get("therapeutic_procedures", []): - session.execute_write(_add_therapeutic_procedure, tp, ids_in_stmts) + for tp in data.get("therapies", []): + 
session.execute_write(_add_therapy_or_group, tp, ids_in_stmts) # This should always be done last for statement in statements: diff --git a/src/metakb/normalizers.py b/src/metakb/normalizers.py index 572c7568..8fc92097 100644 --- a/src/metakb/normalizers.py +++ b/src/metakb/normalizers.py @@ -12,7 +12,7 @@ from disease.database.database import AWS_ENV_VAR_NAME as DISEASE_AWS_ENV_VAR_NAME from disease.query import QueryHandler as DiseaseQueryHandler from disease.schemas import NormalizationService as NormalizedDisease -from ga4gh.core.entity_models import Extension +from ga4gh.core.models import Extension from ga4gh.vrs.models import ( Allele, CopyNumberChange, @@ -303,10 +303,7 @@ def get_regulatory_approval_extension( """ regulatory_approval_extension = None tn_resp_exts = ( - therapy_norm_resp.model_dump() - .get("therapeutic_agent", {}) - .get("extensions") - or [] + therapy_norm_resp.model_dump().get("therapy", {}).get("extensions") or [] ) tn_ext = [v for v in tn_resp_exts if v["name"] == "regulatory_approval"] @@ -337,7 +334,7 @@ def get_regulatory_approval_extension( if indication_ext["value"] == matched_ext_value: matched_ind = { "id": indication["id"], - "type": indication["type"], + "conceptType": indication["conceptType"], "label": indication["label"], } diff --git a/src/metakb/query.py b/src/metakb/query.py index a676744d..2cf58257 100644 --- a/src/metakb/query.py +++ b/src/metakb/query.py @@ -5,20 +5,15 @@ from copy import copy from enum import Enum -from ga4gh.cat_vrs.core_models import CategoricalVariant, DefiningContextConstraint -from ga4gh.core.domain_models import ( - CommonDomainType, - Disease, - Gene, - TherapeuticAgent, - TherapeuticProcedure, -) -from ga4gh.core.entity_models import Coding, Document, Extension, Method -from ga4gh.va_spec.profiles.var_study_stmt import ( +from ga4gh.cat_vrs.models import CategoricalVariant, DefiningAlleleConstraint +from ga4gh.core.models import Extension, MappableConcept +from ga4gh.va_spec.aac_2017.models 
import ( VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) +from ga4gh.va_spec.base.core import Document, Method +from ga4gh.va_spec.base.domain_entities import TherapyGroup from ga4gh.vrs.models import Expression, Variation from neo4j import Driver from neo4j.graph import Node @@ -39,6 +34,7 @@ ServiceMeta, ) from metakb.schemas.app import SourceName +from metakb.transformers.base import TherapyType logger = logging.getLogger(__name__) @@ -63,18 +59,11 @@ class TherapeuticRelation(str, Enum): HAS_SUBSTITUTES = "HAS_SUBSTITUTES" -class TherapeuticProcedureType(str, Enum): - """Constrain possible values for kinds of therapeutic procedures.""" - - COMBINATION = "CombinationTherapy" - SUBSTITUTES = "TherapeuticSubstituteGroup" - - -# Statement types to corresponding class mapping -STMT_TYPE_TO_CLASS = { - "VariantDiagnosticStudyStatement": VariantDiagnosticStudyStatement, - "VariantPrognosticStudyStatement": VariantPrognosticStudyStatement, - "VariantTherapeuticResponseStudyStatement": VariantTherapeuticResponseStudyStatement, +# Proposition types to corresponding class mapping +PROP_TYPE_TO_CLASS = { + "VariantDiagnosticProposition": VariantDiagnosticStudyStatement, + "VariantPrognosticProposition": VariantPrognosticStudyStatement, + "VariantTherapeuticResponseProposition": VariantTherapeuticResponseStudyStatement, } @@ -456,8 +445,8 @@ def _get_statements( if normalized_therapy: query += """ - OPTIONAL MATCH (s) -[:HAS_THERAPEUTIC] -> (tp:TherapeuticAgent {normalizer_id:$t_id}) - OPTIONAL MATCH (s) -[:HAS_THERAPEUTIC] -> () -[:HAS_SUBSTITUTES|HAS_COMPONENTS] -> (ta:TherapeuticAgent {normalizer_id:$t_id}) + OPTIONAL MATCH (s) -[:HAS_THERAPEUTIC] -> (tp:Therapy {normalizer_id:$t_id}) + OPTIONAL MATCH (s) -[:HAS_THERAPEUTIC] -> () -[:HAS_SUBSTITUTES|HAS_COMPONENTS] -> (ta:Therapy {normalizer_id:$t_id}) WITH s, tp, ta WHERE tp IS NOT NULL OR ta IS NOT NULL """ @@ -478,7 +467,13 @@ def _get_statements( return 
[s[0] for s in self.driver.execute_query(query, params).records] - def _get_nested_stmts(self, statement_nodes: list[Node]) -> list[dict]: + def _get_nested_stmts( + self, statement_nodes: list[Node] + ) -> list[ + VariantDiagnosticStudyStatement + | VariantPrognosticStudyStatement + | VariantTherapeuticResponseStudyStatement + ]: """Get a list of nested statements. :param statement_nodes: A list of Statement Nodes @@ -500,30 +495,33 @@ def _get_nested_stmts(self, statement_nodes: list[Node]) -> list[dict]: return nested_stmts - def _get_nested_stmt(self, stmt_node: Node) -> dict: + def _get_nested_stmt( + self, stmt_node: Node + ) -> ( + VariantDiagnosticStudyStatement + | VariantPrognosticStudyStatement + | VariantTherapeuticResponseStudyStatement + ): """Get information related to a statement - Only VariantTherapeuticResponseStudyStatement, VariantPrognosticStudyStatement, - and VariantDiagnosticStudyStatement are supported at the moment :param stmt_node: Neo4j graph node for statement :return: Nested statement """ - study_stmt_type = stmt_node["type"] - if study_stmt_type not in { - "VariantTherapeuticResponseStudyStatement", - "VariantPrognosticStudyStatement", - "VariantDiagnosticStudyStatement", - }: + prop_type = stmt_node["propositionType"] + if prop_type not in PROP_TYPE_TO_CLASS: return {} - if study_stmt_type == "VariantTherapeuticResponseStudyStatement": + if prop_type == "VariantTherapeuticResponseProposition": condition_key = "conditionQualifier" else: condition_key = "objectCondition" params = { - condition_key: None, - "subjectVariant": None, + "proposition": { + "alleleOriginQualifier": {"label": None}, + "predicate": stmt_node["predicate"], + condition_key: None, + }, "strength": None, "reportedIn": [], "specifiedBy": None, @@ -547,27 +545,32 @@ def _get_nested_stmt(self, stmt_node: Node) -> dict: node = data["n"] if rel_type == "HAS_TUMOR_TYPE": - params[condition_key] = self._get_disease(node) + params["proposition"][condition_key] = 
self._get_disease(node) elif rel_type == "HAS_VARIANT": - params["subjectVariant"] = self._get_cat_var(node) + params["proposition"]["subjectVariant"] = self._get_cat_var(node) elif rel_type == "HAS_GENE_CONTEXT": - params["geneContextQualifier"] = self._get_gene_context_qualifier( - statement_id + params["proposition"]["geneContextQualifier"] = ( + self._get_gene_context_qualifier(statement_id) + ) + params["proposition"]["alleleOriginQualifier"]["label"] = stmt_node.get( + "alleleOriginQualifier" ) - params["alleleOriginQualifier"] = stmt_node.get("alleleOriginQualifier") elif rel_type == "IS_SPECIFIED_BY": - node["reportedIn"] = [self._get_method_document(node["id"])] + node["reportedIn"] = self._get_method_document(node["id"]) params["specifiedBy"] = Method(**node) elif rel_type == "IS_REPORTED_IN": params["reportedIn"].append(self._get_document(node)) elif rel_type == "HAS_STRENGTH": - params["strength"] = Coding(**node) + node["extensions"] = [Extension(name="url", value=node["url"])] + params["strength"] = MappableConcept(**node) elif rel_type == "HAS_THERAPEUTIC": - params["objectTherapeutic"] = self._get_therapeutic_procedure(node) + params["proposition"]["objectTherapeutic"] = ( + self._get_therapeutic_procedure(node) + ) else: logger.warning("relation type not supported: %s", rel_type) - return STMT_TYPE_TO_CLASS[study_stmt_type](**params).model_dump() + return PROP_TYPE_TO_CLASS[prop_type](**params) @staticmethod def _get_vicc_normalizer_extension(node: dict) -> ViccNormalizerDataExtension: @@ -581,7 +584,7 @@ def _get_vicc_normalizer_extension(node: dict) -> ViccNormalizerDataExtension: "label": node["normalizer_label"], } - if node["type"] == CommonDomainType.DISEASE: + if node["conceptType"] == "Disease": params["mondo_id"] = node.get("normalizer_mondo_id") ext_val = ViccDiseaseNormalizerData(**params) else: @@ -589,15 +592,22 @@ def _get_vicc_normalizer_extension(node: dict) -> ViccNormalizerDataExtension: return 
ViccNormalizerDataExtension(value=ext_val.model_dump()) - def _get_disease(self, node: dict) -> Disease: + def _get_disease(self, node: dict) -> MappableConcept: """Get disease data from a node with relationship ``HAS_TUMOR_TYPE`` :param node: Disease node data - :return: Disease object + :return: Disease mappable concept object """ node["mappings"] = _deserialize_field(node, "mappings") - node["extensions"] = [self._get_vicc_normalizer_extension(node)] - return Disease(**node) + extensions = [self._get_vicc_normalizer_extension(node)] + descr = node.get("description") + if descr: + extensions.append(Extension(name="description", value=descr)) + aliases = node.get("aliases") + if aliases: + extensions.append(Extension(name="aliases", value=json.loads(aliases))) + node["extensions"] = extensions + return MappableConcept(**node) def _get_variations(self, cv_id: str, relation: VariationRelation) -> list[dict]: """Get list of variations associated to categorical variant @@ -636,6 +646,11 @@ def _get_variations(self, cv_id: str, relation: VariationRelation) -> list[dict] v_params["location"]["sequenceReference"] = json.loads( loc_params["sequence_reference"] ) + mane_genes = v_params.get("mane_genes") + if mane_genes: + v_params["extensions"] = [ + Extension(name="mane_genes", value=json.loads(mane_genes)) + ] variations.append(Variation(**v_params).model_dump()) return variations @@ -672,10 +687,15 @@ def _get_cat_var(self, node: dict) -> CategoricalVariant: ) ) + if "aliases" in node: + extensions.append( + Extension(name="aliases", value=json.loads(node["aliases"])) + ) + node["extensions"] = extensions or None node["constraints"] = [ - DefiningContextConstraint( - definingContext=self._get_variations( + DefiningAlleleConstraint( + allele=self._get_variations( node["id"], VariationRelation.HAS_DEFINING_CONTEXT )[0] ) @@ -685,7 +705,7 @@ def _get_cat_var(self, node: dict) -> CategoricalVariant: ) return CategoricalVariant(**node) - def 
_get_gene_context_qualifier(self, statement_id: str) -> Gene | None: + def _get_gene_context_qualifier(self, statement_id: str) -> MappableConcept | None: """Get gene context qualifier data for a statement :param statement_id: ID of statement node @@ -712,8 +732,16 @@ def _get_gene_context_qualifier(self, statement_id: str) -> Gene | None: gene_node = results.records[0].data()["g"] gene_node["mappings"] = _deserialize_field(gene_node, "mappings") - gene_node["extensions"] = [self._get_vicc_normalizer_extension(gene_node)] - return Gene(**gene_node) + extensions = [self._get_vicc_normalizer_extension(gene_node)] + descr = gene_node.get("description") + if descr: + extensions.append(Extension(name="description", value=descr)) + aliases = gene_node.get("aliases") + if aliases: + extensions.append(Extension(name="aliases", value=json.loads(aliases))) + + gene_node["extensions"] = extensions + return MappableConcept(**gene_node) def _get_method_document(self, method_id: str) -> Document | None: """Get document for a given method @@ -749,15 +777,18 @@ def _get_document(node: dict) -> Document: def _get_therapeutic_procedure( self, node: dict, - ) -> TherapeuticProcedure | TherapeuticAgent | None: + ) -> MappableConcept | None: """Get therapeutic procedure from a node with relationship ``HAS_THERAPEUTIC`` :param node: Therapeutic node data. This will be mutated. :return: Therapeutic procedure if node type is supported. Currently, therapeutic action is not supported. 
""" - node_type = node["type"] - if node_type in {"CombinationTherapy", "TherapeuticSubstituteGroup"}: + node_type = node.get("groupType") or node.get("conceptType") + if node_type in { + TherapyType.COMBINATION_THERAPY, + TherapyType.THERAPEUTIC_SUBSTITUTE_GROUP, + }: civic_therapy_interaction_type = node.get("civic_therapy_interaction_type") if civic_therapy_interaction_type: node["extensions"] = [ @@ -767,22 +798,24 @@ def _get_therapeutic_procedure( ) ] - if node_type == "CombinationTherapy": - node["components"] = self._get_therapeutic_agents( + if node_type == TherapyType.COMBINATION_THERAPY: + node["therapies"] = self._get_therapeutic_agents( node["id"], - TherapeuticProcedureType.COMBINATION, + TherapyType.COMBINATION_THERAPY, TherapeuticRelation.HAS_COMPONENTS, ) else: - node["substitutes"] = self._get_therapeutic_agents( + node["therapies"] = self._get_therapeutic_agents( node["id"], - TherapeuticProcedureType.SUBSTITUTES, + TherapyType.THERAPEUTIC_SUBSTITUTE_GROUP, TherapeuticRelation.HAS_SUBSTITUTES, ) - therapeutic = TherapeuticProcedure(**node) - elif node_type == "TherapeuticAgent": - therapeutic = self._get_therapeutic_agent(node) + node["groupType"] = MappableConcept(label=node_type) + + therapeutic = TherapyGroup(**node) + elif node_type == TherapyType.THERAPY: + therapeutic = self._get_therapy(node) else: logger.warning("node type not supported: %s", node_type) therapeutic = None @@ -792,14 +825,14 @@ def _get_therapeutic_procedure( def _get_therapeutic_agents( self, tp_id: str, - tp_type: TherapeuticProcedureType, + tp_type: TherapyType, tp_relation: TherapeuticRelation, - ) -> list[TherapeuticAgent]: + ) -> list[MappableConcept]: """Get list of therapeutic agents for therapeutic combination or substitutes group :param tp_id: ID for combination therapy or therapeutic substitute group - :param tp_type: Therapeutic Procedure type + :param tp_type: Therapeutic object type :param tp_relation: Relationship type for therapeutic procedure and 
therapeutic agent :return: List of Therapeutic Agents for a combination therapy or therapeutic @@ -807,7 +840,7 @@ def _get_therapeutic_agents( """ query = f""" MATCH (tp:{tp_type.value} {{ id: $tp_id }}) -[:{tp_relation.value}] - -> (ta:TherapeuticAgent) + -> (ta:Therapy) RETURN ta """ therapeutic_agents = [] @@ -815,15 +848,15 @@ def _get_therapeutic_agents( for r in results: r_params = r.data() ta_params = r_params["ta"] - ta = self._get_therapeutic_agent(ta_params) + ta = self._get_therapy(ta_params) therapeutic_agents.append(ta) return therapeutic_agents - def _get_therapeutic_agent(self, in_ta_params: dict) -> TherapeuticAgent: - """Transform input parameters into TherapeuticAgent object + def _get_therapy(self, in_ta_params: dict) -> MappableConcept: + """Transform input parameters into Therapy object :param in_ta_params: Therapeutic Agent node properties - :return: TherapeuticAgent + :return: Therapy """ ta_params = copy(in_ta_params) ta_params["mappings"] = _deserialize_field(ta_params, "mappings") @@ -834,9 +867,11 @@ def _get_therapeutic_agent(self, in_ta_params: dict) -> TherapeuticAgent: extensions.append( Extension(name="regulatory_approval", value=regulatory_approval) ) - + aliases = ta_params.get("aliases") + if aliases: + extensions.append(Extension(name="aliases", value=json.loads(aliases))) ta_params["extensions"] = extensions - return TherapeuticAgent(**ta_params) + return MappableConcept(**ta_params) async def batch_search_statements( self, @@ -924,7 +959,5 @@ async def batch_search_statements( result = session.run(query, v_ids=variation_ids, skip=start, limit=limit) statement_nodes = [r[0] for r in result] response.statement_ids = [n["id"] for n in statement_nodes] - stmts = self._get_nested_stmts(statement_nodes) - - response.statements = [STMT_TYPE_TO_CLASS[s["type"]](**s) for s in stmts] + response.statements = self._get_nested_stmts(statement_nodes) return response diff --git a/src/metakb/schemas/api.py b/src/metakb/schemas/api.py index 
59251e94..6d98c17e 100644 --- a/src/metakb/schemas/api.py +++ b/src/metakb/schemas/api.py @@ -2,7 +2,7 @@ from typing import Literal -from ga4gh.va_spec.profiles.var_study_stmt import ( +from ga4gh.va_spec.aac_2017.models import ( VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, diff --git a/src/metakb/transformers/base.py b/src/metakb/transformers/base.py index a8504fde..04956521 100644 --- a/src/metakb/transformers/base.py +++ b/src/metakb/transformers/base.py @@ -14,21 +14,16 @@ from disease.schemas import ( NormalizationService as NormalizedDisease, ) -from ga4gh.cat_vrs.core_models import CategoricalVariant +from ga4gh.cat_vrs.models import CategoricalVariant from ga4gh.core import sha512t24u -from ga4gh.core.domain_models import ( - CombinationTherapy, - Disease, - Gene, - TherapeuticAgent, - TherapeuticSubstituteGroup, -) -from ga4gh.core.entity_models import Coding, Document, Extension, Method -from ga4gh.va_spec.profiles.var_study_stmt import ( +from ga4gh.core.models import Extension, MappableConcept +from ga4gh.va_spec.aac_2017.models import ( VariantDiagnosticStudyStatement, VariantPrognosticStudyStatement, VariantTherapeuticResponseStudyStatement, ) +from ga4gh.va_spec.base.core import Document, Method +from ga4gh.va_spec.base.domain_entities import TherapyGroup from ga4gh.vrs.models import Allele from gene.schemas import NormalizeService as NormalizedGene from pydantic import BaseModel, StrictStr, ValidationError @@ -49,7 +44,7 @@ # Normalizer response type to attribute name NORMALIZER_INSTANCE_TO_ATTR = { NormalizedDisease: "disease", - NormalizedTherapy: "therapeutic_agent", + NormalizedTherapy: "therapy", NormalizedGene: "gene", } @@ -89,10 +84,10 @@ class MoaEvidenceLevel(str, Enum): INFERENTIAL = "moa.evidence_level:inferential_evidence" -class TherapeuticProcedureType(str, Enum): - """Define types for supported Therapeutic Procedures""" +class TherapyType(str, Enum): + """Define types 
for supported therapies""" - THERAPEUTIC_AGENT = "TherapeuticAgent" + THERAPY = "Therapy" THERAPEUTIC_SUBSTITUTE_GROUP = "TherapeuticSubstituteGroup" COMBINATION_THERAPY = "CombinationTherapy" @@ -118,11 +113,9 @@ class TransformedData(BaseModel): ] = [] categorical_variants: list[CategoricalVariant] = [] variations: list[Allele] = [] - genes: list[Gene] = [] - therapeutic_procedures: list[ - TherapeuticAgent | TherapeuticSubstituteGroup | CombinationTherapy - ] = [] - conditions: list[Disease] = [] + genes: list[MappableConcept] = [] + therapies: list[MappableConcept | TherapyGroup] = [] + conditions: list[MappableConcept] = [] methods: list[Method] = [] documents: list[Document] = [] @@ -134,26 +127,22 @@ class Transformer(ABC): Method( id=MethodId.CIVIC_EID_SOP, label="CIViC Curation SOP (2019)", - reportedIn=[ - Document( - label="Danos et al., 2019, Genome Med.", - title="Standard operating procedure for curation and clinical interpretation of variants in cancer", - doi="10.1186/s13073-019-0687-x", - pmid=31779674, - ) - ], + reportedIn=Document( + label="Danos et al., 2019, Genome Med.", + title="Standard operating procedure for curation and clinical interpretation of variants in cancer", + doi="10.1186/s13073-019-0687-x", + pmid=31779674, + ), ), Method( id=MethodId.MOA_ASSERTION_BIORXIV, label="MOAlmanac (2021)", - reportedIn=[ - Document( - label="Reardon, B., Moore, N.D., Moore, N.S. et al.", - title="Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", - doi="10.1038/s43018-021-00243-3", - pmid=35121878, - ) - ], + reportedIn=Document( + label="Reardon, B., Moore, N.D., Moore, N.S. 
et al.", + title="Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", + doi="10.1038/s43018-021-00243-3", + pmid=35121878, + ), ), ] methods_mapping: ClassVar[dict[MethodId, Method]] = {m.id: m for m in _methods} @@ -274,7 +263,7 @@ def __init__( self.able_to_normalize = {} self.unable_to_normalize = { "conditions": set(), - "therapeutic_procedures": set(), + "therapies": set(), } self.evidence_level_to_vicc_concept_mapping = ( @@ -314,19 +303,21 @@ def extract_harvested_data(self) -> _HarvestedData: def _evidence_level_to_vicc_concept_mapping( self, - ) -> dict[MoaEvidenceLevel | CivicEvidenceLevel, Coding]: + ) -> dict[MoaEvidenceLevel | CivicEvidenceLevel, MappableConcept]: """Get mapping of source evidence level to vicc concept vocab :return: Dictionary containing mapping from source evidence level (key) - to corresponding vicc concept vocab (value) represented as Coding object + to corresponding vicc concept vocab (value) represented as MappableConcept object """ mappings = {} for item in self._vicc_concept_vocabs: for exact_mapping in item.exact_mappings: - mappings[exact_mapping] = Coding( - code=item.id.split(":")[-1], + mappings[exact_mapping] = MappableConcept( + primaryCode=item.id.split(":")[-1], label=item.term, - system="https://go.osu.edu/evidence-codes", + extensions=[ + Extension(name="url", value="https://go.osu.edu/evidence-codes") + ], ) return mappings @@ -344,11 +335,11 @@ def _get_digest_for_str_lists(str_list: list[str]) -> str: return sha512t24u(blob) @abstractmethod - def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: - """Get Therapeutic Agent representation for source therapy object + def _get_therapy(self, therapy: dict) -> MappableConcept | None: + """Get therapy mappable concept for source therapy object :param therapy: source therapy object - :return: If able to normalize therapy, returns therapeutic agent + :return: If 
able to normalize therapy, returns therapy mappable concept """ @abstractmethod @@ -357,7 +348,7 @@ def _get_therapeutic_substitute_group( therapeutic_sub_group_id: str, therapies: list[dict], therapy_interaction_type: str, - ) -> TherapeuticSubstituteGroup | None: + ) -> TherapyGroup | None: """Get Therapeutic Substitute Group for therapies :param therapeutic_sub_group_id: ID for Therapeutic Substitute Group @@ -370,9 +361,9 @@ def _get_therapeutic_substitute_group( def _get_combination_therapy( self, combination_therapy_id: str, - therapies: list[dict], + therapies_in: list[dict], therapy_interaction_type: str, - ) -> CombinationTherapy | None: + ) -> TherapyGroup | None: """Get Combination Therapy representation for source therapies :param combination_therapy_id: ID for Combination Therapy @@ -381,23 +372,23 @@ def _get_combination_therapy( :return: If able to normalize all therapy objects in `therapies`, returns Combination Therapy """ - components = [] + therapies = [] source_name = type(self).__name__.lower().replace("transformer", "") - for therapy in therapies: + for therapy in therapies_in: if source_name == SourceName.MOA: - therapeutic_procedure_id = f"moa.therapy:{therapy}" + therapy_id = f"moa.therapy:{therapy}" else: - therapeutic_procedure_id = f"civic.tid:{therapy['id']}" - ta = self._add_therapeutic_procedure( - therapeutic_procedure_id, + therapy_id = f"civic.tid:{therapy['id']}" + therapy_mc = self._add_therapy( + therapy_id, [therapy], - TherapeuticProcedureType.THERAPEUTIC_AGENT, + TherapyType.THERAPY, ) - if not ta: + if not therapy_mc: return None - components.append(ta) + therapies.append(therapy_mc) extensions = [ Extension( @@ -409,8 +400,11 @@ def _get_combination_therapy( ] try: - ct = CombinationTherapy( - id=combination_therapy_id, components=components, extensions=extensions + tg = TherapyGroup( + id=combination_therapy_id, + therapies=therapies, + extensions=extensions, + 
groupType=MappableConcept(label=TherapyType.COMBINATION_THERAPY.value), ) except ValidationError as e: # if combination validation checks fail @@ -418,72 +412,56 @@ def _get_combination_therapy( "ValidationError raised when attempting to create CombinationTherapy: %s", e, ) - ct = None + tg = None - return ct + return tg - def _add_therapeutic_procedure( + def _add_therapy( self, - therapeutic_procedure_id: str, + therapy_id: str, therapies: list[dict], - therapeutic_procedure_type: TherapeuticProcedureType, + therapy_type: TherapyType, therapy_interaction_type: str | None = None, - ) -> TherapeuticAgent | TherapeuticSubstituteGroup | CombinationTherapy | None: - """Create or get Therapeutic Procedure given therapies - First look in cache for existing Therapeutic Procedure, if not found will - attempt to normalize. Will add `therapeutic_procedure_id` to - `therapeutic_procedures` and `able_to_normalize['therapeutic_procedures']` if - therapy-normalizer is able to normalize all `therapies`. Else, will add the - `therapeutic_procedure_id` to `unable_to_normalize['therapeutic_procedures']` - - :param therapeutic_procedure_id: ID for therapeutic procedure - :param therapies: List of therapy objects. If `therapeutic_procedure_type` - is `TherapeuticProcedureType.THERAPEUTIC_AGENT`, the list will only contain - a single therapy. - :param therapeutic_procedure_type: The type of therapeutic procedure + ) -> MappableConcept | None: + """Create or get therapy mappable concept given therapies + First look in cache for existing therapy, if not found will attempt to + normalize. Will add `therapy_id` to `therapies` and + `able_to_normalize['therapies']` if therapy-normalizer is able to normalize all + `therapies`. Else, will add the `therapy_id` to + `unable_to_normalize['therapies']` + + :param therapy_id: ID for therapy + :param therapies: List of therapy objects. If `therapy_type` is + `TherapyType.THERAPY`, the list will only contain a single therapy. 
+ :param therapy_type: The type of therapy :param therapy_interaction_type: drug interaction type - :return: Therapeutic procedure, if successful normalization + :return: Therapy mappable concept, if successful normalization """ - tp = self.able_to_normalize["therapeutic_procedures"].get( - therapeutic_procedure_id - ) - if tp: - return tp - - if ( - therapeutic_procedure_id - not in self.unable_to_normalize["therapeutic_procedures"] - ): - if therapeutic_procedure_type == TherapeuticProcedureType.THERAPEUTIC_AGENT: - tp = self._get_therapeutic_agent(therapies[0]) - elif ( - therapeutic_procedure_type - == TherapeuticProcedureType.THERAPEUTIC_SUBSTITUTE_GROUP - ): - tp = self._get_therapeutic_substitute_group( - therapeutic_procedure_id, therapies, therapy_interaction_type + therapy = self.able_to_normalize["therapies"].get(therapy_id) + if therapy: + return therapy + + if therapy_id not in self.unable_to_normalize["therapies"]: + if therapy_type == TherapyType.THERAPY: + therapy = self._get_therapy(therapies[0]) + elif therapy_type == TherapyType.THERAPEUTIC_SUBSTITUTE_GROUP: + therapy = self._get_therapeutic_substitute_group( + therapy_id, therapies, therapy_interaction_type ) - elif ( - therapeutic_procedure_type - == TherapeuticProcedureType.COMBINATION_THERAPY - ): - tp = self._get_combination_therapy( - therapeutic_procedure_id, therapies, therapy_interaction_type + elif therapy_type == TherapyType.COMBINATION_THERAPY: + therapy = self._get_combination_therapy( + therapy_id, therapies, therapy_interaction_type ) else: # not supported return None - if tp: - self.able_to_normalize["therapeutic_procedures"][ - therapeutic_procedure_id - ] = tp - self.processed_data.therapeutic_procedures.append(tp) + if therapy: + self.able_to_normalize["therapies"][therapy_id] = therapy + self.processed_data.therapies.append(therapy) else: - self.unable_to_normalize["therapeutic_procedures"].add( - therapeutic_procedure_id - ) - return tp + 
self.unable_to_normalize["therapies"].add(therapy_id) + return therapy @staticmethod def _get_vicc_normalizer_extension( diff --git a/src/metakb/transformers/civic.py b/src/metakb/transformers/civic.py index 9fbb3c4b..885ddf8a 100644 --- a/src/metakb/transformers/civic.py +++ b/src/metakb/transformers/civic.py @@ -5,30 +5,30 @@ from enum import Enum from pathlib import Path -from ga4gh.cat_vrs.core_models import CategoricalVariant, DefiningContextConstraint -from ga4gh.core.domain_models import ( - Disease, - Gene, - TherapeuticAgent, - TherapeuticSubstituteGroup, -) -from ga4gh.core.entity_models import ( +from ga4gh.cat_vrs.models import CategoricalVariant, DefiningAlleleConstraint +from ga4gh.core.models import ( Coding, ConceptMapping, - Direction, - Document, Extension, + MappableConcept, Relation, ) -from ga4gh.va_spec.profiles.var_study_stmt import ( - AlleleOriginQualifier, - DiagnosticPredicate, - PrognosticPredicate, - TherapeuticResponsePredicate, +from ga4gh.va_spec.aac_2017.models import ( + VariantDiagnosticProposition, VariantDiagnosticStudyStatement, + VariantPrognosticProposition, VariantPrognosticStudyStatement, + VariantTherapeuticResponseProposition, VariantTherapeuticResponseStudyStatement, ) +from ga4gh.va_spec.base.core import ( + DiagnosticPredicate, + Direction, + Document, + PrognosticPredicate, + TherapeuticResponsePredicate, +) +from ga4gh.va_spec.base.domain_entities import TherapyGroup from ga4gh.vrs.models import Expression, Syntax, Variation from pydantic import BaseModel, ValidationError @@ -40,7 +40,7 @@ from metakb.transformers.base import ( CivicEvidenceLevel, MethodId, - TherapeuticProcedureType, + TherapyType, Transformer, ) @@ -111,9 +111,9 @@ class _CivicInteractionType(str, Enum): class _TherapeuticMetadata(BaseModel): """Define model for CIVIC therapeutic metadata""" - procedure_id: str + therapy_id: str interaction_type: _CivicInteractionType | None - procedure_type: TherapeuticProcedureType + therapy_type: TherapyType 
therapies: list[dict] @@ -137,7 +137,7 @@ class _VariationCache(BaseModel): civic_gene_id: str variant_types: list[Coding] | None = None mappings: list[ConceptMapping] | None = None - aliases: list[str] | None = None + aliases: list[Extension] | None = None coordinates: dict | None members: list[Variation] | None = None @@ -177,7 +177,7 @@ def __init__( "variations": {}, # will store _VariationCache data "categorical_variants": {}, "conditions": {}, - "therapeutic_procedures": {}, + "therapies": {}, "genes": {}, } @@ -264,9 +264,9 @@ def _add_variant_study_stmt( ) -> None: """Create Variant Study Statement given CIViC Evidence Items. Will add associated values to ``processed_data`` instance variable - (``therapeutic_procedures``, ``conditions``, and ``documents``). + (``therapies``, ``conditions``, and ``documents``). ``able_to_normalize`` and ``unable_to_normalize`` will also be mutated for - associated therapeutic_procedures and conditions. + associated therapies and conditions. :param evidence_item: CIViC Evidence Item :param mp_id_to_v_id_mapping: Molecular Profile ID to Variant ID mapping @@ -314,10 +314,10 @@ def _add_variant_study_stmt( if evidence_type == _CivicEvidenceType.PREDICTIVE: therapeutic_metadata = self._get_therapeutic_metadata(evidence_item) if therapeutic_metadata: - civic_therapeutic = self._add_therapeutic_procedure( - therapeutic_metadata.procedure_id, + civic_therapeutic = self._add_therapy( + therapeutic_metadata.therapy_id, therapeutic_metadata.therapies, - therapeutic_metadata.procedure_type, + therapeutic_metadata.therapy_type, therapeutic_metadata.interaction_type, ) if not civic_therapeutic: @@ -339,35 +339,43 @@ def _add_variant_study_stmt( variant_origin = evidence_item["variant_origin"].upper() if variant_origin == "SOMATIC": - allele_origin_qualifier = AlleleOriginQualifier.SOMATIC + allele_origin_qualifier = MappableConcept(label="somatic") elif variant_origin in {"RARE_GERMLINE", "COMMON_GERMLINE"}: - allele_origin_qualifier = 
AlleleOriginQualifier.GERMLINE + allele_origin_qualifier = MappableConcept(label="germline") else: allele_origin_qualifier = None - params = { + stmt_params = { "id": evidence_item["name"].lower().replace("eid", "civic.eid:"), "description": evidence_item["description"] if evidence_item["description"] else None, "direction": direction, "strength": strength, - "predicate": predicate, - "subjectVariant": mp, - "alleleOriginQualifier": allele_origin_qualifier, - "geneContextQualifier": civic_gene, "specifiedBy": self.processed_data.methods[0], "reportedIn": [document], + } + + prop_params = { + "predicate": predicate, condition_key: civic_disease, + "alleleOriginQualifier": allele_origin_qualifier, + "geneContextQualifier": civic_gene, + "subjectVariant": mp, } if evidence_type == _CivicEvidenceType.PREDICTIVE: - params["objectTherapeutic"] = civic_therapeutic - statement = VariantTherapeuticResponseStudyStatement(**params) + prop_params["objectTherapeutic"] = civic_therapeutic + stmt_params["proposition"] = VariantTherapeuticResponseProposition( + **prop_params + ) + statement = VariantTherapeuticResponseStudyStatement(**stmt_params) elif evidence_type == _CivicEvidenceType.PROGNOSTIC: - statement = VariantPrognosticStudyStatement(**params) + stmt_params["proposition"] = VariantPrognosticProposition(**prop_params) + statement = VariantPrognosticStudyStatement(**stmt_params) else: - statement = VariantDiagnosticStudyStatement(**params) + stmt_params["proposition"] = VariantDiagnosticProposition(**prop_params) + statement = VariantDiagnosticStudyStatement(**stmt_params) self.processed_data.statements.append(statement) @@ -406,20 +414,25 @@ def _add_categorical_variants( if civic_variation_data.vrs_variation.root.type != "Allele": continue + extensions = [] + # Get aliases from MP and Variant record - aliases = civic_variation_data.aliases or [] + if civic_variation_data.aliases: + aliases = civic_variation_data.aliases[0].value + else: + aliases = [] for a in 
mp["aliases"] or []: - if not SNP_RE.match(a): + if not SNP_RE.match(a) and a not in aliases: aliases.append(a) + if aliases: + extensions.append(Extension(name="aliases", value=aliases)) # Get molecular profile score data mp_score = mp["molecular_profile_score"] if mp_score: - extensions = [ + extensions.append( Extension(name="CIViC Molecular Profile Score", value=mp_score) - ] - else: - extensions = [] + ) # Get CIViC representative coordinate and Variant types data for ext_key, var_key in [ @@ -437,11 +450,10 @@ def _add_categorical_variants( description=mp["description"], label=mp["name"], constraints=[ - DefiningContextConstraint( - definingContext=civic_variation_data.vrs_variation, + DefiningAlleleConstraint( + allele=civic_variation_data.vrs_variation.root, ) ], - alternativeLabels=list(set(aliases)) or None, mappings=civic_variation_data.mappings, extensions=extensions or None, members=civic_variation_data.members, @@ -627,6 +639,7 @@ async def _add_variations(self, variants: list[dict]) -> None: ) else: aliases.append(a) + extensions = [Extension(name="aliases", value=aliases)] if aliases else [] if variant["coordinates"]: coordinates = { @@ -641,7 +654,7 @@ async def _add_variations(self, variants: list[dict]) -> None: civic_gene_id=f"civic.gid:{variant['gene_id']}", variant_types=variant_types_value or None, mappings=mappings or None, - alternativeLabels=aliases or None, + aliases=extensions or None, coordinates=coordinates or None, members=members, ) @@ -680,11 +693,23 @@ def _add_genes(self, genes: list[dict]) -> None: queries ) + extensions = [ + self._get_vicc_normalizer_extension(normalized_gene_id, gene_norm_resp) + ] + + if gene["aliases"]: + extensions.append(Extension(name="aliases", value=gene["aliases"])) + + if gene["description"]: + extensions.append( + Extension(name="description", value=gene["description"]) + ) + if normalized_gene_id: - civic_gene = Gene( + civic_gene = MappableConcept( id=gene_id, + conceptType="Gene", 
label=gene["name"], - description=gene["description"] if gene["description"] else None, mappings=[ ConceptMapping( coding=Coding( @@ -694,12 +719,7 @@ def _add_genes(self, genes: list[dict]) -> None: relation=Relation.EXACT_MATCH, ) ], - alternativeLabels=gene["aliases"] if gene["aliases"] else None, - extensions=[ - self._get_vicc_normalizer_extension( - normalized_gene_id, gene_norm_resp - ) - ], + extensions=extensions, ) self.able_to_normalize["genes"][gene_id] = civic_gene self.processed_data.genes.append(civic_gene) @@ -710,7 +730,7 @@ def _add_genes(self, genes: list[dict]) -> None: queries, ) - def _add_disease(self, disease: dict) -> Disease | None: + def _add_disease(self, disease: dict) -> MappableConcept | None: """Create or get disease given CIViC disease. First looks in cache for existing disease, if not found will attempt to normalize. Will add CIViC disease ID to ``processed_data.conditions`` and @@ -734,7 +754,7 @@ def _add_disease(self, disease: dict) -> Disease | None: self.unable_to_normalize["conditions"].add(disease_id) return vrs_disease - def _get_disease(self, disease: dict) -> Disease | None: + def _get_disease(self, disease: dict) -> MappableConcept | None: """Get Disease object for a CIViC disease :param disease: CIViC disease record @@ -774,8 +794,9 @@ def _get_disease(self, disease: dict) -> Disease | None: ) return None - return Disease( + return MappableConcept( id=disease_id, + conceptType="Disease", label=display_name, mappings=mappings if mappings else None, extensions=[ @@ -788,30 +809,30 @@ def _get_disease(self, disease: dict) -> Disease | None: def _get_therapeutic_substitute_group( self, therapeutic_sub_group_id: str, - therapies: list[dict], + therapies_in: list[dict], therapy_interaction_type: str, - ) -> TherapeuticSubstituteGroup | None: + ) -> TherapyGroup | None: """Get Therapeutic Substitute Group for CIViC therapies :param therapeutic_sub_group_id: ID for Therapeutic Substitute Group - :param therapies: List of CIViC 
therapy objects + :param therapies_in: List of CIViC therapy objects :param therapy_interaction_type: Therapy interaction type provided by CIViC :return: If able to normalize all therapy objects in `therapies`, returns Therapeutic Substitute Group """ - substitutes = [] + therapies = [] - for therapy in therapies: - therapeutic_procedure_id = f"civic.tid:{therapy['id']}" - ta = self._add_therapeutic_procedure( - therapeutic_procedure_id, + for therapy in therapies_in: + therapy_id = f"civic.tid:{therapy['id']}" + therapy = self._add_therapy( + therapy_id, [therapy], - TherapeuticProcedureType.THERAPEUTIC_AGENT, + TherapyType.THERAPY, ) - if not ta: + if not therapy: return None - substitutes.append(ta) + therapies.append(therapy) extensions = [ Extension( @@ -820,9 +841,12 @@ def _get_therapeutic_substitute_group( ] try: - tsg = TherapeuticSubstituteGroup( + tg = TherapyGroup( id=therapeutic_sub_group_id, - substitutes=substitutes, + groupType=MappableConcept( + label=TherapyType.THERAPEUTIC_SUBSTITUTE_GROUP.value + ), + therapies=therapies, extensions=extensions, ) except ValidationError as e: @@ -831,15 +855,15 @@ def _get_therapeutic_substitute_group( "ValidationError raised when attempting to create TherapeuticSubstituteGroup: %s", {e}, ) - tsg = None + tg = None - return tsg + return tg - def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: - """Get Therapeutic Agent for CIViC therapy + def _get_therapy(self, therapy: dict) -> MappableConcept | None: + """Get Therapy mappable concept for CIViC therapy :param therapy: CIViC therapy object - :return: If able to normalize therapy, returns therapeutic agent + :return: If able to normalize therapy, returns therapy mappable concept """ therapy_id = f"civic.tid:{therapy['id']}" label = therapy["name"] @@ -885,11 +909,14 @@ def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: if regulatory_approval_extension: extensions.append(regulatory_approval_extension) - return 
TherapeuticAgent( + if therapy["aliases"]: + extensions.append(Extension(name="aliases", value=therapy["aliases"])) + + return MappableConcept( id=therapy_id, label=label, + conceptType="Therapy", mappings=mappings if mappings else None, - alternativeLabels=therapy["aliases"] if therapy["aliases"] else None, extensions=extensions, ) @@ -903,26 +930,22 @@ def _get_therapeutic_metadata( """ therapies = evidence_item["therapies"] if len(therapies) == 1: - # Add TherapeuticAgent - therapeutic_procedure_id = f"civic.tid:{therapies[0]['id']}" + # Add therapy + therapy_id = f"civic.tid:{therapies[0]['id']}" therapy_interaction_type = None - therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT + therapy_type = TherapyType.THERAPY else: - # Add TherapeuticSubstituteGroup + # Add therapy group therapy_interaction_type = evidence_item["therapy_interaction_type"] therapeutic_ids = [f"civic.tid:{t['id']}" for t in therapies] therapeutic_digest = self._get_digest_for_str_lists(therapeutic_ids) if therapy_interaction_type == _CivicInteractionType.SUBSTITUTES: - therapeutic_procedure_id = f"civic.tsgid:{therapeutic_digest}" - therapeutic_procedure_type = ( - TherapeuticProcedureType.THERAPEUTIC_SUBSTITUTE_GROUP - ) + therapy_id = f"civic.tsgid:{therapeutic_digest}" + therapy_type = TherapyType.THERAPEUTIC_SUBSTITUTE_GROUP elif therapy_interaction_type == _CivicInteractionType.COMBINATION: - therapeutic_procedure_id = f"civic.ctid:{therapeutic_digest}" - therapeutic_procedure_type = ( - TherapeuticProcedureType.COMBINATION_THERAPY - ) + therapy_id = f"civic.ctid:{therapeutic_digest}" + therapy_type = TherapyType.COMBINATION_THERAPY else: _logger.debug( "civic therapy_interaction_type not supported: %s", @@ -931,9 +954,9 @@ def _get_therapeutic_metadata( return None return _TherapeuticMetadata( - procedure_id=therapeutic_procedure_id, + therapy_id=therapy_id, interaction_type=therapy_interaction_type, - procedure_type=therapeutic_procedure_type, + 
therapy_type=therapy_type, therapies=therapies, ) diff --git a/src/metakb/transformers/moa.py b/src/metakb/transformers/moa.py index 35b9019f..89feddc6 100644 --- a/src/metakb/transformers/moa.py +++ b/src/metakb/transformers/moa.py @@ -5,29 +5,28 @@ from pathlib import Path from urllib.parse import quote -from ga4gh.cat_vrs.core_models import CategoricalVariant, DefiningContextConstraint +from ga4gh.cat_vrs.models import CategoricalVariant, DefiningAlleleConstraint from ga4gh.core import sha512t24u -from ga4gh.core.domain_models import ( - CombinationTherapy, - Disease, - Gene, - TherapeuticAgent, - TherapeuticSubstituteGroup, -) -from ga4gh.core.entity_models import ( +from ga4gh.core.models import ( Coding, ConceptMapping, - Document, Extension, + MappableConcept, Relation, ) -from ga4gh.va_spec.profiles.var_study_stmt import ( - AlleleOriginQualifier, - PrognosticPredicate, - TherapeuticResponsePredicate, +from ga4gh.va_spec.aac_2017.models import ( + VariantPrognosticProposition, VariantPrognosticStudyStatement, + VariantTherapeuticResponseProposition, VariantTherapeuticResponseStudyStatement, ) +from ga4gh.va_spec.base.core import ( + Direction, + Document, + PrognosticPredicate, + TherapeuticResponsePredicate, +) +from ga4gh.va_spec.base.domain_entities import TherapyGroup from ga4gh.vrs.models import Variation from metakb import APP_ROOT @@ -38,7 +37,7 @@ from metakb.transformers.base import ( MethodId, MoaEvidenceLevel, - TherapeuticProcedureType, + TherapyType, Transformer, ) @@ -71,7 +70,7 @@ def __init__( self.able_to_normalize = { "variations": {}, "conditions": {}, - "therapeutic_procedures": {}, + "therapies": {}, "genes": {}, "documents": {}, } @@ -95,9 +94,9 @@ async def transform(self, harvested_data: MoaHarvestedData) -> None: async def _add_variant_study_stmt(self, assertion: dict) -> None: """Create Variant Study Statements from MOA assertions. 
Will add associated values to ``processed_data`` instance variable - (``therapeutic_procedures``, ``conditions``, and ``statements``). + (``therapies``, ``conditions``, and ``statements``). ``able_to_normalize`` and ``unable_to_normalize`` will - also be mutated for associated therapeutic_procedures and conditions. + also be mutated for associated therapies and conditions. :param assertions: MOA assertion record """ @@ -136,48 +135,70 @@ async def _add_variant_study_stmt(self, assertion: dict) -> None: feature_type = assertion["variant"]["feature_type"] if feature_type == "somatic_variant": - allele_origin_qualifier = AlleleOriginQualifier.SOMATIC + allele_origin_qualifier = MappableConcept(label="somatic") elif feature_type == "germline_variant": - allele_origin_qualifier = AlleleOriginQualifier.GERMLINE + allele_origin_qualifier = MappableConcept(label="germline") else: allele_origin_qualifier = None - params = { + stmt_params = { "id": assertion_id, "description": assertion["description"], "strength": strength, - "subjectVariant": variation_gene_map["cv"], - "alleleOriginQualifier": allele_origin_qualifier, - "geneContextQualifier": variation_gene_map["moa_gene"], "specifiedBy": self.processed_data.methods[0], "reportedIn": [document], } + prop_params = { + "alleleOriginQualifier": allele_origin_qualifier, + "geneContextQualifier": variation_gene_map["moa_gene"], + "subjectVariant": variation_gene_map["cv"], + } - if assertion["favorable_prognosis"] == "": - params["conditionQualifier"] = moa_disease - params["predicate"] = ( - TherapeuticResponsePredicate.RESISTANCE - if assertion["therapy"]["resistance"] - else TherapeuticResponsePredicate.SENSITIVITY - ) - params["objectTherapeutic"] = self._get_therapeutic_procedure(assertion) + if assertion["favorable_prognosis"] == "": # can be either 0, 1, or "" + prop_params["objectTherapeutic"] = self._get_therapy_or_group(assertion) - if not params["objectTherapeutic"]: + if not prop_params["objectTherapeutic"]: 
logger.debug( - "%s has no therapeutic procedure for therapy_name %s", + "%s has no therapy for therapy_name %s", assertion_id, assertion["therapy"]["name"], ) return - statement = VariantTherapeuticResponseStudyStatement(**params) - else: - params["objectCondition"] = moa_disease - params["predicate"] = ( - PrognosticPredicate.BETTER_OUTCOME - if assertion["favorable_prognosis"] - else PrognosticPredicate.WORSE_OUTCOME + + if assertion["therapy"]["resistance"] != "": # can be either 0, 1, or "" + predicate = TherapeuticResponsePredicate.RESISTANCE + stmt_params["direction"] = ( + Direction.SUPPORTS + if assertion["therapy"]["resistance"] + else Direction.DISPUTES + ) + else: + predicate = TherapeuticResponsePredicate.SENSITIVITY + stmt_params["direction"] = ( + Direction.SUPPORTS + if assertion["therapy"]["sensitivity"] + else Direction.DISPUTES + ) + + prop_params["predicate"] = predicate + prop_params["conditionQualifier"] = moa_disease + stmt_params["proposition"] = VariantTherapeuticResponseProposition( + **prop_params ) - statement = VariantPrognosticStudyStatement(**params) + statement = VariantTherapeuticResponseStudyStatement(**stmt_params) + else: + if assertion["favorable_prognosis"]: + predicate = PrognosticPredicate.BETTER_OUTCOME + direction = Direction.SUPPORTS + else: + predicate = PrognosticPredicate.WORSE_OUTCOME + direction = Direction.DISPUTES + + prop_params["predicate"] = predicate + stmt_params["direction"] = direction + prop_params["objectCondition"] = moa_disease + stmt_params["proposition"] = VariantPrognosticProposition(**prop_params) + statement = VariantPrognosticStudyStatement(**stmt_params) self.processed_data.statements.append(statement) @@ -291,7 +312,7 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None: cv = CategoricalVariant( id=moa_variant_id, label=feature, - constraints=[DefiningContextConstraint(definingContext=moa_variation)], + constraints=[DefiningAlleleConstraint(allele=moa_variation.root)], 
mappings=mappings or None, extensions=extensions, members=members, @@ -356,8 +377,9 @@ def _add_genes(self, genes: list[str]) -> None: [gene] ) if normalized_gene_id: - moa_gene = Gene( + moa_gene = MappableConcept( id=f"moa.normalize.gene:{quote(gene)}", + conceptType="Gene", label=gene, extensions=[ self._get_vicc_normalizer_extension( @@ -405,13 +427,13 @@ def _add_documents(self, sources: list) -> None: self.able_to_normalize["documents"][source_id] = document self.processed_data.documents.append(document) - def _get_therapeutic_procedure( + def _get_therapy_or_group( self, assertion: dict - ) -> TherapeuticAgent | TherapeuticSubstituteGroup | CombinationTherapy | None: - """Get therapeutic procedure object + ) -> MappableConcept | TherapyGroup | None: + """Get therapy mappable concept (single) or therapy group (multiple) :param assertion: MOA assertion record - :return: Therapeutic procedure object, if found and able to be normalized + :return: Therapy object, if found and able to be normalized """ therapy = assertion["therapy"] therapy_name = therapy["name"] @@ -429,9 +451,7 @@ def _get_therapeutic_procedure( "RADIATION THERAPY", "TARGETED THERAPY", }: - therapeutic_procedure_type = ( - TherapeuticProcedureType.COMBINATION_THERAPY - ) + therapy_type = TherapyType.COMBINATION_THERAPY else: # skipping HORMONE and CHEMOTHERAPY for now return None @@ -440,16 +460,16 @@ def _get_therapeutic_procedure( therapeutic_digest = self._get_digest_for_str_lists( [f"moa.therapy:{tn}" for tn in therapies] ) - therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}" + therapy_id = f"moa.ctid:{therapeutic_digest}" else: - therapeutic_procedure_id = f"moa.therapy:{therapy_name}" + therapy_id = f"moa.therapy:{therapy_name}" therapies = [{"label": therapy_name}] - therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT + therapy_type = TherapyType.THERAPY - return self._add_therapeutic_procedure( - therapeutic_procedure_id, + return self._add_therapy( + 
therapy_id, therapies, - therapeutic_procedure_type, + therapy_type, therapy_interaction_type, ) @@ -467,13 +487,13 @@ def _get_therapeutic_substitute_group( :return: None, since not supported by MOA """ - def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: - """Get Therapeutic Agent for a MOA therapy name. + def _get_therapy(self, therapy: dict) -> MappableConcept | None: + """Get Therapy mappable concept for a MOA therapy name. Will run `label` through therapy-normalizer. :param therapy: MOA therapy name - :return: If able to normalize therapy, returns therapeutic agent + :return: If able to normalize therapy, returns therapy mappable concept """ ( therapy_norm_resp, @@ -497,8 +517,9 @@ def _get_therapeutic_agent(self, therapy: dict) -> TherapeuticAgent | None: if regulatory_approval_extension: extensions.append(regulatory_approval_extension) - return TherapeuticAgent( - id=f"moa.{therapy_norm_resp.therapeutic_agent.id}", + return MappableConcept( + id=f"moa.{therapy_norm_resp.therapy.id}", + conceptType="Therapy", label=therapy["label"], extensions=extensions, ) @@ -514,8 +535,8 @@ def _add_disease(self, disease: dict) -> dict | None: ``unable_to_normalize['conditions']``. Since there may be duplicate Oncotree code/terms with different names, the first - name will be used as the Disease label. Others will be added to the - alternativeLabels field. + name will be used as the Disease label. Others will be added to the extensions + aliases field. 
:param disease: MOA disease object :return: Disease object if disease-normalizer was able to normalize @@ -538,10 +559,18 @@ def _add_disease(self, disease: dict) -> dict | None: if vrs_disease: source_disease_name = disease["name"] if source_disease_name != vrs_disease.label: - vrs_disease.alternativeLabels = vrs_disease.alternativeLabels or [] - - if source_disease_name not in vrs_disease.alternativeLabels: - vrs_disease.alternativeLabels.append(source_disease_name) + if not vrs_disease.extensions: + vrs_disease.extensions = [ + Extension(name="aliases", value=[source_disease_name]) + ] + else: + for ext in vrs_disease.extensions: + if ( + ext.name == "aliases" + and source_disease_name not in ext.value + ): + ext.value.append(source_disease_name) + break return vrs_disease vrs_disease = None @@ -554,7 +583,7 @@ def _add_disease(self, disease: dict) -> dict | None: self.unable_to_normalize["conditions"].add(disease_id) return vrs_disease - def _get_disease(self, disease: dict) -> Disease | None: + def _get_disease(self, disease: dict) -> MappableConcept | None: """Get Disease object for a MOA disease :param disease: MOA disease record @@ -594,8 +623,9 @@ def _get_disease(self, disease: dict) -> Disease | None: logger.debug("Disease Normalizer unable to normalize: %s", queries) return None - return Disease( + return MappableConcept( id=f"moa.{disease_norm_resp.disease.id}", + conceptType="Disease", label=disease_name, mappings=mappings if mappings else None, extensions=[ diff --git a/tests/conftest.py b/tests/conftest.py index ec42e754..c9e08807 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -74,7 +74,7 @@ def cetuximab_extensions(): "has_indications": [ { "id": "mesh:D009369", - "type": "Disease", + "conceptType": "Disease", "label": "Neoplasms", "mappings": [ { @@ -85,7 +85,7 @@ def cetuximab_extensions(): }, { "id": "mesh:D015179", - "type": "Disease", + "conceptType": "Disease", "label": "Colorectal Neoplasms", "mappings": [ { @@ -96,7 +96,7 @@ def 
cetuximab_extensions(): }, { "id": "mesh:D006258", - "type": "Disease", + "conceptType": "Disease", "label": "Head and Neck Neoplasms", "mappings": [ { @@ -107,7 +107,7 @@ def cetuximab_extensions(): }, { "id": "mesh:D002294", - "type": "Disease", + "conceptType": "Disease", "label": "Carcinoma, Squamous Cell", "mappings": [ { @@ -140,7 +140,7 @@ def encorafenib_extensions(): "has_indications": [ { "id": "mesh:D008545", - "type": "Disease", + "conceptType": "Disease", "label": "Melanoma", "mappings": [ { @@ -151,7 +151,7 @@ def encorafenib_extensions(): }, { "id": "mesh:D009369", - "type": "Disease", + "conceptType": "Disease", "label": "Neoplasms", "mappings": [ { @@ -174,9 +174,7 @@ def civic_mpid33(civic_vid33): "type": "CategoricalVariant", "description": "EGFR L858R has long been recognized as a functionally significant mutation in cancer, and is one of the most prevalent single mutations in lung cancer. Best described in non-small cell lung cancer (NSCLC), the mutation seems to confer sensitivity to first and second generation TKI's like gefitinib and neratinib. NSCLC patients with this mutation treated with TKI's show increased overall and progression-free survival, as compared to chemotherapy alone. 
Third generation TKI's are currently in clinical trials that specifically focus on mutant forms of EGFR, a few of which have shown efficacy in treating patients that failed to respond to earlier generation TKI therapies.", "label": "EGFR L858R", - "constraints": [ - {"definingContext": civic_vid33, "type": "DefiningContextConstraint"} - ], + "constraints": [{"allele": civic_vid33, "type": "DefiningAlleleConstraint"}], "members": [ { "id": "ga4gh:VA.gV7_dnvF8SQSeUdvgDFhU65zK_csc6VE", @@ -219,9 +217,16 @@ def civic_mpid33(civic_vid33): "expressions": [ {"syntax": "hgvs.g", "value": "NC_000007.13:g.55259515T>G"} ], + "extensions": [ + { + "name": "mane_genes", + "value": [ + {"ncbi_gene_id": 1956, "hgnc_id": 3236, "symbol": "EGFR"} + ], + } + ], }, ], - "alternativeLabels": ["LEU813ARG", "LEU858ARG", "L813R"], "mappings": [ { "coding": { @@ -264,6 +269,7 @@ def civic_mpid33(civic_vid33): }, ], "extensions": [ + {"name": "aliases", "value": ["LEU813ARG", "LEU858ARG", "L813R"]}, { "name": "CIViC representative coordinate", "value": { @@ -320,20 +326,25 @@ def civic_eid2997_study_stmt( """Create CIVIC EID2997 Study Statement test fixture. 
Uses TherapeuticAgent.""" return { "id": "civic.eid:2997", - "type": "VariantTherapeuticResponseStudyStatement", + "type": "Statement", "description": "Afatinib, an irreversible inhibitor of the ErbB family of tyrosine kinases has been approved in the US for the first-line treatment of patients with metastatic non-small-cell lung cancer (NSCLC) who have tumours with EGFR exon 19 deletions or exon 21 (L858R) substitution mutations as detected by a US FDA-approved test", "direction": "supports", "strength": { - "code": "e000001", + "primaryCode": "e000001", "label": "authoritative evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "proposition": { + "type": "VariantTherapeuticResponseProposition", + "predicate": "predictsSensitivityTo", + "objectTherapeutic": civic_tid146, + "conditionQualifier": civic_did8, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": civic_gid19, + "subjectVariant": civic_mpid33, }, - "predicate": "predictsSensitivityTo", - "subjectVariant": civic_mpid33, - "objectTherapeutic": civic_tid146, - "conditionQualifier": civic_did8, - "alleleOriginQualifier": "somatic", - "geneContextQualifier": civic_gid19, "specifiedBy": civic_method, "reportedIn": [civic_source592], } @@ -344,9 +355,8 @@ def civic_gid5(): """Create test fixture for CIViC GID5.""" return { "id": "civic.gid:5", - "type": "Gene", + "conceptType": "Gene", "label": "BRAF", - "description": "BRAF mutations are found to be recurrent in many cancer types. Of these, the mutation of valine 600 to glutamic acid (V600E) is the most prevalent. V600E has been determined to be an activating mutation, and cells that harbor it, along with other V600 mutations are sensitive to the BRAF inhibitor dabrafenib. It is also common to use MEK inhibition as a substitute for BRAF inhibitors, and the MEK inhibitor trametinib has seen some success in BRAF mutant melanomas. 
BRAF mutations have also been correlated with poor prognosis in many cancer types, although there is at least one study that questions this conclusion in papillary thyroid cancer.\n\nOncogenic BRAF mutations are divided into three categories that determine their sensitivity to inhibitors.\nClass 1 BRAF mutations (V600) are RAS-independent, signal as monomers and are sensitive to current RAF monomer inhibitors.\nClass 2 BRAF mutations (K601E, K601N, K601T, L597Q, L597V, G469A, G469V, G469R, G464V, G464E, and fusions) are RAS-independent, signaling as constitutive dimers and are resistant to vemurafenib. Such mutants may be sensitive to novel RAF dimer inhibitors or MEK inhibitors.\nClass 3 BRAF mutations (D287H, V459L, G466V, G466E, G466A, S467L, G469E, N581S, N581I, D594N, D594G, D594A, D594H, F595L, G596D, and G596R) with low or absent kinase activity are RAS-dependent and they activate ERK by increasing their binding to activated RAS and wild-type CRAF. Class 3 BRAF mutations coexist with mutations in RAS or NF1 in melanoma may be treated with MEK inhibitors. In epithelial tumors such as CRC or NSCLC may be effectively treated with combinations that include inhibitors of receptor tyrosine kinase.", "mappings": [ { "coding": { @@ -356,20 +366,27 @@ def civic_gid5(): "relation": "exactMatch", } ], - "alternativeLabels": [ - "B-RAF1", - "B-raf", - "BRAF", - "BRAF-1", - "BRAF1", - "NS7", - "RAFB1", - ], "extensions": [ + { + "name": "description", + "value": "BRAF mutations are found to be recurrent in many cancer types. Of these, the mutation of valine 600 to glutamic acid (V600E) is the most prevalent. V600E has been determined to be an activating mutation, and cells that harbor it, along with other V600 mutations are sensitive to the BRAF inhibitor dabrafenib. It is also common to use MEK inhibition as a substitute for BRAF inhibitors, and the MEK inhibitor trametinib has seen some success in BRAF mutant melanomas. 
BRAF mutations have also been correlated with poor prognosis in many cancer types, although there is at least one study that questions this conclusion in papillary thyroid cancer.\n\nOncogenic BRAF mutations are divided into three categories that determine their sensitivity to inhibitors.\nClass 1 BRAF mutations (V600) are RAS-independent, signal as monomers and are sensitive to current RAF monomer inhibitors.\nClass 2 BRAF mutations (K601E, K601N, K601T, L597Q, L597V, G469A, G469V, G469R, G464V, G464E, and fusions) are RAS-independent, signaling as constitutive dimers and are resistant to vemurafenib. Such mutants may be sensitive to novel RAF dimer inhibitors or MEK inhibitors.\nClass 3 BRAF mutations (D287H, V459L, G466V, G466E, G466A, S467L, G469E, N581S, N581I, D594N, D594G, D594A, D594H, F595L, G596D, and G596R) with low or absent kinase activity are RAS-dependent and they activate ERK by increasing their binding to activated RAS and wild-type CRAF. Class 3 BRAF mutations coexist with mutations in RAS or NF1 in melanoma may be treated with MEK inhibitors. In epithelial tumors such as CRC or NSCLC may be effectively treated with combinations that include inhibitors of receptor tyrosine kinase.", + }, { "name": VICC_NORMALIZER_DATA, "value": {"id": "hgnc:1097", "label": "BRAF"}, - } + }, + { + "name": "aliases", + "value": [ + "B-RAF1", + "B-raf", + "BRAF", + "BRAF-1", + "BRAF1", + "NS7", + "RAFB1", + ], + }, ], } @@ -421,6 +438,12 @@ def braf_v600e_genomic(): "sequence": "A", }, "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + "extensions": [ + { + "name": "mane_genes", + "value": [{"ncbi_gene_id": 673, "hgnc_id": 1097, "symbol": "BRAF"}], + } + ], } @@ -438,9 +461,7 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): "type": "CategoricalVariant", "description": "BRAF V600E has been shown to be recurrent in many cancer types. It is one of the most widely studied variants in cancer. 
This variant is correlated with poor prognosis in certain cancer types, including colorectal cancer and papillary thyroid cancer. The targeted therapeutic dabrafenib has been shown to be effective in clinical trials with an array of BRAF mutations and cancer types. Dabrafenib has also shown to be effective when combined with the MEK inhibitor trametinib in colorectal cancer and melanoma. However, in patients with TP53, CDKN2A and KRAS mutations, dabrafenib resistance has been reported. Ipilimumab, regorafenib, vemurafenib, and a number of combination therapies have been successful in treating V600E mutations. However, cetuximab and panitumumab have been largely shown to be ineffective without supplementary treatment.", "label": "BRAF V600E", - "constraints": [ - {"definingContext": civic_vid12, "type": "DefiningContextConstraint"} - ], + "constraints": [{"allele": civic_vid12, "type": "DefiningAlleleConstraint"}], "members": [ genomic_rep, { @@ -464,7 +485,6 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): "state": {"type": "LiteralSequenceExpression", "sequence": "A"}, }, ], - "alternativeLabels": ["VAL600GLU", "V640E", "VAL640GLU"], "mappings": [ { "coding": { @@ -500,6 +520,7 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): }, ], "extensions": [ + {"name": "aliases", "value": ["VAL600GLU", "V640E", "VAL640GLU"]}, { "name": "CIViC representative coordinate", "value": { @@ -564,9 +585,8 @@ def civic_gid19(): """Create test fixture for CIViC GID19.""" return { "id": "civic.gid:19", - "type": "Gene", + "conceptType": "Gene", "label": "EGFR", - "description": "EGFR is widely recognized for its importance in cancer. Amplification and mutations have been shown to be driving events in many cancer types. Its role in non-small cell lung cancer, glioblastoma and basal-like breast cancers has spurred many research and drug development efforts. Tyrosine kinase inhibitors have shown efficacy in EGFR amplfied tumors, most notably gefitinib and erlotinib. 
Mutations in EGFR have been shown to confer resistance to these drugs, particularly the variant T790M, which has been functionally characterized as a resistance marker for both of these drugs. The later generation TKI's have seen some success in treating these resistant cases, and targeted sequencing of the EGFR locus has become a common practice in treatment of non-small cell lung cancer. Overproduction of ligands is another possible mechanism of activation of EGFR. ERBB ligands include EGF, TGF-a, AREG, EPG, BTC, HB-EGF, EPR and NRG1-4 (for detailed information please refer to the respective ligand section).", "mappings": [ { "coding": { @@ -576,21 +596,28 @@ def civic_gid19(): "relation": "exactMatch", } ], - "alternativeLabels": [ - "EGFR", - "ERBB", - "ERBB1", - "ERRP", - "HER1", - "NISBD2", - "PIG61", - "mENA", - ], "extensions": [ + { + "name": "description", + "value": "EGFR is widely recognized for its importance in cancer. Amplification and mutations have been shown to be driving events in many cancer types. Its role in non-small cell lung cancer, glioblastoma and basal-like breast cancers has spurred many research and drug development efforts. Tyrosine kinase inhibitors have shown efficacy in EGFR amplfied tumors, most notably gefitinib and erlotinib. Mutations in EGFR have been shown to confer resistance to these drugs, particularly the variant T790M, which has been functionally characterized as a resistance marker for both of these drugs. The later generation TKI's have seen some success in treating these resistant cases, and targeted sequencing of the EGFR locus has become a common practice in treatment of non-small cell lung cancer. Overproduction of ligands is another possible mechanism of activation of EGFR. 
ERBB ligands include EGF, TGF-a, AREG, EPG, BTC, HB-EGF, EPR and NRG1-4 (for detailed information please refer to the respective ligand section).", + }, + { + "name": "aliases", + "value": [ + "EGFR", + "ERBB", + "ERBB1", + "ERRP", + "HER1", + "NISBD2", + "PIG61", + "mENA", + ], + }, { "name": VICC_NORMALIZER_DATA, "value": {"id": "hgnc:3236", "label": "EGFR"}, - } + }, ], } @@ -600,7 +627,7 @@ def civic_tid146(): """Create test fixture for CIViC TID146.""" return { "id": "civic.tid:146", - "type": "TherapeuticAgent", + "conceptType": "Therapy", "label": "Afatinib", "mappings": [ { @@ -611,13 +638,16 @@ def civic_tid146(): "relation": "exactMatch", } ], - "alternativeLabels": [ - "BIBW2992", - "BIBW 2992", - "BIBW-2992", - "(2e)-N-(4-(3-Chloro-4-Fluoroanilino)-7-(((3s)-Oxolan-3-yl)Oxy)Quinoxazolin-6-yl)-4-(Dimethylamino)But-2-Enamide", - ], "extensions": [ + { + "name": "aliases", + "value": [ + "(2e)-N-(4-(3-Chloro-4-Fluoroanilino)-7-(((3s)-Oxolan-3-yl)Oxy)Quinoxazolin-6-yl)-4-(Dimethylamino)But-2-Enamide", + "BIBW 2992", + "BIBW-2992", + "BIBW2992", + ], + }, { "name": "regulatory_approval", "value": { @@ -625,7 +655,7 @@ def civic_tid146(): "has_indications": [ { "id": "hemonc:642", - "type": "Disease", + "conceptType": "Disease", "label": "Non-small cell lung cancer", "mappings": [ { @@ -636,7 +666,7 @@ def civic_tid146(): }, { "id": "hemonc:25316", - "type": "Disease", + "conceptType": "Disease", "label": "Non-small cell lung cancer squamous", }, ], @@ -658,7 +688,7 @@ def civic_did8(): """Create test fixture for CIViC DID8.""" return { "id": "civic.did:8", - "type": "Disease", + "conceptType": "Disease", "label": "Lung Non-small Cell Carcinoma", "mappings": [ { @@ -698,7 +728,7 @@ def civic_tid28(): """Create test fixture for CIViC therapy ID 28""" return { "id": "civic.tid:28", - "type": "TherapeuticAgent", + "conceptType": "Therapy", "label": "Panitumumab", "mappings": [ { @@ -709,19 +739,22 @@ def civic_tid28(): "relation": "exactMatch", } ], - 
"alternativeLabels": [ - "ABX-EGF", - "ABX-EGF Monoclonal Antibody", - "ABX-EGF, Clone E7.6.3", - "E7.6.3", - "Human IgG2K Monoclonal Antibody", - "MoAb ABX-EGF", - "MoAb E7.6.3", - "Monoclonal Antibody ABX-EGF", - "Monoclonal Antibody E7.6.3", - "Vectibix", - ], "extensions": [ + { + "name": "aliases", + "value": [ + "ABX-EGF", + "ABX-EGF Monoclonal Antibody", + "ABX-EGF, Clone E7.6.3", + "E7.6.3", + "Human IgG2K Monoclonal Antibody", + "MoAb ABX-EGF", + "MoAb E7.6.3", + "Monoclonal Antibody ABX-EGF", + "Monoclonal Antibody E7.6.3", + "Vectibix", + ], + }, { "name": VICC_NORMALIZER_DATA, "value": { @@ -736,7 +769,7 @@ def civic_tid28(): "has_indications": [ { "id": "mesh:D009369", - "type": "Disease", + "conceptType": "Disease", "label": "Neoplasms", "mappings": [ { @@ -747,7 +780,7 @@ def civic_tid28(): }, { "id": "mesh:D015179", - "type": "Disease", + "conceptType": "Disease", "label": "Colorectal Neoplasms", "mappings": [ { @@ -768,7 +801,7 @@ def civic_tid16(cetuximab_extensions): """Create test fixture for CIViC therapy ID 16""" return { "id": "civic.tid:16", - "type": "TherapeuticAgent", + "conceptType": "Therapy", "label": "Cetuximab", "mappings": [ { @@ -779,27 +812,32 @@ def civic_tid16(cetuximab_extensions): "relation": "exactMatch", } ], - "alternativeLabels": [ - "Cetuximab Biosimilar CDP-1", - "Cetuximab Biosimilar CMAB009", - "Cetuximab Biosimilar KL 140", - "Chimeric Anti-EGFR Monoclonal Antibody", - "Chimeric MoAb C225", - "Chimeric Monoclonal Antibody C225", - "Erbitux", - "IMC-C225", + "extensions": [ + *cetuximab_extensions, + { + "name": "aliases", + "value": [ + "Cetuximab Biosimilar CDP-1", + "Cetuximab Biosimilar CMAB009", + "Cetuximab Biosimilar KL 140", + "Chimeric Anti-EGFR Monoclonal Antibody", + "Chimeric MoAb C225", + "Chimeric Monoclonal Antibody C225", + "Erbitux", + "IMC-C225", + ], + }, ], - "extensions": cetuximab_extensions, } @pytest.fixture(scope="session") def civic_tsg(civic_tid16, civic_tid28): - """Create test fixture for 
CIViC TherapeuticSubstituteGroup""" + """Create test fixture for CIViC therapy substitutes""" return { - "type": "TherapeuticSubstituteGroup", "id": "civic.tsgid:7IxyhCwID0QYyVCP2xuIyYvwwu-S_HrZ", - "substitutes": [civic_tid16, civic_tid28], + "therapies": [civic_tid16, civic_tid28], + "groupType": {"label": "TherapeuticSubstituteGroup"}, "extensions": [ { "name": "civic_therapy_interaction_type", @@ -814,7 +852,7 @@ def civic_tid483(encorafenib_extensions): """Create test fixture for CIViC Therapy ID 483""" return { "id": "civic.tid:483", - "type": "TherapeuticAgent", + "conceptType": "Therapy", "label": "Encorafenib", "mappings": [ { @@ -825,18 +863,20 @@ def civic_tid483(encorafenib_extensions): "relation": "exactMatch", } ], - "alternativeLabels": ["Braftovi", "LGX 818", "LGX-818", "LGX818"], - "extensions": encorafenib_extensions, + "extensions": [ + *encorafenib_extensions, + {"name": "aliases", "value": ["Braftovi", "LGX 818", "LGX-818", "LGX818"]}, + ], } @pytest.fixture(scope="session") def civic_ct(civic_tid483, civic_tid16): - """Create test fixture for CIViC CombinationTherapy""" + """Create test fixture for CIViC combination therapy""" return { - "type": "CombinationTherapy", "id": "civic.ctid:P1PY89shAjemg7jquQ0V9pg1VnYnkPeK", - "components": [civic_tid483, civic_tid16], + "therapies": [civic_tid483, civic_tid16], + "groupType": {"label": "CombinationTherapy"}, "extensions": [ { "name": "civic_therapy_interaction_type", @@ -851,7 +891,7 @@ def civic_did11(): """Create test fixture for CIViC Disease ID 11""" return { "id": "civic.did:11", - "type": "Disease", + "conceptType": "Disease", "label": "Colorectal Cancer", "mappings": [ { @@ -882,20 +922,25 @@ def civic_eid816_study_stmt( """Create CIVIC EID816 study statement test fixture.
Uses TherapeuticSubstituteGroup.""" return { "id": "civic.eid:816", - "type": "VariantTherapeuticResponseStudyStatement", + "type": "Statement", "description": "This meta-analysis of 7 randomized control trials evaluating overall survival (OS) (8 for progression free survival) could not definitely state that survival benefit of anti-EGFR monoclonal antibodies is limited to patients with wild type BRAF. In other words, the authors believe that there is insufficient data to justify the exclusion of anti-EGFR monoclonal antibody therapy for patients with mutant BRAF. In these studies, mutant BRAF specifically meant the V600E mutation.", "direction": "disputes", "strength": { - "code": "e000005", + "primaryCode": "e000005", "label": "clinical cohort evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "proposition": { + "type": "VariantTherapeuticResponseProposition", + "predicate": "predictsResistanceTo", + "subjectVariant": civic_mpid12, + "objectTherapeutic": civic_tsg, + "conditionQualifier": civic_did11, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": civic_gid5, }, - "predicate": "predictsResistanceTo", - "subjectVariant": civic_mpid12, - "objectTherapeutic": civic_tsg, - "conditionQualifier": civic_did11, - "alleleOriginQualifier": "somatic", - "geneContextQualifier": civic_gid5, "specifiedBy": civic_method, "reportedIn": [ { @@ -920,20 +965,25 @@ def civic_eid9851_study_stmt( """Create CIVIC EID9851 study statement test fixture. Uses CombinationTherapy.""" return { "id": "civic.eid:9851", - "type": "VariantTherapeuticResponseStudyStatement", + "type": "Statement", "description": "The open-label phase 3 BEACON CRC trial included 665 patients with BRAF V600E-mutated metastatic CRC. 
Patients were randomly assigned in a 1:1:1 ratio to receive encorafenib, binimetinib, and cetuximab (triplet-therapy group); encorafenib and cetuximab (doublet-therapy group); or the investigators\u2019 choice of either cetuximab and irinotecan or cetuximab and FOLFIRI. The median overall survival was 8.4 months (95% CI, 7.5 to 11.0) in the doublet-therapy group and 5.4 months (95% CI, 4.8 to 6.6) in the control group, with a significantly lower risk of death compared to the control group (hazard ratio for death doublet-group vs. control, 0.60; 95% CI, 0.45 to 0.79; P<0.001). The confirmed response rate was 26% (95% CI, 18 to 35) in the triplet-therapy group, 20% in the doublet-therapy group (95% CI 13 to 29) and 2% (95% CI, 0 to 7) in the control group (doublet group vs. control P<0.001). Median PFS was 4.2 months (95% CI, 3.7 to 5.4) in the doublet-therapy group, and 1.5 months (95% CI, 1.5 to 1.7) in the control group (hazard ratio for disease progression doublet-group vs control, 0.40; 95% CI, 0.31 to 0.52, P<0.001).", "direction": "supports", "strength": { - "code": "e000001", + "primaryCode": "e000001", "label": "authoritative evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "proposition": { + "type": "VariantTherapeuticResponseProposition", + "predicate": "predictsSensitivityTo", + "subjectVariant": civic_mpid12, + "objectTherapeutic": civic_ct, + "conditionQualifier": civic_did11, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": civic_gid5, }, - "predicate": "predictsSensitivityTo", - "subjectVariant": civic_mpid12, - "objectTherapeutic": civic_ct, - "conditionQualifier": civic_did11, - "alleleOriginQualifier": "somatic", - "geneContextQualifier": civic_gid5, "specifiedBy": civic_method, "reportedIn": [ { @@ -1015,18 +1065,23 @@ def civic_eid26_study_stmt( "description": "In acute myloid leukemia patients, D816 mutation is 
associated with earlier relapse and poorer prognosis than wildtype KIT.", "direction": "supports", "strength": { - "code": "e000005", + "primaryCode": "e000005", "label": "clinical cohort evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "proposition": { + "type": "VariantPrognosticProposition", + "predicate": "associatedWithWorseOutcomeFor", + "alleleOriginQualifier": {"label": "somatic"}, + "subjectVariant": civic_mpid65, + "geneContextQualifier": civic_gid29, + "objectCondition": civic_did3, }, - "predicate": "associatedWithWorseOutcomeFor", - "alleleOriginQualifier": "somatic", - "subjectVariant": civic_mpid65, - "geneContextQualifier": civic_gid29, - "objectCondition": civic_did3, "specifiedBy": civic_method, "reportedIn": [pmid_16384925], - "type": "VariantPrognosticStudyStatement", + "type": "Statement", } @@ -1077,9 +1132,7 @@ def civic_mpid65(civic_vid65): "type": "CategoricalVariant", "description": "KIT D816V is a mutation observed in acute myeloid leukemia (AML). 
This variant has been linked to poorer prognosis and worse outcome in AML patients.", "label": "KIT D816V", - "constraints": [ - {"definingContext": civic_vid65, "type": "DefiningContextConstraint"} - ], + "constraints": [{"allele": civic_vid65, "type": "DefiningAlleleConstraint"}], "members": [ { "id": "ga4gh:VA.MQQ62X5KMlj9gDKjOkE1lIZjAY9k_7g4", @@ -1144,9 +1197,16 @@ def civic_mpid65(civic_vid65): "sequence": "A", }, "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + "extensions": [ + { + "name": "mane_genes", + "value": [ + {"ncbi_gene_id": 3815, "hgnc_id": 6342, "symbol": "KIT"} + ], + } + ], }, ], - "alternativeLabels": ["ASP816VAL"], "mappings": [ { "coding": { @@ -1175,6 +1235,7 @@ def civic_mpid65(civic_vid65): }, ], "extensions": [ + {"name": "aliases", "value": ["ASP816VAL"]}, { "name": "CIViC representative coordinate", "value": { @@ -1212,7 +1273,7 @@ def civic_did3(): """Create test fixture for CIViC DID3.""" return { "id": "civic.did:3", - "type": "Disease", + "conceptType": "Disease", "label": "Acute Myeloid Leukemia", "extensions": [ { @@ -1241,15 +1302,21 @@ def civic_gid29(): """Create test fixture for CIViC GID29.""" return { "id": "civic.gid:29", - "type": "Gene", + "conceptType": "Gene", "label": "KIT", - "description": "c-KIT activation has been shown to have oncogenic activity in gastrointestinal stromal tumors (GISTs), melanomas, lung cancer, and other tumor types. The targeted therapeutics nilotinib and sunitinib have shown efficacy in treating KIT overactive patients, and are in late-stage trials in melanoma and GIST. KIT overactivity can be the result of many genomic events from genomic amplification to overexpression to missense mutations. 
Missense mutations have been shown to be key players in mediating clinical response and acquired resistance in patients being treated with these targeted therapeutics.", - "alternativeLabels": ["MASTC", "KIT", "SCFR", "PBT", "CD117", "C-Kit"], "extensions": [ + { + "name": "description", + "value": "c-KIT activation has been shown to have oncogenic activity in gastrointestinal stromal tumors (GISTs), melanomas, lung cancer, and other tumor types. The targeted therapeutics nilotinib and sunitinib have shown efficacy in treating KIT overactive patients, and are in late-stage trials in melanoma and GIST. KIT overactivity can be the result of many genomic events from genomic amplification to overexpression to missense mutations. Missense mutations have been shown to be key players in mediating clinical response and acquired resistance in patients being treated with these targeted therapeutics.", + }, + { + "name": "aliases", + "value": ["MASTC", "KIT", "SCFR", "PBT", "CD117", "C-Kit"], + }, { "name": "vicc_normalizer_data", "value": {"id": "hgnc:6342", "label": "KIT"}, - } + }, ], "mappings": [ { @@ -1311,19 +1378,25 @@ def moa_aid66_study_stmt( "id": "moa.assertion:66", "description": "T315I mutant ABL1 in p210 BCR-ABL cells resulted in retained high levels of phosphotyrosine at increasing concentrations of inhibitor STI-571, whereas wildtype appropriately received inhibition.", "strength": { - "code": "e000009", + "primaryCode": "e000009", "label": "preclinical evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "direction": "supports", + "proposition": { + "type": "VariantTherapeuticResponseProposition", + "predicate": "predictsResistanceTo", + "subjectVariant": moa_vid66, + "objectTherapeutic": moa_imatinib, + "conditionQualifier": moa_chronic_myelogenous_leukemia, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": moa_abl1, }, - 
"predicate": "predictsResistanceTo", - "subjectVariant": moa_vid66, - "objectTherapeutic": moa_imatinib, - "conditionQualifier": moa_chronic_myelogenous_leukemia, - "alleleOriginQualifier": "somatic", - "geneContextQualifier": moa_abl1, "specifiedBy": moa_method, "reportedIn": [moa_source45], - "type": "VariantTherapeuticResponseStudyStatement", + "type": "Statement", } @@ -1336,7 +1409,7 @@ def moa_vid66(): "label": "ABL1 p.T315I (Missense)", "constraints": [ { - "definingContext": { + "allele": { "id": "ga4gh:VA.D6NzpWXKqBnbcZZrXNSXj4tMUwROKbsQ", "digest": "D6NzpWXKqBnbcZZrXNSXj4tMUwROKbsQ", "type": "Allele", @@ -1354,7 +1427,7 @@ def moa_vid66(): }, "state": {"type": "LiteralSequenceExpression", "sequence": "I"}, }, - "type": "DefiningContextConstraint", + "type": "DefiningAlleleConstraint", } ], "members": [ @@ -1417,7 +1490,7 @@ def moa_abl1(): """Create a test fixture for MOA ABL1 Gene.""" return { "id": "moa.normalize.gene:ABL1", - "type": "Gene", + "conceptType": "Gene", "label": "ABL1", "extensions": [ { @@ -1433,7 +1506,7 @@ def moa_imatinib(): """Create a test fixture for MOA Imatinib Therapy.""" return { "id": "moa.normalize.therapy.rxcui:282388", - "type": "TherapeuticAgent", + "conceptType": "Therapy", "label": "Imatinib", "extensions": [ { @@ -1443,7 +1516,7 @@ def moa_imatinib(): "has_indications": [ { "id": "hemonc:669", - "type": "Disease", + "conceptType": "Disease", "label": "Systemic mastocytosis", "mappings": [ { @@ -1454,7 +1527,7 @@ def moa_imatinib(): }, { "id": "hemonc:582", - "type": "Disease", + "conceptType": "Disease", "label": "Chronic myelogenous leukemia", "mappings": [ { @@ -1465,7 +1538,7 @@ def moa_imatinib(): }, { "id": "hemonc:24309", - "type": "Disease", + "conceptType": "Disease", "label": "Acute lymphoblastic leukemia", "mappings": [ { @@ -1476,7 +1549,7 @@ def moa_imatinib(): }, { "id": "hemonc:634", - "type": "Disease", + "conceptType": "Disease", "label": "Myelodysplastic syndrome", "mappings": [ { @@ -1487,7 +1560,7 @@ 
def moa_imatinib(): }, { "id": "hemonc:602", - "type": "Disease", + "conceptType": "Disease", "label": "Gastrointestinal stromal tumor", "mappings": [ { @@ -1498,12 +1571,12 @@ def moa_imatinib(): }, { "id": "hemonc:33893", - "type": "Disease", + "conceptType": "Disease", "label": "Chronic myelogenous leukemia pediatric", }, { "id": "hemonc:667", - "type": "Disease", + "conceptType": "Disease", "label": "Soft tissue sarcoma", "mappings": [ { @@ -1514,7 +1587,7 @@ def moa_imatinib(): }, { "id": "hemonc:616", - "type": "Disease", + "conceptType": "Disease", "label": "Hypereosinophilic syndrome", "mappings": [ { @@ -1542,7 +1615,7 @@ def moa_chronic_myelogenous_leukemia(): """Create test fixture for MOA Chronic Myelogenous Leukemia.""" return { "id": "moa.normalize.disease.ncit:C3174", - "type": "Disease", + "conceptType": "Disease", "label": "Chronic Myelogenous Leukemia", "extensions": [ { @@ -1573,15 +1646,13 @@ def civic_method(): return { "id": "civic.method:2019", "label": "CIViC Curation SOP (2019)", - "reportedIn": [ - { - "label": "Danos et al., 2019, Genome Med.", - "title": "Standard operating procedure for curation and clinical interpretation of variants in cancer", - "doi": "10.1186/s13073-019-0687-x", - "pmid": 31779674, - "type": "Document", - } - ], + "reportedIn": { + "label": "Danos et al., 2019, Genome Med.", + "title": "Standard operating procedure for curation and clinical interpretation of variants in cancer", + "doi": "10.1186/s13073-019-0687-x", + "pmid": 31779674, + "type": "Document", + }, "type": "Method", } @@ -1592,15 +1663,13 @@ def moa_method(): return { "id": "moa.method:2021", "label": "MOAlmanac (2021)", - "reportedIn": [ - { - "label": "Reardon, B., Moore, N.D., Moore, N.S. 
et al.", - "title": "Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", - "doi": "10.1038/s43018-021-00243-3", - "pmid": 35121878, - "type": "Document", - } - ], + "reportedIn": { + "label": "Reardon, B., Moore, N.D., Moore, N.S. et al.", + "title": "Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", + "doi": "10.1038/s43018-021-00243-3", + "pmid": 35121878, + "type": "Document", + }, "type": "Method", } diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index 23346171..5609e022 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -224,7 +224,7 @@ def test_gene_rules( check_node_labels("Gene", expected_labels, 1) gene = get_node_by_id(civic_gid5["id"]) - extension_names = {"normalizer_label", "normalizer_id"} + extension_names = {"normalizer_label", "normalizer_id", "description", "aliases"} check_extension_props(gene, civic_gid5["extensions"], extension_names) expected_keys = { "normalizer_id", @@ -233,8 +233,8 @@ def test_gene_rules( "id", "description", "mappings", - "type", - "alternativeLabels", + "conceptType", + "aliases", } check_node_props(gene, civic_gid5, expected_keys, extension_names) @@ -334,7 +334,7 @@ def test_categorical_variant_rules( "id", "label", "description", - "alternativeLabels", + "aliases", "civic_molecular_profile_score", "civic_representative_coordinate", "mappings", @@ -344,7 +344,10 @@ def test_categorical_variant_rules( assert cv["type"] == civic_mpid12["type"] assert cv["label"] == civic_mpid12["label"] assert cv["description"] == civic_mpid12["description"] - assert set(cv["alternativeLabels"]) == set(civic_mpid12["alternativeLabels"]) + expected_aliases = next( + ext for ext in civic_mpid12["extensions"] if ext["name"] == "aliases" + )["value"] + assert 
set(json.loads(cv["aliases"])) == set(expected_aliases) assert isinstance(cv["civic_molecular_profile_score"], float) crc = json.loads(cv["civic_representative_coordinate"]) assert set(crc.keys()) == { @@ -415,7 +418,7 @@ def test_therapeutic_procedure_rules( ): """Verify property and relationship rules for Therapeutic Procedure nodes.""" check_unique_property("TherapeuticProcedure", "id") - # min_rels is 0 because TherapeuticAgent may not be attached to statement directly, + # min_rels is 0 because Therapy may not be attached to statement directly, # but through CombinationTherapy and TherapeuticSubstituteGroup check_relation_count( "TherapeuticProcedure", @@ -426,7 +429,7 @@ def test_therapeutic_procedure_rules( direction="in", ) check_relation_count( - "CombinationTherapy", "TherapeuticAgent", "HAS_COMPONENTS", max_rels=None + "CombinationTherapy", "Therapy", "HAS_COMPONENTS", max_rels=None ) check_relation_count( "CombinationTherapy", @@ -437,7 +440,7 @@ def test_therapeutic_procedure_rules( ) check_relation_count( "TherapeuticSubstituteGroup", - "TherapeuticAgent", + "Therapy", "HAS_SUBSTITUTES", max_rels=None, ) @@ -450,29 +453,30 @@ def test_therapeutic_procedure_rules( ) expected_node_labels = [ - {"TherapeuticProcedure", "TherapeuticAgent"}, - {"TherapeuticProcedure", "CombinationTherapy"}, - {"TherapeuticProcedure", "TherapeuticSubstituteGroup"}, + {"Therapy"}, + {"Therapy", "CombinationTherapy"}, + {"Therapy", "TherapeuticSubstituteGroup"}, ] - check_node_labels("TherapeuticProcedure", expected_node_labels, 3) + check_node_labels("Therapy", expected_node_labels, 3) - # Test TherapeuticAgent + # Test Therapy ta = get_node_by_id(civic_tid146["id"]) extension_names = { "normalizer_id", "normalizer_label", "regulatory_approval", + "aliases", } check_extension_props(ta, civic_tid146["extensions"], extension_names) expected_keys = { "id", "label", - "alternativeLabels", + "aliases", "normalizer_id", "normalizer_label", "regulatory_approval", "mappings", - 
"type", + "conceptType", } check_node_props(ta, civic_tid146, expected_keys, extension_names) @@ -481,14 +485,14 @@ def test_therapeutic_procedure_rules( check_extension_props( ct, civic_ct["extensions"], {"civic_therapy_interaction_type"} ) - assert ct["type"] == civic_ct["type"] + assert ct["groupType"] == civic_ct["groupType"]["label"] # Test TherapeuticSubstituteGroup tsg = get_node_by_id(civic_tsg["id"]) check_extension_props( tsg, civic_tsg["extensions"], {"civic_therapy_interaction_type"} ) - assert tsg["type"] == tsg["type"] + assert tsg["groupType"] == civic_tsg["groupType"]["label"] def test_condition_rules( @@ -523,7 +527,7 @@ def test_condition_rules( "normalizer_id", "normalizer_label", "normalizer_mondo_id", - "type", + "conceptType", } check_node_props(disease, civic_did8, expected_keys, extension_names) @@ -550,11 +554,9 @@ def test_statement_rules( check_relation_count("Statement", "Gene", "HAS_GENE_CONTEXT", max_rels=None) expected_node_labels = [ - {"Statement", "StudyStatement", "VariantTherapeuticResponseStudyStatement"}, - {"Statement", "StudyStatement", "VariantPrognosticStudyStatement"}, - {"Statement", "StudyStatement", "VariantDiagnosticStudyStatement"}, + {"Statement", "StudyStatement"}, ] - check_node_labels("Statement", expected_node_labels, 3) + check_node_labels("Statement", expected_node_labels, 1) cite_query = """ MATCH (s:Statement) @@ -575,11 +577,14 @@ def test_statement_rules( "predicate", "alleleOriginQualifier", "type", + "propositionType", } civic_eid2997_ss_cp = civic_eid2997_study_stmt.copy() - civic_eid2997_ss_cp["alleleOriginQualifier"] = civic_eid2997_ss_cp[ + civic_eid2997_ss_cp["alleleOriginQualifier"] = civic_eid2997_ss_cp["proposition"][ "alleleOriginQualifier" - ] + ]["label"] + civic_eid2997_ss_cp["predicate"] = civic_eid2997_ss_cp["proposition"]["predicate"] + civic_eid2997_ss_cp["propositionType"] = "VariantTherapeuticResponseProposition" check_node_props(statement, civic_eid2997_ss_cp, expected_keys) diff --git 
a/tests/unit/search/test_search_statements.py b/tests/unit/search/test_search_statements.py index 5e8aaac5..3a90b6b4 100644 --- a/tests/unit/search/test_search_statements.py +++ b/tests/unit/search/test_search_statements.py @@ -1,7 +1,7 @@ """Test search statement methods""" import pytest -from ga4gh.core.entity_models import Extension +from ga4gh.core.models import Extension from metakb.normalizers import VICC_NORMALIZER_DATA from metakb.query import QueryHandler @@ -181,16 +181,13 @@ async def test_general_search_statements(query_handler): assert_general_search_stmts(resp) expected_therapy_id = "rxcui:318341" for statement in resp.statements: - tp = statement.objectTherapeutic.root - if tp.type == "TherapeuticAgent": + tp = statement.proposition.objectTherapeutic.root + + if hasattr(tp, "conceptType"): assert _get_normalizer_id(tp.extensions) == expected_therapy_id else: - therapeutics = ( - tp.components if tp.type == "CombinationTherapy" else tp.substitutes - ) - found_expected = False - for therapeutic in therapeutics: + for therapeutic in tp.therapies: if _get_normalizer_id(therapeutic.extensions) == expected_therapy_id: found_expected = True break @@ -212,15 +209,15 @@ async def test_general_search_statements(query_handler): for statement in resp.statements: assert ( - statement.subjectVariant.constraints[0].root.definingContext.root.id + statement.proposition.subjectVariant.constraints[0].root.allele.id == expected_variation_id ) assert ( - _get_normalizer_id(statement.objectTherapeutic.root.extensions) + _get_normalizer_id(statement.proposition.objectTherapeutic.root.extensions) == expected_therapy_id ) assert ( - _get_normalizer_id(statement.conditionQualifier.root.extensions) + _get_normalizer_id(statement.proposition.conditionQualifier.root.extensions) == expected_disease_id ) diff --git a/tests/unit/transformers/test_civic_transformer_diagnostic.py b/tests/unit/transformers/test_civic_transformer_diagnostic.py index ac28ba23..b0167ad4 100644 --- 
a/tests/unit/transformers/test_civic_transformer_diagnostic.py +++ b/tests/unit/transformers/test_civic_transformer_diagnostic.py @@ -37,7 +37,7 @@ def civic_mpid99(): "label": "PDGFRA D842V", "constraints": [ { - "definingContext": { + "allele": { "id": "ga4gh:VA.Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9", "type": "Allele", "label": "D842V", @@ -59,7 +59,7 @@ def civic_mpid99(): }, "state": {"type": "LiteralSequenceExpression", "sequence": "V"}, }, - "type": "DefiningContextConstraint", + "type": "DefiningAlleleConstraint", } ], "members": [ @@ -104,9 +104,16 @@ def civic_mpid99(): "sequence": "A", }, "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + "extensions": [ + { + "name": "mane_genes", + "value": [ + {"ncbi_gene_id": 5156, "hgnc_id": 8803, "symbol": "PDGFRA"} + ], + } + ], }, ], - "alternativeLabels": ["ASP842VAL"], "mappings": [ { "coding": { @@ -135,6 +142,7 @@ def civic_mpid99(): }, ], "extensions": [ + {"name": "aliases", "value": ["ASP842VAL"]}, { "name": "CIViC representative coordinate", "value": { @@ -172,9 +180,8 @@ def civic_gid38(): """Create test fixture for CIViC GID38.""" return { "id": "civic.gid:38", - "type": "Gene", + "conceptType": "Gene", "label": "PDGFRA", - "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. 
Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", "mappings": [ { "coding": { @@ -184,12 +191,19 @@ def civic_gid38(): "relation": "exactMatch", } ], - "alternativeLabels": ["CD140A", "PDGFR-2", "PDGFR2", "PDGFRA"], "extensions": [ + { + "name": "description", + "value": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", + }, + { + "name": "aliases", + "value": ["CD140A", "PDGFR-2", "PDGFR2", "PDGFRA"], + }, { "name": VICC_NORMALIZER_DATA, "value": {"id": "hgnc:8803", "label": "PDGFRA"}, - } + }, ], } @@ -199,7 +213,7 @@ def civic_did2(): """Create test fixture for CIViC DID2.""" return { "id": "civic.did:2", - "type": "Disease", + "conceptType": "Disease", "label": "Gastrointestinal Stromal Tumor", "mappings": [ { @@ -231,15 +245,20 @@ def civic_eid2_study_stmt(civic_method, civic_mpid99, civic_gid38, civic_did2): "description": "GIST tumors harboring PDGFRA D842V mutation are more likely to be benign than malignant.", "direction": "supports", "strength": { - "code": "e000005", + "primaryCode": "e000005", "label": "clinical cohort evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "proposition": { + "type": "VariantDiagnosticProposition", + "predicate": "isDiagnosticExclusionCriterionFor", + "alleleOriginQualifier": {"label": "somatic"}, + "subjectVariant": civic_mpid99, + "geneContextQualifier": civic_gid38, + "objectCondition": civic_did2, }, - "predicate": 
"isDiagnosticExclusionCriterionFor", - "alleleOriginQualifier": "somatic", - "subjectVariant": civic_mpid99, - "geneContextQualifier": civic_gid38, - "objectCondition": civic_did2, "specifiedBy": civic_method, "reportedIn": [ { @@ -250,7 +269,7 @@ def civic_eid2_study_stmt(civic_method, civic_mpid99, civic_gid38, civic_did2): "type": "Document", } ], - "type": "VariantDiagnosticStudyStatement", + "type": "Statement", } @@ -264,7 +283,7 @@ def civic_mpid113(): "label": "RET M918T", "constraints": [ { - "definingContext": { + "allele": { "id": "ga4gh:VA.hEybNB_CeKflfFhT5AKOU5i1lgZPP-aS", "type": "Allele", "label": "M918T", @@ -286,7 +305,7 @@ def civic_mpid113(): }, "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, }, - "type": "DefiningContextConstraint", + "type": "DefiningAlleleConstraint", } ], "members": [ @@ -331,9 +350,16 @@ def civic_mpid113(): "sequence": "T", }, "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + "extensions": [ + { + "name": "mane_genes", + "value": [ + {"ncbi_gene_id": 5979, "hgnc_id": 9967, "symbol": "RET"} + ], + } + ], }, ], - "alternativeLabels": ["MET918THR"], "mappings": [ { "coding": { @@ -362,6 +388,7 @@ def civic_mpid113(): }, ], "extensions": [ + {"name": "aliases", "value": ["MET918THR"]}, { "name": "CIViC representative coordinate", "value": { @@ -399,9 +426,8 @@ def civic_gid42(): """Create test fixture for CIViC GID42.""" return { "id": "civic.gid:42", - "type": "Gene", + "conceptType": "Gene", "label": "RET", - "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. 
The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistance. Highly selective and well-tolerated RET inhibitors, selpercatinib (LOXO-292) and pralsetinib (BLU-667), have been FDA approved recently for the treatment of RET fusion-positive non-small-cell lung cancer, RET fusion-positive thyroid cancer and RET-mutant medullary thyroid cancer.", "mappings": [ { "coding": { @@ -411,22 +437,29 @@ def civic_gid42(): "relation": "exactMatch", } ], - "alternativeLabels": [ - "CDHF12", - "CDHR16", - "HSCR1", - "MEN2A", - "MEN2B", - "MTC1", - "PTC", - "RET", - "RET-ELE1", - ], "extensions": [ + { + "name": "description", + "value": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistance. 
Highly selective and well-tolerated RET inhibitors, selpercatinib (LOXO-292) and pralsetinib (BLU-667), have been FDA approved recently for the treatment of RET fusion-positive non-small-cell lung cancer, RET fusion-positive thyroid cancer and RET-mutant medullary thyroid cancer.", + }, + { + "name": "aliases", + "value": [ + "CDHF12", + "CDHR16", + "HSCR1", + "MEN2A", + "MEN2B", + "MTC1", + "PTC", + "RET", + "RET-ELE1", + ], + }, { "name": VICC_NORMALIZER_DATA, "value": {"id": "hgnc:9967", "label": "RET"}, - } + }, ], } @@ -436,7 +469,7 @@ def civic_did15(): """Create test fixture for CIViC DID15.""" return { "id": "civic.did:15", - "type": "Disease", + "conceptType": "Disease", "label": "Medullary Thyroid Carcinoma", "mappings": [ { @@ -468,15 +501,20 @@ def civic_eid74_study_stmt(civic_method, civic_mpid113, civic_gid42, civic_did15 "description": "In patients with medullary carcinoma, the presence of RET M918T mutation is associated with increased probability of lymph node metastases.", "direction": "supports", "strength": { - "code": "e000005", + "primaryCode": "e000005", "label": "clinical cohort evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "proposition": { + "type": "VariantDiagnosticProposition", + "predicate": "isDiagnosticInclusionCriterionFor", + "alleleOriginQualifier": {"label": "somatic"}, + "subjectVariant": civic_mpid113, + "geneContextQualifier": civic_gid42, + "objectCondition": civic_did15, }, - "predicate": "isDiagnosticInclusionCriterionFor", - "alleleOriginQualifier": "somatic", - "subjectVariant": civic_mpid113, - "geneContextQualifier": civic_gid42, - "objectCondition": civic_did15, "specifiedBy": civic_method, "reportedIn": [ { @@ -487,7 +525,7 @@ def civic_eid74_study_stmt(civic_method, civic_mpid113, civic_gid42, civic_did15 "type": "Document", } ], - "type": "VariantDiagnosticStudyStatement", + "type": "Statement", } diff --git 
a/tests/unit/transformers/test_moa_transformer_prognostic.py b/tests/unit/transformers/test_moa_transformer_prognostic.py index c14f7185..dfe6c9b1 100644 --- a/tests/unit/transformers/test_moa_transformer_prognostic.py +++ b/tests/unit/transformers/test_moa_transformer_prognostic.py @@ -38,7 +38,7 @@ def moa_vid141(): "label": "BCOR p.N1425S (Missense)", "constraints": [ { - "definingContext": { + "allele": { "id": "ga4gh:VA.pDuCLNI3mHF25uUPNSDM8LbP8p4Fsuay", "digest": "pDuCLNI3mHF25uUPNSDM8LbP8p4Fsuay", "type": "Allele", @@ -56,7 +56,7 @@ def moa_vid141(): }, "state": {"type": "LiteralSequenceExpression", "sequence": "S"}, }, - "type": "DefiningContextConstraint", + "type": "DefiningAlleleConstraint", } ], "members": [ @@ -78,6 +78,14 @@ def moa_vid141(): "sequence": "T", }, "state": {"type": "LiteralSequenceExpression", "sequence": "C"}, + "extensions": [ + { + "name": "mane_genes", + "value": [ + {"ncbi_gene_id": 54880, "hgnc_id": 20893, "symbol": "BCOR"} + ], + } + ], } ], "extensions": [ @@ -112,7 +120,7 @@ def moa_myelodysplasia(): """Create test fixture for MOA disease Myelodysplasia""" return { "id": "moa.normalize.disease.ncit:C3247", - "type": "Disease", + "conceptType": "Disease", "label": "Myelodysplasia", "extensions": [ { @@ -142,7 +150,7 @@ def moa_bcor(): """Create MOA gene BCOR test fixture""" return { "id": "moa.normalize.gene:BCOR", - "type": "Gene", + "conceptType": "Gene", "label": "BCOR", "extensions": [ { @@ -174,18 +182,24 @@ def moa_aid141_study_stmt( """Create MOA AID 141 study statement test fixture.""" return { "id": "moa.assertion:141", - "type": "VariantPrognosticStudyStatement", + "type": "Statement", + "direction": "disputes", "description": "More frequent in Chronic Myelomonocytic Leukemia.", "strength": { - "code": "e000007", + "primaryCode": "e000007", "label": "observational study evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, 
+ "proposition": { + "type": "VariantPrognosticProposition", + "predicate": "associatedWithWorseOutcomeFor", + "subjectVariant": moa_vid141, + "objectCondition": moa_myelodysplasia, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": moa_bcor, }, - "predicate": "associatedWithWorseOutcomeFor", - "subjectVariant": moa_vid141, - "objectCondition": moa_myelodysplasia, - "alleleOriginQualifier": "somatic", - "geneContextQualifier": moa_bcor, "specifiedBy": moa_method, "reportedIn": [moa_source60], } @@ -200,7 +214,7 @@ def moa_vid532(): "label": "SF3B1 p.E622D (Missense)", "constraints": [ { - "definingContext": { + "allele": { "id": "ga4gh:VA.53EXGCEm1KH4W4ygbovgD_fFWskECrAJ", "digest": "53EXGCEm1KH4W4ygbovgD_fFWskECrAJ", "type": "Allele", @@ -218,7 +232,7 @@ def moa_vid532(): }, "state": {"type": "LiteralSequenceExpression", "sequence": "D"}, }, - "type": "DefiningContextConstraint", + "type": "DefiningAlleleConstraint", } ], "members": [ @@ -240,6 +254,14 @@ def moa_vid532(): "sequence": "C", }, "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, + "extensions": [ + { + "name": "mane_genes", + "value": [ + {"ncbi_gene_id": 23451, "hgnc_id": 10768, "symbol": "SF3B1"} + ], + } + ], } ], "extensions": [ @@ -281,7 +303,7 @@ def moa_sf3b1(): """Create MOA gene SF3B1 test fixture""" return { "id": "moa.normalize.gene:SF3B1", - "type": "Gene", + "conceptType": "Gene", "label": "SF3B1", "extensions": [ { @@ -311,18 +333,24 @@ def moa_aid532_study_stmt( """Create MOA AID 532 study statement test fixture.""" return { "id": "moa.assertion:532", - "type": "VariantPrognosticStudyStatement", + "type": "Statement", + "direction": "supports", "description": "The National Comprehensive Cancer Network\u00ae (NCCN\u00ae) highlights SF3B1 E622, Y623, R625, N626, H662, T663, K666, K700E, I704, G740, G742, and D781 missense variants as being associated with a favorable prognosis in patients with myelodysplastic syndromes.", "strength": { - "code": 
"e000003", + "primaryCode": "e000003", "label": "professional guideline evidence", - "system": "https://go.osu.edu/evidence-codes", + "extensions": [ + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} + ], + }, + "proposition": { + "type": "VariantPrognosticProposition", + "predicate": "associatedWithBetterOutcomeFor", + "subjectVariant": moa_vid532, + "objectCondition": moa_myelodysplasia, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": moa_sf3b1, }, - "predicate": "associatedWithBetterOutcomeFor", - "subjectVariant": moa_vid532, - "objectCondition": moa_myelodysplasia, - "alleleOriginQualifier": "somatic", - "geneContextQualifier": moa_sf3b1, "specifiedBy": moa_method, "reportedIn": [moa_source33], } diff --git a/tests/unit/transformers/test_moa_transformer_therapeutic.py b/tests/unit/transformers/test_moa_transformer_therapeutic.py index d7997b98..b4d8736e 100644 --- a/tests/unit/transformers/test_moa_transformer_therapeutic.py +++ b/tests/unit/transformers/test_moa_transformer_therapeutic.py @@ -41,7 +41,7 @@ def moa_vid144(braf_v600e_genomic): "label": "BRAF p.V600E (Missense)", "constraints": [ { - "definingContext": { + "allele": { "id": "ga4gh:VA.j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L", "digest": "j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L", "type": "Allele", @@ -59,7 +59,7 @@ def moa_vid144(braf_v600e_genomic): }, "state": {"type": "LiteralSequenceExpression", "sequence": "E"}, }, - "type": "DefiningContextConstraint", + "type": "DefiningAlleleConstraint", } ], "members": [genomic_rep], @@ -102,7 +102,7 @@ def moa_cetuximab(cetuximab_extensions): """Create a test fixture for MOA Cetuximab""" return { "id": "moa.normalize.therapy.rxcui:318341", - "type": "TherapeuticAgent", + "conceptType": "Therapy", "label": "Cetuximab", "extensions": cetuximab_extensions, } @@ -113,7 +113,7 @@ def moa_encorafenib(encorafenib_extensions): """Create test fixture for MOA Encorafenib""" return { "id": "moa.normalize.therapy.rxcui:2049106", - "type": 
"TherapeuticAgent", + "conceptType": "Therapy", "label": "Encorafenib", "extensions": encorafenib_extensions, } @@ -124,62 +124,68 @@ def moa_aid154_study_stmt(moa_vid144, moa_cetuximab, moa_encorafenib, moa_method """Create MOA AID 154 study statement test fixture. Uses CombinationTherapy.""" return { "id": "moa.assertion:154", - "type": "VariantTherapeuticResponseStudyStatement", + "type": "Statement", + "direction": "supports", "description": "The U.S. Food and Drug Administration (FDA) granted regular approval to encorafenib in combination with cetuximab for the treatment of adult patients with metastatic colorectal cancer (CRC) with BRAF V600E mutation, as detected by an FDA-approved test, after prior therapy.", "strength": { - "code": "e000002", + "primaryCode": "e000002", "label": "FDA recognized evidence", - "system": "https://go.osu.edu/evidence-codes", - }, - "predicate": "predictsSensitivityTo", - "subjectVariant": moa_vid144, - "objectTherapeutic": { - "type": "CombinationTherapy", - "id": "moa.ctid:ZGlEkRBR4st6Y_nijjuR1KUV7EFHIF_S", - "components": [moa_cetuximab, moa_encorafenib], "extensions": [ - { - "name": "moa_therapy_type", - "value": "Targeted therapy", - } + {"name": "url", "value": "https://go.osu.edu/evidence-codes"} ], }, - "conditionQualifier": { - "id": "moa.normalize.disease.ncit:C5105", - "type": "Disease", - "label": "Colorectal Adenocarcinoma", - "extensions": [ - { - "name": VICC_NORMALIZER_DATA, - "value": { - "id": "ncit:C5105", - "label": "Colorectal Adenocarcinoma", - "mondo_id": "0005008", - }, - } - ], - "mappings": [ - { - "coding": { - "label": "Colorectal Adenocarcinoma", - "system": "https://oncotree.mskcc.org/", - "code": "COADREAD", - }, - "relation": "exactMatch", - } - ], - }, - "alleleOriginQualifier": "somatic", - "geneContextQualifier": { - "id": "moa.normalize.gene:BRAF", - "type": "Gene", - "label": "BRAF", - "extensions": [ - { - "name": VICC_NORMALIZER_DATA, - "value": {"id": "hgnc:1097", "label": "BRAF"}, - } - 
], + "proposition": { + "type": "VariantTherapeuticResponseProposition", + "predicate": "predictsSensitivityTo", + "subjectVariant": moa_vid144, + "objectTherapeutic": { + "groupType": {"label": "CombinationTherapy"}, + "id": "moa.ctid:ZGlEkRBR4st6Y_nijjuR1KUV7EFHIF_S", + "therapies": [moa_cetuximab, moa_encorafenib], + "extensions": [ + { + "name": "moa_therapy_type", + "value": "Targeted therapy", + } + ], + }, + "conditionQualifier": { + "id": "moa.normalize.disease.ncit:C5105", + "conceptType": "Disease", + "label": "Colorectal Adenocarcinoma", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": { + "id": "ncit:C5105", + "label": "Colorectal Adenocarcinoma", + "mondo_id": "0005008", + }, + } + ], + "mappings": [ + { + "coding": { + "label": "Colorectal Adenocarcinoma", + "system": "https://oncotree.mskcc.org/", + "code": "COADREAD", + }, + "relation": "exactMatch", + } + ], + }, + "alleleOriginQualifier": {"label": "somatic"}, + "geneContextQualifier": { + "id": "moa.normalize.gene:BRAF", + "conceptType": "Gene", + "label": "BRAF", + "extensions": [ + { + "name": VICC_NORMALIZER_DATA, + "value": {"id": "hgnc:1097", "label": "BRAF"}, + } + ], + }, }, "specifiedBy": moa_method, "reportedIn": [