Skip to content

Commit

Permalink
feat: add a matching process for existing graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
lairgiyassir committed Sep 17, 2024
1 parent 49fb162 commit 27ab7b6
Showing 1 changed file with 47 additions and 4 deletions.
51 changes: 47 additions & 4 deletions itext2kg/utils/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,55 @@ def create_union_list(self, list1: List[Dict], list2: List[Dict]) -> List[Dict]:
def process_lists(self, list1: List[Dict], list2: List[Dict], for_entity_or_relation: Literal['entity', 'relation'], threshold: float = 0.8) -> Tuple[List[Dict], List[Dict]]:
    """
    Process two lists to generate new lists based on specified conditions.

    Each item of ``list1`` is passed to ``find_match`` together with ``list2``;
    the per-item results are then combined with ``list2`` by ``create_union_list``.

    :param list1: First list to process (local items).
    :param list2: Second list to be compared against (global items).
    :param for_entity_or_relation: Specifies whether the processing is for entities or relations.
    :param threshold: Matching threshold forwarded unchanged to ``find_match``.
    :return: Two processed lists: the ``find_match`` result for every item of
        ``list1`` (same order as ``list1``), and the list returned by
        ``create_union_list`` for those results and ``list2``.
    """
    # Run the matching exactly once per local item; repeating the pass would
    # only redo the same work.
    matched_local_items = [
        self.find_match(obj1, list2, for_entity_or_relation, threshold=threshold)
        for obj1 in list1
    ]
    new_global_items = self.create_union_list(matched_local_items, list2)
    return matched_local_items, new_global_items


def match_entities_and_update_relationships(
    self,
    entities1: List[Dict],
    entities2: List[Dict],
    relationships1: List[Dict],
    relationships2: List[Dict],
    rel_threshold: float = 0.8,
    ent_threshold: float = 0.8
) -> Tuple[List[Dict], List[Dict]]:
    """
    Match two lists of entities and update the relationships list accordingly.

    :param entities1: First list of entities to match.
    :param entities2: Second list of entities to match against.
    :param relationships1: First list of relationships to update.
    :param relationships2: Second list of relationships to compare; it is
        extended in place with the updated relationships and returned.
    :param rel_threshold: Threshold passed to ``process_lists`` for relationships.
    :param ent_threshold: Threshold passed to ``process_lists`` for entities.
    :return: The second list returned by ``process_lists`` for the entities,
        and ``relationships2`` after it has been extended.
    """
    # Step 1: run the matching for entities and for relationships.
    matched_entities, global_entities = self.process_lists(entities1, entities2, 'entity', ent_threshold)
    matched_relations, _ = self.process_lists(relationships1, relationships2, 'relation', rel_threshold)

    # Record only the entities whose name differs after matching: old name -> matched name.
    renamed = {}
    for original, matched in zip(entities1, matched_entities):
        if original['name'] != matched['name']:
            renamed[original['name']] = matched['name']

    # Step 2: re-point relationship endpoints at the matched entity names.
    # Work on shallow copies so the dicts in matched_relations keep their endpoints.
    rewired_relations = []
    for relation in matched_relations:
        rewired = relation.copy()
        for endpoint in ('startNode', 'endNode'):
            current = relation[endpoint]
            if current in renamed:
                rewired[endpoint] = renamed[current]
        rewired_relations.append(rewired)

    # The complete list is built before relationships2 is touched.
    relationships2.extend(rewired_relations)

    return global_entities, relationships2

0 comments on commit 27ab7b6

Please sign in to comment.