From 7ba003ea83f5a673fc169efcc04ad8d8d42cc662 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Tue, 14 Nov 2023 09:35:35 +0100 Subject: [PATCH] Incorporate review feedback. --- .../tasks/entity_linker/candidate_selector.py | 6 ++-- spacy_llm/tasks/entity_linker/parser.py | 2 +- spacy_llm/tasks/entity_linker/registry.py | 29 +++++++++---------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/spacy_llm/tasks/entity_linker/candidate_selector.py b/spacy_llm/tasks/entity_linker/candidate_selector.py index b03c49b0..633d7067 100644 --- a/spacy_llm/tasks/entity_linker/candidate_selector.py +++ b/spacy_llm/tasks/entity_linker/candidate_selector.py @@ -10,15 +10,15 @@ from .util import UNAVAILABLE_ENTITY_DESC -class PipelineCandidateSelector: - """Callable generated by loading and wrapping a spaCy pipeline with an EL component and a filled knowledge base.""" +class KBCandidateSelector: + """Initializes a spaCy InMemoryLookupKB and uses its candidate selection mechanism to return entity candidates.""" def __init__( self, kb_loader: InMemoryLookupKBLoader, top_n: int, ): - """Generates CandidateSelector. Note that this class has to be initialized (.initialize()) before being used. + """Generates KBCandidateSelector. Note that this class has to be initialized (.initialize()) before being used. kb_loader (InMemoryLookupKBLoader): KB loader. top_n (int): Top n candidates to include in prompt. """ diff --git a/spacy_llm/tasks/entity_linker/parser.py b/spacy_llm/tasks/entity_linker/parser.py index d38ca55a..b3c4076a 100644 --- a/spacy_llm/tasks/entity_linker/parser.py +++ b/spacy_llm/tasks/entity_linker/parser.py @@ -37,7 +37,7 @@ def parse_responses_v1( label=ent.label, vector=ent.vector, vector_norm=ent.vector_norm, - kb_id=solution.replace("NIL", EntityLinker.NIL), + kb_id=solution if solution != "NIL" else EntityLinker.NIL, ) for ent, solution in zip(ents, solutions) ] diff --git a/spacy_llm/tasks/entity_linker/registry.py b/spacy_llm/tasks/entity_linker/registry.py index f215fe9a..10e34ed0 100644 --- a/spacy_llm/tasks/entity_linker/registry.py +++ b/spacy_llm/tasks/entity_linker/registry.py @@ -6,7 +6,7 @@ from ...registry import registry from ...ty import ExamplesConfigType, FewshotExample, TaskResponseParser -from .candidate_selector import PipelineCandidateSelector +from .candidate_selector import KBCandidateSelector from .parser import parse_responses_v1 from .task import DEFAULT_EL_TEMPLATE_V1, EntityLinkerTask from .ty import EntDescReader, InMemoryLookupKBLoader @@ -32,19 +32,18 @@ def make_entitylinker_task( """ raw_examples = examples() if callable(examples) else examples example_type = prompt_example_type or ELExample - examples = [example_type(**eg) for eg in raw_examples] if raw_examples else None + examples = [example_type(**eg) for eg in raw_examples] if raw_examples else [] # Ensure there is a reason for every solution, even if it's empty. This makes templating easier. - if examples: - for example in examples: - if example.reasons is None: - example.reasons = [""] * len(example.solutions) - elif 0 < len(example.reasons) < len(example.solutions): - warnings.warn( - f"The number of reasons doesn't match the number of solutions ({len(example.reasons)} " - f"vs. {len(example.solutions)}). There must be one reason per solution for an entity " - f"linking example, or no reasons at all. Ignoring all specified reasons." - ) - example.reasons = [""] * len(example.solutions) + for example in examples: + if example.reasons is None: + example.reasons = [""] * len(example.solutions) + elif 0 < len(example.reasons) < len(example.solutions): + warnings.warn( + f"The number of reasons doesn't match the number of solutions ({len(example.reasons)} " + f"vs. {len(example.solutions)}). There must be one reason per solution for an entity " + f"linking example, or no reasons at all. Ignoring all specified reasons." + ) + example.reasons = [""] * len(example.solutions) return EntityLinkerTask( template=template, @@ -59,14 +58,14 @@ def make_entitylinker_task( def make_candidate_selector_pipeline( kb_loader: InMemoryLookupKBLoader, top_n: int = 5, -) -> PipelineCandidateSelector: +) -> KBCandidateSelector: """Generates CandidateSelector. Note that this class has to be initialized (.initialize()) before being used. kb_loader (InMemoryLookupKBLoader): KB loader. top_n (int): Top n candidates to include in prompt. """ # Note: we could also move the class implementation here directly. This was just done to separate registration from # implementation code. - return PipelineCandidateSelector( + return KBCandidateSelector( kb_loader=kb_loader, top_n=top_n, )