diff --git a/agents-api/agents_api/common/nlp.py b/agents-api/agents_api/common/nlp.py index 2fec52421..bc4d33383 100644 --- a/agents-api/agents_api/common/nlp.py +++ b/agents-api/agents_api/common/nlp.py @@ -8,13 +8,14 @@ nlp = spacy.load("en_core_web_sm") -def extract_keywords(text: str, top_n: int = 10) -> list[str]: +def extract_keywords(text: str, top_n: int = 10, clean: bool = True) -> list[str]: """ Extracts significant keywords and phrases from the text. Args: text (str): The input text to process. top_n (int): Number of top keywords to extract based on frequency. + clean (bool): Strip non-alphanumeric characters from keywords. Returns: List[str]: A list of extracted keywords/phrases. @@ -46,6 +47,9 @@ def extract_keywords(text: str, top_n: int = 10) -> list[str]: # Get top_n keywords keywords = [item for item, count in freq.most_common(top_n)] + if clean: + keywords = [re.sub(r"[^\w\s\-_]+", "", kw) for kw in keywords] + return keywords @@ -212,5 +216,6 @@ def paragraph_to_custom_queries(paragraph: str) -> list[str]: """ queries = [text_to_custom_query(sentence.text) for sentence in nlp(paragraph).sents] + queries = [q for q in queries if q] return queries diff --git a/agents-api/agents_api/models/utils.py b/agents-api/agents_api/models/utils.py index e182de077..0b0c41edd 100644 --- a/agents-api/agents_api/models/utils.py +++ b/agents-api/agents_api/models/utils.py @@ -8,7 +8,7 @@ from pydantic import BaseModel from ..common.utils.cozo import uuid_int_list_to_uuid4 -from ..env import do_verify_developer, do_verify_developer_owns_resource +from ..env import debug, do_verify_developer, do_verify_developer_owns_resource P = ParamSpec("P") T = TypeVar("T") @@ -185,8 +185,8 @@ def make_cozo_json_query(fields): def cozo_query( func: Callable[P, tuple[str | list[str | None], dict]] | None = None, - debug: bool | None = None, - only_on_error: bool = False, + debug: bool | None = debug, + only_on_error: bool = True, ): def cozo_query_dec(func: Callable[P, tuple[str | list[Any], dict]]): """