From 5e10d2b682f25aef44e3ab667993d04e66c2a671 Mon Sep 17 00:00:00 2001 From: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:18:05 +0530 Subject: [PATCH 1/3] Changes for line-item extraction prompt type Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> --- .../static/select_choices.json | 3 +- ...003_alter_toolstudioprompt_enforce_type.py | 36 +++++++++ .../prompt_studio/prompt_studio_v2/models.py | 1 + .../src/unstract/prompt_service/constants.py | 1 + .../src/unstract/prompt_service/helper.py | 73 ++++++++++++++++++- .../src/unstract/prompt_service/main.py | 35 +++++++++ 6 files changed, 146 insertions(+), 3 deletions(-) create mode 100644 backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py diff --git a/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json b/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json index f9e002f7d..2e260a452 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json +++ b/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json @@ -15,7 +15,8 @@ "boolean":"boolean", "json":"json", "table":"table", - "record":"record" + "record":"record", + "line_item":"line-item" }, "output_processing":{ "DEFAULT":"Default" diff --git a/backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py b/backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py new file mode 100644 index 000000000..af359ec9a --- /dev/null +++ b/backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py @@ -0,0 +1,36 @@ +# Generated by Django 4.2.1 on 2024-12-10 04:13 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("prompt_studio_v2", "0002_alter_toolstudioprompt_enforce_type"), + ] + + operations = [ + migrations.AlterField( + model_name="toolstudioprompt", + name="enforce_type", + field=models.TextField( + blank=True, + choices=[ + ("Text", "Response sent as Text"), + ("number", "Response sent as number"), + ("email", "Response sent as email"), + ("date", "Response sent as date"), + ("boolean", "Response sent as boolean"), + ("json", "Response sent as json"), + ("table", "Response sent as table"), + ( + "record", + "Response sent for records. Entries of records are list of logical and organized individual entities with distint values", + ), + ("line-item", "Response sent as line-item"), + ], + db_comment="Field to store the type in which the response to be returned.", + default="Text", + ), + ), + ] diff --git a/backend/prompt_studio/prompt_studio_v2/models.py b/backend/prompt_studio/prompt_studio_v2/models.py index 9cd37c36f..afe978763 100644 --- a/backend/prompt_studio/prompt_studio_v2/models.py +++ b/backend/prompt_studio/prompt_studio_v2/models.py @@ -27,6 +27,7 @@ class EnforceType(models.TextChoices): "logical and organized individual " "entities with distint values" ) + LINE_ITEM = "line-item", ("Response sent as line-item") class PromptType(models.TextChoices): PROMPT = "PROMPT", "Response sent as Text" diff --git a/prompt-service/src/unstract/prompt_service/constants.py b/prompt-service/src/unstract/prompt_service/constants.py index e905eec1e..d6e9f36d7 100644 --- a/prompt-service/src/unstract/prompt_service/constants.py +++ b/prompt-service/src/unstract/prompt_service/constants.py @@ -71,6 +71,7 @@ class PromptServiceContants: ENABLE_HIGHLIGHT = "enable_highlight" FILE_PATH = "file_path" HIGHLIGHT_DATA = "highlight_data" + LINE_ITEM = "line-item" class RunLevel(Enum): diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py index cf4ef63cb..50ce64542 100644 --- a/prompt-service/src/unstract/prompt_service/helper.py +++ b/prompt-service/src/unstract/prompt_service/helper.py @@ -16,6 +16,11 @@ from unstract.sdk.exceptions import SdkError from unstract.sdk.llm import LLM +PAID_FEATURE_MSG = ( + "It is a cloud / enterprise feature. If you have purchased a plan and still " + "face this issue, please contact support" +) + load_dotenv() # Global variable to store plugins @@ -295,8 +300,8 @@ def run_completion( extract_json=prompt_type.lower() != PSKeys.TEXT, ) answer: str = completion[PSKeys.RESPONSE].text - highlight_data = completion.get(PSKeys.HIGHLIGHT_DATA) - if all([metadata, highlight_data, prompt_key]): + highlight_data = completion.get(PSKeys.HIGHLIGHT_DATA, []) + if all([metadata, prompt_key]): metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = highlight_data return answer # TODO: Catch and handle specific exception here @@ -333,3 +338,67 @@ def extract_table( except table_extractor["exception_cls"] as e: msg = f"Couldn't extract table. {e}" raise APIError(message=msg) + + +def extract_line_item( + tool_settings: dict[str, Any], + output: dict[str, Any], + plugins: dict[str, dict[str, Any]], + structured_output: dict[str, Any], + llm: LLM, + file_path: str, +) -> dict[str, Any]: + # Adjust file path to read from the extract folder + base_name = os.path.splitext(os.path.basename(file_path))[ + 0 + ] # Get the base name without extension + extract_file_path = os.path.join( + os.path.dirname(file_path), "extract", f"{base_name}.txt" + ) + + # Read file content into context + if not os.path.exists(extract_file_path): + raise FileNotFoundError( + f"The file at path '{extract_file_path}' does not exist." + ) + + with open(extract_file_path, encoding="utf-8") as file: + context = file.read() + + prompt = construct_prompt( + preamble=tool_settings.get(PSKeys.PREAMBLE, ""), + prompt=output["promptx"], + postamble=tool_settings.get(PSKeys.POSTAMBLE, ""), + grammar_list=tool_settings.get(PSKeys.GRAMMAR, []), + context=context, + platform_postamble="", + ) + # return run_completion( + # llm=llm, + # prompt=prompt, + # metadata=metadata, + # prompt_key=output[PSKeys.NAME], + # prompt_type=output.get(PSKeys.TYPE, PSKeys.TEXT), + # enable_highlight=enable_highlight, + # file_path=file_path, + # ) + line_item_extraction_plugin: dict[str, Any] = plugins.get( + "line-item-extraction", {} + ) + if not line_item_extraction_plugin: + raise APIError(PAID_FEATURE_MSG) + try: + line_item_extraction = line_item_extraction_plugin["entrypoint_cls"]( + llm=llm, + tool_settings=tool_settings, + output=output, + structured_output=structured_output, + logger=current_app.logger, + prompt=prompt, + ) + answer = line_item_extraction.run() + structured_output[output[PSKeys.NAME]] = answer + return structured_output + except line_item_extraction["exception_cls"] as e: + msg = f"Couldn't extract table. {e}" + raise APIError(message=msg) diff --git a/prompt-service/src/unstract/prompt_service/main.py b/prompt-service/src/unstract/prompt_service/main.py index 12c2242e0..43ea0052e 100644 --- a/prompt-service/src/unstract/prompt_service/main.py +++ b/prompt-service/src/unstract/prompt_service/main.py @@ -12,6 +12,7 @@ from unstract.prompt_service.exceptions import APIError, ErrorResponse, NoPayloadError from unstract.prompt_service.helper import ( construct_and_run_prompt, + extract_line_item, extract_table, extract_variable, get_cleaned_context, @@ -250,6 +251,40 @@ def prompt_processor() -> Any: "Error while extracting table for the prompt", ) raise api_error + elif output[PSKeys.TYPE] == PSKeys.LINE_ITEM: + try: + structured_output = extract_line_item( + tool_settings=tool_settings, + output=output, + plugins=plugins, + structured_output=structured_output, + llm=llm, + file_path=file_path, + ) + metadata = query_usage_metadata(token=platform_key, metadata=metadata) + response = { + PSKeys.METADATA: metadata, + PSKeys.OUTPUT: structured_output, + } + return response + except APIError as e: + app.logger.error( + "Failed to extract line-item for the prompt %s: %s", + output[PSKeys.NAME], + str(e), + ) + publish_log( + log_events_id, + { + "tool_id": tool_id, + "prompt_key": prompt_name, + "doc_name": doc_name, + }, + LogLevel.ERROR, + RunLevel.RUN, + "Error while extracting line-item for the prompt", + ) + raise e try: context: set[str] = set() From 668ed04bd21ae341e91edc62d4a462fde2627fc0 Mon Sep 17 00:00:00 2001 From: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:32:22 +0530 Subject: [PATCH 2/3] Removed commented out code Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> --- prompt-service/src/unstract/prompt_service/helper.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py index 50ce64542..ac6152d48 100644 --- a/prompt-service/src/unstract/prompt_service/helper.py +++ b/prompt-service/src/unstract/prompt_service/helper.py @@ -373,15 +373,6 @@ def extract_line_item( context=context, platform_postamble="", ) - # return run_completion( - # llm=llm, - # prompt=prompt, - # metadata=metadata, - # prompt_key=output[PSKeys.NAME], - # prompt_type=output.get(PSKeys.TYPE, PSKeys.TEXT), - # enable_highlight=enable_highlight, - # file_path=file_path, - # ) line_item_extraction_plugin: dict[str, Any] = plugins.get( "line-item-extraction", {} ) From bd0e83228b27c2953ecc7155d61e1e1ac349e77f Mon Sep 17 00:00:00 2001 From: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> Date: Mon, 16 Dec 2024 05:01:53 +0530 Subject: [PATCH 3/3] Minor fix Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> --- prompt-service/src/unstract/prompt_service/helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py index ac6152d48..a9c927c5d 100644 --- a/prompt-service/src/unstract/prompt_service/helper.py +++ b/prompt-service/src/unstract/prompt_service/helper.py @@ -383,13 +383,13 @@ def extract_line_item( llm=llm, tool_settings=tool_settings, output=output, + prompt=prompt, structured_output=structured_output, logger=current_app.logger, - prompt=prompt, ) answer = line_item_extraction.run() structured_output[output[PSKeys.NAME]] = answer return structured_output - except line_item_extraction["exception_cls"] as e: + except line_item_extraction_plugin["exception_cls"] as e: msg = f"Couldn't extract table. {e}" raise APIError(message=msg)