From 5e10d2b682f25aef44e3ab667993d04e66c2a671 Mon Sep 17 00:00:00 2001
From: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
Date: Tue, 10 Dec 2024 10:18:05 +0530
Subject: [PATCH 1/3] Add line-item extraction prompt type

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
---
 .../static/select_choices.json                |  3 +-
 ...003_alter_toolstudioprompt_enforce_type.py | 36 +++++++++
 .../prompt_studio/prompt_studio_v2/models.py  |  1 +
 .../src/unstract/prompt_service/constants.py  |  1 +
 .../src/unstract/prompt_service/helper.py     | 73 ++++++++++++++++++-
 .../src/unstract/prompt_service/main.py       | 35 +++++++++
 6 files changed, 146 insertions(+), 3 deletions(-)
 create mode 100644 backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py

diff --git a/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json b/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json
index f9e002f7d..2e260a452 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json
+++ b/backend/prompt_studio/prompt_studio_core_v2/static/select_choices.json
@@ -15,7 +15,8 @@
         "boolean":"boolean",
         "json":"json",
         "table":"table",
-        "record":"record"
+        "record":"record",
+        "line_item":"line-item"
     },
     "output_processing":{
         "DEFAULT":"Default"
diff --git a/backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py b/backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py
new file mode 100644
index 000000000..af359ec9a
--- /dev/null
+++ b/backend/prompt_studio/prompt_studio_v2/migrations/0003_alter_toolstudioprompt_enforce_type.py
@@ -0,0 +1,36 @@
+# Generated by Django 4.2.1 on 2024-12-10 04:13
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("prompt_studio_v2", "0002_alter_toolstudioprompt_enforce_type"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="toolstudioprompt",
+            name="enforce_type",
+            field=models.TextField(
+                blank=True,
+                choices=[
+                    ("Text", "Response sent as Text"),
+                    ("number", "Response sent as number"),
+                    ("email", "Response sent as email"),
+                    ("date", "Response sent as date"),
+                    ("boolean", "Response sent as boolean"),
+                    ("json", "Response sent as json"),
+                    ("table", "Response sent as table"),
+                    (
+                        "record",
+                        "Response sent for records. Entries of records are list of logical and organized individual entities with distint values",
+                    ),
+                    ("line-item", "Response sent as line-item"),
+                ],
+                db_comment="Field to store the type in             which the response to be returned.",
+                default="Text",
+            ),
+        ),
+    ]
diff --git a/backend/prompt_studio/prompt_studio_v2/models.py b/backend/prompt_studio/prompt_studio_v2/models.py
index 9cd37c36f..afe978763 100644
--- a/backend/prompt_studio/prompt_studio_v2/models.py
+++ b/backend/prompt_studio/prompt_studio_v2/models.py
@@ -27,6 +27,7 @@ class EnforceType(models.TextChoices):
             "logical and organized individual "
             "entities with distint values"
         )
+        LINE_ITEM = "line-item", ("Response sent as line-item")
 
     class PromptType(models.TextChoices):
         PROMPT = "PROMPT", "Response sent as Text"
diff --git a/prompt-service/src/unstract/prompt_service/constants.py b/prompt-service/src/unstract/prompt_service/constants.py
index e905eec1e..d6e9f36d7 100644
--- a/prompt-service/src/unstract/prompt_service/constants.py
+++ b/prompt-service/src/unstract/prompt_service/constants.py
@@ -71,6 +71,7 @@ class PromptServiceContants:
     ENABLE_HIGHLIGHT = "enable_highlight"
     FILE_PATH = "file_path"
     HIGHLIGHT_DATA = "highlight_data"
+    LINE_ITEM = "line-item"
 
 
 class RunLevel(Enum):
diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py
index cf4ef63cb..50ce64542 100644
--- a/prompt-service/src/unstract/prompt_service/helper.py
+++ b/prompt-service/src/unstract/prompt_service/helper.py
@@ -16,6 +16,11 @@
 from unstract.sdk.exceptions import SdkError
 from unstract.sdk.llm import LLM
 
+PAID_FEATURE_MSG = (
+    "It is a cloud / enterprise feature. If you have purchased a plan and still "
+    "face this issue, please contact support"
+)
+
 load_dotenv()
 
 # Global variable to store plugins
@@ -295,8 +300,8 @@ def run_completion(
             extract_json=prompt_type.lower() != PSKeys.TEXT,
         )
         answer: str = completion[PSKeys.RESPONSE].text
-        highlight_data = completion.get(PSKeys.HIGHLIGHT_DATA)
-        if all([metadata, highlight_data, prompt_key]):
+        highlight_data = completion.get(PSKeys.HIGHLIGHT_DATA, [])
+        if all([metadata, prompt_key]):
             metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = highlight_data
         return answer
     # TODO: Catch and handle specific exception here
@@ -333,3 +338,67 @@ def extract_table(
     except table_extractor["exception_cls"] as e:
         msg = f"Couldn't extract table. {e}"
         raise APIError(message=msg)
+
+
+def extract_line_item(
+    tool_settings: dict[str, Any],
+    output: dict[str, Any],
+    plugins: dict[str, dict[str, Any]],
+    structured_output: dict[str, Any],
+    llm: LLM,
+    file_path: str,
+) -> dict[str, Any]:
+    # Adjust file path to read from the extract folder
+    base_name = os.path.splitext(os.path.basename(file_path))[
+        0
+    ]  # Get the base name without extension
+    extract_file_path = os.path.join(
+        os.path.dirname(file_path), "extract", f"{base_name}.txt"
+    )
+
+    # Read file content into context
+    if not os.path.exists(extract_file_path):
+        raise FileNotFoundError(
+            f"The file at path '{extract_file_path}' does not exist."
+        )
+
+    with open(extract_file_path, encoding="utf-8") as file:
+        context = file.read()
+
+    prompt = construct_prompt(
+        preamble=tool_settings.get(PSKeys.PREAMBLE, ""),
+        prompt=output["promptx"],
+        postamble=tool_settings.get(PSKeys.POSTAMBLE, ""),
+        grammar_list=tool_settings.get(PSKeys.GRAMMAR, []),
+        context=context,
+        platform_postamble="",
+    )
+    # return run_completion(
+    #     llm=llm,
+    #     prompt=prompt,
+    #     metadata=metadata,
+    #     prompt_key=output[PSKeys.NAME],
+    #     prompt_type=output.get(PSKeys.TYPE, PSKeys.TEXT),
+    #     enable_highlight=enable_highlight,
+    #     file_path=file_path,
+    # )
+    line_item_extraction_plugin: dict[str, Any] = plugins.get(
+        "line-item-extraction", {}
+    )
+    if not line_item_extraction_plugin:
+        raise APIError(PAID_FEATURE_MSG)
+    try:
+        line_item_extraction = line_item_extraction_plugin["entrypoint_cls"](
+            llm=llm,
+            tool_settings=tool_settings,
+            output=output,
+            structured_output=structured_output,
+            logger=current_app.logger,
+            prompt=prompt,
+        )
+        answer = line_item_extraction.run()
+        structured_output[output[PSKeys.NAME]] = answer
+        return structured_output
+    except line_item_extraction["exception_cls"] as e:
+        msg = f"Couldn't extract table. {e}"
+        raise APIError(message=msg)
diff --git a/prompt-service/src/unstract/prompt_service/main.py b/prompt-service/src/unstract/prompt_service/main.py
index 12c2242e0..43ea0052e 100644
--- a/prompt-service/src/unstract/prompt_service/main.py
+++ b/prompt-service/src/unstract/prompt_service/main.py
@@ -12,6 +12,7 @@
 from unstract.prompt_service.exceptions import APIError, ErrorResponse, NoPayloadError
 from unstract.prompt_service.helper import (
     construct_and_run_prompt,
+    extract_line_item,
     extract_table,
     extract_variable,
     get_cleaned_context,
@@ -250,6 +251,40 @@ def prompt_processor() -> Any:
                     "Error while extracting table for the prompt",
                 )
                 raise api_error
+        elif output[PSKeys.TYPE] == PSKeys.LINE_ITEM:
+            try:
+                structured_output = extract_line_item(
+                    tool_settings=tool_settings,
+                    output=output,
+                    plugins=plugins,
+                    structured_output=structured_output,
+                    llm=llm,
+                    file_path=file_path,
+                )
+                metadata = query_usage_metadata(token=platform_key, metadata=metadata)
+                response = {
+                    PSKeys.METADATA: metadata,
+                    PSKeys.OUTPUT: structured_output,
+                }
+                return response
+            except APIError as e:
+                app.logger.error(
+                    "Failed to extract line-item for the prompt %s: %s",
+                    output[PSKeys.NAME],
+                    str(e),
+                )
+                publish_log(
+                    log_events_id,
+                    {
+                        "tool_id": tool_id,
+                        "prompt_key": prompt_name,
+                        "doc_name": doc_name,
+                    },
+                    LogLevel.ERROR,
+                    RunLevel.RUN,
+                    "Error while extracting line-item for the prompt",
+                )
+                raise e
 
         try:
             context: set[str] = set()

From 668ed04bd21ae341e91edc62d4a462fde2627fc0 Mon Sep 17 00:00:00 2001
From: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
Date: Tue, 10 Dec 2024 10:32:22 +0530
Subject: [PATCH 2/3] Removed commented out code

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
---
 prompt-service/src/unstract/prompt_service/helper.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py
index 50ce64542..ac6152d48 100644
--- a/prompt-service/src/unstract/prompt_service/helper.py
+++ b/prompt-service/src/unstract/prompt_service/helper.py
@@ -373,15 +373,6 @@ def extract_line_item(
         context=context,
         platform_postamble="",
     )
-    # return run_completion(
-    #     llm=llm,
-    #     prompt=prompt,
-    #     metadata=metadata,
-    #     prompt_key=output[PSKeys.NAME],
-    #     prompt_type=output.get(PSKeys.TYPE, PSKeys.TEXT),
-    #     enable_highlight=enable_highlight,
-    #     file_path=file_path,
-    # )
     line_item_extraction_plugin: dict[str, Any] = plugins.get(
         "line-item-extraction", {}
     )

From bd0e83228b27c2953ecc7155d61e1e1ac349e77f Mon Sep 17 00:00:00 2001
From: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
Date: Mon, 16 Dec 2024 05:01:53 +0530
Subject: [PATCH 3/3] Fix exception class lookup in extract_line_item

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
---
 prompt-service/src/unstract/prompt_service/helper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py
index ac6152d48..a9c927c5d 100644
--- a/prompt-service/src/unstract/prompt_service/helper.py
+++ b/prompt-service/src/unstract/prompt_service/helper.py
@@ -383,13 +383,13 @@ def extract_line_item(
             llm=llm,
             tool_settings=tool_settings,
             output=output,
+            prompt=prompt,
             structured_output=structured_output,
             logger=current_app.logger,
-            prompt=prompt,
         )
         answer = line_item_extraction.run()
         structured_output[output[PSKeys.NAME]] = answer
         return structured_output
-    except line_item_extraction["exception_cls"] as e:
+    except line_item_extraction_plugin["exception_cls"] as e:
         msg = f"Couldn't extract table. {e}"
         raise APIError(message=msg)