Skip to content

Commit

Permalink
Merge pull request #3 from Zipstack/feat/update-response-dict
Browse files Browse the repository at this point in the history
feat: Updated response dict to use snake case
  • Loading branch information
jaseemjaskp authored Jun 14, 2024
2 parents ac1ae41 + 4feac34 commit 98d9886
Show file tree
Hide file tree
Showing 22 changed files with 672 additions and 32 deletions.
63 changes: 63 additions & 0 deletions .github/workflows/ci_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# CI: run the tox test suite on pushes to main and on non-draft PRs,
# then publish the generated report to the PR and the job summary.
name: Run tox tests

on:
  push:
    branches:
      - main
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches: [main]

jobs:
  test:
    # Skip CI for draft pull requests; re-runs when marked ready_for_review.
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Cache tox environments
        uses: actions/cache@v3
        with:
          path: .tox/
          # Cache key tracks dependency/config changes so stale envs are rebuilt.
          key: ${{ runner.os }}-tox-${{ hashFiles('**/pyproject.toml', '**/tox.ini') }}
          restore-keys: |
            ${{ runner.os }}-tox-
      - name: Install tox
        run: pip install tox

      - name: Create test env
        shell: bash
        # Materialize tests/.env from the sample and inject the API key secret.
        run: |
          cp tests/sample.env tests/.env
          sed -i "s|LLMWHISPERER_API_KEY=|LLMWHISPERER_API_KEY=${{ secrets.LLMWHISPERER_API_KEY }}|" tests/.env
      - name: Run tox
        id: tox
        run: |
          tox
      - name: Render the report to the PR
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          header: llmw-py-client-test-report
          recreate: true
          path: llmw-py-client-report.md

      - name: Output reports to the job summary when tests fail
        shell: bash
        # Surface the markdown report in the Actions job summary as a
        # collapsible section, if the report file was produced.
        run: |
          if [ -f "llmw-py-client-report.md" ]; then
            {
              echo "<details><summary>Worker Test Report</summary>"
              echo ""
              cat "llmw-py-client-report.md"
              echo ""
              echo "</details>"
            } >> "$GITHUB_STEP_SUMMARY"
          fi
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
cover/
*-report.md

# Translations
*.mo
Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ repos:
rev: v4.5.0
hooks:
- id: trailing-whitespace
# TODO: Exclude tests/test_data directory
exclude: ^tests/test_data/
exclude_types:
- "markdown"
- id: end-of-file-fixer
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pip install llmwhisperer-client
First, import the `LLMWhispererClient` from the `client` module:

```python
from llmwhisperer.client import LLMWhispererClient
from unstract.llmwhisperer.client import LLMWhispererClient
```

Then, create an instance of the `LLMWhispererClient`:
Expand Down Expand Up @@ -105,7 +105,7 @@ try:
pages_to_extract="1,2",
timeout=2,
)
if result["statusCode"] == 202:
if result["status_code"] == 202:
print("Timeout occurred. Whisper request accepted.")
print(f"Whisper hash: {result['whisper-hash']}")
while True:
Expand Down
6 changes: 3 additions & 3 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ lint = [

[tool.pdm.version]
source = "file"
path = "src/llmwhisperer/__init__.py"
path = "src/unstract/llmwhisperer/__init__.py"

[tool.isort]
line_length = 120
Expand All @@ -75,6 +75,6 @@ log_level = "INFO"
log_cli = true

[tool.pdm.scripts]
test.cmd = "pytest"
test.cmd = "pytest -s -v"
test.env_file = "tests/.env"
test.help = "Runs pytests for LLM Whisperer client"
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
__version__ = "0.1.1"
__version__ = "0.2.0"

from .client import LLMWhispererClient # noqa: F401


def get_sdk_version():
"""Returns the SDK version."""
Expand Down
28 changes: 14 additions & 14 deletions src/llmwhisperer/client.py → src/unstract/llmwhisperer/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import requests

from llmwhisperer.utils import LLMWhispererUtils
from unstract.llmwhisperer.utils import LLMWhispererUtils

BASE_URL = "https://llmwhisperer-api.unstract.com/v1"

Expand Down Expand Up @@ -141,7 +141,7 @@ def get_usage_info(self) -> dict:
response = s.send(prepared, timeout=self.api_timeout)
if response.status_code != 200:
err = json.loads(response.text)
err["statusCode"] = response.status_code
err["status_code"] = response.status_code
raise LLMWhispererClientException(err)
return json.loads(response.text)

Expand Down Expand Up @@ -213,15 +213,15 @@ def whisper(
if url == "" and file_path == "":
raise LLMWhispererClientException(
{
"statusCode": -1,
"status_code": -1,
"message": "Either url or file_path must be provided",
}
)

if timeout < 0 or timeout > 200:
raise LLMWhispererClientException(
{
"statusCode": -1,
"status_code": -1,
"message": "timeout must be between 0 and 200",
}
)
Expand All @@ -243,14 +243,14 @@ def whisper(
response = s.send(prepared, timeout=self.api_timeout)
if response.status_code != 200 and response.status_code != 202:
message = json.loads(response.text)
message["statusCode"] = response.status_code
message["status_code"] = response.status_code
raise LLMWhispererClientException(message)
if response.status_code == 202:
message = json.loads(response.text)
message["statusCode"] = response.status_code
message["status_code"] = response.status_code
return message
return {
"statusCode": response.status_code,
"status_code": response.status_code,
"extracted_text": response.text,
"whisper_hash": response.headers["whisper-hash"],
}
Expand All @@ -269,7 +269,7 @@ def whisper_status(self, whisper_hash: str) -> dict:
Returns:
dict: A dictionary containing the status of the whisper operation. The keys in the
dictionary include 'statusCode' and the status details.
dictionary include 'status_code' and the status details.
Raises:
LLMWhispererClientException: If the API request fails, it raises an exception with
Expand All @@ -285,10 +285,10 @@ def whisper_status(self, whisper_hash: str) -> dict:
response = s.send(prepared, timeout=self.api_timeout)
if response.status_code != 200:
err = json.loads(response.text)
err["statusCode"] = response.status_code
err["status_code"] = response.status_code
raise LLMWhispererClientException(err)
message = json.loads(response.text)
message["statusCode"] = response.status_code
message["status_code"] = response.status_code
return message

def whisper_retrieve(self, whisper_hash: str) -> dict:
Expand Down Expand Up @@ -320,11 +320,11 @@ def whisper_retrieve(self, whisper_hash: str) -> dict:
response = s.send(prepared, timeout=self.api_timeout)
if response.status_code != 200:
err = json.loads(response.text)
err["statusCode"] = response.status_code
err["status_code"] = response.status_code
raise LLMWhispererClientException(err)

return {
"statusCode": response.status_code,
"status_code": response.status_code,
"extracted_text": response.text,
}

Expand Down Expand Up @@ -366,8 +366,8 @@ def highlight_data(self, whisper_hash: str, search_text: str) -> dict:
response = s.send(prepared, timeout=self.api_timeout)
if response.status_code != 200:
err = json.loads(response.text)
err["statusCode"] = response.status_code
err["status_code"] = response.status_code
raise LLMWhispererClientException(err)
result = json.loads(response.text)
result["statusCode"] = response.status_code
result["status_code"] = response.status_code
return result
File renamed without changes.
47 changes: 37 additions & 10 deletions tests/test_client.py → tests/client_test.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
import logging
import os
import unittest
from pathlib import Path

import pytest

from llmwhisperer.client import LLMWhispererClient
from unstract.llmwhisperer import LLMWhispererClient

logger = logging.getLogger(__name__)


@pytest.fixture
def llm_whisperer_client():
# Create an instance of the client
client = LLMWhispererClient()
return client


def test_get_usage_info(llm_whisperer_client):
usage_info = llm_whisperer_client.get_usage_info()
def test_get_usage_info(client):
usage_info = client.get_usage_info()
logger.info(usage_info)
assert isinstance(usage_info, dict), "usage_info should be a dictionary"
expected_keys = [
Expand All @@ -30,6 +25,38 @@ def test_get_usage_info(llm_whisperer_client):
assert set(usage_info.keys()) == set(expected_keys), f"usage_info {usage_info} does not contain the expected keys"


@pytest.mark.parametrize(
    "processing_mode, output_mode, input_file",
    [
        ("ocr", "line-printer", "restaurant_invoice_photo.pdf"),
        ("ocr", "line-printer", "credit_card.pdf"),
        ("ocr", "line-printer", "handwritten-form.pdf"),
        ("ocr", "text", "restaurant_invoice_photo.pdf"),
        ("text", "line-printer", "restaurant_invoice_photo.pdf"),
        ("text", "text", "handwritten-form.pdf"),
    ],
)
def test_whisper(client, data_dir, processing_mode, output_mode, input_file):
    """Extract text from a sample document and compare it to the stored expectation."""
    whisper_result = client.whisper(
        processing_mode=processing_mode,
        output_mode=output_mode,
        file_path=os.path.join(data_dir, input_file),
        timeout=200,
    )
    logger.debug(whisper_result)

    # Expected output lives at tests/test_data/expected/<stem>.<mode>.<mode>.txt
    expected_name = f"{Path(input_file).stem}.{processing_mode}.{output_mode}.txt"
    expected_text = (Path(data_dir) / "expected" / expected_name).read_text(encoding="utf-8")

    assert isinstance(whisper_result, dict)
    assert whisper_result["status_code"] == 200
    assert whisper_result["extracted_text"] == expected_text


# TODO: Review and port to pytest based tests
class TestLLMWhispererClient(unittest.TestCase):
@unittest.skip("Skipping test_whisper")
def test_whisper(self):
Expand Down
17 changes: 17 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import os

import pytest

from unstract.llmwhisperer.client import LLMWhispererClient


@pytest.fixture(name="client")
def llm_whisperer_client():
    """Provide an LLMWhispererClient instance to tests."""
    return LLMWhispererClient()


@pytest.fixture(name="data_dir", scope="session")
def test_data_dir():
    """Session-scoped absolute path to the bundled test_data directory."""
    return os.path.join(os.path.dirname(__file__), "test_data")
File renamed without changes.
Loading

0 comments on commit 98d9886

Please sign in to comment.