Skip to content

Commit

Permalink
Merge branch 'main' into fix/confidence-data-to-prompt-service
Browse files Browse the repository at this point in the history
  • Loading branch information
vishnuszipstack authored Dec 10, 2024
2 parents 3568aff + 5fd0928 commit fcd63b8
Show file tree
Hide file tree
Showing 12 changed files with 3,812 additions and 70 deletions.
114 changes: 59 additions & 55 deletions backend/pdm.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dependencies = [
# Indirect local dependencies usually need to be added in their own projects
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
#
# However, such indirect local dependencies which are not direct depedency of
# However, such indirect local dependencies which are not direct dependency of
# main project appear as absolute paths in pdm.lock of main project, making it
# impossible to check in the lock file.
#
Expand Down Expand Up @@ -77,6 +77,7 @@ dev = [
"-e unstract-tool-registry @ file:///${PROJECT_ROOT}/../unstract/tool-registry",
"-e unstract-tool-sandbox @ file:///${PROJECT_ROOT}/../unstract/tool-sandbox",
"-e unstract-workflow-execution @ file:///${PROJECT_ROOT}/../unstract/workflow-execution",
"-e unstract-filesystem @ file:///${PROJECT_ROOT}/../unstract/filesystem",
]

[tool.pytest.ini_options]
Expand Down
1 change: 1 addition & 0 deletions docker/scripts/pdm-lock-gen/pdm-lock.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ directories=(
"backend"
"prompt-service"
"worker"
"unstract/filesystem"
"unstract/core"
"unstract/flags"
"platform-service"
Expand Down
3 changes: 3 additions & 0 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion tools/classifier/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.53.1
unstract-sdk~=0.54.0rc5
# TODO: remove once it is added to the SDK
s3fs[boto3]==2024.6.0
4 changes: 3 additions & 1 deletion tools/structure/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.53.2
unstract-sdk~=0.54.0rc5
# TODO: remove once it is added to the SDK
s3fs[boto3]==2024.6.0
25 changes: 19 additions & 6 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Callable, Optional

from constants import SettingsKeys # type: ignore [attr-defined]
from unstract.sdk.constants import LogLevel, LogState, MetadataKey
from unstract.sdk.constants import LogLevel, LogState, MetadataKey, ToolEnv
from unstract.sdk.index import Index
from unstract.sdk.prompt import PromptTool
from unstract.sdk.tool.base import BaseTool
Expand Down Expand Up @@ -97,7 +97,10 @@ def run(
_, file_name = os.path.split(input_file)
if summarize_as_source:
file_name = SettingsKeys.SUMMARIZE
tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
if hasattr(self, "workflow_filestorage"):
tool_data_dir = Path(self.get_env_or_die(ToolEnv.EXECUTION_DATA_DIR))
else:
tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
execution_run_data_folder = Path(
self.get_env_or_die(SettingsKeys.EXECUTION_RUN_DATA_FOLDER)
)
Expand Down Expand Up @@ -253,8 +256,13 @@ def run(
self.stream_log("Writing parsed output...")
source_name = self.get_exec_metadata.get(MetadataKey.SOURCE_NAME)
output_path = Path(output_dir) / f"{Path(source_name).stem}.json"
with open(output_path, "w", encoding="utf-8") as f:
f.write(structured_output)
if hasattr(self, "workflow_filestorage"):
self.workflow_filestorage.json_dump(
path=output_path, data=structured_output_dict
)
else:
with open(output_path, "w", encoding="utf-8") as f:
f.write(structured_output)
except OSError as e:
self.stream_error_and_exit(f"Error creating output file: {e}")
except json.JSONDecodeError as e:
Expand Down Expand Up @@ -336,8 +344,13 @@ def _summarize_and_index(
structure_output = json.loads(response[SettingsKeys.STRUCTURE_OUTPUT])
summarized_context = structure_output.get(SettingsKeys.DATA, "")
self.stream_log("Writing summarized context to a file")
with open(summarize_file_path, "w", encoding="utf-8") as f:
f.write(summarized_context)
if hasattr(self, "workflow_filestorage"):
self.workflow_filestorage.write(
path=summarize_file_path, mode="w", data=summarized_context
)
else:
with open(summarize_file_path, "w", encoding="utf-8") as f:
f.write(summarized_context)

self.stream_log("Indexing summarized context")
summarize_file_hash: str = ToolUtils.get_hash_from_file(
Expand Down
4 changes: 3 additions & 1 deletion tools/text_extractor/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.53.1
unstract-sdk~=0.54.0rc5
# TODO: remove once it is added to the SDK
s3fs[boto3]==2024.6.0
Loading

0 comments on commit fcd63b8

Please sign in to comment.