Skip to content

Commit

Permalink
Merge branch 'main' into fix/coverage-count-prompt-studio
Browse files Browse the repository at this point in the history
  • Loading branch information
jaseemjaskp authored Dec 4, 2024
2 parents 3f324c5 + 2d29154 commit 26c4827
Show file tree
Hide file tree
Showing 19 changed files with 807 additions and 487 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,6 @@ unstract/tool-registry/tool_registry_config/registry.yaml
# Docker related
# End of https://www.toptal.com/developers/gitignore/api/windows,macos,linux,pycharm,pycharm+all,pycharm+iml,python,visualstudiocode,react,django
docker/temp/*
docker/init.sql/*
docker/*.env
!docker/sample*.env
docker/public_tools.json
Expand Down
5 changes: 0 additions & 5 deletions backend/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@ cmd=$1
# Dispatch on the sub-command given as the first argument; any other value
# falls through without running anything here.
case "$cmd" in
  migrate)
    echo "Migration initiated"
    .venv/bin/python manage.py migrate
    ;;
  prepare_and_migrate)
    # Create the schema, then run the migrations.
    echo "Creating schema in database"
    .venv/bin/python manage.py create_schema
    echo "Migration initiated"
    .venv/bin/python manage.py migrate
    ;;
esac

# NOTE: Leaving the below for reference in case it is required in the future
Expand Down
6 changes: 0 additions & 6 deletions backend/init.sql

This file was deleted.

3 changes: 3 additions & 0 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ TOOL_REGISTRY_CONFIG_PATH="/data/tool_registry_config"
# Flipt Service
FLIPT_SERVICE_AVAILABLE=False

# Remote storage config for tool registry
TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}'


# File System Configuration for Workflow and API Execution

Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose-dev-essentials.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data/
- ../backend/init.sql:/docker-entrypoint-initdb.d/init.sql
- ./scripts/db-setup/db_setup.sh:/docker-entrypoint-initdb.d/db_setup.sh
env_file:
- ./essentials.env
labels:
Expand Down
9 changes: 8 additions & 1 deletion docker/dockerfiles/platform.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@ ENV BUILD_CONTEXT_PATH platform-service
ENV BUILD_PACKAGES_PATH unstract
ENV PDM_VERSION 2.16.1

RUN pip install --no-cache-dir -U pip pdm~=${PDM_VERSION}; \
RUN apt-get update; \
apt-get --no-install-recommends install -y \
# unstract sdk
build-essential libmagic-dev; \
\
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*; \
\
pip install --no-cache-dir -U pip pdm~=${PDM_VERSION}; \
\
# Creates a non-root user with an explicit UID and adds permission to access the /app folder
# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
Expand Down
4 changes: 3 additions & 1 deletion docker/sample.essentials.env
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@

# Refer https://hub.docker.com/_/postgres#:~:text=How%20to%20extend%20this%20image
POSTGRES_USER=unstract_dev
POSTGRES_PASSWORD=unstract_pass
POSTGRES_DB=unstract_db
# Used by db setup script
POSTGRES_SCHEMA=unstract

MINIO_ROOT_USER=minio
MINIO_ROOT_PASSWORD=minio123
Expand Down
9 changes: 9 additions & 0 deletions docker/scripts/db-setup/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Unstract DB Setup Script

The [db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps set up the PostgreSQL database using the following environment variables defined in `essentials.env` (the user's copy of [sample.essentials.env](/docker/sample.essentials.env)):

- POSTGRES_USER
- POSTGRES_DB
- POSTGRES_SCHEMA

The script configures the DB user and creates a new schema as well.
12 changes: 12 additions & 0 deletions docker/scripts/db-setup/db_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Configures the Postgres role and creates the application schema.
#
# Reads POSTGRES_USER, POSTGRES_DB and POSTGRES_SCHEMA from the environment.
#
# The database itself is NOT created here: psql connects to ${POSTGRES_DB}
# below, so that database must already exist by the time this script runs.
# (A `CREATE DATABASE ${POSTGRES_DB}` issued over that connection could only
# ever fail with "already exists", so it has been dropped.)
echo "Creating DB '$POSTGRES_DB' and schema '$POSTGRES_SCHEMA' with user '$POSTGRES_USER'"

# ON_ERROR_STOP=1 makes psql exit non-zero on the first failing statement
# instead of reporting the error and carrying on with the rest.
# The expansions are quoted so values are passed to psql as single arguments.
psql -v ON_ERROR_STOP=1 -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" <<-END
ALTER ROLE ${POSTGRES_USER} SET client_encoding TO 'utf8';
ALTER ROLE ${POSTGRES_USER} SET default_transaction_isolation TO 'read committed';
ALTER ROLE ${POSTGRES_USER} SET timezone TO 'UTC';
ALTER USER ${POSTGRES_USER} CREATEDB;
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO ${POSTGRES_USER};
CREATE SCHEMA IF NOT EXISTS ${POSTGRES_SCHEMA};
END
887 changes: 455 additions & 432 deletions platform-service/pdm.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion platform-service/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies = [
"cryptography>=41.0.7",
"unstract-flags @ file:///${PROJECT_ROOT}/../unstract/flags",
"requests>=2.31.0",
"unstract-sdk~=0.54.0rc1",
"unstract-sdk==0.54.0rc2",
"gcsfs==2024.10.0",
]
requires-python = ">=3.9,<3.11.1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,26 @@ def __init__(
self.file_path = file_path

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
self.file_storage, self.file_path = self.__get_storage_credentials()
self.file_storage, self.file_path = self._get_storage_credentials()
self.model_token_data = self._get_model_token_data()

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):

def __get_storage_credentials(self) -> tuple[PermanentFileStorage, str]:
def _get_storage_credentials(self) -> tuple[PermanentFileStorage, str]:
try:
# Not creating constants for now for the keywords below as this
# logic ought to change in the near future to maintain uniformity
# across services
file_storage = json.loads(os.environ.get("FILE_STORAGE_CREDENTIALS"))
provider = FileStorageProvider(file_storage["provider"])
credentials = file_storage["credentials"]
credentials = file_storage.get("credentials", {})
file_path = file_storage["model_prices_file_path"]
return PermanentFileStorage(provider, **credentials), file_path
except KeyError as e:
app.logger.error(
f"Required credentials is missing in the env: {str(e)}"
)
raise e
except FileStorageError as e:
app.logger.error(
"Error while initialising storage: %s",
Expand Down Expand Up @@ -142,8 +147,6 @@ def _fetch_and_save_json(self) -> Optional[dict[str, Any]]:
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
self.file_storage.json_dump(
path=self.file_path,
mode="w",
encoding="utf-8",
data=json_data,
ensure_ascii=False,
indent=4,
Expand Down
15 changes: 0 additions & 15 deletions run-platform.sh
Original file line number Diff line number Diff line change
Expand Up @@ -268,20 +268,6 @@ build_services() {
fi
}

create_backend_schema() {
  # Runs the backend's `prepare_and_migrate` command through docker compose.
  # Does nothing when first_setup is "false".
  if [ "$first_setup" != false ]; then
    # Run from the docker/ directory; popd below restores the previous one.
    pushd ${script_dir}/docker 1>/dev/null

    echo -e "$blue_text""Creating a schema for Unstract in the database""$default_text"
    VERSION=$opt_version $docker_compose_cmd run backend prepare_and_migrate
    # TODO: Run migrations here once its removed from backend's entrypoint

    popd 1>/dev/null
  fi
}

run_services() {
pushd ${script_dir}/docker 1>/dev/null
Expand Down Expand Up @@ -345,7 +331,6 @@ parse_args $*
do_git_pull
setup_env
build_services
create_backend_schema
run_services
#
# Run Unstract platform - END
Expand Down
1 change: 1 addition & 0 deletions unstract/tool-registry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = [
# Hence instead, add the dependencies without version constraints where the
# assumption is they are added as direct dependencies in main project itself.
"unstract-tool-sandbox",
"unstract-flags",
]
requires-python = ">=3.9,<3.11.1"
readme = "README.md"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from typing import Any


class FeatureFlag:
    """Names of the temporary feature flags."""

    # Flag that enables the remote storage feature.
    REMOTE_FILE_STORAGE = "remote_file_storage"


class Tools:
TOOLS_DIRECTORY = "tools"
IMAGE_LATEST_TAG = "latest"
Expand Down
10 changes: 7 additions & 3 deletions unstract/tool-registry/src/unstract/tool_registry/helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from typing import Any, Optional

from unstract.sdk.file_storage import FileStorage, FileStorageProvider
from unstract.tool_registry.constants import PropKey
from unstract.tool_registry.dto import Tool, ToolMeta
from unstract.tool_registry.exceptions import (
Expand All @@ -20,6 +21,7 @@ def __init__(
registry: str,
private_tools_file: str,
public_tools_file: str,
fs: FileStorage = FileStorage(FileStorageProvider.LOCAL),
) -> None:
"""Helper class for ToolRegistry.
Expand All @@ -32,6 +34,7 @@ def __init__(
self.registry_file = registry
self.private_tools_file = private_tools_file
self.public_tools_file = public_tools_file
self.fs = fs
self.tools = self._load_tools_from_registry_file()
if self.tools:
logger.info(f"Loaded tools from registry YAML: {self.tools}")
Expand Down Expand Up @@ -214,7 +217,7 @@ def save_registry(self, data: dict[str, Any]) -> None:
RegistryNotFound: _description_
"""
try:
ToolUtils.save_registry(self.registry_file, data=data)
ToolUtils.save_registry(self.registry_file, data=data, fs=self.fs)
except FileNotFoundError:
logger.error(f"File not found: {self.registry_file}")
raise RegistryNotFound()
Expand Down Expand Up @@ -298,7 +301,9 @@ def get_all_tools_from_disk(self) -> dict[str, dict[str, Any]]:
tools = {}
for tool_file in tool_files:
try:
data = ToolUtils.get_all_tools_from_disk(file_path=tool_file)
data = ToolUtils.get_all_tools_from_disk(
file_path=tool_file, fs=self.fs
)
if not data:
logger.info(f"No data from {tool_file}")
tool_version_list = [
Expand All @@ -309,7 +314,6 @@ def get_all_tools_from_disk(self) -> dict[str, dict[str, Any]]:
tools.update(data)
except FileNotFoundError:
logger.warning(f"Unable to find tool file to load tools: {tool_file}")
pass
return tools

def get_tool_data_by_id(self, tool_uid: str) -> dict[str, Any]:
Expand Down
53 changes: 50 additions & 3 deletions unstract/tool-registry/src/unstract/tool_registry/tool_registry.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,37 @@
import json
import logging
import os
from typing import Any, Optional

from unstract.tool_registry.constants import PropKey, ToolJsonField, ToolKey
from unstract.tool_registry.constants import (
FeatureFlag,
PropKey,
ToolJsonField,
ToolKey,
)
from unstract.tool_registry.dto import Tool
from unstract.tool_registry.exceptions import InvalidToolURLException
from unstract.tool_registry.helper import ToolRegistryHelper
from unstract.tool_registry.schema_validator import JsonSchemaValidator
from unstract.tool_registry.tool_utils import ToolUtils

from unstract.flags.feature_flag import check_feature_flag_status

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
from unstract.sdk.exceptions import FileStorageError
from unstract.sdk.file_storage import FileStorageProvider, PermanentFileStorage

logger = logging.getLogger(__name__)


class ToolRegistry:
REGISTRY_FILE = "registry.yaml"
PRIVATE_TOOL_CONFIG_FILE = "private_tools.json"
PUBLIC_TOOL_CONFIG_FILE = "public_tools.json"
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
PRIVATE_TOOL_CONFIG_FILE = "private_tools_remote_storage.json"
PUBLIC_TOOL_CONFIG_FILE = "public_tools_remote_storage.json"
else:
PRIVATE_TOOL_CONFIG_FILE = "private_tools.json"
PUBLIC_TOOL_CONFIG_FILE = "public_tools.json"

def __init__(
self,
Expand Down Expand Up @@ -45,12 +61,43 @@ def __init__(
"Env 'TOOL_REGISTRY_CONFIG_PATH' is not set, please add the tool "
"registry JSONs and YAML to a directory and set the env."
)

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
self.fs = self._get_storage_credentials()
else:
self.fs = None
self.helper = ToolRegistryHelper(
registry=os.path.join(directory, registry_file),
private_tools_file=os.path.join(directory, private_tools),
public_tools_file=os.path.join(directory, public_tools),
fs=self.fs,
)

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):

def _get_storage_credentials(self) -> PermanentFileStorage:
    """Build the file storage used by the tool registry.

    The configuration is read as JSON from the
    ``TOOL_REGISTRY_STORAGE_CREDENTIALS`` environment variable. Its
    ``provider`` key is required; the optional ``credentials`` mapping is
    passed to ``PermanentFileStorage`` as keyword arguments.

    Returns:
        PermanentFileStorage: Storage built from the configured provider.

    Raises:
        KeyError: If ``provider`` is missing, which includes the case
            where the environment variable is not set at all.
        FileStorageError: Re-raised after logging when raised inside
            this method.
        json.JSONDecodeError: If the environment variable is set but
            does not contain valid JSON (propagated unchanged).
    """
    try:
        # Not creating constants for now for the keywords below as this
        # logic ought to change in the near future to maintain uniformity
        # across services.
        # The fallback must be a JSON *string*: json.loads() raises
        # TypeError on a dict, which would bypass the handlers below.
        file_storage = json.loads(
            os.environ.get("TOOL_REGISTRY_STORAGE_CREDENTIALS", "{}")
        )
        provider = FileStorageProvider(file_storage["provider"])
        credentials = file_storage.get("credentials", {})
        return PermanentFileStorage(provider, **credentials)
    except KeyError as e:
        logger.error(f"Required credentials is missing in the env: {str(e)}")
        raise
    except FileStorageError as e:
        logger.error(
            "Error while initialising storage: %s",
            e,
            stack_info=True,
            exc_info=True,
        )
        raise

def load_all_tools_to_disk(self) -> None:
    """Delegate to the registry helper's ``load_all_tools_to_disk``."""
    registry_helper = self.helper
    registry_helper.load_all_tools_to_disk()

Expand Down
Loading

0 comments on commit 26c4827

Please sign in to comment.