diff --git a/x2text-service/README.md b/x2text-service/README.md index 9eff29002..b44ecf026 100644 --- a/x2text-service/README.md +++ b/x2text-service/README.md @@ -52,3 +52,27 @@ Response samples: status code : 200 OK ``` + +## Migration for x2text_audit Table to New Schema +*(Applicable for users upgrading from versions before [v0.93.0](https://github.com/Zipstack/unstract/releases/tag/v0.93.0) to [v0.93.0](https://github.com/Zipstack/unstract/releases/tag/v0.93.0) or higher. This migration is not required for fresh installations or users already on v0.93.0 or a later version.)* + +### Migration Description +This migration transfers data from the `public.x2text_audit_old` table to the `<DB_SCHEMA>.x2text_audit` table. It ensures the new table and schema exist before inserting the data. The ON CONFLICT DO NOTHING clause prevents duplicate records during the migration. + +- **Step 1: Update .env Configuration**: Ensure the `.env` file is updated with the correct value for `DB_SCHEMA`, as specified in `sample.env`. The value should match the `.env` configuration used in the `backend service`. +- **Step 2: Run the x2-Text Service**: Start the `x2-text service`. This step will automatically create the `x2text_audit` table in the schema defined by `DB_SCHEMA`. +- **Step 3: Execute the SQL Migration Query** *(Or Step 4)*: Run the following query in your database to migrate data from the old table to the new schema: + +```sql +INSERT INTO <DB_SCHEMA>.x2text_audit (id, created_at, org_id, file_name, file_type, file_size_in_kb, status) +SELECT id, created_at, org_id, file_name, file_type, file_size_in_kb, status +FROM public.x2text_audit_old +ON CONFLICT DO NOTHING +``` +Replace `<DB_SCHEMA>` with the actual schema name specified in the .env file. 
+ +- **Step 4: Optional Automation (Using `psql`)** +Alternatively, save the migration SQL in a file (e.g., `migration.sql`) and execute it using `psql`: +```bash +psql -U <username> -d <database_name> -f path/to/migration.sql +``` diff --git a/x2text-service/app/authentication_middleware.py b/x2text-service/app/authentication_middleware.py index 7b9f36e51..241b766e4 100644 --- a/x2text-service/app/authentication_middleware.py +++ b/x2text-service/app/authentication_middleware.py @@ -1,5 +1,7 @@ from typing import Any, Optional +from app.constants import DBTable +from app.env import Env from app.models import be_db from flask import Request, current_app, request @@ -24,8 +26,8 @@ def validate_bearer_token(cls, token: Optional[str]) -> bool: if token is None: current_app.logger.error("Authentication failed. Empty bearer token") return False - - query = f"SELECT * FROM account_platformkey WHERE key = '{token}'" + platform_key_table = f'"{Env.DB_SCHEMA}".{DBTable.PLATFORM_KEY}' + query = f"SELECT * FROM {platform_key_table} WHERE key = '{token}'" cursor = be_db.execute_sql(query) result_row = cursor.fetchone() cursor.close() @@ -70,16 +72,31 @@ def get_token_from_auth_header(cls, request: Request) -> Optional[str]: return None @classmethod - def get_account_from_bearer_token(cls, token: Optional[str]) -> str: - query = ( - "SELECT organization_id FROM account_platformkey " f"WHERE key='{token}'" + def get_organization_from_bearer_token( + cls, token: str + ) -> tuple[Optional[int], str]: + """Retrieve organization ID and identifier using a bearer token. + + Args: + token (str): The bearer token (platform key). 
+ + Returns: + tuple[int, str]: organization uid and organization identifier + """ + platform_key_table = f'"{Env.DB_SCHEMA}".{DBTable.PLATFORM_KEY}' + organization_table = f'"{Env.DB_SCHEMA}".{DBTable.ORGANIZATION}' + + organization_uid: Optional[int] = cls.execute_query( + f"SELECT organization_id FROM {platform_key_table} WHERE key=%s", (token,) ) - organization = AuthenticationMiddleware.execute_query(query) - query_org = ( - "SELECT schema_name FROM account_organization " f"WHERE id='{organization}'" + if organization_uid is None: + return None, None + + organization_identifier: Optional[str] = cls.execute_query( + f"SELECT organization_id FROM {organization_table} WHERE id=%s", + (organization_uid,), ) - schema_name: str = AuthenticationMiddleware.execute_query(query_org) - return schema_name + return organization_uid, organization_identifier @classmethod def execute_query(cls, query: str) -> Any: diff --git a/x2text-service/app/constants.py b/x2text-service/app/constants.py new file mode 100644 index 000000000..4529257d7 --- /dev/null +++ b/x2text-service/app/constants.py @@ -0,0 +1,3 @@ +class DBTable: + PLATFORM_KEY = "platform_key" + ORGANIZATION = "organization" diff --git a/x2text-service/app/controllers/controller.py b/x2text-service/app/controllers/controller.py index 16dab861e..8f743cded 100644 --- a/x2text-service/app/controllers/controller.py +++ b/x2text-service/app/controllers/controller.py @@ -91,7 +91,9 @@ def process() -> Any: file_size_in_kb = int(request.headers["Content-Length"]) / 1024 bearer_token = AuthenticationMiddleware.get_token_from_auth_header(request) - org_id = AuthenticationMiddleware.get_account_from_bearer_token(bearer_token) + _, org_id = AuthenticationMiddleware.get_organization_from_bearer_token( + bearer_token + ) x2_text_audit: X2TextAudit = X2TextAudit.create( org_id=org_id, diff --git a/x2text-service/app/env.py b/x2text-service/app/env.py new file mode 100644 index 000000000..738102ea0 --- /dev/null +++ 
b/x2text-service/app/env.py @@ -0,0 +1,57 @@ +import os +from typing import Optional + +from dotenv import load_dotenv + +load_dotenv() + + +class EnvManager: + missing_settings: list[str] = [] + + @classmethod + def get_required_setting( + cls, setting_key: str, default: Optional[str] = None + ) -> Optional[str]: + """Get the value of an environment variable specified by the given key. + Add missing keys to `missing_settings` so that exception can be raised + at the end. + + Args: + key (str): The key of the environment variable + default (Optional[str], optional): Default value to return incase of + env not found. Defaults to None. + + Returns: + Optional[str]: The value of the environment variable if found, + otherwise the default value. + """ + data = os.environ.get(setting_key, default) + if not data: + cls.missing_settings.append(setting_key) + return data + + @classmethod + def raise_for_missing_envs(cls) -> None: + """Raises an error if some settings are not configured. + + Raises: + ValueError: Error mentioning envs which are not configured. 
+ """ + if cls.missing_settings: + ERROR_MESSAGE = "Below required settings are missing.\n" + ",\n".join( + cls.missing_settings + ) + raise ValueError(ERROR_MESSAGE) + + +class Env: + DB_SCHEMA = EnvManager.get_required_setting("DB_SCHEMA") + DB_HOST = EnvManager.get_required_setting("DB_HOST") + DB_PORT = int(EnvManager.get_required_setting("DB_PORT", 5432)) + DB_USERNAME = EnvManager.get_required_setting("DB_USERNAME") + DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD") + DB_NAME = EnvManager.get_required_setting("DB_NAME") + + +EnvManager.raise_for_missing_envs() diff --git a/x2text-service/app/models.py b/x2text-service/app/models.py index a084049c4..424d0a2b6 100644 --- a/x2text-service/app/models.py +++ b/x2text-service/app/models.py @@ -1,22 +1,15 @@ import datetime import uuid -from os import environ as env import peewee - -PG_BE_HOST = env.get("DB_HOST") -PG_BE_PORT = int(env.get("DB_PORT", 5432)) -PG_BE_USERNAME = env.get("DB_USERNAME") -PG_BE_PASSWORD = env.get("DB_PASSWORD") -PG_BE_DATABASE = env.get("DB_NAME") - +from app.env import Env be_db = peewee.PostgresqlDatabase( - PG_BE_DATABASE, - user=PG_BE_USERNAME, - password=PG_BE_PASSWORD, - host=PG_BE_HOST, - port=PG_BE_PORT, + Env.DB_NAME, + user=Env.DB_USERNAME, + password=Env.DB_PASSWORD, + host=Env.DB_HOST, + port=Env.DB_PORT, ) @@ -32,3 +25,4 @@ class X2TextAudit(peewee.Model): class Meta: database = be_db # This model uses the "BE_DB" database. table_name = "x2text_audit" + schema = Env.DB_SCHEMA diff --git a/x2text-service/sample.env b/x2text-service/sample.env index 139fadfad..df25e0b09 100644 --- a/x2text-service/sample.env +++ b/x2text-service/sample.env @@ -10,3 +10,4 @@ DB_PORT=5432 DB_USERNAME=unstract_dev DB_PASSWORD=unstract_pass DB_NAME=unstract_db +DB_SCHEMA="unstract"