Skip to content

Commit

Permalink
added v2 changes in x2tex service (#867)
Browse files Browse the repository at this point in the history
* added v2 changes in x2tex service

* minor update in readme
  • Loading branch information
muhammad-ali-e authored Dec 4, 2024
1 parent 4109251 commit 47ebeac
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 24 deletions.
24 changes: 24 additions & 0 deletions x2text-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,27 @@ Response samples:
status code : 200
OK
```

## Migration for x2text_audit Table to New Schema
*(Applicable for users upgrading from versions before [v0.93.0](https://github.com/Zipstack/unstract/releases/tag/v0.93.0) to [v0.93.0](https://github.com/Zipstack/unstract/releases/tag/v0.93.0) or higher. This migration is not required for fresh installations or users already on v0.93.0 or a later version.)*

### Migration Description
This migration copies data from the `public.x2text_audit_old` table to the `<DB_SCHEMA>.x2text_audit` table. The new schema and table must exist before the data is inserted (Step 2 below creates the table). The `ON CONFLICT DO NOTHING` clause skips rows that conflict with records already present in the new table, so re-running the migration does not create duplicates.

- **Step 1: Update .env Configuration**: Ensure the `.env` file is updated with the correct value for `DB_SCHEMA`, as specified in `sample.env`. The value should match the `.env` configuration used in the `backend service`.
- **Step 2: Run the x2-Text Service**: Start the `x2-text service`. This step will automatically create the `x2text_audit` table in the schema defined by `DB_SCHEMA`.
- **Step 3: Execute the SQL Migration Query** *(or use Step 4 instead)*: Run the following query against your database to migrate data from the old table to the new schema:

```sql
INSERT INTO <DB_SCHEMA>.x2text_audit (id, created_at, org_id, file_name, file_type, file_size_in_kb, status)
SELECT id, created_at, org_id, file_name, file_type, file_size_in_kb, status
FROM public.x2text_audit_old
ON CONFLICT DO NOTHING;
```
Replace `<DB_SCHEMA>` with the actual schema name specified in the `.env` file.

- **Step 4: Optional Automation (Using `psql`)**
Alternatively, save the migration SQL in a file (e.g., `migration.sql`) and execute it using `psql`:
```bash
psql -U <username> -d <database_name> -f path/to/migration.sql
```
37 changes: 27 additions & 10 deletions x2text-service/app/authentication_middleware.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any, Optional

from app.constants import DBTable
from app.env import Env
from app.models import be_db
from flask import Request, current_app, request

Expand All @@ -24,8 +26,8 @@ def validate_bearer_token(cls, token: Optional[str]) -> bool:
if token is None:
current_app.logger.error("Authentication failed. Empty bearer token")
return False

query = f"SELECT * FROM account_platformkey WHERE key = '{token}'"
platform_key_table = f'"{Env.DB_SCHEMA}".{DBTable.PLATFORM_KEY}'
query = f"SELECT * FROM {platform_key_table} WHERE key = '{token}'"
cursor = be_db.execute_sql(query)
result_row = cursor.fetchone()
cursor.close()
Expand Down Expand Up @@ -70,16 +72,31 @@ def get_token_from_auth_header(cls, request: Request) -> Optional[str]:
return None

@classmethod
def get_account_from_bearer_token(cls, token: Optional[str]) -> str:
query = (
"SELECT organization_id FROM account_platformkey " f"WHERE key='{token}'"
def get_organization_from_bearer_token(
cls, token: str
) -> tuple[Optional[int], str]:
"""Retrieve organization ID and identifier using a bearer token.
Args:
token (str): The bearer token (platform key).
Returns:
tuple[int, str]: organization uid and organization identifier
"""
platform_key_table = f'"{Env.DB_SCHEMA}".{DBTable.PLATFORM_KEY}'
organization_table = f'"{Env.DB_SCHEMA}".{DBTable.ORGANIZATION}'

organization_uid: Optional[int] = cls.execute_query(
f"SELECT organization_id FROM {platform_key_table} WHERE key=%s", (token,)
)
organization = AuthenticationMiddleware.execute_query(query)
query_org = (
"SELECT schema_name FROM account_organization " f"WHERE id='{organization}'"
if organization_uid is None:
return None, None

organization_identifier: Optional[str] = cls.execute_query(
f"SELECT organization_id FROM {organization_table} WHERE id=%s",
(organization_uid,),
)
schema_name: str = AuthenticationMiddleware.execute_query(query_org)
return schema_name
return organization_uid, organization_identifier

@classmethod
def execute_query(cls, query: str) -> Any:
Expand Down
3 changes: 3 additions & 0 deletions x2text-service/app/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class DBTable:
    """Names of the database tables referenced by the x2text service.

    The values are unqualified table names; code that builds SQL is
    expected to prefix them with the configured schema.
    """

    # Table queried to look up platform keys (bearer tokens).
    PLATFORM_KEY = "platform_key"
    # Table queried to look up organization records.
    ORGANIZATION = "organization"
4 changes: 3 additions & 1 deletion x2text-service/app/controllers/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ def process() -> Any:
file_size_in_kb = int(request.headers["Content-Length"]) / 1024

bearer_token = AuthenticationMiddleware.get_token_from_auth_header(request)
org_id = AuthenticationMiddleware.get_account_from_bearer_token(bearer_token)
_, org_id = AuthenticationMiddleware.get_organization_from_bearer_token(
bearer_token
)

x2_text_audit: X2TextAudit = X2TextAudit.create(
org_id=org_id,
Expand Down
57 changes: 57 additions & 0 deletions x2text-service/app/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import os
from typing import Optional

from dotenv import load_dotenv

load_dotenv()


class EnvManager:
    """Read required environment variables and track the ones that are unset.

    Lookups never raise on their own. Every key that resolves to an empty
    value is remembered in `missing_settings`, so that all problems can be
    reported together by `raise_for_missing_envs`.
    """

    # Class-level (shared) registry of setting keys that resolved to an
    # empty value. Intentionally shared across all callers.
    missing_settings: list[str] = []

    @classmethod
    def get_required_setting(
        cls, setting_key: str, default: Optional[str] = None
    ) -> Optional[str]:
        """Get the value of the environment variable named by `setting_key`.

        Keys whose resolved value is empty are added to `missing_settings`
        so that an exception can be raised at the end.

        Args:
            setting_key (str): The name of the environment variable.
            default (Optional[str], optional): Value to return in case the
                variable is not set. Defaults to None.

        Returns:
            Optional[str]: The value of the environment variable if found,
                otherwise the default value.
        """
        data = os.environ.get(setting_key, default)
        # Record each missing key only once so the final error message does
        # not list the same setting repeatedly.
        if not data and setting_key not in cls.missing_settings:
            cls.missing_settings.append(setting_key)
        return data

    @classmethod
    def raise_for_missing_envs(cls) -> None:
        """Raise an error if some required settings are not configured.

        Raises:
            ValueError: Lists every setting key recorded as missing.
        """
        if cls.missing_settings:
            error_message = "Below required settings are missing.\n" + ",\n".join(
                cls.missing_settings
            )
            raise ValueError(error_message)


class Env:
    """Database settings resolved from the environment at import time.

    Each attribute is read through `EnvManager.get_required_setting`, which
    records any setting that resolves to an empty value.
    """

    DB_SCHEMA = EnvManager.get_required_setting("DB_SCHEMA")
    DB_HOST = EnvManager.get_required_setting("DB_HOST")
    # The default is passed as a string to match the helper's signature.
    # The `or 5432` fallback keeps an empty `DB_PORT=` from crashing `int()`
    # with an opaque error; the empty value is still recorded as missing and
    # reported by the validation call below.
    DB_PORT = int(EnvManager.get_required_setting("DB_PORT", "5432") or 5432)
    DB_USERNAME = EnvManager.get_required_setting("DB_USERNAME")
    DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD")
    DB_NAME = EnvManager.get_required_setting("DB_NAME")


# Fail fast at import time, listing every required setting that is missing.
EnvManager.raise_for_missing_envs()
20 changes: 7 additions & 13 deletions x2text-service/app/models.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,15 @@
import datetime
import uuid
from os import environ as env

import peewee

PG_BE_HOST = env.get("DB_HOST")
PG_BE_PORT = int(env.get("DB_PORT", 5432))
PG_BE_USERNAME = env.get("DB_USERNAME")
PG_BE_PASSWORD = env.get("DB_PASSWORD")
PG_BE_DATABASE = env.get("DB_NAME")

from app.env import Env

be_db = peewee.PostgresqlDatabase(
PG_BE_DATABASE,
user=PG_BE_USERNAME,
password=PG_BE_PASSWORD,
host=PG_BE_HOST,
port=PG_BE_PORT,
Env.DB_NAME,
user=Env.DB_USERNAME,
password=Env.DB_PASSWORD,
host=Env.DB_HOST,
port=Env.DB_PORT,
)


Expand All @@ -32,3 +25,4 @@ class X2TextAudit(peewee.Model):
class Meta:
database = be_db # This model uses the "BE_DB" database.
table_name = "x2text_audit"
schema = Env.DB_SCHEMA
1 change: 1 addition & 0 deletions x2text-service/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ DB_PORT=5432
DB_USERNAME=unstract_dev
DB_PASSWORD=unstract_pass
DB_NAME=unstract_db
DB_SCHEMA="unstract"

0 comments on commit 47ebeac

Please sign in to comment.