diff --git a/.coveragerc b/.coveragerc
index 04b8918..b67bcb4 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -7,6 +7,7 @@ omit =
     sqlalchemy_mate/docs/*
     sqlalchemy_mate/tests/*
     sqlalchemy_mate/vendor/*
+    sqlalchemy_mate/patterns/large_binary_column/local.py
 
 [report]
 # Regexes for lines to exclude from consideration
diff --git a/.gitignore b/.gitignore
index 47f9230..b7e3bed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 sqlalchemy_mate_venv/
 sqlalchemy_mate.egg-info/
 sqlalchemy_mate-*/
+debug/data/
 tmp/
 .db.json
diff --git a/bin/run_test_on_many_sqlalchemy_versions.py b/bin/run_test_on_many_sqlalchemy_versions.py
index 7846bee..a7b917a 100644
--- a/bin/run_test_on_many_sqlalchemy_versions.py
+++ b/bin/run_test_on_many_sqlalchemy_versions.py
@@ -2,6 +2,8 @@
 import subprocess
 
 from sqlalchemy_mate.paths import dir_project_root, dir_venv_bin
+from rich import print as rprint
+from rich.panel import Panel
 
 sqlalchemy_versions = [
     "2.0.0",
@@ -16,6 +18,7 @@ path_venv_pip = dir_venv_bin / "pip"
 
 with dir_project_root.cwd():
     for version in sqlalchemy_versions:
+        rprint(Panel(f"Test on sqlalchemy.__version__ = {version}"))
         subprocess.run(
             [
                 f"{path_venv_pip}",
@@ -23,4 +26,4 @@
                 f"sqlalchemy=={version}",
             ]
         )
-        subprocess.run(["pyops", "cov-only"])
+        subprocess.run(["pyops", "cov-only"], check=True)
diff --git a/debug/test_file_backed_column.py b/debug/test_file_backed_column.py
new file mode 100644
index 0000000..14181b4
--- /dev/null
+++ b/debug/test_file_backed_column.py
@@ -0,0 +1,392 @@
+# -*- coding: utf-8 -*-
+
+"""
+A demo of how to correctly implement dual-write consistency between a SQL DB and local files.
+"""
+
+import typing as T
+from datetime import datetime
+
+import shutil
+from pathlib import Path
+
+import sqlalchemy as sa
+import sqlalchemy.orm as orm
+import sqlalchemy_mate.api as sam
+
+from rich import print as rprint
+from rich import box
+from rich.console import Console
+from rich.panel import Panel
+
+local = sam.patterns.large_binary_column.local
+console = Console()
+
+
+def get_utc_now() -> datetime:
+    return datetime.utcnow()
+
+
+Base = orm.declarative_base()
+
+
+class Task(Base, sam.ExtendedBase):
+    __tablename__ = "tasks"
+
+    _settings_major_attrs = ["url"]
+
+    url: orm.Mapped[str] = orm.mapped_column(sa.String, primary_key=True)
+    update_at: orm.Mapped[datetime] = orm.mapped_column(sa.DateTime)
+    html: orm.Mapped[T.Optional[str]] = orm.mapped_column(sa.String, nullable=True)
+    image: orm.Mapped[T.Optional[str]] = orm.mapped_column(sa.String, nullable=True)
+
+
+engine = sa.create_engine("sqlite:///:memory:")
+Base.metadata.create_all(engine)
+
+dir_root = Path(__file__).absolute().parent.joinpath("data")
+shutil.rmtree(dir_root, ignore_errors=True)
+dir_root.mkdir(parents=True, exist_ok=True)
+
+# ------------------------------------------------------------------------------
+# Create a Row but SQL INSERT failed
+# ------------------------------------------------------------------------------
+______Create_a_Row_but_SQL_INSERT_failed = None
+rprint(
+    Panel(
+        "Create a Row but SQL INSERT failed", box=box.DOUBLE, border_style="bold green"
+    )
+)
+
+url = "https://www.example.com"
+
+html_content_1 = b"this is html 1"
+image_content_1 = b"this is image 1"
+utc_now = get_utc_now()
+
+rprint(Panel("Write file first, then write DB"))
+write_file_result = local.write_file(
+    api_calls=[
+        local.WriteFileApiCall(
+            column="html",
+            binary=html_content_1,
+            old_path=None,
+        ),
+        local.WriteFileApiCall(
+            column="image",
+            binary=image_content_1,
+            old_path=None,
+        ),
+    ],
+    pk=url,
+    dir_root=dir_root,
+    is_pk_path_safe=False,
+)
+rprint(write_file_result)
+
+
+class UserError(Exception):
+    pass
+
+
+with orm.Session(engine) as ses:
+    try:
+        with ses.begin():
+            task1 = Task(
+                url=url,
+                update_at=utc_now,
+                # this is a helper method that converts the write-file results
+                # into INSERT / UPDATE values
+                **write_file_result.to_values(),
+            )
+            # intentionally raises an error to simulate a database failure
+            raise UserError()
+            ses.add(task1)
+        rprint("SQL INSERT Succeeded!")
+    except Exception as e:
+        rprint(f"SQL INSERT Failed! Error: {e!r}")
+        # clean up the created files when creating the row failed
+        # if you don't want that, just don't run this method
+        write_file_result.clean_up_new_file_when_create_or_update_row_failed()
+
+
+rprint(Panel("Database row should not exist"))
+rprint(f"{ses.get(Task, url) = }")
+assert ses.get(Task, url) is None
+rprint(Panel("Files should be deleted"))
+values = write_file_result.to_values()
+html_path = values["html"]
+image_path = values["image"]
+rprint(f"{Path(html_path).exists() = }")
+rprint(f"{Path(image_path).exists() = }")
+assert Path(html_path).exists() is False
+assert Path(image_path).exists() is False
+
+
+# ------------------------------------------------------------------------------
+# Create a Row and SQL INSERT succeeded
+# ------------------------------------------------------------------------------
+______Create_a_Row_and_SQL_INSERT_succeeded = None
+rprint(
+    Panel(
+        "Create a Row and SQL INSERT succeeded",
+        box=box.DOUBLE,
+        border_style="bold green",
+    )
+)
+
+utc_now = get_utc_now()
+
+rprint(Panel("Write file first, then write DB"))
+write_file_result = local.write_file(
+    api_calls=[
+        local.WriteFileApiCall(
+            column="html",
+            binary=html_content_1,
+            old_path=None,
+        ),
+        local.WriteFileApiCall(
+            column="image",
+            binary=image_content_1,
+            old_path=None,
+        ),
+    ],
+    pk=url,
+    dir_root=dir_root,
+    is_pk_path_safe=False,
+)
+rprint(write_file_result)
+
+
+class UserError(Exception):
+    pass
+
+
+with orm.Session(engine) as ses:
+    try:
+        with ses.begin():
+            task1 = Task(
+                url=url,
+                update_at=utc_now,
+                # this is a helper method that converts the write-file results
+                # into INSERT / UPDATE values
+                **write_file_result.to_values(),
+            )
+            ses.add(task1)
+        rprint("SQL INSERT Succeeded!")
+    except Exception as e:
+        rprint(f"SQL INSERT Failed! Error: {e!r}")
+        # clean up the created files when creating the row failed
+        # if you don't want that, just don't run this method
+        write_file_result.clean_up_new_file_when_create_or_update_row_failed()
+
+
+rprint(Panel("Database row should exist"))
+task1: Task = ses.get(Task, url)
+assert task1.url == url
+assert task1.update_at == utc_now
+rprint(Panel("Files should be created"))
+values = write_file_result.to_values()
+html_path = values["html"]
+image_path = values["image"]
+rprint(f"{Path(html_path).read_bytes() = }")
+rprint(f"{Path(image_path).read_bytes() = }")
+assert Path(html_path).read_bytes() == html_content_1
+assert Path(image_path).read_bytes() == image_content_1
+
+
+# ------------------------------------------------------------------------------
+# Update a Row but SQL UPDATE failed
+# ------------------------------------------------------------------------------
+______Update_a_Row_but_SQL_UPDATE_failed = None
+rprint(
+    Panel(
+        "Update a Row but SQL UPDATE failed", box=box.DOUBLE, border_style="bold green"
+    )
+)
+
+html_content_2 = b"this is html 2"
+image_content_2 = b"this is image 2"
+utc_now = get_utc_now()
+
+rprint(Panel("Write file first, then write DB"))
+write_file_result = local.write_file(
+    api_calls=[
+        local.WriteFileApiCall(
+            column="html",
+            binary=html_content_2,
+            # since this is an update, you have to specify the old file,
+            # even if it is None. we need this information to clean up the
+            # old file when the SQL UPDATE succeeds
+            old_path=Path(task1.html),
+        ),
+        local.WriteFileApiCall(
+            column="image",
+            binary=image_content_2,
+            # since this is an update, you have to specify the old file,
+            # even if it is None. we need this information to clean up the
+            # old file when the SQL UPDATE succeeds
+            old_path=Path(task1.image),
+        ),
+    ],
+    pk=url,
+    dir_root=dir_root,
+    is_pk_path_safe=False,
+)
+rprint(write_file_result)
+
+with orm.Session(engine) as ses:
+    try:
+        with ses.begin():
+            stmt = (
+                sa.update(Task)
+                .where(Task.url == url)
+                .values(update_at=utc_now, **write_file_result.to_values())
+            )
+            # intentionally raises an error to simulate a database failure
+            raise UserError()
+            ses.execute(stmt)
+        print("SQL UPDATE Succeeded!")
+        # clean up the old files when updating the row succeeded
+        # if you don't want that, just don't run this method
+        write_file_result.clean_up_old_file_when_update_row_succeeded()
+    except Exception as e:
+        rprint(f"SQL UPDATE Failed! Error: {e!r}")
+        # clean up the created files when updating the row failed
+        # if you don't want that, just don't run this method
+        write_file_result.clean_up_new_file_when_create_or_update_row_failed()
+
+rprint(Panel("Database row should not be updated"))
+task2: Task = ses.get(Task, url)
+rprint(task2.__dict__)
+assert task2.update_at < utc_now
+rprint(Panel("Old files should still be there"))
+rprint(f"{Path(task1.html).read_bytes() = }")
+rprint(f"{Path(task1.image).read_bytes() = }")
+assert Path(task1.html).read_bytes() == html_content_1
+assert Path(task1.image).read_bytes() == image_content_1
+rprint(Panel("New files should be deleted"))
+values = write_file_result.to_values()
+html_path = values["html"]
+image_path = values["image"]
+rprint(f"{Path(html_path).exists() = }")
+rprint(f"{Path(image_path).exists() = }")
+assert Path(html_path).exists() is False
+assert Path(image_path).exists() is False
+
+
+# ------------------------------------------------------------------------------
+# Update a Row and SQL UPDATE succeeded
+# ------------------------------------------------------------------------------
+______Update_a_Row_and_SQL_UPDATE_succeeded = None
+rprint(
+    Panel(
+        "Update a Row and SQL UPDATE succeeded",
+        box=box.DOUBLE,
+        border_style="bold green",
+    )
+)
+utc_now = get_utc_now()
+
+rprint(Panel("Write file first, then write DB"))
+write_file_result = local.write_file(
+    api_calls=[
+        local.WriteFileApiCall(
+            column="html",
+            binary=html_content_2,
+            # since this is an update, you have to specify the old file,
+            # even if it is None. we need this information to clean up the
+            # old file when the SQL UPDATE succeeds
+            old_path=Path(task1.html),
+        ),
+        local.WriteFileApiCall(
+            column="image",
+            binary=image_content_2,
+            # since this is an update, you have to specify the old file,
+            # even if it is None. we need this information to clean up the
+            # old file when the SQL UPDATE succeeds
+            old_path=Path(task1.image),
+        ),
+    ],
+    pk=url,
+    dir_root=dir_root,
+    is_pk_path_safe=False,
+)
+rprint(write_file_result)
+
+with orm.Session(engine) as ses:
+    try:
+        with ses.begin():
+            stmt = (
+                sa.update(Task)
+                .where(Task.url == url)
+                .values(update_at=utc_now, **write_file_result.to_values())
+            )
+            ses.execute(stmt)
+        print("SQL UPDATE Succeeded!")
+        # clean up the old files when updating the row succeeded
+        # if you don't want that, just don't run this method
+        write_file_result.clean_up_old_file_when_update_row_succeeded()
+    except Exception as e:
+        rprint(f"SQL UPDATE Failed! Error: {e!r}")
+        # clean up the created files when updating the row failed
+        # if you don't want that, just don't run this method
+        write_file_result.clean_up_new_file_when_create_or_update_row_failed()
+
+rprint(Panel("Database row should be updated"))
+task2: Task = ses.get(Task, url)
+rprint(task2.__dict__)
+assert task2.update_at == utc_now
+rprint(Panel("Old files should be deleted"))
+rprint(f"{Path(task1.html).exists() = }")
+rprint(f"{Path(task1.image).exists() = }")
+assert Path(task1.html).exists() is False
+assert Path(task1.image).exists() is False
+rprint(Panel("New files should be created"))
+rprint(f"{Path(task2.html).read_bytes() = }")
+rprint(f"{Path(task2.image).read_bytes() = }")
+assert Path(task2.html).read_bytes() == html_content_2
+assert Path(task2.image).read_bytes() == image_content_2
+
+# ------------------------------------------------------------------------------
+# Delete a Row and SQL DELETE succeeded
+# ------------------------------------------------------------------------------
+______Delete_a_Row_and_SQL_DELETE_succeeded = None
+rprint(
+    Panel(
+        "Delete a Row and SQL DELETE succeeded",
+        box=box.DOUBLE,
+        border_style="bold green",
+    )
+)
+
+rprint(Panel("Delete the DB row first, then delete the files"))
+
+with orm.Session(engine) as ses:
+    task3: Task = ses.get(Task, url)
+    try:
+        stmt = sa.delete(Task).where(Task.url == url)
+        res = ses.execute(stmt)
+        ses.commit()
+        if res.rowcount == 1:
+            print("SQL DELETE Succeeded!")
+            # clean up the old files when deleting the row succeeded
+            # if you don't want that, just don't run this method
+            if task3.html:
+                Path(task3.html).unlink()
+            if task3.image:
+                Path(task3.image).unlink()
+        else:
+            print("SQL DELETE Failed! No row affected.")
+    except Exception as e:
+        ses.rollback()
+        rprint(f"SQL DELETE Failed! Error: {e!r}")
+
+rprint(Panel("Database row should be deleted"))
+rprint(f"{ses.get(Task, url) = }")
+assert ses.get(Task, url) is None
+rprint(Panel("Old files should be deleted"))
+rprint(f"{Path(task3.html).exists() = }")
+rprint(f"{Path(task3.image).exists() = }")
+assert Path(task3.html).exists() is False
+assert Path(task3.image).exists() is False
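
The script above walks through every success and failure branch one at a time, but the core pattern it demonstrates is small: write the files first, then write the DB row, and clean up whichever side the failure leaves orphaned. Below is a condensed sketch of the create flow, reusing the helpers from the script (error handling trimmed to the essentials):

```python
# Write the files first; if the SQL INSERT then fails, remove the files
# that were just created so no orphan files are left behind.
write_file_result = local.write_file(
    api_calls=[
        local.WriteFileApiCall(column="html", binary=b"<html>...</html>", old_path=None),
    ],
    pk=url,
    dir_root=dir_root,
    is_pk_path_safe=False,
)
try:
    with orm.Session(engine) as ses, ses.begin():
        ses.add(Task(url=url, update_at=get_utc_now(), **write_file_result.to_values()))
except Exception:
    write_file_result.clean_up_new_file_when_create_or_update_row_failed()
    raise
```
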
diff --git a/docs/source/01-Core-API/index.ipynb b/docs/source/01-Core-API/index.ipynb
new file mode 100644
index 0000000..3224886
--- /dev/null
+++ b/docs/source/01-Core-API/index.ipynb
@@ -0,0 +1,648 @@
+{
+ "cells": [
+  {
+   "cell_type": "raw",
+   "id": "70f66ca5-bd75-4eff-a7e0-b13745e01055",
+   "metadata": {
+    "editable": true,
+    "raw_mimetype": "text/restructuredtext",
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    ".. _core-api:\n",
+    "\n",
+    "Core API\n",
+    "==============================================================================\n",
+    "\n",
+    "In this section, we demonstrate how ``sqlalchemy_mate`` simplifies manipulating data with the Core API.\n",
+    "\n",
+    "First, let's define a table to get started; everything looks normal so far."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "7a250643-b72d-4db2-a683-3e8133e63e2e",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import sqlalchemy as sa\n",
+    "\n",
+    "metadata = sa.MetaData()\n",
+    "\n",
+    "t_users = sa.Table(\n",
+    "    \"users\", metadata,\n",
+    "    sa.Column(\"id\", sa.Integer, primary_key=True),\n",
+    "    sa.Column(\"name\", sa.String, nullable=True)\n",
+    ")\n",
+    "\n",
+    "# For syntax testing, you can use sqlite.\n",
+    "# But you will see significant performance improvements for bulk inserts\n",
+    "# in mainstream SQL databases.\n",
+    "engine = sa.create_engine(\"sqlite:///:memory:\")\n",
+    "\n",
+    "metadata.create_all(engine)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5054901c-cae1-4955-9392-b9c852d3d9f9",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## Bulk Insert and Count Rows\n",
+    "\n",
+    "We want to insert three random user records into the database and run some basic queries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a627404d-a627-4490-8176-25d01cf81057",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "three_user_data_list = [\n",
+    "    dict(id=random.randint(1, 1000))\n",
+    "    for _ in range(3)\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d85213f0-41f6-4833-849f-ae713d08f3c8",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "**With** ``sqlalchemy_mate``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "da96be78-9192-488a-ae96-bd5ea1463354",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "row_counts = 3\n",
+      "user = (996, None)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sqlalchemy_mate.api as sam\n",
+    "\n",
+    "# do bulk insert\n",
+    "sam.inserting.smart_insert(engine, t_users, three_user_data_list)\n",
+    "\n",
+    "# returns the number of rows in a table\n",
+    "row_counts = sam.selecting.count_row(engine, t_users)\n",
+    "print(f\"{row_counts = }\")\n",
+    "\n",
+    "# return one row by primary key values\n",
+    "user = sam.selecting.by_pk(engine=engine, table=t_users, id_=three_user_data_list[0][\"id\"])\n",
+    "print(f\"{user = }\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3248c810-75c8-4617-ace0-8763aa4a0290",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "**Without** ``sqlalchemy_mate``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "af8aa793-6435-4499-99bd-ce14d8a2a939",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "row_counts = 3\n",
+      "user = (996, None)\n"
+     ]
+    }
+   ],
+   "source": [
+    "with engine.connect() as connection:\n",
+    "    # let's have a fresh start first\n",
+    "    connection.execute(t_users.delete())\n",
+    "    connection.commit()\n",
+    "    \n",
+    "    # do bulk insert\n",
+    "    connection.execute(t_users.insert(), three_user_data_list)\n",
+    "    connection.commit()\n",
+    "\n",
+    "    # returns the number of rows in a table\n",
+    "    stmt = sa.select(sa.func.count()).select_from(t_users)\n",
+    "    row_counts = connection.execute(stmt).one()[0]\n",
+    "    print(f\"{row_counts = }\")\n",
+    "    # return one row by primary key values\n",
+    "    stmt = sa.select(t_users).where(t_users.c.id==three_user_data_list[0][\"id\"])\n",
+    "    user = connection.execute(stmt).one()\n",
+    "    print(f\"{user = }\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6cc56490-d7ae-4385-a5fa-6d9f6aa37ff6",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## Smart Single / Bulk Insert\n",
+    "\n",
+    "We already have 3 rows in the database; let's try to insert 1,000 users into the table."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "bbad4635-48e5-473e-af1f-b7a32089a3a6",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "thousands_user_data_list = [\n",
+    "    dict(id=id_)\n",
+    "    for id_ in range(1, 1000+1)\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14ddecb1-1318-42d3-bc61-f3060d1110fe",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "**With** ``sqlalchemy_mate``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "24c337bf-eb9f-436c-af1a-3e429643c5d1",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "op_count = 57\n",
+      "ins_count = 997\n",
+      "elapsed = 0.019205\n",
+      "row_counts = 1000\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "\n",
+    "start_time = time.process_time()\n",
+    "# this is the smart insert API, only one line\n",
+    "op_count, ins_count = sam.inserting.smart_insert(engine=engine, table=t_users, data=thousands_user_data_list)\n",
+    "elapsed = time.process_time() - start_time\n",
+    "print(f\"{op_count = }\")  # 57 bulk INSERT statements were fired to the database\n",
+    "print(f\"{ins_count = }\")  # 997 new rows inserted (3 already existed)\n",
+    "print(f\"{elapsed = :.6f}\")  # elapsed time; compare with the native version below\n",
+    "row_counts = sam.selecting.count_row(engine, t_users)\n",
+    "print(f\"{row_counts = }\")  # now we have 1000 rows"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e749d188-6baa-4568-b9fd-58e9df389cfd",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "**Without** ``sqlalchemy_mate``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "558932a5-eca7-413a-872e-6682d806c573",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "elapsed = 0.181163\n",
+      "row_counts = 1000\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Core insert logic = 7 lines\n",
+    "from sqlalchemy.exc import IntegrityError\n",
+    "\n",
+    "with engine.connect() as connection:\n",
+    "    connection.execute(t_users.delete())\n",
+    "    connection.commit()\n",
+    "\n",
+    "    ins = t_users.insert()\n",
+    "    connection.execute(ins, three_user_data_list)\n",
+    "    \n",
+    "    start_time = time.process_time() \n",
+    "    ins = t_users.insert()\n",
+    "    for user_data in thousands_user_data_list:\n",
+    "        try:\n",
+    "            connection.execute(ins, user_data)\n",
+    "            connection.commit()\n",
+    "        except IntegrityError:\n",
+    "            connection.rollback()\n",
+    "    elapsed = time.process_time() - start_time\n",
+    "    print(f\"{elapsed = :.6f}\")  # 0.181163\n",
+    "    row_counts = connection.execute(sa.select(sa.func.count()).select_from(t_users)).one()[0]\n",
+    "    print(f\"{row_counts = }\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "29469d64-5c1c-43e6-9d97-241ac91a6690",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "``sqlalchemy_mate`` is significantly faster than native ``sqlalchemy`` because it smartly splits the big dataset into smaller chunks, which greatly reduces the total number of ``INSERT`` statements actually fired to the database. In this test case, ``sqlalchemy_mate`` is 10x faster against a local Postgres DB; in real use cases it saves even more time because the database is usually remote."
+   ]
+  },
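+  {
+   "cell_type": "markdown",
+   "id": "a0a0a0a0-0000-4000-8000-000000000001",
+   "metadata": {},
+   "source": [
+    "The exact splitting strategy lives in ``sqlalchemy_mate``'s source; the cell below is only a minimal sketch of the idea, not the library's implementation: try to insert a chunk with one statement, and on an ``IntegrityError`` split the chunk in half and recurse, so only chunks that actually contain duplicates degrade toward row-by-row inserts. The helper name ``insert_smartly`` is made up for illustration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0a0a0a0-0000-4000-8000-000000000002",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A minimal divide-and-conquer bulk insert sketch (illustration only).\n",
+    "from sqlalchemy.exc import IntegrityError\n",
+    "\n",
+    "\n",
+    "def insert_smartly(connection, table, rows):\n",
+    "    # try the whole chunk in one executemany; a SAVEPOINT limits the rollback\n",
+    "    try:\n",
+    "        with connection.begin_nested():\n",
+    "            connection.execute(table.insert(), rows)\n",
+    "    except IntegrityError:\n",
+    "        if len(rows) == 1:\n",
+    "            return  # a true duplicate, skip it\n",
+    "        half = len(rows) // 2\n",
+    "        insert_smartly(connection, table, rows[:half])\n",
+    "        insert_smartly(connection, table, rows[half:])\n",
+    "\n",
+    "\n",
+    "with engine.connect() as connection:\n",
+    "    insert_smartly(connection, t_users, thousands_user_data_list)\n",
+    "    connection.commit()"
+   ]
+  },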
"source": [ + "## Smart Single Bulk Upsert" + ] + }, + { + "cell_type": "raw", + "id": "c984b13a-1da4-4a59-a20a-bd42dc3d0354", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Upsert is a database dependent feature that not available in all sql system. :meth:`~sqlalchemy_mate.crud.updating.upsert_all`` function made upsert generally available to all SQL system and super easy to use. Internally there's an optimization that collect \"to insert\" items and bulk insert them fast." + ] + }, + { + "cell_type": "markdown", + "id": "7cace76f-d0b0-4e83-8dfa-b6fbbab7efe8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "**With** ``sqlalchemy_mate``" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2af85aef-4d67-4790-930a-8078cc82ddb8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "update_counter = 2\n", + "insert_counter = 2\n" + ] + } + ], + "source": [ + "# prepare your data\n", + "user_data_list = [\n", + " dict(id=999, name=\"Alice\"),\n", + " dict(id=1000, name=\"Bob\"),\n", + " dict(id=1001, name=\"Cathy\"),\n", + " dict(id=1002, name=\"David\"),\n", + "]\n", + "\n", + "# use ``upsert_all`` method\n", + "update_counter, insert_counter = sam.updating.upsert_all(engine=engine, table=t_users, data=user_data_list)\n", + "print(f\"{update_counter = }\")\n", + "print(f\"{insert_counter = }\")" + ] + }, + { + "cell_type": "markdown", + "id": "7b030073-2e79-4e38-8140-d17f6ddeee63", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Selecting Shortcuts" + ] + }, + { + "cell_type": "raw", + "id": "bccf8e19-4336-4d86-a5c6-6f0947aca9dd", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "- See :mod:`~sqlalchemy_mate.crud.selecting`" + ] + }, + { + "cell_type": "markdown", + "id": "6658f37f-7e1a-415f-a605-6bd6592a4d63", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Deleteing Short cuts" + ] + }, + { + "cell_type": "raw", + "id": "bb2dcb73-7b33-4fd2-b8d4-f814c39057c8", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "- See :mod:`~sqlalchemy_mate.crud.deleting`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fd126db-5aa7-4b69-96a8-965e55da2734", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/01-Core-API/index.rst b/docs/source/01-Core-API/index.rst deleted file mode 100644 index 4ee0c1c..0000000 --- a/docs/source/01-Core-API/index.rst +++ /dev/null @@ -1,208 +0,0 @@ -.. 
_core-api: - -Core API -============================================================================== -In this section, we demonstrate the simplified version with ``sqlalchemy_mate`` to manipulate data using core API. - -First, let's define a table to get start, everything looks normal so far. - -.. code-block:: python - - import sqlalchemy as sa - - metadata = sa.MetaData() - - t_users = sa.Table( - "users", metadata, - sa.Column("id", sa.Integer, primary_key=True), - sa.Column("name", sa.String, nullable=True) - ) - - # For syntax testing, you could use sqlite - # But you could see significant performance improvement in main stream - # sql database for bulk inserts - engine = sa.create_engine("sqlite:///:memory:") - - metadata.create_all(engine) - - -Bulk insert and Count rows ------------------------------------------------------------------------------- - -We want to insert 3 random user data into the database and do some basic query: - -.. code-block:: python - - import random - - user_data_list = [ - dict(id=random.randint(1, 1000), - for _ in range(3) - ] - -**With** ``sqlalchemy_mate`` - -.. code-block:: python - - import sqlalchemy_mate.api as sam - - # do bulk insert - sam.inserting.smart_insert(engine, t_users, user_data_list) - - # returns number of row in a table. - # should returns 3 - row_counts = sam.selecting.count_row(engine, t_users) - - # return one row by primary key values - # {"id": 1, "name": None} - user = sam.selecting.by_pk(engine, t_users, 1) - -**Without** ``sqlalchemy_mate`` - -.. code-block:: python - - with engine.connect() as connection: - # do bulk insert - connection.execute(t_users.insert(), user_data_list) - - # returns number of row in a table. - # should returns 3 - row_counts = connection.execute(sa.select(sa.func.count()).select_from(t_users)).one()[0] - - # return one row by primary key values - # {"id": 1, "name": None} - user = connection.execute(sa.select(t_users).where(t_users.c.id==1)).one() - -Now the syntax sugar looks like just so so, let's move to the next example. - - -Smart Single / Bulk Insert ------------------------------------------------------------------------------- - -Now we already have 3 items in database, let's try to insert 1,000 users to the table. - -.. code-block:: python - - user_data_list = [ - dict(id=id_) - for id_ in range(1, 1000+1) - ] - -**With** ``sqlalchemy_mate`` - -.. code-block:: python - - # Core insert logic = 1 line - import time - - start_time = time.process_time() - op_count, ins_count = sam.inserting.smart_insert(engine, t_users, user_data_list) - elapsed = time.process_time() - start_time - print(op_count) # 60 bulk INSERT sql command fired to database - print(ins_count) # 997 data inserted - print(elapsed) # 0.019572 in local postgres database - row_counts = sam.selecting.count_row(engine, t_users) - print(row_counts) # now we have 1000 rows - -**Without** ``sqlalchemy_mate`` - -.. code-block:: python - - # Core insert logic = 7 line - import time - from sqlalchemy.exc import IntegrityError - - start_time = time.process_time() - with engine.connect() as connection: - ins = t_users.insert() - for user_data in user_data_list: - try: - connection.execute(ins, user_data) - except IntegrityError: - pass - elapsed = time.process_time() - start_time - print(elapsed) # 0.181163 - row_counts = connection.execute(sa.select(sa.func.count()).select_from(t_users)).one()[0] - print(row_counts) - -``sqlachemy_mate`` is significantly faster than native ``sqlalchemy``. 
Because it smartly split big dataset into smaller pack, hence the total number of ``INSERT sql`` actually fired to database is greatly reduced. In this test case, ``sqlclhemy_mate`` is 10x faster with a Postgres DB on local, in real use case it could save more times because they are remote user. - - -Smart Single / Bulk Update ------------------------------------------------------------------------------- - -A common update use case is to locate a row by primary key, and update non primary key fields. - -**With** ``sqlalchemy_mate`` - -.. code-block:: python - - # update - # before, it is {"id": 1, "name": None} - print(sam.selecting.by_pk(engine, t_users, 1)) - - # do single update - user_data = dict(id=1, name="Alice") - sam.updating.update_all(engine, t_users, user_data) - - # after, it is {"id": 1, "name": None} - print(sam.selecting.by_pk(engine, t_users, 1)) - - # do multiple update - user_data_list = [ - dict(id=1, name="Alice"), - dict(id=2, name="Bob"), - dict(id=3, name="Cathy"), - ] - sam.updating.update_all(engine, t_users, user_data_list) - -**Without** ``sqlalchemy_mate`` - -.. code-block:: python - - # do single update - with engine.connect() as connection: - connection.execute(t_users.update().where(t_users.c.id==1).values(name="Alice")) - - # do multiple update - user_data_list = [ - dict(id=1, name="Alice"), - dict(id=2, name="Bob"), - dict(id=3, name="Cathy"), - ] - with engine.connect() as connection: - for user in user_data_list: - connection.execute(t_users.update().where(t_users.c.id==user["id"]).values(**user) - - -Smart Single Bulk Upsert ------------------------------------------------------------------------------- - -Upsert is a database dependent feature that not available in all sql system. :meth:`~sqlalchemy_mate.crud.updating.upsert_all`` function made upsert generally available to all SQL system and super easy to use. Internally there's an optimization that collect "to insert" items and bulk insert them fast. - -**With** ``sqlalchemy_mate`` - -.. code-block:: python - - # prepare your data - user_data_list = [ - dict(id=999, name="Alice"), - dict(id=1000, name="Bob"), - dict(id=1001, name="Cathy"), - dict(id=1002, name="David"), - ] - - # use ``upsert_all`` method - sam.updating.upsert_all(engine, t_users, user_data_list) - - -Selecting Shortcuts ------------------------------------------------------------------------------- - -- See :mod:`~sqlalchemy_mate.crud.selecting` - - -Deleteing Short cuts ------------------------------------------------------------------------------- - -- See :mod:`~sqlalchemy_mate.crud.selecting` diff --git a/docs/source/02-ORM-API/index.ipynb b/docs/source/02-ORM-API/index.ipynb new file mode 100644 index 0000000..4e43b7e --- /dev/null +++ b/docs/source/02-ORM-API/index.ipynb @@ -0,0 +1,789 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "3ba48356-9281-4e4b-8abc-689c4e079e9a", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + ".. _orm-api:\n", + "\n", + "ORM API\n", + "==============================================================================\n", + "\n", + "\n", + "Extended Declarative Base\n", + "------------------------------------------------------------------------------\n", + ":class:`sqlalchemy_mate.api.ExtendedBase ` is a Mixin class that enables many convenient methods for your ORM model." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "71915054-79a8-4dbd-83cc-dc1a49f4df53",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import sqlalchemy as sa\n",
+    "import sqlalchemy.orm as orm\n",
+    "import sqlalchemy_mate.api as sam\n",
+    "from rich import print as rprint\n",
+    "\n",
+    "Base = orm.declarative_base()\n",
+    "\n",
+    "\n",
+    "# add sqlalchemy_mate.ExtendedBase Mixin class\n",
+    "class User(Base, sam.ExtendedBase):\n",
+    "    __tablename__ = \"users\"\n",
+    "\n",
+    "    id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True)\n",
+    "    name: orm.Mapped[str] = orm.mapped_column(sa.String, nullable=True)\n",
+    "    # you can also do this\n",
+    "    # id: int = sa.Column(sa.Integer, primary_key=True)\n",
+    "    # name: str = sa.Column(sa.String, nullable=True)\n",
+    "\n",
+    "    # put important columns here\n",
+    "    # you can choose to print only those columns with the ``.glance()`` method\n",
+    "    _settings_major_attrs = [\n",
+    "        id,\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "539c929a-59eb-4535-a6eb-b6cd1eb26ee8",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## Python dict-like interface\n",
+    "\n",
+    "A data model should print nicely."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "27d125ad-51c9-4e1a-9865-9c878ccf985e",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "User(id=1, name='Alice')\n"
+     ]
+    }
+   ],
+   "source": [
+    "user = User(id=1, name=\"Alice\")\n",
+    "print(user)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "21496842-229a-4160-9679-2d70aa32ba7a",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "Converting a data model to / from generic Python data types should be easy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "f472bd2c-b0fb-47e2-ad91-16ad7f80f915",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
['id', 'name']\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\u001b[32m'id'\u001b[0m, \u001b[32m'name'\u001b[0m\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(User.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4b10f2d0-6d5e-4464-bbaf-eb0ab9b62a96", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
['id', 'name']\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\u001b[32m'id'\u001b[0m, \u001b[32m'name'\u001b[0m\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(user.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2b5abe54-92bb-43e4-90e2-2b29f5941366", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
[1, 'Alice']\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\u001b[1;36m1\u001b[0m, \u001b[32m'Alice'\u001b[0m\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(user.values())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ab36f3ce-5e00-4f63-a887-f159efb4555f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
[('id', 1), ('name', 'Alice')]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'id'\u001b[0m, \u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'name'\u001b[0m, \u001b[32m'Alice'\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(user.items())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4c375c04-8d9d-4a1b-9503-33a737e44131", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
{'id': 1, 'name': 'Alice'}\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m{\u001b[0m\u001b[32m'id'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'name'\u001b[0m: \u001b[32m'Alice'\u001b[0m\u001b[1m}\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(user.to_dict())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a364f205-1f34-4ee9-9dce-39205c0ec50b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
OrderedDict([('id', 1), ('name', 'Alice')])\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mOrderedDict\u001b[0m\u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'id'\u001b[0m, \u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'name'\u001b[0m, \u001b[32m'Alice'\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(user.to_OrderedDict())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5a33638e-03eb-4015-870b-266396e603bc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
(1,)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m(\u001b[0m\u001b[1;36m1\u001b[0m,\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(user.pk_values())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "49aece90-08ce-4e01-9fc9-6371eb8eea5a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User(id=1)\n" + ] + } + ], + "source": [ + "user.glance() # only print important columns" + ] + }, + { + "cell_type": "markdown", + "id": "7cd00afb-41a3-4b90-8952-3f1d6bdc57bc", + "metadata": {}, + "source": [ + "## Absorb and Revise\n", + "\n", + "Python dict can update values based on another dict, A data model should be able to do it to." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e0460a45-05fe-4bd8-9480-d36f202b3ae1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
User(id=1, name='Bob')\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Bob'\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
User(id=1, name='Cathy')\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Cathy'\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# update values based on another data model, it is similar to ``dict.update()``\n", + "user_bob = User(name=\"Bob\")\n", + "user.absorb(user_bob)\n", + "rprint(user)\n", + "\n", + "# update values based on generic python dict, it is similar to ``dict.update()``\n", + "user.revise({\"name\": \"Cathy\"})\n", + "rprint(user)" + ] + }, + { + "cell_type": "markdown", + "id": "fa534a1e-d3d5-4bed-9d27-53cda157cd53", + "metadata": {}, + "source": [ + "## Insert, Select, Update with ORM Model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d4fe2381-5e95-4af7-9b04-cd827b6e2c9a", + "metadata": {}, + "outputs": [], + "source": [ + "engine = sa.create_engine(\"sqlite:///:memory:\")\n", + "sam.test_connection(engine, timeout=3)\n", + "\n", + "Base.metadata.create_all(engine)\n", + "# Delete all data, make sure we have a fresh start\n", + "User.delete_all(engine)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "fb2b7c31-c059-407c-9a9d-a43d18c166e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n" + ] + } + ], + "source": [ + "# Count rows in a table\n", + "print(User.count_all(engine))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "fa3d0b95-98db-4cac-a51f-d4e9dd53208c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
3\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m3\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Bulk insert\n", + "user_list = [\n", + " User(id=57),\n", + " User(id=264),\n", + " User(id=697),\n", + "]\n", + "User.smart_insert(engine, user_list)\n", + "rprint(User.count_all(engine))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "76a3c649-e745-4a8c-9046-e36ee784b7c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
User(id=57, name=None)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m57\u001b[0m, \u001b[33mname\u001b[0m=\u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Get single object by primary key values\n", + "user = User.by_pk(engine, 57)\n", + "rprint(user) # User(id=57)" + ] + }, + { + "cell_type": "markdown", + "id": "59f47c52-6af1-4ec4-8b87-1b48432e857c", + "metadata": {}, + "source": [ + "## Smart Insert Handles Primary Key conflicts efficiently" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3f60cb57-ed47-40a8-8e3e-fa0b8b462443", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
1000\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m1000\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Bulk insert, handle primary key conflicts efficiently\n", + "user_list = [User(id=id_) for id_ in range(1, 1000 + 1)]\n", + "User.smart_insert(engine, user_list)\n", + "rprint(User.count_all(engine)) # 1000" + ] + }, + { + "cell_type": "markdown", + "id": "b0a4c6a0-5574-4807-a533-bf543933740c", + "metadata": {}, + "source": [ + "## Bulk Upsert (Insert + Update) should be easy" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "39346608-ebe2-4267-b06f-f1761bbc1758", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
User(id=999, name=None)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m999\u001b[0m, \u001b[33mname\u001b[0m=\u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
User(id=1000, name=None)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1000\u001b[0m, \u001b[33mname\u001b[0m=\u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
None\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[3;35mNone\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
None\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[3;35mNone\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# before upsert\n", + "rprint(User.by_pk(engine, 999))\n", + "rprint(User.by_pk(engine, 1000))\n", + "rprint(User.by_pk(engine, 1001))\n", + "rprint(User.by_pk(engine, 1002))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "771ab8b1-29bc-4cf1-8957-f1da6df516c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2, 2)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Bulk update + insert, locate rows by primary key values\n", + "user_list = [\n", + " User(id=999, name=\"Alice\"),\n", + " User(id=1000, name=\"Bob\"),\n", + " User(id=1001, name=\"Cathy\"),\n", + " User(id=1002, name=\"David\"),\n", + "]\n", + "User.upsert_all(engine, user_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "bbb4507c-820d-4145-bb87-256636cfb2c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
User(id=999, name='Alice')\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m999\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Alice'\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
User(id=1000, name='Bob')\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1000\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Bob'\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
User(id=1001, name='Cathy')\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1001\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Cathy'\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
User(id=1002, name='David')\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1002\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'David'\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# after upsert\n", + "rprint(User.by_pk(engine, 999))\n", + "rprint(User.by_pk(engine, 1000))\n", + "rprint(User.by_pk(engine, 1001))\n", + "rprint(User.by_pk(engine, 1002))" + ] + }, + { + "cell_type": "markdown", + "id": "2f7464c3-f050-4112-b61f-4932c8db60e2", + "metadata": {}, + "source": [ + "## Run raw SQL query\n", + "\n", + "The ``Class.by_sql()`` assumes that the returned row is the given ORM model. You cannot do this with JOIN." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1e32bae0-8633-4cd3-bae1-0b7269f8c391", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[User(id=999, name='Alice'), User(id=1000, name='Bob'), User(id=1001, name='Cathy'), User(id=1002, name='David')]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m999\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Alice'\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1000\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Bob'\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1001\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'Cathy'\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mUser\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[1;36m1002\u001b[0m, \u001b[33mname\u001b[0m=\u001b[32m'David'\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = User.by_sql(\n", + " engine,\n", + " sql=\"\"\"\n", + " SELECT *\n", + " FROM users\n", + " WHERE users.id >= 999\n", + " \"\"\",\n", + ")\n", + "rprint(results)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a407773a-62a8-49a9-baf1-e85def202ff3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/02-ORM-API/index.py b/docs/source/02-ORM-API/index.py new file mode 100644 index 0000000..fe8915f --- /dev/null +++ b/docs/source/02-ORM-API/index.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- + +import sqlalchemy as sa +import sqlalchemy.orm as orm +import sqlalchemy_mate.api as sam +from rich import print as rprint + +Base = orm.declarative_base() + + +# add sqlalchemy_mate.ExtendedBase Mixin class +class User(Base, sam.ExtendedBase): + __tablename__ = "users" + + id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True) + name: orm.Mapped[str] = orm.mapped_column(sa.String, nullable=True) + # you can also do this + # id: int = sa.Column(sa.Integer, primary_key=True) + # name: str = sa.Column(sa.String, nullable=True) + + # put important columns here + # you can choose to print those columns only with ``.glance()`` method. 
+ _settings_major_attrs = [ + id, + ] + + +user = User(id=1, name="Alice") +rprint(user) + +rprint(User.keys()) +rprint(user.keys()) +rprint(user.values()) +rprint(user.items()) +rprint(user.to_dict()) +rprint(user.to_OrderedDict()) +rprint(user.pk_values()) +user.glance() # only print important columns + +# update values based on another data model, it is similar to ``dict.update()`` +user_bob = User(name="Bob") +user.absorb(user_bob) +rprint(user) + +# update values based on generic python dict, it is similar to ``dict.update()`` +user.revise({"name": "Cathy"}) +rprint(user) + +from sqlalchemy.orm import Session + +engine = sa.create_engine("sqlite:///:memory:") +sam.test_connection(engine, timeout=3) + +Base.metadata.create_all(engine) + +# Delete all data, make sure we have a fresh start +User.delete_all(engine) +# Count rows in a table +print(User.count_all(engine)) + +# Bulk insert +user_list = [ + User(id=57), + User(id=264), + User(id=697), +] +User.smart_insert(engine, user_list) +rprint(User.count_all(engine)) + +# Get single object by primary key values +user = User.by_pk(engine, 57) +rprint(user) # User(id=57) + +# Bulk insert, handle primary key conflicts efficiently +user_list = [User(id=id_) for id_ in range(1, 1000 + 1)] +User.smart_insert(engine, user_list) +rprint(User.count_all(engine)) # 1000 + +# Bulk update + insert, locate rows by primary key values +user_list = [ + User(id=999, name="Alice"), + User(id=1000, name="Bob"), + User(id=1001, name="Cathy"), + User(id=1002, name="David"), +] +User.upsert_all(engine, user_list) + +rprint(User.by_pk(engine, 999).name) # Alice +rprint(User.by_pk(engine, 1001).name) # Cathy +rprint(User.count_all(engine)) # 1002 + +# Run raw SQL query +results = User.by_sql( + engine, + sql=""" + SELECT * + FROM users + WHERE users.id >= 999 + """, +) +rprint(results) diff --git a/docs/source/02-ORM-API/index.rst b/docs/source/02-ORM-API/index.rst deleted file mode 100644 index cc421ed..0000000 --- a/docs/source/02-ORM-API/index.rst +++ /dev/null @@ -1,127 +0,0 @@ -.. _orm-api: - -ORM API -============================================================================== - - -Extended Declarative Base ------------------------------------------------------------------------------- -``sqlalchemy_mate.ExtendedBase`` is a Mixin class that enables many convenient method for your ORM model. In this - -.. code-block:: python - - import sqlalchemy as sa - import sqlalchemy.orm as orm - import sqlalchemy_mate.api as sam - - Base = orm.declarative_base() - - # add sqlalchemy_mate.ExtendedBase Mixin class - class User(Base, sam.ExtendedBase): - __tablename__ = "users" - - id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True) - name: orm.Mapped[str] = orm.mapped_column(sa.String, nullable=True) - - # put important columns here - # you can choose to print those columns only with ``.glance()`` method. - _settings_major_attrs = [id, ] - -A data model should be have a nicer print: - -.. code-block:: python - - user = User(id=1, name="Alice") - print(user) - # User(id=1, name='Alice') - -Convert data model to / from generic python data type should be easy: - -.. 
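
Since ``ExtendedBase`` gives models a dict-like protocol, converting between rows and plain dicts round-trips cleanly. A small sketch, building on the ``User`` model defined in the script above:

```python
# Round-trip: model -> dict -> model. `User(**d)` works because the dict keys
# produced by to_dict() are exactly the mapped column names.
user = User(id=42, name="Grace")
d = user.to_dict()  # {'id': 42, 'name': 'Grace'}
clone = User(**d)
assert clone.to_dict() == d
```
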
diff --git a/docs/source/02-ORM-API/index.rst b/docs/source/02-ORM-API/index.rst
deleted file mode 100644
index cc421ed..0000000
--- a/docs/source/02-ORM-API/index.rst
+++ /dev/null
@@ -1,127 +0,0 @@
-.. _orm-api:
-
-ORM API
-==============================================================================
-
-
-Extended Declarative Base
-------------------------------------------------------------------------------
-``sqlalchemy_mate.ExtendedBase`` is a Mixin class that enables many convenient method for your ORM model. In this
-
-.. code-block:: python
-
-    import sqlalchemy as sa
-    import sqlalchemy.orm as orm
-    import sqlalchemy_mate.api as sam
-
-    Base = orm.declarative_base()
-
-    # add sqlalchemy_mate.ExtendedBase Mixin class
-    class User(Base, sam.ExtendedBase):
-        __tablename__ = "users"
-
-        id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True)
-        name: orm.Mapped[str] = orm.mapped_column(sa.String, nullable=True)
-
-        # put important columns here
-        # you can choose to print those columns only with ``.glance()`` method.
-        _settings_major_attrs = [id, ]
-
-A data model should be have a nicer print:
-
-.. code-block:: python
-
-    user = User(id=1, name="Alice")
-    print(user)
-    # User(id=1, name='Alice')
-
-Convert data model to / from generic python data type should be easy:
-
-.. code-block:: python
-
-    print(User.keys()) # class would work too, # ['id', 'name']
-    print(user.keys()) # ['id', 'name']
-    print(user.values()) # [1, 'Alice']
-    print(user.items()) # [('id', 1), ('name', 'Alice')]
-    print(user.to_dict()) # {'id': 1, 'name': 'Alice'}
-    print(user.to_OrderedDict()) # OrderedDict([('id', 1), ('name', 'Alice')])
-    print(user.pk_values()) # (1,)
-    user.glance() # User(id=1)
-
-Python dict can update values based on another dict, A data model should be able to do it to.
-
-.. code-block:: python
-
-    # update values based on another data model
-    user_bob = User(name="Bob")
-    user.absorb(user_bob)
-    print(user) # User(id=1, name='Bob')
-
-    # update values based on generic python dict
-    user.revise({"name": "Cathy"})
-    print(user) # User(id=1, name='Cathy')
-
-
-Insert, Select, Update
-------------------------------------------------------------------------------
-Talk is cheap, show me the Code.
-
-.. code-block:: python
-
-    from sqlalchemy.orm import Session
-
-    engine = sa.create_engine("sqlite:///:memory:")
-    sam.test_connection(engine, timeout=3)
-
-    Base.metadata.create_all(engine)
-
-    # Delete all data, make sure we have a fresh start
-    User.delete_all(engine)
-    # Count rows in a table
-    print(User.count_all(engine)) # 0
-
-    # Bulk insert
-    user_list = [
-        User(id=57),
-        User(id=264),
-        User(id=697),
-    ]
-    User.smart_insert(engine, user_list)
-    print(User.count_all(engine)) # 3
-
-    # Get single object by primary key values
-    user = User.by_pk(engine, 57)
-    print(user) # User(id=57)
-
-    # Bulk insert, handle primary key conflicts efficiently
-    user_list = [
-        User(id=id_)
-        for id_ in range(1, 1000+1)
-    ]
-    User.smart_insert(engine, user_list)
-    print(User.count_all(engine)) # 1000
-
-    # Bulk update + insert, locate rows by primary key values
-    user_list = [
-        User(id=999, name="Alice"),
-        User(id=1000, name="Bob"),
-        User(id=1001, name="Cathy"),
-        User(id=1002, name="David"),
-    ]
-    User.upsert_all(engine, user_list)
-
-    print(User.by_pk(engine, 999).name) # Alice
-    print(User.by_pk(engine, 1001).name) # Cathy
-    print(User.count_all(engine)) # 1002
-
-    # Run raw SQL query
-    results = User.by_sql(
-        engine,
-        sql="""
-        SELECT *
-        FROM users
-        WHERE users.id >= 999
-        """
-    )
-    # [User(id=999, name='Alice'), User(id=1000, name='Bob'), User(id=1001, name='Cathy'), User(id=1002, name='David')]
-    print(results)
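
One caveat carried over from the ORM docs above: ``Class.by_sql()`` assumes every returned row maps to the given ORM model, so JOIN queries that mix columns from several tables need plain SQLAlchemy instead. A minimal sketch; the ``orders`` table and its columns are hypothetical, only ``engine`` and ``users`` come from the examples above:

```python
import sqlalchemy as sa
import sqlalchemy.orm as orm

# Hypothetical JOIN across two tables; `orders` is made up for illustration.
stmt = sa.text(
    "SELECT users.id, users.name, orders.total "
    "FROM users JOIN orders ON orders.user_id = users.id"
)
with orm.Session(engine) as ses:
    for row in ses.execute(stmt):
        print(row.id, row.name, row.total)
```
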
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1e292520-4ad0-4769-a59a-924abf6ec9ba", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nothing happen, test connection passed, which is good!\n" + ] + } + ], + "source": [ + "import sqlalchemy as sa\n", + "import sqlalchemy.orm as orm\n", + "import sqlalchemy_mate.api as sam\n", + "\n", + "# An Postgres DB example\n", + "# First, you use EngineCreator class to create the db connection specs\n", + "# Second, you choose to use which python driver, IDE will tell you\n", + "# all options you have\n", + "engine_psql = sam.EngineCreator(\n", + " username=\"postgres\",\n", + " password=\"password\",\n", + " database=\"postgres\",\n", + " host=\"localhost\",\n", + " port=40311,\n", + ").create_postgresql_pg8000()\n", + "\n", + "# You can use test_connection method to perform test connection and\n", + "# raise error if timeout.\n", + "sam.test_connection(engine_psql, timeout=3);\n", + "\n", + "# A sqlite example\n", + "engine_sqlite = sam.EngineCreator().create_sqlite(path=\"/tmp/db.sqlite\")\n", + "sam.test_connection(engine_sqlite, timeout=1);\n", + "\n", + "print(\"Nothing happen, test connection passed, which is good!\")" + ] + }, + { + "cell_type": "markdown", + "id": "5d37bf12-033d-48eb-a920-e10c1f03a983", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Ascii Table Printer\n", + "\n", + "Lots of CLI DB client can print result in pretty Ascii Table. ``sqlalchemy_mate`` can do that too.\n", + "\n", + "First let's insert some sample data:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8eafd519-be2d-41b9-b71f-faffa7d1d3c5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "Base = orm.declarative_base()\n", + "\n", + "\n", + "class User(Base, sam.ExtendedBase):\n", + " __tablename__ = \"users\"\n", + "\n", + " id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True)\n", + " name: orm.Mapped[str] = orm.mapped_column(sa.String, nullable=True)\n", + "\n", + "\n", + "t_users = User.__table__\n", + "\n", + "engine = engine_sqlite\n", + "Base.metadata.create_all(engine)\n", + "User.smart_insert(\n", + " engine,\n", + " [\n", + " User(id=1, name=\"Alice\"),\n", + " User(id=2, name=\"Bob\"),\n", + " User(id=3, name=\"Cathy\"),\n", + " User(id=4, name=\"David\"),\n", + " User(id=5, name=\"Edward\"),\n", + " User(id=6, name=\"Frank\"),\n", + " User(id=7, name=\"George\"),\n", + " ],\n", + ");" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ee3f8a39-aa67-42f6-88f8-c1aa04ac7951", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----+--------+\n", + "| id | name |\n", + "+----+--------+\n", + "| 1 | Alice |\n", + "| 2 | Bob |\n", + "| 3 | Cathy |\n", + "| 4 | David |\n", + "| 5 | Edward |\n", + "| 6 | Frank |\n", + "| 7 | George |\n", + "+----+--------+\n" + ] + } + ], + "source": [ + "# pretty table from ORM class\n", + "print(sam.pt.from_everything(everything=User, engine_or_session=engine, limit=10))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9d620b79-0b2a-47e1-97ff-ae4de52b7c43", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + 
"tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----+-------+\n", + "| id | name |\n", + "+----+-------+\n", + "| 1 | Alice |\n", + "| 2 | Bob |\n", + "| 3 | Cathy |\n", + "+----+-------+\n" + ] + } + ], + "source": [ + "# from Table\n", + "print(sam.pt.from_everything(t_users, engine, limit=3))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "bb2003b3-809a-4c57-a7da-324deaff47f2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+\n", + "| name |\n", + "+--------+\n", + "| David |\n", + "| Edward |\n", + "+--------+\n" + ] + } + ], + "source": [ + "# from ORM styled select statement\n", + "print(\n", + " sam.pt.from_everything(\n", + " sa.select(User.name).where(User.id >= 4).limit(2),\n", + " engine,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c224faa0-6f80-4b91-856c-4ec96ebe180e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+\n", + "| name |\n", + "+--------+\n", + "| David |\n", + "| Edward |\n", + "| Frank |\n", + "| George |\n", + "+--------+\n" + ] + } + ], + "source": [ + "# from SQL expression styled select statement\n", + "print(\n", + " sam.pt.from_everything(\n", + " sa.select(t_users.c.name).where(User.id >= 4),\n", + " engine,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5ae96881-404d-4880-be41-7cf3affcd78f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----+\n", + "| id |\n", + "+----+\n", + "| 5 |\n", + "+----+\n" + ] + } + ], + "source": [ + "# from Raw SQL text\n", + "print(\n", + " sam.pt.from_everything(\n", + " \"SELECT id FROM users WHERE name = 'Edward'\",\n", + " engine,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "87a23f95-8097-4d1d-846d-1dfa821ee502", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----+-------+\n", + "| id | name |\n", + "+----+-------+\n", + "| 1 | Alice |\n", + "| 2 | Bob |\n", + "| 3 | Cathy |\n", + "+----+-------+\n" + ] + } + ], + "source": [ + "# from list of dict\n", + "print(\n", + " sam.pt.from_everything(\n", + " [\n", + " {\"id\": 1, \"name\": \"Alice\"},\n", + " {\"id\": 2, \"name\": \"Bob\"},\n", + " {\"id\": 3, \"name\": \"Cathy\"},\n", + " ]\n", + " )\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "b20a11ad-7a2e-40e2-9a04-05576da9bc7f", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "For more information, see :mod:`~sqlalchemy_mate.pt`" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/docs/source/03-Other-Helpers/index.py b/docs/source/03-Other-Helpers/index.py new file mode 100644 index 0000000..8a68ca2 --- /dev/null +++ b/docs/source/03-Other-Helpers/index.py @@ -0,0 +1,92 @@ +import sqlalchemy as sa +import sqlalchemy.orm as orm +import sqlalchemy_mate.api as sam + +# A Postgres DB example +# First, you use the EngineCreator class to create the db connection specs +# Second, you choose which Python driver to use; your IDE will tell you +# all the options you have +engine_psql = sam.EngineCreator( + username="postgres", + password="password", + database="postgres", + host="localhost", + port=40311, +).create_postgresql_pg8000() + +# You can use the test_connection method to test the connection and +# raise an error on timeout. +sam.test_connection(engine_psql, timeout=3) + +# A sqlite example +engine_sqlite = sam.EngineCreator().create_sqlite(path="/tmp/db.sqlite") +sam.test_connection(engine_sqlite, timeout=1) + + +Base = orm.declarative_base() + + +class User(Base, sam.ExtendedBase): + __tablename__ = "users" + + id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True) + name: orm.Mapped[str] = orm.mapped_column(sa.String, nullable=True) + + +t_users = User.__table__ + +engine = engine_sqlite +Base.metadata.create_all(engine) +User.smart_insert( + engine, + [ + User(id=1, name="Alice"), + User(id=2, name="Bob"), + User(id=3, name="Cathy"), + User(id=4, name="David"), + User(id=5, name="Edward"), + User(id=6, name="Frank"), + User(id=7, name="George"), + ], +) + +# pretty table from ORM class +print(sam.pt.from_everything(everything=User, engine_or_session=engine, limit=10)) + +# from Table +print(sam.pt.from_everything(t_users, engine, limit=3)) + +# from ORM styled select statement +print( + sam.pt.from_everything( + sa.select(User.name).where(User.id >= 4).limit(2), + engine, + ) +) + +# from SQL expression styled select statement +print( + sam.pt.from_everything( + sa.select(t_users.c.name).where(User.id >= 4), + engine, + ) +) + +# from Raw SQL text +print( + sam.pt.from_everything( + "SELECT id FROM users WHERE name = 'Edward'", + engine, + ) +) + +# from list of dict +print( + sam.pt.from_everything( + [ + {"id": 1, "name": "Alice"}, + {"id": 2, "name": "Bob"}, + {"id": 3, "name": "Cathy"}, + ] + ) +) diff --git a/docs/source/03-Other-Helpers/index.rst b/docs/source/03-Other-Helpers/index.rst deleted file mode 100644 index 4b0f1ac..0000000 --- a/docs/source/03-Other-Helpers/index.rst +++ /dev/null @@ -1,161 +0,0 @@ -.. _other}-helpers: - -Other Helpers -============================================================================== - - -User Friendly Engine Creator ------------------------------------------------------------------------------- -`This sqlalchemy Official Document `_ tells you the correct connection string to use for different DB driver. Who wants to Google the API document everytime? - -``sqlalchemy_mate.EngineCreator`` leveraged the IDE / Code Editor that provide a user friendly interface to pass in DB connection specs and choose the underlying python driver. - -.. 
code-block:: python - - import sqlalchemy_mate.api as sam - - # An Postgres DB example - # First, you use EngineCreator class to create the db connection specs - # Second, you choose to use which python driver, IDE will tell you - # all options you have - engine_psql = sam.EngineCreator( - username="postgres", - password="password", - database="postgres", - host="localhost", - port=43347, - ).create_postgresql_psycopg2() - - # You can use test_connection method to perform test connection and - # raise error if timeout. - sam.test_connection(engine_sqlite, timeout=3) - - # A sqlite example - engine_sqlite = sam.EngineCreator().create_sqlite(path="/tmp/db.sqlite") - sam.test_connection(engine_sqlite, timeout=1) - -For more information, see :mod:`~sqlalchemy_mate.engine_creator` - - -Ascii Table Printer ------------------------------------------------------------------------------- - -Lots of CLI DB client can print result in pretty Ascii Table. ``sqlalchemy_mate`` can do that too. - -First let's insert some sample data: - -.. code-block:: python - - import sqlalchemy as sa - from sqlalchemy.orm import declarative_base - import sqlalchemy_mate.api as sam - - Base = declarative_base() - - - class User(Base, sam.ExtendedBase): - __tablename__ = "users" - - id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column(sa.String, nullable=True) - - - t_users = User.__table__ - - engine = sam.EngineCreator().create_sqlite() - Base.metadata.create_all(engine) - User.smart_insert( - engine, - [ - User(id=1, name="Alice"), - User(id=2, name="Bob"), - User(id=3, name="Cathy"), - User(id=4, name="David"), - User(id=5, name="Edward"), - User(id=6, name="Frank"), - User(id=7, name="George"), - ] - ) - -**Now let's create some query and print Ascii table**: - -.. 
code-block:: python - - import sqlalchemy_mate.api as sam - - # from ORM class - print(sam.pt.from_everything(User, engine)) - +----+--------+ - | id | name | - +----+--------+ - | 1 | Alice | - | 2 | Bob | - | 3 | Cathy | - | 4 | David | - | 5 | Edward | - | 6 | Frank | - | 7 | George | - +----+--------+ - - # from Table - print(sam.pt.from_everything(t_users, engine, limit=3)) - +----+-------+ - | id | name | - +----+-------+ - | 1 | Alice | - | 2 | Bob | - | 3 | Cathy | - +----+-------+ - - # from ORM styled select statement - print(sam.pt.from_everything( - sa.select(User.name).where(User.id >= 4).limit(2), - engine, - )) - +--------+ - | name | - +--------+ - | David | - | Edward | - +--------+ - - # from SQL expression styled select statement - print(sam.pt.from_everything( - sa.select(t_users.c.name).where(User.id >= 4), - engine - )) - +--------+ - | name | - +--------+ - | David | - | Edward | - | Frank | - | George | - +--------+ - - # from Raw SQL text - print(sam.pt.from_everything( - "SELECT id FROM users WHERE name = 'Edward'", - engine - )) - +----+ - | id | - +----+ - | 5 | - +----+ - - # from list of dict - print(sam.pt.from_everything([ - {"id": 1, "name": "Alice"}, - {"id": 2, "name": "Bob"}, - {"id": 3, "name": "Cathy"}, - ])) - +----+-------+ - | id | name | - +----+-------+ - | 1 | Alice | - | 2 | Bob | - | 3 | Cathy | - +----+-------+ - -For more information, see :mod:`~sqlalchemy_mate.pt` diff --git a/docs/source/04-Custom-Types/index.ipynb b/docs/source/04-Custom-Types/index.ipynb new file mode 100644 index 0000000..ffca66e --- /dev/null +++ b/docs/source/04-Custom-Types/index.ipynb @@ -0,0 +1,305 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "09ac12cb-e309-4d40-9c9f-638935491084", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + ".. _custom-types:\n", + "\n", + "Custom Types\n", + "================================================================================" + ] + }, + { + "cell_type": "markdown", + "id": "5644d631-fbe3-4e1e-a378-f0d7b185b661", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Compressed String\n", + "\n", + "A unicode string, but compressed. See [example](https://github.com/MacHu-GWU/sqlalchemy_mate-project/blob/master/tests/types/test_types_compressed.py).\n", + "\n", + "\n", + "## Compressed Binary\n", + "\n", + "A big binary blob, but compressed. See [example](https://github.com/MacHu-GWU/sqlalchemy_mate-project/blob/master/tests/types/test_types_compressed.py).\n", + "\n", + "\n", + "## Compressed JSON\n", + "\n", + "A JSON-serializable object, but compressed. See [example](https://github.com/MacHu-GWU/sqlalchemy_mate-project/blob/master/tests/types/test_types_compressed_json.py). A short usage sketch of these compressed types appears a little further below." + ] + }, + { + "cell_type": "markdown", + "id": "cac6d152-013d-454e-a491-8cc0b148651a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## JSON Serializable\n", + "\n", + "Any JSON-serializable object that implements the ``to_json(self)`` and ``from_json(cls, json_str)`` methods." + ] + }, + { + "cell_type": "markdown", + "id": "6a483934-170d-4cea-bade-597345839130", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Declare your JSON-serializable object, which will be the value of a column in the database."
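The three compressed types above are documented only via links to their test modules, so here is a minimal declaration sketch; the type names (``CompressedStringType``, ``CompressedJSONType``) are taken from those test files and should be treated as illustrative assumptions rather than authoritative API:

.. code-block:: python

    import sqlalchemy as sa
    import sqlalchemy.orm as orm
    import sqlalchemy_mate.api as sam

    Base = orm.declarative_base()


    class Page(Base):
        __tablename__ = "pages"

        id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True)
        # values are transparently compressed on write and decompressed on read
        html: orm.Mapped[str] = orm.mapped_column(
            sam.types.CompressedStringType, nullable=True
        )
        payload: orm.Mapped[dict] = orm.mapped_column(
            sam.types.CompressedJSONType, nullable=True
        )

The JSON Serializable walkthrough continues below.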
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3a0dda0e-b742-42ae-9123-637af22521e6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import jsonpickle\n", + "\n", + "\n", + "# a custom python class\n", + "class ComputerDetails:\n", + " def __init__(self, os: str, cpu: int, memory: int, disk: int):\n", + " self.os = os\n", + " self.cpu = cpu\n", + " self.memory = memory\n", + " self.disk = disk\n", + "\n", + " def to_json(self) -> str:\n", + " return jsonpickle.encode(self)\n", + "\n", + " @classmethod\n", + " def from_json(cls, json_str: str) -> \"Computer\":\n", + " return jsonpickle.decode(json_str)" + ] + }, + { + "cell_type": "markdown", + "id": "02a82656-fc64-419a-bb11-cf94ee09ba8f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "then declare your ORM model" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dbf99a11-fb22-4ad0-871c-30e1d983b08b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import sqlalchemy as sa\n", + "import sqlalchemy.orm as orm\n", + "import sqlalchemy_mate.api as sam\n", + "\n", + "Base = orm.declarative_base()\n", + "\n", + "\n", + "class Computer(Base):\n", + " __tablename__ = \"computer\"\n", + "\n", + " id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True)\n", + " # make sure you set the type hint and factory_class right\n", + " details: orm.Mapped[ComputerDetails] = orm.mapped_column(\n", + " sam.types.JSONSerializableType(factory_class=ComputerDetails),\n", + " nullable=True,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "7bd45246-36eb-496f-8e1a-3a68be0e4ea9", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Use ORM to insert a row and get the ``Computer`` object." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "167772b9-9f45-4f1d-aa1b-b8d59c08345a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "computer.details.os = 'Linux'\n", + "computer.details.cpu = 4\n", + "computer.details.memory = 8\n", + "computer.details.disk = 256\n" + ] + } + ], + "source": [ + "engine = sam.engine_creator.EngineCreator().create_sqlite(\n", + " \"/tmp/sqlalchemy_mate_json_serializable.sqlite\"\n", + ")\n", + "Base.metadata.create_all(engine)\n", + "sam.deleting.delete_all(engine, Computer.__table__)\n", + "\n", + "\n", + "with orm.Session(engine) as ses:\n", + " computer = Computer(\n", + " id=1,\n", + " details=ComputerDetails(\n", + " os=\"Linux\",\n", + " cpu=4,\n", + " memory=8,\n", + " disk=256,\n", + " ),\n", + " )\n", + "\n", + " ses.add(computer)\n", + " ses.commit()\n", + "\n", + " computer = ses.get(Computer, 1)\n", + " print(f\"{computer.details.os = }\")\n", + " print(f\"{computer.details.cpu = }\")\n", + " print(f\"{computer.details.memory = }\")\n", + " print(f\"{computer.details.disk = }\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2b724e9c-ca79-4869-974b-64ef7376ec6a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "If you query the raw table definition (The ComputerDetail column is a str (JSON encoded)), you will see the encoded JSON." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b0d6daa2-4469-4cc5-b644-6449603de30f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, '{\"py/object\": \"__main__.ComputerDetails\", \"os\": \"Linux\", \"cpu\": 4, \"memory\": 8, \"disk\": 256}')\n" + ] + } + ], + "source": [ + "t_computer = sa.Table(\n", + " \"computer\",\n", + " sa.MetaData(),\n", + " sa.Column(\"id\", sa.Integer, primary_key=True),\n", + " sa.Column(\"details\", sa.String),\n", + ")\n", + "\n", + "\n", + "with engine.connect() as conn:\n", + " stmt = sa.select(t_computer)\n", + " for row in conn.execute(stmt).all():\n", + " print(row)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98b8bb90-c525-42ac-abcd-749ed549bf91", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/04-Custom-Types/index.py b/docs/source/04-Custom-Types/index.py new file mode 100644 index 0000000..9fcb0c4 --- /dev/null +++ b/docs/source/04-Custom-Types/index.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +import jsonpickle + + +# a custom python class +class ComputerDetails: + def __init__(self, os: str, cpu: int, memory: int, disk: int): + self.os = os + self.cpu = cpu + self.memory = memory + self.disk = disk + + def to_json(self) -> str: + return jsonpickle.encode(self) + + @classmethod + def from_json(cls, json_str: str) -> "Computer": + return jsonpickle.decode(json_str) + + +import sqlalchemy as sa +import sqlalchemy.orm as orm +import sqlalchemy_mate.api as sam + +Base = orm.declarative_base() + + +class Computer(Base): + __tablename__ = "computer" + + id: orm.Mapped[int] = orm.mapped_column(sa.Integer, primary_key=True) + details: orm.Mapped[ComputerDetails] = orm.mapped_column( + sam.types.JSONSerializableType(factory_class=ComputerDetails), + nullable=True, + ) + + +engine = sam.engine_creator.EngineCreator().create_sqlite( + "/tmp/sqlalchemy_mate_json_serializable.sqlite" +) +Base.metadata.create_all(engine) +sam.deleting.delete_all(engine, Computer.__table__) + + +with orm.Session(engine) as ses: + computer = Computer( + id=1, + details=ComputerDetails( + os="Linux", + cpu=4, + memory=8, + disk=256, + ), + ) + + ses.add(computer) + ses.commit() + + computer = ses.get(Computer, 1) + print(f"{computer.details.os = }") + print(f"{computer.details.cpu = }") + print(f"{computer.details.memory = }") + print(f"{computer.details.disk = }") + + +t_computer = sa.Table( + "computer", + sa.MetaData(), + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("details", sa.String), +) + + +with engine.connect() as conn: + stmt = sa.select(t_computer) + for row in conn.execute(stmt).all(): + print(row) diff --git a/docs/source/04-Custom-Types/index.rst b/docs/source/04-Custom-Types/index.rst deleted file mode 100644 index 35824f1..0000000 --- a/docs/source/04-Custom-Types/index.rst +++ /dev/null @@ -1,61 +0,0 @@ -.. 
_custom-types: - -Custom Types -============================================================================== - - -Compressed String ------------------------------------------------------------------------------- -A unicode string, but compressed. - - -Compressed Binary ------------------------------------------------------------------------------- -A big binary blob, but compressed. - - -Compressed JSON ------------------------------------------------------------------------------- -A json serializable object, but compressed. - - -JSON Serializable ------------------------------------------------------------------------------- -Any JSON serializable object, if implemented ``to_json(self):`` and ``from_json(cls, json_str):`` method. - -.. code-block:: python - - import sqlalchemy.orm as orm - import jsonpickle - - # a custom python class - class ComputerDetails: - def __init__(self, ...): - ... - - def to_json(self) -> str: - return jsonpickle.encode(self) - - @classmethod - def from_json(cls, json_str: str) -> 'Computer': - return cls(**jsonpickle.decode(json_str)) - - Base = orm.declarative_base() - - class Computer(Base): - id = Column(Integer, primary_key) - details = Column(JSONSerializableType(factory_class=Computer) - - ... - - computer = Computer( - id=1, - details=ComputerDetails(...), - ) - - with Session(engine) as session: - session.add(computer) - session.commit() - - computer = session.get(Computer, 1) - print(computer.details) diff --git a/docs/source/05-Patterns/Large-Binary-Column-AWS-S3-Backend/index.ipynb b/docs/source/05-Patterns/Large-Binary-Column-AWS-S3-Backend/index.ipynb new file mode 100644 index 0000000..c581962 --- /dev/null +++ b/docs/source/05-Patterns/Large-Binary-Column-AWS-S3-Backend/index.ipynb @@ -0,0 +1,1999 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "f7e5eedb-a37b-4184-8019-65a076a832f5", + "metadata": { + "editable": true, + "raw_mimetype": "text/restructuredtext", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + ".. _large-binary-column-aws-s3-backend:\n", + "\n", + "Large Binary Column AWS S3 Backend\n", + "================================================================================" + ] + }, + { + "cell_type": "markdown", + "id": "565e4361d87af00f", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "Storing large binary data directly in a relational database can lead to performance and scalability issues. As the size of the binary data grows, it consumes valuable database disk space and increases the I/O overhead, potentially impacting query performance and overall database efficiency. To address this challenge, the recommended best practice is to employ a pattern that leverages external storage backends for storing large binary data while maintaining a reference to the data's location within the database.\n", + "By storing only a uniform resource identifier (URI) as a column in the relational database, the actual binary data is offloaded to a dedicated storage layer. This approach allows for better utilization of database resources, as the database focuses on storing structured data and efficient querying. 
The external storage backend, such as a file system or cloud storage like Amazon S3, is optimized for handling large binary objects, providing scalability and cost-effectiveness.\n" + ] + }, + { + "cell_type": "markdown", + "id": "d147e1e66cc5fc72", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Example\n", + "\n", + "In this comprehensive example, we aim to demonstrate the complete lifecycle management of large binary data using the pattern that leverages an external storage system like **Amazon S3** in conjunction with a relational database. The example covers various scenarios to showcase the proper handling of data consistency and integrity.\n", + "\n", + "1. In the first scenario, we attempt to **create a new row** in the database with a column containing a reference to the large binary data stored in S3. However, if the SQL ``INSERT`` operation fails unexpectedly, it is crucial to maintain data consistency by removing the orphaned S3 object (optional, at your discretion). This ensures that there are no dangling references or unused data in the external storage.\n", + "2. The second scenario illustrates a successful **creation of a row** with a large binary data column. Here, we can observe how the binary data is efficiently stored in S3 and the corresponding reference is inserted into the database column.\n", + "3. In the third scenario, we try to **update the value of a large binary column in an existing row**. If the SQL ``UPDATE`` operation fails, it is essential to maintain the integrity of the data. We can see that the old S3 object remains unchanged, and the new S3 object, if created, is removed (optional, at your discretion) to keep the system in a consistent state.\n", + "4. The fourth scenario demonstrates a successful **update of a large binary column value**. In this case, we can observe how the old S3 object is deleted (optional, at your discretion) to free up storage space, and the new S3 object is created to reflect the updated binary data. This ensures that the database and S3 remain in sync.\n", + "5. Finally, the fifth scenario showcases the **deletion of a row** containing a large binary column. When a row is deleted from the database, it is important to clean up (optional, at your discretion) the associated S3 object as well. By removing the corresponding S3 object, we maintain data consistency and prevent any orphaned binary data from lingering in the external storage. A condensed sketch of this write-storage-first ordering appears right after this list."
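Distilled from the five scenarios above, this is a condensed, storage-agnostic sketch of the ordering invariant: write the storage backend first, then the database, and remove the new object if the SQL statement fails. ``storage`` is an in-memory stand-in for S3, ``Task`` and ``get_utc_now`` refer to the model and helper defined in the cells below, and ``create_row`` itself is an illustrative helper, not a sqlalchemy_mate API:

.. code-block:: python

    import sqlalchemy.orm as orm

    storage: dict[str, bytes] = {}  # in-memory stand-in for S3


    def create_row(ses: orm.Session, url: str, binary: bytes) -> None:
        uri = f"mem://{url}/html"
        storage[uri] = binary  # 1. write the storage backend first
        try:
            with ses.begin():
                # 2. then write the DB row that references the object
                ses.add(Task(url=url, update_at=get_utc_now(), html=uri))
        except Exception:
            storage.pop(uri, None)  # 3. on failure, remove the orphaned object
            raise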
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6cf77294c8e5cef6", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:11.815530Z", + "start_time": "2024-05-27T03:59:11.811233Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "from s3pathlib import S3Path, context\n", + "from boto_session_manager import BotoSesManager\n", + "\n", + "import sqlalchemy as sa\n", + "import sqlalchemy.orm as orm\n", + "import sqlalchemy_mate.api as sam\n", + "\n", + "from rich import print as rprint\n", + "from rich import box\n", + "from rich.console import Console\n", + "from rich.panel import Panel" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "49d8868e132f812", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:11.986118Z", + "start_time": "2024-05-27T03:59:11.978669Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "aws_s3 = sam.patterns.large_binary_column.aws_s3\n", + "console = Console()\n", + "\n", + "\n", + "def get_utc_now() -> datetime:\n", + " return datetime.utcnow()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "74c494d3cb9461df", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:12.104814Z", + "start_time": "2024-05-27T03:59:12.097902Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "Base = orm.declarative_base()\n", + "\n", + "\n", + "class Task(Base):\n", + " __tablename__ = \"tasks\"\n", + "\n", + " url = orm.mapped_column(sa.String, primary_key=True)\n", + " update_at = orm.mapped_column(sa.DateTime)\n", + " html = orm.mapped_column(sa.String, nullable=True)\n", + " image = orm.mapped_column(sa.String, nullable=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "50ad3f2c0d06eba", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:12.251597Z", + "start_time": "2024-05-27T03:59:12.245455Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "engine = sa.create_engine(\"sqlite:///:memory:\")\n", + "Base.metadata.create_all(engine)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8bb68ffe3c194396", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:12.604228Z", + "start_time": "2024-05-27T03:59:12.417362Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "bsm = BotoSesManager()\n", + "context.attach_boto_session(bsm.boto_ses)\n", + "bucket = f\"{bsm.aws_account_alias}-{bsm.aws_region}-data\"\n", + "s3dir_root = S3Path(\n", + " f\"s3://{bucket}/projects/sqlalchemy_mate/patterns/s3backed_column/data/\"\n", + ").to_dir()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e2bd8aadc4608fe5", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:12.793218Z", + "start_time": "2024-05-27T03:59:12.605161Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "S3Path('s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# clean up everything in database and s3 to ensure a fresh start\n", + "with engine.connect() as 
conn:\n", + " conn.execute(Task.__table__.delete())\n", + " conn.commit()\n", + "s3dir_root.delete()" + ] + }, + { + "cell_type": "markdown", + "id": "d1db1addbd837b0d", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Create a Row but SQL INSERT failed" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1340de490c306421", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:12.861527Z", + "start_time": "2024-05-27T03:59:12.855009Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\n",
+       " Create a Row but SQL INSERT failed                                                                              \n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32m╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\u001b[0m\n", + "\u001b[1;32m║\u001b[0m Create a Row but SQL INSERT failed \u001b[1;32m║\u001b[0m\n", + "\u001b[1;32m╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(\n", + " Panel(\n", + " \"Create a Row but SQL INSERT failed\", box=box.DOUBLE, border_style=\"bold green\"\n", + " )\n", + ")\n", + "\n", + "url = \"https://www.example.com\"\n", + "html_content_1 = b\"this is html 1\"\n", + "image_content_1 = b\"this is image 1\"\n", + "html_additional_kwargs = dict(ContentType=\"text/html\")\n", + "image_additional_kwargs = dict(ContentType=\"image/jpeg\")\n", + "utc_now = get_utc_now()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6a759875fe426e7d", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:13.190204Z", + "start_time": "2024-05-27T03:59:12.987739Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Write S3 first, then write DB                                                                                   │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Write S3 first, then write DB │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
PutS3Result(\n",
+       "    s3_client=<botocore.client.S3 object at 0x108bfb340>,\n",
+       "    put_s3backed_column_results=[\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='html',\n",
+       "            old_s3_uri=None,\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89f84a266b0d828a97c33',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs=None,\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89\n",
+       "f84a266b0d828a97c33'\n",
+       "            }\n",
+       "        ),\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='image',\n",
+       "            old_s3_uri=None,\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e47117d14e1524a83ae0e',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs=None,\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e\n",
+       "47117d14e1524a83ae0e'\n",
+       "            }\n",
+       "        )\n",
+       "    ]\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mPutS3Result\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33ms3_client\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;95mbotocore.client.S3\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x108bfb340\u001b[0m\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mput_s3backed_column_results\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'html'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[3;35mNone\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89f84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_old_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[3;35mNone\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_new_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m{\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Bucket'\u001b[0m\u001b[39m: \u001b[0m\u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Key'\u001b[0m\u001b[39m: \u001b[0m\n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89\u001b[0m\n", + "\u001b[32mf84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'image'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[3;35mNone\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m\u001b[0m\u001b[1m>\u001b[0m,\n", + " 
\u001b[33mcleanup_old_kwargs\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + " \u001b[33mcleanup_new_kwargs\u001b[0m=\u001b[1m{\u001b[0m\n", + " \u001b[32m'Bucket'\u001b[0m: \u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m,\n", + " \u001b[32m'Key'\u001b[0m: \n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e\u001b[0m\n", + "\u001b[32m47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + " \u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Write S3 first, then write DB\"))\n", + "put_s3_result = aws_s3.put_s3(\n", + " api_calls=[\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"html\",\n", + " binary=html_content_1,\n", + " old_s3_uri=None,\n", + " extra_put_object_kwargs=html_additional_kwargs,\n", + " ),\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"image\",\n", + " binary=image_content_1,\n", + " old_s3_uri=None,\n", + " extra_put_object_kwargs=image_additional_kwargs,\n", + " ),\n", + " ],\n", + " s3_client=bsm.s3_client,\n", + " pk=url,\n", + " bucket=s3dir_root.bucket,\n", + " prefix=s3dir_root.key,\n", + " update_at=utc_now,\n", + " is_pk_url_safe=False,\n", + ")\n", + "rprint(put_s3_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ec9b35fb16ffc66b", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:13.203243Z", + "start_time": "2024-05-27T03:59:13.200274Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
SQL INSERT Failed! Error: UserError()\n",
+       "
\n" + ], + "text/plain": [ + "SQL INSERT Failed! Error: \u001b[1;35mUserError\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "class UserError(Exception):\n", + " pass\n", + "\n", + "\n", + "with orm.Session(engine) as ses:\n", + " try:\n", + " with ses.begin():\n", + " task1 = Task(\n", + " url=url,\n", + " update_at=utc_now,\n", + " # this is a helper method that convert the put s3 results\n", + " # to INSERT / UPDATE values\n", + " **put_s3_result.to_values(),\n", + " )\n", + " # intentionally raises an error to simulate a database failure\n", + " raise UserError()\n", + " ses.add(task1)\n", + " rprint(\"SQL INSERT Succeeded!\")\n", + " except Exception as e:\n", + " rprint(f\"SQL INSERT Failed! Error: {e!r}\")\n", + " # clean up created s3 object when create row failed\n", + " # if you don't want to do that, just don't run this method\n", + " put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6946bbf35cd87839", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:13.498782Z", + "start_time": "2024-05-27T03:59:13.378302Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Database row should not exists                                                                                  │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Database row should not exists │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
ses.get(Task, url) = None\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mses.get\u001b[0m\u001b[1m(\u001b[0mTask, url\u001b[1m)\u001b[0m = \u001b[3;35mNone\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ S3 object should be deleted                                                                                     │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ S3 object should be deleted │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(html_s3_uri).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mhtml_s3_uri\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(image_s3_uri).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mimage_s3_uri\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Database row should not exists\"))\n", + "rprint(f\"{ses.get(Task, url) = }\")\n", + "assert ses.get(Task, url) is None\n", + "rprint(Panel(\"S3 object should be deleted\"))\n", + "values = put_s3_result.to_values()\n", + "html_s3_uri = values[\"html\"]\n", + "image_s3_uri = values[\"image\"]\n", + "rprint(f\"{S3Path(html_s3_uri).exists() = }\")\n", + "rprint(f\"{S3Path(image_s3_uri).exists() = }\")\n", + "assert S3Path(html_s3_uri).exists() is False\n", + "assert S3Path(image_s3_uri).exists() is False" + ] + }, + { + "cell_type": "markdown", + "id": "5172803499ea3e3e", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Create a Row and SQL INSERT succeeded" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "527a4865728b8d96", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:13.637090Z", + "start_time": "2024-05-27T03:59:13.580442Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\n",
+       " Create a Row and SQL INSERT succeeded                                                                           \n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32m╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\u001b[0m\n", + "\u001b[1;32m║\u001b[0m Create a Row and SQL INSERT succeeded \u001b[1;32m║\u001b[0m\n", + "\u001b[1;32m╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(\n", + " Panel(\n", + " \"Create a Row and SQL INSERT succeeded\",\n", + " box=box.DOUBLE,\n", + " border_style=\"bold green\",\n", + " )\n", + ")\n", + "utc_now = get_utc_now()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "cc76125d77d8db49", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:13.746531Z", + "start_time": "2024-05-27T03:59:13.741525Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Write S3 first, then write DB                                                                                   │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Write S3 first, then write DB │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
PutS3Result(\n",
+       "    s3_client=<botocore.client.S3 object at 0x108bfb340>,\n",
+       "    put_s3backed_column_results=[\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='html',\n",
+       "            old_s3_uri=None,\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89f84a266b0d828a97c33',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs=None,\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89\n",
+       "f84a266b0d828a97c33'\n",
+       "            }\n",
+       "        ),\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='image',\n",
+       "            old_s3_uri=None,\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e47117d14e1524a83ae0e',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs=None,\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e\n",
+       "47117d14e1524a83ae0e'\n",
+       "            }\n",
+       "        )\n",
+       "    ]\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mPutS3Result\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33ms3_client\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;95mbotocore.client.S3\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x108bfb340\u001b[0m\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mput_s3backed_column_results\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'html'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[3;35mNone\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89f84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_old_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[3;35mNone\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_new_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m{\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Bucket'\u001b[0m\u001b[39m: \u001b[0m\u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Key'\u001b[0m\u001b[39m: \u001b[0m\n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89\u001b[0m\n", + "\u001b[32mf84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'image'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[3;35mNone\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m\u001b[0m\u001b[1m>\u001b[0m,\n", + " 
\u001b[33mcleanup_old_kwargs\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + " \u001b[33mcleanup_new_kwargs\u001b[0m=\u001b[1m{\u001b[0m\n", + " \u001b[32m'Bucket'\u001b[0m: \u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m,\n", + " \u001b[32m'Key'\u001b[0m: \n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e\u001b[0m\n", + "\u001b[32m47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + " \u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Write S3 first, then write DB\"))\n", + "put_s3_result = aws_s3.put_s3(\n", + " api_calls=[\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"html\",\n", + " binary=html_content_1,\n", + " old_s3_uri=None,\n", + " extra_put_object_kwargs=html_additional_kwargs,\n", + " ),\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"image\",\n", + " binary=image_content_1,\n", + " old_s3_uri=None,\n", + " extra_put_object_kwargs=image_additional_kwargs,\n", + " ),\n", + " ],\n", + " s3_client=bsm.s3_client,\n", + " pk=url,\n", + " bucket=s3dir_root.bucket,\n", + " prefix=s3dir_root.key,\n", + " update_at=utc_now,\n", + " is_pk_url_safe=False,\n", + ")\n", + "rprint(put_s3_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "13f3a56e2f63e4cc", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-27T03:59:13.953354Z", + "start_time": "2024-05-27T03:59:13.896520Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
SQL INSERT Succeeded!\n",
+       "
\n" + ], + "text/plain": [ + "SQL INSERT Succeeded!\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with orm.Session(engine) as ses:\n", + " try:\n", + " with ses.begin():\n", + " task1 = Task(\n", + " url=url,\n", + " update_at=utc_now,\n", + " # this is a helper method that convert the put s3 results\n", + " # to INSERT / UPDATE values\n", + " **put_s3_result.to_values(),\n", + " )\n", + " ses.add(task1)\n", + " rprint(\"SQL INSERT Succeeded!\")\n", + " except Exception as e:\n", + " rprint(f\"SQL INSERT Failed, error: {e!r}\")\n", + " # clean up created s3 object when create row failed\n", + " # if you don't want to do that, just don't run this method\n", + " put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4a027b79d0825ffd", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Database row should be inserted                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Database row should be inserted │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
{\n",
+       "    '_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x108c13dc0>,\n",
+       "    'update_at': datetime.datetime(2024, 5, 27, 5, 24, 22, 781233),\n",
+       "    'url': 'https://www.example.com',\n",
+       "    'image': \n",
+       "'s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5\n",
+       "jb20=/col=image/md5=9a48fd95141e47117d14e1524a83ae0e',\n",
+       "    'html': \n",
+       "'s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5\n",
+       "jb20=/col=html/md5=3fcad0c630c89f84a266b0d828a97c33'\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'_sa_instance_state'\u001b[0m: \u001b[1m<\u001b[0m\u001b[1;95msqlalchemy.orm.state.InstanceState\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x108c13dc0\u001b[0m\u001b[1m>\u001b[0m,\n", + " \u001b[32m'update_at'\u001b[0m: \u001b[1;35mdatetime.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2024\u001b[0m, \u001b[1;36m5\u001b[0m, \u001b[1;36m27\u001b[0m, \u001b[1;36m5\u001b[0m, \u001b[1;36m24\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m781233\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[32m'url'\u001b[0m: \u001b[32m'https://www.example.com'\u001b[0m,\n", + " \u001b[32m'image'\u001b[0m: \n", + "\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5\u001b[0m\n", + "\u001b[32mjb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m,\n", + " \u001b[32m'html'\u001b[0m: \n", + "\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5\u001b[0m\n", + "\u001b[32mjb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89f84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ S3 object should be created                                                                                     │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ S3 object should be created │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task1.html).read_bytes() = b'<html>this is html 1</html>'\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask1.html\u001b[1m)\u001b[0m\u001b[1;35m.read_bytes\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[32mb'\u001b[0m\u001b[32m<\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m>this is html 1\u001b[0m\u001b[32m'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task1.image).read_bytes() = b'this is image 1'\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask1.image\u001b[1m)\u001b[0m\u001b[1;35m.read_bytes\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[32mb'this is image 1'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Database row should be inserted\"))\n", + "task1: Task = ses.get(Task, url)\n", + "rprint(task1.__dict__)\n", + "assert task1.url == url\n", + "assert task1.update_at == utc_now\n", + "rprint(Panel(\"S3 object should be created\"))\n", + "rprint(f\"{S3Path(task1.html).read_bytes() = }\")\n", + "rprint(f\"{S3Path(task1.image).read_bytes() = }\")\n", + "assert S3Path(task1.html).read_bytes() == html_content_1\n", + "assert S3Path(task1.image).read_bytes() == image_content_1" + ] + }, + { + "cell_type": "markdown", + "id": "74b61bbb-39a4-4df5-bc41-f8bd3db073f5", + "metadata": {}, + "source": [ + "## Update a Row but SQL UPDATE failed" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "198f810f-137f-40c8-a016-6d491494f7de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\n",
+       " Update a Row but SQL UPDATE failed                                                                              \n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32m╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\u001b[0m\n", + "\u001b[1;32m║\u001b[0m Update a Row but SQL UPDATE failed \u001b[1;32m║\u001b[0m\n", + "\u001b[1;32m╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(\n", + " Panel(\n", + " \"Update a Row but SQL UPDATE failed\", box=box.DOUBLE, border_style=\"bold green\"\n", + " )\n", + ")\n", + "\n", + "html_content_2 = b\"this is html 2\"\n", + "image_content_2 = b\"this is image 2\"\n", + "utc_now = get_utc_now()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ea720a63-72ec-43db-8e6f-2486dce3b5a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Write S3 first, then write DB                                                                                   │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Write S3 first, then write DB │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
PutS3Result(\n",
+       "    s3_client=<botocore.client.S3 object at 0x108bfb340>,\n",
+       "    put_s3backed_column_results=[\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='html',\n",
+       "            old_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89f84a266b0d828a97c33',\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=1970166ef8b56a6221f64f063daec3e3',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89\n",
+       "f84a266b0d828a97c33'\n",
+       "            },\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=1970166ef8b56\n",
+       "a6221f64f063daec3e3'\n",
+       "            }\n",
+       "        ),\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='image',\n",
+       "            old_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e47117d14e1524a83ae0e',\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=b3459abcea88aaa928939311a8a2f9f0',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e\n",
+       "47117d14e1524a83ae0e'\n",
+       "            },\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=b3459abcea88\n",
+       "aaa928939311a8a2f9f0'\n",
+       "            }\n",
+       "        )\n",
+       "    ]\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mPutS3Result\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33ms3_client\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;95mbotocore.client.S3\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x108bfb340\u001b[0m\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mput_s3backed_column_results\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'html'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89f84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m1970166ef8b56a6221f64f063daec3e3\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_old_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m{\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Bucket'\u001b[0m\u001b[39m: \u001b[0m\u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Key'\u001b[0m\u001b[39m: \u001b[0m\n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89\u001b[0m\n", + "\u001b[32mf84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_new_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m{\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Bucket'\u001b[0m\u001b[39m: \u001b[0m\u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Key'\u001b[0m\u001b[39m: \u001b[0m\n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m1970166ef8b56\u001b[0m\n", + "\u001b[32ma6221f64f063daec3e3\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m 
\u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'image'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32mb3459abcea88aaa928939311a8a2f9f0\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m\u001b[0m\u001b[1m>\u001b[0m,\n", + " \u001b[33mcleanup_old_kwargs\u001b[0m=\u001b[1m{\u001b[0m\n", + " \u001b[32m'Bucket'\u001b[0m: \u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m,\n", + " \u001b[32m'Key'\u001b[0m: \n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e\u001b[0m\n", + "\u001b[32m47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[33mcleanup_new_kwargs\u001b[0m=\u001b[1m{\u001b[0m\n", + " \u001b[32m'Bucket'\u001b[0m: \u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m,\n", + " \u001b[32m'Key'\u001b[0m: \n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32mb3459abcea88\u001b[0m\n", + "\u001b[32maaa928939311a8a2f9f0\u001b[0m\u001b[32m'\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + " \u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Write S3 first, then write DB\"))\n", + "put_s3_result = aws_s3.put_s3(\n", + " api_calls=[\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"html\",\n", + " binary=html_content_2,\n", + " # since this is an updates, you have to specify the old s3 object,\n", + " # even it is None. we need this information to clean up old s3 object\n", + " # when SQL UPDATE succeeded\n", + " old_s3_uri=task1.html,\n", + " extra_put_object_kwargs=html_additional_kwargs,\n", + " ),\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"image\",\n", + " binary=image_content_2,\n", + " # since this is an updates, you have to specify the old s3 object,\n", + " # even it is None. 
we need this information to clean up old s3 object\n", + " # when SQL UPDATE succeeded\n", + " old_s3_uri=task1.image,\n", + " extra_put_object_kwargs=image_additional_kwargs,\n", + " ),\n", + " ],\n", + " s3_client=bsm.s3_client,\n", + " pk=url,\n", + " bucket=s3dir_root.bucket,\n", + " prefix=s3dir_root.key,\n", + " update_at=utc_now,\n", + " is_pk_url_safe=False,\n", + ")\n", + "rprint(put_s3_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d803f017-49cf-4ca5-8dae-176221856288", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
SQL UPDATE Failed! Error: UserError()\n",
+       "
\n" + ], + "text/plain": [ + "SQL UPDATE Failed! Error: \u001b[1;35mUserError\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with orm.Session(engine) as ses:\n", + " try:\n", + " with ses.begin():\n", + " stmt = (\n", + " sa.update(Task).where(Task.url == url)\n", + " # this is a helper method that convert the put s3 results\n", + " # to INSERT / UPDATE values\n", + " .values(update_at=utc_now, **put_s3_result.to_values())\n", + " )\n", + " # intentionally raises an error to simulate a database failure\n", + " raise UserError()\n", + " ses.execute(stmt)\n", + " print(\"SQL UPDATE Succeeded!\")\n", + " # clean up old s3 object when update row succeeded\n", + " # if you don't want to do that, just don't run this method\n", + " put_s3_result.clean_up_old_s3_object_when_update_row_succeeded()\n", + " except Exception as e:\n", + " rprint(f\"SQL UPDATE Failed! Error: {e!r}\")\n", + " # clean up created s3 object when update row failed\n", + " # if you don't want to do that, just don't run this method\n", + " put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "06b5444f-6caf-4fd0-8715-085fed76d718", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Database row should not be updated                                                                              │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Database row should not be updated │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
{\n",
+       "    '_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x108b674c0>,\n",
+       "    'update_at': datetime.datetime(2024, 5, 27, 5, 24, 22, 781233),\n",
+       "    'url': 'https://www.example.com',\n",
+       "    'image': \n",
+       "'s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5\n",
+       "jb20=/col=image/md5=9a48fd95141e47117d14e1524a83ae0e',\n",
+       "    'html': \n",
+       "'s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5\n",
+       "jb20=/col=html/md5=3fcad0c630c89f84a266b0d828a97c33'\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'_sa_instance_state'\u001b[0m: \u001b[1m<\u001b[0m\u001b[1;95msqlalchemy.orm.state.InstanceState\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x108b674c0\u001b[0m\u001b[1m>\u001b[0m,\n", + " \u001b[32m'update_at'\u001b[0m: \u001b[1;35mdatetime.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2024\u001b[0m, \u001b[1;36m5\u001b[0m, \u001b[1;36m27\u001b[0m, \u001b[1;36m5\u001b[0m, \u001b[1;36m24\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m781233\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[32m'url'\u001b[0m: \u001b[32m'https://www.example.com'\u001b[0m,\n", + " \u001b[32m'image'\u001b[0m: \n", + "\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5\u001b[0m\n", + "\u001b[32mjb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m,\n", + " \u001b[32m'html'\u001b[0m: \n", + "\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5\u001b[0m\n", + "\u001b[32mjb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89f84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Old S3 object should still be there                                                                             │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Old S3 object should still be there │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task1.html).read_bytes() = b'<html>this is html 1</html>'\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask1.html\u001b[1m)\u001b[0m\u001b[1;35m.read_bytes\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[32mb'\u001b[0m\u001b[32m<\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m>this is html 1\u001b[0m\u001b[32m'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task1.image).read_bytes() = b'this is image 1'\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask1.image\u001b[1m)\u001b[0m\u001b[1;35m.read_bytes\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[32mb'this is image 1'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ New S3 object should be deleted                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ New S3 object should be deleted │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(html_s3_uri).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mhtml_s3_uri\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(image_s3_uri).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mimage_s3_uri\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Database row should not be updated\"))\n", + "task2: Task = ses.get(Task, url)\n", + "rprint(task2.__dict__)\n", + "assert task2.update_at < utc_now\n", + "rprint(Panel(\"Old S3 object should still be there\"))\n", + "rprint(f\"{S3Path(task1.html).read_bytes() = }\")\n", + "rprint(f\"{S3Path(task1.image).read_bytes() = }\")\n", + "assert S3Path(task1.html).read_bytes() == html_content_1\n", + "assert S3Path(task1.image).read_bytes() == image_content_1\n", + "rprint(Panel(\"New S3 object should be deleted\"))\n", + "values = put_s3_result.to_values()\n", + "html_s3_uri = values[\"html\"]\n", + "image_s3_uri = values[\"image\"]\n", + "rprint(f\"{S3Path(html_s3_uri).exists() = }\")\n", + "rprint(f\"{S3Path(image_s3_uri).exists() = }\")\n", + "assert S3Path(html_s3_uri).exists() is False\n", + "assert S3Path(image_s3_uri).exists() is False" + ] + }, + { + "cell_type": "markdown", + "id": "7ec211aa-c9fc-436e-8399-f50e834eb7f7", + "metadata": {}, + "source": [ + "## Update a Row and SQL UPDATE succeeded" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "9ec4114e-50a6-473b-84a8-8f46adf25460", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\n",
+       " Update a Row and SQL UPDATE succeeded                                                                           \n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32m╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\u001b[0m\n", + "\u001b[1;32m║\u001b[0m Update a Row and SQL UPDATE succeeded \u001b[1;32m║\u001b[0m\n", + "\u001b[1;32m╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(\n", + " Panel(\n", + " \"Update a Row and SQL UPDATE succeeded\",\n", + " box=box.DOUBLE,\n", + " border_style=\"bold green\",\n", + " )\n", + ")\n", + "utc_now = get_utc_now()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d717d29a-f06c-4a09-834d-db423fb5fff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Write S3 first, then write DB                                                                                   │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Write S3 first, then write DB │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
PutS3Result(\n",
+       "    s3_client=<botocore.client.S3 object at 0x108bfb340>,\n",
+       "    put_s3backed_column_results=[\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='html',\n",
+       "            old_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89f84a266b0d828a97c33',\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=1970166ef8b56a6221f64f063daec3e3',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=3fcad0c630c89\n",
+       "f84a266b0d828a97c33'\n",
+       "            },\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=html/md5=1970166ef8b56\n",
+       "a6221f64f063daec3e3'\n",
+       "            }\n",
+       "        ),\n",
+       "        PutS3BackedColumnResult(\n",
+       "            column='image',\n",
+       "            old_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e47117d14e1524a83ae0e',\n",
+       "            new_s3_uri='s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0\n",
+       "cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=b3459abcea88aaa928939311a8a2f9f0',\n",
+       "            executed=True,\n",
+       "            cleanup_function=<bound method ClientCreator._create_api_method.<locals>._api_call of \n",
+       "<botocore.client.S3 object at 0x108bfb340>>,\n",
+       "            cleanup_old_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=9a48fd95141e\n",
+       "47117d14e1524a83ae0e'\n",
+       "            },\n",
+       "            cleanup_new_kwargs={\n",
+       "                'Bucket': 'bmt-app-dev-us-east-1-data',\n",
+       "                'Key': \n",
+       "'projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5jb20=/col=image/md5=b3459abcea88\n",
+       "aaa928939311a8a2f9f0'\n",
+       "            }\n",
+       "        )\n",
+       "    ]\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mPutS3Result\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33ms3_client\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;95mbotocore.client.S3\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x108bfb340\u001b[0m\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mput_s3backed_column_results\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'html'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89f84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m1970166ef8b56a6221f64f063daec3e3\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m>,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_old_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m{\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Bucket'\u001b[0m\u001b[39m: \u001b[0m\u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Key'\u001b[0m\u001b[39m: \u001b[0m\n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m3fcad0c630c89\u001b[0m\n", + "\u001b[32mf84a266b0d828a97c33\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_new_kwargs\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m{\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Bucket'\u001b[0m\u001b[39m: \u001b[0m\u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[32m'Key'\u001b[0m\u001b[39m: \u001b[0m\n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m1970166ef8b56\u001b[0m\n", + "\u001b[32ma6221f64f063daec3e3\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[1;35mPutS3BackedColumnResult\u001b[0m\u001b[1;39m(\u001b[0m\n", + "\u001b[39m 
\u001b[0m\u001b[33mcolumn\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'image'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mold_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mnew_s3_uri\u001b[0m\u001b[39m=\u001b[0m\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0\u001b[0m\n", + "\u001b[32mcHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32mb3459abcea88aaa928939311a8a2f9f0\u001b[0m\u001b[32m'\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mexecuted\u001b[0m\u001b[39m=\u001b[0m\u001b[3;92mTrue\u001b[0m\u001b[39m,\u001b[0m\n", + "\u001b[39m \u001b[0m\u001b[33mcleanup_function\u001b[0m\u001b[39m=._api_call of \u001b[0m\n", + "\u001b[39m\u001b[0m\u001b[1m>\u001b[0m,\n", + " \u001b[33mcleanup_old_kwargs\u001b[0m=\u001b[1m{\u001b[0m\n", + " \u001b[32m'Bucket'\u001b[0m: \u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m,\n", + " \u001b[32m'Key'\u001b[0m: \n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m9a48fd95141e\u001b[0m\n", + "\u001b[32m47117d14e1524a83ae0e\u001b[0m\u001b[32m'\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[33mcleanup_new_kwargs\u001b[0m=\u001b[1m{\u001b[0m\n", + " \u001b[32m'Bucket'\u001b[0m: \u001b[32m'bmt-app-dev-us-east-1-data'\u001b[0m,\n", + " \u001b[32m'Key'\u001b[0m: \n", + "\u001b[32m'projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5jb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32mb3459abcea88\u001b[0m\n", + "\u001b[32maaa928939311a8a2f9f0\u001b[0m\u001b[32m'\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + " \u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Write S3 first, then write DB\"))\n", + "put_s3_result = aws_s3.put_s3(\n", + " api_calls=[\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"html\",\n", + " binary=html_content_2,\n", + " # since this is an updates, you have to specify the old s3 object,\n", + " # even it is None. we need this information to clean up old s3 object\n", + " # when SQL UPDATE succeeded\n", + " old_s3_uri=task1.html,\n", + " extra_put_object_kwargs=html_additional_kwargs,\n", + " ),\n", + " aws_s3.PutS3ApiCall(\n", + " column=\"image\",\n", + " binary=image_content_2,\n", + " # since this is an updates, you have to specify the old s3 object,\n", + " # even it is None. 
we need this information to clean up old s3 object\n",
+    "            # when SQL UPDATE succeeded\n",
+    "            old_s3_uri=task1.image,\n",
+    "            extra_put_object_kwargs=image_additional_kwargs,\n",
+    "        ),\n",
+    "    ],\n",
+    "    s3_client=bsm.s3_client,\n",
+    "    pk=url,\n",
+    "    bucket=s3dir_root.bucket,\n",
+    "    prefix=s3dir_root.key,\n",
+    "    update_at=utc_now,\n",
+    "    is_pk_url_safe=False,\n",
+    ")\n",
+    "rprint(put_s3_result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "c5161ceb-f439-4467-bcae-642a4e987fb4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SQL UPDATE Succeeded!\n"
+     ]
+    }
+   ],
+   "source": [
+    "with orm.Session(engine) as ses:\n",
+    "    try:\n",
+    "        with ses.begin():\n",
+    "            stmt = (\n",
+    "                sa.update(Task).where(Task.url == url)\n",
+    "                # this is a helper method that converts the put s3 results\n",
+    "                # to INSERT / UPDATE values\n",
+    "                .values(update_at=utc_now, **put_s3_result.to_values())\n",
+    "            )\n",
+    "            ses.execute(stmt)\n",
+    "            print(\"SQL UPDATE Succeeded!\")\n",
+    "            # clean up old s3 object when update row succeeded\n",
+    "            # if you don't want to do that, just don't run this method\n",
+    "            put_s3_result.clean_up_old_s3_object_when_update_row_succeeded()\n",
+    "    except Exception as e:\n",
+    "        rprint(f\"SQL UPDATE Failed! Error: {e!r}\")\n",
+    "        # clean up created s3 object when update row failed\n",
+    "        # if you don't want to do that, just don't run this method\n",
+    "        put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "d61d70d0-b989-485b-93a8-4f25efb93a80",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Database row should be updated                                                                                  │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Database row should be updated │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
{\n",
+       "    '_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x108c11a80>,\n",
+       "    'update_at': datetime.datetime(2024, 5, 27, 5, 25, 59, 171833),\n",
+       "    'url': 'https://www.example.com',\n",
+       "    'image': \n",
+       "'s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5\n",
+       "jb20=/col=image/md5=b3459abcea88aaa928939311a8a2f9f0',\n",
+       "    'html': \n",
+       "'s3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/aHR0cHM6Ly93d3cuZXhhbXBsZS5\n",
+       "jb20=/col=html/md5=1970166ef8b56a6221f64f063daec3e3'\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'_sa_instance_state'\u001b[0m: \u001b[1m<\u001b[0m\u001b[1;95msqlalchemy.orm.state.InstanceState\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x108c11a80\u001b[0m\u001b[1m>\u001b[0m,\n", + " \u001b[32m'update_at'\u001b[0m: \u001b[1;35mdatetime.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2024\u001b[0m, \u001b[1;36m5\u001b[0m, \u001b[1;36m27\u001b[0m, \u001b[1;36m5\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m59\u001b[0m, \u001b[1;36m171833\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[32m'url'\u001b[0m: \u001b[32m'https://www.example.com'\u001b[0m,\n", + " \u001b[32m'image'\u001b[0m: \n", + "\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5\u001b[0m\n", + "\u001b[32mjb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mimage\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32mb3459abcea88aaa928939311a8a2f9f0\u001b[0m\u001b[32m'\u001b[0m,\n", + " \u001b[32m'html'\u001b[0m: \n", + "\u001b[32m's3://bmt-app-dev-us-east-1-data/projects/sqlalchemy_mate/patterns/s3backed_column/data/\u001b[0m\u001b[32maHR0cHM6Ly93d3cuZXhhbXBsZS5\u001b[0m\n", + "\u001b[32mjb20\u001b[0m\u001b[32m=/\u001b[0m\u001b[32mcol\u001b[0m\u001b[32m=\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m/\u001b[0m\u001b[32mmd5\u001b[0m\u001b[32m=\u001b[0m\u001b[32m1970166ef8b56a6221f64f063daec3e3\u001b[0m\u001b[32m'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Old S3 object should be deleted                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Old S3 object should be deleted │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task1.html).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask1.html\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task1.image).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask1.image\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ New S3 object should be created                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ New S3 object should be created │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task2.html).read_bytes() = b'<html>this is html 2</html>'\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask2.html\u001b[1m)\u001b[0m\u001b[1;35m.read_bytes\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[32mb'\u001b[0m\u001b[32m<\u001b[0m\u001b[32mhtml\u001b[0m\u001b[32m>this is html 2\u001b[0m\u001b[32m'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task2.image).read_bytes() = b'this is image 2'\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask2.image\u001b[1m)\u001b[0m\u001b[1;35m.read_bytes\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[32mb'this is image 2'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Database row should be updated\"))\n", + "task2: Task = ses.get(Task, url)\n", + "rprint(task2.__dict__)\n", + "assert task2.update_at == utc_now\n", + "rprint(Panel(\"Old S3 object should be deleted\"))\n", + "rprint(f\"{S3Path(task1.html).exists() = }\")\n", + "rprint(f\"{S3Path(task1.image).exists() = }\")\n", + "assert S3Path(task1.html).exists() is False\n", + "assert S3Path(task1.image).exists() is False\n", + "rprint(Panel(\"New S3 object should be created\"))\n", + "rprint(f\"{S3Path(task2.html).read_bytes() = }\")\n", + "rprint(f\"{S3Path(task2.image).read_bytes() = }\")\n", + "assert S3Path(task2.html).read_bytes() == html_content_2\n", + "assert S3Path(task2.image).read_bytes() == image_content_2" + ] + }, + { + "cell_type": "markdown", + "id": "019d46c6-f178-4d31-81e2-aed8ce02614d", + "metadata": {}, + "source": [ + "## Delete a Row and SQL DELETE succeeded" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "5c2b355d-89dd-4f4e-b892-71f71a6bdab5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\n",
+       " Delete a Row and SQL DELETE succeeded                                                                           \n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32m╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗\u001b[0m\n", + "\u001b[1;32m║\u001b[0m Delete a Row and SQL DELETE succeeded \u001b[1;32m║\u001b[0m\n", + "\u001b[1;32m╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(\n", + " Panel(\n", + " \"Delete a Row and SQL DELETE succeeded\",\n", + " box=box.DOUBLE,\n", + " border_style=\"bold green\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "fdb28522-798a-4b10-8542-f5b7f13e59fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Delete DB first, then delete S3                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Delete DB first, then delete S3 │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SQL DELETE Succeeded!\n" + ] + } + ], + "source": [ + "rprint(Panel(\"Delete DB first, then delete S3\"))\n", + "with orm.Session(engine) as ses:\n", + " task3: Task = ses.get(Task, url)\n", + " try:\n", + " stmt = sa.delete(Task).where(Task.url == url)\n", + " res = ses.execute(stmt)\n", + " ses.commit()\n", + " if res.rowcount == 1:\n", + " print(\"SQL DELETE Succeeded!\")\n", + " # clean up old s3 object when delete row succeeded\n", + " # if you don't want to do that, just don't run this method\n", + " if task3.html:\n", + " S3Path(task3.html).delete()\n", + " if task3.image:\n", + " S3Path(task3.image).delete()\n", + " else:\n", + " print(\"SQL DELETE Failed! No row affected.\")\n", + " except Exception as e:\n", + " ses.rollback()\n", + " rprint(f\"SQL DELETE Failed! Error: {e!r}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "35619b46-1637-46a1-bd79-cdb8cb83e62d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Database row should be deleted                                                                                  │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Database row should be deleted │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
ses.get(Task, url) = None\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mses.get\u001b[0m\u001b[1m(\u001b[0mTask, url\u001b[1m)\u001b[0m = \u001b[3;35mNone\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n",
+       "│ Old S3 object should be deleted                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\n", + "│ Old S3 object should be deleted │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task3.html).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask3.html\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
S3Path(task3.image).exists() = False\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mS3Path\u001b[0m\u001b[1m(\u001b[0mtask3.image\u001b[1m)\u001b[0m\u001b[1;35m.exists\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m = \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rprint(Panel(\"Database row should be deleted\"))\n", + "rprint(f\"{ses.get(Task, url) = }\")\n", + "assert ses.get(Task, url) is None\n", + "rprint(Panel(\"Old S3 object should be deleted\"))\n", + "rprint(f\"{S3Path(task3.html).exists() = }\")\n", + "rprint(f\"{S3Path(task3.image).exists() = }\")\n", + "assert S3Path(task3.html).exists() is False\n", + "assert S3Path(task3.image).exists() is False\n" + ] + }, + { + "cell_type": "markdown", + "id": "fcdcc8aa-9aa9-4e76-b57a-da7f428116a5", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "By leveraging the capabilities of ``sqlalchemy_mate``, developers can build scalable and efficient systems that handle large binary data with ease. The module's Pythonic interface, flexibility in storage backends, and extensibility make it a powerful tool for managing the lifecycle of large binary objects while ensuring data consistency and integrity." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab7b3fa6-450f-4e1b-95ec-975ca7dce2a5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/05-Patterns/Large-Binary-Column-AWS-S3-Backend/test_s3_backed_column.py b/docs/source/05-Patterns/Large-Binary-Column-AWS-S3-Backend/test_s3_backed_column.py new file mode 100644 index 0000000..1b48d55 --- /dev/null +++ b/docs/source/05-Patterns/Large-Binary-Column-AWS-S3-Backend/test_s3_backed_column.py @@ -0,0 +1,428 @@ +# -*- coding: utf-8 -*- + +""" +一个用于展示如何正确实现 SQL DB 和 AWS S3 双写一致性的问题的例子. + +Storing large binary data directly in a relational database can lead to performance and scalability issues. As the size of the binary data grows, it consumes valuable database disk space and increases the I/O overhead, potentially impacting query performance and overall database efficiency. To address this challenge, the recommended best practice is to employ a pattern that leverages external storage backends for storing large binary data while maintaining a reference to the data's location within the database. +By storing only a unique resource identifier (URI) as a column in the relational database, the actual binary data is offloaded to a dedicated storage layer. This approach allows for better utilization of database resources, as the database focuses on storing structured data and efficient querying. The external storage backend, such as a file system or cloud storage like Amazon S3, is optimized for handling large binary objects, providing scalability and cost-effectiveness. + +**Example** + +In this comprehensive example, we aim to demonstrate the complete lifecycle management of large binary data using the pattern that leverages an external storage system like **Amazon S3** in conjunction with a relational database. 
The example covers various scenarios to showcase the proper handling of data consistency and integrity. + +1. In the first scenario, we attempt to **create a new row** in the database with a column containing a reference to the large binary data stored in S3. However, if the SQL ``INSERT`` operation fails unexpectedly, it is crucial to maintain data consistency by removing the orphaned S3 object (optionally, your choice). This ensures that there are no dangling references or unused data in the external storage. +2. The second scenario illustrates a successful **creation of a row** with a large binary data column. Here, we can observe how the binary data is efficiently stored in S3 and the corresponding reference is inserted into the database column. +3. In the third scenario, we try to **update the value of a large binary column in an existing row**. If the SQL ``UPDATE`` operation fails, it is essential to maintain the integrity of the data. We can see that the old S3 object remains unchanged, and the new S3 object, if created, is removed (optionally, your choice) to keep the system in a consistent state. +4. The fourth scenario demonstrates a successful **update of a large binary column value**. In this case, we can observe how the old S3 object is deleted (optionally, your choice) to free up storage space, and the new S3 object is created to reflect the updated binary data. This ensures that the database and S3 remain in sync. +5. Finally, the fifth scenario showcases the **deletion of a row** containing a large binary column. When a row is deleted from the database, it is important to clean up (optionally, your choice) the associated S3 object as well. By removing the corresponding S3 object, we maintain data consistency and prevent any orphaned binary data from lingering in the external storage. + +**Conclusion** + +By leveraging the capabilities of sqlalchemy_mate, developers can build scalable and efficient systems that handle large binary data with ease. The module's Pythonic interface, flexibility in storage backends, and extensibility make it a powerful tool for managing the lifecycle of large binary objects while ensuring data consistency and integrity. 
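+
+**The protocol in a nutshell**
+
+Every scenario in this script follows the same ordering rule: write S3 first,
+then write the database, then clean up whichever side has become orphaned.
+A condensed sketch (using the ``aws_s3`` helpers imported below; error
+handling is trimmed to the essentials)::
+
+    put_s3_result = aws_s3.put_s3(api_calls=[...], s3_client=..., pk=url, ...)
+    try:
+        with orm.Session(engine) as ses:
+            with ses.begin():
+                ses.add(Task(url=url, update_at=utc_now, **put_s3_result.to_values()))
+        # row write succeeded: the old S3 objects (if any) are now orphaned
+        put_s3_result.clean_up_old_s3_object_when_update_row_succeeded()
+    except Exception:
+        # row write failed: the newly created S3 objects are now orphaned
+        put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()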
+""" + +import typing as T +from datetime import datetime + +from s3pathlib import S3Path, context +from boto_session_manager import BotoSesManager + +import sqlalchemy as sa +import sqlalchemy.orm as orm +import sqlalchemy_mate.api as sam + +from rich import print as rprint +from rich import box +from rich.console import Console +from rich.panel import Panel + +aws_s3 = sam.patterns.large_binary_column.aws_s3 +console = Console() + + +def get_utc_now() -> datetime: + return datetime.utcnow() + + +Base = orm.declarative_base() + + +class Task(Base): + __tablename__ = "tasks" + + url: orm.Mapped[str] = orm.mapped_column(sa.String, primary_key=True) + update_at: orm.Mapped[datetime] = orm.mapped_column(sa.DateTime) + html: orm.Mapped[T.Optional[str]] = orm.mapped_column(sa.String, nullable=True) + image: orm.Mapped[T.Optional[str]] = orm.mapped_column(sa.String, nullable=True) + + +engine = sa.create_engine("sqlite:///:memory:") +Base.metadata.create_all(engine) + +bsm = BotoSesManager() +context.attach_boto_session(bsm.boto_ses) +bucket = f"{bsm.aws_account_alias}-{bsm.aws_region}-data" +s3dir_root = S3Path( + f"s3://{bucket}/projects/sqlalchemy_mate/patterns/s3backed_column/data/" +).to_dir() + +# clean up everything in database and s3 to ensure a fresh start +with engine.connect() as conn: + conn.execute(Task.__table__.delete()) + conn.commit() +s3dir_root.delete() + + +# ------------------------------------------------------------------------------ +# Create a Row but SQL INSERT failed +# ------------------------------------------------------------------------------ +______Create_a_Row_but_SQL_INSERT_failed = None +rprint( + Panel( + "Create a Row but SQL INSERT failed", box=box.DOUBLE, border_style="bold green" + ) +) + +url = "https://www.example.com" +html_content_1 = b"this is html 1" +image_content_1 = b"this is image 1" +html_additional_kwargs = dict(ContentType="text/html") +image_additional_kwargs = dict(ContentType="image/jpeg") +utc_now = get_utc_now() + +rprint(Panel("Write S3 first, then write DB")) +put_s3_result = aws_s3.put_s3( + api_calls=[ + aws_s3.PutS3ApiCall( + column="html", + binary=html_content_1, + old_s3_uri=None, + extra_put_object_kwargs=html_additional_kwargs, + ), + aws_s3.PutS3ApiCall( + column="image", + binary=image_content_1, + old_s3_uri=None, + extra_put_object_kwargs=image_additional_kwargs, + ), + ], + s3_client=bsm.s3_client, + pk=url, + bucket=s3dir_root.bucket, + prefix=s3dir_root.key, + update_at=utc_now, + is_pk_url_safe=False, +) +rprint(put_s3_result) + + +class UserError(Exception): + pass + + +with orm.Session(engine) as ses: + try: + with ses.begin(): + task1 = Task( + url=url, + update_at=utc_now, + # this is a helper method that convert the put s3 results + # to INSERT / UPDATE values + **put_s3_result.to_values(), + ) + # intentionally raises an error to simulate a database failure + raise UserError() + ses.add(task1) + rprint("SQL INSERT Succeeded!") + except Exception as e: + rprint(f"SQL INSERT Failed! 
Error: {e!r}") + # clean up created s3 object when create row failed + # if you don't want to do that, just don't run this method + put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed() + +rprint(Panel("Database row should not exists")) +rprint(f"{ses.get(Task, url) = }") +assert ses.get(Task, url) is None +rprint(Panel("S3 object should be deleted")) +values = put_s3_result.to_values() +html_s3_uri = values["html"] +image_s3_uri = values["image"] +rprint(f"{S3Path(html_s3_uri).exists() = }") +rprint(f"{S3Path(image_s3_uri).exists() = }") +assert S3Path(html_s3_uri).exists() is False +assert S3Path(image_s3_uri).exists() is False + +# ------------------------------------------------------------------------------ +# Create a Row and SQL INSERT succeeded +# ------------------------------------------------------------------------------ +______Create_a_Row_and_SQL_INSERT_succeeded = None +rprint( + Panel( + "Create a Row and SQL INSERT succeeded", + box=box.DOUBLE, + border_style="bold green", + ) +) +utc_now = get_utc_now() + +rprint(Panel("Write S3 first, then write DB")) +put_s3_result = aws_s3.put_s3( + api_calls=[ + aws_s3.PutS3ApiCall( + column="html", + binary=html_content_1, + old_s3_uri=None, + extra_put_object_kwargs=html_additional_kwargs, + ), + aws_s3.PutS3ApiCall( + column="image", + binary=image_content_1, + old_s3_uri=None, + extra_put_object_kwargs=image_additional_kwargs, + ), + ], + s3_client=bsm.s3_client, + pk=url, + bucket=s3dir_root.bucket, + prefix=s3dir_root.key, + update_at=utc_now, + is_pk_url_safe=False, +) +rprint(put_s3_result) + +with orm.Session(engine) as ses: + try: + with ses.begin(): + task1 = Task( + url=url, + update_at=utc_now, + # this is a helper method that convert the put s3 results + # to INSERT / UPDATE values + **put_s3_result.to_values(), + ) + ses.add(task1) + rprint("SQL INSERT Succeeded!") + except Exception as e: + rprint(f"SQL INSERT Failed, error: {e!r}") + # clean up created s3 object when create row failed + # if you don't want to do that, just don't run this method + put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed() + +rprint(Panel("Database row should be inserted")) +task1: Task = ses.get(Task, url) +rprint(task1.__dict__) +assert task1.url == url +assert task1.update_at == utc_now +rprint(Panel("S3 object should be created")) +rprint(f"{S3Path(task1.html).read_bytes() = }") +rprint(f"{S3Path(task1.image).read_bytes() = }") +assert S3Path(task1.html).read_bytes() == html_content_1 +assert S3Path(task1.image).read_bytes() == image_content_1 + +# ------------------------------------------------------------------------------ +# Update a Row but SQL UPDATE failed +# ------------------------------------------------------------------------------ +______Update_a_Row_but_SQL_UPDATE_failed = None +rprint( + Panel( + "Update a Row but SQL UPDATE failed", box=box.DOUBLE, border_style="bold green" + ) +) + +html_content_2 = b"this is html 2" +image_content_2 = b"this is image 2" +utc_now = get_utc_now() + +rprint(Panel("Write S3 first, then write DB")) +put_s3_result = aws_s3.put_s3( + api_calls=[ + aws_s3.PutS3ApiCall( + column="html", + binary=html_content_2, + # since this is an updates, you have to specify the old s3 object, + # even it is None. 
we need this information to clean up old s3 object + # when SQL UPDATE succeeded + old_s3_uri=task1.html, + extra_put_object_kwargs=html_additional_kwargs, + ), + aws_s3.PutS3ApiCall( + column="image", + binary=image_content_2, + # since this is an updates, you have to specify the old s3 object, + # even it is None. we need this information to clean up old s3 object + # when SQL UPDATE succeeded + old_s3_uri=task1.image, + extra_put_object_kwargs=image_additional_kwargs, + ), + ], + s3_client=bsm.s3_client, + pk=url, + bucket=s3dir_root.bucket, + prefix=s3dir_root.key, + update_at=utc_now, + is_pk_url_safe=False, +) +rprint(put_s3_result) + +with orm.Session(engine) as ses: + try: + with ses.begin(): + stmt = ( + sa.update(Task).where(Task.url == url) + # this is a helper method that convert the put s3 results + # to INSERT / UPDATE values + .values(update_at=utc_now, **put_s3_result.to_values()) + ) + # intentionally raises an error to simulate a database failure + raise UserError() + ses.execute(stmt) + print("SQL UPDATE Succeeded!") + # clean up old s3 object when update row succeeded + # if you don't want to do that, just don't run this method + put_s3_result.clean_up_old_s3_object_when_update_row_succeeded() + except Exception as e: + rprint(f"SQL UPDATE Failed! Error: {e!r}") + # clean up created s3 object when update row failed + # if you don't want to do that, just don't run this method + put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed() + +rprint(Panel("Database row should not be updated")) +task2: Task = ses.get(Task, url) +rprint(task2.__dict__) +assert task2.update_at < utc_now +rprint(Panel("Old S3 object should still be there")) +rprint(f"{S3Path(task1.html).read_bytes() = }") +rprint(f"{S3Path(task1.image).read_bytes() = }") +assert S3Path(task1.html).read_bytes() == html_content_1 +assert S3Path(task1.image).read_bytes() == image_content_1 +rprint(Panel("New S3 object should be deleted")) +values = put_s3_result.to_values() +html_s3_uri = values["html"] +image_s3_uri = values["image"] +rprint(f"{S3Path(html_s3_uri).exists() = }") +rprint(f"{S3Path(image_s3_uri).exists() = }") +assert S3Path(html_s3_uri).exists() is False +assert S3Path(image_s3_uri).exists() is False + +# ------------------------------------------------------------------------------ +# Update a Row and SQL UPDATE succeeded +# ------------------------------------------------------------------------------ +______Update_a_Row_and_SQL_UPDATE_succeeded = None +rprint( + Panel( + "Update a Row and SQL UPDATE succeeded", + box=box.DOUBLE, + border_style="bold green", + ) +) +utc_now = get_utc_now() + +rprint(Panel("Write S3 first, then write DB")) +put_s3_result = aws_s3.put_s3( + api_calls=[ + aws_s3.PutS3ApiCall( + column="html", + binary=html_content_2, + # since this is an updates, you have to specify the old s3 object, + # even it is None. we need this information to clean up old s3 object + # when SQL UPDATE succeeded + old_s3_uri=task1.html, + extra_put_object_kwargs=html_additional_kwargs, + ), + aws_s3.PutS3ApiCall( + column="image", + binary=image_content_2, + # since this is an updates, you have to specify the old s3 object, + # even it is None. 
we need this information to clean up old s3 object + # when SQL UPDATE succeeded + old_s3_uri=task1.image, + extra_put_object_kwargs=image_additional_kwargs, + ), + ], + s3_client=bsm.s3_client, + pk=url, + bucket=s3dir_root.bucket, + prefix=s3dir_root.key, + update_at=utc_now, + is_pk_url_safe=False, +) +rprint(put_s3_result) + +with orm.Session(engine) as ses: + try: + with ses.begin(): + stmt = ( + sa.update(Task).where(Task.url == url) + # this is a helper method that convert the put s3 results + # to INSERT / UPDATE values + .values(update_at=utc_now, **put_s3_result.to_values()) + ) + ses.execute(stmt) + print("SQL UPDATE Succeeded!") + # clean up old s3 object when update row succeeded + # if you don't want to do that, just don't run this method + put_s3_result.clean_up_old_s3_object_when_update_row_succeeded() + except Exception as e: + rprint(f"SQL UPDATE Failed! Error: {e!r}") + # clean up created s3 object when update row failed + # if you don't want to do that, just don't run this method + put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed() + +rprint(Panel("Database row should be updated")) +task2: Task = ses.get(Task, url) +rprint(task2.__dict__) +assert task2.update_at == utc_now +rprint(Panel("Old S3 object should be deleted")) +rprint(f"{S3Path(task1.html).exists() = }") +rprint(f"{S3Path(task1.image).exists() = }") +assert S3Path(task1.html).exists() is False +assert S3Path(task1.image).exists() is False +rprint(Panel("New S3 object should be created")) +rprint(f"{S3Path(task2.html).read_bytes() = }") +rprint(f"{S3Path(task2.image).read_bytes() = }") +assert S3Path(task2.html).read_bytes() == html_content_2 +assert S3Path(task2.image).read_bytes() == image_content_2 + + +# ------------------------------------------------------------------------------ +# Delete a Row and SQL DELETE succeeded +# ------------------------------------------------------------------------------ +______Delete_a_Row_and_SQL_DELETE_succeeded = None +rprint( + Panel( + "Delete a Row and SQL DELETE succeeded", + box=box.DOUBLE, + border_style="bold green", + ) +) + +rprint(Panel("Delete DB first, then delete S3")) + +with orm.Session(engine) as ses: + task3: Task = ses.get(Task, url) + try: + stmt = sa.delete(Task).where(Task.url == url) + res = ses.execute(stmt) + ses.commit() + if res.rowcount == 1: + print("SQL DELETE Succeeded!") + # clean up old s3 object when delete row succeeded + # if you don't want to do that, just don't run this method + if task3.html: + S3Path(task3.html).delete() + if task3.image: + S3Path(task3.image).delete() + else: + print("SQL DELETE Failed! No row affected.") + except Exception as e: + ses.rollback() + rprint(f"SQL DELETE Failed! Error: {e!r}") + +rprint(Panel("Database row should be deleted")) +rprint(f"{ses.get(Task, url) = }") +assert ses.get(Task, url) is None +rprint(Panel("Old S3 object should be deleted")) +rprint(f"{S3Path(task3.html).exists() = }") +rprint(f"{S3Path(task3.image).exists() = }") +assert S3Path(task3.html).exists() is False +assert S3Path(task3.image).exists() is False diff --git a/docs/source/05-Patterns/index.rst b/docs/source/05-Patterns/index.rst index 6acb475..b9dac78 100644 --- a/docs/source/05-Patterns/index.rst +++ b/docs/source/05-Patterns/index.rst @@ -1,3 +1,4 @@ Patterns ============================================================================== .. 
autotoctree:: + :maxdepth: 1 diff --git a/docs/source/sqlalchemy_mate/patterns/__init__.rst b/docs/source/sqlalchemy_mate/patterns/__init__.rst index 69ce0ac..4ececa1 100644 --- a/docs/source/sqlalchemy_mate/patterns/__init__.rst +++ b/docs/source/sqlalchemy_mate/patterns/__init__.rst @@ -10,6 +10,7 @@ sub packages and modules .. toctree:: :maxdepth: 1 + large_binary_column status_tracker api \ No newline at end of file diff --git a/docs/source/sqlalchemy_mate/patterns/large_binary_column/__init__.rst b/docs/source/sqlalchemy_mate/patterns/large_binary_column/__init__.rst new file mode 100644 index 0000000..da385a4 --- /dev/null +++ b/docs/source/sqlalchemy_mate/patterns/large_binary_column/__init__.rst @@ -0,0 +1,19 @@ +large_binary_column +=================== + +.. automodule:: sqlalchemy_mate.patterns.large_binary_column + :members: + +sub packages and modules +------------------------ + +.. toctree:: + :maxdepth: 1 + + api + aws_s3 + aws_s3_api + helpers + local + local_api + \ No newline at end of file diff --git a/docs/source/sqlalchemy_mate/patterns/large_binary_column/api.rst b/docs/source/sqlalchemy_mate/patterns/large_binary_column/api.rst new file mode 100644 index 0000000..2d7e556 --- /dev/null +++ b/docs/source/sqlalchemy_mate/patterns/large_binary_column/api.rst @@ -0,0 +1,5 @@ +api +=== + +.. automodule:: sqlalchemy_mate.patterns.large_binary_column.api + :members: \ No newline at end of file diff --git a/docs/source/sqlalchemy_mate/patterns/large_binary_column/aws_s3.rst b/docs/source/sqlalchemy_mate/patterns/large_binary_column/aws_s3.rst new file mode 100644 index 0000000..5658b28 --- /dev/null +++ b/docs/source/sqlalchemy_mate/patterns/large_binary_column/aws_s3.rst @@ -0,0 +1,5 @@ +aws_s3 +====== + +.. automodule:: sqlalchemy_mate.patterns.large_binary_column.aws_s3 + :members: \ No newline at end of file diff --git a/docs/source/sqlalchemy_mate/patterns/large_binary_column/aws_s3_api.rst b/docs/source/sqlalchemy_mate/patterns/large_binary_column/aws_s3_api.rst new file mode 100644 index 0000000..33a49f1 --- /dev/null +++ b/docs/source/sqlalchemy_mate/patterns/large_binary_column/aws_s3_api.rst @@ -0,0 +1,5 @@ +aws_s3_api +========== + +.. automodule:: sqlalchemy_mate.patterns.large_binary_column.aws_s3_api + :members: \ No newline at end of file diff --git a/docs/source/sqlalchemy_mate/patterns/large_binary_column/helpers.rst b/docs/source/sqlalchemy_mate/patterns/large_binary_column/helpers.rst new file mode 100644 index 0000000..4b0a288 --- /dev/null +++ b/docs/source/sqlalchemy_mate/patterns/large_binary_column/helpers.rst @@ -0,0 +1,5 @@ +helpers +======= + +.. automodule:: sqlalchemy_mate.patterns.large_binary_column.helpers + :members: \ No newline at end of file diff --git a/docs/source/sqlalchemy_mate/patterns/large_binary_column/local.rst b/docs/source/sqlalchemy_mate/patterns/large_binary_column/local.rst new file mode 100644 index 0000000..4bfb35a --- /dev/null +++ b/docs/source/sqlalchemy_mate/patterns/large_binary_column/local.rst @@ -0,0 +1,5 @@ +local +===== + +.. automodule:: sqlalchemy_mate.patterns.large_binary_column.local + :members: \ No newline at end of file diff --git a/docs/source/sqlalchemy_mate/patterns/large_binary_column/local_api.rst b/docs/source/sqlalchemy_mate/patterns/large_binary_column/local_api.rst new file mode 100644 index 0000000..f42d12a --- /dev/null +++ b/docs/source/sqlalchemy_mate/patterns/large_binary_column/local_api.rst @@ -0,0 +1,5 @@ +local_api +========= + +.. 
automodule:: sqlalchemy_mate.patterns.large_binary_column.local_api + :members: \ No newline at end of file diff --git a/requirements-test.txt b/requirements-test.txt index 79f6b5c..bdf4eec 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -5,4 +5,7 @@ pytest-cov # coverage test pg8000 attrs superjson -pandas +pandas>=2.0.0,<3.0.0 +moto>=4.1.12,<5.0.0 +boto_session_manager>=1.7.2,<2.0.0 +s3pathlib>=2.1.2,<3.0.0 diff --git a/sqlalchemy_mate/_version.py b/sqlalchemy_mate/_version.py index 80cb21c..18929f3 100644 --- a/sqlalchemy_mate/_version.py +++ b/sqlalchemy_mate/_version.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__version__ = "2.0.0.1" +__version__ = "2.0.0.2" if __name__ == "__main__": print(__version__) diff --git a/sqlalchemy_mate/crud/selecting.py b/sqlalchemy_mate/crud/selecting.py index 48fe3b2..6cde99d 100644 --- a/sqlalchemy_mate/crud/selecting.py +++ b/sqlalchemy_mate/crud/selecting.py @@ -76,9 +76,8 @@ def by_pk( else: if len(table.primary_key) != 1: raise ValueError - return connection.execute( - sa.select(table).where(list(table.primary_key)[0] == id_) - ).fetchone() + stmt = sa.select(table).where(list(table.primary_key)[0] == id_) + return connection.execute(stmt).fetchone() def select_all( diff --git a/sqlalchemy_mate/orm/extended_declarative_base.py b/sqlalchemy_mate/orm/extended_declarative_base.py index 0dc26c9..1516bf5 100644 --- a/sqlalchemy_mate/orm/extended_declarative_base.py +++ b/sqlalchemy_mate/orm/extended_declarative_base.py @@ -12,7 +12,7 @@ from sqlalchemy import inspect, func, text, select, update, Column from sqlalchemy.sql.expression import TextClause from sqlalchemy.engine import Engine -from sqlalchemy.orm import declarative_base, Session, InstrumentedAttribute +from sqlalchemy.orm import declarative_base, Session, InstrumentedAttribute, MappedColumn from sqlalchemy.exc import IntegrityError from sqlalchemy.orm.exc import FlushError @@ -225,6 +225,8 @@ def _major_attrs(cls): for item in cls._settings_major_attrs: if isinstance(item, Column): l.append(item.name) + elif isinstance(item, MappedColumn): + l.append(item.name) elif isinstance(item, str): l.append(item) else: # pragma: no cover diff --git a/sqlalchemy_mate/patterns/api.py b/sqlalchemy_mate/patterns/api.py index 122438b..19e1faa 100644 --- a/sqlalchemy_mate/patterns/api.py +++ b/sqlalchemy_mate/patterns/api.py @@ -1,3 +1,4 @@ # -*- coding: utf-8 -*- from .status_tracker import api as status_tracker +from .large_binary_column import api as large_binary_column diff --git a/sqlalchemy_mate/patterns/large_binary_column/__init__.py b/sqlalchemy_mate/patterns/large_binary_column/__init__.py new file mode 100644 index 0000000..ed99a3b --- /dev/null +++ b/sqlalchemy_mate/patterns/large_binary_column/__init__.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +""" +The large binary column pattern is a technique designed to optimize +storage and performance when dealing with sizable binary data, +typically exceeding 1KB. Instead of directly storing the binary data in the +relational database, this approach involves saving the data in +a dedicated storage layer and keeping only a reference to its location, +in the form of a unique resource identifier (URI), within the database. +By adopting this pattern, valuable database disk space and I/O resources +can be conserved. + +This module provides an implementation of the large binary column pattern, +offering flexibility in terms of storage backends. 
Users have the option to +utilize various storage solutions, such as the file system or Amazon S3, + depending on their specific requirements. Furthermore, the module is designed + with extensibility in mind, allowing users to implement additional + storage backends to suit their needs. +""" diff --git a/sqlalchemy_mate/patterns/large_binary_column/api.py b/sqlalchemy_mate/patterns/large_binary_column/api.py new file mode 100644 index 0000000..8289542 --- /dev/null +++ b/sqlalchemy_mate/patterns/large_binary_column/api.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +from . import local_api as local +from . import aws_s3_api as aws_s3 diff --git a/sqlalchemy_mate/patterns/large_binary_column/aws_s3.py b/sqlalchemy_mate/patterns/large_binary_column/aws_s3.py new file mode 100644 index 0000000..aacbeae --- /dev/null +++ b/sqlalchemy_mate/patterns/large_binary_column/aws_s3.py @@ -0,0 +1,368 @@ +# -*- coding: utf-8 -*- + +""" +Use Amazon S3 as the storage backend. +""" + +import typing as T +import dataclasses +from datetime import datetime + +import botocore.exceptions +from ...vendor.iterable import group_by + +from .helpers import get_md5, encode_pk, T_PK, execute_write + +if T.TYPE_CHECKING: # pragma: no cover + from mypy_boto3_s3.client import S3Client + + +# ------------------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------------------ +def split_s3_uri(s3_uri: str) -> T.Tuple[str, str]: + """ + Split AWS S3 URI, returns bucket and key. + """ + parts = s3_uri.split("/") + bucket = parts[2] + key = "/".join(parts[3:]) + return bucket, key + + +def join_s3_uri(bucket: str, key: str) -> str: + """ + Join AWS S3 URI from bucket and key. + """ + return "s3://{}/{}".format(bucket, key) + + +def is_s3_object_exists( + s3_client: "S3Client", + bucket: str, + key: str, +) -> bool: # pragma: no cover + try: + s3_client.head_object(Bucket=bucket, Key=key) + return True + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + return False + else: # pragma: no cover + raise e + except Exception as e: # pragma: no cover + raise e + + +def batch_delete_s3_objects( + s3_client: "S3Client", + s3_uri_list: T.List[str], +): + """ + Batch delete many S3 objects. If they share the same bucket, then use + the ``s3_client.delete_objects`` method. If they do not share the same bucket, + then use ``s3_client.delete_object`` method. + + :param s3_client: ``boto3.client("s3")`` object. + :param s3_uri_list: example: ["s3://bucket/key1", "s3://bucket/key2"]. 
+    """
+    pairs = list()
+    for s3_uri in s3_uri_list:
+        bucket, key = split_s3_uri(s3_uri)
+        pairs.append((bucket, key))
+
+    groups = group_by(pairs, get_key=lambda x: x[0])
+    for bucket, bucket_key_pairs in groups.items():
+        s3_client.delete_objects(
+            Bucket=bucket,
+            Delete=dict(Objects=[dict(Key=key) for _, key in bucket_key_pairs]),
+        )
+
+
+def normalize_s3_prefix(prefix: str) -> str:
+    """
+    Strip the leading and trailing slash from an S3 prefix.
+    """
+    if prefix.startswith("/"):  # pragma: no cover
+        prefix = prefix[1:]
+    if prefix.endswith("/"):
+        prefix = prefix[:-1]
+    return prefix
+
+
+def get_s3_key(
+    pk: str,
+    column: str,
+    binary: bytes,
+    prefix: str,
+) -> str:
+    """
+    Figure out the S3 key of the object that stores the column value:
+    ``{prefix}/{pk}/col={column}/md5={md5 of binary}``.
+    """
+    prefix = normalize_s3_prefix(prefix)
+    md5 = get_md5(binary)
+    return f"{prefix}/{pk}/col={column}/md5={md5}"
+
+
+# ------------------------------------------------------------------------------
+# Low level API
+# ------------------------------------------------------------------------------
+@dataclasses.dataclass
+class PutS3BackedColumnResult:
+    """
+    The returned object of :func:`put_s3backed_column`.
+
+    :param column: which column is about to be created/updated.
+    :param old_s3_uri: the old S3 URI, if it is a "create", then it is None.
+    :param new_s3_uri: the S3 URI of the newly written object.
+    :param executed: whether ``s3_client.put_object()`` was actually executed;
+        it is skipped when an object with the same key already exists.
+    :param cleanup_function: the function (``s3_client.delete_object``) to call
+        when an object has to be removed during cleanup.
+    :param cleanup_old_kwargs: keyword arguments for ``cleanup_function`` to
+        delete the old object; None when there is no old object.
+    :param cleanup_new_kwargs: keyword arguments for ``cleanup_function`` to
+        delete the newly written object.
+    """
+
+    # fmt: off
+    column: str = dataclasses.field()
+    old_s3_uri: str = dataclasses.field()
+    new_s3_uri: str = dataclasses.field()
+    executed: bool = dataclasses.field()
+    cleanup_function: T.Callable = dataclasses.field()
+    cleanup_old_kwargs: T.Optional[T.Dict[str, T.Any]] = dataclasses.field(default=None)
+    cleanup_new_kwargs: T.Optional[T.Dict[str, T.Any]] = dataclasses.field(default=None)
+    # fmt: on
+
+
+def put_s3backed_column(
+    column: str,
+    binary: bytes,
+    old_s3_uri: T.Optional[str],
+    s3_client: "S3Client",
+    pk: T_PK,
+    bucket: str,
+    prefix: str,
+    update_at: datetime,
+    is_pk_url_safe: bool = False,
+    extra_put_object_kwargs: T.Optional[T.Dict[str, T.Any]] = None,
+) -> PutS3BackedColumnResult:
+    """
+    Put the binary data of a column to S3.
+
+    :param column: which column is about to be created/updated.
+    :param binary: the binary data of the column to be written to S3.
+    :param old_s3_uri: if it is a "create row", then it is None.
+        If it is an "update row", then it is the old value of the column
+        (which could also be None).
+    :param s3_client: ``boto3.client("s3")`` object.
+    :param pk: the primary key of the row, used to generate the S3 key.
+        It could be a single value, or a tuple of values when the primary key
+        is a compound key.
+    :param bucket: the S3 bucket name where you store the binary data.
+    :param prefix: the common prefix of the S3 key. The prefix and the pk
+        together form the S3 key.
+    :param update_at: logical timestamp of the "create row" or "update row";
+        it is written to the S3 object's metadata.
+    :param is_pk_url_safe: whether the primary key is URL safe. If the primary
+        key contains special characters, set this to False so that it gets
+        base64-encoded before being used in the S3 key. Set it to True only
+        if you are sure that the primary key is URL safe.
+    :param extra_put_object_kwargs: additional custom keyword arguments for
+        the ``s3_client.put_object()`` API.
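+
+    A minimal sketch of a "create row" call (the client, bucket, prefix and
+    pk below are illustrative); the returned :class:`PutS3BackedColumnResult`
+    carries everything needed for later cleanup::
+
+        >>> res = put_s3backed_column(
+        ...     column="html",
+        ...     binary=b"<html>...</html>",
+        ...     old_s3_uri=None,  # None because this is a "create row"
+        ...     s3_client=s3_client,
+        ...     pk="https://www.example.com",
+        ...     bucket="my-bucket",
+        ...     prefix="data/",
+        ...     update_at=datetime.utcnow(),
+        ...     is_pk_url_safe=False,  # the pk contains special characters
+        ... )
+        >>> res.new_s3_uri  # doctest: +SKIP
+        's3://my-bucket/data/{base64-encoded pk}/col=html/md5=...'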
+    """
+    # check_exists_function
+    url_safe_pk = encode_pk(pk=pk, is_pk_url_safe=is_pk_url_safe, delimiter="/")
+    s3_key = get_s3_key(pk=url_safe_pk, column=column, binary=binary, prefix=prefix)
+    new_s3_uri = join_s3_uri(bucket=bucket, key=s3_key)
+    check_exists_function = is_s3_object_exists
+    check_exists_kwargs = dict(
+        s3_client=s3_client,
+        bucket=bucket,
+        key=s3_key,
+    )
+    # write_function
+    if extra_put_object_kwargs is None:  # pragma: no cover
+        extra_put_object_kwargs = dict()
+    metadata = {"update_at": update_at.isoformat()}
+    try:
+        metadata.update(extra_put_object_kwargs.pop("Metadata"))
+    except KeyError:  # pragma: no cover
+        pass
+    write_function = s3_client.put_object
+    write_kwargs = dict(
+        Bucket=bucket,
+        Key=s3_key,
+        Body=binary,
+        Metadata=metadata,
+        **extra_put_object_kwargs,
+    )
+    executed = execute_write(
+        write_function=write_function,
+        write_kwargs=write_kwargs,
+        check_exists_function=check_exists_function,
+        check_exists_kwargs=check_exists_kwargs,
+    )
+    # cleanup_function
+    cleanup_function = s3_client.delete_object
+    if old_s3_uri:
+        _bucket, _s3_key = split_s3_uri(old_s3_uri)
+        cleanup_old_kwargs = dict(Bucket=_bucket, Key=_s3_key)
+    else:
+        cleanup_old_kwargs = None
+    cleanup_new_kwargs = dict(Bucket=bucket, Key=s3_key)
+    return PutS3BackedColumnResult(
+        column=column,
+        old_s3_uri=old_s3_uri,
+        new_s3_uri=new_s3_uri,
+        executed=executed,
+        cleanup_function=cleanup_function,
+        cleanup_old_kwargs=cleanup_old_kwargs,
+        cleanup_new_kwargs=cleanup_new_kwargs,
+    )
+
+
+def clean_up_created_s3_object_when_create_or_update_row_failed(
+    s3_client: "S3Client",
+    new_s3_uri: str,
+    executed: bool,
+):  # pragma: no cover
+    """
+    After ``s3_client.put_object()``, we need to create / update the row.
+    If the create / update row operation failed, we may need to clean up the
+    created S3 object to ensure data consistency between S3 and the database.
+
+    :param s3_client: ``boto3.client("s3")`` object.
+    :param new_s3_uri: the new S3 URI.
+    :param executed: whether ``s3_client.put_object()`` was executed.
+    """
+    if executed:
+        bucket, key = split_s3_uri(new_s3_uri)
+        s3_client.delete_object(Bucket=bucket, Key=key)
+
+
+def clean_up_old_s3_object_when_update_row_succeeded(
+    s3_client: "S3Client",
+    old_s3_uri: T.Optional[str],
+    executed: bool,
+):  # pragma: no cover
+    """
+    After ``s3_client.put_object()``, we need to update the row.
+    If the update row operation succeeded, we may need to clean up the old
+    S3 object so that the replaced binary data does not linger in S3.
+
+    :param s3_client: ``boto3.client("s3")`` object.
+    :param old_s3_uri: the old S3 URI.
+    :param executed: whether ``s3_client.put_object()`` was executed.
+    """
+    if executed:
+        if old_s3_uri:
+            bucket, key = split_s3_uri(old_s3_uri)
+            s3_client.delete_object(Bucket=bucket, Key=key)
+
+
+# ------------------------------------------------------------------------------
+# High Level API
+# ------------------------------------------------------------------------------
+@dataclasses.dataclass
+class PutS3ApiCall:
+    """
+    A data container of the arguments that will be used in ``s3_client.put_object()``.
+
+    :param column: which column is about to be created/updated.
+    :param binary: the binary data of the column to be written to S3.
+    :param old_s3_uri: if it is a "create row", then it is None.
+        If it is an "update row", then it is the old value of the column
+        (which could also be None).
+    :param extra_put_object_kwargs: additional custom keyword arguments for
+        the ``s3_client.put_object()`` API.
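+
+    A minimal sketch, mirroring how the example scripts construct these calls
+    (the content and ``ContentType`` are illustrative)::
+
+        >>> api_call = PutS3ApiCall(
+        ...     column="html",
+        ...     binary=b"<html>this is html</html>",
+        ...     old_s3_uri=None,  # creating the row, so there is no old object
+        ...     extra_put_object_kwargs=dict(ContentType="text/html"),
+        ... )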
+    """
+
+    # fmt: off
+    column: str = dataclasses.field()
+    binary: bytes = dataclasses.field()
+    old_s3_uri: T.Optional[str] = dataclasses.field()
+    extra_put_object_kwargs: T.Optional[T.Dict[str, T.Any]] = dataclasses.field(default_factory=dict)
+    # fmt: on
+
+
+@dataclasses.dataclass
+class PutS3Result:
+    """
+    The returned object of :func:`put_s3`.
+    """
+
+    s3_client: "S3Client" = dataclasses.field()
+    put_s3backed_column_results: T.List[PutS3BackedColumnResult] = dataclasses.field()
+
+    def to_values(self) -> T.Dict[str, str]:
+        """
+        Return a dictionary that maps column name to S3 URI, suitable for use
+        as the values of a SQL ``INSERT`` or ``UPDATE`` statement. The key is
+        the column name, and the value is the S3 URI.
+        """
+        return {res.column: res.new_s3_uri for res in self.put_s3backed_column_results}
+
+    def clean_up_created_s3_object_when_create_or_update_row_failed(self):
+        """
+        A wrapper of :func:`clean_up_created_s3_object_when_create_or_update_row_failed`.
+        """
+        s3_uri_list = list()
+        for res in self.put_s3backed_column_results:
+            if res.executed:
+                s3_uri_list.append(res.new_s3_uri)
+        batch_delete_s3_objects(s3_client=self.s3_client, s3_uri_list=s3_uri_list)
+
+    def clean_up_old_s3_object_when_update_row_succeeded(self):
+        """
+        A wrapper of :func:`clean_up_old_s3_object_when_update_row_succeeded`.
+        """
+        s3_uri_list = list()
+        for res in self.put_s3backed_column_results:
+            if res.executed:
+                if res.old_s3_uri:
+                    s3_uri_list.append(res.old_s3_uri)
+        batch_delete_s3_objects(s3_client=self.s3_client, s3_uri_list=s3_uri_list)
+
+
+def put_s3(
+    api_calls: T.List[PutS3ApiCall],
+    s3_client: "S3Client",
+    pk: T_PK,
+    bucket: str,
+    prefix: str,
+    update_at: datetime,
+    is_pk_url_safe: bool = False,
+) -> PutS3Result:
+    """
+    Put the binary data of one or more columns to S3.
+
+    :param api_calls: a list of :class:`PutS3ApiCall` objects. It defines how to
+        put the binary data of multiple columns to S3.
+    :param s3_client: ``boto3.client("s3")`` object.
+    :param pk: the primary key of the row, used to generate the S3 key.
+        It could be a single value, or a tuple of values when the primary key
+        is a compound key.
+    :param bucket: the S3 bucket name where you store the binary data.
+    :param prefix: the common prefix of the S3 key. The prefix and the pk
+        together form the S3 key.
+    :param update_at: logical timestamp of the "create row" or "update row";
+        it is written to the S3 object's metadata.
+    :param is_pk_url_safe: whether the primary key is URL safe. If the primary
+        key contains special characters, set this to False so that it gets
+        base64-encoded before being used in the S3 key. Set it to True only
+        if you are sure that the primary key is URL safe.
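+
+    A condensed sketch of the intended call pattern (the names below are
+    illustrative; see the example scripts in the docs for the full
+    create / update / delete flows)::
+
+        >>> put_s3_result = put_s3(
+        ...     api_calls=[
+        ...         PutS3ApiCall(column="html", binary=b"...", old_s3_uri=None),
+        ...         PutS3ApiCall(column="image", binary=b"...", old_s3_uri=None),
+        ...     ],
+        ...     s3_client=s3_client,
+        ...     pk="https://www.example.com",
+        ...     bucket="my-bucket",
+        ...     prefix="data/",
+        ...     update_at=datetime.utcnow(),
+        ...     is_pk_url_safe=False,
+        ... )
+        >>> put_s3_result.to_values()  # doctest: +SKIP
+        {'html': 's3://my-bucket/data/...', 'image': 's3://my-bucket/data/...'}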
+ """ + put_s3backed_column_results = list() + for api_call in api_calls: + put_s3backed_column_result = put_s3backed_column( + column=api_call.column, + binary=api_call.binary, + old_s3_uri=api_call.old_s3_uri, + s3_client=s3_client, + pk=pk, + bucket=bucket, + prefix=prefix, + update_at=update_at, + is_pk_url_safe=is_pk_url_safe, + extra_put_object_kwargs=api_call.extra_put_object_kwargs, + ) + put_s3backed_column_results.append(put_s3backed_column_result) + return PutS3Result( + s3_client=s3_client, + put_s3backed_column_results=put_s3backed_column_results, + ) diff --git a/sqlalchemy_mate/patterns/large_binary_column/aws_s3_api.py b/sqlalchemy_mate/patterns/large_binary_column/aws_s3_api.py new file mode 100644 index 0000000..a300411 --- /dev/null +++ b/sqlalchemy_mate/patterns/large_binary_column/aws_s3_api.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +from .aws_s3 import PutS3BackedColumnResult +from .aws_s3 import put_s3backed_column +from .aws_s3 import clean_up_created_s3_object_when_create_or_update_row_failed +from .aws_s3 import clean_up_old_s3_object_when_update_row_succeeded +from .aws_s3 import PutS3ApiCall +from .aws_s3 import PutS3Result +from .aws_s3 import put_s3 diff --git a/sqlalchemy_mate/patterns/large_binary_column/helpers.py b/sqlalchemy_mate/patterns/large_binary_column/helpers.py new file mode 100644 index 0000000..b2d4cb5 --- /dev/null +++ b/sqlalchemy_mate/patterns/large_binary_column/helpers.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +import typing as T +import base64 +import hashlib + + +def get_md5(b: bytes) -> str: + return hashlib.md5(b).hexdigest() + + +def get_sha256(b: bytes) -> str: + return hashlib.sha256(b).hexdigest() + + +def b64encode_str(s: str) -> str: + return base64.urlsafe_b64encode(s.encode("utf-8")).decode("utf-8") + + +def b64decode_str(s: str) -> str: + return base64.urlsafe_b64decode(s.encode("utf-8")).decode("utf-8") + + +T_SINGLE_PK = T.Union[str, int] +T_PK = T.Union[T_SINGLE_PK, T.Iterable[T_SINGLE_PK]] + + +def encode_pk( + pk: T_PK, + is_pk_url_safe: bool, + delimiter: str = "/", +) -> str: + """ + :param pk: primary key of the row. It could be a single value or a list of values + (when pk is compound). + :param is_pk_url_safe: whether the primary key is URL safe. If it's not, you need to + encode it with b64encode. + :param delimiter: the delimiter to join the primary key values. + """ + if is_pk_url_safe: + if isinstance(pk, str): + return pk + else: + return delimiter.join(pk) + else: + if isinstance(pk, str): + return b64encode_str(pk) + else: + return delimiter.join(b64encode_str(p) for p in pk) + + +def execute_write( + write_function: T.Callable, + write_kwargs: T.Dict[str, T.Any], + check_exists_function: T.Optional[T.Callable] = None, + check_exists_kwargs: T.Optional[T.Dict[str, T.Any]] = None, +) -> bool: + """ + :return: a boolean flag indicating whether the write operation is executed. + """ + if check_exists_function is None: + exists = False + else: + exists = check_exists_function(**check_exists_kwargs) + if exists is False: + write_function(**write_kwargs) + executed = not exists + return executed diff --git a/sqlalchemy_mate/patterns/large_binary_column/local.py b/sqlalchemy_mate/patterns/large_binary_column/local.py new file mode 100644 index 0000000..dbe7712 --- /dev/null +++ b/sqlalchemy_mate/patterns/large_binary_column/local.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- + +""" +Use local file system as the storage backend. 
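+
+A condensed sketch of the intended call pattern (the paths are illustrative);
+the API mirrors the ``aws_s3`` backend, with local ``Path`` objects taking the
+place of S3 URIs::
+
+    write_file_result = write_file(
+        api_calls=[
+            WriteFileApiCall(column="html", binary=b"<html>...</html>", old_path=None),
+        ],
+        pk="https://www.example.com",
+        dir_root=Path("/tmp/data"),
+        is_pk_path_safe=False,
+    )
+    values = write_file_result.to_values()  # e.g. {"html": "/tmp/data/{encoded pk}/col=html/md5=..."}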
+"""
+
+import typing as T
+import os
+import dataclasses
+from pathlib import Path
+
+from .helpers import get_md5, encode_pk, T_PK, execute_write
+
+
+# ------------------------------------------------------------------------------
+# Helpers
+# ------------------------------------------------------------------------------
+def get_path(
+    pk: str,
+    column: str,
+    binary: bytes,
+    dir_root: Path,
+) -> Path:
+    """
+    Figure out the local path of the file that stores the column value:
+    ``{dir_root}/{pk}/col={column}/md5={md5 of binary}``.
+    """
+    md5 = get_md5(binary)
+    return dir_root.joinpath(pk, f"col={column}", f"md5={md5}")
+
+
+if os.name == "nt":
+    path_sep = "\\"
+elif os.name == "posix":
+    path_sep = "/"
+else:  # pragma: no cover
+    raise NotImplementedError
+
+
+# ------------------------------------------------------------------------------
+# Low level API
+# ------------------------------------------------------------------------------
+@dataclasses.dataclass
+class WriteFileBackedColumnResult:
+    # fmt: off
+    column: str = dataclasses.field()
+    old_path: Path = dataclasses.field()
+    new_path: Path = dataclasses.field()
+    executed: bool = dataclasses.field()
+    cleanup_function: T.Callable = dataclasses.field()
+    cleanup_old_kwargs: T.Optional[T.Dict[str, T.Any]] = dataclasses.field(default=None)
+    cleanup_new_kwargs: T.Optional[T.Dict[str, T.Any]] = dataclasses.field(default=None)
+    # fmt: on
+
+
+def write_binary(
+    path: Path,
+    binary: bytes,
+):
+    try:
+        path.write_bytes(binary)
+    except FileNotFoundError:
+        # the parent directory does not exist yet; create it and retry
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_bytes(binary)
+
+
+def write_file_backed_column(
+    column: str,
+    binary: bytes,
+    old_path: T.Optional[Path],
+    pk: T_PK,
+    dir_root: Path,
+    is_pk_path_safe: bool = False,
+    extra_write_kwargs: T.Optional[T.Dict[str, T.Any]] = None,
+) -> WriteFileBackedColumnResult:
+    """
+    Write the binary data of a single column to a local file.
+    """
+    # check_exists_function
+    url_safe_pk = encode_pk(pk=pk, is_pk_url_safe=is_pk_path_safe, delimiter=path_sep)
+    new_path = get_path(pk=url_safe_pk, column=column, binary=binary, dir_root=dir_root)
+    check_exists_function = new_path.exists
+    check_exists_kwargs = dict()
+    # write_function
+    if extra_write_kwargs is None:
+        extra_write_kwargs = dict()
+    write_function = write_binary
+    write_kwargs = dict(path=new_path, binary=binary, **extra_write_kwargs)
+    executed = execute_write(
+        write_function=write_function,
+        write_kwargs=write_kwargs,
+        check_exists_function=check_exists_function,
+        check_exists_kwargs=check_exists_kwargs,
+    )
+    # cleanup_function
+    cleanup_function = new_path.unlink
+    if old_path:
+        cleanup_old_kwargs = dict()
+    else:
+        cleanup_old_kwargs = None
+    cleanup_new_kwargs = dict()
+    return WriteFileBackedColumnResult(
+        column=column,
+        old_path=old_path,
+        new_path=new_path,
+        executed=executed,
+        cleanup_function=cleanup_function,
+        cleanup_old_kwargs=cleanup_old_kwargs,
+        cleanup_new_kwargs=cleanup_new_kwargs,
+    )
+
+
+def clean_up_new_file_when_create_or_update_row_failed(
+    new_path: Path,
+    executed: bool,
+):
+    """
+    If the create / update row operation failed, delete the newly written
+    file to keep the database and the file system consistent.
+    """
+    if executed:
+        new_path.unlink()
+
+
+def clean_up_old_file_when_update_row_succeeded(
+    old_path: T.Optional[Path],
+    executed: bool,
+):
+    """
+    If the update row operation succeeded, delete the old file that the
+    updated column no longer references.
+    """
+    if executed:
+        if old_path:
+            old_path.unlink()
+
+
+# ------------------------------------------------------------------------------
+# High Level API
+# ------------------------------------------------------------------------------
+@dataclasses.dataclass
+class WriteFileApiCall:
+    # fmt: off
+    column: str = dataclasses.field()
+    binary: bytes = dataclasses.field()
+    old_path: T.Optional[Path] = dataclasses.field()
+    extra_write_kwargs: 
T.Optional[T.Dict[str, T.Any]] = dataclasses.field(default_factory=dict) + # fmt: on + + +@dataclasses.dataclass +class WriteFileResult: + write_file_backed_column_results: T.List[WriteFileBackedColumnResult] = ( + dataclasses.field() + ) + + def to_values(self) -> T.Dict[str, str]: + return { + res.column: str(res.new_path) + for res in self.write_file_backed_column_results + } + + def clean_up_new_file_when_create_or_update_row_failed(self): + for res in self.write_file_backed_column_results: + clean_up_new_file_when_create_or_update_row_failed( + new_path=res.new_path, executed=res.executed + ) + + def clean_up_old_file_when_update_row_succeeded(self): + for res in self.write_file_backed_column_results: + clean_up_old_file_when_update_row_succeeded( + old_path=res.old_path, executed=res.executed + ) + + +def write_file( + api_calls: T.List[WriteFileApiCall], + pk: T_PK, + dir_root: Path, + is_pk_path_safe: bool = False, +): + write_file_backed_column_results = list() + for api_call in api_calls: + write_file_backed_column_result = write_file_backed_column( + column=api_call.column, + binary=api_call.binary, + old_path=api_call.old_path, + pk=pk, + dir_root=dir_root, + is_pk_path_safe=is_pk_path_safe, + extra_write_kwargs=api_call.extra_write_kwargs, + ) + write_file_backed_column_results.append(write_file_backed_column_result) + return WriteFileResult( + write_file_backed_column_results=write_file_backed_column_results, + ) diff --git a/sqlalchemy_mate/patterns/large_binary_column/local_api.py b/sqlalchemy_mate/patterns/large_binary_column/local_api.py new file mode 100644 index 0000000..47bfce2 --- /dev/null +++ b/sqlalchemy_mate/patterns/large_binary_column/local_api.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +from .local import WriteFileBackedColumnResult +from .local import write_file_backed_column +from .local import clean_up_new_file_when_create_or_update_row_failed +from .local import clean_up_old_file_when_update_row_succeeded +from .local import WriteFileApiCall +from .local import WriteFileResult +from .local import write_file diff --git a/sqlalchemy_mate/tests/mock_aws.py b/sqlalchemy_mate/tests/mock_aws.py new file mode 100644 index 0000000..aa1afdf --- /dev/null +++ b/sqlalchemy_mate/tests/mock_aws.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- + +""" +Simplify using moto for unit testing. + +.. versionchanged:: 0.1.2 + + Fix a bug that the ``_mock_list`` class attribute got messed up when having + multiple sub class. Now we use ``_mocked`` class attribute and tracking + mocked service in separate list for each subclass. +""" + +import typing as T +from boto_session_manager import BotoSesManager + +__version__ = "0.1.2" + + +class BaseMockTest: + """ + Simple base class for mocking AWS services. 
+ + Usage:: + + import moto + + class Test(BaseMockTest): + mock_list = [ + moto.mock_s3, + ] + + @classmethod + def setup_class_post_hook(cls): + cls.bsm.s3_client.create_bucket(Bucket="my-bucket") + cls.bsm.s3_client.put_object( + Bucket="my-bucket", + Key="file.txt", + Body="hello world", + ) + + def test(self): + assert ( + self.bsm.s3_client.get_object(Bucket="my-bucket", Key="file.txt")["Body"] + .read() + .decode("utf-8") + == "hello world" + ) + + """ + + use_mock: bool = True + region_name: str = "us-east-1" + mock_list: list = [] + + # Don't overwrite the following + bsm: T.Optional[BotoSesManager] = None + _mocked: T.Dict[T.Any, list] = dict() + + @classmethod + def setup_moto(cls): + if cls.use_mock: + cls._mocked[cls] = [] + for mock_abc in cls.mock_list: + mocker = mock_abc() + mocker.start() + cls._mocked[cls].append(mocker) + cls.bsm = BotoSesManager(region_name=cls.region_name) + + @classmethod + def teardown_moto(cls): + if cls.use_mock: + for mocker in cls._mocked[cls]: + mocker.stop() + cls.bsm = None + + @classmethod + def setup_class_pre_hook(cls): # pragma: no cover + pass + + @classmethod + def setup_class_post_hook(cls): # pragma: no cover + pass + + @classmethod + def setup_class(cls): + cls.setup_class_pre_hook() + cls.setup_moto() + cls.setup_class_post_hook() + + @classmethod + def teardown_class_pre_hook(cls): # pragma: no cover + pass + + @classmethod + def teardown_class_post_hook(cls): # pragma: no cover + pass + + @classmethod + def teardown_class(cls): + cls.teardown_class_pre_hook() + cls.teardown_moto() + cls.teardown_class_post_hook() diff --git a/sqlalchemy_mate/vendor/iterable.py b/sqlalchemy_mate/vendor/iterable.py new file mode 100644 index 0000000..a5847b6 --- /dev/null +++ b/sqlalchemy_mate/vendor/iterable.py @@ -0,0 +1,552 @@ +# -*- coding: utf-8 -*- + +""" +This module provides plenty of useful functions for iterable object manipulation. +""" + +import typing as T +import random +import collections +import itertools + +__version__ = "0.1.1" + +def flatten(iterable: T.Iterable) -> T.Iterable: + """ + Flatten one layer of nesting. + + Example:: + + >>> list(flatten([[0, 1], [2, 3]]) + [0, 1, 2, 3] + + >>> list(flatten(["ab", "cd"]) + ["a", "b", "c", "d"] + + **中文文档** + + 将二维列表压平成一维列表. + """ + return itertools.chain.from_iterable(iterable) + + +def flatten_all(nested_iterable: T.Iterable) -> T.Iterable: + """Flatten arbitrary depth of nesting. Good for unknown nesting structure + iterable object. + + Example:: + + >>> list(flatten_all([[1, 2], "abc", [3, ["x", "y", "z"]], 4])) + [1, 2, "abc", 3, "x", "y", "z", 4] + + **中文文档** + + 将任意维度的列表压平成一维列表. + + 注: 使用 ``hasattr(i, "__iter__")`` 方法做是否是可循环对象的判断, 性能要高于其他 + 任何方法, 例如: ``isinstance(i, collections.Iterable)``. + """ + for item in nested_iterable: + if hasattr(item, "__iter__") and not isinstance(item, str): + for i in flatten_all(item): + yield i + else: + yield item + + +def nth(iterable: T.Iterable, n: int, default=None): + """ + Returns the nth item or a default value. + + Example:: + + >>> nth([0, 1, 2], 1) + 1 + + >>> nth([0, 1, 2], 100) + None + + **中文文档** + + 取出一个可循环对象中的第 n 个元素。等效于 ``list(iterable)[n]``, 但占用极小的内存. + 因为 ``list(iterable)`` 要将所有元素放在内存中并生成一个新列表. 该方法常用于 + 那些无法做取 index 操作的可循环对象. + """ + return next(itertools.islice(iterable, n, None), default) + + +def take(iterable: T.Iterable, n: int) -> T.Iterable: + """ + Return first n items of the iterable as a list. + + Example:: + + >>> take([0, 1, 2], 2) + [0, 1] + + **中文文档** + + 取出可循环对象中的前 n 个元素. 等效于 ``list(iterable)[:n]``, 但占用极小的内存. 
+ 因为 ``list(iterable)`` 要将所有元素放在内存中并生成一个新列表. 该方法常用于 + 那些无法做取 index 操作的可循环对象. + """ + return list(itertools.islice(iterable, n)) + + +def pull(iterable: T.Iterable, n: int) -> list: + """Return last n items of the iterable as a list. + + Example:: + + >>> pull([0, 1, 2], 3) + [1, 2] + + **中文文档** + + 取出可循环对象中的最后 n 个元素. 等效于 ``list(iterable)[-n:]``, 但占用极小的内存. + 因为 ``list(iterable)`` 要将所有元素放在内存中并生成一个新列表. 该方法常用于 + 那些无法做取 index 操作的可循环对象. + """ + fifo = collections.deque(maxlen=n) + for i in iterable: + fifo.append(i) + return list(fifo) + + +def shuffled(lst: list) -> list: + """Returns the shuffled iterable. + + Example:: + + >>> shuffled([0, 1, 2]) + [2, 0, 1] + + **中文文档** + + 打乱一个可循环对象中所有元素的顺序. 并打包成列表返回. + """ + return random.sample(lst, len(lst)) + + +def grouper(iterable: T.Iterable, n: int, fillvalue=None): + """Collect data into fixed-length chunks or blocks. + + Example:: + + >>> list(grouper(range(10), n=3, fillvalue=None)) + [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, None, None)] + + **中文文档** + + 将一个序列按照尺寸 n, 依次打包输出, 如果元素不够 n 的包, 则用 ``fillvalue`` 中的值填充. + """ + args = [iter(iterable)] * n + return itertools.zip_longest(*args, fillvalue=fillvalue) + + +def grouper_list(iterable: T.Iterable, n: int) -> T.Iterable[list]: + """Evenly divide list into fixed-length piece, no filled value if chunk + size smaller than fixed-length. + + Example:: + + >>> list(grouper(range(10), n=3) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + + **中文文档** + + 将一个列表按照尺寸 n, 依次打包输出, 有多少输出多少, 并不强制填充包的大小到 n. + + 下列实现是按照性能从高到低进行排列的: + + - 方法1: 建立一个 counter, 在向 chunk 中添加元素时, 同时将 counter 与 n 比较, + 如果一致则 yield. 然后在最后将剩余的 item 视情况 yield. + - 方法2: 建立一个 list, 每次添加一个元素, 并检查 size. (显然性能较差) + - 方法3: 调用 grouper() 函数, 然后对里面的 None 元素进行清理. + """ + chunk = list() + counter = 0 + for item in iterable: + counter += 1 + chunk.append(item) + if counter == n: + yield chunk + chunk = list() + counter = 0 + if len(chunk) > 0: + yield chunk + + +def grouper_dict(dct: dict, n: int, dict_type=dict) -> T.Iterable[dict]: + """ + Evenly divide dictionary into fixed-length piece, no filled value if + chunk size smaller than fixed-length. Notice: dict is unordered in python, + this method suits better for collections.OrdereDict. + + Example:: + >>> d = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5} + >>> list(grouper_dict(d, 2) + [{"a": 1, "b": 2}, {"c": 3, "d": 4}, {"e": 5}] + + **中文文档** + + 将一个字典按照尺寸 n, 依次打包输出, 有多少输出多少, 并不强制填充包的大小到 n. + """ + chunk = dict_type() + counter = 0 + for k, v in dct.items(): + counter += 1 + chunk[k] = v + if counter == n: + yield chunk + chunk = dict_type() + counter = 0 + if len(chunk) > 0: + yield chunk + + +# --- Window --- +def running_window(lst: list, size: int): + """ + Generate n-size running window. + + Example:: + + >>> for i in running_window([1, 2, 3, 4, 5], size=3): + ... print(i) + [1, 2, 3] + [2, 3, 4] + [3, 4, 5] + + **中文文档** + + 简单滑窗函数. + """ + if size > len(lst): + raise ValueError("size can not be greater than length of iterable.") + + fifo = collections.deque(maxlen=size) + for i in lst: + fifo.append(i) + if len(fifo) == size: + yield list(fifo) + + +def cycle_running_window(lst: list, size: int) -> list: + """ + Generate n-size cycle running window. + + Example:: + + >>> for i in cycle_running_window([1, 2, 3, 4, 5], size=3): + ... print(i) + [1, 2, 3] + [2, 3, 4] + [3, 4, 5] + [4, 5, 1] + [5, 1, 2] + + **中文文档** + + 循环位移滑窗函数. 
+ """ + if size > len(lst): + raise ValueError("size can not be greater than length of iterable.") + + fifo = collections.deque(maxlen=size) + cycle = itertools.cycle(lst) + counter = itertools.count(1) + length = len(lst) + for i in cycle: + fifo.append(i) + if len(fifo) == size: + yield list(fifo) + if next(counter) == length: + break + + +# --- Cycle --- +def cycle_slice(sliceable: list, start: int, end: int) -> list: + """ + Given a list, return the right-hand cycle direction slice from start to end. + + Example:: + + >>> array = [0, 1, 2, 3] + >>> cycle_slice(array, 1, 3) # from array[1] to array[3] + [1, 2] + + >>> cycle_slice(array, 3, 1) # from array[3] to array[1] + [3, 0] + """ + length = len(sliceable) + + if length == 0: + raise ValueError("sliceable cannot be empty!") + start = start % length + end = end % length + + if end > start: + return sliceable[start:end] + elif end <= start: + return sliceable[start:] + sliceable[:end] + + +def cycle_dist( + x: T.Union[int, float], + y: T.Union[int, float], + perimeter: T.Union[int, float], +) -> T.Union[int, float]: + """Find Distance between x, y by means of a n-length cycle. + + :param x: + :param y: + :param perimeter: + + Example: + + >>> cycle_dist(1, 23, 24) + 2 + + >>> cycle_dist(5, 13, 24) + 8 + + >>> cycle_dist(0.0, 2.4, 1.0) + 0.4 + + >>> cycle_dist(0.0, 2.6, 1.0) + 0.4 + + **中文文档** + + 假设坐标轴是一个环, 计算两点之间在环上的最短距离. + """ + dist = abs(x - y) % perimeter + if dist > 0.5 * perimeter: + dist = perimeter - dist + return dist + + +# --- Shift --- +def cyclic_shift(array: list, shift: int) -> list: + """ + + :params array: list like iterable object + :params shift: number of movement + + Example:: + + >>> cyclic_shift([0, 1, 2], 1) + [2, 0, 1] + + >>> cyclic_shift([0, 1, 2], -1) + [1, 2, 0] + + **中文文档** + + 循环位移函数. + """ + shift = shift % len(array) + return array[-shift:] + array[:-shift] + + +def shift_and_trim(array: list, shift: int) -> list: + """ + Shift and trim unneeded item. + + :params array: list like iterable object + :params shift: number of movement + + Example:: + + >>> array = [0, 1, 2] + + >>> shift_and_trim(array, 0) + [0, 1, 2] + + >>> shift_and_trim(array, 1) + [0, 1] + + >>> shift_and_trim(array, -1) + [1, 2] + + >>> shift_and_trim(array, 3) + [] + + >>> shift_and_trim(array, -3) + [] + """ + length = len(array) + if length == 0: + return [] + + if (shift >= length) or (shift <= -length): + return [] + elif shift < 0: + return array[-shift:] + elif shift > 0: + return array[:-shift] + else: + return list(array) + + +def shift_and_pad(array: list, shift: int, pad: T.Any = "__null__") -> list: + """ + Shift and pad with item. 
+ + :params array: list like iterable object + :params shift: number of movement + :params pad: any value + + Example:: + + >>> array = [0, 1, 2] + + >>> shift_and_pad(array, 0) + [0, 1, 2] + + >>> shift_and_pad(array, 1) + [0, 0, 1] + + >>> shift_and_pad(array, -1) + [1, 2, 2] + + >>> shift_and_pad(array, 3) + [0, 0, 0] + + >>> shift_and_pad(array, -3) + [2, 2, 2] + + >>> shift_and_pad(array, -1, None) + [None, 0, 1] + """ + length = len(array) + if length == 0: + return [] + + if pad == "__null__": + if shift > 0: + padding_item = array[0] + elif shift < 0: + padding_item = array[-1] + else: + padding_item = None + else: + padding_item = pad + + if abs(shift) >= length: + return length * [ + padding_item, + ] + elif shift == 0: + return list(array) + elif shift > 0: + return [ + padding_item, + ] * shift + array[:-shift] + elif shift < 0: + return ( + array[-shift:] + + [ + padding_item, + ] + * -shift + ) + else: # pragma: no cover + raise NotImplementedError + + +def size_of_generator(generator: T.Iterable, memory_efficient=True) -> int: + """Get number of items in a generator function. + + - memory_efficient = True, 3 times slower, but memory_efficient. + - memory_efficient = False, faster, but cost more memory. + + **中文文档** + + 计算一个生成器函数中的元素的个数。使用memory_efficient=True的方法可以避免将生成器中的 + 所有元素放入内存, 但是速度稍慢于memory_efficient=False的方法。 + """ + if memory_efficient: + counter = 0 + for _ in generator: + counter += 1 + return counter + else: + return len(list(generator)) + + +def difference(array: list, k: int = 1) -> list: + """ + 简单差分函数. + + Example:: + + >>> difference([0, 1, 3, 6, 10], 0) + [0, 0, 0, 0, 0] + + >>> difference([0, 1, 3, 6, 10], 1) + [1, 2, 3, 4] + + >>> difference([0, 1, 3, 6, 10], 2) + [3, 5, 7] + """ + if (len(array) - k) < 1: + raise ValueError() + if k < 0: + raise ValueError("k has to be greater or equal than zero!") + elif k == 0: + return [i - i for i in array] + else: + return [j - i for i, j in zip(array[:-k], array[k:])] + + +KT = T.TypeVar("KT") +VT = T.TypeVar("VT") + +def group_by( + iterable: T.Iterable[VT], + get_key: T.Callable[[VT], KT], +) -> T.Dict[KT, T.List[VT]]: + """ + Group items by it's key, with type hint. + + Example:: + + >>> class Record: + ... def __init__(self, product: str, date: str, sale: int): + ... self.product = product + ... self.date = date + ... self.sale = sale + + >>> records = [ + ... Record("apple", "2020-01-01", 10), + ... Record("apple", "2020-01-02", 20), + ... Record("apple", "2020-01-03", 30), + ... Record("banana", "2020-01-01", 10), + ... Record("banana", "2020-01-02", 20), + ... Record("banana", "2020-01-03", 30), + ... 
] + + >>> group_by(records, lambda x: x.product) + { + "apple": [ + Record("apple", "2020-01-01", 10), + Record("apple", "2020-01-02", 20), + Record("apple", "2020-01-03", 30), + ], + "banana": [ + Record("banana", "2020-01-01", 10), + Record("banana", "2020-01-02", 20), + Record("banana", "2020-01-03", 30), + ], + } + """ + grouped = dict() + for item in iterable: + key = get_key(item) + try: + grouped[key].append(item) + except KeyError: + grouped[key] = [item] + return grouped diff --git a/tests/crud/test_crud_inserting.py b/tests/crud/test_crud_inserting.py index db92c61..80743b5 100644 --- a/tests/crud/test_crud_inserting.py +++ b/tests/crud/test_crud_inserting.py @@ -91,7 +91,7 @@ def test_smart_insert(self): # ------ After State ------ assert elapse1 < elapse2 - def _test_smart_insert_single_row(self): + def test_smart_insert_single_row(self): assert count_row(self.engine, t_smart_insert) == 0 data = {"id": 1} diff --git a/tests/patterns/large_binary_column/test_large_binary_column_aws_s3.py b/tests/patterns/large_binary_column/test_large_binary_column_aws_s3.py new file mode 100644 index 0000000..7d192f5 --- /dev/null +++ b/tests/patterns/large_binary_column/test_large_binary_column_aws_s3.py @@ -0,0 +1,333 @@ +# -*- coding: utf-8 -*- + +import pytest + +from datetime import datetime + +import moto +from s3pathlib import S3Path, context + +import sqlalchemy as sa +import sqlalchemy.orm as orm +from sqlalchemy_mate.patterns.large_binary_column import aws_s3_api as aws_s3 +from sqlalchemy_mate.tests.mock_aws import BaseMockTest +from sqlalchemy_mate.tests.api import ( + IS_WINDOWS, + engine_sqlite, + engine_psql, +) + + +def get_utc_now() -> datetime: + return datetime.utcnow() + + +Base = orm.declarative_base() + + +class Task(Base): + __tablename__ = "tasks" + + url = orm.mapped_column(sa.String, primary_key=True) + update_at = orm.mapped_column(sa.DateTime) + html = orm.mapped_column(sa.String, nullable=True) + image = orm.mapped_column(sa.String, nullable=True) + + +bucket: str = "mybucket" +s3dir_root = S3Path( + f"s3://{bucket}/projects/sqlalchemy_mate/patterns/s3backed_column/data/" +).to_dir() + + +class BaseTest(BaseMockTest): + engine: sa.Engine = None + + mock_list = [ + moto.mock_s3, + ] + + @classmethod + def setup_class_post_hook(cls): + Base.metadata.create_all(cls.engine) + context.attach_boto_session(cls.bsm.boto_ses) + cls.bsm.s3_client.create_bucket(Bucket=bucket) + s3dir_root.delete() + + def setup_method(self): + with self.engine.connect() as conn: + conn.execute(Task.__table__.delete()) + conn.commit() + + def test(self): + engine = self.engine + bsm = self.bsm + + url = "https://www.example.com" + html_content_1 = b"this is html 1" + image_content_1 = b"this is image 1" + html_additional_kwargs = dict(ContentType="text/html") + image_additional_kwargs = dict(ContentType="image/jpeg") + utc_now = get_utc_now() + + put_s3_result = aws_s3.put_s3( + api_calls=[ + aws_s3.PutS3ApiCall( + column="html", + binary=html_content_1, + old_s3_uri=None, + extra_put_object_kwargs=html_additional_kwargs, + ), + aws_s3.PutS3ApiCall( + column="image", + binary=image_content_1, + old_s3_uri=None, + extra_put_object_kwargs=image_additional_kwargs, + ), + ], + s3_client=bsm.s3_client, + pk=url, + bucket=s3dir_root.bucket, + prefix=s3dir_root.key, + update_at=utc_now, + is_pk_url_safe=False, + ) + + class UserError(Exception): + pass + + with orm.Session(engine) as ses: + try: + with ses.begin(): + task1 = Task( + url=url, + update_at=utc_now, + # this is a helper method that 
        utc_now = get_utc_now()

        put_s3_result = aws_s3.put_s3(
            api_calls=[
                aws_s3.PutS3ApiCall(
                    column="html",
                    binary=html_content_1,
                    old_s3_uri=None,
                    extra_put_object_kwargs=html_additional_kwargs,
                ),
                aws_s3.PutS3ApiCall(
                    column="image",
                    binary=image_content_1,
                    old_s3_uri=None,
                    extra_put_object_kwargs=image_additional_kwargs,
                ),
            ],
            s3_client=bsm.s3_client,
            pk=url,
            bucket=s3dir_root.bucket,
            prefix=s3dir_root.key,
            update_at=utc_now,
            is_pk_url_safe=False,
        )

        with orm.Session(engine) as ses:
            try:
                with ses.begin():
                    task1 = Task(
                        url=url,
                        update_at=utc_now,
                        # this is a helper method that converts the put s3 results
                        # to INSERT / UPDATE values
                        **put_s3_result.to_values(),
                    )
                    ses.add(task1)
            except Exception as e:
                # clean up created s3 object when create row failed
                # if you don't want to do that, just don't run this method
                put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()

            task1: Task = ses.get(Task, url)
            assert task1.url == url
            assert task1.update_at == utc_now
            assert S3Path(task1.html).read_bytes() == html_content_1
            assert S3Path(task1.image).read_bytes() == image_content_1
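        # ------------------------------------------------------------------------------
        # Update a Row but SQL UPDATE failed
        # ------------------------------------------------------------------------------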
        html_content_2 = b"this is html 2"
        image_content_2 = b"this is image 2"
        utc_now = get_utc_now()

        put_s3_result = aws_s3.put_s3(
            api_calls=[
                aws_s3.PutS3ApiCall(
                    column="html",
                    binary=html_content_2,
                    # since this is an update, you have to specify the old s3 object,
                    # even if it is None. we need this information to clean up the
                    # old s3 object when the SQL UPDATE succeeds
                    old_s3_uri=task1.html,
                    extra_put_object_kwargs=html_additional_kwargs,
                ),
                aws_s3.PutS3ApiCall(
                    column="image",
                    binary=image_content_2,
                    # since this is an update, you have to specify the old s3 object,
                    # even if it is None. we need this information to clean up the
                    # old s3 object when the SQL UPDATE succeeds
                    old_s3_uri=task1.image,
                    extra_put_object_kwargs=image_additional_kwargs,
                ),
            ],
            s3_client=bsm.s3_client,
            pk=url,
            bucket=s3dir_root.bucket,
            prefix=s3dir_root.key,
            update_at=utc_now,
            is_pk_url_safe=False,
        )

        with orm.Session(engine) as ses:
            try:
                with ses.begin():
                    stmt = (
                        sa.update(Task).where(Task.url == url)
                        # this is a helper method that converts the put s3 results
                        # to INSERT / UPDATE values
                        .values(update_at=utc_now, **put_s3_result.to_values())
                    )
                    # intentionally raises an error to simulate a database failure
                    raise UserError()
                    ses.execute(stmt)
                # clean up old s3 object when update row succeeded
                # if you don't want to do that, just don't run this method
                put_s3_result.clean_up_old_s3_object_when_update_row_succeeded()
            except Exception as e:
                # clean up created s3 object when update row failed
                # if you don't want to do that, just don't run this method
                put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()

            task2: Task = ses.get(Task, url)
            assert task2.update_at < utc_now
            assert S3Path(task1.html).read_bytes() == html_content_1
            assert S3Path(task1.image).read_bytes() == image_content_1
            values = put_s3_result.to_values()
            html_s3_uri = values["html"]
            image_s3_uri = values["image"]
            assert S3Path(html_s3_uri).exists() is False
            assert S3Path(image_s3_uri).exists() is False

        # ------------------------------------------------------------------------------
        # Update a Row and SQL UPDATE succeeded
        # ------------------------------------------------------------------------------
        utc_now = get_utc_now()

        put_s3_result = aws_s3.put_s3(
            api_calls=[
                aws_s3.PutS3ApiCall(
                    column="html",
                    binary=html_content_2,
                    # since this is an update, you have to specify the old s3 object,
                    # even if it is None. we need this information to clean up the
                    # old s3 object when the SQL UPDATE succeeds
                    old_s3_uri=task1.html,
                    extra_put_object_kwargs=html_additional_kwargs,
                ),
                aws_s3.PutS3ApiCall(
                    column="image",
                    binary=image_content_2,
                    # since this is an update, you have to specify the old s3 object,
                    # even if it is None. we need this information to clean up the
                    # old s3 object when the SQL UPDATE succeeds
                    old_s3_uri=task1.image,
                    extra_put_object_kwargs=image_additional_kwargs,
                ),
            ],
            s3_client=bsm.s3_client,
            pk=url,
            bucket=s3dir_root.bucket,
            prefix=s3dir_root.key,
            update_at=utc_now,
            is_pk_url_safe=False,
        )

        with orm.Session(engine) as ses:
            try:
                with ses.begin():
                    stmt = (
                        sa.update(Task).where(Task.url == url)
                        # this is a helper method that converts the put s3 results
                        # to INSERT / UPDATE values
                        .values(update_at=utc_now, **put_s3_result.to_values())
                    )
                    ses.execute(stmt)
                # clean up old s3 object when update row succeeded
                # if you don't want to do that, just don't run this method
                put_s3_result.clean_up_old_s3_object_when_update_row_succeeded()
            except Exception as e:
                # clean up created s3 object when update row failed
                # if you don't want to do that, just don't run this method
                put_s3_result.clean_up_created_s3_object_when_create_or_update_row_failed()

            task2: Task = ses.get(Task, url)
            assert task2.update_at == utc_now
            assert S3Path(task1.html).exists() is False
            assert S3Path(task1.image).exists() is False
            assert S3Path(task2.html).read_bytes() == html_content_2
            assert S3Path(task2.image).read_bytes() == image_content_2
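        # ------------------------------------------------------------------------------
        # Delete a Row and clean up the S3 objects
        # ------------------------------------------------------------------------------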
        with orm.Session(engine) as ses:
            task3: Task = ses.get(Task, url)
            try:
                stmt = sa.delete(Task).where(Task.url == url)
                res = ses.execute(stmt)
                ses.commit()
                if res.rowcount == 1:
                    # clean up old s3 object when delete row succeeded
                    # if you don't want to do that, just don't run this method
                    if task3.html:
                        S3Path(task3.html).delete()
                    if task3.image:
                        S3Path(task3.image).delete()
            except Exception as e:
                ses.rollback()

            assert ses.get(Task, url) is None
            assert S3Path(task3.html).exists() is False
            assert S3Path(task3.image).exists() is False


class TestSqlite(BaseTest):
    engine = engine_sqlite


@pytest.mark.skipif(
    IS_WINDOWS,
    reason="no psql service container for windows",
)
class TestPsql(BaseTest):  # pragma: no cover
    engine = engine_psql


if __name__ == "__main__":
    from sqlalchemy_mate.tests.api import run_cov_test

    run_cov_test(
        __file__,
        "sqlalchemy_mate.patterns.large_binary_column.aws_s3",
        preview=False,
    )
diff --git a/tests/patterns/large_binary_column/test_large_binary_column_helpers.py b/tests/patterns/large_binary_column/test_large_binary_column_helpers.py
new file mode 100644
index 0000000..fd2aafa
--- /dev/null
+++ b/tests/patterns/large_binary_column/test_large_binary_column_helpers.py
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-

from sqlalchemy_mate.patterns.large_binary_column.helpers import (
    get_md5,
    get_sha256,
    b64encode_str,
    b64decode_str,
    encode_pk,
    execute_write,
)


def test_get_md5():
    assert len(get_md5(b"hello world")) == 32


def test_get_sha256():
    assert len(get_sha256(b"hello world")) == 64


def test_b64_encode_decode_str():
    url = "http://www.example.com"
    b64 = b64encode_str(url)
    assert b64decode_str(b64) == url


def test_encode_pk():
    assert encode_pk("a", is_pk_url_safe=True) == "a"
    assert encode_pk(("a", "b"), is_pk_url_safe=True) == "a/b"
    assert ":" not in encode_pk("a", is_pk_url_safe=False)
    assert ":" not in encode_pk(("a", "b"), is_pk_url_safe=False)


def test_execute_write():
    def write_function():
        pass

    def return_true():
        return True

    def return_false():
        return False

    write_kwargs = {}

    assert (
        execute_write(
            write_function=write_function,
            write_kwargs=write_kwargs,
+ check_exists_function=None, + check_exists_kwargs=None, + ) + is True + ) + assert ( + execute_write( + write_function=write_function, + write_kwargs=write_kwargs, + check_exists_function=return_true, + check_exists_kwargs={}, + ) + is False + ) + assert ( + execute_write( + write_function=write_function, + write_kwargs=write_kwargs, + check_exists_function=return_false, + check_exists_kwargs={}, + ) + is True + ) + + +if __name__ == "__main__": + from sqlalchemy_mate.tests.api import run_cov_test + + run_cov_test( + __file__, "sqlalchemy_mate.patterns.large_binary_column.helpers", preview=False + ) diff --git a/tests/test_api.py b/tests/test_api.py index 1b7f08f..42aae78 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -39,6 +39,32 @@ def test(): _ = sam.patterns.status_tracker.JobMixin _ = sam.patterns.status_tracker.Updates + _ = sam.patterns.large_binary_column.aws_s3 + _ = sam.patterns.large_binary_column.aws_s3.PutS3BackedColumnResult + _ = sam.patterns.large_binary_column.aws_s3.put_s3backed_column + _ = ( + sam.patterns.large_binary_column.aws_s3.clean_up_created_s3_object_when_create_or_update_row_failed + ) + _ = ( + sam.patterns.large_binary_column.aws_s3.clean_up_old_s3_object_when_update_row_succeeded + ) + _ = sam.patterns.large_binary_column.aws_s3.PutS3ApiCall + _ = sam.patterns.large_binary_column.aws_s3.PutS3Result + _ = sam.patterns.large_binary_column.aws_s3.put_s3 + + _ = sam.patterns.large_binary_column.local + _ = sam.patterns.large_binary_column.local.WriteFileBackedColumnResult + _ = sam.patterns.large_binary_column.local.write_file_backed_column + _ = ( + sam.patterns.large_binary_column.local.clean_up_new_file_when_create_or_update_row_failed + ) + _ = ( + sam.patterns.large_binary_column.local.clean_up_old_file_when_update_row_succeeded + ) + _ = sam.patterns.large_binary_column.local.WriteFileApiCall + _ = sam.patterns.large_binary_column.local.WriteFileResult + _ = sam.patterns.large_binary_column.local.write_file + if __name__ == "__main__": from sqlalchemy_mate.tests.api import run_cov_test diff --git a/tests/types/test_types_json_serializable.py b/tests/types/test_types_json_serializable.py index 0bfa9ba..949eebe 100644 --- a/tests/types/test_types_json_serializable.py +++ b/tests/types/test_types_json_serializable.py @@ -63,6 +63,7 @@ def test_exception(self): def test_read_and_write(self): with orm.Session(self.engine) as ses: user = ses.get(User, self.id_) + print(user.profile) assert isinstance(user.profile, Profile) assert user.profile.dob == self.profile.dob
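Editorial note: test_execute_write above pins down the contract of the execute_write helper: it returns True when a write actually happened and False when an existence check short-circuits it. A minimal illustrative reimplementation of that contract follows; this is a sketch inferred from the test's assertions, not the library's actual code, and the name execute_write_sketch is hypothetical.

def execute_write_sketch(
    write_function,
    write_kwargs,
    check_exists_function=None,
    check_exists_kwargs=None,
) -> bool:
    # skip the write if the existence check says the target is already there
    if check_exists_function is not None and check_exists_function(
        **check_exists_kwargs
    ):
        return False
    write_function(**write_kwargs)
    return True  # a write was performed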