diff --git a/examples/python/jupyter_notebook/CipherStash-Getting-Started.ipynb b/examples/python/jupyter_notebook/CipherStash-Getting-Started.ipynb index b43d906..090f4d9 100644 --- a/examples/python/jupyter_notebook/CipherStash-Getting-Started.ipynb +++ b/examples/python/jupyter_notebook/CipherStash-Getting-Started.ipynb @@ -7,7 +7,7 @@ "source": [ "# Getting Started with CipherStash and Jupyter Notebook\n", "\n", - "This notebook describes how to get started with CipherStash using Python3, Jupyter Notebook, psycopg2 and SQLAlchemy.and\n", + "This notebook describes how to get started with CipherStash using Python3, Jupyter Notebook, psycopg2 and Django.\n", "\n", "## Prerequisites\n", "\n", @@ -48,6 +48,69 @@ "Otherwise, sign up to [CipherStash](https://cipherstash.com/signup), install [CipherStash CLI](https://github.com/cipherstash/cli-releases/releases/latest), and do the following steps:" ] }, + { + "cell_type": "markdown", + "id": "1847f62f-e77d-4fe7-b7e6-3356b405beee", + "metadata": {}, + "source": [ + "### Install dependency packages\n", + "As we will need to restart the Jupyter notebook kernel after installation, install `psycopg2`, `django` and `eqlpy` (from a specific branch).\n", + "Also, currently we only support psycopg2, not psycopg 3. Uninstall it if you have it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1afdc85e-20ec-442a-a1d0-5a00ca0d0cba", + "metadata": {}, + "outputs": [], + "source": [ + "%pip uninstall --yes psycopg eqlpy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b0a726a-96e0-4fc9-a846-7f4d747ba847", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install psycopg2 django" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91fbf4ee-a195-45c1-8e65-05ed60ef1311", + "metadata": {}, + "outputs": [], + "source": [ + "! 
git clone https://github.com/cipherstash/eqlpy.git --branch cip-981-add-dgjango-orm-support" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96c523b8-89a8-4e96-836c-c24c71b057bb", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -e eqlpy/" + ] + }, + { + "cell_type": "markdown", + "id": "8f233255-5e16-4025-8a4f-ee76a1504566", + "metadata": {}, + "source": [ + "### Restart kernel\n", + "\n", + "From the top menu bar of Jupyter notebook (not the browser), select \"Kernel\" then \"Restart kernel\".\n", + "When you get prompted whether you want to restart the kernel, select \"Restart\".\n", + "\n", + "**Without this step, you might not have access to eqlpy**" + ] + }, { "cell_type": "markdown", "id": "0d2fd39b-86a2-4e34-a214-7c9bf13f9579", @@ -203,7 +266,7 @@ "metadata": {}, "outputs": [], "source": [ - "%env CS_ENCRYPTION__CLIENT_KEY=a4627031a16b7065726d75746174696f6e9008090a040b000e050c0d020103060f076770325f66726f6da16b7065726d75746174696f6e900d04090e07050f0c030b0608000a01026570325f746fa16b7065726d75746174696f6e90030b0f08070201060d0c05090e00040a627033a16b7065726d75746174696f6e9821031018190a04020e0815181b181c18200c05181d14130900181e0d0712161701181a1818060f11181f0b" + "%env CS_ENCRYPTION__CLIENT_KEY=" ] }, { @@ -220,7 +283,9 @@ "cell_type": "code", "execution_count": null, "id": "dd60b2fa-14c0-4206-b5c8-74b8d83b1086", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "! docker compose up postgres -d" @@ -325,7 +390,9 @@ "cell_type": "code", "execution_count": null, "id": "3c2b1e45-7fb4-4ed4-9fdd-4d964c3d8225", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "! 
docker compose up proxy -d" @@ -338,7 +405,7 @@ "source": [ "The command above should start CipherStash Proxy.\n", "At any point, you can check the logs to see if there are any errors in your terminal window.\n", - "From the directory where your docker-copmose.yml is located (`jupyter_notebook/` by default):\n", + "From the directory where your docker-compose.yml is located (`jupyter_notebook/` by default):\n", "\n", "> docker compose logs -f proxy" ] @@ -374,26 +441,6 @@ "The constructor for `EqlText` takes the string value, the table name (`\"examples\"`) and the column name (`\"encrypted_utf8_str\"`)." ] }, - { - "cell_type": "markdown", - "id": "637d0665-e53b-40e5-b685-aa1aa4354e46", - "metadata": {}, - "source": [ - "### Install psycopg2 and sqlalchemy\n", - "\n", - "Install `psycopg2` and `sqlalchemy` if you have not done so yet:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bb1cc638-bd6b-4776-98a7-0e25a778be96", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install psycopg2 sqlalchemy eqlpy" - ] - }, { "cell_type": "markdown", "id": "1bbca0fa-4942-4e40-bffa-a3eb2027c671", @@ -401,7 +448,7 @@ "source": [ "### Import class definitions\n", "\n", - "There are some classes defined for encrytped types in this project directory.\n", + "There are some classes defined for encrypted types in this project directory.\n", "They are in `eql_types.py` in the `eqlpy` package if you are interested in implementation details." 
] }, @@ -770,7 +817,7 @@ "\n", "* Containment query\n", "* Field extraction\n", - "* WHERE with field exctraction\n", + "* WHERE with field extraction\n", "* ORDER BY with field extraction\n", "* GROUP BY with field extraction\n", "\n", @@ -1017,11 +1064,11 @@ "id": "bf498c2e-00fa-4746-85b3-c4008b55003d", "metadata": {}, "source": [ - "## Using SQLAlchemy\n", + "## Using Django ORM\n", "\n", "### Model definitions and example data\n", "\n", - "To use SQLAlchemy with CipherStash Encrypt, it is necessary to have model classes that can handle the format conversion.\n", + "To use Django ORM with CipherStash Encrypt, it is necessary to have model classes that can handle the format conversion.\n", "\n", "Import the model definition and create some records as below:" ] @@ -1029,37 +1076,89 @@ { "cell_type": "code", "execution_count": null, - "id": "24fda18e-f382-428a-9b49-d859eca04ace", + "id": "095c512d-3c04-4524-bc1d-70e120d62277", "metadata": {}, "outputs": [], "source": [ - "from sqlalchemy import create_engine, select, text\n", - "from sqlalchemy.orm import sessionmaker\n", - "from eqlpy.eqlalchemy import *\n", + "import django\n", + "from django.conf import settings\n", + "from django.db.models import Q, F, Value\n", "from datetime import date\n", - "from example_model import Example\n", + "from eqlpy.eql_types import EqlText, EqlFloat, EqlJsonb\n", + "from eqlpy.eqldjango import *\n", + "\n", + "# Configure Django\n", + "if not settings.configured:\n", + " settings.configure(\n", + " DEBUG=True,\n", + " INSTALLED_APPS=[\n", + " 'eqlpy.eqldjango',\n", + " ],\n", + " DATABASES={\n", + " \"default\": {\n", + " \"ENGINE\": \"django.db.backends.postgresql\",\n", + " \"NAME\": \"cipherstash_getting_started\",\n", + " \"USER\": \"postgres\",\n", + " \"PASSWORD\": \"postgres\",\n", + " \"HOST\": \"localhost\",\n", + " \"PORT\": \"6432\",\n", + " }\n", + " },\n", + " SECRET_KEY=\"some-secret-key\",\n", + " )\n", + " print(\"Django setup done.\")\n", + "else:\n", + " 
print(\"Django settings already configured. No change applied.\")\n", "\n", - "# Creating engine. Optionally add echo=True to see the SQL statetments dumped to stdout\n", - "engine = create_engine('postgresql://postgres:postgres@localhost:6432/cipherstash_getting_started')\n", - "Session = sessionmaker(bind=engine)\n", - "session = Session()\n", + "django.setup()\n", "\n", - "BaseModel.metadata.create_all(engine) # Create table for models if it's not created yetbelow and\n", + "from example_model_django import Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24fda18e-f382-428a-9b49-d859eca04ace", + "metadata": {}, + "outputs": [], + "source": [ + "from example_model_django import Example\n", "\n", - "# Clear data if there is any from previous runs\n", - "session.query(Example).delete()\n", + "import os\n", + "os.environ[\"DJANGO_ALLOW_ASYNC_UNSAFE\"] = \"false\"\n", "\n", - "ex = Example(e_utf8_str = \"example record 1\", e_jsonb = {'a': {'b': 1}}, e_int = 42, e_float = 3.14, e_date = date(2024, 10, 25), e_bool=False)\n", - "session.add(ex)\n", - "session.commit()\n", + "Example.objects.all().delete()\n", "\n", - "ex = Example(e_utf8_str = \"example record 2\", e_jsonb = {'a': {'c': 2}}, e_int = 43, e_float = 1.41, e_date = date(2024, 10, 26), e_bool=True)\n", - "session.add(ex)\n", - "session.commit()\n", "\n", - "ex = Example(e_utf8_str = \"example record 3\", e_jsonb = {'a': {'b': 1}}, e_int = 44, e_float = 2.718, e_date = date(2024, 10, 27), e_bool=True)\n", - "session.add(ex)\n", - "session.commit()\n", + "example1 = Example(\n", + " encrypted_int=42,\n", + " encrypted_boolean=False,\n", + " encrypted_utf8_str='example record 1',\n", + " encrypted_date=date(2024, 10, 25),\n", + " encrypted_float=3.14,\n", + " encrypted_jsonb={'a': {'b': 1}, 'num': 2},\n", + ")\n", + "example1.save()\n", + "\n", + "example2 = Example(\n", + " encrypted_int=43,\n", + " encrypted_boolean=True,\n", + " encrypted_utf8_str='example record 2',\n", + " 
encrypted_date=date(2024, 10, 26),\n", + " encrypted_float=1.41,\n", + " encrypted_jsonb={'a': {'c': 2}, 'num': 0},\n", + ")\n", + "example2.save()\n", + "\n", + "example3 = Example(\n", + " encrypted_int=44,\n", + " encrypted_boolean=True,\n", + " encrypted_utf8_str='example record 3',\n", + " encrypted_date=date(2024, 10, 27),\n", + " encrypted_float=2.718,\n", + " encrypted_jsonb={'a': {'b': 3}, 'num': 1},\n", + ")\n", + "example3.save()\n", "\n", "print(\"Example data creation done\")" ] @@ -1082,12 +1181,11 @@ "outputs": [], "source": [ "# UNIQUE\n", - "query_text = text('cs_unique_v1(encrypted_utf8_str) == cs_unique_v1(:term)')\n", - "query = select(Example).where(query_text).params(term=EqlText(\"example record 1\", \"examples\", \"encrypted_utf8_str\").to_db_format())\n", - "results = session.execute(query).scalars().all()\n", + "term = EqlText(\"example record 1\", \"examples\", \"encrypted_utf8_str\").to_db_format(\"unique\")\n", + "query = Q(CsEquals(CsUniqueV1(F('encrypted_utf8_str')), CsUniqueV1(Value(term))))\n", "\n", - "for e in results:\n", - " print(f\"UNIQUE query results: {e}\")\n" + "results = Example.objects.filter(query).values()\n", + "results" ] }, { @@ -1105,7 +1203,7 @@ "metadata": {}, "outputs": [], "source": [ - "results[0].encrypted_utf8_str" + "results[0]['encrypted_utf8_str']" ] }, { @@ -1115,7 +1213,7 @@ "source": [ "### Querying by partial match\n", "\n", - "Partial matching can also performed with SQLAlchemy:" + "Partial matching can also be performed with Django:" ] }, { @@ -1126,12 +1224,9 @@ "outputs": [], "source": [ "# MATCH\n", - "query_text = text('cs_match_v1(encrypted_utf8_str) @> cs_match_v1(:term)')\n", - "query = select(Example).where(query_text).params(term=EqlText(\"example record\", \"examples\", \"encrypted_utf8_str\").to_db_format())\n", - "results = session.execute(query).scalars().all()\n", - "\n", - "for e in results:\n", - " print(f\"MATCH query results: {e}\")" + "term = EqlText(\"record 3\", \"examples\",
\"encrypted_utf8_str\").to_db_format(\"match\")\n", + "query = Q(CsContains(CsMatchV1(F('encrypted_utf8_str')), CsMatchV1(Value(term))))\n", + "Example.objects.filter(query).values()" ] }, { @@ -1141,7 +1236,7 @@ "source": [ "### Query by ORE\n", "\n", - "ORE queries can be peformed too:" + "ORE queries can be performed too. `CsGt` and `CsLt` are the equivalents of `>` and `<`:" ] }, { @@ -1152,14 +1247,9 @@ "outputs": [], "source": [ "# ORE\n", - "cur.execute(\"SELECT * FROM examples WHERE cs_ore_64_8_v1(encrypted_float) > cs_ore_64_8_v1(%s)\", (EqlFloat(100.15, \"examples\", \"encrypted_float\").to_db_format(),))\n", - "\n", - "query_text = text('cs_ore_64_8_v1(encrypted_float) > cs_ore_64_8_v1(:term)')\n", - "query = select(Example).where(query_text).params(term=EqlFloat(2.0, \"examples\", \"encrypted_float\").to_db_format())\n", - "results = session.execute(query).scalars().all()\n", - "\n", - "for e in results:\n", - " print(f\"ORE query results: {e}\")" + "term = EqlFloat(2.0, \"examples\", \"encrypted_float\").to_db_format(\"ore\")\n", + "query = Q(CsGt(CsOre648V1(F('encrypted_float')), CsOre648V1(Value(term))))\n", + "Example.objects.filter(query).all().values()" ] }, { @@ -1169,7 +1259,7 @@ "source": [ "### Querying by JSONB containment\n", "\n", - "These records can be queried by JSONB containment too:" + "These records can be queried by JSONB containment too.
`CsContains` is the equivalent of `@>`:" ] }, { @@ -1180,19 +1270,17 @@ "outputs": [], "source": [ "# JSONB\n", - "\n", - "query_text = text(\n", - " \"cs_ste_vec_v1(encrypted_jsonb) @> cs_ste_vec_v1(:term)\"\n", - ")\n", - "\n", - "query = select(Example).where(query_text).params(\n", - " term=EqlJsonb({'a': { 'b': 1 } }, \"examples\", \"encrypted_jsonb\").to_db_format(\"ste_vec\")\n", - ")\n", - "result = session.execute(query).scalars().all()\n", - "\n", - "for e in result:\n", - " print(f\"JSONB results: {e}\")\n", - " print(f\"JSONB field: {e.encrypted_jsonb}\")\n" + "term = EqlJsonb({\"a\": {}}, \"examples\", \"encrypted_jsonb\").to_db_format(\"ste_vec\")\n", + "query = Q(CsContains(CsSteVecV1(F('encrypted_jsonb')), CsSteVecV1(Value(term))))\n", + "Example.objects.filter(query).values()" + ] + }, + { + "cell_type": "markdown", + "id": "5f9d3433-1773-4ef2-b3eb-a02a38807ae4", + "metadata": {}, + "source": [ + "### JSONB field extraction" ] }, { @@ -1202,12 +1290,54 @@ "metadata": {}, "outputs": [], "source": [ - "# JSONB containment 1\n", - "cur.execute(\"SELECT * from examples WHERE cs_ste_vec_v1(encrypted_jsonb) @> cs_ste_vec_v1(%s)\", (EqlJsonb({'a': { 'b': 1 } }, \"examples\", \"encrypted_jsonb\").to_db_format(\"ste_vec\"),))\n", - "found = cur.fetchall()\n", - "for f in found:\n", - " print(f\"Record Found with JSONB query: {EqlRow(mapping, f).row}\\n\")\n", - " print(f\"JSONB inside the found record: {EqlJsonb.from_parsed_json(f['encrypted_jsonb'])}\\n\")" + "# JSONB field extraction\n", + "term = EqlJsonb(\"$.a\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\")\n", + "results = Example.objects.annotate(extracted_value = CsSteVecValueV1(F(\"encrypted_jsonb\"), Value(term))).values_list('extracted_value', flat=True)\n", + "\n", + "[EqlJsonb.from_parsed_json(json.loads(result)) for result in list(results)]" + ] + }, + { + "cell_type": "markdown", + "id": "81036bad-d048-4303-9638-0d3509bfc9d9", + "metadata": {}, + "source": [ + "### JSONB 
field extraction in WHERE clause" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4969db5-5c03-4972-9d36-67b70335a9b9", + "metadata": {}, + "outputs": [], + "source": [ + "# JSONB field extraction in WHERE clause\n", + "term1=EqlJsonb(\"$.num\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\")\n", + "term2=EqlJsonb(1, \"examples\", \"encrypted_jsonb\").to_db_format(\"ste_vec\")\n", + "query = Q(CsLt(CsSteVecTermV1(F('encrypted_jsonb'), Value(term1)), CsSteVecTermV1(Value(term2))))\n", + "Example.objects.filter(query).values()" + ] + }, + { + "cell_type": "markdown", + "id": "e4feb6f0-e275-4572-a4ee-db84f1d69f22", + "metadata": {}, + "source": [ + "### JSONB field extraction in ORDER BY clause" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df9a8237-5b68-4887-bb19-0e220e6a8f9e", + "metadata": {}, + "outputs": [], + "source": [ + "# JSONB field extraction in ORDER BY clause (desc)\n", + "term = EqlJsonb(\"$.num\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\")\n", + "results = Example.objects.order_by(CsSteVecTermV1(F(\"encrypted_jsonb\"), Value(term)).desc())\n", + "results.values()" ] }, { @@ -1225,18 +1355,17 @@ "metadata": {}, "outputs": [], "source": [ - "if len(results) > 0:\n", - " record_id = results[0].id\n", - "\n", - " results[0].encrypted_utf8_str = 'example record 1 UPDATED'\n", - " results[0].encrypted_jsonb = json.dumps({'z': {'y': 0}})\n", - " session.commit()\n", - "\n", - " updated = session.query(Example).where(Example.id == record_id).first()\n", - "\n", - " print(f\"Updated record: {updated}\")\n", - "else:\n", - " print(\"Unexpected: results are empty\")" + "example = Example.objects.first()\n", + "print(f\"\")\n", + "print()\n", + "print(\"Updating\")\n", + "print()\n", + "example.encrypted_utf8_str = \"UPDATED STRING\"\n", + "example.encrypted_int = 128\n", + "print(\"Reloading\")\n", + "print()\n", + "reloaded = Example.objects.get(id=example.id)\n", + 
"print(f\"\")" ] }, { @@ -1257,7 +1386,7 @@ "outputs": [], "source": [ "%env PGPASSWORD=postgres\n", - "! psql -h localhost -p 5432 -U postgres -x -c \"select * from examples where id = {record_id};\" cipherstash_getting_started" + "! psql -h localhost -p 5432 -U postgres -x -c \"select * from examples where id = {reloaded.id};\" cipherstash_getting_started" ] }, { diff --git a/examples/python/jupyter_notebook/example_model_django.py b/examples/python/jupyter_notebook/example_model_django.py new file mode 100644 index 0000000..2abc26c --- /dev/null +++ b/examples/python/jupyter_notebook/example_model_django.py @@ -0,0 +1,20 @@ +from django.db import models +from eqlpy.eqldjango import * + +# Example model for Django ORM +class Example(models.Model): + encrypted_int = EncryptedInt(table="examples", column="encrypted_int", null=True) + encrypted_boolean = EncryptedBoolean( + table="examples", column="encrypted_boolean", null=True + ) + encrypted_date = EncryptedDate(table="examples", column="encrypted_date", null=True) + encrypted_float = EncryptedFloat(table="examples", column="encrypted_float", null=True) + encrypted_utf8_str = EncryptedText( + table="examples", column="encrypted_utf8_str", null=True + ) + encrypted_jsonb = EncryptedJsonb(table="examples", column="encrypted_jsonb", null=True) + + class Meta: + app_label = "eqlpy.eqldjango" + db_table = "examples" +