From 8c8abb5c4c258e32941110a9ce0938e1328290b3 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 11 Dec 2023 17:50:32 +0100 Subject: [PATCH] Add SQLite Catalog support (#178) Co-authored-by: Brian "bits" Olsen --- mkdocs/docs/configuration.md | 21 +- poetry.lock | 19 +- pyiceberg/catalog/__init__.py | 2 +- pyproject.toml | 1 + tests/catalog/test_sql.py | 560 ++++++++++++++++++++++++---------- 5 files changed, 433 insertions(+), 170 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index c74f1bea25..fa861908f9 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -140,8 +140,9 @@ catalog: ## SQL Catalog -The SQL catalog requires a database for its backend. As of now, pyiceberg only supports PostgreSQL through psycopg2. -The database connection has to be configured using the `uri` property (see SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls)): +The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL (through psycopg2) and SQLite. The database connection has to be configured using the `uri` property. See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls): + +For PostgreSQL: ```yaml catalog: @@ -150,6 +151,22 @@ catalog: uri: postgresql+psycopg2://username:password@localhost/mydatabase ``` +In the case of SQLite: + + + +!!! warning inline end "Development only" + SQLite is not built for concurrency; you should use this catalog for exploratory or development purposes. 
+ + + +```yaml +catalog: + default: + type: sql + uri: sqlite:////tmp/pyiceberg.db +``` + ## Hive Catalog ```yaml diff --git a/poetry.lock b/poetry.lock index 678c4f3c14..ac709d6ecf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2160,7 +2160,6 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -2169,8 +2168,6 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, - 
{file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -2532,6 +2529,20 @@ importlib-metadata = {version = ">=4", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "types-docutils"] +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "pytest-mock" version = "3.12.0" @@ -3575,4 +3586,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "4fc73f7d4d8d05e60386a9bd5f1931f9d9bd6797e236614152d39db875a470f5" +content-hash = "585abae5d6303e325f4a0451d367e831f1f8ecd292cb3679564173bdf6ff7de3" diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index c83fd1c792..993be87ddd 100644 --- a/pyiceberg/catalog/__init__.py +++ 
b/pyiceberg/catalog/__init__.py @@ -153,7 +153,7 @@ def infer_catalog_type(name: str, catalog_properties: RecursiveDict) -> Optional return CatalogType.REST elif uri.startswith("thrift"): return CatalogType.HIVE - elif uri.startswith("postgresql"): + elif uri.startswith(("sqlite", "postgresql")): return CatalogType.SQL else: raise ValueError(f"Could not infer the catalog type from the uri: {uri}") diff --git a/pyproject.toml b/pyproject.toml index 5b907ec3aa..a3d0bbb464 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ sqlalchemy = { version = "^2.0.18", optional = true } [tool.poetry.dev-dependencies] pytest = "7.4.3" pytest-checkdocs = "2.10.1" +pytest-lazy-fixture = "0.6.3" pre-commit = "3.5.0" fastavro = "1.9.1" coverage = { version = "^7.3.2", extras = ["toml"] } diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 56d2c16c10..95dc24ad15 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -21,6 +21,7 @@ import pytest from pytest import TempPathFactory +from pytest_lazyfixture import lazy_fixture from sqlalchemy.exc import ArgumentError, IntegrityError from pyiceberg.catalog import Identifier @@ -62,16 +63,28 @@ def fixture_another_random_identifier(warehouse: Path, database_name: str, table return database_name, table_name -@pytest.fixture(name="test_catalog", scope="module") -def fixture_test_catalog(warehouse: Path) -> Generator[SqlCatalog, None, None]: +@pytest.fixture(scope="module") +def catalog_memory(warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": "sqlite+pysqlite:///:memory:", "warehouse": f"file://{warehouse}", } - test_catalog = SqlCatalog("test_sql_catalog", **props) - test_catalog.create_tables() - yield test_catalog - test_catalog.destroy_tables() + catalog = SqlCatalog("test_sql_catalog", **props) + catalog.create_tables() + yield catalog + catalog.destroy_tables() + + +@pytest.fixture(scope="module") +def catalog_sqlite(warehouse: Path) -> Generator[SqlCatalog, 
None, None]: + props = { + "uri": "sqlite:////tmp/sql-catalog.db", + "warehouse": f"file://{warehouse}", + } + catalog = SqlCatalog("test_sql_catalog", **props) + catalog.create_tables() + yield catalog + catalog.destroy_tables() def test_creation_with_no_uri() -> None: @@ -84,309 +97,516 @@ def test_creation_with_unsupported_uri() -> None: SqlCatalog("test_ddb_catalog", uri="unsupported:xxx") -def test_create_tables_idempotency(test_catalog: SqlCatalog) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_tables_idempotency(catalog: SqlCatalog) -> None: # Second initialization should not fail even if tables are already created - test_catalog.create_tables() - test_catalog.create_tables() + catalog.create_tables() + catalog.create_tables() -def test_create_table_default_sort_order( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier -) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) + catalog.create_namespace(database_name) + table = catalog.create_table(random_identifier, table_schema_nested) assert table.sort_order().order_id == 0, "Order ID must match" assert table.sort_order().is_unsorted is True, "Order must be unsorted" - test_catalog.drop_table(random_identifier) + catalog.drop_table(random_identifier) -def test_create_table_custom_sort_order( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier -) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], 
+) +def test_create_table_custom_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) + catalog.create_namespace(database_name) order = SortOrder(SortField(source_id=2, transform=IdentityTransform(), null_order=NullOrder.NULLS_FIRST)) - table = test_catalog.create_table(random_identifier, table_schema_nested, sort_order=order) + table = catalog.create_table(random_identifier, table_schema_nested, sort_order=order) given_sort_order = table.sort_order() assert given_sort_order.order_id == 1, "Order ID must match" assert len(given_sort_order.fields) == 1, "Order must have 1 field" assert given_sort_order.fields[0].direction == SortDirection.ASC, "Direction must match" assert given_sort_order.fields[0].null_order == NullOrder.NULLS_FIRST, "Null order must match" assert isinstance(given_sort_order.fields[0].transform, IdentityTransform), "Transform must match" - test_catalog.drop_table(random_identifier) + catalog.drop_table(random_identifier) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) def test_create_table_with_default_warehouse_location( - warehouse: Path, test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier + warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier ) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - test_catalog.create_table(random_identifier, table_schema_nested) - table = test_catalog.load_table(random_identifier) - assert table.identifier == (test_catalog.name,) + random_identifier + catalog.create_namespace(database_name) + catalog.create_table(random_identifier, table_schema_nested) + table = catalog.load_table(random_identifier) + assert table.identifier == (catalog.name,) + random_identifier assert 
table.metadata_location.startswith(f"file://{warehouse}") assert os.path.exists(table.metadata_location[len("file://") :]) - test_catalog.drop_table(random_identifier) + catalog.drop_table(random_identifier) -def test_create_duplicated_table(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - test_catalog.create_table(random_identifier, table_schema_nested) + catalog.create_namespace(database_name) + catalog.create_table(random_identifier, table_schema_nested) with pytest.raises(TableAlreadyExistsError): - test_catalog.create_table(random_identifier, table_schema_nested) + catalog.create_table(random_identifier, table_schema_nested) -def test_create_table_with_non_existing_namespace(test_catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_table_with_non_existing_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: identifier = ("invalid", table_name) with pytest.raises(NoSuchNamespaceError): - test_catalog.create_table(identifier, table_schema_nested) + catalog.create_table(identifier, table_schema_nested) -def test_create_table_without_namespace(test_catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_table_without_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: with pytest.raises(ValueError): - 
test_catalog.create_table(table_name, table_schema_nested) + catalog.create_table(table_name, table_schema_nested) -def test_register_table(test_catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_register_table(catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - table = test_catalog.register_table(random_identifier, metadata_location) - assert table.identifier == (test_catalog.name,) + random_identifier + catalog.create_namespace(database_name) + table = catalog.register_table(random_identifier, metadata_location) + assert table.identifier == (catalog.name,) + random_identifier assert table.metadata_location == metadata_location assert os.path.exists(metadata_location) - test_catalog.drop_table(random_identifier) + catalog.drop_table(random_identifier) -def test_register_existing_table(test_catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_register_existing_table(catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - test_catalog.register_table(random_identifier, metadata_location) + catalog.create_namespace(database_name) + catalog.register_table(random_identifier, metadata_location) with pytest.raises(TableAlreadyExistsError): - test_catalog.register_table(random_identifier, metadata_location) + catalog.register_table(random_identifier, metadata_location) -def test_register_table_with_non_existing_namespace(test_catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: 
+@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_register_table_with_non_existing_namespace(catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: identifier = ("invalid", table_name) with pytest.raises(NoSuchNamespaceError): - test_catalog.register_table(identifier, metadata_location) + catalog.register_table(identifier, metadata_location) -def test_register_table_without_namespace(test_catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_register_table_without_namespace(catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: with pytest.raises(ValueError): - test_catalog.register_table(table_name, metadata_location) + catalog.register_table(table_name, metadata_location) -def test_load_table(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_load_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) - loaded_table = test_catalog.load_table(random_identifier) + catalog.create_namespace(database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + loaded_table = catalog.load_table(random_identifier) assert table.identifier == loaded_table.identifier assert table.metadata_location == loaded_table.metadata_location assert table.metadata == loaded_table.metadata -def test_load_table_from_self_identifier( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier -) -> None: 
+@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) - intermediate = test_catalog.load_table(random_identifier) - assert intermediate.identifier == (test_catalog.name,) + random_identifier - loaded_table = test_catalog.load_table(intermediate.identifier) + catalog.create_namespace(database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + intermediate = catalog.load_table(random_identifier) + assert intermediate.identifier == (catalog.name,) + random_identifier + loaded_table = catalog.load_table(intermediate.identifier) assert table.identifier == loaded_table.identifier assert table.metadata_location == loaded_table.metadata_location assert table.metadata == loaded_table.metadata -def test_drop_table(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (test_catalog.name,) + random_identifier - test_catalog.drop_table(random_identifier) + catalog.create_namespace(database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + random_identifier + catalog.drop_table(random_identifier) with pytest.raises(NoSuchTableError): - 
test_catalog.load_table(random_identifier) + catalog.load_table(random_identifier) -def test_drop_table_from_self_identifier( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier -) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, _table_name = random_identifier - test_catalog.create_namespace(database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (test_catalog.name,) + random_identifier - test_catalog.drop_table(table.identifier) + catalog.create_namespace(database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + random_identifier + catalog.drop_table(table.identifier) with pytest.raises(NoSuchTableError): - test_catalog.load_table(table.identifier) + catalog.load_table(table.identifier) with pytest.raises(NoSuchTableError): - test_catalog.load_table(random_identifier) + catalog.load_table(random_identifier) -def test_drop_table_that_does_not_exist(test_catalog: SqlCatalog, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_drop_table_that_does_not_exist(catalog: SqlCatalog, random_identifier: Identifier) -> None: with pytest.raises(NoSuchTableError): - test_catalog.drop_table(random_identifier) + catalog.drop_table(random_identifier) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) def test_rename_table( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, 
random_identifier: Identifier, another_random_identifier: Identifier ) -> None: from_database_name, _from_table_name = random_identifier to_database_name, _to_table_name = another_random_identifier - test_catalog.create_namespace(from_database_name) - test_catalog.create_namespace(to_database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (test_catalog.name,) + random_identifier - test_catalog.rename_table(random_identifier, another_random_identifier) - new_table = test_catalog.load_table(another_random_identifier) - assert new_table.identifier == (test_catalog.name,) + another_random_identifier + catalog.create_namespace(from_database_name) + catalog.create_namespace(to_database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + random_identifier + catalog.rename_table(random_identifier, another_random_identifier) + new_table = catalog.load_table(another_random_identifier) + assert new_table.identifier == (catalog.name,) + another_random_identifier assert new_table.metadata_location == table.metadata_location with pytest.raises(NoSuchTableError): - test_catalog.load_table(random_identifier) + catalog.load_table(random_identifier) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) def test_rename_table_from_self_identifier( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier ) -> None: from_database_name, _from_table_name = random_identifier to_database_name, _to_table_name = another_random_identifier - test_catalog.create_namespace(from_database_name) - test_catalog.create_namespace(to_database_name) - table = test_catalog.create_table(random_identifier, 
table_schema_nested) - assert table.identifier == (test_catalog.name,) + random_identifier - test_catalog.rename_table(table.identifier, another_random_identifier) - new_table = test_catalog.load_table(another_random_identifier) - assert new_table.identifier == (test_catalog.name,) + another_random_identifier + catalog.create_namespace(from_database_name) + catalog.create_namespace(to_database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + random_identifier + catalog.rename_table(table.identifier, another_random_identifier) + new_table = catalog.load_table(another_random_identifier) + assert new_table.identifier == (catalog.name,) + another_random_identifier assert new_table.metadata_location == table.metadata_location with pytest.raises(NoSuchTableError): - test_catalog.load_table(table.identifier) + catalog.load_table(table.identifier) with pytest.raises(NoSuchTableError): - test_catalog.load_table(random_identifier) + catalog.load_table(random_identifier) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) def test_rename_table_to_existing_one( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier ) -> None: from_database_name, _from_table_name = random_identifier to_database_name, _to_table_name = another_random_identifier - test_catalog.create_namespace(from_database_name) - test_catalog.create_namespace(to_database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (test_catalog.name,) + random_identifier - new_table = test_catalog.create_table(another_random_identifier, table_schema_nested) - assert new_table.identifier == (test_catalog.name,) + 
another_random_identifier + catalog.create_namespace(from_database_name) + catalog.create_namespace(to_database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + random_identifier + new_table = catalog.create_table(another_random_identifier, table_schema_nested) + assert new_table.identifier == (catalog.name,) + another_random_identifier with pytest.raises(TableAlreadyExistsError): - test_catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(random_identifier, another_random_identifier) -def test_rename_missing_table( - test_catalog: SqlCatalog, random_identifier: Identifier, another_random_identifier: Identifier -) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_rename_missing_table(catalog: SqlCatalog, random_identifier: Identifier, another_random_identifier: Identifier) -> None: to_database_name, _to_table_name = another_random_identifier - test_catalog.create_namespace(to_database_name) + catalog.create_namespace(to_database_name) with pytest.raises(NoSuchTableError): - test_catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(random_identifier, another_random_identifier) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) def test_rename_table_to_missing_namespace( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier ) -> None: from_database_name, _from_table_name = random_identifier - test_catalog.create_namespace(from_database_name) - table = test_catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (test_catalog.name,) + 
random_identifier + catalog.create_namespace(from_database_name) + table = catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + random_identifier with pytest.raises(NoSuchNamespaceError): - test_catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(random_identifier, another_random_identifier) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) def test_list_tables( - test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier ) -> None: database_name_1, _table_name_1 = random_identifier database_name_2, _table_name_2 = another_random_identifier - test_catalog.create_namespace(database_name_1) - test_catalog.create_namespace(database_name_2) - test_catalog.create_table(random_identifier, table_schema_nested) - test_catalog.create_table(another_random_identifier, table_schema_nested) - identifier_list = test_catalog.list_tables(database_name_1) + catalog.create_namespace(database_name_1) + catalog.create_namespace(database_name_2) + catalog.create_table(random_identifier, table_schema_nested) + catalog.create_table(another_random_identifier, table_schema_nested) + identifier_list = catalog.list_tables(database_name_1) assert len(identifier_list) == 1 assert random_identifier in identifier_list - identifier_list = test_catalog.list_tables(database_name_2) + identifier_list = catalog.list_tables(database_name_2) assert len(identifier_list) == 1 assert another_random_identifier in identifier_list -def test_create_namespace(test_catalog: SqlCatalog, database_name: str) -> None: - test_catalog.create_namespace(database_name) - assert (database_name,) in test_catalog.list_namespaces() +@pytest.mark.parametrize( + 'catalog', + [ + 
lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_namespace(catalog: SqlCatalog, database_name: str) -> None: + catalog.create_namespace(database_name) + assert (database_name,) in catalog.list_namespaces() -def test_create_duplicate_namespace(test_catalog: SqlCatalog, database_name: str) -> None: - test_catalog.create_namespace(database_name) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_duplicate_namespace(catalog: SqlCatalog, database_name: str) -> None: + catalog.create_namespace(database_name) with pytest.raises(NamespaceAlreadyExistsError): - test_catalog.create_namespace(database_name) + catalog.create_namespace(database_name) -def test_create_namespaces_sharing_same_prefix(test_catalog: SqlCatalog, database_name: str) -> None: - test_catalog.create_namespace(database_name + "_1") +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_namespaces_sharing_same_prefix(catalog: SqlCatalog, database_name: str) -> None: + catalog.create_namespace(database_name + "_1") # Second namespace is a prefix of the first one, make sure it can be added. 
- test_catalog.create_namespace(database_name) + catalog.create_namespace(database_name) -def test_create_namespace_with_comment_and_location(test_catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, database_name: str) -> None: test_location = "/test/location" test_properties = { "comment": "this is a test description", "location": test_location, } - test_catalog.create_namespace(namespace=database_name, properties=test_properties) - loaded_database_list = test_catalog.list_namespaces() + catalog.create_namespace(namespace=database_name, properties=test_properties) + loaded_database_list = catalog.list_namespaces() assert (database_name,) in loaded_database_list - properties = test_catalog.load_namespace_properties(database_name) + properties = catalog.load_namespace_properties(database_name) assert properties["comment"] == "this is a test description" assert properties["location"] == test_location +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) @pytest.mark.filterwarnings("ignore") -def test_create_namespace_with_null_properties(test_catalog: SqlCatalog, database_name: str) -> None: +def test_create_namespace_with_null_properties(catalog: SqlCatalog, database_name: str) -> None: with pytest.raises(IntegrityError): - test_catalog.create_namespace(namespace=database_name, properties={None: "value"}) # type: ignore + catalog.create_namespace(namespace=database_name, properties={None: "value"}) # type: ignore with pytest.raises(IntegrityError): - test_catalog.create_namespace(namespace=database_name, properties={"key": None}) # type: ignore + catalog.create_namespace(namespace=database_name, properties={"key": None}) # type: ignore -def test_list_namespaces(test_catalog: SqlCatalog, database_list: List[str]) 
-> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_list_namespaces(catalog: SqlCatalog, database_list: List[str]) -> None: for database_name in database_list: - test_catalog.create_namespace(database_name) - db_list = test_catalog.list_namespaces() + catalog.create_namespace(database_name) + db_list = catalog.list_namespaces() for database_name in database_list: assert (database_name,) in db_list - assert len(test_catalog.list_namespaces(database_name)) == 1 + assert len(catalog.list_namespaces(database_name)) == 1 -def test_list_non_existing_namespaces(test_catalog: SqlCatalog) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None: with pytest.raises(NoSuchNamespaceError): - test_catalog.list_namespaces("does_not_exist") + catalog.list_namespaces("does_not_exist") -def test_drop_namespace(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: database_name, table_name = random_identifier - test_catalog.create_namespace(database_name) - assert (database_name,) in test_catalog.list_namespaces() - test_catalog.create_table((database_name, table_name), table_schema_nested) + catalog.create_namespace(database_name) + assert (database_name,) in catalog.list_namespaces() + catalog.create_table((database_name, table_name), table_schema_nested) with pytest.raises(NamespaceNotEmptyError): - test_catalog.drop_namespace(database_name) - test_catalog.drop_table((database_name, table_name)) - test_catalog.drop_namespace(database_name) - assert (database_name,) not in 
test_catalog.list_namespaces() - - -def test_load_namespace_properties(test_catalog: SqlCatalog, database_name: str) -> None: + catalog.drop_namespace(database_name) + catalog.drop_table((database_name, table_name)) + catalog.drop_namespace(database_name) + assert (database_name,) not in catalog.list_namespaces() + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_load_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: warehouse_location = "/test/location" test_properties = { "comment": "this is a test description", @@ -396,20 +616,34 @@ def test_load_namespace_properties(test_catalog: SqlCatalog, database_name: str) "test_property3": "3", } - test_catalog.create_namespace(database_name, test_properties) - listed_properties = test_catalog.load_namespace_properties(database_name) + catalog.create_namespace(database_name, test_properties) + listed_properties = catalog.load_namespace_properties(database_name) for k, v in listed_properties.items(): assert k in test_properties assert v == test_properties[k] -def test_load_empty_namespace_properties(test_catalog: SqlCatalog, database_name: str) -> None: - test_catalog.create_namespace(database_name) - listed_properties = test_catalog.load_namespace_properties(database_name) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_load_empty_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: + catalog.create_namespace(database_name) + listed_properties = catalog.load_namespace_properties(database_name) assert listed_properties == {"exists": "true"} -def test_update_namespace_properties(test_catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_update_namespace_properties(catalog: SqlCatalog, database_name: 
str) -> None: warehouse_location = "/test/location" test_properties = { "comment": "this is a test description", @@ -420,8 +654,8 @@ def test_update_namespace_properties(test_catalog: SqlCatalog, database_name: st } removals = {"test_property1", "test_property2", "test_property3", "should_not_removed"} updates = {"test_property4": "4", "test_property5": "5", "comment": "updated test description"} - test_catalog.create_namespace(database_name, test_properties) - update_report = test_catalog.update_namespace_properties(database_name, removals, updates) + catalog.create_namespace(database_name, test_properties) + update_report = catalog.update_namespace_properties(database_name, removals, updates) for k in updates.keys(): assert k in update_report.updated for k in removals: @@ -429,4 +663,4 @@ def test_update_namespace_properties(test_catalog: SqlCatalog, database_name: st assert k in update_report.missing else: assert k in update_report.removed - assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"] + assert "updated test description" == catalog.load_namespace_properties(database_name)["comment"]