From f94e6edd570a587c78fa0f2bf45a15cd24fe7c5f Mon Sep 17 00:00:00 2001 From: Ben Cassell <98852248+benc-db@users.noreply.github.com> Date: Tue, 17 Dec 2024 09:00:48 -0800 Subject: [PATCH 1/3] Trying lock approach for dependency management (#878) --- CHANGELOG.md | 6 + pyproject.toml | 7 ++ uv.lock | 306 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 319 insertions(+) create mode 100644 uv.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index a4c107a9..98bdcc16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## dbt-databricks 1.9.2 (TBD) + +### Under the Hood + +- Switch to UV and locks for dependency management ([878](https://github.com/databricks/dbt-databricks/pull/878)) + ## dbt-databricks 1.9.1 (December 16, 2024) ### Features diff --git a/pyproject.toml b/pyproject.toml index d2f728d8..a42a8f4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,14 @@ check-sdist = [ "pip freeze | grep dbt-databricks", ] +[tool.hatch.env] +requires = ["hatch-pip-compile"] + [tool.hatch.envs.default] +type = "pip-compile" +pip-compile-resolver = "uv" +lock-filename = "uv.lock" +pip-compile-constraint = "default" dependencies = [ "dbt_common @ git+https://github.com/dbt-labs/dbt-common.git", "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git@main", diff --git a/uv.lock b/uv.lock new file mode 100644 index 00000000..a5783157 --- /dev/null +++ b/uv.lock @@ -0,0 +1,306 @@ +# +# This file is autogenerated by hatch-pip-compile with Python 3.9 +# +# - dbt_common@ git+https://github.com/dbt-labs/dbt-common.git +# - dbt-adapters@ git+https://github.com/dbt-labs/dbt-adapters.git@main +# - dbt-core@ git+https://github.com/dbt-labs/dbt-core.git@main#subdirectory=core +# - dbt-tests-adapter@ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter +# - dbt-spark@ git+https://github.com/dbt-labs/dbt-spark.git@main +# - pytest +# - pytest-xdist +# - pytest-dotenv +# - freezegun +# - mypy +# - pre-commit +# - ruff +# - types-requests +# - debugpy +# - pydantic<2,>=1.10.0 +# - databricks-sdk==0.17.0 +# - databricks-sql-connector<4.0.0,>=3.5.0 +# - dbt-adapters<2.0,>=1.7.0 +# - dbt-common<2.0,>=1.10.0 +# - dbt-core<2.0,>=1.8.7 +# - dbt-spark<2.0,>=1.8.0 +# - keyring>=23.13.0 +# - pydantic>=1.10.0 +# + +agate==1.9.1 + # via + # dbt-adapters + # dbt-common + # dbt-core +attrs==24.3.0 + # via + # jsonschema + # referencing +babel==2.16.0 + # via agate +backports-tarfile==1.2.0 + # via jaraco-context +cachetools==5.5.0 + # via google-auth +certifi==2024.12.14 + # via requests +cfgv==3.4.0 + # via pre-commit +charset-normalizer==3.4.0 + # via requests +click==8.1.7 + # via + # dbt-core + # dbt-semantic-interfaces +colorama==0.4.6 + # via dbt-common +daff==1.3.46 + # via dbt-core +databricks-sdk==0.17.0 + # via hatch.envs.default +databricks-sql-connector==3.6.0 + # via hatch.envs.default +dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git@e3964d76c1719baf5e3fe689d385aec1d8535d15 + # via + # hatch.envs.default + # dbt-core + # dbt-spark + # dbt-tests-adapter +dbt-common @ git+https://github.com/dbt-labs/dbt-common.git@c72ea7e3abf70ce632d30722036dd0b4afcaf330 + # via + # hatch.envs.default + # dbt-adapters + # dbt-core + # dbt-spark +dbt-core @ git+https://github.com/dbt-labs/dbt-core.git@6c61cb7f7adbdce8edec35a887d6c766a401e403#subdirectory=core + # via + # hatch.envs.default + # dbt-spark + # dbt-tests-adapter +dbt-extractor==0.5.1 + # via dbt-core +dbt-semantic-interfaces==0.8.3 + # via dbt-core +dbt-spark @ 
git+https://github.com/dbt-labs/dbt-spark.git@a38a288d7d3868c88313350f7d369223b0f03a05 + # via hatch.envs.default +dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git@e3964d76c1719baf5e3fe689d385aec1d8535d15#subdirectory=dbt-tests-adapter + # via hatch.envs.default +debugpy==1.8.11 + # via hatch.envs.default +deepdiff==7.0.1 + # via dbt-common +distlib==0.3.9 + # via virtualenv +et-xmlfile==2.0.0 + # via openpyxl +exceptiongroup==1.2.2 + # via pytest +execnet==2.1.1 + # via pytest-xdist +filelock==3.16.1 + # via virtualenv +freezegun==1.5.1 + # via + # hatch.envs.default + # dbt-tests-adapter +google-auth==2.37.0 + # via databricks-sdk +identify==2.6.3 + # via pre-commit +idna==3.10 + # via requests +importlib-metadata==6.11.0 + # via + # dbt-semantic-interfaces + # keyring +iniconfig==2.0.0 + # via pytest +isodate==0.6.1 + # via + # agate + # dbt-common +jaraco-classes==3.4.0 + # via keyring +jaraco-context==6.0.1 + # via keyring +jaraco-functools==4.1.0 + # via keyring +jinja2==3.1.4 + # via + # dbt-common + # dbt-core + # dbt-semantic-interfaces +jsonschema==4.23.0 + # via + # dbt-common + # dbt-semantic-interfaces +jsonschema-specifications==2024.10.1 + # via jsonschema +keyring==25.5.0 + # via hatch.envs.default +leather==0.4.0 + # via agate +lz4==4.3.3 + # via databricks-sql-connector +markupsafe==3.0.2 + # via jinja2 +mashumaro==3.14 + # via + # dbt-adapters + # dbt-common + # dbt-core +more-itertools==10.5.0 + # via + # dbt-semantic-interfaces + # jaraco-classes + # jaraco-functools +msgpack==1.1.0 + # via mashumaro +mypy==1.13.0 + # via hatch.envs.default +mypy-extensions==1.0.0 + # via mypy +networkx==3.2.1 + # via dbt-core +nodeenv==1.9.1 + # via pre-commit +numpy==1.26.4 + # via + # databricks-sql-connector + # pandas + # pyarrow +oauthlib==3.2.2 + # via databricks-sql-connector +openpyxl==3.1.5 + # via databricks-sql-connector +ordered-set==4.1.0 + # via deepdiff +packaging==24.2 + # via + # dbt-core + # pytest +pandas==2.2.3 + # via databricks-sql-connector +parsedatetime==2.6 + # via agate +pathspec==0.12.1 + # via + # dbt-common + # dbt-core +platformdirs==4.3.6 + # via virtualenv +pluggy==1.5.0 + # via pytest +pre-commit==4.0.1 + # via hatch.envs.default +protobuf==5.29.1 + # via + # dbt-adapters + # dbt-common + # dbt-core +pyarrow==16.1.0 + # via databricks-sql-connector +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 + # via google-auth +pydantic==1.10.19 + # via + # hatch.envs.default + # dbt-semantic-interfaces +pytest==8.3.4 + # via + # hatch.envs.default + # pytest-dotenv + # pytest-xdist +pytest-dotenv==0.5.2 + # via hatch.envs.default +pytest-xdist==3.6.1 + # via hatch.envs.default +python-dateutil==2.9.0.post0 + # via + # dbt-common + # dbt-semantic-interfaces + # freezegun + # pandas +python-dotenv==1.0.1 + # via pytest-dotenv +python-slugify==8.0.4 + # via agate +pytimeparse==1.1.8 + # via agate +pytz==2024.2 + # via + # dbt-adapters + # dbt-core + # pandas +pyyaml==6.0.2 + # via + # dbt-core + # dbt-semantic-interfaces + # dbt-tests-adapter + # pre-commit +referencing==0.35.1 + # via + # jsonschema + # jsonschema-specifications +requests==2.32.3 + # via + # databricks-sdk + # databricks-sql-connector + # dbt-common + # dbt-core + # snowplow-tracker +rpds-py==0.22.3 + # via + # jsonschema + # referencing +rsa==4.9 + # via google-auth +ruff==0.8.3 + # via hatch.envs.default +six==1.17.0 + # via + # isodate + # python-dateutil + # thrift +snowplow-tracker==1.0.4 + # via dbt-core +sqlparams==6.1.0 + # via dbt-spark 
+sqlparse==0.5.3 + # via dbt-core +text-unidecode==1.3 + # via python-slugify +thrift==0.20.0 + # via databricks-sql-connector +tomli==2.2.1 + # via + # mypy + # pytest +types-requests==2.32.0.20241016 + # via + # hatch.envs.default + # snowplow-tracker +typing-extensions==4.12.2 + # via + # dbt-adapters + # dbt-common + # dbt-core + # dbt-semantic-interfaces + # mashumaro + # mypy + # pydantic + # snowplow-tracker +tzdata==2024.2 + # via pandas +urllib3==2.2.3 + # via + # databricks-sql-connector + # requests + # types-requests +virtualenv==20.28.0 + # via pre-commit +zipp==3.21.0 + # via importlib-metadata From 477b74582f2758847d1fa7e2b2968c7b32e8220e Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Tue, 17 Dec 2024 09:16:47 -0800 Subject: [PATCH 2/3] Revert "Trying lock approach for dependency management (#878)" This reverts commit f94e6edd570a587c78fa0f2bf45a15cd24fe7c5f. --- CHANGELOG.md | 6 - pyproject.toml | 7 -- uv.lock | 306 ------------------------------------------------- 3 files changed, 319 deletions(-) delete mode 100644 uv.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index 98bdcc16..a4c107a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,3 @@ -## dbt-databricks 1.9.2 (TBD) - -### Under the Hood - -- Switch to UV and locks for dependency management ([878](https://github.com/databricks/dbt-databricks/pull/878)) - ## dbt-databricks 1.9.1 (December 16, 2024) ### Features diff --git a/pyproject.toml b/pyproject.toml index a42a8f4e..d2f728d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,14 +62,7 @@ check-sdist = [ "pip freeze | grep dbt-databricks", ] -[tool.hatch.env] -requires = ["hatch-pip-compile"] - [tool.hatch.envs.default] -type = "pip-compile" -pip-compile-resolver = "uv" -lock-filename = "uv.lock" -pip-compile-constraint = "default" dependencies = [ "dbt_common @ git+https://github.com/dbt-labs/dbt-common.git", "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git@main", diff --git a/uv.lock b/uv.lock deleted file mode 100644 index a5783157..00000000 --- a/uv.lock +++ /dev/null @@ -1,306 +0,0 @@ -# -# This file is autogenerated by hatch-pip-compile with Python 3.9 -# -# - dbt_common@ git+https://github.com/dbt-labs/dbt-common.git -# - dbt-adapters@ git+https://github.com/dbt-labs/dbt-adapters.git@main -# - dbt-core@ git+https://github.com/dbt-labs/dbt-core.git@main#subdirectory=core -# - dbt-tests-adapter@ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter -# - dbt-spark@ git+https://github.com/dbt-labs/dbt-spark.git@main -# - pytest -# - pytest-xdist -# - pytest-dotenv -# - freezegun -# - mypy -# - pre-commit -# - ruff -# - types-requests -# - debugpy -# - pydantic<2,>=1.10.0 -# - databricks-sdk==0.17.0 -# - databricks-sql-connector<4.0.0,>=3.5.0 -# - dbt-adapters<2.0,>=1.7.0 -# - dbt-common<2.0,>=1.10.0 -# - dbt-core<2.0,>=1.8.7 -# - dbt-spark<2.0,>=1.8.0 -# - keyring>=23.13.0 -# - pydantic>=1.10.0 -# - -agate==1.9.1 - # via - # dbt-adapters - # dbt-common - # dbt-core -attrs==24.3.0 - # via - # jsonschema - # referencing -babel==2.16.0 - # via agate -backports-tarfile==1.2.0 - # via jaraco-context -cachetools==5.5.0 - # via google-auth -certifi==2024.12.14 - # via requests -cfgv==3.4.0 - # via pre-commit -charset-normalizer==3.4.0 - # via requests -click==8.1.7 - # via - # dbt-core - # dbt-semantic-interfaces -colorama==0.4.6 - # via dbt-common -daff==1.3.46 - # via dbt-core -databricks-sdk==0.17.0 - # via hatch.envs.default -databricks-sql-connector==3.6.0 - # via hatch.envs.default -dbt-adapters @ 
git+https://github.com/dbt-labs/dbt-adapters.git@e3964d76c1719baf5e3fe689d385aec1d8535d15 - # via - # hatch.envs.default - # dbt-core - # dbt-spark - # dbt-tests-adapter -dbt-common @ git+https://github.com/dbt-labs/dbt-common.git@c72ea7e3abf70ce632d30722036dd0b4afcaf330 - # via - # hatch.envs.default - # dbt-adapters - # dbt-core - # dbt-spark -dbt-core @ git+https://github.com/dbt-labs/dbt-core.git@6c61cb7f7adbdce8edec35a887d6c766a401e403#subdirectory=core - # via - # hatch.envs.default - # dbt-spark - # dbt-tests-adapter -dbt-extractor==0.5.1 - # via dbt-core -dbt-semantic-interfaces==0.8.3 - # via dbt-core -dbt-spark @ git+https://github.com/dbt-labs/dbt-spark.git@a38a288d7d3868c88313350f7d369223b0f03a05 - # via hatch.envs.default -dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git@e3964d76c1719baf5e3fe689d385aec1d8535d15#subdirectory=dbt-tests-adapter - # via hatch.envs.default -debugpy==1.8.11 - # via hatch.envs.default -deepdiff==7.0.1 - # via dbt-common -distlib==0.3.9 - # via virtualenv -et-xmlfile==2.0.0 - # via openpyxl -exceptiongroup==1.2.2 - # via pytest -execnet==2.1.1 - # via pytest-xdist -filelock==3.16.1 - # via virtualenv -freezegun==1.5.1 - # via - # hatch.envs.default - # dbt-tests-adapter -google-auth==2.37.0 - # via databricks-sdk -identify==2.6.3 - # via pre-commit -idna==3.10 - # via requests -importlib-metadata==6.11.0 - # via - # dbt-semantic-interfaces - # keyring -iniconfig==2.0.0 - # via pytest -isodate==0.6.1 - # via - # agate - # dbt-common -jaraco-classes==3.4.0 - # via keyring -jaraco-context==6.0.1 - # via keyring -jaraco-functools==4.1.0 - # via keyring -jinja2==3.1.4 - # via - # dbt-common - # dbt-core - # dbt-semantic-interfaces -jsonschema==4.23.0 - # via - # dbt-common - # dbt-semantic-interfaces -jsonschema-specifications==2024.10.1 - # via jsonschema -keyring==25.5.0 - # via hatch.envs.default -leather==0.4.0 - # via agate -lz4==4.3.3 - # via databricks-sql-connector -markupsafe==3.0.2 - # via jinja2 -mashumaro==3.14 - # via - # dbt-adapters - # dbt-common - # dbt-core -more-itertools==10.5.0 - # via - # dbt-semantic-interfaces - # jaraco-classes - # jaraco-functools -msgpack==1.1.0 - # via mashumaro -mypy==1.13.0 - # via hatch.envs.default -mypy-extensions==1.0.0 - # via mypy -networkx==3.2.1 - # via dbt-core -nodeenv==1.9.1 - # via pre-commit -numpy==1.26.4 - # via - # databricks-sql-connector - # pandas - # pyarrow -oauthlib==3.2.2 - # via databricks-sql-connector -openpyxl==3.1.5 - # via databricks-sql-connector -ordered-set==4.1.0 - # via deepdiff -packaging==24.2 - # via - # dbt-core - # pytest -pandas==2.2.3 - # via databricks-sql-connector -parsedatetime==2.6 - # via agate -pathspec==0.12.1 - # via - # dbt-common - # dbt-core -platformdirs==4.3.6 - # via virtualenv -pluggy==1.5.0 - # via pytest -pre-commit==4.0.1 - # via hatch.envs.default -protobuf==5.29.1 - # via - # dbt-adapters - # dbt-common - # dbt-core -pyarrow==16.1.0 - # via databricks-sql-connector -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pydantic==1.10.19 - # via - # hatch.envs.default - # dbt-semantic-interfaces -pytest==8.3.4 - # via - # hatch.envs.default - # pytest-dotenv - # pytest-xdist -pytest-dotenv==0.5.2 - # via hatch.envs.default -pytest-xdist==3.6.1 - # via hatch.envs.default -python-dateutil==2.9.0.post0 - # via - # dbt-common - # dbt-semantic-interfaces - # freezegun - # pandas -python-dotenv==1.0.1 - # via pytest-dotenv -python-slugify==8.0.4 - # via agate -pytimeparse==1.1.8 - # via agate 
-pytz==2024.2 - # via - # dbt-adapters - # dbt-core - # pandas -pyyaml==6.0.2 - # via - # dbt-core - # dbt-semantic-interfaces - # dbt-tests-adapter - # pre-commit -referencing==0.35.1 - # via - # jsonschema - # jsonschema-specifications -requests==2.32.3 - # via - # databricks-sdk - # databricks-sql-connector - # dbt-common - # dbt-core - # snowplow-tracker -rpds-py==0.22.3 - # via - # jsonschema - # referencing -rsa==4.9 - # via google-auth -ruff==0.8.3 - # via hatch.envs.default -six==1.17.0 - # via - # isodate - # python-dateutil - # thrift -snowplow-tracker==1.0.4 - # via dbt-core -sqlparams==6.1.0 - # via dbt-spark -sqlparse==0.5.3 - # via dbt-core -text-unidecode==1.3 - # via python-slugify -thrift==0.20.0 - # via databricks-sql-connector -tomli==2.2.1 - # via - # mypy - # pytest -types-requests==2.32.0.20241016 - # via - # hatch.envs.default - # snowplow-tracker -typing-extensions==4.12.2 - # via - # dbt-adapters - # dbt-common - # dbt-core - # dbt-semantic-interfaces - # mashumaro - # mypy - # pydantic - # snowplow-tracker -tzdata==2024.2 - # via pandas -urllib3==2.2.3 - # via - # databricks-sql-connector - # requests - # types-requests -virtualenv==20.28.0 - # via pre-commit -zipp==3.21.0 - # via importlib-metadata From 5f6412d71c12a31ba47446f1e4ec3d642691616a Mon Sep 17 00:00:00 2001 From: Ben Cassell <98852248+benc-db@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:03:34 -0800 Subject: [PATCH 3/3] Refactor reading env vars (#888) --- CHANGELOG.md | 6 +++ dbt/adapters/databricks/connections.py | 16 +++---- dbt/adapters/databricks/credentials.py | 8 ++-- dbt/adapters/databricks/global_state.py | 58 +++++++++++++++++++++++++ dbt/adapters/databricks/impl.py | 6 +-- dbt/adapters/databricks/logging.py | 4 +- tests/unit/test_adapter.py | 33 +++++++++----- 7 files changed, 102 insertions(+), 29 deletions(-) create mode 100644 dbt/adapters/databricks/global_state.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a4c107a9..59af816c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## dbt-databricks 1.9.2 (TBD) + +### Under the Hood + +- Refactor global state reading ([888](https://github.com/databricks/dbt-databricks/pull/888)) + ## dbt-databricks 1.9.1 (December 16, 2024) ### Features diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index 509686d7..0b523574 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -59,6 +59,7 @@ CursorCreate, ) from dbt.adapters.databricks.events.other_events import QueryError +from dbt.adapters.databricks.global_state import GlobalState from dbt.adapters.databricks.logging import logger from dbt.adapters.databricks.python_models.run_tracking import PythonRunTracker from dbt.adapters.databricks.utils import redact_credentials @@ -86,9 +87,6 @@ DBR_VERSION_REGEX = re.compile(r"([1-9][0-9]*)\.(x|0|[1-9][0-9]*)") -# toggle for session managements that minimizes the number of sessions opened/closed -USE_LONG_SESSIONS = os.getenv("DBT_DATABRICKS_LONG_SESSIONS", "True").upper() == "TRUE" - # Number of idle seconds before a connection is automatically closed. Only applicable if # USE_LONG_SESSIONS is true. # Updated when idle times of 180s were causing errors @@ -475,6 +473,8 @@ def add_query( auto_begin: bool = True, bindings: Optional[Any] = None, abridge_sql_log: bool = False, + retryable_exceptions: tuple[type[Exception], ...] 
= tuple(), + retry_limit: int = 1, *, close_cursor: bool = False, ) -> tuple[Connection, Any]: @@ -707,7 +707,7 @@ def get_response(cls, cursor: DatabricksSQLCursorWrapper) -> DatabricksAdapterRe class ExtendedSessionConnectionManager(DatabricksConnectionManager): def __init__(self, profile: AdapterRequiredConfig, mp_context: SpawnContext) -> None: assert ( - USE_LONG_SESSIONS + GlobalState.get_use_long_sessions() ), "This connection manager should only be used when USE_LONG_SESSIONS is enabled" super().__init__(profile, mp_context) self.threads_compute_connections: dict[ @@ -910,7 +910,7 @@ def open(cls, connection: Connection) -> Connection: # Once long session management is no longer under the USE_LONG_SESSIONS toggle # this should be renamed and replace the _open class method. assert ( - USE_LONG_SESSIONS + GlobalState.get_use_long_sessions() ), "This path, '_open2', should only be reachable with USE_LONG_SESSIONS" databricks_connection = cast(DatabricksDBTConnection, connection) @@ -1013,7 +1013,7 @@ def _get_http_path(query_header_context: Any, creds: DatabricksCredentials) -> O # If there is no node we return the http_path for the default compute. if not query_header_context: - if not USE_LONG_SESSIONS: + if not GlobalState.get_use_long_sessions(): logger.debug(f"Thread {thread_id}: using default compute resource.") return creds.http_path @@ -1021,7 +1021,7 @@ def _get_http_path(query_header_context: Any, creds: DatabricksCredentials) -> O # If none is specified return the http_path for the default compute. compute_name = _get_compute_name(query_header_context) if not compute_name: - if not USE_LONG_SESSIONS: + if not GlobalState.get_use_long_sessions(): logger.debug(f"On thread {thread_id}: {relation_name} using default compute resource.") return creds.http_path @@ -1037,7 +1037,7 @@ def _get_http_path(query_header_context: Any, creds: DatabricksCredentials) -> O f"does not specify http_path, relation: {relation_name}" ) - if not USE_LONG_SESSIONS: + if not GlobalState.get_use_long_sessions(): logger.debug( f"On thread {thread_id}: {relation_name} using compute resource '{compute_name}'." ) diff --git a/dbt/adapters/databricks/credentials.py b/dbt/adapters/databricks/credentials.py index 7a318cad..387d0e76 100644 --- a/dbt/adapters/databricks/credentials.py +++ b/dbt/adapters/databricks/credentials.py @@ -19,10 +19,10 @@ CredentialSaveError, CredentialShardEvent, ) +from dbt.adapters.databricks.global_state import GlobalState from dbt.adapters.databricks.logging import logger CATALOG_KEY_IN_SESSION_PROPERTIES = "databricks.catalog" -DBT_DATABRICKS_INVOCATION_ENV = "DBT_DATABRICKS_INVOCATION_ENV" DBT_DATABRICKS_INVOCATION_ENV_REGEX = re.compile("^[A-z0-9\\-]+$") EXTRACT_CLUSTER_ID_FROM_HTTP_PATH_REGEX = re.compile(r"/?sql/protocolv1/o/\d+/(.*)") DBT_DATABRICKS_HTTP_SESSION_HEADERS = "DBT_DATABRICKS_HTTP_SESSION_HEADERS" @@ -150,7 +150,7 @@ def validate_creds(self) -> None: @classmethod def get_invocation_env(cls) -> Optional[str]: - invocation_env = os.environ.get(DBT_DATABRICKS_INVOCATION_ENV) + invocation_env = GlobalState.get_invocation_env() if invocation_env: # Thrift doesn't allow nested () so we need to ensure # that the passed user agent is valid. 
@@ -160,9 +160,7 @@ def get_invocation_env(cls) -> Optional[str]: @classmethod def get_all_http_headers(cls, user_http_session_headers: dict[str, str]) -> dict[str, str]: - http_session_headers_str: Optional[str] = os.environ.get( - DBT_DATABRICKS_HTTP_SESSION_HEADERS - ) + http_session_headers_str = GlobalState.get_http_session_headers() http_session_headers_dict: dict[str, str] = ( { diff --git a/dbt/adapters/databricks/global_state.py b/dbt/adapters/databricks/global_state.py new file mode 100644 index 00000000..de240d39 --- /dev/null +++ b/dbt/adapters/databricks/global_state.py @@ -0,0 +1,58 @@ +import os +from typing import ClassVar, Optional + + +class GlobalState: + """Global state is a bad idea, but since we don't control instantiation, better to have it in a + single place than scattered throughout the codebase. + """ + + __use_long_sessions: ClassVar[Optional[bool]] = None + + @classmethod + def get_use_long_sessions(cls) -> bool: + if cls.__use_long_sessions is None: + cls.__use_long_sessions = ( + os.getenv("DBT_DATABRICKS_LONG_SESSIONS", "True").upper() == "TRUE" + ) + return cls.__use_long_sessions + + __invocation_env: ClassVar[Optional[str]] = None + __invocation_env_set: ClassVar[bool] = False + + @classmethod + def get_invocation_env(cls) -> Optional[str]: + if not cls.__invocation_env_set: + cls.__invocation_env = os.getenv("DBT_DATABRICKS_INVOCATION_ENV") + cls.__invocation_env_set = True + return cls.__invocation_env + + __session_headers: ClassVar[Optional[str]] = None + __session_headers_set: ClassVar[bool] = False + + @classmethod + def get_http_session_headers(cls) -> Optional[str]: + if not cls.__session_headers_set: + cls.__session_headers = os.getenv("DBT_DATABRICKS_HTTP_SESSION_HEADERS") + cls.__session_headers_set = True + return cls.__session_headers + + __describe_char_bypass: ClassVar[Optional[bool]] = None + + @classmethod + def get_char_limit_bypass(cls) -> bool: + if cls.__describe_char_bypass is None: + cls.__describe_char_bypass = ( + os.getenv("DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS", "False").upper() == "TRUE" + ) + return cls.__describe_char_bypass + + __connector_log_level: ClassVar[Optional[str]] = None + + @classmethod + def get_connector_log_level(cls) -> str: + if cls.__connector_log_level is None: + cls.__connector_log_level = os.getenv( + "DBT_DATABRICKS_CONNECTOR_LOG_LEVEL", "WARN" + ).upper() + return cls.__connector_log_level diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py index dce432c9..15c333e2 100644 --- a/dbt/adapters/databricks/impl.py +++ b/dbt/adapters/databricks/impl.py @@ -32,10 +32,10 @@ ) from dbt.adapters.databricks.column import DatabricksColumn from dbt.adapters.databricks.connections import ( - USE_LONG_SESSIONS, DatabricksConnectionManager, ExtendedSessionConnectionManager, ) +from dbt.adapters.databricks.global_state import GlobalState from dbt.adapters.databricks.python_models.python_submissions import ( AllPurposeClusterPythonJobHelper, JobClusterPythonJobHelper, @@ -142,7 +142,7 @@ def get_identifier_list_string(table_names: set[str]) -> str: """ _identifier = "|".join(table_names) - bypass_2048_char_limit = os.environ.get("DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS", "false") + bypass_2048_char_limit = GlobalState.get_char_limit_bypass() if bypass_2048_char_limit == "true": _identifier = _identifier if len(_identifier) < 2048 else "*" return _identifier @@ -154,7 +154,7 @@ class DatabricksAdapter(SparkAdapter): Relation = DatabricksRelation Column = DatabricksColumn - if USE_LONG_SESSIONS: + if 
GlobalState.get_use_long_sessions(): ConnectionManager: type[DatabricksConnectionManager] = ExtendedSessionConnectionManager else: ConnectionManager = DatabricksConnectionManager diff --git a/dbt/adapters/databricks/logging.py b/dbt/adapters/databricks/logging.py index d0f1d42b..81e7449e 100644 --- a/dbt/adapters/databricks/logging.py +++ b/dbt/adapters/databricks/logging.py @@ -1,7 +1,7 @@ -import os from logging import Handler, LogRecord, getLogger from typing import Union +from dbt.adapters.databricks.global_state import GlobalState from dbt.adapters.events.logging import AdapterLogger logger = AdapterLogger("Databricks") @@ -22,7 +22,7 @@ def emit(self, record: LogRecord) -> None: dbt_adapter_logger = AdapterLogger("databricks-sql-connector") pysql_logger = getLogger("databricks.sql") -pysql_logger_level = os.environ.get("DBT_DATABRICKS_CONNECTOR_LOG_LEVEL", "WARN").upper() +pysql_logger_level = GlobalState.get_connector_log_level() pysql_logger.setLevel(pysql_logger_level) pysql_handler = DbtCoreHandler(dbt_logger=dbt_adapter_logger, level=pysql_logger_level) diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py index 78ae12cb..d42fa5e1 100644 --- a/tests/unit/test_adapter.py +++ b/tests/unit/test_adapter.py @@ -11,8 +11,6 @@ from dbt.adapters.databricks.column import DatabricksColumn from dbt.adapters.databricks.credentials import ( CATALOG_KEY_IN_SESSION_PROPERTIES, - DBT_DATABRICKS_HTTP_SESSION_HEADERS, - DBT_DATABRICKS_INVOCATION_ENV, ) from dbt.adapters.databricks.impl import get_identifier_list_string from dbt.adapters.databricks.relation import DatabricksRelation, DatabricksRelationType @@ -114,7 +112,10 @@ def test_invalid_custom_user_agent(self): with pytest.raises(DbtValidationError) as excinfo: config = self._get_config() adapter = DatabricksAdapter(config, get_context("spawn")) - with patch.dict("os.environ", **{DBT_DATABRICKS_INVOCATION_ENV: "(Some-thing)"}): + with patch( + "dbt.adapters.databricks.global_state.GlobalState.get_invocation_env", + return_value="(Some-thing)", + ): connection = adapter.acquire_connection("dummy") connection.handle # trigger lazy-load @@ -128,8 +129,9 @@ def test_custom_user_agent(self): "dbt.adapters.databricks.connections.dbsql.connect", new=self._connect_func(expected_invocation_env="databricks-workflows"), ): - with patch.dict( - "os.environ", **{DBT_DATABRICKS_INVOCATION_ENV: "databricks-workflows"} + with patch( + "dbt.adapters.databricks.global_state.GlobalState.get_invocation_env", + return_value="databricks-workflows", ): connection = adapter.acquire_connection("dummy") connection.handle # trigger lazy-load @@ -190,9 +192,9 @@ def _test_environment_http_headers( "dbt.adapters.databricks.connections.dbsql.connect", new=self._connect_func(expected_http_headers=expected_http_headers), ): - with patch.dict( - "os.environ", - **{DBT_DATABRICKS_HTTP_SESSION_HEADERS: http_headers_str}, + with patch( + "dbt.adapters.databricks.global_state.GlobalState.get_http_session_headers", + return_value=http_headers_str, ): connection = adapter.acquire_connection("dummy") connection.handle # trigger lazy-load @@ -912,7 +914,10 @@ def test_describe_table_extended_2048_char_limit(self): assert get_identifier_list_string(table_names) == "|".join(table_names) # If environment variable is set, then limit the number of characters - with patch.dict("os.environ", **{"DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS": "true"}): + with patch( + "dbt.adapters.databricks.global_state.GlobalState.get_char_limit_bypass", + return_value="true", + ): # Long list of 
table names is capped assert get_identifier_list_string(table_names) == "*" @@ -941,7 +946,10 @@ def test_describe_table_extended_should_limit(self): table_names = set([f"customers_{i}" for i in range(200)]) # If environment variable is set, then limit the number of characters - with patch.dict("os.environ", **{"DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS": "true"}): + with patch( + "dbt.adapters.databricks.global_state.GlobalState.get_char_limit_bypass", + return_value="true", + ): # Long list of table names is capped assert get_identifier_list_string(table_names) == "*" @@ -954,7 +962,10 @@ def test_describe_table_extended_may_limit(self): table_names = set([f"customers_{i}" for i in range(200)]) # If environment variable is set, then we may limit the number of characters - with patch.dict("os.environ", **{"DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS": "true"}): + with patch( + "dbt.adapters.databricks.global_state.GlobalState.get_char_limit_bypass", + return_value="true", + ): # But a short list of table names is not capped assert get_identifier_list_string(list(table_names)[:5]) == "|".join( list(table_names)[:5]
             )
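
A condensed sketch of the read-once pattern that the new global_state.py in patch 3/3 relies on: each GlobalState getter reads its environment variable on the first call and caches the parsed result at class scope, so later changes to the environment are invisible for the rest of the process. The names below (DemoState, EXAMPLE_FLAG) are hypothetical stand-ins for illustration, not the shipped identifiers:

    import os
    from typing import ClassVar, Optional


    class DemoState:
        """Stand-in for the GlobalState pattern: parse an env var once, cache it."""

        __flag: ClassVar[Optional[bool]] = None

        @classmethod
        def get_flag(cls) -> bool:
            # The first call reads and parses EXAMPLE_FLAG; every later call
            # returns the cached bool, ignoring changes to the environment.
            if cls.__flag is None:
                cls.__flag = os.getenv("EXAMPLE_FLAG", "True").upper() == "TRUE"
            return cls.__flag


    os.environ["EXAMPLE_FLAG"] = "false"
    assert DemoState.get_flag() is False

    os.environ["EXAMPLE_FLAG"] = "true"  # too late: the first read was cached
    assert DemoState.get_flag() is False

This caching is also why the test changes in patch 3/3 mock the GlobalState getters directly rather than using patch.dict("os.environ", ...): once a getter has run, patching the environment is a no-op. One loose end worth flagging for review: get_char_limit_bypass() returns a bool, but the unchanged context line in impl.py still compares bypass_2048_char_limit == "true", which a bool never equals, so the bypass path only triggers in tests that mock the getter to return the string "true".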