From 7fedb0ac655b14371c546af04b743ef5644bffcf Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 24 Nov 2022 01:25:54 +0100 Subject: [PATCH 1/7] merging stash --- dlt/cli/config_toml_writer.py | 2 +- dlt/cli/init_command.py | 55 +++++++++++++++++++-------- dlt/common/configuration/accessors.py | 3 +- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/dlt/cli/config_toml_writer.py b/dlt/cli/config_toml_writer.py index 718aaec74f..3bd9f757e0 100644 --- a/dlt/cli/config_toml_writer.py +++ b/dlt/cli/config_toml_writer.py @@ -42,8 +42,8 @@ def write_spec(toml_table: TOMLTable, config: BaseConfiguration) -> None: def write_values(toml: tomlkit.TOMLDocument, values: Iterable[WritableConfigValue]) -> None: # print(values) - toml_table: TOMLTable = toml # type: ignore for value in values: + toml_table: TOMLTable = toml # type: ignore for namespace in value.namespaces: if namespace not in toml_table: inner_table = tomlkit.table(True) diff --git a/dlt/cli/init_command.py b/dlt/cli/init_command.py index 2b887dad77..a005efe538 100644 --- a/dlt/cli/init_command.py +++ b/dlt/cli/init_command.py @@ -13,10 +13,12 @@ from dlt.common.git import clone_repo from dlt.common.configuration.providers.toml import ConfigTomlProvider, SecretsTomlProvider from dlt.common.configuration.resolve import is_secret_hint +from dlt.common.configuration.accessors import DLT_SECRETS_VALUE, DLT_CONFIG_VALUE from dlt.common.exceptions import DltException from dlt.common.logger import DLT_PKG_NAME from dlt.common.normalizers.names.snake_case import normalize_schema_name from dlt.common.destination import DestinationReference +from dlt.common.reflection.utils import set_ast_parents from dlt.common.schema.exceptions import InvalidSchemaName from dlt.common.storages.file_storage import FileStorage from dlt.common.typing import AnyType, is_optional_type @@ -30,16 +32,15 @@ from dlt.cli.config_toml_writer import WritableConfigValue, write_values - REQUIREMENTS_TXT = "requirements.txt" 
PYPROJECT_TOML = "pyproject.toml" def _clone_init_repo(branch: str) -> Tuple[FileStorage, List[str], str]: # return tuple is (file storage for cloned repo, list of template files to copy, the default pipeline template script) - # template_dir = "/tmp/tmptz2omtdf" # tempfile.mkdtemp() - template_dir = tempfile.mkdtemp() - clone_repo("https://github.com/scale-vector/python-dlt-init-template.git", template_dir, branch=branch) + # template_dir = "/tmp/tmptz2omtdf" # tempfile.mkdtemp() + template_dir = tempfile.mkdtemp() + clone_repo("https://github.com/scale-vector/python-dlt-init-template.git", template_dir, branch=branch) clone_storage = FileStorage(template_dir) @@ -58,7 +59,8 @@ def _clone_init_repo(branch: str) -> Tuple[FileStorage, List[str], str]: def _parse_init_script(script_source: str, init_script_name: str) -> PipelineScriptVisitor: # parse the script first tree = ast.parse(source=script_source) - visitor = PipelineScriptVisitor(script_source, add_parents=True) + set_ast_parents(tree) + visitor = PipelineScriptVisitor(script_source) visitor.visit(tree) if len(visitor.mod_aliases) == 0: raise CliCommandException("init", f"The pipeline script {init_script_name} does not import dlt or has bizarre import structure") @@ -100,7 +102,30 @@ def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: Modu # all detected configs with namespaces required_config: Dict[str, WritableConfigValue] = {} - # skip sources without spec. those are not imported and most probably are inner functions + # skip sources without spec. those are not imported and most probably are inner functions.
also skip the sources that are not called + # also skip the sources that are called from functions, the parent of call object to the source must be None (no outer function) + known_imported_sources = {name: _SOURCES[name] for name in visitor.known_sources if name in _SOURCES and name in visitor.known_source_calls and any(call.parent is None for call in visitor.known_source_calls[name])} + + for source_name, source_info in known_imported_sources.items(): + source_config = source_info.SPEC() + spec_fields = source_config.get_resolvable_fields() + for field_name, field_type in spec_fields.items(): + val_store = None + # all secrets must go to secrets.toml + if is_secret_hint(field_type): + val_store = required_secrets + # all configs that are required and do not have a default value must go to config.toml + elif not is_optional_type(field_type) and getattr(source_config, field_name) is None: + val_store = required_config + + if val_store is not None: + # use full namespaces if we have many sources + namespaces = () if len(known_imported_sources) == 1 else ("sources", source_name) + val_store[source_name + ":" + field_name] = WritableConfigValue(field_name, field_type, namespaces) + + return required_secrets, required_config + + known_imported_calls = {name: calls for name, calls in visitor.known_source_calls.items() if name in _SOURCES} for pipeline_name, call_nodes in known_imported_calls.items(): @@ -116,6 +141,7 @@ def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: Modu call_info = visitor.source_segment(call_node) raise CliCommandException("init", f"In {init_script_name} the source/resource {pipeline_name} call {call_info} looks wrong: {ty_ex}") # find all the arguments that are not sufficiently bound + print(bound_args) for arg_name, arg_node in bound_args.arguments.items(): # check if argument is in spec and is not optional. 
optional arguments won't be added to config/secrets arg_type = spec_fields.get(arg_name) @@ -127,12 +153,12 @@ def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: Modu value_provided = ast.literal_eval(arg_node) is not None if isinstance(arg_node, ast.Attribute) and arg_node.attr == "value": attr_source = visitor.source_segment(arg_node) - if attr_source.endswith("config.value"): + if attr_source.endswith(DLT_CONFIG_VALUE): value_provided = False from_placeholder = True if from_secrets: raise CliCommandException("init", f"The pipeline script {init_script_name} calls source/resource {pipeline_name} where argument {arg_name} is a secret but it requests it via {attr_source}") - if attr_source.endswith("secrets.value"): + if attr_source.endswith(DLT_SECRETS_VALUE): value_provided = False from_placeholder = True from_secrets = True @@ -229,7 +255,7 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None # find all arguments in all calls to replace transformed_nodes = _find_argument_nodes_to_replace( visitor, - [("destination", destination_name), ("pipeline_name", pipeline_name)], + [("destination", destination_name), ("pipeline_name", pipeline_name), ("dataset_name", pipeline_name + "_data")], init_script_name ) @@ -251,12 +277,6 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None # modify the script dest_script_source = _rewrite_script(visitor.source, transformed_nodes) - # generate tomls with comments - secrets_prov = SecretsTomlProvider() - write_values(secrets_prov._toml, required_secrets.values()) - config_prov = ConfigTomlProvider() - write_values(config_prov._toml, required_config.values()) - # welcome message click.echo() click.echo("Your new pipeline %s is ready to be customized!" 
% fmt.bold(pipeline_name)) @@ -295,6 +315,11 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None # create script dest_storage.save(dest_pipeline_script, dest_script_source) + # generate tomls with comments + secrets_prov = SecretsTomlProvider() + write_values(secrets_prov._toml, required_secrets.values()) + config_prov = ConfigTomlProvider() + write_values(config_prov._toml, required_config.values()) # write toml files secrets_prov._write_toml() config_prov._write_toml() diff --git a/dlt/common/configuration/accessors.py b/dlt/common/configuration/accessors.py index d34b7425b8..c2bd5f83d8 100644 --- a/dlt/common/configuration/accessors.py +++ b/dlt/common/configuration/accessors.py @@ -12,7 +12,8 @@ from dlt.common.schema.utils import coerce_value from dlt.common.typing import AnyType, ConfigValue - +DLT_SECRETS_VALUE = "secrets.value" +DLT_CONFIG_VALUE = "config.value" TConfigAny = TypeVar("TConfigAny", bound=Any) class _Accessor(abc.ABC): From 2514be6db8694f6b1678c37b8ea1cf1eef616ebe Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 24 Nov 2022 11:36:26 +0100 Subject: [PATCH 2/7] adds pythongit dep as required, fixes github test workflows, adds astunparse, bumps version --- .github/workflows/test_common.yml | 2 +- .github/workflows/test_loader_bigquery.yml | 2 +- .github/workflows/test_loader_redshift.yml | 2 +- Makefile | 3 +- poetry.lock | 65 ++++++++++++++++------ pyproject.toml | 27 ++++----- 6 files changed, 67 insertions(+), 34 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index c4bac42901..80eb722a30 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -55,7 +55,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction + run: poetry install --no-interaction --no-dev # - name: Install self # run: poetry install --no-interaction diff --git 
a/.github/workflows/test_loader_bigquery.yml b/.github/workflows/test_loader_bigquery.yml index 6707d236f5..a301de28c2 100644 --- a/.github/workflows/test_loader_bigquery.yml +++ b/.github/workflows/test_loader_bigquery.yml @@ -61,7 +61,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E bigquery + run: poetry install --no-interaction --no-dev -E bigquery # - name: Install self # run: poetry install --no-interaction diff --git a/.github/workflows/test_loader_redshift.yml b/.github/workflows/test_loader_redshift.yml index d268a3dd56..2eb5e5631a 100644 --- a/.github/workflows/test_loader_redshift.yml +++ b/.github/workflows/test_loader_redshift.yml @@ -62,7 +62,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E redshift + run: poetry install --no-interaction --no-dev -E redshift # - name: Install self # run: poetry install --no-interaction diff --git a/Makefile b/Makefile index b19868d358..697a27abd8 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,8 @@ build-library: poetry build publish-library: build-library - poetry publish -u __token__ + # provide the token via poetry config pypi-token.pypi your-api-token + poetry publish build-image-tags: @echo ${IMG} diff --git a/poetry.lock b/poetry.lock index 1245ade3fe..0de4ed993b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -45,6 +45,18 @@ six = "*" [package.extras] test = ["astroid (<=2.5.3)", "pytest"] +[[package]] +name = "astunparse" +version = "1.6.3" +description = "An AST unparser for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.6.1,<2.0" +wheel = ">=0.23.0,<1.0" + [[package]] name = "asyncstdlib" version = "3.10.5" @@ -111,14 +123,14 @@ yaml = ["PyYAML"] [[package]] name = "boto3" -version = "1.26.13" +version = "1.26.15" description = "The AWS SDK for Python" 
category = "main" optional = true python-versions = ">= 3.7" [package.dependencies] -botocore = ">=1.29.13,<1.30.0" +botocore = ">=1.29.15,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -127,7 +139,7 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.29.13" +version = "1.29.15" description = "Low-level, data-driven core of boto 3." category = "main" optional = true @@ -1272,7 +1284,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "sentry-sdk" -version = "1.11.0" +version = "1.11.1" description = "Python client for Sentry (https://sentry.io)" category = "main" optional = false @@ -1468,7 +1480,7 @@ types-urllib3 = "<1.27" [[package]] name = "types-setuptools" -version = "65.6.0.0" +version = "65.6.0.1" description = "Typing stubs for setuptools" category = "main" optional = false @@ -1484,7 +1496,7 @@ python-versions = "*" [[package]] name = "types-sqlalchemy" -version = "1.4.53.8" +version = "1.4.53.17" description = "Typing stubs for SQLAlchemy" category = "dev" optional = false @@ -1538,6 +1550,17 @@ python-versions = ">=3.7" [package.extras] watchdog = ["watchdog"] +[[package]] +name = "wheel" +version = "0.38.4" +description = "A built-package format for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=3.0.0)"] + [[package]] name = "zipp" version = "3.10.0" @@ -1560,7 +1583,7 @@ redshift = ["psycopg2-binary", "psycopg2cffi"] [metadata] lock-version = "1.1" python-versions = ">=3.8,<4.0" -content-hash = "ba9e1c9e1e02c1e775274794ae03ab760b93029cd6a39b168e6edf4054f99dde" +content-hash = "d4902ca8715231ab01c141573647a0efa90283f22e5237056dc282d5760dfc13" [metadata.files] agate = [ @@ -1575,6 +1598,10 @@ asttokens = [ {file = "asttokens-2.1.0-py2.py3-none-any.whl", hash = "sha256:1b28ed85e254b724439afc783d4bee767f780b936c3fe8b3275332f42cf5f561"}, {file = "asttokens-2.1.0.tar.gz", hash = 
"sha256:4aa76401a151c8cc572d906aad7aea2a841780834a19d780f4321c0fe1b54635"}, ] +astunparse = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] asyncstdlib = [ {file = "asyncstdlib-3.10.5-py2.py3-none-any.whl", hash = "sha256:fcf828d1064f578e639728544717268e7a18500f7dfee3f80af3aa5609601037"}, {file = "asyncstdlib-3.10.5.tar.gz", hash = "sha256:e9fc35e811d5b51d8fe3d17b62234b5e8e1a2513364228081cd88637b3a3506a"}, @@ -1595,12 +1622,12 @@ bandit = [ {file = "bandit-1.7.4.tar.gz", hash = "sha256:2d63a8c573417bae338962d4b9b06fbc6080f74ecd955a092849e1e65c717bd2"}, ] boto3 = [ - {file = "boto3-1.26.13-py3-none-any.whl", hash = "sha256:a8ad13a23745b6d4a56d5bdde53a7a80cd7b40016cd411b9a94e6bbfb2ca5dd2"}, - {file = "boto3-1.26.13.tar.gz", hash = "sha256:853cf4b2136c4deec4e01a17b89126377bfca30223535795d879ca65af4c4a69"}, + {file = "boto3-1.26.15-py3-none-any.whl", hash = "sha256:0e455bc50190cec1af819c9e4a257130661c4f2fad1e211b4dd2cb8f9af89464"}, + {file = "boto3-1.26.15.tar.gz", hash = "sha256:e2bfc955fb70053951589d01919c9233c6ef091ae1404bb5249a0f27e05b6b36"}, ] botocore = [ - {file = "botocore-1.29.13-py3-none-any.whl", hash = "sha256:9c73a180fad9a7da7797530ced3b5069872bff915b1ae9fa11fc1ed79b584c8e"}, - {file = "botocore-1.29.13.tar.gz", hash = "sha256:9d39db398f472c0aa97098870c8c4cf12636b2667a18e694fea5fae046af907e"}, + {file = "botocore-1.29.15-py3-none-any.whl", hash = "sha256:02cfa6d060c50853a028b36ada96f4ddb225948bf9e7e0a4dc5b72f9e3878f15"}, + {file = "botocore-1.29.15.tar.gz", hash = "sha256:7d4e148870c98bbaab04b0c85b4d3565fc00fec6148cab9da96ab4419dbfb941"}, ] cachetools = [ {file = "cachetools-5.2.0-py3-none-any.whl", hash = "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db"}, @@ -2574,8 +2601,8 @@ semver = [ {file = "semver-2.13.0.tar.gz", hash 
= "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"}, ] sentry-sdk = [ - {file = "sentry-sdk-1.11.0.tar.gz", hash = "sha256:e7b78a1ddf97a5f715a50ab8c3f7a93f78b114c67307785ee828ef67a5d6f117"}, - {file = "sentry_sdk-1.11.0-py2.py3-none-any.whl", hash = "sha256:f467e6c7fac23d4d42bc83eb049c400f756cd2d65ab44f0cc1165d0c7c3d40bc"}, + {file = "sentry-sdk-1.11.1.tar.gz", hash = "sha256:675f6279b6bb1fea09fd61751061f9a90dca3b5929ef631dd50dc8b3aeb245e9"}, + {file = "sentry_sdk-1.11.1-py2.py3-none-any.whl", hash = "sha256:8b4ff696c0bdcceb3f70bbb87a57ba84fd3168b1332d493fcd16c137f709578c"}, ] setuptools = [ {file = "setuptools-65.6.0-py3-none-any.whl", hash = "sha256:6211d2f5eddad8757bd0484923ca7c0a6302ebc4ab32ea5e94357176e0ca0840"}, @@ -2740,16 +2767,16 @@ types-requests = [ {file = "types_requests-2.28.11.5-py3-none-any.whl", hash = "sha256:091d4a5a33c1b4f20d8b1b952aa8fa27a6e767c44c3cf65e56580df0b05fd8a9"}, ] types-setuptools = [ - {file = "types-setuptools-65.6.0.0.tar.gz", hash = "sha256:3270beadb99bc6fa4b9a5cc35106e7144a5c33028ce48992b10e74adc8cc5c82"}, - {file = "types_setuptools-65.6.0.0-py3-none-any.whl", hash = "sha256:9466c5c057dc8e5a0f0dc3b2b0b4b4284524d45bd1cd80ac62b96f19da1459b0"}, + {file = "types-setuptools-65.6.0.1.tar.gz", hash = "sha256:a03cf72f336929c9405f485dd90baef31a401776675f785f69a5a519f0b099ca"}, + {file = "types_setuptools-65.6.0.1-py3-none-any.whl", hash = "sha256:c957599502195ab98e90f0560466fa963f6a23373905e6d4e1772dbfaf1e44b7"}, ] types-simplejson = [ {file = "types-simplejson-3.18.0.0.tar.gz", hash = "sha256:857adb13190abd65d0d103be965152e79a6842c5663e01e8681bde1895713f52"}, {file = "types_simplejson-3.18.0.0-py3-none-any.whl", hash = "sha256:f8a8428f753574fa3b7eb290756776f0fb6b4ad9cae72bf8c0c9534eeda6398c"}, ] types-sqlalchemy = [ - {file = "types-SQLAlchemy-1.4.53.8.tar.gz", hash = "sha256:b77be0ee0a3f3bc7e8acce1b5ad127a74bc87f81cbf3ae1a81eb4d8fd2de01cf"}, - {file = "types_SQLAlchemy-1.4.53.8-py3-none-any.whl", hash = 
"sha256:df2f88b7ddf685ddfe00dae05f81080ae70c653f7a643c9e0d7c6cee534cb88d"}, + {file = "types-SQLAlchemy-1.4.53.17.tar.gz", hash = "sha256:f469d5165ea024c0d936ca9ea89eba551cff6159fa2389bd8aa84afffaa730f6"}, + {file = "types_SQLAlchemy-1.4.53.17-py3-none-any.whl", hash = "sha256:328579deec569cddc9d8ea6f888b312e9b8f97bbd4a1007ec24f10d07079bfd9"}, ] types-urllib3 = [ {file = "types-urllib3-1.26.25.4.tar.gz", hash = "sha256:eec5556428eec862b1ac578fb69aab3877995a99ffec9e5a12cf7fbd0cc9daee"}, @@ -2771,6 +2798,10 @@ werkzeug = [ {file = "Werkzeug-2.1.2-py3-none-any.whl", hash = "sha256:72a4b735692dd3135217911cbeaa1be5fa3f62bffb8745c5215420a03dc55255"}, {file = "Werkzeug-2.1.2.tar.gz", hash = "sha256:1ce08e8093ed67d638d63879fd1ba3735817f7a80de3674d293f5984f25fb6e6"}, ] +wheel = [ + {file = "wheel-0.38.4-py3-none-any.whl", hash = "sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8"}, + {file = "wheel-0.38.4.tar.gz", hash = "sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac"}, +] zipp = [ {file = "zipp-3.10.0-py3-none-any.whl", hash = "sha256:4fcb6f278987a6605757302a6e40e896257570d11c51628968ccb2a47e80c6c1"}, {file = "zipp-3.10.0.tar.gz", hash = "sha256:7a7262fd930bd3e36c50b9a64897aec3fafff3dfdeec9623ae22b40e93f99bb8"}, diff --git a/pyproject.toml b/pyproject.toml index ec52ed57e3..0b8f497c5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-dlt" -version = "0.2.0a1" +version = "0.2.0a2" description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run." 
authors = ["ScaleVector "] maintainers = [ "Marcin Rudolf ", "Adrian Brudaru ",] @@ -35,6 +35,18 @@ semver = "^2.13.0" sentry-sdk = "^1.4.3" hexbytes = "^0.2.2" cachetools = "^5.2.0" +GitPython = "^3.1.26" +tzdata = "^2022.1" +tomlkit = "^0.11.3" +asyncstdlib = "^3.10.5" +pathvalidate = "^2.5.2" +SQLAlchemy = "^1.3.5" +typing-extensions = "^4.0.0" +makefun = "^1.15.0" +click = "^8.1.3" +requirements-parser = "^0.5.0" +setuptools = "^65.6.0" +humanize = "^4.4.0" psycopg2-binary = {version = "^2.9.1", optional = true} # use this dependency as the current version of psycopg2cffi does not have sql module @@ -46,21 +58,10 @@ google-cloud-bigquery = {version = "^2.26.0", optional = true, python = "<3.11"} google-cloud-bigquery-storage = {version = "^2.13.0", optional = true, python = "<3.11"} pyarrow = {version = "^8.0.0", optional = true} -GitPython = {version = "^3.1.26", optional = true} dbt-core = {version = ">=1.1.0,<1.2.0", optional = true} dbt-redshift = {version = ">=1.0.0,<1.2.0", optional = true} dbt-bigquery = {version = ">=1.0.0,<1.2.0", optional = true, python = "<3.11"} -tzdata = "^2022.1" -tomlkit = "^0.11.3" -asyncstdlib = "^3.10.5" -pathvalidate = "^2.5.2" -SQLAlchemy = "^1.3.5" -typing-extensions = "^4.0.0" -makefun = "^1.15.0" -click = "^8.1.3" -requirements-parser = "^0.5.0" -setuptools = "^65.6.0" -humanize = "^4.4.0" +astunparse = "^1.6.3" [tool.poetry.dev-dependencies] From d4b006b74b66afebb07bd9cafb8bf7ce2c362b0b Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 24 Nov 2022 11:37:17 +0100 Subject: [PATCH 3/7] adds spec reflection and literal default parsing in function signatures + tests --- dlt/common/configuration/inject.py | 61 +------ dlt/common/reflection/__init__.py | 0 dlt/common/reflection/function_visitor.py | 13 ++ dlt/common/reflection/spec.py | 102 ++++++++++++ dlt/common/reflection/utils.py | 62 +++++++ tests/common/configuration/test_inject.py | 145 +++++----------- tests/common/reflection/__init__.py | 0 
tests/common/reflection/test_reflect_spec.py | 166 +++++++++++++++++++ 8 files changed, 394 insertions(+), 155 deletions(-) create mode 100644 dlt/common/reflection/__init__.py create mode 100644 dlt/common/reflection/function_visitor.py create mode 100644 dlt/common/reflection/spec.py create mode 100644 dlt/common/reflection/utils.py create mode 100644 tests/common/reflection/__init__.py create mode 100644 tests/common/reflection/test_reflect_spec.py diff --git a/dlt/common/configuration/inject.py b/dlt/common/configuration/inject.py index be90392158..fa0b9619fc 100644 --- a/dlt/common/configuration/inject.py +++ b/dlt/common/configuration/inject.py @@ -1,18 +1,15 @@ -import re import inspect from makefun import wraps -from types import ModuleType from typing import Callable, Dict, Type, Any, Optional, Tuple, TypeVar, overload from inspect import Signature, Parameter -from dlt.common.typing import AnyType, DictStrAny, StrAny, TFun, AnyFun +from dlt.common.typing import DictStrAny, StrAny, TFun, AnyFun from dlt.common.configuration.resolve import resolve_configuration, inject_namespace -from dlt.common.configuration.specs.base_configuration import BaseConfiguration, is_valid_hint, configspec +from dlt.common.configuration.specs.base_configuration import BaseConfiguration from dlt.common.configuration.specs.config_namespace_context import ConfigNamespacesContext -from dlt.common.utils import get_callable_name +from dlt.common.reflection.spec import spec_from_signature + -# [^.^_]+ splits by . 
or _ -_SLEEPING_CAT_SPLIT = re.compile("[^.^_]+") _LAST_DLT_CONFIG = "_dlt_config" _ORIGINAL_ARGS = "_dlt_orig_args" TConfiguration = TypeVar("TConfiguration", bound=BaseConfiguration) @@ -20,6 +17,7 @@ _FUNC_SPECS: Dict[int, Type[BaseConfiguration]] = {} + def get_fun_spec(f: AnyFun) -> Type[BaseConfiguration]: return _FUNC_SPECS.get(id(f)) @@ -50,7 +48,7 @@ def decorator(f: TFun) -> TFun: namespace_context = ConfigNamespacesContext() if spec is None: - SPEC = _spec_from_signature(_get_spec_name_from_f(f), inspect.getmodule(f), sig, only_kw) + SPEC = spec_from_signature(f, sig, only_kw) else: SPEC = spec @@ -129,50 +127,3 @@ def last_config(**kwargs: Any) -> BaseConfiguration: def get_orig_args(**kwargs: Any) -> Tuple[Tuple[Any], DictStrAny]: return kwargs[_ORIGINAL_ARGS] # type: ignore - - -def _get_spec_name_from_f(f: AnyFun) -> str: - func_name = get_callable_name(f, "__qualname__").replace(".", "") # func qual name contains position in the module, separated by dots - - def _first_up(s: str) -> str: - return s[0].upper() + s[1:] - - return "".join(map(_first_up, _SLEEPING_CAT_SPLIT.findall(func_name))) + "Configuration" - - -def _spec_from_signature(name: str, module: ModuleType, sig: Signature, kw_only: bool = False) -> Type[BaseConfiguration]: - # synthesize configuration from the signature - fields: Dict[str, Any] = {} - annotations: Dict[str, Any] = {} - - for p in sig.parameters.values(): - # skip *args and **kwargs, skip typical method params and if kw_only flag is set: accept KEYWORD ONLY args - if p.kind not in (Parameter.VAR_KEYWORD, Parameter.VAR_POSITIONAL) and p.name not in ["self", "cls"] and \ - (kw_only and p.kind == Parameter.KEYWORD_ONLY or not kw_only): - field_type = AnyType if p.annotation == Parameter.empty else p.annotation - if is_valid_hint(field_type): - field_default = None if p.default == Parameter.empty else p.default - # try to get type from default - if field_type is AnyType and field_default: - field_type = type(field_default) - # 
make type optional if explicit None is provided as default - if p.default is None: - field_type = Optional[field_type] - # set annotations - annotations[p.name] = field_type - # set field with default value - fields[p.name] = field_default - - # new type goes to the module where sig was declared - fields["__module__"] = module.__name__ - # set annotations so they are present in __dict__ - fields["__annotations__"] = annotations - # synthesize type - T: Type[BaseConfiguration] = type(name, (BaseConfiguration,), fields) - # add to the module - setattr(module, name, T) - SPEC = configspec(init=False)(T) - # print(f"SYNTHESIZED {SPEC} in {inspect.getmodule(SPEC)} for sig {sig}") - # import dataclasses - # print("\n".join(map(str, dataclasses.fields(SPEC)))) - return SPEC diff --git a/dlt/common/reflection/__init__.py b/dlt/common/reflection/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dlt/common/reflection/function_visitor.py b/dlt/common/reflection/function_visitor.py new file mode 100644 index 0000000000..3b89403745 --- /dev/null +++ b/dlt/common/reflection/function_visitor.py @@ -0,0 +1,13 @@ +import ast +from ast import NodeVisitor +from typing import Any + +class FunctionVisitor(NodeVisitor): + def __init__(self, source: str): + self.source = source + self.top_func: ast.FunctionDef = None + + def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: + if not self.top_func: + self.top_func = node + super().generic_visit(node) diff --git a/dlt/common/reflection/spec.py b/dlt/common/reflection/spec.py new file mode 100644 index 0000000000..c1eed9ad13 --- /dev/null +++ b/dlt/common/reflection/spec.py @@ -0,0 +1,102 @@ +import re +import inspect +from typing import Dict, List, Type, Any, Optional +from inspect import Signature, Parameter + +from dlt.common.typing import AnyType, AnyFun, TSecretValue +from dlt.common.configuration.specs.base_configuration import BaseConfiguration, is_valid_hint, is_secret_hint, configspec +from 
dlt.common.configuration.accessors import DLT_CONFIG_VALUE, DLT_SECRETS_VALUE +from dlt.common.reflection.utils import get_func_def_node, get_literal_defaults +from dlt.common.utils import get_callable_name + +# [^.^_]+ splits by . or _ +_SLEEPING_CAT_SPLIT = re.compile("[^.^_]+") + + +def _get_spec_name_from_f(f: AnyFun) -> str: + func_name = get_callable_name(f, "__qualname__").replace(".", "") # func qual name contains position in the module, separated by dots + + def _first_up(s: str) -> str: + return s[0].upper() + s[1:] + + return "".join(map(_first_up, _SLEEPING_CAT_SPLIT.findall(func_name))) + "Configuration" + + +def spec_from_signature(f: AnyFun, sig: Signature, kw_only: bool = False) -> Type[BaseConfiguration]: + name = _get_spec_name_from_f(f) + module = inspect.getmodule(f) + + # check if spec for that function exists + spec_id = name # f"SPEC_{name}_kw_only_{kw_only}" + if hasattr(module, spec_id): + return getattr(module, spec_id) # type: ignore + + # find all the arguments that have following defaults + literal_defaults: Dict[str, str] = None + + def dlt_config_literal_to_type(arg_name: str) -> AnyType: + nonlocal literal_defaults + + if literal_defaults is None: + try: + node = get_func_def_node(f) + literal_defaults = get_literal_defaults(node) + except Exception: + # ignore exception during parsing. 
it is almost impossible to test all cases of function definitions + literal_defaults = {} + + if arg_name in literal_defaults: + literal_default = literal_defaults[arg_name] + if literal_default.endswith(DLT_CONFIG_VALUE): + return AnyType + if literal_default.endswith(DLT_SECRETS_VALUE): + return TSecretValue + return None + + # synthesize configuration from the signature + fields: Dict[str, Any] = {} + annotations: Dict[str, Any] = {} + + for p in sig.parameters.values(): + # skip *args and **kwargs, skip typical method params and if kw_only flag is set: accept KEYWORD ONLY args + if p.kind not in (Parameter.VAR_KEYWORD, Parameter.VAR_POSITIONAL) and p.name not in ["self", "cls"] and \ + (kw_only and p.kind == Parameter.KEYWORD_ONLY or not kw_only): + field_type = AnyType if p.annotation == Parameter.empty else p.annotation + # only valid hints and parameters with defaults are eligible + if is_valid_hint(field_type) and p.default != Parameter.empty: + # try to get type from default + if field_type is AnyType and p.default is not None: + field_type = type(p.default) + # make type optional if explicit None is provided as default + if p.default is None: + # check if the defaults were attributes of the form .config.value or .secrets.value + type_from_literal = dlt_config_literal_to_type(p.name) + if type_from_literal is None: + # optional type + field_type = Optional[field_type] + elif type_from_literal is TSecretValue: + # override type with secret value if secrets.value + # print(f"Param {p.name} is REQUIRED: secrets literal") + if not is_secret_hint(field_type): + # TODO: generate typed SecretValue + field_type = TSecretValue + else: + # keep type mandatory if config.value + # print(f"Param {p.name} is REQUIRED: config literal") + pass + + # set annotations + annotations[p.name] = field_type + # set field with default value + fields[p.name] = p.default + + # new type goes to the module where sig was declared + fields["__module__"] = module.__name__ + # set 
annotations so they are present in __dict__ + fields["__annotations__"] = annotations + # synthesize type + T: Type[BaseConfiguration] = type(name, (BaseConfiguration,), fields) + SPEC = configspec(init=False)(T) + # add to the module + setattr(module, spec_id, SPEC) + return SPEC \ No newline at end of file diff --git a/dlt/common/reflection/utils.py b/dlt/common/reflection/utils.py new file mode 100644 index 0000000000..bdbf4518a3 --- /dev/null +++ b/dlt/common/reflection/utils.py @@ -0,0 +1,62 @@ +import re +import ast +import inspect +import astunparse +from typing import Dict, List, Optional + +from dlt.common.typing import AnyFun + + +def get_literal_defaults(node: ast.FunctionDef) -> Dict[str, str]: + defaults: List[ast.expr] = [] + if node.args.defaults: + defaults.extend(node.args.defaults) + if node.args.kw_defaults: + defaults.extend(node.args.kw_defaults) + args: List[ast.arg] = [] + if node.args.posonlyargs: + args.extend(node.args.posonlyargs) + if node.args.args: + args.extend(node.args.args) + if node.args.kwonlyargs: + args.extend(node.args.kwonlyargs) + + # zip args and defaults + literal_defaults: Dict[str, str] = {} + for arg, default in zip(reversed(args), reversed(defaults)): + if default: + literal_defaults[str(arg.arg)] = astunparse.unparse(default).strip() + + return literal_defaults + + +def get_func_def_node(f: AnyFun) -> ast.FunctionDef: + # this will be slow + source, lineno = inspect.findsource(inspect.unwrap(f)) + + for node in ast.walk(ast.parse("".join(source))): + if isinstance(node, ast.FunctionDef): + f_lineno = node.lineno - 1 + # get line number of first decorator + if node.decorator_list: + f_lineno = node.decorator_list[0].lineno - 1 + # line number and function name must match + if f_lineno == lineno and node.name == f.__name__: + return node + return None + + +def get_outer_func_def(node: ast.AST) -> Optional[ast.FunctionDef]: + if not hasattr(node, "parent"): + raise ValueError("No parent information in node, not enabled 
in visitor", node) + while not isinstance(node.parent, ast.FunctionDef): # type: ignore + if node.parent is None: # type: ignore + return None + node = node.parent # type: ignore + return node # type: ignore + + +def set_ast_parents(tree: ast.AST) -> None: + for node in ast.walk(tree): + for child in ast.iter_child_nodes(node): + child.parent = node if node is not tree else None # type: ignore diff --git a/tests/common/configuration/test_inject.py b/tests/common/configuration/test_inject.py index ad47df4c7e..f4f201a29f 100644 --- a/tests/common/configuration/test_inject.py +++ b/tests/common/configuration/test_inject.py @@ -1,11 +1,16 @@ import inspect from typing import Any, Optional -import dlt +import pytest + +import dlt from dlt.common import Decimal -from dlt.common.typing import TSecretValue -from dlt.common.configuration.inject import _spec_from_signature, _get_spec_name_from_f, get_fun_spec, with_config +from dlt.common.configuration.exceptions import ConfigFieldMissingException +from dlt.common.typing import TSecretValue, is_optional_type +from dlt.common.configuration.inject import get_fun_spec, with_config from dlt.common.configuration.specs import BaseConfiguration, RunConfiguration +from dlt.common.reflection.spec import spec_from_signature, _get_spec_name_from_f +from dlt.common.reflection.utils import get_func_def_node, get_literal_defaults from tests.utils import preserve_environ from tests.common.configuration.utils import environment @@ -15,101 +20,10 @@ _CONFIG_DEFAULT = RunConfiguration() -def test_synthesize_spec_from_sig() -> None: - - # spec from typed signature without defaults - - def f_typed(p1: str, p2: Decimal, p3: Any, p4: Optional[RunConfiguration], p5: TSecretValue) -> None: - pass - - SPEC = _spec_from_signature(f_typed.__name__, inspect.getmodule(f_typed), inspect.signature(f_typed)) - assert SPEC.p1 is None - assert SPEC.p2 is None - assert SPEC.p3 is None - assert SPEC.p4 is None - assert SPEC.p5 is None - fields = 
SPEC().get_resolvable_fields() - assert fields == {"p1": str, "p2": Decimal, "p3": Any, "p4": Optional[RunConfiguration], "p5": TSecretValue} - - # spec from typed signatures with defaults - - def f_typed_default(t_p1: str = "str", t_p2: Decimal = _DECIMAL_DEFAULT, t_p3: Any = _SECRET_DEFAULT, t_p4: RunConfiguration = _CONFIG_DEFAULT, t_p5: str = None) -> None: - pass - - SPEC = _spec_from_signature(f_typed_default.__name__, inspect.getmodule(f_typed_default), inspect.signature(f_typed_default)) - assert SPEC.t_p1 == "str" - assert SPEC.t_p2 == _DECIMAL_DEFAULT - assert SPEC.t_p3 == _SECRET_DEFAULT - assert isinstance(SPEC.t_p4, RunConfiguration) - assert SPEC.t_p5 is None - fields = SPEC().get_resolvable_fields() - # Any will not assume TSecretValue type because at runtime it's a str - # setting default as None will convert type into optional (t_p5) - assert fields == {"t_p1": str, "t_p2": Decimal, "t_p3": str, "t_p4": RunConfiguration, "t_p5": Optional[str]} - - # spec from untyped signature - - def f_untyped(untyped_p1, untyped_p2) -> None: - pass - - SPEC = _spec_from_signature(f_untyped.__name__, inspect.getmodule(f_untyped), inspect.signature(f_untyped)) - assert SPEC.untyped_p1 is None - assert SPEC.untyped_p2 is None - fields = SPEC().get_resolvable_fields() - assert fields == {"untyped_p1": Any, "untyped_p2": Any,} - - # spec types derived from defaults - - - def f_untyped_default(untyped_p1 = "str", untyped_p2 = _DECIMAL_DEFAULT, untyped_p3 = _CONFIG_DEFAULT, untyped_p4 = None) -> None: - pass - - - SPEC = _spec_from_signature(f_untyped_default.__name__, inspect.getmodule(f_untyped_default), inspect.signature(f_untyped_default)) - assert SPEC.untyped_p1 == "str" - assert SPEC.untyped_p2 == _DECIMAL_DEFAULT - assert isinstance(SPEC.untyped_p3, RunConfiguration) - assert SPEC.untyped_p4 is None - fields = SPEC().get_resolvable_fields() - # untyped_p4 converted to Optional[Any] - assert fields == {"untyped_p1": str, "untyped_p2": Decimal, "untyped_p3": 
RunConfiguration, "untyped_p4": Optional[Any]} - - # spec from signatures containing positional only and keywords only args - - def f_pos_kw_only(pos_only_1, pos_only_2: str = "default", /, *, kw_only_1, kw_only_2: int = 2) -> None: - pass - - SPEC = _spec_from_signature(f_pos_kw_only.__name__, inspect.getmodule(f_pos_kw_only), inspect.signature(f_pos_kw_only)) - assert SPEC.pos_only_1 is None - assert SPEC.pos_only_2 == "default" - assert SPEC.kw_only_1 is None - assert SPEC.kw_only_2 == 2 - fields = SPEC().get_resolvable_fields() - assert fields == {"pos_only_1": Any, "pos_only_2": str, "kw_only_1": Any, "kw_only_2": int} - - # kw_only = True will filter in keywords only parameters - SPEC = _spec_from_signature(f_pos_kw_only.__name__, inspect.getmodule(f_pos_kw_only), inspect.signature(f_pos_kw_only), kw_only=True) - assert SPEC.kw_only_1 is None - assert SPEC.kw_only_2 == 2 - assert not hasattr(SPEC, "pos_only_1") - fields = SPEC().get_resolvable_fields() - assert fields == {"kw_only_1": Any, "kw_only_2": int} - - def f_variadic(var_1: str, *args, kw_var_1: str, **kwargs) -> None: - pass - - SPEC = _spec_from_signature(f_variadic.__name__, inspect.getmodule(f_variadic), inspect.signature(f_variadic)) - assert SPEC.var_1 is None - assert SPEC.kw_var_1 is None - assert not hasattr(SPEC, "args") - fields = SPEC().get_resolvable_fields() - assert fields == {"var_1": str, "kw_var_1": str} - - def test_arguments_are_explicit(environment: Any) -> None: @with_config - def f_var(user, path): + def f_var(user=dlt.config.value, path=dlt.config.value): # explicit args "survive" the injection: they have precedence over env assert user == "explicit user" assert path == "explicit path" @@ -119,7 +33,7 @@ def f_var(user, path): f_var("explicit user", "explicit path") @with_config - def f_var_env(user, path): + def f_var_env(user=dlt.config.value, path=dlt.config.value): assert user == "env user" assert path == "explicit path" @@ -128,6 +42,36 @@ def f_var_env(user, path): 
f_var_env(path="explicit path", user=None) +def test_arguments_dlt_literal_defaults_are_required(environment: Any) -> None: + + @with_config + def f_config(user=dlt.config.value): + assert user is not None + return user + + @with_config + def f_secret(password=dlt.secrets.value): + # explicit args "survive" the injection: they have precedence over env + assert password is not None + return password + + # call without user present + with pytest.raises(ConfigFieldMissingException) as py_ex: + f_config() + assert py_ex.value.fields == ["user"] + with pytest.raises(ConfigFieldMissingException) as py_ex: + f_config(None) + assert py_ex.value.fields == ["user"] + + environment["USER"] = "user" + assert f_config() == "user" + assert f_config(None) == "user" + + environment["PASSWORD"] = "password" + assert f_secret() == "password" + assert f_secret(None) == "password" + + def test_inject_with_non_injectable_param() -> None: # one of parameters in signature has not valid hint and is skipped (ie. 
from_pipe) pass @@ -145,10 +89,10 @@ def test_inject_with_auto_namespace(environment: Any) -> None: environment["PIPE__VALUE"] = "test" @with_config(auto_namespace=True) - def f(pipeline_name, value): + def f(pipeline_name=dlt.config.value, value=dlt.secrets.value): assert value == "test" - f("pipe", dlt.config.value) + f("pipe") # make sure the spec is available for decorated fun assert get_fun_spec(f) is not None @@ -209,7 +153,7 @@ def test_auto_derived_spec_type_name() -> None: class AutoNameTest: @with_config - def __init__(self, pos_par, /, kw_par) -> None: + def __init__(self, pos_par=dlt.secrets.value, /, kw_par=None) -> None: pass @classmethod @@ -232,4 +176,5 @@ def stuff_test(pos_par, /, kw_par) -> None: assert "TestAutoDerivedSpecTypeNameAutoNameTestInitConfiguration" in globals() # instantiate C: BaseConfiguration = globals()["TestAutoDerivedSpecTypeNameAutoNameTestInitConfiguration"]() - assert C.get_resolvable_fields() == {"pos_par": Any, "kw_par": Any} \ No newline at end of file + # pos_par converted to secrets, kw_par converted to optional + assert C.get_resolvable_fields() == {"pos_par": TSecretValue, "kw_par": Optional[Any]} \ No newline at end of file diff --git a/tests/common/reflection/__init__.py b/tests/common/reflection/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/common/reflection/test_reflect_spec.py b/tests/common/reflection/test_reflect_spec.py new file mode 100644 index 0000000000..fe755125b7 --- /dev/null +++ b/tests/common/reflection/test_reflect_spec.py @@ -0,0 +1,166 @@ +import inspect +from typing import Any, Optional + +import dlt +from dlt.common import Decimal +from dlt.common.typing import TSecretValue, is_optional_type +from dlt.common.configuration.inject import get_fun_spec, with_config +from dlt.common.configuration.specs import BaseConfiguration, RunConfiguration +from dlt.common.reflection.spec import spec_from_signature, _get_spec_name_from_f +from dlt.common.reflection.utils import 
get_func_def_node, get_literal_defaults + + +_DECIMAL_DEFAULT = Decimal("0.01") +_SECRET_DEFAULT = TSecretValue("PASS") +_CONFIG_DEFAULT = RunConfiguration() + + +def test_synthesize_spec_from_sig() -> None: + + # spec from typed signature without defaults + + def f_typed(p1: str = None, p2: Decimal = None, p3: Any = None, p4: Optional[RunConfiguration] = None, p5: TSecretValue = dlt.secrets.value) -> None: + pass + + SPEC = spec_from_signature(f_typed, inspect.signature(f_typed)) + assert SPEC.p1 is None + assert SPEC.p2 is None + assert SPEC.p3 is None + assert SPEC.p4 is None + assert SPEC.p5 is None + fields = SPEC().get_resolvable_fields() + assert fields == {"p1": Optional[str], "p2": Optional[Decimal], "p3": Optional[Any], "p4": Optional[RunConfiguration], "p5": TSecretValue} + + # spec from typed signatures with defaults + + def f_typed_default(t_p1: str = "str", t_p2: Decimal = _DECIMAL_DEFAULT, t_p3: Any = _SECRET_DEFAULT, t_p4: RunConfiguration = _CONFIG_DEFAULT, t_p5: str = None) -> None: + pass + + SPEC = spec_from_signature(f_typed_default, inspect.signature(f_typed_default)) + assert SPEC.t_p1 == "str" + assert SPEC.t_p2 == _DECIMAL_DEFAULT + assert SPEC.t_p3 == _SECRET_DEFAULT + assert isinstance(SPEC.t_p4, RunConfiguration) + assert SPEC.t_p5 is None + fields = SPEC().get_resolvable_fields() + # Any will not assume TSecretValue type because at runtime it's a str + # setting default as None will convert type into optional (t_p5) + assert fields == {"t_p1": str, "t_p2": Decimal, "t_p3": str, "t_p4": RunConfiguration, "t_p5": Optional[str]} + + # spec from untyped signature + + def f_untyped(untyped_p1 = None, untyped_p2 = dlt.config.value) -> None: + pass + + SPEC = spec_from_signature(f_untyped, inspect.signature(f_untyped)) + assert SPEC.untyped_p1 is None + assert SPEC.untyped_p2 is None + fields = SPEC().get_resolvable_fields() + assert fields == {"untyped_p1": Optional[Any], "untyped_p2": Any} + + # spec types derived from defaults + + + def 
f_untyped_default(untyped_p1 = "str", untyped_p2 = _DECIMAL_DEFAULT, untyped_p3 = _CONFIG_DEFAULT, untyped_p4 = None) -> None: + pass + + + SPEC = spec_from_signature(f_untyped_default, inspect.signature(f_untyped_default)) + assert SPEC.untyped_p1 == "str" + assert SPEC.untyped_p2 == _DECIMAL_DEFAULT + assert isinstance(SPEC.untyped_p3, RunConfiguration) + assert SPEC.untyped_p4 is None + fields = SPEC().get_resolvable_fields() + # untyped_p4 converted to Optional[Any] + assert fields == {"untyped_p1": str, "untyped_p2": Decimal, "untyped_p3": RunConfiguration, "untyped_p4": Optional[Any]} + + # spec from signatures containing positional only and keywords only args + + def f_pos_kw_only(pos_only_1=dlt.config.value, pos_only_2: str = "default", /, *, kw_only_1=None, kw_only_2: int = 2) -> None: + pass + + SPEC = spec_from_signature(f_pos_kw_only, inspect.signature(f_pos_kw_only)) + assert SPEC.pos_only_1 is None + assert SPEC.pos_only_2 == "default" + assert SPEC.kw_only_1 is None + assert SPEC.kw_only_2 == 2 + fields = SPEC().get_resolvable_fields() + assert fields == {"pos_only_1": Any, "pos_only_2": str, "kw_only_1": Optional[Any], "kw_only_2": int} + + # kw_only = True will filter in keywords only parameters + # deregister spec to disable cache + del globals()[SPEC.__name__] + SPEC = spec_from_signature(f_pos_kw_only, inspect.signature(f_pos_kw_only), kw_only=True) + assert SPEC.kw_only_1 is None + assert SPEC.kw_only_2 == 2 + assert not hasattr(SPEC, "pos_only_1") + fields = SPEC().get_resolvable_fields() + assert fields == {"kw_only_1": Optional[Any], "kw_only_2": int} + + def f_variadic(var_1: str = "A", *args, kw_var_1: str, **kwargs) -> None: + print(locals()) + + SPEC = spec_from_signature(f_variadic, inspect.signature(f_variadic)) + assert SPEC.var_1 == "A" + assert not hasattr(SPEC, "kw_var_1") # kw parameters that must be explicitly passed are removed + assert not hasattr(SPEC, "args") + fields = SPEC().get_resolvable_fields() + assert fields == 
{"var_1": str} + + +def f_top_kw_defaults_args(arg1, arg2 = "top", arg3 = dlt.config.value, *args, kw1, kw_lit = "12131", kw_secret_val = dlt.secrets.value, **kwargs): + pass + + +def test_argument_have_dlt_config_defaults() -> None: + + def f_defaults( + req_val, config_val = dlt.config.value, secret_val = dlt.secrets.value, /, + pos_cf = None, pos_cf_val = dlt.config.value, pos_secret_val = dlt.secrets.value, *, + kw_val = None, kw_cf_val = dlt.config.value, kw_secret_val = dlt.secrets.value): + pass + + @with_config + def f_kw_defaults(*, kw1 = dlt.config.value, kw_lit = "12131", kw_secret_val = dlt.secrets.value, **kwargs): + pass + + @with_config + @with_config + def f_kw_defaults_args(arg1, arg2 = 2, arg3 = dlt.config.value, *args, kw1, kw_lit = "12131", kw_secret_val = dlt.secrets.value, **kwargs): + pass + + + node = get_func_def_node(f_defaults) + assert node.name == "f_defaults" + literal_defaults = get_literal_defaults(node) + assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_cf_val': 'dlt.config.value', 'kw_val': 'None', 'pos_secret_val': 'dlt.secrets.value', 'pos_cf_val': 'dlt.config.value', 'pos_cf': 'None', 'secret_val': 'dlt.secrets.value', 'config_val': 'dlt.config.value'} + SPEC = spec_from_signature(f_defaults, inspect.signature(f_defaults)) + fields = SPEC().get_resolvable_fields() + # fields market with dlt config are not optional, same for required fields + for arg in ["config_val", "secret_val", "pos_cf_val", "pos_secret_val", "kw_cf_val", "kw_secret_val"]: + assert not is_optional_type(fields[arg]) + for arg in ["pos_cf", "kw_val"]: + assert is_optional_type(fields[arg]) + # explicit pram does not go into spec + assert not hasattr(SPEC, "req_val") + + node = get_func_def_node(f_kw_defaults) + assert node.name == "f_kw_defaults" + literal_defaults = get_literal_defaults(node) + assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_lit': "'12131'", "kw1": "dlt.config.value"} + SPEC = 
spec_from_signature(f_kw_defaults, inspect.signature(f_kw_defaults)) + fields = SPEC().get_resolvable_fields() + assert not is_optional_type(fields["kw_lit"]) + assert not is_optional_type(fields["kw1"]) + assert not is_optional_type(fields["kw_secret_val"]) + + node = get_func_def_node(f_kw_defaults_args) + assert node.name == "f_kw_defaults_args" + literal_defaults = get_literal_defaults(node) + # print(literal_defaults) + assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_lit': "'12131'", 'arg3': 'dlt.config.value', 'arg2': '2'} + + node = get_func_def_node(f_top_kw_defaults_args) + assert node.name == "f_top_kw_defaults_args" + literal_defaults = get_literal_defaults(node) + assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_lit': "'12131'", 'arg3': 'dlt.config.value', 'arg2': "'top'"} From a535a5e251de8a1bc4c2cc34f363d3decef01bdf Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 24 Nov 2022 11:39:47 +0100 Subject: [PATCH 4/7] uses dlt default literals for init command --- dlt/cli/_dlt.py | 1 + dlt/cli/config_toml_writer.py | 5 +- dlt/cli/init_command.py | 78 ++++--------------- tests/common/configuration/test_accessors.py | 4 +- .../configuration/test_toml_provider.py | 9 ++- 5 files changed, 27 insertions(+), 70 deletions(-) diff --git a/dlt/cli/_dlt.py b/dlt/cli/_dlt.py index 7c05de94b1..d62fa29962 100644 --- a/dlt/cli/_dlt.py +++ b/dlt/cli/_dlt.py @@ -31,6 +31,7 @@ def init_command_wrapper(pipeline_name: str, destination_name: str, branch: str) init_command(pipeline_name, destination_name, branch) except Exception as ex: click.secho(str(ex), err=True, fg="red") + # TODO: display stack trace if with debug flag def main() -> None: diff --git a/dlt/cli/config_toml_writer.py b/dlt/cli/config_toml_writer.py index 3bd9f757e0..c46eb58048 100644 --- a/dlt/cli/config_toml_writer.py +++ b/dlt/cli/config_toml_writer.py @@ -2,8 +2,8 @@ import tomlkit from tomlkit.items import Table as TOMLTable -from 
dlt.common.configuration.resolve import extract_inner_hint, is_base_configuration_hint -from dlt.common.configuration.specs.base_configuration import BaseConfiguration +from dlt.common.configuration.resolve import extract_inner_hint +from dlt.common.configuration.specs.base_configuration import BaseConfiguration, is_base_configuration_hint from dlt.common.typing import AnyType, is_final_type, is_optional_type @@ -41,7 +41,6 @@ def write_spec(toml_table: TOMLTable, config: BaseConfiguration) -> None: def write_values(toml: tomlkit.TOMLDocument, values: Iterable[WritableConfigValue]) -> None: - # print(values) for value in values: toml_table: TOMLTable = toml # type: ignore for namespace in value.namespaces: diff --git a/dlt/cli/init_command.py b/dlt/cli/init_command.py index a005efe538..3c104c2d2b 100644 --- a/dlt/cli/init_command.py +++ b/dlt/cli/init_command.py @@ -12,7 +12,7 @@ from dlt.common.git import clone_repo from dlt.common.configuration.providers.toml import ConfigTomlProvider, SecretsTomlProvider -from dlt.common.configuration.resolve import is_secret_hint +from dlt.common.configuration.specs.base_configuration import is_secret_hint from dlt.common.configuration.accessors import DLT_SECRETS_VALUE, DLT_CONFIG_VALUE from dlt.common.exceptions import DltException from dlt.common.logger import DLT_PKG_NAME @@ -38,9 +38,9 @@ def _clone_init_repo(branch: str) -> Tuple[FileStorage, List[str], str]: # return tuple is (file storage for cloned repo, list of template files to copy, the default pipeline template script) - template_dir = "/home/rudolfix/src/python-dlt-init-template" - # template_dir = tempfile.mkdtemp() - # clone_repo("https://github.com/scale-vector/python-dlt-init-template.git", template_dir, branch=branch) + # template_dir = "~/src/python-dlt-init-template" + template_dir = tempfile.mkdtemp() + clone_repo("https://github.com/scale-vector/python-dlt-init-template.git", template_dir, branch=branch) clone_storage = FileStorage(template_dir) @@ -96,6 
+96,7 @@ def _find_argument_nodes_to_replace(visitor: PipelineScriptVisitor, replace_node raise CliCommandException("init", f"The pipeline script {init_script_name} is not explicitly passing the '{t_arg_name}' argument to 'pipeline' or 'run' function. In init script the default and configured values are not accepted.") return transformed_nodes + def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: ModuleType, init_script_name: str) -> Tuple[Dict[str, WritableConfigValue], Dict[str, WritableConfigValue]]: # all detected secrets with namespaces required_secrets: Dict[str, WritableConfigValue] = {} @@ -104,7 +105,8 @@ def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: Modu # skip sources without spec. those are not imported and most probably are inner functions. also skip the sources that are not called # also skip the sources that are called from functions, the parent of call object to the source must be None (no outer function) - known_imported_sources = {name: _SOURCES[name] for name in visitor.known_sources if name in _SOURCES and name in visitor.known_source_calls and any(call.parent is None for call in visitor.known_source_calls[name])} + known_imported_sources = {name: _SOURCES[name] for name in visitor.known_sources + if name in _SOURCES and name in visitor.known_source_calls and any(call.parent is None for call in visitor.known_source_calls[name])} # type: ignore for source_name, source_info in known_imported_sources.items(): source_config = source_info.SPEC() @@ -119,58 +121,9 @@ def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: Modu val_store = required_config if val_store is not None: - # use full namespaces if we have many sources - namespaces = () if len(known_imported_sources) == 1 else ("sources", source_name) - val_store[source_name + ":" + field_name] = WritableConfigValue(field_name, field_type, namespaces) - - return required_secrets, required_config - - - 
known_imported_calls = {name: calls for name, calls in visitor.known_source_calls.items() if name in _SOURCES} - - for pipeline_name, call_nodes in known_imported_calls.items(): - source_config = _SOURCES.get(pipeline_name).SPEC() - spec_fields = source_config.get_resolvable_fields() - source_sig = inspect.signature(getattr(script_module, pipeline_name)) - # bind all calls - for call_node in call_nodes: - try: - bound_args = source_sig.bind(*call_node.args, **{str(kwd.arg):kwd.value for kwd in call_node.keywords}) - bound_args.apply_defaults() - except TypeError as ty_ex: - call_info = visitor.source_segment(call_node) - raise CliCommandException("init", f"In {init_script_name} the source/resource {pipeline_name} call {call_info} looks wrong: {ty_ex}") - # find all the arguments that are not sufficiently bound - print(bound_args) - for arg_name, arg_node in bound_args.arguments.items(): - # check if argument is in spec and is not optional. optional arguments won't be added to config/secrets - arg_type = spec_fields.get(arg_name) - if arg_type and not is_optional_type(arg_type): - value_provided = True - from_placeholder = False - from_secrets = is_secret_hint(arg_type) - if isinstance(arg_node, ast.Constant): - value_provided = ast.literal_eval(arg_node) is not None - if isinstance(arg_node, ast.Attribute) and arg_node.attr == "value": - attr_source = visitor.source_segment(arg_node) - if attr_source.endswith(DLT_CONFIG_VALUE): - value_provided = False - from_placeholder = True - if from_secrets: - raise CliCommandException("init", f"The pipeline script {init_script_name} calls source/resource {pipeline_name} where argument {arg_name} is a secret but it requests it via {attr_source}") - if attr_source.endswith(DLT_SECRETS_VALUE): - value_provided = False - from_placeholder = True - from_secrets = True - # was value provided in the call args? - if not value_provided: - # do we have sufficient information if arg_name is config or secret? 
- if arg_type is AnyType and not from_placeholder: - raise CliCommandException("init", f"The pipeline script {init_script_name} in source/resource '{pipeline_name}' does not provide enough information if argument '{arg_name}' is a secret or a config value. Use 'dlt.config.value' or 'dlt.secret.value' or (strongly suggested) type the source/resource function signature.") - val_store = required_secrets if from_secrets else required_config - # use full namespaces if we have many sources - namespaces = () if len(known_imported_calls) == 1 else ("sources", pipeline_name) - val_store[pipeline_name + ":" + arg_name] = WritableConfigValue(arg_name, arg_type, namespaces) + # we are sure that all resources come from single file so we can put them in single namespace + # namespaces = () if len(known_imported_sources) == 1 else ("sources", source_name) + val_store[source_name + ":" + field_name] = WritableConfigValue(field_name, field_type, ()) return required_secrets, required_config @@ -264,8 +217,10 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None if len(_SOURCES) == 0: raise CliCommandException("init", f"The pipeline script {init_script_name} is not creating or importing any sources or resources") + for source_q_name, source_config in _SOURCES.items(): if source_q_name not in visitor.known_sources: + print(visitor.known_sources) raise CliCommandException("init", f"The pipeline script {init_script_name} imports a source/resource {source_config.f.__name__} from module {source_config.module.__name__}. In init scripts you must declare all sources and resources in single file.") # detect all the required secrets and configs that should go into tomls files @@ -301,13 +256,10 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None if dest_storage.has_file(REQUIREMENTS_TXT): click.echo("Your python dependencies are kept in %s. 
Please add the dependency for %s as follows:" % (fmt.bold(REQUIREMENTS_TXT), fmt.bold(DLT_PKG_NAME))) click.echo(req_dep_line) + click.echo("To install dlt with the %s extra using pip:" % fmt.bold(destination_name)) else: - if click.confirm("%s not found. Should I create one?" % REQUIREMENTS_TXT): - requirements_txt = req_dep_line - click.echo("* %s created. Install it with:\npip3 install -r %s" % (fmt.bold(REQUIREMENTS_TXT), REQUIREMENTS_TXT)) - else: - click.echo("Do not forget to install dlt with the %s extra using:") - click.echo(f"pip3 install {DLT_PKG_NAME}[{destination_name}]") + requirements_txt = req_dep_line + click.echo("* %s created. Install it with:\npip3 install -r %s" % (fmt.bold(REQUIREMENTS_TXT), REQUIREMENTS_TXT)) # copy files at the very end for file_name in TEMPLATE_FILES + toml_files: diff --git a/tests/common/configuration/test_accessors.py b/tests/common/configuration/test_accessors.py index 566d283678..e398973405 100644 --- a/tests/common/configuration/test_accessors.py +++ b/tests/common/configuration/test_accessors.py @@ -110,11 +110,11 @@ def test_secrets_separation(toml_providers: ConfigProvidersContext) -> None: def test_access_access_injection(toml_providers: ConfigProvidersContext) -> None: @dlt.source - def the_source(api_type, credentials: GcpClientCredentials, databricks_creds: ConnectionStringCredentials): + def the_source(api_type=dlt.config.value, credentials: GcpClientCredentials=dlt.secrets.value, databricks_creds: ConnectionStringCredentials=dlt.secrets.value): assert api_type == "REST" assert credentials.client_email == "loader@a7513.iam.gserviceaccount.com" assert databricks_creds.drivername == "databricks+connector" return dlt.resource([1,2,3], name="data") # inject first argument, the rest pass explicitly - the_source(dlt.config.value, dlt.secrets["destination.credentials"], dlt.secrets["databricks.credentials"]) + the_source(credentials=dlt.secrets["destination.credentials"], 
databricks_creds=dlt.secrets["databricks.credentials"]) diff --git a/tests/common/configuration/test_toml_provider.py b/tests/common/configuration/test_toml_provider.py index e85b01e527..edce080554 100644 --- a/tests/common/configuration/test_toml_provider.py +++ b/tests/common/configuration/test_toml_provider.py @@ -77,10 +77,15 @@ def test_toml_mixed_config_inject(toml_providers: ConfigProvidersContext) -> Non # get data from both providers @with_config - def mixed_val(api_type, secret_value: TSecretValue, typecheck: Any): + def mixed_val(api_type=dlt.config.value, secret_value: TSecretValue = dlt.secrets.value, typecheck: Any = dlt.config.value): return api_type, secret_value, typecheck - _tup = mixed_val(dlt.config.value, dlt.secrets.value, dlt.config.value) + _tup = mixed_val(None, None, None) + assert _tup[0] == "REST" + assert _tup[1] == "2137" + assert isinstance(_tup[2], dict) + + _tup = mixed_val() assert _tup[0] == "REST" assert _tup[1] == "2137" assert isinstance(_tup[2], dict) From 49fc9a0ee8d990c11123bcb62643bc566a1c3e85 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 24 Nov 2022 11:40:31 +0100 Subject: [PATCH 5/7] more decorator variants in script visitor, detection of outer functions via parent, allows resources to be called in script inspector --- dlt/reflection/script_inspector.py | 6 +++--- dlt/reflection/script_visitor.py | 26 +++++++++++++++----------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/dlt/reflection/script_inspector.py b/dlt/reflection/script_inspector.py index e925f8cfff..0327623d3b 100644 --- a/dlt/reflection/script_inspector.py +++ b/dlt/reflection/script_inspector.py @@ -9,7 +9,7 @@ from dlt.common.typing import DictStrAny from dlt.pipeline import Pipeline -from dlt.extract.source import DltSource, DltResource +from dlt.extract.source import DltSource, PipeIterator def patch__init__(self: Any, *args: Any, **kwargs: Any) -> None: @@ -31,7 +31,7 @@ def inspect_pipeline_script(script_path: str) -> 
ModuleType: try: # patch entry points to pipeline, sources and resources to prevent pipeline from running - with patch.object(Pipeline, '__init__', patch__init__), patch.object(DltSource, '__init__', patch__init__), patch.object(DltResource, '__init__', patch__init__): + with patch.object(Pipeline, '__init__', patch__init__), patch.object(DltSource, '__init__', patch__init__), patch.object(PipeIterator, '__init__', patch__init__): return import_module(module) finally: # remove script module path @@ -41,4 +41,4 @@ def inspect_pipeline_script(script_path: str) -> ModuleType: class PipelineIsRunning(DltException): def __init__(self, obj: object, args: Tuple[str, ...], kwargs: DictStrAny) -> None: - super().__init__(f"The pipeline script instantiates the pipeline on import: {obj.__class__.__name__}", obj, args, kwargs) + super().__init__(f"The pipeline script instantiates the pipeline on import. Did you forget to use if __name__ == 'main':? in {obj.__class__.__name__}", obj, args, kwargs) diff --git a/dlt/reflection/script_visitor.py b/dlt/reflection/script_visitor.py index 5bf9236004..1f15f8f4e5 100644 --- a/dlt/reflection/script_visitor.py +++ b/dlt/reflection/script_visitor.py @@ -1,7 +1,9 @@ import inspect import ast +import astunparse from ast import NodeVisitor from typing import Any, Dict, List +from dlt.common.reflection.utils import get_outer_func_def import dlt.reflection.names as n @@ -9,9 +11,8 @@ class PipelineScriptVisitor(NodeVisitor): - def __init__(self, source: str, add_parents: bool = False): + def __init__(self, source: str): self.source = source - self.add_parents = add_parents self.mod_aliases: Dict[str, str] = {} self.func_aliases: Dict[str, str] = {} @@ -21,13 +22,6 @@ def __init__(self, source: str, add_parents: bool = False): self.known_sources: Dict[str, ast.FunctionDef] = {} self.known_source_calls: Dict[str, List[ast.Call]] = {} - def visit(self, tree: ast.AST) -> Any: - if self.add_parents: - for node in ast.walk(tree): - for child in 
ast.iter_child_nodes(node): - child.parent = node if node is not tree else None # type: ignore - super().visit(tree) - def visit_Import(self, node: ast.Import) -> Any: # reflect on imported modules for alias in node.names: @@ -57,7 +51,13 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> Any: def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: # find all sources and resources by inspecting decorators for deco in node.decorator_list: - alias_name = self.source_segment(deco) + # decorators can be function calls, attributes or names + if isinstance(deco, (ast.Name, ast.Attribute)): + alias_name = astunparse.unparse(deco).strip() + elif isinstance(deco, ast.Call): + alias_name = astunparse.unparse(deco.func).strip() + else: + raise ValueError(self.source_segment(deco), type(deco), "Unknown decorator form") fn = self.func_aliases.get(alias_name) if fn in [n.SOURCE, n.RESOURCE]: self.known_sources[str(node.name)] = node @@ -65,13 +65,15 @@ def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: def visit_Call(self, node: ast.Call) -> Any: # check if this is a call to any of known functions - alias_name = self.source_segment(node.func) + alias_name = astunparse.unparse(node.func).strip() fn = self.func_aliases.get(alias_name) if not fn: # try a fallback to "run" function that may be called on pipeline or source if isinstance(node.func, ast.Attribute) and node.func.attr == n.RUN: fn = n.RUN if fn: + # set parent to the outer function + node.parent = get_outer_func_def(node) # type: ignore sig = n.SIGNATURES[fn] try: # bind the signature where the argument values are the corresponding ast nodes @@ -86,6 +88,8 @@ def visit_Call(self, node: ast.Call) -> Any: else: # check if this is a call to any known source if alias_name in self.known_sources: + # set parent to the outer function + node.parent = get_outer_func_def(node) # type: ignore source_calls = self.known_source_calls.setdefault(alias_name, []) source_calls.append(node) From 
3dd61f96b38ffdaafea706edf5f30f65d9c18fc3 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 24 Nov 2022 11:41:14 +0100 Subject: [PATCH 6/7] moves hint utils from resolve to base_configuration, sets TSecretValue to derive from Any and uses literal defaults instead of ConfigValue --- dlt/common/configuration/accessors.py | 4 ++-- dlt/common/configuration/resolve.py | 15 +-------------- .../configuration/specs/base_configuration.py | 18 +++++++++++++++--- dlt/common/destination.py | 4 ++-- dlt/common/storages/live_schema_storage.py | 14 +++----------- dlt/common/storages/load_storage.py | 13 +++---------- dlt/common/storages/normalize_storage.py | 12 ++---------- dlt/common/storages/schema_storage.py | 13 +++---------- dlt/common/typing.py | 3 ++- dlt/destinations/bigquery/__init__.py | 6 +++--- dlt/destinations/dummy/__init__.py | 6 +++--- dlt/destinations/postgres/__init__.py | 6 +++--- dlt/destinations/redshift/__init__.py | 6 +++--- dlt/extract/typing.py | 2 +- dlt/load/load.py | 7 ++++--- dlt/normalize/normalize.py | 8 ++++---- examples/sources/google_sheets.py | 4 ++-- 17 files changed, 56 insertions(+), 85 deletions(-) diff --git a/dlt/common/configuration/accessors.py b/dlt/common/configuration/accessors.py index c2bd5f83d8..6bb61a700b 100644 --- a/dlt/common/configuration/accessors.py +++ b/dlt/common/configuration/accessors.py @@ -7,7 +7,7 @@ from dlt.common.configuration.providers.provider import ConfigProvider from dlt.common.configuration.resolve import deserialize_value -from dlt.common.configuration.specs import BaseConfiguration +from dlt.common.configuration.specs.base_configuration import is_base_configuration_hint from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.schema.utils import coerce_value from dlt.common.typing import AnyType, ConfigValue @@ -34,7 +34,7 @@ def get(self, field: str, expected_type: Type[TConfigAny] = None) -> TConfigAny: return None # cast to required type if 
expected_type: - if inspect.isclass(expected_type) and issubclass(expected_type, BaseConfiguration): + if is_base_configuration_hint(expected_type): c = expected_type() if isinstance(value, dict): c.update(value) diff --git a/dlt/common/configuration/resolve.py b/dlt/common/configuration/resolve.py index b890395e23..de8528374f 100644 --- a/dlt/common/configuration/resolve.py +++ b/dlt/common/configuration/resolve.py @@ -1,5 +1,4 @@ import ast -import inspect from collections.abc import Mapping as C_Mapping from typing import Any, Dict, ContextManager, List, Optional, Sequence, Tuple, Type, TypeVar, get_origin @@ -8,7 +7,7 @@ from dlt.common.typing import AnyType, StrAny, TSecretValue, is_final_type, is_optional_type, extract_inner_type from dlt.common.schema.utils import coerce_value, py_type_to_sc_type -from dlt.common.configuration.specs.base_configuration import BaseConfiguration, CredentialsConfiguration, ContainerInjectableContext, get_config_if_union +from dlt.common.configuration.specs.base_configuration import BaseConfiguration, CredentialsConfiguration, is_secret_hint, get_config_if_union, is_base_configuration_hint, is_context_hint from dlt.common.configuration.specs.config_namespace_context import ConfigNamespacesContext from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext @@ -68,18 +67,6 @@ def serialize_value(value: Any) -> Any: return coerce_value("text", value_dt, value) -def is_secret_hint(hint: Type[Any]) -> bool: - return hint is TSecretValue or (inspect.isclass(hint) and issubclass(hint, CredentialsConfiguration)) - - -def is_base_configuration_hint(hint: Type[Any]) -> bool: - return inspect.isclass(hint) and issubclass(hint, BaseConfiguration) - - -def is_context_hint(hint: Type[Any]) -> bool: - return inspect.isclass(hint) and issubclass(hint, ContainerInjectableContext) - - def extract_inner_hint(hint: Type[Any]) -> Type[Any]: # extract hint from 
Optional / Literal / NewType hints inner_hint = extract_inner_type(hint) diff --git a/dlt/common/configuration/specs/base_configuration.py b/dlt/common/configuration/specs/base_configuration.py index 4cedd76895..ffc05a0bd8 100644 --- a/dlt/common/configuration/specs/base_configuration.py +++ b/dlt/common/configuration/specs/base_configuration.py @@ -8,7 +8,7 @@ else: TDtcField = dataclasses.Field -from dlt.common.typing import TAnyClass, extract_inner_type, is_optional_type +from dlt.common.typing import TAnyClass, TSecretValue, extract_inner_type, is_optional_type from dlt.common.schema.utils import py_type_to_sc_type from dlt.common.configuration.exceptions import ConfigFieldMissingTypeHintException, ConfigFieldTypeHintNotSupported @@ -18,6 +18,18 @@ _F_ContainerInjectableContext: Any = type(object) +def is_secret_hint(hint: Type[Any]) -> bool: + return hint is TSecretValue or (inspect.isclass(hint) and issubclass(hint, CredentialsConfiguration)) + + +def is_base_configuration_hint(hint: Type[Any]) -> bool: + return inspect.isclass(hint) and issubclass(hint, BaseConfiguration) + + +def is_context_hint(hint: Type[Any]) -> bool: + return inspect.isclass(hint) and issubclass(hint, ContainerInjectableContext) + + def is_valid_hint(hint: Type[Any]) -> bool: hint = extract_inner_type(hint) hint = get_config_if_union(hint) or hint @@ -28,7 +40,7 @@ def is_valid_hint(hint: Type[Any]) -> bool: if hint is ClassVar: # class vars are skipped by dataclass return True - if inspect.isclass(hint) and issubclass(hint, BaseConfiguration): + if is_base_configuration_hint(hint): return True with contextlib.suppress(TypeError): py_type_to_sc_type(hint) @@ -38,7 +50,7 @@ def is_valid_hint(hint: Type[Any]) -> bool: def get_config_if_union(hint: Type[Any]) -> Type[Any]: if get_origin(hint) is Union: - return next((t for t in get_args(hint) if inspect.isclass(t) and issubclass(t, BaseConfiguration)), None) + return next((t for t in get_args(hint) if is_base_configuration_hint(t)), None) 
return None diff --git a/dlt/common/destination.py b/dlt/common/destination.py index 7f14636c69..6d5ffea7b5 100644 --- a/dlt/common/destination.py +++ b/dlt/common/destination.py @@ -6,9 +6,9 @@ from dlt.common.schema import Schema from dlt.common.schema.typing import TTableSchema -from dlt.common.typing import ConfigValue, TypeAlias from dlt.common.configuration import configspec from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration, ContainerInjectableContext +from dlt.common.configuration.accessors import config # known loader file formats @@ -142,7 +142,7 @@ class DestinationReference(Protocol): def capabilities(self) -> DestinationCapabilitiesContext: ... - def client(self, schema: Schema, initial_config: DestinationClientConfiguration = ConfigValue) -> "JobClientBase": + def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": ... def spec(self) -> Type[DestinationClientConfiguration]: diff --git a/dlt/common/storages/live_schema_storage.py b/dlt/common/storages/live_schema_storage.py index ae39d63a3a..223652d26d 100644 --- a/dlt/common/storages/live_schema_storage.py +++ b/dlt/common/storages/live_schema_storage.py @@ -1,22 +1,14 @@ -from typing import Any, Dict, overload +from typing import Dict, overload -from dlt.common.typing import ConfigValue from dlt.common.schema.schema import Schema from dlt.common.storages.schema_storage import SchemaStorage from dlt.common.configuration.specs import SchemaVolumeConfiguration +from dlt.common.configuration.accessors import config class LiveSchemaStorage(SchemaStorage): - @overload - def __init__(self, config: SchemaVolumeConfiguration, makedirs: bool = False) -> None: - ... - - @overload - def __init__(self, config: SchemaVolumeConfiguration = ConfigValue, makedirs: bool = False) -> None: - ... 
- - def __init__(self, config: SchemaVolumeConfiguration = None, makedirs: bool = False) -> None: + def __init__(self, config: SchemaVolumeConfiguration = config.value, makedirs: bool = False) -> None: self.live_schemas: Dict[str, Schema] = {} super().__init__(config, makedirs) diff --git a/dlt/common/storages/load_storage.py b/dlt/common/storages/load_storage.py index dc02f4fe21..ca18bc4e0b 100644 --- a/dlt/common/storages/load_storage.py +++ b/dlt/common/storages/load_storage.py @@ -5,10 +5,11 @@ from dlt.common import json, pendulum from dlt.common.configuration.inject import with_config -from dlt.common.typing import ConfigValue, DictStrAny, StrAny +from dlt.common.typing import DictStrAny, StrAny from dlt.common.storages.file_storage import FileStorage from dlt.common.data_writers import TLoaderFileFormat, DataWriter from dlt.common.configuration.specs import LoadVolumeConfiguration +from dlt.common.configuration.accessors import config from dlt.common.exceptions import TerminalValueError from dlt.common.schema import Schema, TSchemaUpdate, TTableSchemaColumns from dlt.common.storages.versioned_storage import VersionedStorage @@ -42,21 +43,13 @@ class LoadStorage(DataItemStorage, VersionedStorage): ALL_SUPPORTED_FILE_FORMATS: Set[TLoaderFileFormat] = set(get_args(TLoaderFileFormat)) - @overload - def __init__(self, is_owner: bool, preferred_file_format: TLoaderFileFormat, supported_file_formats: Iterable[TLoaderFileFormat], config: LoadVolumeConfiguration) -> None: - ... - - @overload - def __init__(self, is_owner: bool, preferred_file_format: TLoaderFileFormat, supported_file_formats: Iterable[TLoaderFileFormat], config: LoadVolumeConfiguration = ConfigValue) -> None: - ... 
- @with_config(spec=LoadVolumeConfiguration, namespaces=("load",)) def __init__( self, is_owner: bool, preferred_file_format: TLoaderFileFormat, supported_file_formats: Iterable[TLoaderFileFormat], - config: LoadVolumeConfiguration = ConfigValue + config: LoadVolumeConfiguration = config.value ) -> None: if not LoadStorage.ALL_SUPPORTED_FILE_FORMATS.issuperset(supported_file_formats): raise TerminalValueError(supported_file_formats) diff --git a/dlt/common/storages/normalize_storage.py b/dlt/common/storages/normalize_storage.py index f03ff5ffe2..bf38749a47 100644 --- a/dlt/common/storages/normalize_storage.py +++ b/dlt/common/storages/normalize_storage.py @@ -6,7 +6,7 @@ from dlt.common.configuration import with_config from dlt.common.configuration.specs import NormalizeVolumeConfiguration from dlt.common.storages.versioned_storage import VersionedStorage -from dlt.common.typing import ConfigValue +from dlt.common.configuration.accessors import config class TParsedNormalizeFileName(NamedTuple): @@ -20,16 +20,8 @@ class NormalizeStorage(VersionedStorage): STORAGE_VERSION: ClassVar[str] = "1.0.0" EXTRACTED_FOLDER: ClassVar[str] = "extracted" # folder within the volume where extracted files to be normalized are stored - @overload - def __init__(self, is_owner: bool, config: NormalizeVolumeConfiguration) -> None: - ... - - @overload - def __init__(self, is_owner: bool, config: NormalizeVolumeConfiguration = ConfigValue) -> None: - ... 
- @with_config(spec=NormalizeVolumeConfiguration, namespaces=("normalize",)) - def __init__(self, is_owner: bool, config: NormalizeVolumeConfiguration = ConfigValue) -> None: + def __init__(self, is_owner: bool, config: NormalizeVolumeConfiguration = config.value) -> None: super().__init__(NormalizeStorage.STORAGE_VERSION, is_owner, FileStorage(config.normalize_volume_path, "t", makedirs=is_owner)) self.config = config if is_owner: diff --git a/dlt/common/storages/schema_storage.py b/dlt/common/storages/schema_storage.py index bdc4019b8b..b8729905ff 100644 --- a/dlt/common/storages/schema_storage.py +++ b/dlt/common/storages/schema_storage.py @@ -5,9 +5,10 @@ from dlt.common.configuration import with_config from dlt.common.configuration.specs import SchemaVolumeConfiguration, TSchemaFileFormat from dlt.common.configuration.specs.schema_volume_configuration import SchemaFileExtensions +from dlt.common.configuration.accessors import config from dlt.common.storages.file_storage import FileStorage from dlt.common.schema import Schema, verify_schema_hash -from dlt.common.typing import DictStrAny, ConfigValue +from dlt.common.typing import DictStrAny from dlt.common.storages.exceptions import InStorageSchemaModified, SchemaNotFoundError @@ -17,16 +18,8 @@ class SchemaStorage(Mapping[str, Schema]): SCHEMA_FILE_NAME = "schema.%s" NAMED_SCHEMA_FILE_PATTERN = f"%s.{SCHEMA_FILE_NAME}" - @overload - def __init__(self, config: SchemaVolumeConfiguration, makedirs: bool = False) -> None: - ... - - @overload - def __init__(self, config: SchemaVolumeConfiguration = ConfigValue, makedirs: bool = False) -> None: - ... 
- @with_config(spec=SchemaVolumeConfiguration, namespaces=("schema",)) - def __init__(self, config: SchemaVolumeConfiguration = ConfigValue, makedirs: bool = False) -> None: + def __init__(self, config: SchemaVolumeConfiguration = config.value, makedirs: bool = False) -> None: self.config = config self.storage = FileStorage(config.schema_volume_path, makedirs=makedirs) diff --git a/dlt/common/typing.py b/dlt/common/typing.py index e3af994f94..76a74dd44a 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -25,7 +25,8 @@ TFun = TypeVar("TFun", bound=AnyFun) # any function TAny = TypeVar("TAny", bound=Any) TAnyClass = TypeVar("TAnyClass", bound=object) -TSecretValue = NewType("TSecretValue", str) # represent secret value ie. coming from Kubernetes/Docker secrets or other providers +# represent secret value ie. coming from Kubernetes/Docker secrets or other providers +TSecretValue = NewType("TSecretValue", Any) # type: ignore TDataItem: TypeAlias = object # a single data item as extracted from data source TDataItems: TypeAlias = Union[TDataItem, List[TDataItem]] # a single or many data items as extracted from the data source diff --git a/dlt/destinations/bigquery/__init__.py b/dlt/destinations/bigquery/__init__.py index 8403035523..a22cbdf3c3 100644 --- a/dlt/destinations/bigquery/__init__.py +++ b/dlt/destinations/bigquery/__init__.py @@ -2,15 +2,15 @@ from dlt.common.data_writers.escape import escape_bigquery_identifier from dlt.common.schema.schema import Schema -from dlt.common.typing import ConfigValue from dlt.common.configuration import with_config +from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext, JobClientBase, DestinationClientConfiguration from dlt.destinations.bigquery.configuration import BigQueryClientConfiguration @with_config(spec=BigQueryClientConfiguration, namespaces=("destination", "bigquery",)) -def _configure(config: BigQueryClientConfiguration = ConfigValue) -> 
BigQueryClientConfiguration: +def _configure(config: BigQueryClientConfiguration = config.value) -> BigQueryClientConfiguration: return config @@ -31,7 +31,7 @@ def capabilities() -> DestinationCapabilitiesContext: return caps -def client(schema: Schema, initial_config: DestinationClientConfiguration = ConfigValue) -> JobClientBase: +def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: # import client when creating instance so capabilities and config specs can be accessed without dependencies installed from dlt.destinations.bigquery.bigquery import BigQueryClient diff --git a/dlt/destinations/dummy/__init__.py b/dlt/destinations/dummy/__init__.py index d5c1ea5b22..f42a54c6f6 100644 --- a/dlt/destinations/dummy/__init__.py +++ b/dlt/destinations/dummy/__init__.py @@ -1,15 +1,15 @@ from typing import Type from dlt.common.schema.schema import Schema -from dlt.common.typing import ConfigValue from dlt.common.configuration import with_config +from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext, JobClientBase, DestinationClientConfiguration from dlt.destinations.dummy.configuration import DummyClientConfiguration @with_config(spec=DummyClientConfiguration, namespaces=("destination", "dummy",)) -def _configure(config: DummyClientConfiguration = ConfigValue) -> DummyClientConfiguration: +def _configure(config: DummyClientConfiguration = config.value) -> DummyClientConfiguration: return config @@ -29,7 +29,7 @@ def capabilities() -> DestinationCapabilitiesContext: return caps -def client(schema: Schema, initial_config: DestinationClientConfiguration = ConfigValue) -> JobClientBase: +def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: # import client when creating instance so capabilities and config specs can be accessed without dependencies installed from dlt.destinations.dummy.dummy import 
DummyClient diff --git a/dlt/destinations/postgres/__init__.py b/dlt/destinations/postgres/__init__.py index 906f19d461..e81bf86b40 100644 --- a/dlt/destinations/postgres/__init__.py +++ b/dlt/destinations/postgres/__init__.py @@ -1,8 +1,8 @@ from typing import Type from dlt.common.schema.schema import Schema -from dlt.common.typing import ConfigValue from dlt.common.configuration import with_config +from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal from dlt.common.destination import DestinationCapabilitiesContext, JobClientBase, DestinationClientConfiguration @@ -10,7 +10,7 @@ @with_config(spec=PostgresClientConfiguration, namespaces=("destination", "postgres",)) -def _configure(config: PostgresClientConfiguration = ConfigValue) -> PostgresClientConfiguration: +def _configure(config: PostgresClientConfiguration = config.value) -> PostgresClientConfiguration: return config @@ -32,7 +32,7 @@ def capabilities() -> DestinationCapabilitiesContext: return caps -def client(schema: Schema, initial_config: DestinationClientConfiguration = ConfigValue) -> JobClientBase: +def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: # import client when creating instance so capabilities and config specs can be accessed without dependencies installed from dlt.destinations.postgres.postgres import PostgresClient diff --git a/dlt/destinations/redshift/__init__.py b/dlt/destinations/redshift/__init__.py index 89a4435def..a1378c0030 100644 --- a/dlt/destinations/redshift/__init__.py +++ b/dlt/destinations/redshift/__init__.py @@ -1,8 +1,8 @@ from typing import Type from dlt.common.schema.schema import Schema -from dlt.common.typing import ConfigValue from dlt.common.configuration import with_config +from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_redshift_identifier, 
escape_redshift_literal from dlt.common.destination import DestinationCapabilitiesContext, JobClientBase, DestinationClientConfiguration @@ -10,7 +10,7 @@ @with_config(spec=RedshiftClientConfiguration, namespaces=("destination", "redshift",)) -def _configure(config: RedshiftClientConfiguration = ConfigValue) -> RedshiftClientConfiguration: +def _configure(config: RedshiftClientConfiguration = config.value) -> RedshiftClientConfiguration: return config @@ -31,7 +31,7 @@ def capabilities() -> DestinationCapabilitiesContext: return caps -def client(schema: Schema, initial_config: DestinationClientConfiguration = ConfigValue) -> JobClientBase: +def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: # import client when creating instance so capabilities and config specs can be accessed without dependencies installed from dlt.destinations.redshift.redshift import RedshiftClient diff --git a/dlt/extract/typing.py b/dlt/extract/typing.py index 70fee3dfb6..4e1423ad2b 100644 --- a/dlt/extract/typing.py +++ b/dlt/extract/typing.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Protocol, TypedDict, TypeVar, Union, Awaitable +from typing import Any, Callable, TypedDict, TypeVar, Union, Awaitable from dlt.common.typing import TDataItem, TDataItems from dlt.common.schema.typing import TTableSchemaColumns, TWriteDisposition diff --git a/dlt/load/load.py b/dlt/load/load.py index 7a0e84dc49..91b6e32a68 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -4,7 +4,8 @@ from dlt.common import sleep, logger from dlt.common.configuration import with_config -from dlt.common.typing import ConfigValue, StrAny +from dlt.common.configuration.accessors import config +from dlt.common.typing import StrAny from dlt.common.runners import TRunMetrics, Runnable, workermethod from dlt.common.logger import pretty_format_exception from dlt.common.exceptions import TerminalValueError @@ -34,8 +35,8 @@ def __init__( destination: 
DestinationReference, collector: CollectorRegistry = REGISTRY, is_storage_owner: bool = False, - config: LoaderConfiguration = ConfigValue, - initial_client_config: DestinationClientConfiguration = ConfigValue + config: LoaderConfiguration = config.value, + initial_client_config: DestinationClientConfiguration = config.value ) -> None: self.config = config self.initial_client_config = initial_client_config diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index 894c89d053..2b9c650c22 100644 --- a/dlt/normalize/normalize.py +++ b/dlt/normalize/normalize.py @@ -5,8 +5,8 @@ from dlt.common import pendulum, signals, json, logger from dlt.common.configuration import with_config -from dlt.common.configuration.specs.load_volume_configuration import LoadVolumeConfiguration -from dlt.common.configuration.specs.normalize_volume_configuration import NormalizeVolumeConfiguration +from dlt.common.configuration.accessors import config +from dlt.common.configuration.specs import LoadVolumeConfiguration, NormalizeVolumeConfiguration from dlt.common.data_writers.writers import TLoaderFileFormat from dlt.common.json import custom_pua_decode from dlt.common.runners import TRunMetrics, Runnable @@ -14,7 +14,7 @@ from dlt.common.storages.exceptions import SchemaNotFoundError from dlt.common.storages import NormalizeStorage, SchemaStorage, LoadStorage from dlt.common.telemetry import get_logging_extras -from dlt.common.typing import ConfigValue, StrAny, TDataItem +from dlt.common.typing import TDataItem from dlt.common.exceptions import PoolException from dlt.common.schema import TSchemaUpdate, Schema from dlt.common.schema.exceptions import CannotCoerceColumnException @@ -36,7 +36,7 @@ class Normalize(Runnable[ProcessPool]): load_package_counter: Counter = None @with_config(spec=NormalizeConfiguration, namespaces=("normalize",)) - def __init__(self, collector: CollectorRegistry = REGISTRY, schema_storage: SchemaStorage = None, config: NormalizeConfiguration = 
ConfigValue) -> None: + def __init__(self, collector: CollectorRegistry = REGISTRY, schema_storage: SchemaStorage = None, config: NormalizeConfiguration = config.value) -> None: self.config = config self.loader_file_format = config.destination_capabilities.preferred_loader_file_format self.pool: ProcessPool = None diff --git a/examples/sources/google_sheets.py b/examples/sources/google_sheets.py index 7727ad753c..7960cea41a 100644 --- a/examples/sources/google_sheets.py +++ b/examples/sources/google_sheets.py @@ -2,7 +2,7 @@ import dlt from dlt.common.configuration.specs import GcpClientCredentialsWithDefault -from dlt.common.typing import ConfigValue, DictStrAny, StrAny +from dlt.common.typing import DictStrAny, StrAny from dlt.common.exceptions import MissingDependencyException try: @@ -24,7 +24,7 @@ def _initialize_sheets(credentials: GcpClientCredentialsWithDefault) -> Any: @dlt.source -def google_spreadsheet(spreadsheet_id: str, sheet_names: Sequence[str], credentials: Union[GcpClientCredentialsWithDefault, str, StrAny] = ConfigValue) -> Any: +def google_spreadsheet(spreadsheet_id: str, sheet_names: Sequence[str], credentials: Union[GcpClientCredentialsWithDefault, str, StrAny] = dlt.secrets.value) -> Any: sheets = _initialize_sheets(cast(GcpClientCredentialsWithDefault, credentials)) From 3f6a72e2f15545fbc95075e45ac3fedf1ad96671 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 24 Nov 2022 11:45:56 +0100 Subject: [PATCH 7/7] installs dev deps in github workflows --- .github/workflows/test_common.yml | 2 +- .github/workflows/test_loader_bigquery.yml | 2 +- .github/workflows/test_loader_redshift.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 80eb722a30..c4bac42901 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -55,7 +55,7 @@ jobs: - name: Install dependencies # if: 
steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --no-dev + run: poetry install --no-interaction # - name: Install self # run: poetry install --no-interaction diff --git a/.github/workflows/test_loader_bigquery.yml b/.github/workflows/test_loader_bigquery.yml index a301de28c2..6707d236f5 100644 --- a/.github/workflows/test_loader_bigquery.yml +++ b/.github/workflows/test_loader_bigquery.yml @@ -61,7 +61,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --no-dev -E bigquery + run: poetry install --no-interaction -E bigquery # - name: Install self # run: poetry install --no-interaction diff --git a/.github/workflows/test_loader_redshift.yml b/.github/workflows/test_loader_redshift.yml index 2eb5e5631a..d268a3dd56 100644 --- a/.github/workflows/test_loader_redshift.yml +++ b/.github/workflows/test_loader_redshift.yml @@ -62,7 +62,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --no-dev -E redshift + run: poetry install --no-interaction -E redshift # - name: Install self # run: poetry install --no-interaction