diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 693eb6b6..00000000
--- a/.coveragerc
+++ /dev/null
@@ -1,7 +0,0 @@
-[report]
-# Regexes for lines to exclude from consideration
-exclude_lines =
-    pragma: no cover
-
-omit =
-    magnus/cli.py
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 85bc4b95..e6a75bdb 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -14,6 +14,22 @@ jobs:
       with:
         python-version: 3.8
     - run: python -m pip install poetry
+    - run: |
+        # Download the binary
+        curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.5.4/argo-linux-amd64.gz
+
+        # Unzip
+        gunzip argo-linux-amd64.gz
+
+        # Make binary executable
+        chmod +x argo-linux-amd64
+
+        # Move binary to path
+        mv ./argo-linux-amd64 /usr/local/bin/argo
+
+        # Test installation
+        argo version
+
     - run: |
         python -m poetry install
         poetry run tox
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index c823695e..999a6c2c 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -4,6 +4,7 @@ on:
     - "magnus/**"
   branches:
     - "main"
+    - "rc"

 jobs:
   PRCheck:
@@ -13,10 +14,26 @@ jobs:
     - uses: actions/setup-python@v4
       with:
         python-version: 3.8
+    - run: |
+        # Download the binary
+        curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.5.4/argo-linux-amd64.gz
+
+        # Unzip
+        gunzip argo-linux-amd64.gz
+
+        # Make binary executable
+        chmod +x argo-linux-amd64
+
+        # Move binary to path
+        mv ./argo-linux-amd64 /usr/local/bin/argo
+
+        # Test installation
+        argo version
     - run: python -m pip install poetry
     - run: |
         python -m poetry install
         poetry run tox
+
   Release:
     runs-on: ubuntu-latest
     needs: PRCheck
@@ -27,15 +44,28 @@
     - uses: actions/setup-python@v4
       with:
         python-version: 3.8
-    - run: python -m pip install python-semantic-release==7.34.6
+    - run: python -m pip install python-semantic-release==8.0.7
     - name: Figure version
+      continue-on-error: true
      id: last_tag
      run: |
-        VERSION=$(semantic-release print-version --patch)
-        echo $VERSION
+        CURRENT=$(git tag --sort=-committerdate -l | head -n 1)
+        echo "current: $CURRENT"
+
+        VERSION=$(python -m semantic-release --noop --strict version --no-push --no-commit --print)
+        echo "New: $VERSION"
+
+        if [ "$CURRENT" == "$VERSION" ]; then
+          echo "version=" >> $GITHUB_OUTPUT
+          exit 1
+        fi
+        echo "version=$VERSION" >> $GITHUB_OUTPUT
+        exit 0
+
     - name: Apply new tag
+      if: steps.last_tag.outcome == 'success'
      env:
        VERSION: ${{ steps.last_tag.outputs.version }}
      uses: actions/github-script@v6
@@ -50,6 +80,7 @@ jobs:
          sha: context.sha
        })
     - name: Publish to PyPI
+      if: steps.last_tag.outcome == 'success'
      env:
        PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
        LAST_TAG: ${{ steps.last_tag.outputs.version }}
@@ -60,6 +91,7 @@
        poetry publish --build

     - name: "Create release"
+      if: steps.last_tag.outcome == 'success'
      env:
        RELEASE_TAG: ${{ steps.last_tag.outputs.version }}
      uses: "actions/github-script@v6"
diff --git a/.gitignore b/.gitignore
index 59b4199c..40695b3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,3 +141,16 @@ cython_debug/

 # VSCode specific settings
 .vscode
+
+
+.run_log_store/
+
+.catalog/
+
+cov.xml
+
+.DS_Store
+
+data/
+
+example_bak/
diff --git a/.mypy.ini b/.mypy.ini
index e6d4eb6f..a564d7ca 100644
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -1,6 +1,24 @@
 [mypy]
-implicit_optional = True
-ignore_missing_imports = True
+exclude = _*FF.py
+ignore_missing_imports = True
+plugins = pydantic.mypy
+show_error_codes = True
+
+follow_imports = silent
+warn_redundant_casts = True
+warn_unused_ignores = True
+#disallow_any_generics = True # This complicates it a lot
+check_untyped_defs = True
+implicit_reexport = True
+
+# for strict mypy: (this is the tricky one :-))
+#disallow_untyped_defs = True
+
+[pydantic-mypy]
+init_forbid_extra = True
+init_typed = True
+warn_required_dynamic_aliases = True
+warn_untyped_fields = True

 [mypy-ruamel.*]
 ignore_missing_imports = True
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c24bcbba..4b80822a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,3 +27,16 @@ repos:
       # pre-commit's default_language_version, see
       # https://pre-commit.com/#top_level-default_language_version
       language_version: python3.8
+
+  - repo: https://github.com/jorisroovers/gitlint
+    rev: v0.19.1
+    hooks:
+      - id: gitlint
+        args:
+          [
+            --contrib=CT1,
+            --ignore=title-trailing-punctuation,
+            --ignore=T3,
+            --ignore=B6,
+            --msg-filename,
+          ]
diff --git a/README.md b/README.md
index c94b9741..75d45cf6 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@

Logo

---- +

python: @@ -17,365 +17,383 @@ Tests: Docs:

---- +
- +Magnus is a simplified workflow definition language that helps in: -**Magnus** is a *thin* layer of abstraction over the underlying infrastructure to enable data scientist and -machine learning engineers. It provides: +- **Streamlined Design Process:** Magnus enables users to efficiently plan their pipelines with +[stubbed nodes](https://astrazeneca.github.io/magnus-core/concepts/stub), along with offering support for various structures such as +[tasks](https://astrazeneca.github.io/magnus-core/concepts/task), [parallel branches](https://astrazeneca.github.io/magnus-core/concepts/parallel), and [loops or map branches](https://astrazeneca.github.io/magnus-core/concepts/map) +in both [yaml](https://astrazeneca.github.io/magnus-core/concepts/pipeline) or a [python SDK](https://astrazeneca.github.io/magnus-core/sdk) for maximum flexibility. -- A way to execute Jupyter notebooks/python functions in local or remote platforms. -- A framework to define complex pipelines via YAML or Python SDK. -- Robust and *automatic* logging to ensure maximum reproducibility of experiments. -- A framework to interact with secret managers ranging from environment variables to other vendors. -- Interactions with various experiment tracking tools. +- **Incremental Development:** Build your pipeline piece by piece with Magnus, which allows for the +implementation of tasks as [python functions](https://astrazeneca.github.io/magnus-core/concepts/task/#python_functions), +[notebooks](https://astrazeneca.github.io/magnus-core/concepts/task/#notebooks), or [shell scripts](https://astrazeneca.github.io/magnus-core/concepts/task/#shell), +adapting to the developer's preferred tools and methods. -## What does **thin** mean? +- **Robust Testing:** Ensure your pipeline performs as expected with the ability to test using sampled data. Magnus +also provides the capability to [mock and patch tasks](https://astrazeneca.github.io/magnus-core/configurations/executors/mocked) +for thorough evaluation before full-scale deployment. -- We really have no say in what happens within your notebooks or python functions. -- We do not dictate how the infrastructure should be configured as long as it satisfies some *basic* criteria. - - The underlying infrastructure should support container execution and an orchestration framework. - - Some way to handle secrets either via environment variables or secrets manager. - - A blob storage or some way to store your intermediate artifacts. - - A database or blob storage to store logs. -- We have no opinion of how your structure your project. -- We do not creep into your CI/CD practices but it is your responsibility to provide the same environment where ever -the execution happens. This is usually via git, virtual environment manager and docker. -- We transpile to the orchestration framework that is used by your teams to do the heavy lifting. - -## What does it do? +- **Seamless Deployment:** Transition from the development stage to production with ease. +Magnus simplifies the process by requiring [only configuration changes](https://astrazeneca.github.io/magnus-core/configurations/overview) +to adapt to different environments, including support for [argo workflows](https://astrazeneca.github.io/magnus-core/configurations/executors/argo). +- **Efficient Debugging:** Quickly identify and resolve issues in pipeline execution with Magnus's local +debugging features. 
Retrieve data from failed tasks and [retry failures](https://astrazeneca.github.io/magnus-core/concepts/run-log/#retrying_failures) +using your chosen debugging tools to maintain a smooth development experience. -![works](assets/work.png) +Along with the developer friendly features, magnus also acts as an interface to production grade concepts +such as [data catalog](https://astrazeneca.github.io/magnus-core/concepts/catalog), [reproducibility](https://astrazeneca.github.io/magnus-core/concepts/run-log), +[experiment tracking](https://astrazeneca.github.io/magnus-core/concepts/experiment-tracking) +and secure [access to secrets](https://astrazeneca.github.io/magnus-core/concepts/secrets). -### Shift Left +
-Magnus provides patterns typically used in production environments even in the development phase. +## What does it do? -- Reduces the need for code refactoring during production phase of the project. -- Enables best practices and understanding of infrastructure patterns. -- Run the same code on your local machines or in production environments. -:sparkles: :sparkles: Happy Experimenting!! :sparkles: :sparkles: +![works](assets/work.png) - +
## Documentation

[More details about the project and how to use it available here](https://astrazeneca.github.io/magnus-core/).

-## Extensions
-
-All the services of magnus are extendable by design, please refer to
-[magnus extensions](https://github.com/AstraZeneca/magnus-extensions)
+
## Installation - - The minimum python version that magnus supports is 3.8 -## pip - -magnus is a python package and should be installed as any other. ```shell pip install magnus ``` -We recommend that you install magnus in a virtual environment specific to the project and also poetry for your -application development. +Please look at the [installation guide](https://astrazeneca.github.io/magnus-core/usage) +for more information. + +
+ +## Example + +Your application code. Use pydantic models as DTO. + +Assumed to be present at ```functions.py``` +```python +from pydantic import BaseModel + +class InnerModel(BaseModel): + """ + A pydantic model representing a group of related parameters. + """ + + foo: int + bar: str + + +class Parameter(BaseModel): + """ + A pydantic model representing the parameters of the whole pipeline. + """ + + x: int + y: InnerModel + + +def return_parameter() -> Parameter: + """ + The annotation of the return type of the function is not mandatory + but it is a good practice. -The command to install in a poetry managed virtual environment + Returns: + Parameter: The parameters that should be used in downstream steps. + """ + # Return type of a function should be a pydantic model + return Parameter(x=1, y=InnerModel(foo=10, bar="hello world")) + +def display_parameter(x: int, y: InnerModel): + """ + Annotating the arguments of the function is important for + magnus to understand the type of parameters you want. + + Input args can be a pydantic model or the individual attributes. + """ + print(x) + # >>> prints 1 + print(y) + # >>> prints InnerModel(foo=10, bar="hello world") ``` -poetry add magnus + +### Application code using driver functions. + +The code is runnable without any orchestration framework. + +```python +from functions import return_parameter, display_parameter + +my_param = return_parameter() +display_parameter(my_param.x, my_param.y) ``` - +### Orchestration using magnus + + + + + + + + -``` yaml + -def pipeline(): - first = Task(name='step parameters', command="lambda x: {'x': int(x) + 1}", command_type='python-lambda', - next_node='step shell') - second = Task(name='step shell', command='mkdir data ; env >> data/data.txt', - command_type='shell', catalog={'put': '*'}) + +
python SDK | yaml

-## Example Run +Example present at: ```examples/python-tasks.py``` - +Run it as: ```python examples/python-tasks.py``` -To give you a flavour of how magnus works, lets create a simple pipeline. +```python +from magnus import Pipeline, Task -Copy the contents of this yaml into getting-started.yaml or alternatively in a python file if you are using the SDK. +def main(): + step1 = Task( + name="step1", + command="examples.functions.return_parameter", + ) + step2 = Task( + name="step2", + command="examples.functions.display_parameter", + terminate_with_success=True, + ) ---- -!!! Note + step1 >> step2 - The below execution would create a folder called 'data' in the current working directory. - The command as given should work in linux/macOS but for windows, please change accordingly. + pipeline = Pipeline( + start_at=step1, + steps=[step1, step2], + add_terminal_nodes=True, + ) ---- + pipeline.execute() + + +if __name__ == "__main__": + main() +``` - - +

+ +Example present at: ```examples/python-tasks.yaml``` + + +Execute via the cli: ```magnus execute -f examples/python-tasks.yaml``` + +```yaml dag: - description: Getting started - start_at: step parameters + description: | + This is a simple pipeline that does 3 steps in sequence. + In this example: + 1. First step: returns a "parameter" x as a Pydantic model + 2. Second step: Consumes that parameter and prints it + + This pipeline demonstrates one way to pass small data from one step to another. + + start_at: step 1 steps: - step parameters: + step 1: type: task - command_type: python-lambda - command: "lambda x: {'x': int(x) + 1}" - next: step shell - step shell: + command_type: python # (2) + command: examples.functions.return_parameter # (1) + next: step 2 + step 2: type: task - command_type: shell - command: mkdir data ; env >> data/data.txt # For Linux/macOS + command_type: python + command: examples.functions.display_parameter next: success - catalog: - put: - - "*" success: type: success fail: type: fail ``` -The same could also be defined via a Python SDK. - -```python - -#in pipeline.py -from magnus import Pipeline, Task +

-    pipeline = Pipeline(name='getting_started')
-    pipeline.construct([first, second])
-    pipeline.execute(parameters_file='parameters.yaml')
+### Transpile to argo workflows
-if __name__ == '__main__':
-    pipeline()
+No code change, just change the configuration.
+```yaml
+executor:
+  type: "argo"
+  config:
+    image: magnus:demo
+    persistent_volumes:
+      - name: magnus-volume
+        mount_path: /mnt
+
+run_log_store:
+  type: file-system
+  config:
+    log_folder: /mnt/run_log_store
+```
+More details can be found in [argo configuration](https://astrazeneca.github.io/magnus-core/configurations/executors/argo).
+Execute the code as ```magnus execute -f examples/python-tasks.yaml -c examples/configs/argo-config.yaml```
-Since the pipeline expects a parameter ```x```, lets provide that using ```parameters.yaml```
+ Expand ```yaml -x: 3 +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: magnus-dag- + annotations: {} + labels: {} +spec: + activeDeadlineSeconds: 172800 + entrypoint: magnus-dag + podGC: + strategy: OnPodCompletion + retryStrategy: + limit: '0' + retryPolicy: Always + backoff: + duration: '120' + factor: 2 + maxDuration: '3600' + serviceAccountName: default-editor + templates: + - name: magnus-dag + failFast: true + dag: + tasks: + - name: step-1-task-uvdp7h + template: step-1-task-uvdp7h + depends: '' + - name: step-2-task-772vg3 + template: step-2-task-772vg3 + depends: step-1-task-uvdp7h.Succeeded + - name: success-success-igzq2e + template: success-success-igzq2e + depends: step-2-task-772vg3.Succeeded + - name: step-1-task-uvdp7h + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - step%1 + - --log-level + - WARNING + - --file + - examples/python-tasks.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: step-2-task-772vg3 + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - step%2 + - --log-level + - WARNING + - --file + - examples/python-tasks.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: success-success-igzq2e + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - success + - --log-level + - WARNING + - --file + - examples/python-tasks.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + templateDefaults: + activeDeadlineSeconds: 7200 + timeout: 10800s + arguments: + parameters: + - name: run_id + value: '{{workflow.uid}}' + volumes: + - name: executor-0 + persistentVolumeClaim: + claimName: magnus-volume + ``` +
-And let's run the pipeline using: -``` shell - magnus execute --file getting-started.yaml --parameters-file parameters.yaml -``` +## Pipelines can be: -If you are using the python SDK: +### Linear -``` -poetry run python pipeline.py -``` +A simple linear pipeline with tasks either +[python functions](https://astrazeneca.github.io/magnus-core/concepts/task/#python_functions), +[notebooks](https://astrazeneca.github.io/magnus-core/concepts/task/#notebooks), or [shell scripts](https://astrazeneca.github.io/magnus-core/concepts/task/#shell) - - -You should see a list of warnings but your terminal output should look something similar to this: - - - - -``` json -{ - "run_id": "20230131195647", - "dag_hash": "", - "use_cached": false, - "tag": "", - "original_run_id": "", - "status": "SUCCESS", - "steps": { - "step parameters": { - "name": "step parameters", - "internal_name": "step parameters", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "e15d1374aac217f649972d11fe772e61b5a2478d", - "code_identifier_type": "git", - "code_identifier_dependable": true, - "code_identifier_url": "INTENTIONALLY REMOVED", - "code_identifier_message": "" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2023-01-31 19:56:55.007931", - "end_time": "2023-01-31 19:56:55.009273", - "duration": "0:00:00.001342", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step shell": { - "name": "step shell", - "internal_name": "step shell", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "e15d1374aac217f649972d11fe772e61b5a2478d", - "code_identifier_type": "git", - "code_identifier_dependable": true, - "code_identifier_url": "INTENTIONALLY REMOVED", - "code_identifier_message": "" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2023-01-31 19:56:55.128697", - "end_time": "2023-01-31 19:56:55.150878", - "duration": "0:00:00.022181", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [ - { - "name": "data/data.txt", - "data_hash": "7e91b0a9ff8841a3b5bf2c711f58bcc0cbb6a7f85b9bc92aa65e78cdda59a96e", - "catalog_relative_path": "20230131195647/data/data.txt", - "catalog_handler_location": ".catalog", - "stage": "put" - } - ] - }, - "success": { - "name": "success", - "internal_name": "success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "e15d1374aac217f649972d11fe772e61b5a2478d", - "code_identifier_type": "git", - "code_identifier_dependable": true, - "code_identifier_url": "INTENTIONALLY REMOVED", - "code_identifier_message": "" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2023-01-31 19:56:55.239877", - "end_time": "2023-01-31 19:56:55.240116", - "duration": "0:00:00.000239", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - }, - "parameters": { - "x": 4 - }, - "run_config": { - "executor": { - "type": "local", - "config": { - "enable_parallel": false, - "placeholders": {} - } - }, - "run_log_store": { - "type": "buffered", - "config": {} - }, - "catalog": { - "type": "file-system", - "config": { - "compute_data_folder": "data", - "catalog_location": ".catalog" - } - }, - "secrets": { - "type": "do-nothing", - "config": 
{} - }, - "experiment_tracker": { - "type": "do-nothing", - "config": {} - }, - "variables": {}, - "pipeline": { - "start_at": "step parameters", - "name": "getting_started", - "description": "", - "max_time": 86400, - "steps": { - "step parameters": { - "mode_config": {}, - "next_node": "step shell", - "command": "lambda x: {'x': int(x) + 1}", - "command_type": "python-lambda", - "command_config": {}, - "catalog": {}, - "retry": 1, - "on_failure": "", - "type": "task" - }, - "step shell": { - "mode_config": {}, - "next_node": "success", - "command": "mkdir data ; env >> data/data.txt", - "command_type": "shell", - "command_config": {}, - "catalog": { - "put": "*" - }, - "retry": 1, - "on_failure": "", - "type": "task" - }, - "success": { - "mode_config": {}, - "type": "success" - }, - "fail": { - "mode_config": {}, - "type": "fail" - } - } - } - } -} -``` +[![](https://mermaid.ink/img/pako:eNpl0bFuwyAQBuBXQVdZTqTESpxMDJ0ytkszhgwnOCcoNo4OaFVZfvcSx20tGSQ4fn0wHB3o1hBIyLJOWGeDFJ3Iq7r90lfkkA9HHfmTUpnX1hFyLvrHzDLl_qB4-1BOOZGGD3TfSikvTDSNFqdj2sT2vBTr9euQlXNWjqycsN2c7UZWFMUE7udwP0L3y6JenNKiyfvz8t8_b-gavT9QJYY0PcDtjeTLptrAChriBq1JzeoeWkG4UkMKZCoN8k2Bcn1yGEN7_HYaZOBIK4h3g4EOFi-MDcgKa59SMja0_P7s_vAJ_Q_YOH6o?type=png)](https://mermaid.live/edit#pako:eNpl0bFuwyAQBuBXQVdZTqTESpxMDJ0ytkszhgwnOCcoNo4OaFVZfvcSx20tGSQ4fn0wHB3o1hBIyLJOWGeDFJ3Iq7r90lfkkA9HHfmTUpnX1hFyLvrHzDLl_qB4-1BOOZGGD3TfSikvTDSNFqdj2sT2vBTr9euQlXNWjqycsN2c7UZWFMUE7udwP0L3y6JenNKiyfvz8t8_b-gavT9QJYY0PcDtjeTLptrAChriBq1JzeoeWkG4UkMKZCoN8k2Bcn1yGEN7_HYaZOBIK4h3g4EOFi-MDcgKa59SMja0_P7s_vAJ_Q_YOH6o) + +### [Parallel branches](https://astrazeneca.github.io/magnus-core/concepts/parallel) - +Execute branches in parallel -You should see that ```data``` folder being created with a file called ```data.txt``` in it. -This is according to the command in ```step shell```. +[![](https://mermaid.ink/img/pako:eNp9k01rwzAMhv-K8S4ZtJCzDzuMLmWwwkh2KMQ7eImShiZ2sB1KKf3vs52PpsWNT7LySHqlyBeciRwwwUUtTtmBSY2-YsopR8MpQUfAdCdBBekWNBpvv6-EkFICzGAtWcUTDW3wYy20M7lr5QGBK2j-anBAkH4M1z6grnjpy17xAiTwDII07jj6HK8-VnVZBspITnpjztyoVkLLJOy3Qfrdm6gQEu2370Io7WLORo84PbRoA_oOl9BBg4UHbHR58UkMWq_fxjrOnhLRx1nH0SgkjlBjh7ekxNKGc0NelDLknhePI8qf7MVNr_31nm1wwNTeM2Ao6pmf-3y3Mp7WlqA7twOnXfKs17zt-6azmim1gQL1A0NKS3EE8hKZE4Yezm3chIVFiFe4AdmwKjdv7mIjKNYHaIBiYsycySPFlF8NxzotkjPPMNGygxXu2pxp2FSslKzBpGC1Ml7IKy3krn_E7i1f_wEayTcn?type=png)](https://mermaid.live/edit#pako:eNp9k01rwzAMhv-K8S4ZtJCzDzuMLmWwwkh2KMQ7eImShiZ2sB1KKf3vs52PpsWNT7LySHqlyBeciRwwwUUtTtmBSY2-YsopR8MpQUfAdCdBBekWNBpvv6-EkFICzGAtWcUTDW3wYy20M7lr5QGBK2j-anBAkH4M1z6grnjpy17xAiTwDII07jj6HK8-VnVZBspITnpjztyoVkLLJOy3Qfrdm6gQEu2370Io7WLORo84PbRoA_oOl9BBg4UHbHR58UkMWq_fxjrOnhLRx1nH0SgkjlBjh7ekxNKGc0NelDLknhePI8qf7MVNr_31nm1wwNTeM2Ao6pmf-3y3Mp7WlqA7twOnXfKs17zt-6azmim1gQL1A0NKS3EE8hKZE4Yezm3chIVFiFe4AdmwKjdv7mIjKNYHaIBiYsycySPFlF8NxzotkjPPMNGygxXu2pxp2FSslKzBpGC1Ml7IKy3krn_E7i1f_wEayTcn) -You should also see a folder ```.catalog``` being created with a single folder corresponding to the run_id of this run. +### [loops or map](https://astrazeneca.github.io/magnus-core/concepts/map) +Execute a pipeline over an iterable parameter. 
+[![](https://mermaid.ink/img/pako:eNqVlF1rwjAUhv9KyG4qKNR-3AS2m8nuBgN3Z0Sy5tQG20SSdE7E_76kVVEr2CY3Ied9Tx6Sk3PAmeKACc5LtcsKpi36nlGZFbXciHwfLN79CuWiBLMcEULWGkBSaeosA2OCxbxdXMd89Get2bZASsLiSyuvQE2mJZXIjW27t2rOmQZ3Gp9rD6UjatWnwy7q6zPPukd50WTydmemEiS_QbQ79RwxGoQY9UaMuojRA8TCXexzyHgQZNwbMu5Cxl3IXNX6OWMyiDHpzZh0GZMHjOK3xz2mgxjT3oxplzG9MPp5_nVOhwJjteDwOg3HyFj3L1dCcvh7DUc-iftX18n6Waet1xX8cG908vpKHO6OW7cvkeHm5GR2b3drdvaSGTODHLW37mxabYC8fLgRhlfxpjNdwmEets-Dx7gCXTHBXQc8-D2KbQEVUEzckjO9oZjKo9Ox2qr5XmaYWF3DGNdbzizMBHOVVWGSs9K4XeDCKv3ZttSmsx7_AYa341E?type=png)](https://mermaid.live/edit#pako:eNqVlF1rwjAUhv9KyG4qKNR-3AS2m8nuBgN3Z0Sy5tQG20SSdE7E_76kVVEr2CY3Ied9Tx6Sk3PAmeKACc5LtcsKpi36nlGZFbXciHwfLN79CuWiBLMcEULWGkBSaeosA2OCxbxdXMd89Get2bZASsLiSyuvQE2mJZXIjW27t2rOmQZ3Gp9rD6UjatWnwy7q6zPPukd50WTydmemEiS_QbQ79RwxGoQY9UaMuojRA8TCXexzyHgQZNwbMu5Cxl3IXNX6OWMyiDHpzZh0GZMHjOK3xz2mgxjT3oxplzG9MPp5_nVOhwJjteDwOg3HyFj3L1dCcvh7DUc-iftX18n6Waet1xX8cG908vpKHO6OW7cvkeHm5GR2b3drdvaSGTODHLW37mxabYC8fLgRhlfxpjNdwmEets-Dx7gCXTHBXQc8-D2KbQEVUEzckjO9oZjKo9Ox2qr5XmaYWF3DGNdbzizMBHOVVWGSs9K4XeDCKv3ZttSmsx7_AYa341E) -To understand more about the input and output, please head over to the -[documentation](https://project-magnus.github.io/magnus-core/). +### [Arbitrary nesting](https://astrazeneca.github.io/magnus-core/concepts/nesting/) diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 00000000..b996586c Binary files /dev/null and b/docs/.DS_Store differ diff --git a/docs/RELEASES.md b/docs/RELEASES.md deleted file mode 100644 index 6a7286c8..00000000 --- a/docs/RELEASES.md +++ /dev/null @@ -1,113 +0,0 @@ -# Release history - -## v0.4.1 - -- Bug fix with MLflow creeping in. -- Improved documentation. -- Improved CI process. -## v0.4.0 (2023-02-09) - -- Added Experiment tracking interface -- Added Python SDK support -- Added configuration validation via Pydantic - - -## v0.3.11 (2022-07-12) - -- Added a env-secrets-manager which gets its secrets from environment, Issue 58 - - -## v0.3.10 (2022-06-30) - -- Local container mode can accept a config parameter to allow it to run local system, Issue 52 - -## v0.3.9 (2022-06-29) - -- Bug fix in catalog get function, Issue 54. -- Enhancement of sending log levels to different executing environments, Issue 53 -- Map variable sent in as environment variable to non-shell tasks, Issue 51 - -## v0.3.8 (2022-06-29) - -- Exposing secrets as environment variables if the user requested via secret_as_env in the config. - The key of the secret_as_env should be the name of the environment variable while the value should be a secret name. - -## v0.3.7 (2022-06-27) - -- Fixing a bug with empty placeholders - -## v0.3.6 (2022-06-25) - -- Run ID is exposed to the user as interaction function or via environment variable MAGNUS_RUN_ID - -## v0.3.5 (2022-05-24) - -- Cataloging happens w.r.t to source location for flexibility - -## v0.3.4 (2022-05-19) - -- Bug fix with JSON decoding of parameters - -## v0.3.3 (2022-04-23) - -- Bug fix with executor interactions with run log store parameters. - -## v0.3.2 (2022-04-23) - -- Added the functionality of build_docker to the CLI. - -## v0.3.1 (2022-04-23) - -- Bug fix with executor interactions with run log store parameters. - -## v0.3.0 (2022-03-30) - -- Supporting execute_step which executes only a single node of the pipeline - -## v0.2.4 (2022-03-28) - -- Bug fix with optional git commit identifiers. - - -## v0.2.3 (2022-03-18) - -- local container mode has a provision to send in environment variables to the container from the config. 
- -## v0.2.2 (2022-02-23) - -- Enabling configurations to have placeholders that individual nodes can use to over-ride. -- Provided API in the executor to resolve the effective config by using global and local config - -## v0.2.1 (2022-02-22) - -- Updated docs to clarify the extension capabilities of the CLI and nodes -- Removed demo-renderer argument parsing block as parameters come from parameters - -## v0.2 (2022-02-22) - -- Moved magnus CLI to click. -- magnus command group can be extended to add more commands by other packages. - -Breaking changes: - -- Contextualized config parameters for executors -- Parameters to be sent in via parameters file - -## v0.1.2 (2022-02-08) - -- Command config provided for all command types to pass in additional configuration. -- Moved to plugin based model using stevedore for nodes and tasks. -- Added support for notebooks as command types with optional install of papermill. - -## v0.1.1 (2022-02-03) - -- Bug fix with demo-renderer and as-is -- Moved to plugin based model using stevedore for executor, run log store, catalog, secrets and integrations - -## v0.1.0 (2022-01-21) - -- First release to open source. -- Compute: local, local-container, demo-renderer -- Run log store: local, buffered. -- Catalog: local, do-nothing. -- Secrets: dotenv, do-nothing. diff --git a/docs/assets/screenshots/argo-expose-parameters.png b/docs/assets/screenshots/argo-expose-parameters.png new file mode 100644 index 00000000..72bb4ddc Binary files /dev/null and b/docs/assets/screenshots/argo-expose-parameters.png differ diff --git a/docs/assets/screenshots/argo-kubeflow-exec.png b/docs/assets/screenshots/argo-kubeflow-exec.png new file mode 100644 index 00000000..9b2d624b Binary files /dev/null and b/docs/assets/screenshots/argo-kubeflow-exec.png differ diff --git a/docs/assets/screenshots/argo-kubeflow-ui.png b/docs/assets/screenshots/argo-kubeflow-ui.png new file mode 100644 index 00000000..e1f08d78 Binary files /dev/null and b/docs/assets/screenshots/argo-kubeflow-ui.png differ diff --git a/docs/assets/screenshots/argo-nested.png b/docs/assets/screenshots/argo-nested.png new file mode 100644 index 00000000..9e9323d4 Binary files /dev/null and b/docs/assets/screenshots/argo-nested.png differ diff --git a/docs/assets/screenshots/argo-parallel-map.png b/docs/assets/screenshots/argo-parallel-map.png new file mode 100644 index 00000000..9f20cd49 Binary files /dev/null and b/docs/assets/screenshots/argo-parallel-map.png differ diff --git a/docs/assets/screenshots/argo-sequential-map.png b/docs/assets/screenshots/argo-sequential-map.png new file mode 100644 index 00000000..f69ab5f6 Binary files /dev/null and b/docs/assets/screenshots/argo-sequential-map.png differ diff --git a/docs/assets/screenshots/argo-workflows-gant.png b/docs/assets/screenshots/argo-workflows-gant.png new file mode 100644 index 00000000..45f756d9 Binary files /dev/null and b/docs/assets/screenshots/argo-workflows-gant.png differ diff --git a/docs/assets/screenshots/argo-workflows-logs.png b/docs/assets/screenshots/argo-workflows-logs.png new file mode 100644 index 00000000..feb7af58 Binary files /dev/null and b/docs/assets/screenshots/argo-workflows-logs.png differ diff --git a/docs/assets/screenshots/mlflow.png b/docs/assets/screenshots/mlflow.png new file mode 100644 index 00000000..eb3a9983 Binary files /dev/null and b/docs/assets/screenshots/mlflow.png differ diff --git a/docs/assets/screenshots/mlflow_example.png b/docs/assets/screenshots/mlflow_example.png new file mode 100644 index 
00000000..0dfd0fa2 Binary files /dev/null and b/docs/assets/screenshots/mlflow_example.png differ diff --git a/docs/assets/screenshots/mlflow_step.png b/docs/assets/screenshots/mlflow_step.png new file mode 100644 index 00000000..e1abcfb3 Binary files /dev/null and b/docs/assets/screenshots/mlflow_step.png differ diff --git a/docs/assets/screenshots/notebook_api_parameters.png b/docs/assets/screenshots/notebook_api_parameters.png new file mode 100644 index 00000000..28df7e51 Binary files /dev/null and b/docs/assets/screenshots/notebook_api_parameters.png differ diff --git a/docs/assets/screenshots/notebook_env_parameters.png b/docs/assets/screenshots/notebook_env_parameters.png new file mode 100644 index 00000000..358ad2c2 Binary files /dev/null and b/docs/assets/screenshots/notebook_env_parameters.png differ diff --git a/docs/assets/screenshots/notebook_input_parameters.png b/docs/assets/screenshots/notebook_input_parameters.png new file mode 100644 index 00000000..92508c3c Binary files /dev/null and b/docs/assets/screenshots/notebook_input_parameters.png differ diff --git a/docs/assets/screenshots/notebook_native_parameters.png b/docs/assets/screenshots/notebook_native_parameters.png new file mode 100644 index 00000000..fe700c07 Binary files /dev/null and b/docs/assets/screenshots/notebook_native_parameters.png differ diff --git a/docs/assets/screenshots/notebook_output_parameters.png b/docs/assets/screenshots/notebook_output_parameters.png new file mode 100644 index 00000000..a0dcaa1f Binary files /dev/null and b/docs/assets/screenshots/notebook_output_parameters.png differ diff --git a/docs/assets/screenshots/simple_notebook.png b/docs/assets/screenshots/simple_notebook.png new file mode 100644 index 00000000..9e4772cc Binary files /dev/null and b/docs/assets/screenshots/simple_notebook.png differ diff --git a/docs/assets/whatdo.png b/docs/assets/whatdo.png new file mode 100644 index 00000000..77f2ce04 Binary files /dev/null and b/docs/assets/whatdo.png differ diff --git a/docs/command-line.md b/docs/command-line.md deleted file mode 100644 index f047f214..00000000 --- a/docs/command-line.md +++ /dev/null @@ -1,294 +0,0 @@ -# Command line options - -## Executing a pipeline - -You can execute a pipeline by the following command: - -```shell -magnus execute -``` - ---- -!!! Note - - For the above command to work, make sure you are in the environment where magnus was installed. - - If you are using poetry, you can also invoke magnus by ```poetry run magnus execute``` ---- - -The complete options available are: - -``` -Usage: magnus execute [OPTIONS] - - Entry point to executing a pipeline. This command is most commonly used - either to execute a pipeline or to translate the pipeline definition to - another language. - - You can re-run an older run by providing the run_id of the older run in - --use-cached. Ensure that the catalogs and run logs are accessible by the - present configuration. - -Options: - -f, --file TEXT The pipeline definition file [default: - pipeline.yaml] - -c, --config-file TEXT config file, in yaml, to be used for the run - -p, --parameters-file TEXT Parameters, in yaml, accessible by the - application - --log-level [INFO|DEBUG|WARNING|ERROR|FATAL] - The log level [default: WARNING] - --tag TEXT A tag attached to the run - --run-id TEXT An optional run_id, one would be generated - if not provided - --use-cached TEXT Provide the previous run_id to re-run. - --help Show this message and exit. 
-``` - -### Dag definition/config - -The file containing the dag definition and the config to be used. - -Provided by ```-f```, ```--file``` option on magnus cli. - -Defaults to ```pipeline.yaml``` if nothing is provided. - - -### Configurations file - -The yaml file containing the configurations used to run magnus. The configurations provided here would over-ride any -configuration variables. - -Provided by ```-c```, ```--config-file``` option on magnus cli. - -Defaults to None, if nothing is provided. -Read more about different ways you can configure magnus runs here. - - - -### Parameters file - -The yaml file containing the initial set of parameters that the application can access. Individual steps of the -pipeline can still add/update parameters as required. - -Provided by ```-p```, ```--parameters-file``` option to magnus cli. - -Defaults to None, if nothing is provided. -You can also pass parameters by environmental variables prefixed by ```MAGNUS_PRM_``` - -### Log level - -To control the logging level of magnus only. This does not affect your application logs what so ever. - -Provided by ```--log-level``` option on magnus cli. - -Available options are: DEBUG, INFO, WARNING, ERROR, CRITICAL. - -Defaults to INFO if nothing is provided. - -### Tag - -A friendly way to tag experiments or runs together. - -Provided by ```--tag``` option on magnus cli. - -Defaults to None if nothing is provided. - -### Run id - -An unique run identifier for the run. - -Provided by ```--run-id``` on magnus cli. - -We generate one based on Timestamp if one is not provided. - - -### Use cached - -Enables you to re-run a previous run provided by the run-id. - -Example: - -```shell -magnus execute --file example.yaml --run-id 20210506051758 --use-cached old_run_id -``` - -## Executing a Jupyter notebook - -This method could be used to run a Jupyter notebook in any environment. - -The complete options are: - -``` - -Usage: magnus execute_notebook [OPTIONS] FILENAME - - Entry point to execute a Jupyter notebook in isolation. - - The notebook would be executed in the environment defined by the config file or default if none. - -Options: - -c, --config-file TEXT config file, in yaml, to be used for the run - -p, --parameters-file TEXT Parameters, in yaml, accessible by the - application - --log-level [INFO|DEBUG|WARNING|ERROR|FATAL] - The log level [default: WARNING] - -d, --data-folder TEXT The catalog data folder - -put, --put-in-catalog TEXT The data to put from the catalog - --tag TEXT A tag attached to the run - --run-id TEXT An optional run_id, one would be generated - if not provided - --help Show this message and exit. - -``` - -## Executing a python function - -This method could be used to run a python function in any environment. - -The complete options are: - -``` -Usage: magnus execute_function [OPTIONS] COMMAND - - Entry point to execute a python function in isolation. - - The function would be executed in the environment defined by the config file - or default if none. - -Options: - -c, --config-file TEXT config file, in yaml, to be used for the run - -p, --parameters-file TEXT Parameters, in yaml, accessible by the - application - --log-level [INFO|DEBUG|WARNING|ERROR|FATAL] - The log level [default: WARNING] - -d, --data-folder TEXT The catalog data folder - -put, --put-in-catalog TEXT The data to put from the catalog - --tag TEXT A tag attached to the run - --run-id TEXT An optional run_id, one would be generated - if not provided - --help Show this message and exit. 
-``` - -## Executing a single step - -This method could be used to run a single step in isolation. - -The complete options are: - -``` -Usage: magnus execute_step [OPTIONS] STEP_NAME - - Entry point to executing a single step of the pipeline. - - This command is helpful to run only one step of the pipeline in isolation. - Only the steps of the parent dag could be invoked using this method. - - You can re-run an older run by providing the run_id of the older run in - --use-cached. Ensure that the catalogs and run logs are accessible by the - present configuration. - - When running map states, ensure that the parameter to iterate on is - available in parameter space. - -Options: - -f, --file TEXT The pipeline definition file [default: - pipeline.yaml] - -c, --config-file TEXT config file, in yaml, to be used for the run - -p, --parameters-file TEXT Parameters, in yaml, accessible by the - application - --log-level [INFO|DEBUG|WARNING|ERROR|FATAL] - The log level [default: WARNING] - --tag TEXT A tag attached to the run - --run-id TEXT An optional run_id, one would be generated - if not provided - --use-cached TEXT Provide the previous run_id to re-run. - --help Show this message and exit.``` - -``` - -The options have the same meaning as executing a pipeline. - -**Design thought:** This method could be handy to debug a single node of the pipeline or run a single step of the pipeline -in other environments by changing the config. - - -## Building docker images - -This method is a utility tool to assist in building docker images. - -It is preferred that you have a docker file that you can provide to the utility tool using the ```-f/--docker-file``` -option. We can auto-generate a opinionated dockerfile but it is unlikely to fit your needs perfectly. - -For the auto-generation of the dockerfile: - -- You can provide the style of dependency management. Currently, poetry, pipenv are supported. Any other would revert -to using requirements.txt dependency style. -- The base image is python 3.7 -- By default, we add only git tracked contents into the ```app``` folder of the image. But you can over-ride it -with ```--all``` option to add all content to the image. - -Please be aware that using ```--all``` might add sensitive data into the docker image. - -The options available are: - -``` -Usage: magnus build_docker [OPTIONS] IMAGE_NAME - - A utility function to create docker images from the existing codebase. - - It is advised to provide your own dockerfile as much as possible. If you do - not have one handy, you can use --dry-run functionality to see if the auto- - generated one suits your needs. - - If you are auto-generating the dockerfile: BEWARE!! Over-riding the default - options assumes you know what you are doing! BEWARE!! - - 1). By default, only git tracked files are added to the docker image. - - 2). The auto-generated dockerfile uses, python 3.7 as the default image and - adds the current folder. - -Options: - -f, --docker-file TEXT The dockerfile to be used. If None, we generate one - -s, --style TEXT The method used to get requirements [default: - poetry] - -t, --tag TEXT The tag assigned to the image [default: latest] - -c, --commit-tag Use commit id as tag. Over-rides tag option - [default: False] - -d, --dry-run Generate the dockerfile, but NOT the image - [default: False] - --git-tracked / --all Controls what should be added to image. All vs git- - tracked [default: git-tracked] - --help Show this message and exit. 
-``` - - - -## Extensions - -Magnus internally uses click to perform CLI operations and base command is given below. - -```python -@with_plugins(iter_entry_points('magnus.cli_plugins')) -@click.group() -@click.version_option() -def cli(): - """ - Welcome to magnus. Please provide the command that you want to use. - All commands have options that you can see by magnus --help - """ - pass - -``` - -You can provide custom extensions to the command line capabilities by extending the namespace ```magnus.cli_plugins``` - -```toml -# For example, as part of your pyproject.toml -[tool.poetry.plugins."magnus.cli_plugins"] -"aws-ecr = "YOUR_PACKAGE:push_to_ecr" -``` - -This extension than can be used as - -```magnus aws-ecr ``` diff --git a/docs/concepts/catalog-implementations/do-nothing.md b/docs/concepts/catalog-implementations/do-nothing.md deleted file mode 100644 index f4a15c10..00000000 --- a/docs/concepts/catalog-implementations/do-nothing.md +++ /dev/null @@ -1,18 +0,0 @@ -# Do nothing catalog provider - -Use this catalog provider if you do not want to use the cataloging functionality. - -The complete configuration: -```yaml -catalog: - type: do-nothing - -``` - -The individual steps could have ```get``` and ```put``` phases but since the catalog handler does nothing, these files -would not be cataloged. - - -## Design thought - -Use this catalog type to temporarily switch of cataloging in local executor for debugging purposes. diff --git a/docs/concepts/catalog-implementations/file-system.md b/docs/concepts/catalog-implementations/file-system.md deleted file mode 100644 index df293cf2..00000000 --- a/docs/concepts/catalog-implementations/file-system.md +++ /dev/null @@ -1,95 +0,0 @@ -# FileSystem - -This catalog provider uses local file system to store/retrieve the data generated by intermediate steps of the dag. - -The complete configuration: -```yaml -catalog: - type: file-system - config: - compute_data_folder : - catalog_location: - -``` - -## compute_data_folder - -Catalog would need a local compute data folder to get/put the contents. -This is defaulted to ```data``` folder if nothing is provided. - -Individual steps of the dag could over-ride this global default. - -Example - -```yaml -# In config.yaml -catalog: - type: file-system - config: - compute_data_folder : data/ - -# in pipeline.yaml -dag: - start_at: Cool function - steps: - Cool function: - type: task - command: my_module.my_cool_function - next: Success - catalog: - compute_data_folder: data/processed_data/ - get: - - '*' - put: - - '*' - Success: - type: success - Fail: - type: fail -``` - -or via the Python SDK: - -```python -from magnus import Task, Pipeline - -def pipeline(): - catalog = { - 'compute_data_folder': 'data/processed_data/', - 'get': ['*'], - 'put': ['*'] - } - first = Task(name='Cool function', command='my_module.my_cool_function', catalog=catalog) - -``` - -In this example, while the global default of the ```compute_data_folder``` is ```data/```, the step ```Cool function``` -stored the generated data in ```data/processed_data/``` and this would be used for cataloging. - -The same can also be achieved by using a glob pattern of ```processed_data/*``` if you prefer. - -## catalog_location - -By default, the catalog would be stored at ```.catalog``` of the root directory of the project. You can override this -by providing an alternate location. - -Example: -```yaml -catalog: - type: file-system - config: - catalog_location: /tmp/data -``` - -Would start using ```/tmp/data/``` to catalog the data. 
- - ---- -!!! Note - - FileSystem catalog is only applicable if all of the steps are on the same compute. - - To re-run an older run on a different compute, you can simply place the files in the catalog location of the re-run - if its not centrally accessible. - ---- diff --git a/docs/concepts/catalog.md b/docs/concepts/catalog.md index 2ed71532..a7ae7c98 100644 --- a/docs/concepts/catalog.md +++ b/docs/concepts/catalog.md @@ -1,224 +1,482 @@ -# Overview +!!! note "Opt out" -Catalog provides a way to store and retrieve data generated by the individual steps of the dag either to downstream -steps of the dag. Catalog also provides a way to reproduce a historic magnus run on any other machine. Along with -the actual file, we also store the SHA id of the data catalog object in the logs to enable diagnostics. + Pipelines need not use the ```catalog``` if they prefer other ways to transfer + data between tasks. The default configuration of ```do-nothing``` is no-op by design. + We kindly request to raise a feature request to make us aware of the eco-system. -Magnus stores the data generated for every run in the catalog indexed by the unique run_id of the run. This enables -you to re-run an older run and debug in case of any errors with the actual datasets used. +Catalog provides a way to store and retrieve data generated by the individual steps of the dag to downstream +steps of the dag. It can be any storage system that indexes its data by a unique identifier. ---- -!!! Note +For example, a local directory structure partitioned by a ```run_id``` or S3 bucket prefixed by ```run_id```. - Since the data is stored per-run, it might cause the catalog to inflate a lot. Please consider some clean up - mechanisms to regularly prune runs that are no longer relevant. ---- +!!! tip inline end "Checkpoint" -As with all services of magnus, there are several providers of catalog and you can easily extend to create your own -cataloging system and use it in your runs. + Cataloging happens even if the step execution eventually fails. This behavior + can be used to recover from a failed run from a checkpoint. -## Configuration -Configuring the catalog can be done as follows. -```yaml -catalog: - type: - config: -``` - -### type - -The type of catalog you want. This should be one of the catalog types already available. - -By default FileSystem Catalog is given if no config is provided. - -### config - -Any configuration variables accepted by the catalog provider. - -## Configuration within Step - -Within a step of the dag, the catalog can be configured by - -```yaml -catalog: - ... +The directory structure within a partition is the same as the project directory structure. This enables you to +get/put data in the catalog as if you are working with local directory structure. Every interaction with the catalog +(either by API or configuration) results in an entry in the [```run log```](/concepts/run-log/#step_log) -dag: - steps: - step name: - ... - catalog: - compute_data_folder: # optional - get: - - list - put: - - list +Internally, magnus also uses the catalog to store execution logs of tasks i.e stdout and stderr from +[python](/concepts/task/#python) or [shell](/concepts/task/#shell) and executed notebook from [notebook tasks](/concepts/task/#notebook). - ... -``` +Since the catalog captures the data files flowing through the pipeline and the execution logs, it enables you +to debug failed pipelines or keep track of data lineage. 
-or via the Python SDK: -```python -from magnus import Task -catalog = { - 'compute_data_folder': '' # point to the directory from where the data should be extracted - 'get': [], # All the files to get from the catalog - 'put': [] # All the files to put in the catalog -} -first = Task(name='Cool function', command='my_module.my_cool_function', catalog=catalog) +!!! warning "Storage considerations" -``` + Since the data is stored per-run, it might cause the catalog to inflate. -### compute_data_folder + Please consider some clean up + mechanisms to regularly prune catalog for executions that are not relevant. -The ```compute_data_folder``` for a single step could be different from the global ```compute_data_folder``` -and you can provide it by using the catalog settings for that step. -The actual cataloging is done in two stages: -- get: Get the data mentioned in the ```get``` from the catalog to ```compute_data_folder``` before executing the node. -- put: Store all the data mentioned in ```put``` from the ```compute_data_folder``` to catalog after executing the node. -Both ```get``` and ```put``` can accept glob patterns. Internally we use -[Pathlib match function](https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.match) -to match the name to pattern. +## Example ---- -!!! Note - The ```put``` stage of the cataloging checks if the data source has been obtained from ```get``` phase and - only puts a new record if there were changes observed during the execution of the node. ---- -## Interaction within code +=== "Configuration" -You can also interact with the catalog within your python programs if it is convenient than providing it in yaml. + Below is a sample configuration that uses the local file system as a catalog store. + The default location of the catalog is ```.catalog``` and is configurable. ---8<-- [start:how-do-i-pass-data] + Every execution of the pipeline will create a sub-directory of name ```run_id``` to store the artifacts + generated from the execution of the pipeline. -### Get from catalog + ```yaml + --8<-- "examples/configs/fs-catalog.yaml" + ``` -To get a file from the catalog, use ```get_from_catalog``` from magnus. + 1. Use local file system as a central catalog, defaults to ```.catalog``` -For example, the below code gets the file ```interesting_data.csv``` from the catalog into ```data/``` folder. +=== "python sdk" + In the below example, the steps ```create_content_in_data_folder``` and ```create_content_in_another_folder``` + create content for downstream steps, i.e ```retrieve_content_from_both``` to consume. -```python -from magnus import get_from_catalog + !!! note "Delete?" -def my_function(): - get_from_catalog('interesting.csv', destination_folder='data/') + Since we are executing in local compute and creating sub-directory ```another```, it might be mistaken that + we are not cataloging anything. We delete ```another``` directory between steps + to demonstrate that we indeed move files in and out of the catalog. -``` + The highlighted lines in the below example show how to specify the files to get/put from the catalog using python SDK. -### Put in catalog + ```python linenums="1" hl_lines="44 52 68" + --8<-- "examples/concepts/catalog.py" + ``` -To put a file into the catalog, use ```put_in_catalog``` from magnus. +=== "yaml" -For example, the below code puts the file ```data/interesting_data.csv``` from the data folder into catalog. 
+ In the below example, the steps ```data_create``` and ```another_create``` create content for + downstream steps, i.e ```retrieve``` to consume. + !!! note "Delete?" -```python -from magnus import put_in_catalog + Since we are executing in local compute and creating sub-directory ```another```, it might be mistaken that + we are not cataloging anything. We delete ```another``` directory between steps + to demonstrate that we indeed move files in and out of the catalog. -def my_function(): - put_in_catalog('data/interesting.csv') + The highlighted lines in the below example show how to specify the files to get/put from the catalog using + yaml. -``` ---- -!!! Note + ```yaml linenums="1" hl_lines="19-21 26-28 38-40" + --8<-- "examples/concepts/catalog.yaml" + ``` - Unlike ```put``` phase of the cataloging process, put_in_catalog does not check if the cataloging object has - changed and does a blind update. +!!! note "glob pattern" ---- + We use [glob pattern](https://docs.python.org/3/library/glob.html) to search for files. ---8<-- [end:how-do-i-pass-data] + Note that, the pattern to recursively match all directories is ```**/*``` -## Passing Data Objects ---8<-- [start:how-do-i-pass-objects] +The execution results in the ```catalog``` populated with the artifacts and the execution logs of the tasks. -While the is good for files, it is inconvenient to dump and load the object into files for the cataloging to happen. -Magnus provides utility functions to make it easier. -### Get object from catalog +=== "Directory structure" -To get a object from the catalog, use ```get_object``` from magnus. + The directory structure within the ```catalog``` for the execution, i.e meek-stonebraker-0626, resembles + the project directory structure. -For example, the below code gets a pandas dataframe from previous steps, called ```interesting_data``` from the catalog. - - -```python -from magnus import get_object - -def my_function(): - df = get_object("interesting_data") + The execution logs of all the tasks are also present in the ```catalog```. + ``` + >>> tree .catalog + .catalog + └── meek-stonebraker-0626 + ├── another + │   └── world.txt + ├── create_content_in_another_folder.execution.log + ├── create_content_in_data_folder.execution.log + ├── data + │   └── hello.txt + ├── delete_another_folder.execution.log + └── retrieve_content_from_both.execution.log + + 4 directories, 6 files + ``` + +=== "Run log" + + The run log captures the data identities of the data flowing through the catalog. 
+ + + ```json linenums="1" hl_lines="38-53 84-99 169-191" + { + "run_id": "meek-stonebraker-0626", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "create_content_in_data_folder": { + "name": "create_content_in_data_folder", + "internal_name": "create_content_in_data_folder", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "6029841c3737fe1163e700b4324d22a469993bb0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-06 06:26:56.279278", + "end_time": "2024-01-06 06:26:56.284564", + "duration": "0:00:00.005286", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "create_content_in_data_folder.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "meek-stonebraker-0626/create_content_in_data_folder.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + }, + { + "name": "data/hello.txt", + "data_hash": "6ccad99847c78bfdc7a459399c9957893675d4fec2d675cec750b50ab4842542", + "catalog_relative_path": "meek-stonebraker-0626/data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "create_content_in_another_folder": { + "name": "create_content_in_another_folder", + "internal_name": "create_content_in_another_folder", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "6029841c3737fe1163e700b4324d22a469993bb0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-06 06:26:56.353734", + "end_time": "2024-01-06 06:26:56.357519", + "duration": "0:00:00.003785", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "create_content_in_another_folder.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "meek-stonebraker-0626/create_content_in_another_folder.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + }, + { + "name": "another/world.txt", + "data_hash": "869ae2ac8365d5353250fc502b084a28b2029f951ea7da0a6948f82172accdfd", + "catalog_relative_path": "meek-stonebraker-0626/another/world.txt", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "delete_another_folder": { + "name": "delete_another_folder", + "internal_name": "delete_another_folder", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "6029841c3737fe1163e700b4324d22a469993bb0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-06 06:26:56.428437", + "end_time": "2024-01-06 06:26:56.450148", + "duration": 
"0:00:00.021711", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "delete_another_folder.execution.log", + "data_hash": "a9b49c92ed63cb54a8b02c0271a925d9fac254034ed45df83f3ff24c0bd53ef6", + "catalog_relative_path": "meek-stonebraker-0626/delete_another_folder.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "retrieve_content_from_both": { + "name": "retrieve_content_from_both", + "internal_name": "retrieve_content_from_both", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "6029841c3737fe1163e700b4324d22a469993bb0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-06 06:26:56.520948", + "end_time": "2024-01-06 06:26:56.530135", + "duration": "0:00:00.009187", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "data/hello.txt", + "data_hash": "6ccad99847c78bfdc7a459399c9957893675d4fec2d675cec750b50ab4842542", + "catalog_relative_path": "data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "get" + }, + { + "name": "another/world.txt", + "data_hash": "869ae2ac8365d5353250fc502b084a28b2029f951ea7da0a6948f82172accdfd", + "catalog_relative_path": "another/world.txt", + "catalog_handler_location": ".catalog", + "stage": "get" + }, + { + "name": "retrieve_content_from_both.execution.log", + "data_hash": "0a085cb15df6c70c5859b44cc62bfdc98383600ba4f2983124375a4f64f1ae83", + "catalog_relative_path": "meek-stonebraker-0626/retrieve_content_from_both.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "6029841c3737fe1163e700b4324d22a469993bb0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-06 06:26:56.591948", + "end_time": "2024-01-06 06:26:56.592032", + "duration": "0:00:00.000084", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/fs-catalog.yaml", + "tag": "", + "run_id": "meek-stonebraker-0626", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "create_content_in_data_folder", + 
"name": "", + "description": "", + "internal_branch_name": "", + "steps": { + "create_content_in_data_folder": { + "type": "task", + "name": "create_content_in_data_folder", + "internal_name": "create_content_in_data_folder", + "internal_branch_name": "", + "is_composite": false + }, + "create_content_in_another_folder": { + "type": "task", + "name": "create_content_in_another_folder", + "internal_name": "create_content_in_another_folder", + "internal_branch_name": "", + "is_composite": false + }, + "retrieve_content_from_both": { + "type": "task", + "name": "retrieve_content_from_both", + "internal_name": "retrieve_content_from_both", + "internal_branch_name": "", + "is_composite": false + }, + "delete_another_folder": { + "type": "task", + "name": "delete_another_folder", + "internal_name": "delete_another_folder", + "internal_branch_name": "", + "is_composite": false + }, + "success": { + "type": "success", + "name": "success", + "internal_name": "success", + "internal_branch_name": "", + "is_composite": false + }, + "fail": { + "type": "fail", + "name": "fail", + "internal_name": "fail", + "internal_branch_name": "", + "is_composite": false + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + + +## Using python API + +Files could also be cataloged using [python API](/interactions) + + +This functionality is possible in [python](/concepts/task/#python_functions) +and [notebook](/concepts/task/#notebook) tasks. + +```python linenums="1" hl_lines="11 23 35 45" +--8<-- "examples/concepts/catalog_api.py" ``` -Be aware that, the function would raise an exception if ```interesting_data``` was not added to catalog before. - -### Put object in catalog -To put a object into the catalog, use ```put_object``` from magnus. -For example, the below code puts the dataframe ```interesting_data``` into the catalog as ```interesting_data.pickle```. +## Passing Data Objects -```python -from magnus import put_object - -def my_function(): - put_object(data=interesting_data, name="interesting_data") - -``` - ---- -!!! Note - - We internally use pickle for the serialization and deserialization. Please raise a feature request if you need - other kind of serializers. - ---- - ---8<-- [end:how-do-i-pass-objects] - -## Parameterized definition - -As with any part of the magnus configuration, you can parameterize the configuration of catalog to switch between -catalog providers without changing the base definition. - -Please follow the example provided [here](../dag/#parameterized_definition) for more information. - - -## Extensions - -You can easily extend magnus to bring in your custom provider, if a default -implementation does not exist or you are not happy with the implementation. - -[Extensions are being actively developed and can be found here.](https://github.com/AstraZeneca/magnus-extensions) - -The ```BaseCatalog``` implementation is as follows: +Data objects can be shared between [python](/concepts/task/#python_functions) or [notebook](/concepts/task/#notebook) tasks, +instead of serializing data and deserializing to file structure, using +[get_object](/interactions/#magnus.get_object) and [put_object](/interactions/#magnus.put_object). -```python -# You can find the source code in magnus/catalog.py ---8<-- "magnus/catalog.py:docs" +Internally, we use [pickle](https:/docs.python.org/3/library/pickle.html) to serialize and +deserialize python objects. Please ensure that the object can be serialized via pickle. 
-``` +### Example +In the below example, the step ```put_data_object``` puts a pydantic object into the catalog while the step +```retrieve_object``` retrieves the pydantic object from the catalog and prints it. -The custom extensions should be registered as part of the namespace: ```catalog``` for it to be -loaded. +You can run this example by ```python run examples/concepts/catalog_object.py``` -```toml -# For example, as part of your pyproject.toml -[tool.poetry.plugins."catalog"] -"gfs" = "YOUR_PACKAGE:GFStorage" +```python linenums="1" hl_lines="10 30 38" +--8<-- "examples/concepts/catalog_object.py" ``` diff --git a/docs/concepts/command-types.md b/docs/concepts/command-types.md deleted file mode 100644 index 90b859ff..00000000 --- a/docs/concepts/command-types.md +++ /dev/null @@ -1,162 +0,0 @@ -# Command types - -## Python - -By default, ```python``` is the command type. You can mention the python function that you want to invoke -using the ```command``` section. - -For example, in the dag definition below, the command type is defaulted to python and magnus invokes -```my_module.my_function``` as part of the step. - -```yaml -dag: - steps: - step1: - command: my_module.my_function - ... -``` - -Or via the python SDK: - -```python -from magnus import Task - -first = Task(name='step1', command='my_module.my_function') -``` - -The function arguments are dynamically introspected from the parameter space. - -The return value of the function should always be a dictionary for parameters and are added as key-value pairs -into the parameter space. Non dictionary arguments are ignored with a warning. - -More [examples](../../examples) - -Any console output from the function is automatically uploaded to the catalog for future reference. - - -## Shell - -You can have shell commands as part of magnus dag definition. The ```command``` provided in the config is -invoked as part of the step. - -For example, in the dag definition below, ```step``` invokes the ```ls``` command as part of the pipeline. -You can use this ```command_type``` to have non-python executables as part of your pipeline. - -```yaml -dag: - steps: - step1: - command: ls - command_type: shell - ... -``` - -Or via the python SDK: - -```python -from magnus import Task - -first = Task(name='step1', command='ls', command_type='shell') -``` - - -Please note that, magnus will be able to send in the existing parameters using environmental variables prefixed with -```MAGNUS_PRM_``` but would not be able to collect any return parameters. Similarly, the functionality of -secrets should be handled by the ```script``` and would not be done by magnus. - -The cataloging functionality works as designed and can be used to sync data in and out the ```compute_data_folder```. - -More [examples](../../examples) - -## Python lambda expression - -Using ```command_type: python-lambda```, you can provide a lambda expression as ```command```. For example: - -``` -lambda x : int(x) + 1 -``` - -Or via the python SDK: - -```python -from magnus import Task - -first = Task(name='step1', command='lambda x : int(x) + 1', command_type='python-lambda') -``` - - -is a valid lambda expression. Note that, you cannot have ```_```or ```__``` as part of your string. This is just a -security feature to -[avoid malicious code injections](https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html). - -The parameters used as part of the lambda expression are introspected and provided dynamically from the parameter space. 
- -This command type is designed to provide simpler ways to manipulate parameter space. - -## Notebook - -In magnus, you can execute Jupyter notebooks by ```command_type: notebook```. The ```command``` should be the path -to the notebook you want to execute. - ---- -!!! Note - - For ```command_type: notebook``` to work, you need to install optional packages by: - - pip install magnus[notebook] ---- - - -Internally, we use [ploomber](https://ploomber.io/) for inspection and execution -of the notebook. Any ```parameters``` defined in the notebook would be introspected and dynamically provided at runtime -from the parameter space. - -The path of the output of execution is obtained by post-fixing ```_out``` to the input notebook but can be configured -by ```command_config``` as shown below. - -```yaml -dag: - steps: - step1: - command: notebooks/input.ipynb - command_type: notebook - command_config: - notebook_output_path: notebooks/output.ipynb - ... -``` - -Or via the python SDK: - -```python -from magnus import Task - -first = Task(name='step1', command='notebooks/input.ipynb', command_type='notebook', - command_config={'notebook_output_path': 'notebooks/output.ipynb'}) -``` - -Since the kernel used is the same as the execution environment via ploomber, anything that you can do via the python -function should be available via the notebook. - -The output notebook is automatically uploaded to the catalog for future reference. - -## Extensions - -You can extend and implement your ```command_types``` by extending the base class of the command type. - -[Extensions are being actively developed and can be found here.](https://github.com/AstraZeneca/magnus-extensions) - -```python -#Example implementations can be found in magnus/tasks.py ---8<-- "magnus/tasks.py:docs" - -``` - -The custom extensions should be registered as part of the namespace: ```tasks``` for it to be -loaded. - -```toml -# For example, as part of your pyproject.toml -[tool.poetry.plugins."tasks"] -"sql" = "YOUR_PACKAGE:SQLtaskType" - -``` diff --git a/docs/concepts/dag.md b/docs/concepts/dag.md index b81f1cd0..e69de29b 100644 --- a/docs/concepts/dag.md +++ b/docs/concepts/dag.md @@ -1,155 +0,0 @@ -# Dag - -Dag or directed acyclic graphs are a way to define your work flows. Its a graph representation of the series of -tasks you want to perform and the order of it. - -In magnus, a lot of design emphasis was on making sure that a dag once defined should not change for -deployment purposes. The dag is also version controlled and as part of your code repositories to promote good -software engineering practices. These design decisions should enable experimentation to happen in -*interactive* modes while engineering teams can use their preferred **Continuos Integration** tools to -operationalize experiments once they are mature without changing code or the dag. - -We see the dag as a **contract** between the engineering teams and data science teams. While the data science teams -can focus on **what** should be the part of the pipeline, the engineering teams can focus on the -**how** to operationalize it. - -The configuration of a dag: -```yaml -dag: - start_at: - name: # optional - description: #optional - max_time: # Optional - steps: -``` - -### description (optional) - -A short description of the dag - -### max_time (optional) - -The maximum compute time that this dag could run in seconds. - ---- -!!! Note - - Maximum run time is just a definition in the dag and the actual implementation depends upon the mode of execution. 
- - For example, interactive modes can completely ignore the maximum run time. - - Orchestration modes might have a default if one is not provided. For example: AWS step functions defaults maximum run time for a a state machine to be 86400 seconds. ---- - -### start_at - -The node/step in the steps to start the traversal of the graph. -A node of this name should be present in the steps. - -### steps - -A mapping of steps with each step belonging to one of the [defined types](nodes.md). - -## Example -Assuming this is in dag-concepts.yaml -```yaml -# in dag-concepts.yaml -dag: - start_at: Hello - steps: - Hello: - type: task - command: my_module.say_hello - next: Success - Success: - type: success - Fail: - type: fail -``` - -or via the Python SDK: - -```python -# in pipeline.py -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='Hello', command='my_module.say_hello') - pipeline = Pipeline(start_at=first, name='dag-concepts') - pipeline.construct([first]) - - pipeline.execute() - -if __name__ == '__main__': - pipeline() -``` - -And the following code in my_module.py -```python -# in my_module.py - -def say_hello(name=world): - print(f'Hello {name}') -``` - -We can execute the dag by: -```shell -export MAGNUS_PRM_name=universe -magnus execute --file dag-concepts.yaml -``` - -or via the Python SDK: -``` -python pipeline.py -``` - -You should be able to see ```Hello universe``` in the logs. - -## Parameterized Definition - ---8<-- [start:how-do-i-parameterize] - -Magnus allows dag definitions to be parameterized by using placeholders. We use [python String templates](https://docs.python.org/3.7/library/string.html#template-strings) to enable parameter substitution. As we use, [safe_substitution](https://docs.python.org/3.7/library/string.html#string.Template.safe_substitute) it means that we silently ignore any parameter that is not found. -You should make sure that the parameters are properly defined. - -### Example of variables -Assuming this is in dag-variable.yaml -```yaml -dag: - start_at: Hello - steps: - Hello: - type: task - command: ${module_name} - next: Success - Success: - type: success - Fail: - type: fail -``` - -Magnus variables can be defined by environmental variables, any string with a prefix ```MAGNUS_VAR_``` is considered a -variable. - -```shell -export MAGNUS_VAR_module_name=my_module.say_hello -export MAGNUS_PRM_name="universe" -``` -and with the same python code [as before](#example), we can achieve the same result by: -```shell -magnus execute --file dag-variable.yaml -``` - -Magnus would resolve the placeholders at the load of the dag definition. - -The variables are also applied on the configuration file. - -### Design thought behind variables - -Variables are a great way to have a generalized definition of the dag and the config parameters, especially if that -value is not known before hand. - -For example: consider a containerized execution of the pipeline. The container tag to run might be only known after -building the container and this can be supplied dynamically at run time. - ---8<-- [end:how-do-i-parameterize] diff --git a/docs/concepts/executor-implementations/demo-renderer.md b/docs/concepts/executor-implementations/demo-renderer.md deleted file mode 100644 index e1d25ad6..00000000 --- a/docs/concepts/executor-implementations/demo-renderer.md +++ /dev/null @@ -1,19 +0,0 @@ -# Demo Renderer - -In this compute mode, we translate the dag into a bash script to demonstrate the idea of dag translation. 
Composite -nodes like ```parallel```, ```dag``` and ```map``` are not allowed as part of the definition. - -In this set up, we ignore max run time set on the dag completely. - -## Configuration - - -The full configuration of the executor is: - -```yaml -executor: - type: demo-renderer -``` - -The parameters that have to be passed could be done either via environment variables prefixed by ```MAGNUS_PRM_``` -or by the command line like the [example shown here](../../../getting_started/example-deployment/#execution). diff --git a/docs/concepts/executor-implementations/local-container.md b/docs/concepts/executor-implementations/local-container.md deleted file mode 100644 index 8828bb30..00000000 --- a/docs/concepts/executor-implementations/local-container.md +++ /dev/null @@ -1,119 +0,0 @@ -# Local Container - - -Local container is an interactive executor. In this, the traversal of the dag is done on the -local computer but the -actual execution happens on a container (running on local machine). This executor should enable you to test -the pipeline and execution of your code in containers. This executor could also be useful in -debugging a container based cloud run. - -In this executor, max run time is completely ignored. - -Apart from Buffered Run Log store, all other run log stores are compatible. -All secrets and catalog providers are compatible with this executor. - ---- -!!! Note - - Magnus does not build the docker image for you but uses a docker image provided. ---- -## Configuration - - -The full configuration of this executor is: - -```yaml -executor: - type: local-container - config: - enable_parallel: - docker_image: - environment: # is passed in to docker.containers.create(environment=environment) -``` - -### Enabling parallel - -By default, none of the branches in parallel or a map node are executed in parallel. -You can enable it by setting enable_parallel to True. - - - -### Docker image - -The ```docker_image``` to run the individual nodes of the graph. - ---- -!!! Requirements - - The docker image should have magnus installed in it and available as CMD. -
- The docker image should also have its working directory set as your project root.
- Please use python3.8 or higher. ---- - -An example docker image to start with: - -```dockerfile -# Python 3.7 Image without Dependencies -FROM python:3.8 - -LABEL maintainer= - -# If you want git versioning ability -RUN apt-get update && apt-get install -y --no-install-recommends \ - git \ - && rm -rf /var/lib/apt/lists/* - -RUN pip install poetry - -ENV VIRTUAL_ENV=/opt/venv -RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" - -COPY . /app -WORKDIR /app - -RUN poetry install -``` - -### Node over-rides - - -The docker image/environment provided at ```executor``` can be over-ridden by individual nodes of the graph by providing - a ```executor_config``` as part of the definition. Since ```executor_config``` is universally used by all modes, the -over-rides should be provided within the context of the executor type. - -For example: - -```yaml -run_log: - type: file-system - -executor: - type: local-container - config: - docker_image: project_default - -dag: - description: Getting started - start_at: step1 - steps: - step1: - type: as-is - executor_config: - local-container: - docker_image: step1_image - command: my_function_does_all.func - next: step2 - step2: - type: as-is - next: step3 - step3: - type: success - step4: - type: fail -``` - -In the above example, if we assume project_default and step1_image to be 2 different images that satisfy -the requirements, step1 would run in step1_image while the remaining nodes would run in project_default image. diff --git a/docs/concepts/executor-implementations/local.md b/docs/concepts/executor-implementations/local.md deleted file mode 100644 index 6f211d16..00000000 --- a/docs/concepts/executor-implementations/local.md +++ /dev/null @@ -1,27 +0,0 @@ -# Local - - -Local executor is an interactive mode. In this, magnus does the traversal of the graph and execution of nodes -on the local compute from which it is executed. - -In this set up, we ignore max run time set on the dag completely. - -All types of secrets, catalog and run log store are compatible with this executor. And this compute executor is default -if no executor if provided in the dag definition. - -## Configuration - -The full configuration of local executor is: - -```yaml -executor: - - type: local - config: - enable_parallel: -``` - -### Enabling parallel - -By default, none of the branches in parallel or a map node are executed in parallel. -You can enable it by setting enable_parallel to True. diff --git a/docs/concepts/executor.md b/docs/concepts/executor.md index 89d6215a..6e024d56 100644 --- a/docs/concepts/executor.md +++ b/docs/concepts/executor.md @@ -1,86 +1,308 @@ -# Executors +Executors are the heart of magnus, they traverse the workflow and execute the tasks within the +workflow while coordinating with different services +(eg. [run log](/concepts/run-log), [catalog](/concepts/catalog), [secrets](/concepts/secrets) etc) -In magnus, executors essentially represent either the compute resource or the orchestration framework. +To enable workflows run in varied computational environments, we distinguish between two core functions of +any workflow engine. -Conceptually, a executor can be one of two types: -- **Interactive executors**: In this mode, magnus does the work of executing the pipeline/function/notebook +`Graph Traversal` -```shell -magnus execute --file my-project.yaml --config-file config.yaml +: Involves following the user-defined workflow graph to its eventual conclusion. 
+ The navigation process encompasses the sequential execution of tasks or complex tasks + such as parallel paths. It also includes decision-making regarding the + pathway to follow in case of task failure and the upkeep of the + overall status of graph execution. -magnus execute_notebook interesting.ipynb --config-file config.yaml +`Executing Individual Steps` -magnus execute_function my_module.my_function --config-file config.yaml -``` - -Magnus takes care of the execution of the pipeline/function or notebook in the compute you requested. Examples -of this executors or local, local container etc. - -- **Orchestration executors**: In this mode, the dag definition is transpiled to your preferred orchestration language -of dag definition. To still achieve the capabilities of interactive executors, the orchestration language is -directed to call an internal method instead of your actual function. - -Specifically, the orchestration is asked to call - -```shell -magnus execute_single_node --file my-project.yaml --config-file config.yaml --step-name step-to-call -``` +: This refers to the concrete execution of the task as specified by the user + along with allowing for data flow between tasks. + This could involve activities such as launching a container or initiating a SQL query, + among others. -The branches of the original dag are also translated to the orchestrators language if its supported. If the -orchestration executors does not support a feature that magnus supports, you could still make it work by a mixed model. +## Graph Traversal -Examples of orchestration modes are aws step functions, kubeflow job specification, argo job specification etc. +In magnus, the graph traversal can be performed by magnus itself or can be handed over to other +orchestration frameworks (e.g Argo workflows, AWS step functions). -## Configuration +### Example -As with any system within magnus, configuration of an executor can be done by: +Below is a simple pipeline definition that does one task of printing "Hello World". -```yaml -executor: - type: - config: +```yaml linenums="1" +--8<-- "examples/concepts/task_shell_simple.yaml" ``` -### type - -The type of mode provider you want. This should be one of the executors types already available. - -Local executor is provided by default if nothing is provided. - -### config - -Any configuration parameters the execution provider accepts. - -## Parameterized definition - -As with any part of the magnus configuration, you can parameterize the configuration of executor to switch between -execution providers without changing the base definition. - -Please follow the example provided [here](../dag/#parameterized_definition) for more information. - -## Extensions - -You can easily extend magnus to interpret the dag definition to a orchestration language of choice, if a default -implementation does not exist or you are not happy with the implementation. - -[Extensions are being actively developed and can be found here.](https://github.com/AstraZeneca/magnus-extensions) - -The ```BaseExecutor``` implementation is as follows: - -```python -# The code can be found at magnus/executor.py -# The "_private" methods should not be touched without significant knowledge about the design - ---8<-- "magnus/executor.py:docs" - +The above pipeline can be executed by the *default* config to execute it locally or could be +translated to argo specification just by changing the configuration. 
+ +=== "Default Configuration" + + The configuration defines the local compute to the execution environment with the ```run log``` + being completely in memory and buffered with no other services active. + + You can execute the pipeline in default configuration by: + + ```magnus execute -f examples/concepts/task_shell_simple.yaml``` + + ``` yaml linenums="1" + --8<-- "examples/configs/default.yaml" + ``` + + 1. Run the pipeline in local environment. + 2. Use the buffer as run log, this will not persist the run log to disk. + 3. Do not move any files to central storage. + 4. Do not use any secrets manager. + 5. Do not integrate with any experiment tracking tools + +=== "Argo Configuration" + + In this configuration, we are using [argo workflows](https://argoproj.github.io/argo-workflows/) + as our workflow engine. We are also instructing the workflow engine to use a docker image, + ```magnus:demo``` defined in line #4, as our execution environment. Please read + [containerised environments](/configurations/executors/container-environments) for more information. + + Since magnus needs to track the execution status of the workflow, we are using a ```run log``` + which is persistent and available in for jobs in kubernetes environment. + + + You can execute the pipeline in argo configuration by: + + ```magnus execute -f examples/concepts/task_shell_simple.yaml -c examples/configs/argo-config.yaml``` + + ``` yaml linenums="1" + --8<-- "examples/configs/argo-config.yaml" + ``` + + 1. Use argo workflows as the execution engine to run the pipeline. + 2. Run this docker image for every step of the pipeline. The docker image should have the same directory structure + as the project directory. + 3. Mount the volume from Kubernetes persistent volumes (magnus-volume) to /mnt directory. + 4. Resource constraints for the container runtime. + 5. Since every step runs in a container, the run log should be persisted. Here we are using the file-system as our + run log store. + 6. Kubernetes PVC is mounted to every container as ```/mnt```, use that to surface the run log to every step. + + +=== "Transpiled Workflow" + + In the below generated argo workflow template: + + - Lines 10-17 define a ```dag``` with tasks that corresponding to the tasks in + the example workflow. + - The graph traversal rules follow the the same rules as our workflow. The + step ```success-success-ou7qlf``` in line #15 only happens if the step ```shell-task-dz3l3t``` + defined in line #12 succeeds. + - The execution fails if any of the tasks fail. Both argo workflows and magnus ```run log``` + mark the execution as failed. 
+ + + ```yaml linenums="1" + apiVersion: argoproj.io/v1alpha1 + kind: Workflow + metadata: + generateName: magnus-dag- + annotations: {} + labels: {} + spec: + activeDeadlineSeconds: 172800 + entrypoint: magnus-dag + podGC: + strategy: OnPodCompletion + retryStrategy: + limit: '0' + retryPolicy: Always + backoff: + duration: '120' + factor: 2 + maxDuration: '3600' + serviceAccountName: default-editor + templates: + - name: magnus-dag + failFast: true + dag: + tasks: + - name: shell-task-4jy8pl + template: shell-task-4jy8pl + depends: '' + - name: success-success-djhm6j + template: success-success-djhm6j + depends: shell-task-4jy8pl.Succeeded + - name: shell-task-4jy8pl + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - shell + - --log-level + - WARNING + - --file + - examples/concepts/task_shell_simple.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: success-success-djhm6j + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - success + - --log-level + - WARNING + - --file + - examples/concepts/task_shell_simple.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + templateDefaults: + activeDeadlineSeconds: 7200 + timeout: 10800s + arguments: + parameters: + - name: run_id + value: '{{workflow.uid}}' + volumes: + - name: executor-0 + persistentVolumeClaim: + claimName: magnus-volume + + + ``` + + +As seen from the above example, once a [pipeline is defined in magnus](/concepts/pipeline) either via yaml or SDK, we can +run the pipeline in different environments just by providing a different configuration. Most often, there is +no need to change the code or deviate from standard best practices while coding. + + +## Step Execution + +!!! note + + This section is to understand the internal mechanism of magnus and not required if you just want to + use different executors. + + +Independent of traversal, all the tasks are executed within the ```context``` of magnus. + +A closer look at the actual task implemented as part of transpiled workflow in argo +specification details the inner workings. Below is a snippet of the argo specification from +lines 18 to 34. + +```yaml linenums="18" +- name: shell-task-dz3l3t + container: + image: magnus-example:latest + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - shell + - --log-level + - WARNING + - --file + - examples/concepts/task_shell_simple.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt ``` -The custom extensions should be registered as part of the namespace: ```executor``` for it to be -loaded. - -```toml -# For example, as part of your pyproject.toml -[tool.poetry.plugins."executor"] -"kubeflow" = "YOUR_PACKAGE:Kubeflow" +The actual ```command``` to run is not the ```command``` defined in the workflow, +i.e ```echo hello world```, but a command in the CLI of magnus which specifies the workflow file, +the step name and the configuration file. 
+ +### Context of magnus + +Any ```task``` defined by the user as part of the workflow always runs as a *sub-command* of +magnus. In that sense, magnus follows the +[decorator pattern](https://en.wikipedia.org/wiki/Decorator_pattern) without being part of the +application codebase. + +In a very simplistic sense, the below stubbed-code explains the context of magnus during +execution of a task. + +```python linenums="1" + +def execute_single_node(workflow, step_name, configuration): + + ##### PRE EXECUTION ##### + # Instantiate the service providers of run_log and catalog + # These are provided as part of the configuration. + run_log = configuration.get_run_log() # (1) + catalog = configuration.get_catalog() # (2) + + step = workflow.get_step(step_name) # (3) + + # Get the current parameters set by the initial parameters + # or by previous steps. + existing_parameters = run_log.get_parameters() + # Get the data requested by the step and populate + # the data folder defined in the catalog configuration + catalog.get_data(step.get_from_catalog) # (4) + + # Choose the parameters to pass into the function and + # the right data type. + task_parameters = filter_and_cast_parameters(existing_parameters, step.task) # (5) + + ##### END PRE EXECUTION ##### + try: + # We call the actual task here!! + updated_parameters = step.task(**task_parameters) # (6) + except: + update_status_in_run_log(step, FAIL) + send_error_response() # (7) + + ##### POST EXECUTION ##### + run_log.update_parameters(updated_parameters) # (8) + catalog.put_data(step.put_into_catalog) # (9) + update_status_in_run_log(step, SUCCESS) + send_success_response() # (10) + ##### END POST EXECUTION ##### ``` + +1. The [run log](/concepts/run-log) maintains the state of the execution of the tasks and subsequently the pipeline. It also +holds the latest state of parameters along with captured metrics. +2. The [catalog](/concepts/catalog) contains the information about the data flowing through the pipeline. You can get/put +artifacts generated during the current execution of the pipeline to a central storage. +3. Read the workflow and get the [step definition](/concepts/task) which holds the ```command``` or ```function``` to +execute along with the other optional information. +4. Any artifacts from previous steps that are needed to execute the current step can be +[retrieved from the catalog](/concepts/catalog). +5. The current function or step might need only some of the +[parameters casted as pydantic models](/concepts/task/#accessing_parameters), filter and cast them appropriately. +6. At this point in time, we have the required parameters and data to execute the actual command. The command can +internally request for more data using the [python API](/interactions) or record +[experiment tracking metrics](/concepts/experiment-tracking). +7. If the task failed, we update the run log with that information and also raise an exception for the +workflow engine to handle. Any [on-failure](/concepts/pipeline/#on_failure) traversals are already handled +as part of the workflow definition. +8. Upon successful execution, we update the run log with current state of parameters for downstream steps. +9. Any artifacts generated from this step are [put into the central storage](/concepts/catalog) for downstream steps. +10. We send a success message to the workflow engine and mark the step as completed. 
diff --git a/docs/concepts/experiment-tracking.md b/docs/concepts/experiment-tracking.md index 49017f8f..a3b4863f 100644 --- a/docs/concepts/experiment-tracking.md +++ b/docs/concepts/experiment-tracking.md @@ -1,128 +1,468 @@ # Overview -Tracking and recording key metrics from your experiment makes data science a "scientific" process. In magnus, we define -an experiment as anything that touched data and produced some insight. For example, the number of rows in a database -or a CSV could be something that needs to be recorded for later insight. +[Run log](/concepts/run-log) stores a lot of information about the execution along with the metrics captured +during the execution of the pipeline. + + +## Example + + +=== "Using the API" + + The highlighted lines in the below example show how to [use the API](/interactions/#magnus.track_this) + + Any pydantic model as a value would be dumped as a dict, respecting the alias, before tracking it. + + You can run this example by ```python run examples/concepts/experiment_tracking_api.py``` + + ```python linenums="1" hl_lines="10 24-26" + --8<-- "examples/concepts/experiment_tracking_api.py" + ``` + + +=== "Using environment variables" + + The highlighted lines in the below example show how to use environment variables to track metrics. + + Only string values are allowed to be environment variables. Numeric values sent in as strings are converted + to int/float before storing them as metrics. + + There is no support for boolean values in environment variables. + + ```yaml linenums="1" hl_lines="16-18" + --8<-- "examples/concepts/experiment_tracking_env.yaml" + ``` + +=== "Run log entry" + + Any experiment tracking metrics found during the execution of the task are stored in + ```user_defined_metrics``` field of the step log. + + For example, below is the content for the shell execution. 
+ + ```json linenums="1" hl_lines="36-42" + { + "run_id": "blazing-colden-0544", + "dag_hash": "4494aeb907ef950934fbcc34b226f72134d06687", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "shell": { + "name": "shell", + "internal_name": "shell", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "793b052b8b603760ff1eb843597361219832b61c", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-09 05:44:42.841295", + "end_time": "2024-01-09 05:44:42.849938", + "duration": "0:00:00.008643", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": { + "eggs": { + "ham": "world" + }, + "answer": 42.0, + "spam": "hello" + }, + "branches": {}, + "data_catalog": [ + { + "name": "shell.execution.log", + "data_hash": "07723e6188e7893ac79e8f07b7cc15dd1a62d2974335f173a0b5a6e58a3735d6", + "catalog_relative_path": "blazing-colden-0544/shell.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "793b052b8b603760ff1eb843597361219832b61c", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-09 05:44:42.913905", + "end_time": "2024-01-09 05:44:42.913963", + "duration": "0:00:00.000058", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/concepts/experiment_tracking_env.yaml", + "parameters_file": null, + "configuration_file": null, + "tag": "", + "run_id": "blazing-colden-0544", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "shell", + "name": "", + "description": "An example pipeline to demonstrate setting experiment tracking metrics\nusing environment variables. 
Any environment variable with + prefix\n'MAGNUS_TRACK_' will be recorded as a metric captured during the step.\n\nYou can run this pipeline as:\n magnus execute -f + examples/concepts/experiment_tracking_env.yaml\n", + "internal_branch_name": "", + "steps": { + "shell": { + "type": "task", + "name": "shell", + "internal_name": "shell", + "internal_branch_name": "", + "is_composite": false + }, + "success": { + "type": "success", + "name": "success", + "internal_name": "success", + "internal_branch_name": "", + "is_composite": false + }, + "fail": { + "type": "fail", + "name": "fail", + "internal_name": "fail", + "internal_branch_name": "", + "is_composite": false + } + } + }, + "dag_hash": "4494aeb907ef950934fbcc34b226f72134d06687", + "execution_plan": "chained" + } + } + ``` + + +## Incremental tracking + +It is possible to track metrics over time within a task. To do so, use the ```step``` parameter in the API +or post-fixing ```_STEP_``` and the increment when using environment variables. + +The step is defaulted to be 0. + +### Example + +=== "Using the API" + + The highlighted lines in the below example show how to [use the API](/interactions/#magnus.track_this) with + the step parameter. + + You can run this example by ```python run examples/concepts/experiment_tracking_step.py``` + + ```python linenums="1" hl_lines="11 25-28" + --8<-- "examples/concepts/experiment_tracking_step.py" + ``` + +=== "Using environment variables" + + The highlighted lines in the below example show how to use environment variables to track metrics. + + ```yaml linenums="1" hl_lines="16-20" + --8<-- "examples/concepts/experiment_tracking_env_step.yaml" + ``` + +=== "Run log entry" + + ```json linenums="1" hl_lines="36-51" + { + "run_id": "blocking-stonebraker-1545", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "Emit Metrics": { + "name": "Emit Metrics", + "internal_name": "Emit Metrics", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "858c4df44f15d81139341641c63ead45042e0d89", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-09 15:45:34.940999", + "end_time": "2024-01-09 15:45:34.943648", + "duration": "0:00:00.002649", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": { + "spam": { + "0": "hello", + "1": "hey" + }, + "eggs": { + "0": { + "ham": "world" + }, + "1": { + "ham": "universe" + } + }, + "answer": 42.0, + "is_it_true": false + }, + "branches": {}, + "data_catalog": [ + { + "name": "Emit_Metrics.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "blocking-stonebraker-1545/Emit_Metrics.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "858c4df44f15d81139341641c63ead45042e0d89", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 
1, + "start_time": "2024-01-09 15:45:35.126659", + "end_time": "2024-01-09 15:45:35.126745", + "duration": "0:00:00.000086", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "", + "tag": "", + "run_id": "blocking-stonebraker-1545", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "Emit Metrics", + "name": "", + "description": "", + "internal_branch_name": "", + "steps": { + "Emit Metrics": { + "type": "task", + "name": "Emit Metrics", + "internal_name": "Emit Metrics", + "internal_branch_name": "", + "is_composite": false + }, + "success": { + "type": "success", + "name": "success", + "internal_name": "success", + "internal_branch_name": "", + "is_composite": false + }, + "fail": { + "type": "fail", + "name": "fail", + "internal_name": "fail", + "internal_branch_name": "", + "is_composite": false + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + +## Experiment tracking tools + +!!! note "Opt out" + + Pipelines need not use the ```experiment-tracking``` if the preferred tools of choice is + not implemented in magnus. The default configuration of ```do-nothing``` is no-op by design. + We kindly request to raise a feature request to make us aware of the eco-system. + + +The default experiment tracking tool of magnus is a no-op as the ```run log``` captures all the +required details. To make it compatible with other experiment tracking tools like +[mlflow](https://mlflow.org/docs/latest/tracking.html) or +[Weights and Biases](https://wandb.ai/site/experiment-tracking), we map attributes of magnus +to the underlying tool. + +For example, for mlflow: + +- Any numeric (int/float) observation is logged as +[a metric](https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.log_metric) +with a step. + +- Any non numeric observation is logged as +[a parameter](https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.log_param). +Since mlflow does not support step wise logging of parameters, the key name is formatted as +```key_step```. + +- The tag associate with an execution is used as the +[experiment name](https://mlflow.org/docs/latest/tracking/tracking-api.html#organizing-runs-in-experiments). + + +!!! note inline end "Shortcomings" + + Experiment tracking capabilities of magnus are inferior in integration with + popular python frameworks like pytorch and tensorflow as compared to other + experiment tracking tools. + + We strongly advise to use them if you need advanced capabilities. + + +=== "Example configuration" + + In the below configuration, the mlflow tracking server is a local instance listening on port 8080. + + ```yaml linenums="1" hl_lines="13-16" + --8<-- "examples/configs/mlflow-config.yaml" + ``` + +=== "Pipeline" + + As with other examples, we are using the ```track_this``` python API to capture metrics. 
During the pipeline + execution in line #39, we use the configuration of ```mlflow``` as experiment tracking tool. + + The tag provided during the execution is used as a experiment name in mlflow. + + You can run this example by ```python run examples/concepts/experiment_tracking_integration.py``` + + ```python linenums="1" hl_lines="13 27-33 49" + --8<-- "examples/concepts/experiment_tracking_integration.py" + ``` -Magnus, by default, has an experiment tracking tools in its rich logging framework but this could be extended to plug -any of the experiment tracking tools like MLflow or Weights and Biases. -## Configuration +=== "In mlflow UI" -Configuration of a Experiment tracking tools is as follows: +
+    <figure markdown>
+        ![Image](/assets/screenshots/mlflow.png){ width="800" height="600"}
+        <figcaption>mlflow UI for the execution. The run_id remains the same as the run_id of magnus</figcaption>
+    </figure>
+
+    <figure markdown>
+        ![Image title](/assets/screenshots/mlflow_step.png){ width="800" height="600"}
+        <figcaption>The step wise metric plotted as a graph in mlflow</figcaption>
+    </figure>
+
diff --git a/docs/concepts/integration.md b/docs/concepts/integration.md deleted file mode 100644 index 24d1f9e4..00000000 --- a/docs/concepts/integration.md +++ /dev/null @@ -1,70 +0,0 @@ -# Integration - -Magnus at the core provides 5 services - -- A computational execution plan or an Executor. -- A run log store to store metadata and run logs. -- A cataloging functionality to pass data between steps and audibility trace. -- A framework to handle secrets. -- A framework to interact with experiment tracking tools. - -The executor plays the role of talking to other 4 service providers to process the graph, keep track of the status -of the run, pass data between steps and provide secrets. - -Depending upon the stage of execution, the executor might do one of the two actions - -- **traversing the graph**: For compute modes that just render instructions for other engines, the executor first - traverses the graph to understand the plan but does not actually execute. For interactive modes, the executor - traverses to set up the right environment for execution but defers the execution for later stage. - -- **executing the node**: The executor is actually in the compute environment that it has to be and executes the task. - -Magnus is designed to make the executor talk to the service providers at both these stages to understand the changes -needed for the config to make it happen via the ```BaseIntegration``` pattern. - -```python -# Source code present at magnus/integration.py ---8<-- "magnus/integration.py:docs" -``` - -The custom extensions should be registered as part of the namespace: ```magnus.integration.BaseIntegration``` for it -to be loaded. - -```toml -# For example, as part of your pyproject.toml -[tool.poetry.plugins."magnus.integration.BaseIntegration"] -# {executor.name}-{service}-{service.name} -"local-secrets-vault" = "YOUR_PACKAGE:LocalComputeSecretsVault" -``` - -All extensions need to be unique given a ```executor_type```, ```service_type``` and ```service_provider```. -Duplicate integrations will be raised as an exception. - - -## Example - -Consider the example of S3 Run log store. For the execution engine of ```local```, the aws credentials file is available -on the local machine and we can store the run logs in the S3 bucket. But for the executor ```local-container```, the -aws credentials file has to be mounted in the container for the container to have access to S3. - -This could be achieved by writing an integration pattern between S3 and ```local-container``` to do the same. - -```python -class LocalContainerComputeS3Store(BaseIntegration): - """ - Integration between local container and S3 run log store - """ - executor_type = 'local-container' - service_type = 'run-log-store' # One of secret, catalog, datastore - service_provider = 's3' # The actual implementation of the service - - def configure_for_traversal(self, **kwargs): - write_to = self.service.get_aws_credentials_file() - self.executor.volumes[str(Path(write_to).resolve())] = { - 'bind': '/root/.aws/credentials', - 'mode': 'ro' - } -``` - -We instruct the executor to mount the volumes containing the AWS credentials file as part of spinning the container to -make the credentials available to the running container. diff --git a/docs/concepts/map.md b/docs/concepts/map.md new file mode 100644 index 00000000..0a20b111 --- /dev/null +++ b/docs/concepts/map.md @@ -0,0 +1,850 @@ +```map``` nodes in magnus allows you to execute a sequence of nodes (i.e a pipeline) for all the items in a list. 
This is similar to +[Map state of AWS Step functions](https://docs.aws.amazon.com/step-functions/latest/dg/amazon-states-language-map-state.html) or +[loops in Argo workflows](https://argo-workflows.readthedocs.io/en/latest/walk-through/loops/). + +Conceptually, map node can be represented in python like below. + +```python +#technically it is async for +for i in iterable_parameter: + # a pipeline of steps + execute_first_step(i) + execute_second_step(i) + ... +``` + +You can control the parallelism by configuration of the executor. + +## Example + +Below is an example of processing a inventory of files (50) in parallel batches of 10 files per batch. +The ```stride``` parameter controls the chunk size and every batch is given the start index +of the files to process. + +=== "visualization" + + The step "chunk files" identifies the number of files to process and computes the start index of every + batch of files to process for a chunk size of 10, the stride. + + "Process Chunk" pipelines are then triggered in parallel to process the chunk of files between ```start index``` + and ```start index + stride``` + + ```mermaid + flowchart TD + chunkify([Chunk files]):::green + success([Success]):::green + + subgraph one[Process Chunk] + process_chunk1([Process Chunk]):::yellow + success_chunk1([Success]):::yellow + + process_chunk1 --> success_chunk1 + end + + subgraph two[Process Chunk] + process_chunk2([Process Chunk]):::yellow + success_chunk2([Success]):::yellow + + process_chunk2 --> success_chunk2 + end + + subgraph three[Process Chunk] + process_chunk3([Process Chunk]):::yellow + success_chunk3([Success]):::yellow + + process_chunk3 --> success_chunk3 + end + + subgraph four[Process Chunk] + process_chunk4([Process Chunk]):::yellow + success_chunk4([Success]):::yellow + + process_chunk4 --> success_chunk4 + end + + subgraph five[Process Chunk] + process_chunk5([Process Chunk]):::yellow + success_chunk5([Success]):::yellow + + process_chunk5 --> success_chunk5 + end + + + + chunkify -- (stride=10, start_index=0)--> one --> success + chunkify -- (stride=10, start_index=10)--> two --> success + chunkify -- (stride=10, start_index=20)--> three --> success + chunkify -- (stride=10, start_index=30)--> four --> success + chunkify -- (stride=10, start_index=40)--> five --> success + + classDef yellow stroke:#FFFF00 + classDef green stroke:#0f0 + ``` + +=== "python sdk" + + The ```start_index``` argument for the function ```process_chunk``` is dynamically set by iterating + over ```chunks```. + + If the argument ```start_index``` is not provided, you can still access the current + value by ```MAGNUS_MAP_VARIABLE``` environment variable. + The environment variable ```MAGNUS_MAP_VARIABLE``` is a dictionary with keys as + ```iterate_as``` + + This instruction is set while defining the map node. + + You can run this example by ```python examples/concepts/map.py``` + + ```python linenums="1" hl_lines="30-31 35 68-74" + --8<-- "examples/concepts/map.py" + ``` + + +=== "pipeline in yaml" + + The ```start_index``` argument for the function ```process_chunk``` is dynamically set by iterating + over ```chunks```. + + This instruction is set while defining the map node. + Note that the ```branch``` of the map node has a similar schema of the pipeline. 
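+
+    For illustration, a minimal sketch of the kind of function such a branch could call is shown below.
+    The body is hypothetical and only shows how ```start_index``` and ```stride``` arrive as arguments;
+    the bundled example may differ.
+
+    ```python
+    def process_chunk(stride: int, start_index: int):
+        # start_index is injected per iteration of the map node,
+        # stride comes from the parameters set by the "chunk files" step.
+        for file_index in range(start_index, start_index + stride):
+            print(f"processing file {file_index}")
+    ```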
+ + You can run this example by ```magnus execute examples/concepts/map.yaml``` + + ```yaml linenums="1" hl_lines="23-26" + --8<-- "examples/concepts/map.yaml" + ``` + +=== "pipeline with shell tasks" + + The task ```chunk files``` sets the parameters ```stride``` and ```chunks``` similar to the python + functions. + + The map branch "iterate and execute" iterates over chunks and exposes the current start_index of + as environment variable ```MAGNUS_MAP_VARIABLE```. + + The environment variable ```MAGNUS_MAP_VARIABLE``` is a json string with keys of the ```iterate_as```. + + You can run this example by ```magnus execute examples/concepts/map_shell.yaml``` + + ```yaml linenums="1" hl_lines="26-27 29-32" + --8<-- "examples/concepts/map_shell.yaml" + ``` + +=== "Run log" + + The step log of the ```iterate and execute``` has branches for every dynamically executed branch + of the format ```iterate and execute.```. + + ```json linenums="1" + { + "run_id": "simple-turing-0153", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "chunk files": { + "name": "chunk files", + "internal_name": "chunk files", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.038461", + "end_time": "2024-01-18 01:54:00.045343", + "duration": "0:00:00.006882", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "chunk_files.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "simple-turing-0153/chunk_files.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "iterate and execute": { + "name": "iterate and execute", + "internal_name": "iterate and execute", + "status": "SUCCESS", + "step_type": "map", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "iterate and execute.0": { + "internal_name": "iterate and execute.0", + "status": "SUCCESS", + "steps": { + "iterate and execute.0.execute": { + "name": "execute", + "internal_name": "iterate and execute.0.execute", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.221240", + "end_time": "2024-01-18 01:54:00.222560", + "duration": "0:00:00.001320", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + 
"branches": {}, + "data_catalog": [ + { + "name": "execute.execution.log_0", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "simple-turing-0153/execute.execution.log_0", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "iterate and execute.0.success": { + "name": "success", + "internal_name": "iterate and execute.0.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.301335", + "end_time": "2024-01-18 01:54:00.302161", + "duration": "0:00:00.000826", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "iterate and execute.10": { + "internal_name": "iterate and execute.10", + "status": "SUCCESS", + "steps": { + "iterate and execute.10.execute": { + "name": "execute", + "internal_name": "iterate and execute.10.execute", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.396194", + "end_time": "2024-01-18 01:54:00.397462", + "duration": "0:00:00.001268", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "execute.execution.log_10", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "simple-turing-0153/execute.execution.log_10", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "iterate and execute.10.success": { + "name": "success", + "internal_name": "iterate and execute.10.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.469211", + "end_time": "2024-01-18 01:54:00.470266", + "duration": "0:00:00.001055", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "iterate and execute.20": { + "internal_name": "iterate and execute.20", + "status": "SUCCESS", + "steps": { + "iterate and execute.20.execute": { + "name": "execute", + "internal_name": "iterate and execute.20.execute", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": 
[ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.558053", + "end_time": "2024-01-18 01:54:00.561472", + "duration": "0:00:00.003419", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "execute.execution.log_20", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "simple-turing-0153/execute.execution.log_20", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "iterate and execute.20.success": { + "name": "success", + "internal_name": "iterate and execute.20.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.660092", + "end_time": "2024-01-18 01:54:00.661215", + "duration": "0:00:00.001123", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "iterate and execute.30": { + "internal_name": "iterate and execute.30", + "status": "SUCCESS", + "steps": { + "iterate and execute.30.execute": { + "name": "execute", + "internal_name": "iterate and execute.30.execute", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.765689", + "end_time": "2024-01-18 01:54:00.766705", + "duration": "0:00:00.001016", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "execute.execution.log_30", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "simple-turing-0153/execute.execution.log_30", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "iterate and execute.30.success": { + "name": "success", + "internal_name": "iterate and execute.30.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.851112", + "end_time": 
"2024-01-18 01:54:00.852454", + "duration": "0:00:00.001342", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "iterate and execute.40": { + "internal_name": "iterate and execute.40", + "status": "SUCCESS", + "steps": { + "iterate and execute.40.execute": { + "name": "execute", + "internal_name": "iterate and execute.40.execute", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:00.950911", + "end_time": "2024-01-18 01:54:00.952000", + "duration": "0:00:00.001089", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "execute.execution.log_40", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "simple-turing-0153/execute.execution.log_40", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "iterate and execute.40.success": { + "name": "success", + "internal_name": "iterate and execute.40.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:01.032790", + "end_time": "2024-01-18 01:54:01.034254", + "duration": "0:00:00.001464", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "30ca73bb01ac45db08b1ca75460029da142b53fa", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 01:54:01.141928", + "end_time": "2024-01-18 01:54:01.142928", + "duration": "0:00:00.001000", + "status": "SUCCESS", + "message": "", + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": { + "chunks": [ + 0, + 10, + 20, + 30, + 40 + ], + "stride": 10 + }, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": 
"do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/fs-catalog-run_log.yaml", + "tag": "", + "run_id": "simple-turing-0153", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "chunk files", + "name": "", + "description": "", + "steps": { + "chunk files": { + "type": "task", + "name": "chunk files", + "next": "iterate and execute", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.map.chunk_files", + "node_name": "chunk files" + }, + "iterate and execute": { + "type": "map", + "name": "iterate and execute", + "is_composite": true, + "next": "success", + "on_failure": "", + "executor_config": {}, + "iterate_on": "chunks", + "iterate_as": "start_index", + "branch": { + "start_at": "execute", + "name": "", + "description": "", + "steps": { + "execute": { + "type": "task", + "name": "execute", + "next": "success", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.map.process_chunk", + "node_name": "execute" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + } + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + +## Traversal + +A branch of a map step is considered success only if the ```success``` step is reached at the end. +The steps of the pipeline can fail and be handled by [on failure](/concepts/pipeline/#on_failure) and +redirected to ```success``` if that is the desired behavior. + +The map step is considered successful only if all the branches of the step have terminated successfully. + + +## Parameters + +All the tasks defined in the branches of the map pipeline can +[access to parameters and data as usual](/concepts/task). + + +!!! warning + + The parameters can be updated by all the tasks and the last task to execute overwrites + the previous changes. + + Since the order of execution is not guaranteed, its best to avoid mutating the same parameters in + the steps belonging to map step. diff --git a/docs/concepts/nesting.md b/docs/concepts/nesting.md new file mode 100644 index 00000000..cdd3874c --- /dev/null +++ b/docs/concepts/nesting.md @@ -0,0 +1,1353 @@ +As seen from the definitions of [parallel](/concepts/parallel) or [map](/concepts/map), the branches are pipelines +themselves. This allows for deeply nested workflows in **magnus**. + +Technically there is no limit in the depth of nesting but there are some practical considerations. + + +- Not all workflow engines that magnus can transpile the workflow to support deeply nested workflows. +AWS Step functions and Argo workflows support them. + +- Deeply nested workflows are complex to understand and debug during errors. 
+ + +## Example + + +=== "python sdk" + + + You can run this pipeline by ```python examples/concepts/nesting.py``` + + ```python linenums="1" + --8<-- "examples/concepts/nesting.py" + ``` + +=== "yaml" + + You can run this pipeline by ```magnus execute examples/concepts/nesting.yaml``` + + ```yaml linenums="1" + --8<-- "examples/concepts/nesting.yaml" + ``` + +=== "Run log" + +
+ Click to expand! + + ```json + { + "run_id": "bipartite-neumann-1913", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "generate list": { + "name": "generate list", + "internal_name": "generate list", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:49.748656", + "end_time": "2024-01-18 19:13:49.756826", + "duration": "0:00:00.008170", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "generate_list.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "bipartite-neumann-1913/generate_list.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "outer most": { + "name": "outer most", + "internal_name": "outer most", + "status": "SUCCESS", + "step_type": "map", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "outer most.0": { + "internal_name": "outer most.0", + "status": "SUCCESS", + "steps": { + "outer most.0.nested parallel": { + "name": "nested parallel", + "internal_name": "outer most.0.nested parallel", + "status": "SUCCESS", + "step_type": "parallel", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "outer most.0.nested parallel.a": { + "internal_name": "outer most.0.nested parallel.a", + "status": "SUCCESS", + "steps": { + "outer most.0.nested parallel.a.inner most": { + "name": "inner most", + "internal_name": "outer most.0.nested parallel.a.inner most", + "status": "SUCCESS", + "step_type": "map", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "outer most.0.nested parallel.a.inner most.0": { + "internal_name": "outer most.0.nested parallel.a.inner most.0", + "status": "SUCCESS", + "steps": { + "outer most.0.nested parallel.a.inner most.0.executable": { + "name": "executable", + "internal_name": "outer most.0.nested parallel.a.inner most.0.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + 
"code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:49.997158", + "end_time": "2024-01-18 19:13:49.997172", + "duration": "0:00:00.000014", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.0.nested parallel.a.inner most.0.success": { + "name": "success", + "internal_name": "outer most.0.nested parallel.a.inner most.0.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.060734", + "end_time": "2024-01-18 19:13:50.061345", + "duration": "0:00:00.000611", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "outer most.0.nested parallel.a.inner most.1": { + "internal_name": "outer most.0.nested parallel.a.inner most.1", + "status": "SUCCESS", + "steps": { + "outer most.0.nested parallel.a.inner most.1.executable": { + "name": "executable", + "internal_name": "outer most.0.nested parallel.a.inner most.1.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.131067", + "end_time": "2024-01-18 19:13:50.131078", + "duration": "0:00:00.000011", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.0.nested parallel.a.inner most.1.success": { + "name": "success", + "internal_name": "outer most.0.nested parallel.a.inner most.1.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.194038", + "end_time": "2024-01-18 19:13:50.194978", + "duration": "0:00:00.000940", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "outer most.0.nested parallel.a.success": { + "name": "success", + "internal_name": "outer most.0.nested parallel.a.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + 
"code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.263302", + "end_time": "2024-01-18 19:13:50.264215", + "duration": "0:00:00.000913", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "outer most.0.nested parallel.b": { + "internal_name": "outer most.0.nested parallel.b", + "status": "SUCCESS", + "steps": { + "outer most.0.nested parallel.b.inner most": { + "name": "inner most", + "internal_name": "outer most.0.nested parallel.b.inner most", + "status": "SUCCESS", + "step_type": "map", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "outer most.0.nested parallel.b.inner most.0": { + "internal_name": "outer most.0.nested parallel.b.inner most.0", + "status": "SUCCESS", + "steps": { + "outer most.0.nested parallel.b.inner most.0.executable": { + "name": "executable", + "internal_name": "outer most.0.nested parallel.b.inner most.0.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.402511", + "end_time": "2024-01-18 19:13:50.402525", + "duration": "0:00:00.000014", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.0.nested parallel.b.inner most.0.success": { + "name": "success", + "internal_name": "outer most.0.nested parallel.b.inner most.0.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.468196", + "end_time": "2024-01-18 19:13:50.469218", + "duration": "0:00:00.001022", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "outer most.0.nested parallel.b.inner most.1": { + "internal_name": "outer most.0.nested parallel.b.inner most.1", + "status": "SUCCESS", + "steps": { + "outer most.0.nested parallel.b.inner most.1.executable": { + "name": "executable", + "internal_name": "outer most.0.nested parallel.b.inner most.1.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": 
"99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.543884", + "end_time": "2024-01-18 19:13:50.543896", + "duration": "0:00:00.000012", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.0.nested parallel.b.inner most.1.success": { + "name": "success", + "internal_name": "outer most.0.nested parallel.b.inner most.1.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.610499", + "end_time": "2024-01-18 19:13:50.611839", + "duration": "0:00:00.001340", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "outer most.0.nested parallel.b.success": { + "name": "success", + "internal_name": "outer most.0.nested parallel.b.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.682749", + "end_time": "2024-01-18 19:13:50.684374", + "duration": "0:00:00.001625", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "outer most.0.success": { + "name": "success", + "internal_name": "outer most.0.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.763079", + "end_time": "2024-01-18 19:13:50.763895", + "duration": "0:00:00.000816", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "outer most.1": { + "internal_name": "outer most.1", + "status": "SUCCESS", + "steps": { + "outer most.1.nested parallel": { + "name": "nested parallel", + "internal_name": "outer most.1.nested parallel", + "status": "SUCCESS", + "step_type": "parallel", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + 
"code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "outer most.1.nested parallel.a": { + "internal_name": "outer most.1.nested parallel.a", + "status": "SUCCESS", + "steps": { + "outer most.1.nested parallel.a.inner most": { + "name": "inner most", + "internal_name": "outer most.1.nested parallel.a.inner most", + "status": "SUCCESS", + "step_type": "map", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "outer most.1.nested parallel.a.inner most.0": { + "internal_name": "outer most.1.nested parallel.a.inner most.0", + "status": "SUCCESS", + "steps": { + "outer most.1.nested parallel.a.inner most.0.executable": { + "name": "executable", + "internal_name": "outer most.1.nested parallel.a.inner most.0.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:50.981456", + "end_time": "2024-01-18 19:13:50.981467", + "duration": "0:00:00.000011", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.1.nested parallel.a.inner most.0.success": { + "name": "success", + "internal_name": "outer most.1.nested parallel.a.inner most.0.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.045547", + "end_time": "2024-01-18 19:13:51.046526", + "duration": "0:00:00.000979", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "outer most.1.nested parallel.a.inner most.1": { + "internal_name": "outer most.1.nested parallel.a.inner most.1", + "status": "SUCCESS", + "steps": { + "outer most.1.nested parallel.a.inner most.1.executable": { + "name": "executable", + "internal_name": "outer most.1.nested parallel.a.inner most.1.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.116489", + "end_time": 
"2024-01-18 19:13:51.116501", + "duration": "0:00:00.000012", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.1.nested parallel.a.inner most.1.success": { + "name": "success", + "internal_name": "outer most.1.nested parallel.a.inner most.1.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.180471", + "end_time": "2024-01-18 19:13:51.181726", + "duration": "0:00:00.001255", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "outer most.1.nested parallel.a.success": { + "name": "success", + "internal_name": "outer most.1.nested parallel.a.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.253035", + "end_time": "2024-01-18 19:13:51.254294", + "duration": "0:00:00.001259", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "outer most.1.nested parallel.b": { + "internal_name": "outer most.1.nested parallel.b", + "status": "SUCCESS", + "steps": { + "outer most.1.nested parallel.b.inner most": { + "name": "inner most", + "internal_name": "outer most.1.nested parallel.b.inner most", + "status": "SUCCESS", + "step_type": "map", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "outer most.1.nested parallel.b.inner most.0": { + "internal_name": "outer most.1.nested parallel.b.inner most.0", + "status": "SUCCESS", + "steps": { + "outer most.1.nested parallel.b.inner most.0.executable": { + "name": "executable", + "internal_name": "outer most.1.nested parallel.b.inner most.0.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.399358", + "end_time": "2024-01-18 19:13:51.399368", + "duration": "0:00:00.000010", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ 
+ 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.1.nested parallel.b.inner most.0.success": { + "name": "success", + "internal_name": "outer most.1.nested parallel.b.inner most.0.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.465371", + "end_time": "2024-01-18 19:13:51.466805", + "duration": "0:00:00.001434", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "outer most.1.nested parallel.b.inner most.1": { + "internal_name": "outer most.1.nested parallel.b.inner most.1", + "status": "SUCCESS", + "steps": { + "outer most.1.nested parallel.b.inner most.1.executable": { + "name": "executable", + "internal_name": "outer most.1.nested parallel.b.inner most.1.executable", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.536944", + "end_time": "2024-01-18 19:13:51.536959", + "duration": "0:00:00.000015", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "outer most.1.nested parallel.b.inner most.1.success": { + "name": "success", + "internal_name": "outer most.1.nested parallel.b.inner most.1.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.602562", + "end_time": "2024-01-18 19:13:51.604264", + "duration": "0:00:00.001702", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "outer most.1.nested parallel.b.success": { + "name": "success", + "internal_name": "outer most.1.nested parallel.b.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.676208", + "end_time": "2024-01-18 19:13:51.678050", + "duration": "0:00:00.001842", + "status": "SUCCESS", + "message": 
"", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "outer most.1.success": { + "name": "success", + "internal_name": "outer most.1.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.760988", + "end_time": "2024-01-18 19:13:51.762012", + "duration": "0:00:00.001024", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "99139c3507898c60932ad5d35c08b395399a19f6", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 19:13:51.863908", + "end_time": "2024-01-18 19:13:51.863975", + "duration": "0:00:00.000067", + "status": "SUCCESS", + "message": "", + "parameters": { + "array": [ + 0, + 1 + ] + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": { + "array": [ + 0, + 1 + ] + }, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "", + "tag": "", + "run_id": "bipartite-neumann-1913", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "generate list", + "name": "", + "description": "", + "steps": { + "generate list": { + "type": "task", + "name": "generate list", + "next": "outer most", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.nesting.generate_list", + "node_name": "generate list" + }, + "outer most": { + "type": "map", + "name": "outer most", + "is_composite": true, + "next": "success", + "on_failure": "", + "executor_config": {}, + "iterate_on": "array", + "iterate_as": "x", + "branch": { + "start_at": "nested parallel", + "name": "", + "description": "", + "steps": { + "nested parallel": { + "type": "parallel", + "name": "nested parallel", + "next": "success", + "on_failure": "", + "executor_config": {}, + "branches": { + "a": { + "start_at": "inner most", + "name": "", + "description": "", + "steps": { + "inner most": { + "type": "map", + "name": "inner most", + "is_composite": true, + "next": "success", + "on_failure": "", + 
"executor_config": {}, + "iterate_on": "array", + "iterate_as": "y", + "branch": { + "start_at": "executable", + "name": "", + "description": "", + "steps": { + "executable": { + "type": "stub", + "name": "executable", + "next": "success", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + } + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "b": { + "start_at": "inner most", + "name": "", + "description": "", + "steps": { + "inner most": { + "type": "map", + "name": "inner most", + "is_composite": true, + "next": "success", + "on_failure": "", + "executor_config": {}, + "iterate_on": "array", + "iterate_as": "y", + "branch": { + "start_at": "executable", + "name": "", + "description": "", + "steps": { + "executable": { + "type": "stub", + "name": "executable", + "next": "success", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + } + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + } + } + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + } + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` +
diff --git a/docs/concepts/nodes.md b/docs/concepts/nodes.md deleted file mode 100644 index 6c14e3d0..00000000 --- a/docs/concepts/nodes.md +++ /dev/null @@ -1,792 +0,0 @@ -# Nodes - ---- - -Nodes are fundamentally the smallest logical unit of work that you want to execute. Though there is no explicit -guidelines on how big or small a node should be, we advice that the node becomes a part of narrative of the -whole project. - -For example, lets take a scenario where you perform some data cleaning task before you are ready to transform/train -a machine learning model. The data cleaning task could be one single *task* node or single *dag* node -(which internally is a graph) if you have too many steps. The choice is completely yours to make and -depends on the narrative of the project. - -Nodes in magnus can be logically split into 3 types: - -- **Execution**: fundamentally this is a **python function call or Shell command** that you want to call as part of the -pipeline. Task and As-Is node is the only nodes of this type. - -- **Status**: nodes that denote the eventual status of a graph/sub-graph. Success or Fail nodes are -examples of this type. All dag definitions should have **one and exactly one** node of this type and -the status of the dag is basically the type of status node it hits at the end. - -- **Composite**: nodes that are **sub-graphs** by itself. Parallel, Dag and Map are examples of this type and -all three have different use cases. Nesting of composite nodes is possible, though we advise to keep the -nesting simple to promote readability. - ---- -!!! Note - - Node names cannot have . or % in them. - Any valid python string is acceptable as a name of the step. - ---- - -## Task - -The smallest executable of the pipeline or in python language, the function call that you want to call as -part of the the pipeline. In magnus, a task node has the following configuration. - -```yaml -step name: - retry: 1 # Defaults to 1 - type: task - next: - command: - command_type: # Defaults to python - on_failure: # Defaults to None - mode_config: # Defaults to None - catalog: # Defaults to None - compute_data_folder: - get: - put: -``` - -Or via the Python SDK: - -```python -from magnus import Task - -first = Task(name: str, command: str, command_type: str = 'python', - command_config: Optional[dict]=None, catalog: Optional[dict]=None, - mode_config: Optional[dict]=None, retry: int = 1, on_failure: str = '', next_node:str=None) -``` -The name given to the task has the same behavior as the ```step name``` given in the yaml definition. - -### command (required) - -The name of the actual function/shell executable you want to call as part of the pipeline. - -For example, for the following function, the command would be ```my_module.my_cool_function```. - -```python - -# In my_module.py - -def my_cool_function(): - pass -``` - -### command_type (optional) -Defaults to python if nothing is provided. For more information, please refer [command types](../command-types) - -### retry (optional) -The number of attempts to make before failing the node. Default to 1. - -For local executions, this is always be 1 independent of the actual ```retry``` value. -For cloud based implementations, the retry value is passed to the implementation. - -### next (required) -The name of the node in the graph to go if the node succeeds. - -```next``` is optional via SDK as it is assigned during pipeline construction stage. - -### on_failure (optional) -The name of the node in the graph to go if the node fails. 
-This is optional as we would move to the fail node of the graph if one is not provided. - -On_failure could be an use case where you want to send a failure notification before marking the run as failure. - -### mode_config (optional) -Use this section to pass instructions to the executor. -For example, we can instruct the ```local-container``` executor to use a different docker image to run this step of the -pipeline. - -Example usage of mode_config: - -```yaml -# In config.yaml -executor: - type: local-container - config: - docker_image: python:3.7 - -# In pipeline.yaml -dag: - start_at: Cool function - steps: - Cool function: - type: task - command: my_module.my_cool_function - next: Clean Up - Clean Up: - type: task - command: clean_up.sh - command_type: shell - executor_config: - docker_image: ubuntu:latest - next: Success - Success: - type: success - Fail: - type: fail -``` - -Or the same pipeline via the Python SDK: - -```python -# In pipeline.py -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='Cool function', command='my_module.my_cool_function') - second = Task(name='Clean Up', command='clean_up.sh', command_type='shell', - executor_config={'docker_image': 'ubunutu:latest'}) - - pipeline = pipeline(name='my pipeline') - pipeline.construct([first, second]) - pipeline.execute(configuration_file='config.yaml') - -if __name__ == '__main__': - pipeline() - -``` - -In the above example, while all the steps except for ```Clean Up``` happen in python3.7 docker image, the ```Clean Up``` -happens in Ubuntu. - -mode_config provides a way for dag to have customizable instructions to the executor. - -### catalog (optional) - -compute_data_folder: The folder where we need to sync-in or sync-out the data to the [catalog](../catalog). -If it is not provided, -it defaults to the global catalog settings. - -get: The files to sync-in from the catalog to the compute data folder, prior execution. - -put: The files to sync-out from the compute data folder to the catalog, post execution. - -Glob pattern naming in get or put are fully supported, internally we use -[Pathlib match function](https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.match) -to match the name to pattern. - -Example catalog settings: -```yaml -catalog: - compute_data_folder: data/ - get: - - '*' - put: - - 'cleaned*' -``` - -or the same in Python SDK: - -```python -from magnus import Task - -catalog = { - 'compute_data_folder' : 'data/', - 'get': ['*'], - 'put': ['cleaned*'] -} - -first = Task(name='Cool function', command='my_module.my_cool_function', catalog=catalog) - -``` - -In this, we sync-in all the files from the catalog to the compute data folder, data prior to the execution and -sync-out all files started with *cleaned* to the catalog after the execution. - -
-Logically, magnus does the following when executing a task: - -1. Check the catalog-get list for any files that have to be synced to compute data folder. -2. Inspect the function call to determine the arguments required to make the function call. - Retrieve them from the parameters or fail if not present. -3. Check if the function call has to executed in case of re-runs. If the previous re-run of the step -was successful, we skip it. -4. Make the actual function call, if we need to, and determine the result. -5. Check the catalog-put list for any files that have to be synced back to catalog from the compute data folder. - - -### next_node: - -In python SDK, you need to provide the next node of the execution using ```next_node``` unless the node ends in -```success``` state. If you want to end the graph execution to fail state, you can use ```next_node='fail'```. - - -## Success - -A status node of the graph. There should be **one and only one** success node per graph. -The traversal of the graph stops at this node with marking the run as success. -In magnus, this node can be configured as: - -```yaml -step name: - type: success -``` - -No other fields are required and should not be provided. - -## Fail - -A status node of the graph. There should be **one and only one** fail node per graph. -The traversal of the graph stops at this node with marking the run as fail. In magnus, this node can be configured as: - -```yaml -step name: - type: fail -``` - -No other fields are required and should not be provided. - -## Parallel - -Parallel node is a composite node that in it-self has sub-graphs. A good example is to construct independent -features of a training data in machine learning experiments. The number of branches in parallel node is static -and pre-determined. Each branch follows the same definition language as the graph. - -The configuration of a parallel node could be done as: - -```yaml -step name: - type: parallel - next: - on_failure: - branches: - branch_a: - ... - branch_b: - ... -``` - ---- -!!! Note - - This is not yet available via Python SDK. - ---- - - -### next (required) -The name of the node in the graph to go if the node succeeds - -### on_failure (optional) -The name of the node in the graph to go if the node fails. -This is optional as we would move to the fail node of the graph if one is not provided. - -on_failure could be an use case where you want to send a failure notification before marking the run as failure. - -### branches (required) - -The branches of the step that you want to parallelize. Each branch follows the same definition as a dag in itself. - -### Example - -```yaml -Feature generation: - type: parallel - next: ML training - branches: - One hot encoding: - start_at: encoder - steps: - encoder: - type: task - next: success_state - command: my_encoder.encode - success_state: - type: success - fail_state: - type: fail - Scaler: - start_at: scale - steps: - scale: - type: task - next: success_state - command: my_scaler.scale - success_state: - type: success - fail_state: - type: fail -``` - -In the example, "One hot encoding" and "Scaler" are two branches that are defined using the same definition -language as a dag and both together form the Feature generation step of the parent dag. - - ---- -!!! Note - - A parallel state in the dag is just a definition, the actual implementation depends upon the mode - and the support for parallelization. ---- - -## Dag - -Dag is a composite node which has one branch defined elsewhere. 
It is used to logically separate the complex details -of a pipeline into modular units. For example, a typical data science project would have a data gathering, data -cleaning, data transformation, modelling, prediction as steps. And it is understandable that these individual steps -could get complex and require many steps to function. Instead of complicating the parent pipeline, we can abstract the -individual steps into its own dag nodes. - -The configuration of a dag node is: - -```yaml -step name: - type: dag - dag_definition: - next: - on_failure: # optional -``` - ---- -!!! Note - - This is not yet available in Python SDK. - ---- - - -### dag_definition - -The yaml file containing the dag definition in "dag" block of the file. The dag definition should follow the same rules -as any other dag in magnus. - -### next (required) -The name of the node in the graph to go if the node succeeds - -### on_failure (optional) -The name of the node in the graph to go if the node fails. -This is optional as we would move to the fail node of the graph if one is not provided. - - -### Example - -```yaml -# Parent dag -dag: - start_at: Data Cleaning - steps: - Data Cleaning: - type: dag - next: Data Transformation - dag_definition: data-cleaning.yaml - Data Transformation: - type: dag - next: Modelling - dag_definition: data-transformation.yaml - Modelling: - type: dag - next: Success - dag_definition: modelling.yaml - Success: - type: success - Fail: - type: fail -``` - -```yaml -# data-cleaning.yaml -dag: - start_at: Remove numbers - steps: - Remove numbers: - type: task - next: Remove special characters - command: data_cleaning.remove_numbers - Remove special characters: - type: dag - next: Success - command: data_cleaning.remove_special_characters - Success: - type: success - Fail: - type: fail -``` - -In this example, the parent dag only captures the high level tasks required to perform a data science experiment -while the details of how data cleaning should be done are mentioned in data-cleaning.yaml. - - -## Map - -Map is a composite node consisting of one branch that can be iterated over a parameter. A typical use case would be -performing the same data cleaning operation on a bunch of files or the columns of a data frame. The parameter over which -the branch is iterated over should be provided and also be available to the dag at the execution time. - -The configuration of the map node: -```yaml -step name: - type: map - iterate_on: - iterate_as: - next: - on_failure: # Optional - branch: -``` - ---- -!!! Note - - This is not yet available in Python SDK. - ---- - - -### iterate_on (required) -The name of the parameter to iterate on. The parameter should be of type List in python and should be available in the -parameter space. - -### iterate_as (required) -The name of the argument that is expected by the task. - -For example: - -- Set a parameter by name x which is a list [1, 2, 3] -- A python task node as part of the map dag definition expects this argument as x_i as part function signature. -- You should set ```iterate_on``` as ```x``` and ```iterate_as``` as ```x_i``` - -### branch (required) -The branch to iterate over the parameter. The branch definition should follow the same rules as a dag definition. - -### next (required) -The name of the node in the graph to go if the node succeeds - -### on_failure (optional) -The name of the node in the graph to go if the node fails. -This is optional as we would move to the fail node of the graph if one is not provided. 
- - -### Example - -```yaml -dag: - start_at: List files - steps: - List files: - type: task - next: Clean files - command: my_module.list_files - Clean files: - type: map - next: Success - iterate_on: file_list - iterate_as: file_name - branch: - start_at: Task Clean Files - steps: - Task Clean Files: - type: task - command: my_module.clean_file - next: success - success: - type: success - fail: - type: fail - Success: - type: success - Fail: - type: fail -``` - -In this example dag definition, - -- We start with the step *List files*, that generates a list of files to be cleaned and sets it as a parameter -- The step *Clean files* contains a branch that would be iterated over the list of files found in the previous step. - -To be comprehensive, here is the stub implementations of the python code - -```python -# in my_module.py - -def list_files(): - file_list = ['a', 'b', 'c'] - # do some compute to figure out the actual list of files would be - # By returning a dictionary, you can set parameters that would be available for down stream steps. - return {'file_list' : file_list} - -def clean_file(file_name): - # Retrieve the file or use catalog to retrieve the file and perform the cleaning steps - pass -``` - - ---- -!!! Note - - A map state in the dag is just a definition, the actual implementation depends upon the mode - and the support for parallelization. ---- - -## As-Is - -As-is a convenience node or a designers node. It can be used to mock nodes while designing the overall pipeline design -without implementing anything in *interactive* modes. The same node can be used to render required -templates in *orchestration* modes. - -The configuration of as-is node: - -```yaml -step name: - type: as-is - command: - next: - -``` - -You can have arbitrary attributes assigned to the as-is node. - - -### command (optional) - -The command is purely optional in as-is node and even if one is provided it is not executed. - -### next (required) - -The name of the node in the graph to go if the node succeeds - - -### Example as mock node - -A very typical data science project workflow could be mocked by: - -```yaml -dag: - description: A mocked data science pipeline - start_at: Data Cleaning - steps: - Data Cleaning: - type: as-ias - next: Data Transformation - Data Transformation: - type: as-is - next: Modelling - Modelling: - type: as-is - next: Deploy - Deploy: - type: as-is - next: Success - Success: - type: success - Fail: - type: fail -``` - -In this example, we only wrote a skeleton of the pipeline and none of the steps are actually implemented. - -### Example as template - -Taking the same example, we can imagine that there is an executor which can deploy the trained ML model and requires -a template to be generated as part of the continuos integration. 
- -```yaml -executor: - type: - -dag: - description: A mocked data science pipeline - start_at: Data Cleaning - steps: - Data Cleaning: - type: task - command: my_module.data_cleaning - next: Data Transformation - Data Transformation: - type: task - command: my_module.data_transformation - next: Modelling - Modelling: - type: task - command: my_module.modelling - next: Deploy - Deploy: - type: as-is - render_string: > - python -m my_module.check_accuracy_threshold - cp models/trained_models to s3:// - - next: Success - Success: - type: success - Fail: - type: fail -``` - -In *interactive* modes the as-is does not do anything and succeeds every time but the same dag in *orchestrated* modes -can render a template that could be part of continuos integration process. - -Data science and ML research teams would thrive in interactive modes, given their experimental nature of work. As-Is -nodes gives a way to do experiments without changing the dag definition once it is ready to be deployed. - -As-is nodes also provide a way to inject scripts as steps for orchestrators that do not support all the -features of magnus. For example, if an orchestrator mode of your liking does not support map state, you can -use as-is to inject a script that behaves like a map state and triggers all the required jobs. - - -## Passing data - ---8<-- [start:how-do-i-pass-simple] - -In magnus, we classify 2 kinds of data sets that can be passed around to down stream steps. - -- Data: Processed files by an upstream step should be available for downstream steps when required. -[Catalog](../catalog) provides the way to do this. - -- Parameters: Any JSON serializable data can be passed to down stream steps. - -### Parameters from command line - - -Initial parameters to the application can be sent in via a parameters file. - -Example: - -```shell -magnus execute --file getting-started.yaml --parameters-file parameters.yaml -``` - -```yaml -# in parameters.yaml -arg1 : test -arg2: dev -``` - -Or via environmental variables: Any environmental variable with prefix ```MAGNUS_PRM_``` is considered as a magnus -parameter. Eg: ```MAGNUS_PRM_arg1=test``` or ```MAGNUS_PRM_arg2=dev```. - ---- -!!! Note - - Parameters via environmental variables over-ride the parameters defined via parameters file. ---- - - - -In this case, arg1 and arg2 are available as parameters to downstream steps. - -### Storing parameters - -Any JSON serializable dictionary returned from a task node is available as parameters to downstream steps. - -Example: - -```python - -def my_cool_function(): - return {'arg1': 'hello', 'arg2': {'hello', 'world'} } - -``` - -Or - -```python - -from magnus import store_parameter - -def my_cool_function(): - store_parameter(arg1='hello', 'arg2'={'hello', 'world'}) - -``` - -Or - -```python -import os -import json - -def my_cool_function(): - os.environ['MAGNUS_PRM_' + 'arg1'] = 'hello' - os.environ['MAGNUS_PRM_' + 'arg2'] = json.dumps({'hello', 'world'}) -``` - -All the three above ways store arg1 and arg2 for downstream steps. - -### Accessing parameters - -Any parameters set either at command line or by upstream nodes can be accessed by: - - -``` python -def my_cool_function(arg1, arg2=None): - pass - -``` -The function is inspected to find all *named* args and provided value if the key exists in the parameters. 
-
-or
-
-``` python
-
-import os
-
-def my_cool_function():
-    arg1 = os.environ['MAGNUS_PRM_arg1']
-    arg2 = os.environ['MAGNUS_PRM_arg2']
-
-```
-or
-
-``` python
-
-from magnus import get_parameter
-
-def my_cool_function():
-    arg1 = get_parameter('arg1')
-    arg2 = get_parameter('arg2')
-
-```
-
-or
-``` python
-from magnus import get_parameter
-
-def my_cool_function():
-    args = get_parameter()
-    arg1 = args['arg1']
-    arg2 = args['arg2']
-
-```
-Calling get_parameter with no key returns all parameters.
-
---8<-- [end:how-do-i-pass-simple]
-
-## Extensions
-
-[Extensions are being actively developed and can be found here.](https://github.com/AstraZeneca/magnus-extensions)
-
-You can extend and implement your own ```node_types``` by extending the ```BaseNode``` class.
-
-The base class has the following methods with only one of the two methods to be implemented for custom implementations.
-
-If the ```node.is_composite``` is ```True```, implement the ```execute_as_graph``` method.
-If the ```node.is_composite``` is ```False```, implement the ```execute``` method.
-
-```python
-# Source code present at magnus/nodes.py
---8<-- "magnus/nodes.py:docs"
-
-```
-
----
-!!! Note
-
-    The BaseNode has many other methods which are *private* and typically do not need modifications.
-    The Config datamodel of the custom class should have all the attributes that are required.
-
----
-
-
-
-The custom extensions should be registered as part of the namespace: ```nodes``` for it to be
-loaded.
-
-```toml
-# For example, as part of your pyproject.toml
-[tool.poetry.plugins."nodes"]
-"mail" = "YOUR_PACKAGE:MailTeam"
-```
diff --git a/docs/concepts/parallel.md b/docs/concepts/parallel.md
new file mode 100644
index 00000000..4c1e4ea9
--- /dev/null
+++ b/docs/concepts/parallel.md
@@ -0,0 +1,578 @@
+Parallel nodes in magnus allow you to run multiple pipelines in parallel and use your compute resources efficiently.
+
+## Example
+
+!!! note "Only stubs?"
+
+    All the steps in the below example are ```stubbed``` for convenience. The functionality is similar
+    even if the steps are execution units like ```tasks``` or any other nodes.
+
+    We support deeply [nested steps](/concepts/nesting). For example, a step in the parallel branch can be a ```map``` which internally
+    loops over a ```dag``` and so on. Though this functionality is useful, it can be difficult to debug and
+    understand in large code bases.
+
+Below is a stubbed-out example of a pipeline that trains two models in parallel and creates an ensemble model
+for inference. The two models, XGBoost and Random Forest (RF model), are trained in parallel, and the training of
+the ensemble model happens only after both models are (successfully) trained.
+
+=== "Visualisation"
+
+    In the visualisation below, the steps outlined in green happen in sequence, each waiting for the previous
+    step to complete successfully.
+
+    The branches outlined in yellow run in parallel to each other, but sequentially within each branch.
+ + + + ```mermaid + flowchart TD + + getFeatures([Get Features]):::green + trainStep(Train Models):::green + ensembleModel([Ensemble Modelling]):::green + inference([Run Inference]):::green + success([Success]):::green + + prepareXG([Prepare for XGBoost]):::yellow + trainXG([Train XGBoost]):::yellow + successXG([XGBoost success]):::yellow + prepareXG --> trainXG --> successXG + + trainRF([Train RF model]):::yellow + successRF([RF Model success]):::yellow + trainRF --> successRF + + + getFeatures --> trainStep + trainStep --> prepareXG + trainStep --> trainRF + successXG --> ensembleModel + successRF --> ensembleModel + ensembleModel --> inference + inference --> success + + + classDef yellow stroke:#FFFF00 + classDef green stroke:#0f0 + + + ``` + +=== "Pipeline in yaml" + + ```yaml linenums="1" + --8<-- "examples/concepts/parallel.yaml" + ``` + +=== "python sdk" + + You can run this example by: ```python examples/concepts/parallel.py``` + + ```python linenums="1" + --8<-- "examples/concepts/parallel.py" + ``` + +=== "Run log" + + The step log for the parallel branch ```Train models``` has branches which have similar + structure to a run log. + + ```json linenums="1" + { + "run_id": "savory-pike-0201", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "Get Features": { + "name": "Get Features", + "internal_name": "Get Features", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:10.978646", + "end_time": "2024-01-18 02:01:10.978665", + "duration": "0:00:00.000019", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Train Models": { + "name": "Train Models", + "internal_name": "Train Models", + "status": "SUCCESS", + "step_type": "parallel", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": { + "Train Models.XGBoost": { + "internal_name": "Train Models.XGBoost", + "status": "SUCCESS", + "steps": { + "Train Models.XGBoost.Prepare for XGBoost": { + "name": "Prepare for XGBoost", + "internal_name": "Train Models.XGBoost.Prepare for XGBoost", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.132822", + "end_time": "2024-01-18 02:01:11.132840", + "duration": "0:00:00.000018", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Train Models.XGBoost.Train XGBoost": { + 
"name": "Train XGBoost", + "internal_name": "Train Models.XGBoost.Train XGBoost", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.216418", + "end_time": "2024-01-18 02:01:11.216430", + "duration": "0:00:00.000012", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Train Models.XGBoost.success": { + "name": "success", + "internal_name": "Train Models.XGBoost.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.291222", + "end_time": "2024-01-18 02:01:11.292140", + "duration": "0:00:00.000918", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + }, + "Train Models.RF Model": { + "internal_name": "Train Models.RF Model", + "status": "SUCCESS", + "steps": { + "Train Models.RF Model.Train RF": { + "name": "Train RF", + "internal_name": "Train Models.RF Model.Train RF", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.379438", + "end_time": "2024-01-18 02:01:11.379453", + "duration": "0:00:00.000015", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Train Models.RF Model.success": { + "name": "success", + "internal_name": "Train Models.RF Model.success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.458716", + "end_time": "2024-01-18 02:01:11.459695", + "duration": "0:00:00.000979", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + } + } + }, + "data_catalog": [] + }, + "Ensemble Modelling": { + "name": "Ensemble Modelling", + "internal_name": "Ensemble Modelling", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + 
"code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.568072", + "end_time": "2024-01-18 02:01:11.568085", + "duration": "0:00:00.000013", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Run Inference": { + "name": "Run Inference", + "internal_name": "Run Inference", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.650023", + "end_time": "2024-01-18 02:01:11.650037", + "duration": "0:00:00.000014", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f0a2719001de9be30c27069933e4b4a64a065e2b", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-18 02:01:11.727802", + "end_time": "2024-01-18 02:01:11.728651", + "duration": "0:00:00.000849", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/fs-catalog-run_log.yaml", + "tag": "", + "run_id": "savory-pike-0201", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "Get Features", + "name": "", + "description": "", + "steps": { + "Get Features": { + "type": "stub", + "name": "Get Features", + "next": "Train Models", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "Train Models": { + "type": "parallel", + "name": "Train Models", + "next": "Ensemble Modelling", + "on_failure": "", + "executor_config": {}, + "branches": { + "XGBoost": { + "start_at": "Prepare for XGBoost", + "name": "", + "description": "", + "steps": { + "Prepare for XGBoost": { + "type": "stub", + "name": "Prepare for XGBoost", + "next": "Train XGBoost", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "Train XGBoost": { + "type": "stub", + "name": "Train XGBoost", + "next": "success", + "on_failure": "", + 
"executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "RF Model": { + "start_at": "Train RF", + "name": "", + "description": "", + "steps": { + "Train RF": { + "type": "stub", + "name": "Train RF", + "next": "success", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + } + } + }, + "Ensemble Modelling": { + "type": "stub", + "name": "Ensemble Modelling", + "next": "Run Inference", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "Run Inference": { + "type": "stub", + "name": "Run Inference", + "next": "success", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1 + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + + +All pipelines, nested or parent, have the same structure as defined in +[pipeline definition](/concepts/pipeline). + +The parent pipeline defines a step ```Train models``` which is a parallel step. +The branches, XGBoost and RF model, are pipelines themselves. + +## Traversal + +A branch of a parallel step is considered success only if the ```success``` step is reached at the end. +The steps of the pipeline can fail and be handled by [on failure](/concepts/pipeline/#on_failure) and +redirected to ```success``` if that is the desired behavior. + +The parallel step is considered successful only if all the branches of the step have terminated successfully. + + +## Parameters + +All the tasks defined in the branches of the parallel pipeline can +[access to parameters and data as usual](/concepts/task). + + +!!! warning + + The parameters can be updated by all the tasks and the last task to execute overwrites + the previous changes. + + Since the order of execution is not guaranteed, its best to avoid mutating the same parameters in + the steps belonging to parallel step. diff --git a/docs/concepts/parameters.md b/docs/concepts/parameters.md new file mode 100644 index 00000000..d6967bc9 --- /dev/null +++ b/docs/concepts/parameters.md @@ -0,0 +1,46 @@ +In magnus, ```parameters``` are python data types that can be passed from one ```task``` +to the next ```task```. These parameters can be accessed by the ```task``` either as +environment variables, arguments of the ```python function``` or using the +[API](/interactions). + +## Initial parameters + +The initial parameters of the pipeline can set by using a ```yaml``` file and presented +during execution + +```--parameters-file, -parameters``` while using the [magnus CLI](/usage/#usage) + +or by using ```parameters_file``` with [the sdk](/sdk/#magnus.Pipeline.execute). + +They can also be set using environment variables which override the parameters defined by the file. + +=== "yaml" + + Deeply nested yaml objects are supported. + + ```yaml + --8<-- "examples/concepts/parameters.yaml" + ``` + + +=== "environment variables" + + Any environment variables prefixed with ```MAGNUS_PRM_ ``` are interpreted as + parameters by the ```tasks```. 
+
+    The yaml formatted parameters can also be defined as:
+
+    ```shell
+    export MAGNUS_PRM_spam="hello"
+    export MAGNUS_PRM_eggs='{"ham": "Yes, please!!"}'
+    ```
+
+    Parameters defined by environment variables override parameters defined by
+    ```yaml```. This can be useful for quick experimentation without changing code.
+
+
+## Parameters flow
+
+Tasks can access and return parameters, and the patterns are specific to the
+```command_type``` of the task nodes. Please refer to [tasks](/concepts/task)
+for more information.
diff --git a/docs/concepts/pipeline.md b/docs/concepts/pipeline.md
new file mode 100644
index 00000000..4aadf3dd
--- /dev/null
+++ b/docs/concepts/pipeline.md
@@ -0,0 +1,231 @@
+???+ tip inline end "Steps"
+
+    In magnus, a step can be a simple ```task``` or ```stub```, or a complex nested pipeline like
+    ```parallel``` branches, embedded ```dags``` or dynamic workflows.
+
+    In this section, we use ```stub``` for convenience. For more in-depth information about other types,
+    please see the relevant section.
+
+
+
+In **magnus**, we use the words
+
+- ```dag```, ```workflow``` and ```pipeline``` interchangeably.
+- ```node``` and ```step``` interchangeably.
+
+
+A dag, or directed acyclic graph, is a way to define your pipelines.
+It is a graph representation of the list of tasks you want to perform and the order in which they should run.
+
+
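+As a rough, hand-written sketch (not one of the shipped examples; the step name is a placeholder), a dag
+definition has the following shape. The full example below follows the same structure:
+
+```yaml
+dag:
+  description: A minimal sketch of a pipeline.
+  start_at: say hello          # the first step to execute
+  steps:
+    say hello:
+      type: stub               # a mocked step; a real pipeline would use a task here
+      next: success            # where to go on successful completion
+    success:
+      type: success            # marks the run as successful
+    fail:
+      type: fail               # marks the run as failed
+```
+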
+ + +## Example + +Below is an example pipeline. + + + +=== "yaml" + + ``` yaml linenums="1" + --8<-- "examples/concepts/traversal.yaml" + ``` + + +=== "python" + + ``` python linenums="1" + --8<-- "examples/concepts/traversal.py" + ``` + +
+
+A closer look at the example:
+
+
+## start_at
+
+- [x] The ```start_at``` step is the starting node of the traversal.
+
+
+=== "yaml"
+
+    The value should be a valid key in ```steps```.
+
+    ```yaml linenums="10" hl_lines="1"
+    --8<-- "examples/concepts/traversal.yaml:10:12"
+    ```
+
+=== "python"
+
+    The node should be part of ```steps```.
+
+    ```python linenums="32" hl_lines="3"
+    --8<-- "examples/concepts/traversal.py:32:36"
+    ```
+
+By using a ```parallel``` node as the starting node, you can get the behavior of a multi-root graph.
+
+
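+For instance, a hand-written sketch of such a definition could look like the following. The step and branch
+names are placeholders and not part of any shipped example:
+
+```yaml
+dag:
+  start_at: ingest sources     # the traversal starts at a parallel node
+  steps:
+    ingest sources:
+      type: parallel
+      next: success
+      branches:
+        source A:
+          start_at: pull A
+          steps:
+            pull A:
+              type: stub
+              next: success
+            success:
+              type: success
+            fail:
+              type: fail
+        source B:
+          start_at: pull B
+          steps:
+            pull B:
+              type: stub
+              next: success
+            success:
+              type: success
+            fail:
+              type: fail
+    success:
+      type: success
+    fail:
+      type: fail
+```
+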
+
+## Steps
+
+- [x] Apart from the terminal nodes (```success``` and ```fail```), the pipeline should have at least
+one more node.
+
+
+
+???+ warning inline end "Step names"
+
+    In magnus, the names of steps should not have ```%``` or ```.``` in them.
+
+    You can make the names as descriptive as you want.
+
+
+=== "yaml"
+
+    ```yaml linenums="12"
+    --8<-- "examples/concepts/traversal.yaml:12:21"
+    ```
+
+=== "python"
+
+
+    ```python linenums="14" hl_lines="1-6 19-23"
+    --8<-- "examples/concepts/traversal.py:14:36"
+    ```
+
+
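+To make the naming rule from the warning above concrete, here is a hand-written fragment (the names are
+placeholders, not from the shipped examples):
+
+```yaml
+steps:
+  Clean raw data:              # descriptive names with spaces are fine
+    type: stub
+    next: success
+  # Names such as "clean.data" or "90% done" are not allowed:
+  # step names must not contain "." or "%".
+  success:
+    type: success
+  fail:
+    type: fail
+```
+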
+
+## Linking
+
+- [x] All nodes, except for the ```success``` and ```fail``` nodes, need to have a ```next```
+step to execute upon successful completion.
+
+
+
+
+Visually, the above pipeline can be seen as:
+
+???+ abstract inline end "Traversal"
+
+    Start at step 1.
+
+    If it is successful, go to the ```next``` step of the pipeline until we reach the success state.
+
+    Any failure in the execution of a step would, by default, go to the fail state.
+
+
+
+```mermaid
+stateDiagram-v2
+    state "Start at step 1" as start_at
+    state "step 2" as step_2
+    state "step 3" as step_3
+    state "Success" as success
+    state "Fail" as fail
+
+
+    [*] --> start_at
+    start_at --> step_2 : #9989;
+    step_2 --> step_3 : #9989;
+    step_3 --> success : #9989;
+    start_at --> fail: #10060;
+    step_2 --> fail: #10060;
+    step_3 --> fail: #10060;
+    success --> [*]
+    fail --> [*]
+```
+
+
+=== "yaml"
+
+    ```yaml linenums="15" hl_lines="4 7 10"
+    --8<-- "examples/concepts/traversal.yaml:12:21"
+    ```
+
+=== "python"
+
+
+    ```python linenums="14" hl_lines="7-17"
+    --8<-- "examples/concepts/traversal.py:14:36"
+    ```
+
+
+### on failure
+
+By default, any failure during the execution of a step will traverse to the ```fail``` node,
+marking the execution as failed. You can override this behavior by using ```on_failure```.
+
+=== "yaml"
+
+    ```yaml hl_lines="21"
+    --8<-- "examples/on-failure.yaml"
+    ```
+
+=== "python"
+
+    ```python hl_lines="10"
+    --8<-- "examples/on_failure.py"
+    ```
+
+=== "traversal"
+
+    ```mermaid
+    stateDiagram-v2
+        state "Start at step 1" as start_at
+        state "step 2" as step_2
+        state "step 3" as step_3
+        state "Success" as success
+
+
+        [*] --> start_at
+        start_at --> step_2 : #10060;
+        start_at --> step_3 : #9989;
+        step_3 --> success : #9989;
+        success --> [*]
+    ```
+
+
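+As an illustrative, hand-written sketch (the step names are placeholders), ```on_failure``` names the step to
+traverse to when a step fails, for example to send a notification before ending the run:
+
+```yaml
+dag:
+  start_at: step 1
+  steps:
+    step 1:
+      type: stub
+      next: step 2             # taken when step 1 succeeds
+      on_failure: send alert   # taken instead of the default fail node when step 1 fails
+    step 2:
+      type: stub
+      next: success
+    send alert:
+      type: stub
+      next: fail               # end the run as failed after the notification
+    success:
+      type: success
+    fail:
+      type: fail
+```
+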
+ +## Terminating +- [x] All pipelines should have one and only one Success and Fail state + +Reaching one of these states as part of traversal indicates the status of the pipeline. + +=== "yaml" + + The type determines the node to be a ```success``` or ``fail`` state. + + The name can be anything that you prefer. + + ``` yaml linenums="1" + --8<-- "examples/concepts/traversal.yaml:22:25" + ``` + +=== "python" + + Setting ```add_terminal_nodes``` to be ```true``` during pipeline creation adds + ```success``` and ```fail``` states with the names success and fail. + + ``` python linenums="1" hl_lines="4" + --8<-- "examples/concepts/traversal.py:31:35" + ``` + + Individual steps can link + + - success state by setting ```terminate_with_success``` to ```True``` + - fail state by setting ```terminate_with_fail``` to ```True``` + + You can, alternatively, create a ```success``` and ```fail``` state and link them together. + + ```python + from magnus import Success, Fail + + success = Success(name="Custom Success") + fail = Fail(name="Custom Failure") + + ``` diff --git a/docs/concepts/run-log-implementations/bufferred.md b/docs/concepts/run-log-implementations/bufferred.md deleted file mode 100644 index 25169227..00000000 --- a/docs/concepts/run-log-implementations/bufferred.md +++ /dev/null @@ -1,25 +0,0 @@ -# Buffered Run Log store - -This Run Log store does not store the logs any where but in memory during the execution of the pipeline. - -When to use: - -- During development phase of the project and there is no need to compare outputs between runs. -- For a quick debug of a run. - -When not to use: - -- When you need to compare outputs between runs or experiments. -- Close to production runs or in production unless you do not want to store any run logs. -- Other than Local compute execution, no other compute modes accept this as a Run Log store. - -## Configuration - -Buffered Run Log store is the default if nothing was provided in the config. - -The configuration is minimal and just needs: - -```yaml -run_log: - type: buffered -``` diff --git a/docs/concepts/run-log-implementations/chunked-fs.md b/docs/concepts/run-log-implementations/chunked-fs.md deleted file mode 100644 index a5f8a16a..00000000 --- a/docs/concepts/run-log-implementations/chunked-fs.md +++ /dev/null @@ -1,34 +0,0 @@ -# File System Run Log store - -This Run Log store stores the run logs on the file system as multiple thread safe files. - -It creates a folder with ```run_id``` of the run and stores the contents of the run log in it. - - -When to use: - -- When you want to compare logs between runs. -- During testing/debugging/developing in local environments. -- Especially useful when you have parallel processing of tasks. - - -When not to use: - -- Only Local and Local Container compute modes accept this as a Run Log Store. - -## Configuration - -The configuration is as follows: - -```yaml -run_log: - type: chunked-fs - config: - log_folder: -``` - -### log_folder - -The location of the folder where you want to write the run logs. - -Defaults to .run_log_store diff --git a/docs/concepts/run-log-implementations/file-system.md b/docs/concepts/run-log-implementations/file-system.md deleted file mode 100644 index 8f44dca2..00000000 --- a/docs/concepts/run-log-implementations/file-system.md +++ /dev/null @@ -1,34 +0,0 @@ -# File System Run Log store - -This Run Log store stores the run logs on the file system as one JSON file. - -The name of the json file is the ```run_id``` of the run. 
- -When to use: - -- When you want to compare logs between runs. -- During testing/debugging in local environments. - - -When not to use: - -- This Run Log store is not compliant when the pipeline has parallel branches and enabled for a parallel runs. - The results could be inconsistent and not reliable. -- Only Local and Local Container compute modes accept this as a Run Log Store. - -## Configuration - -The configuration is as follows: - -```yaml -run_log: - type: file-system - config: - log_folder: -``` - -### log_folder - -The location of the folder where you want to write the run logs. - -Defaults to .run_log_store diff --git a/docs/concepts/run-log.md b/docs/concepts/run-log.md index cdaf301f..3bb41405 100644 --- a/docs/concepts/run-log.md +++ b/docs/concepts/run-log.md @@ -1,470 +1,1246 @@ # Run Log -In magnus, run log captures all the information required to accurately describe a run. It should not -confused with your application logs, which are project dependent. Independent of the providers of any systems -(compute, secrets, run log, catalog), the structure of the run log would remain the same and should enable -you to compare between runs. +Internally, magnus uses a ```run log``` to keep track of the execution of the pipeline. It +also stores the parameters, experiment tracking metrics and reproducibility information captured during the execution. -To accurately recreate an older run either for debugging purposes or for reproducibility, it should capture all -the variables of the system and their state during the run. For the purple of data science applications, -it boils down to: +It should not be confused with application logs generated during the execution of a ```task``` i.e the stdout and stderr +when running the ```command``` of a task. -- Data: The source of the data and the version of it. -- Code: The code used to run the the experiment and the version of it. -- Environment: The environment the code ran in with all the system installations. -- Configuration: The pipeline definition and the configuration. +## Example -The Run Log helps in storing them systematically for every run with the best possible information on all of the above. +=== "pipeline" -## Structure of Run Log + This is the same example [described in tasks](/concepts/task/#shell). -A typical run log has the following structure, with a few definitions given inline. + tl;dr a pipeline that consumes some initial parameters and passes them + to the next step. Both the steps are ```shell``` based tasks. -```json -{ - "run_id": , - "dag_hash": , # The SHA id of the dag definition - "use_cached": , # True for a re-run, False otherwise - "tag": , # A friendly name given to a group of runs - "original_run_id": , # The run id of the older run in case of a re-run - "status": , - "steps": {}, - "parameters": {}, - "variables": {} -} -``` - -### run_id -Every run in magnus is given a unique ```run_id```. - -Magnus creates one based on the timestamp is one is not provided during the run time. - -During the execution of the run, the ```run_id``` can be obtained in the following ways: - - -```python -from magnus import get_run_id - -def my_function(): - run_id = get_run_id() # Returns the run_id of the current run -``` - - -or using environmental variable ```MAGNUS_RUN_ID```. - -```python -import os - -def my_function(): - run_id = os.environ['MAGNUS_RUN_ID'] # Returns the run_id of the current run -``` - - - - -### dag_hash - -The SHA id of the pipeline itself is stored here. 
- -In the case of re-run, we check the newly run pipeline hash against the older run to ensure they are the same. You -can force to re-run too if you are aware of the differences. - -### tag - -A friendly name that could be used to group multiple runs together. You can ```group``` multiple runs by the tag to -compare and track the experiments done in the group. - -### status - -A flag to denote the status of the run. The status could be: - -- success : If the graph or sub-graph succeeded, i.e reached the success node. -- fail: If the graph or sub-graph reached the fail node. Please note that a failure of a node does not imply failure of - the graph as you can configure conditional traversal of the nodes. -- processing: A temporary status if any of the nodes are currently being processed. -- triggered: A temporary status if any of the nodes triggered a remote job (in cloud, for example). - -### parameters - -A dictionary of key-value pairs available to all the nodes. - -Any ```kwargs``` present in the function signature, called as part of the pipeline, are resolved against this -dictionary and the values are set during runtime. - -### steps - -steps is a dictionary containing step log for every individual step of the pipeline. The structure of step log is -described below. - -## Structure of Step Log + ```yaml linenums="1" + --8<-- "examples/concepts/task_shell_parameters.yaml" + ``` -Every step of the dag have a corresponding step log. -The general structure follows, with a few explanations given inline. +=== "Run log" -```json -"step name": { - "name": , # The name of the step as given in the dag definition - "internal_name": , # The name of the step log in dot path convention - "status": , - "step_type": , # The type of step as per the dag definition - "message": , # Any message added to step by the run - "mock": , # Is True if the step was skipped in case of a re-run - "code_identities": [ - ], - "attempts": [ - ], - "user_defined_metrics": { - }, - "branches": {}, - "data_catalog": [] -} -``` - -### Naming Step Log -The name of the step log follows a convention, we refer, to as *dot path* convention. - -All the steps of the parent dag have the same exact name as the step name provided in the dag. - -The naming of the steps of the nested branches like parallel, map or dag are given below. -#### parallel step - -The steps of the parallel branch follow parent_step.branch_name.child_step name. - -
-Example - -The step log names are given in-line for ease of reading. -```yaml -dag: - start_at: Simple Step - steps: - Simple Step: # dot path name: Simple Step - type: as-is - next: Parallel - Parallel: # dot path name: Parallel - type: parallel - next: Success - branches: - Branch A: - start_at: Child Step A - steps: - Child Step A: # dot path name: Parallel.Branch A.Child Step A - type: as-is - next: Success - Success: # dot path name: Parallel.Branch A.Success - type: success - Fail: # dot path name: Parallel.Branch A.Fail - type: fail - Branch B: - start_at: Child Step B - steps: - Child Step B: # dot path name: Parallel.Branch B. Child Step B - type: as-is - next: Success - Success: # dot path name: Parallel.Branch B.Success - type: success - Fail: # dot path name: Parallel.Branch B.Fail - type: fail - Success: # dot path name: Success - type: success - Fail: # dot path name: Fail - type: fail -``` - -
-#### dag step - -The steps of the dag branch follow parent_step.branch.child_step_name. -Here *branch* is a special name given to keep the naming always consistent. - -
-Example - -The step log names are given in-line for ease of reading. -```yaml -dag: - start_at: Simple Step - steps: - Simple Step: # dot path name: Simple Step - type: as-is - next: Dag - Dag: # dot path name: Dag - type: dag - next: Success - branch: - steps: - Child Step: # dot path name: Dag.branch.Child Step - type: as-is - next: Success - Success: # dot path name: Dag.branch.Success - type: success - Fail: # dot path name: Dag.branch.Fail - type: fail - Success: # dot path name: Success - type: success - Fail: # dot path name: Fail - type: fail -``` - -
- -#### map step - -The steps of the map branch follow parent_step.{value of iter_variable}.child_step_name. - -
-Example - -```yaml -dag: - start_at: Simple Step - steps: - Simple Step: # dot path name: Simple Step - type: as-is - next: Map - Map: # dot path name: Map - type: map - iterate_on: y - next: Success - branch: - steps: - Child Step: - type: as-is - next: Success - Success: - type: success - Fail: - type: fail - Success: # dot path name: Success - type: success - Fail: # dot path name: Fail - type: fail -``` - -If the value of parameter y turns out to be ['A', 'B'], the step log naming convention would by dynamic and have -Map.A.Child Step, Map.A.Success, Map.A.Fail and Map.B.Child Step, Map.B.Success, Map.B.Fail - -
- - -### status - -A flag to denote the status of the step. The status could be: - -- success : If the step succeeded. -- fail: If the step failed. -- processing: A temporary status if current step is being processed. - - -### code identity - -As part of the log, magnus captures any possible identification of the state of the code and environment. - -This section is only present for *Execution* nodes. - -An example code identity if the code is git controlled - -```json -"code_identities": [ + ```json linenums="1" { - "code_identifier": "1486bd7fbe27d57ff4a9612e8dabe6a914bc4eb5", # Git commit id - "code_identifier_type": "git", # Git - "code_identifier_dependable": true, # A flag to track if git tree is clean - "code_identifier_url": "ssh://git@##################.git", # The remote URL of the repo - "code_identifier_message": "" # Lists all the files that were found to be unclean as per git + "run_id": "devout-jones-0640", + "dag_hash": "9070f0b9c661d4ff7a23647cbe0ed2d461b9a26e", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "access initial": { + "name": "access initial", + "internal_name": "access initial", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ca4c5fbff4148d3862a4738942d4607a9c4f0d88", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-12-30 06:40:55.188207", + "end_time": "2023-12-30 06:40:55.202317", + "duration": "0:00:00.014110", + "status": "SUCCESS", + "message": "", + "parameters": { + "spam": "Hello", + "eggs": { + "ham": "Yes, please!!" + } + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "access_initial.execution.log", + "data_hash": "8a18b647052b3c85020beb2024f2a25289fe955b1421026008521b12cff4f44c", + "catalog_relative_path": "devout-jones-0640/access_initial.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "modify initial": { + "name": "modify initial", + "internal_name": "modify initial", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ca4c5fbff4148d3862a4738942d4607a9c4f0d88", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-12-30 06:40:55.266858", + "end_time": "2023-12-30 06:40:55.281405", + "duration": "0:00:00.014547", + "status": "SUCCESS", + "message": "", + "parameters": { + "spam": "Hello", + "eggs": { + "ham": "Yes, please!!" 
+ } + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "modify_initial.execution.log", + "data_hash": "9dea22c132992504146374f6ac7cfe2f5510da78ca3bb5cc576abcfde0a4da3c", + "catalog_relative_path": "devout-jones-0640/modify_initial.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "display again": { + "name": "display again", + "internal_name": "display again", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ca4c5fbff4148d3862a4738942d4607a9c4f0d88", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-12-30 06:40:55.354662", + "end_time": "2023-12-30 06:40:55.366113", + "duration": "0:00:00.011451", + "status": "SUCCESS", + "message": "", + "parameters": { + "spam": "World", + "eggs": { + "ham": "No, Thank you!!" + } + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "display_again.execution.log", + "data_hash": "9126727342ebef3d3635db294708ad96b49092bf3680da8f38490ea84844c8d4", + "catalog_relative_path": "devout-jones-0640/display_again.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ca4c5fbff4148d3862a4738942d4607a9c4f0d88", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-12-30 06:40:55.431271", + "end_time": "2023-12-30 06:40:55.431327", + "duration": "0:00:00.000056", + "status": "SUCCESS", + "message": "", + "parameters": { + "spam": "Universe", + "eggs": { + "ham": "Maybe, one more.." + } + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": { + "spam": "Universe", + "eggs": { + "ham": "Maybe, one more.." 
+ } + }, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "compute_data_folder": "data" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/concepts/task_shell_parameters.yaml", + "parameters_file": "examples/concepts/parameters.yaml", + "configuration_file": null, + "tag": "", + "run_id": "devout-jones-0640", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "access initial", + "name": "", + "description": "This is a sample pipeline to show the parameter flow for shell + types.\n\nThe step \"access initial\" just displays the initial parameters + defined in examples/concepts/parameters.yaml\nThe step modify_initial updates + the parameters and sets them back as environment variables.\nThe step + display_again displays the updated parameters from modify_initial and updates + them.\n\n + You can run this pipeline as:\n magnus execute -f + examples/concepts/task_shell_parameters.yaml -p examples/concepts/parameters. + yaml\n", + "internal_branch_name": "", + "steps": { + "access initial": { + "type": "task", + "name": "access initial", + "internal_name": "access initial", + "internal_branch_name": "", + "is_composite": false + }, + "modify initial": { + "type": "task", + "name": "modify initial", + "internal_name": "modify initial", + "internal_branch_name": "", + "is_composite": false + }, + "display again": { + "type": "task", + "name": "display again", + "internal_name": "display again", + "internal_branch_name": "", + "is_composite": false + }, + "success": { + "type": "success", + "name": "success", + "internal_name": "success", + "internal_branch_name": "", + "is_composite": false + }, + "fail": { + "type": "fail", + "name": "fail", + "internal_name": "fail", + "internal_branch_name": "", + "is_composite": false + } + } + }, + "dag_hash": "9070f0b9c661d4ff7a23647cbe0ed2d461b9a26e", + "execution_plan": "chained" + } } -] -``` + ``` + + +In the above example of ```run log``` tab, + +- ```run_id```: Defined in line #2, is a a unique id generated for every execution of the pipeline. +- ```use_cached```: in line #4, is the execution id of an older run that is being restarted in the current execution. +- ```tag```: A user defined label to be attached to an execution of the pipeline to contextually group executions. +This label can also be used to group experiments of experiment tracking tools like +[mlflow](https://mlflow.org/docs/latest/tracking/tracking-api.html#organizing-runs-in-experiments). +- ```status```: In line #7, defines the global status of the execution. ```SUCCESS```, ```PROCESSING``` or ```FAILED``` +are the three possible states. +- ```run_config```: From line #184 to end, capture the configuration used during the +execution. It details the configuration of different services (executor, catalog, secrets +handler etc) and also the pipeline definition. This is the internal representation of the +execution. + + +!!! tip + + The system generated ```run_id``` is always appended with the time of execution. Use this to distinguish + between execution id's during rapid experimentation. 
+ + In the above example, the ```run_id```, "affable-babbage-0545" is executed at 05:45. + + +## parameters + +The final state of parameters are captured at the run log level while individual +[step logs](#step_log) show the parameters at the point of execution of the task. + +In the above example, lines 178-183 show the final parameters at the end of execution. + + +## Step Log + +The step log captures the information about the execution of the steps. It is mapping indexed by the name of the step +in the pipeline and is ordered chronologically by the start time of the execution of the step. + +### Example + +A snippet from the above example: + +```json linenums="1" +"steps": { + "access initial": { + "name": "access initial", + "internal_name": "access initial", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ca4c5fbff4148d3862a4738942d4607a9c4f0d88", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-12-30 06:40:55.188207", + "end_time": "2023-12-30 06:40:55.202317", + "duration": "0:00:00.014110", + "status": "SUCCESS", + "message": "", + "parameters": { + "spam": "Hello", + "eggs": { + "ham": "Yes, please!!" + } + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "access_initial.execution.log", + "data_hash": "8a18b647052b3c85020beb2024f2a25289fe955b1421026008521b12cff4f44c", + "catalog_relative_path": "devout-jones-0640/access_initial.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + ... -If the execution was in a container, we also track the docker identity. -For example: - -```json -"code_identities": [ - { - "code_identifier": "1486bd7fbe27d57ff4a9612e8dabe6a914bc4eb5", # Git commit id - "code_identifier_type": "git", # Git - "code_identifier_dependable": true, # A flag to track if git tree is clean - "code_identifier_url": "ssh://git@##################.git", # The remote URL of the repo - "code_identifier_message": "" # Lists all the files that were found to be unclean as per git - }, - { - "code_identifier": "", # Docker image digest - "code_identifier_type": "docker", # Git - "code_identifier_dependable": true, # Always true as docker image id is dependable - "code_identifier_url": "", # The docker registry URL - "code_identifier_message": "" - } -] ``` -### attempts - -An attempt log capturing meta data about the attempt made to execute the node. -This section is only present for *Execution* nodes. - -The structure of attempt log along with inline definitions +- For non-nested steps, the key is the name of the step. For example, the first entry +in the steps mapping is "access initial" which corresponds to the name of the task in +the pipeline. For nested steps, the step log is also nested and shown in more detail for + [parallel](/concepts/parallel), [map](/concepts/map). + +- ```status```: In line #5 is the status of the step with three possible states, +```SUCCESS```, ```PROCESSING``` or ```FAILED``` +- ```step_type```: In line #6, is the type of step, in this case is a ```task```. +- ```message```: in line #7, is a short description of the error if the step failed. +This might not always be useful as a step can fail for many complicate reasons. 
+- ```code_identities```: We capture the unique identifier of the state of the code for +reproducibility purposes. + + * The ```code_identifier``` is the git sha of the code. + * ```code_identifier_dependable``` indicates if the current branch is clean. Unclean branches makes it hard to + determine the exact state of the code. + * ```code_identifier_message```: Captures the names of the files which have uncommitted changes. + + + It is easy to extend and customize the metrics being captured here. For example, executors like ```local-container``` + or ```argo``` can add the docker container identities as part of the log. + +- ```attempts```: In line #19-34, Is the ordered list of attempts to execute the step. It shows the start time, +end time, duration of the execution and the parameters at the time of execution of the step. + + For example, at the time of executing the step ```access initial```, the parameters are the + ```json + "spam": "Hello", + "eggs": { + "ham": "Yes, please!!" + } + ``` + while for the step, ```display again``` shows the modified parameters: + ```json + "spam": "World", + "eggs": { + "ham": "No, Thank you!!" + } + ``` + +- ```user_defined_metrics```: are any [experiment tracking metrics](/concepts/task/#experiment_tracking) +captured during the execution of the step. + +- ```branches```: This only applies to parallel, map or dag steps and shows the logs captured during the +execution of the branch. +- ```data_catalog```: Captures any data flowing through the tasks by the [catalog](/concepts/catalog). +By default, the execution logs of the task are put in the catalog for easier debugging purposes. + +For example, the below lines from the snippet specifies one entry into the catalog which is the execution log +of the task ```access initial``` and also the hash of the data. ```json -"attempts": [ +"data_catalog": [ { - "attempt_number": 0, # The sequence number of attempt. - "start_time": "", # The start time of the attempt - "end_time": "", # The end time of the attempt - "duration": null, # The duration of the time taken for the command to execute - "status": "", - "parameters": "", # The parameters at that point of execution. - "message": "" # If any exception was raised, this field captures the message of the exception + "name": "access_initial.execution.log", + "data_hash": "8a18b647052b3c85020beb2024f2a25289fe955b1421026008521b12cff4f44c", + "catalog_relative_path": "devout-jones-0640/access_initial.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" } ] ``` -The status of an attempt log could be one of: - -- success : If the attempt succeeded. -- fail: If the attempt failed. - - -### user defined metrics - -As part of the execution, there is a provision to store metrics in the run log. These metrics would be stored in this -section of the log. - -Example of storing metrics: -```python -# in my_module.py - -from magnus import track_this - -def my_cool_function(): - track_this(number_of_files=102, failed_for=10) - track_this(number_of_incidents={'mean_value':2, 'variance':0.1}) -``` +## Retrying failures -If this function was executed as part of the pipeline, you should see the following in the run log - -``` json -{ - ... - "steps": { - "step name": { - ..., - "number_of_incidents": { - "mean_value": 2, - "variance" : 0.1 - }, - "number_of_files": 102, - "failed_for": 10 - }, - ... - " - }, - ... - } -} -``` +The structure of the run log remains the same independent of the ```executor``` used to execute. 
+This enables to debug failures during the execution in complex environments to be easily +reproduced in local environments and fixed. -The same could also be acheived without ```import magnus``` by exporting environment variables with prefix of -```MAGNUS_TRACK_``` +!!! note "Shortcomings" -```python -# in my_module.py + Currently, the support is only available for -import os -import json + - non-nested, linear pipelines + - non-chunked run log store -def my_cool_function(): - os.environ['MAGNUS_TRACK_' + 'number_of_files'] = 102 - os.environ['MAGNUS_TRACK_' + 'failed_for'] = 10 + [mocked executor](/configurations/executors/mocked) provides better support in debugging failures. - os.environ['MAGNUS_TRACK_' + 'number_of_incidents'] = json.dumps({'mean_value':2, 'variance':0.1}) -``` +### Example +=== "Argo configuration" -### branches + The configuration file is assumed to be located at: ```examples/configs/argo-config-catalog.yaml``` -If the step was a composite node of type dag or parallel or map, this section is used to store the logs of the branch -which have a structure similar to the Run Log. + ```yaml linenums="1" + --8<-- "examples/configs/argo-config-catalog.yaml" + ``` +=== "Faulty pipeline" -### data catalog + To run the pipeline in argo, change the configuration file from + ```examples/configs/fs-catalog-run_log.yaml``` to + ```examples/configs/argo-config-catalog.yaml``` -Data generated as part of individual steps of the pipeline can use the catalog to make the data available for the -downstream steps or for reproducibility of the run. The catalog metadata is stored here in this section. + ```yaml linenums="1" + --8<-- "examples/retry-fail.yaml" + ``` -The structure of the data catalog is as follows with inline definition. +=== "Run log in Argo" -```json -"data_catalog": - [ - { - "name": "", # The name of the file - "stored_at": "", # The location at which it is stored - "data_hash": "", # The SHA id of the data - "stage": "" # The stage at which the data is cataloged. 
+ ```json linenums="1" + { + "run_id": "toFail", + "dag_hash": "13f7c1b29ebb07ce058305253171ceae504e1683", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "PROCESSING", + "steps": { + "Setup": { + "name": "Setup", + "internal_name": "Setup", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-05 22:11:47.213714", + "end_time": "2024-02-05 22:11:47.290352", + "duration": "0:00:00.076638", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Setup.execution.log", + "data_hash": "b709b710424701bd86be1cca36c5ec18f412b6dbb8d4e7729ec10e44319adbaf", + "catalog_relative_path": "toFail/Setup.execution.log", + "catalog_handler_location": "/mnt/catalog", + "stage": "put" + } + ] + }, + "Create Content": { + "name": "Create Content", + "internal_name": "Create Content", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-05 22:12:14.210011", + "end_time": "2024-02-05 22:12:14.225645", + "duration": "0:00:00.015634", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Create_Content.execution.log", + "data_hash": "618e515729e00c7811865306b41e91d698c00577078e75b2e4bcf87ec9669d62", + "catalog_relative_path": "toFail/Create_Content.execution.log", + "catalog_handler_location": "/mnt/catalog", + "stage": "put" + }, + { + "name": "data/hello.txt", + "data_hash": "949a4f1afcea77b4b3f483ebe993e733122fb87b7539a3fc3d6752030be6ad44", + "catalog_relative_path": "toFail/data/hello.txt", + "catalog_handler_location": "/mnt/catalog", + "stage": "put" + } + ] + }, + "Retrieve Content": { + "name": "Retrieve Content", + "internal_name": "Retrieve Content", + "status": "FAIL", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-05 22:12:36.514484", + "end_time": "2024-02-05 22:12:36.985694", + "duration": "0:00:00.471210", + "status": "FAIL", + "message": "Command failed", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "data/hello.txt", + "data_hash": "949a4f1afcea77b4b3f483ebe993e733122fb87b7539a3fc3d6752030be6ad44", + "catalog_relative_path": "data/hello.txt", + "catalog_handler_location": "/mnt/catalog", + "stage": "get" + }, + { + "name": "Retrieve_Content.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + 
"catalog_relative_path": "toFail/Retrieve_Content.execution.log", + "catalog_handler_location": "/mnt/catalog", + "stage": "put" + } + ] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "argo", + "service_type": "executor", + "enable_parallel": false, + "overrides": {}, + "image": "$argo_docker_image", + "expose_parameters_as_inputs": true, + "secrets_from_k8s": [], + "output_file": "argo-pipeline.yaml", + "name": "magnus-dag-", + "annotations": {}, + "labels": {}, + "activeDeadlineSeconds": 172800, + "nodeSelector": null, + "parallelism": null, + "retryStrategy": { + "limit": "0", + "retryPolicy": "Always", + "backoff": { + "duration": "120", + "factor": 2, + "maxDuration": "3600" + } + }, + "max_step_duration_in_seconds": 7200, + "tolerations": null, + "image_pull_policy": "", + "service_account_name": "default-editor", + "persistent_volumes": [ + { + "name": "magnus-volume", + "mount_path": "/mnt" + } + ], + "step_timeout": 14400 + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": "/mnt/run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": "/mnt/catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/retry-fail.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/argo-config-catalog.yaml", + "tag": "", + "run_id": "toFail", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "Setup", + "name": "", + "description": "This is a simple pipeline that demonstrates retrying failures.\n\n1. Setup: We setup a data folder, we ignore if it is already present\n2. Create Content: We create a \"hello.txt\" and \"put\" the file in catalog\n3. Retrieve Content: We \"get\" the file \"hello.txt\" from the catalog and show the contents\n5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion.\n\n\nYou can run this pipeline by:\n magnus execute -f examples/catalog.yaml -c examples/configs/fs-catalog.yaml\n", + "steps": { + "Setup": { + "type": "task", + "name": "Setup", + "next": "Create Content", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command_type": "shell", + "command": "mkdir -p data", + "node_name": "Setup" + }, + "Create Content": { + "type": "task", + "name": "Create Content", + "next": "Retrieve Content", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [], + "put": [ + "data/hello.txt" + ] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "echo \"Hello from magnus\" >> data/hello.txt\n", + "node_name": "Create Content" + }, + "Retrieve Content": { + "type": "task", + "name": "Retrieve Content", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [ + "data/hello.txt" + ], + "put": [] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "cat data/hello1.txt", + "node_name": "Retrieve Content" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "13f7c1b29ebb07ce058305253171ceae504e1683", + "execution_plan": "chained" } - ] -``` - -More information about cataloging is found [here](../catalog). 
- - -## Configuration - -Configuration of a Run Log Store is as follows: - -```yaml -run_log: - type: - config: -``` - -### type - -The type of run log provider you want. This should be one of the run log types already available. - -Buffered Run Log is provided as default if nothing is given. - -### config - -Any configuration parameters the run log provider accepts. - -## Parameterized definition - -As with any part of the magnus configuration, you can parameterize the configuration of Run Log to switch between -Run Log providers without changing the base definition. - -Please follow the example provided [here](../dag/#parameterized_definition) for more information. - - + } + ``` -## Extensions -You can easily extend magnus to bring in your custom provider, if a default -implementation does not exist or you are not happy with the implementation. +=== "Fixed pipeline in local environment" -[Extensions are being actively developed and can be found here.](https://github.com/AstraZeneca/magnus-extensions) + Bring the run log from K8's volumes to local machine for a retry. -To implement your own run log store, please extend the BaseRunLogStore whose definition is given below. + ```yaml linenums="1" + --8<-- "examples/retry-fixed.yaml" + ``` -```python -# Code can be found in magnus/datastore.py ---8<-- "magnus/datastore.py:docs" -``` +=== "Run log in local" -The BaseRunLogStore depends upon a lot of other DataModels (pydantic datamodels) that capture and store the information. -These can all be found in ```magnus/datastore.py```. You can alternatively ignore all of them and create your own custom -implementation if desired but be aware of internal code dependencies on the structure of the datamodels. -The custom extensions should be registered as part of the namespace: ```run_log_store``` for it to be -loaded. 
+ ```json linenums="1" + { + "run_id": "polynomial-bartik-2226", + "dag_hash": "2beec08fd417134cd3b04599d6684469db4ad176", + "use_cached": true, + "tag": "", + "original_run_id": "toFail", + "status": "SUCCESS", + "steps": { + "Setup": { + "name": "Setup", + "internal_name": "Setup", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": true, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Create Content": { + "name": "Create Content", + "internal_name": "Create Content", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": true, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Retrieve Content": { + "name": "Retrieve Content", + "internal_name": "Retrieve Content", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-05 22:26:05.366143", + "end_time": "2024-02-05 22:26:05.383790", + "duration": "0:00:00.017647", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "data/hello.txt", + "data_hash": "14e0a818c551fd963f9496f5b9e780f741e3ee020456c7d8b761b902fbfa4cb4", + "catalog_relative_path": "data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "get" + }, + { + "name": "Retrieve_Content.execution.log", + "data_hash": "f7911c18bf8be5131e6f61eecbeaf607758b9bf38a84b237e2aad7497ff46211", + "catalog_relative_path": "polynomial-bartik-2226/Retrieve_Content.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-05 22:26:05.465249", + "end_time": "2024-02-05 22:26:05.466008", + "duration": "0:00:00.000759", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": 
".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/retry-fixed.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/fs-catalog-run_log.yaml", + "tag": "", + "run_id": "polynomial-bartik-2226", + "variables": { + "argo_docker_image": "harbor.csis.astrazeneca.net/mlops/magnus:latest" + }, + "use_cached": true, + "original_run_id": "toFail", + "dag": { + "start_at": "Setup", + "name": "", + "description": "This is a simple pipeline that demonstrates passing data between steps.\n\n1. Setup: We setup a data folder, we ignore if it is already + present\n2. Create Content: We create a \"hello.txt\" and \"put\" the file in catalog\n3. Clean up to get again: We remove the data folder. Note that this is stubbed + to prevent\n accidental deletion of your contents. You can change type to task to make really run.\n4. Retrieve Content: We \"get\" the file \"hello.txt\" from the + catalog and show the contents\n5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion.\n\n\nYou can run this pipeline by:\n + magnus execute -f examples/catalog.yaml -c examples/configs/fs-catalog.yaml\n", + "steps": { + "Setup": { + "type": "task", + "name": "Setup", + "next": "Create Content", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command_type": "shell", + "command": "mkdir -p data", + "node_name": "Setup" + }, + "Create Content": { + "type": "task", + "name": "Create Content", + "next": "Retrieve Content", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [], + "put": [ + "data/hello.txt" + ] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "echo \"Hello from magnus\" >> data/hello.txt\n", + "node_name": "Create Content" + }, + "Retrieve Content": { + "type": "task", + "name": "Retrieve Content", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [ + "data/hello.txt" + ], + "put": [] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "cat data/hello.txt", + "node_name": "Retrieve Content" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "2beec08fd417134cd3b04599d6684469db4ad176", + "execution_plan": "chained" + } + } -```toml -# For example, as part of your pyproject.toml -[tool.poetry.plugins."run_log_store"] -"mlmd" = "YOUR_PACKAGE:MLMDFormat" -``` + ``` + +=== "Diff" + + ```diff + diff .run_log_store/toFail.json .run_log_store/polynomial-bartik-2226.json + 2,4c2,4 + < "run_id": "toFail", + < "dag_hash": "13f7c1b29ebb07ce058305253171ceae504e1683", + < "use_cached": false, + --- + > "run_id": "polynomial-bartik-2226", + > "dag_hash": "2beec08fd417134cd3b04599d6684469db4ad176", + > "use_cached": true, + 6,7c6,7 + < "original_run_id": "", + < "status": "PROCESSING", + --- + > "original_run_id": "toFail", + > "status": "SUCCESS", + 15c15 + < "mock": false, + --- + > "mock": true, + 25,35c25 + < "attempts": [ + < { + < "attempt_number": 1, + < "start_time": "2024-02-05 22:11:47.213714", + < "end_time": "2024-02-05 22:11:47.290352", + < "duration": "0:00:00.076638", + < "status": "SUCCESS", + < "message": "", + < "parameters": {} + < } + < ], + --- + 
> "attempts": [], + 38,46c28 + < "data_catalog": [ + < { + < "name": "Setup.execution.log", + < "data_hash": "b709b710424701bd86be1cca36c5ec18f412b6dbb8d4e7729ec10e44319adbaf", + < "catalog_relative_path": "toFail/Setup.execution.log", + < "catalog_handler_location": "/mnt/catalog", + < "stage": "put" + < } + < ] + --- + > "data_catalog": [] + 53a36,56 + > "mock": true, + > "code_identities": [ + > { + > "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + > "code_identifier_type": "git", + > "code_identifier_dependable": true, + > "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + > "code_identifier_message": "" + > } + > ], + > "attempts": [], + > "user_defined_metrics": {}, + > "branches": {}, + > "data_catalog": [] + > }, + > "Retrieve Content": { + > "name": "Retrieve Content", + > "internal_name": "Retrieve Content", + > "status": "SUCCESS", + > "step_type": "task", + > "message": "", + 67,69c70,72 + < "start_time": "2024-02-05 22:12:14.210011", + < "end_time": "2024-02-05 22:12:14.225645", + < "duration": "0:00:00.015634", + --- + > "start_time": "2024-02-05 22:26:05.366143", + > "end_time": "2024-02-05 22:26:05.383790", + > "duration": "0:00:00.017647", + 79,83c82,86 + < "name": "Create_Content.execution.log", + < "data_hash": "618e515729e00c7811865306b41e91d698c00577078e75b2e4bcf87ec9669d62", + < "catalog_relative_path": "toFail/Create_Content.execution.log", + < "catalog_handler_location": "/mnt/catalog", + < "stage": "put" + --- + > "name": "data/hello.txt", + > "data_hash": "14e0a818c551fd963f9496f5b9e780f741e3ee020456c7d8b761b902fbfa4cb4", + > "catalog_relative_path": "data/hello.txt", + > "catalog_handler_location": ".catalog", + > "stage": "get" + 86,89c89,92 + < "name": "data/hello.txt", + < "data_hash": "949a4f1afcea77b4b3f483ebe993e733122fb87b7539a3fc3d6752030be6ad44", + < "catalog_relative_path": "toFail/data/hello.txt", + < "catalog_handler_location": "/mnt/catalog", + --- + > "name": "Retrieve_Content.execution.log", + > "data_hash": "f7911c18bf8be5131e6f61eecbeaf607758b9bf38a84b237e2aad7497ff46211", + > "catalog_relative_path": "polynomial-bartik-2226/Retrieve_Content.execution.log", + > "catalog_handler_location": ".catalog", + 94,98c97,101 + < "Retrieve Content": { + < "name": "Retrieve Content", + < "internal_name": "Retrieve Content", + < "status": "FAIL", + < "step_type": "task", + --- + > "success": { + > "name": "success", + > "internal_name": "success", + > "status": "SUCCESS", + > "step_type": "success", + 113,117c116,120 + < "start_time": "2024-02-05 22:12:36.514484", + < "end_time": "2024-02-05 22:12:36.985694", + < "duration": "0:00:00.471210", + < "status": "FAIL", + < "message": "Command failed", + --- + > "start_time": "2024-02-05 22:26:05.465249", + > "end_time": "2024-02-05 22:26:05.466008", + > "duration": "0:00:00.000759", + > "status": "SUCCESS", + > "message": "", + 123,138c126 + < "data_catalog": [ + < { + < "name": "data/hello.txt", + < "data_hash": "949a4f1afcea77b4b3f483ebe993e733122fb87b7539a3fc3d6752030be6ad44", + < "catalog_relative_path": "data/hello.txt", + < "catalog_handler_location": "/mnt/catalog", + < "stage": "get" + < }, + < { + < "name": "Retrieve_Content.execution.log", + < "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + < "catalog_relative_path": "toFail/Retrieve_Content.execution.log", + < "catalog_handler_location": "/mnt/catalog", + < "stage": "put" + < } + < ] + --- + > "data_catalog": [] + 144c132 + < "service_name": "argo", + --- + > "service_name": 
"local", + 147,177c135 + < "overrides": {}, + < "image": "$argo_docker_image", + < "expose_parameters_as_inputs": true, + < "secrets_from_k8s": [], + < "output_file": "argo-pipeline.yaml", + < "name": "magnus-dag-", + < "annotations": {}, + < "labels": {}, + < "activeDeadlineSeconds": 172800, + < "nodeSelector": null, + < "parallelism": null, + < "retryStrategy": { + < "limit": "0", + < "retryPolicy": "Always", + < "backoff": { + < "duration": "120", + < "factor": 2, + < "maxDuration": "3600" + < } + < }, + < "max_step_duration_in_seconds": 7200, + < "tolerations": null, + < "image_pull_policy": "", + < "service_account_name": "default-editor", + < "persistent_volumes": [ + < { + < "name": "magnus-volume", + < "mount_path": "/mnt" + < } + < ], + < "step_timeout": 14400 + --- + > "overrides": {} + 182c140 + < "log_folder": "/mnt/run_log_store" + --- + > "log_folder": ".run_log_store" + 191c149 + < "catalog_location": "/mnt/catalog" + --- + > "catalog_location": ".catalog" + 197c155 + < "pipeline_file": "examples/retry-fail.yaml", + --- + > "pipeline_file": "examples/retry-fixed.yaml", + 199c157 + < "configuration_file": "examples/configs/argo-config-catalog.yaml", + --- + > "configuration_file": "examples/configs/fs-catalog-run_log.yaml", + 201,204c159,164 + < "run_id": "toFail", + < "variables": {}, + < "use_cached": false, + < "original_run_id": "", + --- + > "run_id": "polynomial-bartik-2226", + > "variables": { + > "argo_docker_image": "harbor.csis.astrazeneca.net/mlops/magnus:latest" + > }, + > "use_cached": true, + > "original_run_id": "toFail", + 208c168 + < "description": "This is a simple pipeline that demonstrates retrying failures.\n\n1. Setup: We setup a data folder, we ignore if it is already present\n2. Create Content: We create a \"hello.txt\" and \"put\" the file in catalog\n3. Retrieve Content: We \"get\" the file \"hello.txt\" from the catalog and show the contents\n5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion.\n\n\nYou can run this pipeline by:\n magnus execute -f examples/catalog.yaml -c examples/configs/fs-catalog.yaml\n", + --- + > "description": "This is a simple pipeline that demonstrates passing data between steps.\n\n1. Setup: We setup a data folder, we ignore if it is already present\n2. Create Content: We create a \"hello.txt\" and \"put\" the file in catalog\n3. Clean up to get again: We remove the data folder. Note that this is stubbed to prevent\n accidental deletion of your contents. You can change type to task to make really run.\n4. Retrieve Content: We \"get\" the file \"hello.txt\" from the catalog and show the contents\n5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion.\n\n\nYou can run this pipeline by:\n magnus execute -f examples/catalog.yaml -c examples/configs/fs-catalog.yaml\n", + 253c213 + < "command": "cat data/hello1.txt", + --- + > "command": "cat data/hello.txt", + 266c226 + < "dag_hash": "13f7c1b29ebb07ce058305253171ceae504e1683", + --- + > "dag_hash": "2beec08fd417134cd3b04599d6684469db4ad176", + ``` + + +## API + +Tasks can access the ```run log``` during the execution of the step +[using the API](/interactions/#magnus.get_run_log). The run log returned by this method is a deep copy +to prevent any modifications. + + +Tasks can also access the ```run_id``` of the current execution either by +[using the API](/interactions/#magnus.get_run_id) or by the environment +variable ```MAGNUS_RUN_ID```. 
diff --git a/docs/concepts/secrets-implementations/do-nothing.md b/docs/concepts/secrets-implementations/do-nothing.md deleted file mode 100644 index fed2b736..00000000 --- a/docs/concepts/secrets-implementations/do-nothing.md +++ /dev/null @@ -1,14 +0,0 @@ -# Dot Env - -This secrets provider is a dummy secrets manager and the default for magnus. - -If a name is provided, it returns an empty string. If a name is not provided, it returns an empty dictionary. - - -The complete configuration - -```yaml -secrets: - type: do-nothing - -``` diff --git a/docs/concepts/secrets-implementations/dot-env.md b/docs/concepts/secrets-implementations/dot-env.md deleted file mode 100644 index 8021a974..00000000 --- a/docs/concepts/secrets-implementations/dot-env.md +++ /dev/null @@ -1,41 +0,0 @@ -# Dot Env - -This secrets provider uses a file to store secrets. The naming convention for such file is the ```.env``` file. - - ---- -!!! Note - - This secrets provider should only be used for local modes and for development purpose only. - - Please be sure on **NOT** committing these files to your git and if possible, add them to .gitignore. - ---- - -The complete configuration - -```yaml -secrets: - type: dotenv - config: - location: - -``` - -## location - -The location of the file from which the secrets should be loaded. - -Defaults to .env file in the project root directory. - -## Format - -The format of contents of the secrets file should be - -```shell -secret_name=secret_value#Any comment that you want to pass -``` - -Any content after ```#``` is considered a comment and ignored. - -A exception would be raised if the secret naming does not follow these standards. diff --git a/docs/concepts/secrets-implementations/env-secrets-manager.md b/docs/concepts/secrets-implementations/env-secrets-manager.md deleted file mode 100644 index c215be30..00000000 --- a/docs/concepts/secrets-implementations/env-secrets-manager.md +++ /dev/null @@ -1,24 +0,0 @@ -# Dot Env - -This secrets provider the environment as a secrets provider. - -If a name is provided, we look for the secret in the environment. The name is case-sensitive. - -If a name is not provided, we return an empty dictionary. - ---- -!!! Note - - Providing secrets via environment variables poses security risks. Use a secure secrets manager. - - This secrets manager returns an empty dictionary, if a name is not provided, unlike other secret managers. - ---- - -The complete configuration - -```yaml -secrets: - type: env-secrets-manager - -``` diff --git a/docs/concepts/secrets.md b/docs/concepts/secrets.md index b04d2197..2a31d25e 100644 --- a/docs/concepts/secrets.md +++ b/docs/concepts/secrets.md @@ -1,119 +1,69 @@ # Overview -Secrets are essential in making your data science projects secure and collaborative. They could be database credentials, -API keys or any information that need to present at the run-time but invisible at all other times. -Magnus provides a clean interface to access/store secrets and independent of the actual secret provider, -the interface remains the same. +!!! note "Opt out" -As with all modules of magnus, there are many secrets providers and if none fit your needs, it is easier to write -one of your to fit your needs. In magnus, all secrets are key value pairs. + Pipelines need not use the ```secrets``` if the preferred tools of choice is + not implemented in magnus. The default configuration of ```do-nothing``` is no-op by design. + We kindly request to raise a feature request to make us aware of the eco-system. 
-## Configuration -Configuration of a Secrets is as follows: +Most complex pipelines require secrets to hold sensitive information during task execution. +They could be database credentials, API keys or any information that need to present at +the run-time but invisible at all other times. -```yaml -secrets: - type: - config: -``` - -### type - -The type of secrets provider you want. This should be one of the secrets types already available. - -There is no default secrets provider. - -### config - -Any configuration parameters the secret provider accepts. +Magnus provides a [clean API](/interactions/#magnus.get_secret) to access secrets +and independent of the actual secret provider, the interface remains the same. +A typical example would be a task requiring the database connection string to connect +to a database. -## Interaction with other services -Other service providers, like run log store or catalog, can access the secrets by using the -```executor.secrets_handler``` of ```context``` module during the run time. This could be useful for -constructing connection strings to database or AWS connections. - -For example: - -```python +```python title="Using the secrets API" class CustomObject: @property def connection_object(self): - from magnus.context import executor - secrets = executor.secrets_handler.get_secrets() + from magnus import get_secret + connection_string = get_secret("connection_string") # Do something with the secrets - ``` -## Interaction within code +Please refer to [configurations](/configurations/secrets) for available implementations. -Secrets is the only implementation that requires you to ```import magnus``` in the code to access secrets. -This is mostly to follow the best safety guidelines. +## Example -Once a secret configuration is defined as above, you can access the secret by using ```get_secret``` of magnus. -If a key is provided to the API, we return only the value associated with the secret by the key. -If a key is not provided, we return all the key value secret pairs provided. -The API would raise an exception if a secret by the key requested does not exist. +=== "dotenv format" -Currently, there is no provision to update/edit secrets via code. + The dotenv format for providing secrets. Ideally, this file should not be part of the + version control but present during development phase. + The file is assumed to be present in ```examples/secrets.env``` for this example. -For example if the secret key-value pairs are: + ```shell linenums="1" + --8<-- "examples/secrets.env" + ``` -```yaml -secret_answer: 42 -secret_question: everything -``` - -And for the code: -```python -# In my_module.py -from magnus import get_secret + 1. Shell scripts style are supported. + 2. Key value based format is also supported. -def my_cool_function(): - secret = get_secret('secret_answer') +=== "Example configuration" - all_secrets = get_secret() + Configuration to use the dotenv format file. -``` + ```yaml linenums="1" + --8<-- "examples/configs/dotenv.yaml" + ``` -secret would have a value of ```42``` while all_secrets would be a dictionary -```{'secret_answer': 42, 'secret_question': 'everything'}``` + 1. Use dotenv secrets manager. + 2. Location of the dotenv file, defaults to ```.env``` in project root. -## Parameterized definition +=== "Pipeline in python" -As with any part of the magnus configuration, you can parameterize the configuration of secrets to switch between -providers without changing the base definition. 
+ ```python linenums="1" hl_lines="12-13" + --8<-- "examples/secrets.py" + ``` -Please follow the example provided [here](../dag/#parameterized_definition) for more information. - - -## Extensions - -You can easily extend magnus to bring in your custom provider, if a default -implementation does not exist or you are not happy with the implementation. - -[Extensions are being actively developed and can be found here.](https://github.com/AstraZeneca/magnus-extensions) - -To implement your custom secret class, please extend BaseSecret class of magnus whose definition is given below. - -```python -# Source code found in magnus/secrets.py ---8<-- "magnus/secrets.py:docs" - -``` - -The custom extensions should be registered as part of the namespace: ```secrets``` for it to be -loaded. - -```toml -# For example, as part of your pyproject.toml -[tool.poetry.plugins."secrets"] -"k8s-secrets" = "YOUR_PACKAGE:K8sSecret" -``` + 1. The key of the secret that you want to retrieve. diff --git a/docs/concepts/stub.md b/docs/concepts/stub.md new file mode 100644 index 00000000..a01d9cac --- /dev/null +++ b/docs/concepts/stub.md @@ -0,0 +1,41 @@ +Stub nodes in magnus are just like +[```Pass``` state](https://docs.aws.amazon.com/step-functions/latest/dg/amazon-states-language-pass-state.html) +in AWS Step Functions or ```pass``` in python code. It is a placeholder and useful when you want to debug or +design your pipeline. + +Stub nodes can take arbitrary number of parameters and is always a success. + +## Example + +!!! note annotate inline end "Intuition" + + Designing a pipeline is similar to writing a modular program. Stub nodes are handy to create a placeholder + for some step that will be implemented in the future. + + During debugging, changing a node to ```stub``` will let you focus on the actual bug without having to + execute the additional steps. + + +=== "yaml" + + In the below example, all the steps are ```stub``` nodes. The only required field is + the ```next``` which is needed for graph traversal. As seen in ```step 2``` definition, + they can have arbitrary fields. + + + ``` yaml hl_lines="20-24" + --8<-- "examples/mocking.yaml" + ``` + +=== "python" + + In the below example, all the steps are ```stub``` nodes. + + ``` python hl_lines="21-24" + --8<-- "examples/mocking.py" + ``` + +The only required field is the ```name```, ```next``` which is needed for graph traversal. + +- yaml definition needs ```next``` to be defined as part of the step definition. +- python SDK can define the ```next``` when linking the nodes as part of the pipeline. diff --git a/docs/concepts/task.md b/docs/concepts/task.md new file mode 100644 index 00000000..6d198bd6 --- /dev/null +++ b/docs/concepts/task.md @@ -0,0 +1,522 @@ +Task nodes are the execution units of the pipeline. + +In magnus, a ```command``` in a task node can be [python functions](#python_functions), +[Jupyter notebooks](#notebook) or a [shell scripts](#shell). +All task nodes can take arguments, retrieve and create files/objects and return +arguments, though their access patterns are different. + + +In the below examples, we define a pipeline either using python SDK or yaml format but both are equivalent +and all the pipelines can be expressed in either formats. + + +--- + +## Python functions + +Python is the default ```command type``` of a task node. The ```command``` +should be the dotted path to the python function. + +!!! 
example "Dotted path" + + Assuming the below project structure: + + - The ```command``` for the ```outer_function``` should be ```outer_functions.outer_function``` + + - The ```command``` for ```inner_function``` should be ```module_inner.inner_functions.inner_function``` + + + ``` + .. + ├── outer_functions.py + │   ├── outer_function() + ├── module_inner + │   ├── inner_functions.py + │   | ├── inner_function() + .. + + ``` + +### Example + + +=== "python" + + !!! tip inline end "Structuring" + + It is best to keep the application specific functions in a different module + than the pipeline definition, if you are using Python SDK. + + In this example, we combined them as one module for convenience. + + You can execute this pipeline using ```examples/concepts/simple.py``` + + ```python linenums="1" hl_lines="4-8" + --8<-- "examples/concepts/simple.py" + ``` + +=== "yaml" + + You can execute this by magnus execute -f examples/concepts/simple.yaml + + ```yaml linenums="1" + --8<-- "examples/concepts/simple.yaml" + ``` + + +### Closer look + + + +Lines 4-8 in the python code defines the function that we want to execute as + part of the pipeline. They are *plain old python functions*. + +The rest of the python code (or yaml) defines and executes a pipeline that executes a task whose ```command``` +is to execute this function. + + +### Fields + +- ```command``` : Should refer to the function in [dotted path notation](#python_functions). +- ```command_type```: Defaults to python and not needed for python task types. +- [next](../pipeline/#linking): is required for any step of the pipeline except for success and fail steps. +- [on_failure](../pipeline/#on_failure): Name of the step to execute if the step fails. +- catalog: Optional required for data access patterns from/to the central storage. + + +### Accessing parameters + +!!! tip "Mutability" + + Functions mutating the input parameters is idiomatic is python. However, functions as part of magnus + pipeline should return the mutated parameters for downstream steps to have access to them. + + For example, unless the function ```mutating_function``` returns the updated parameters, magnus will + not know about the change. + + + ```python + d = {"name": "monty"} + print(d) + ">>> {'name': 'monty'}" + + def mutating_function(input_dict): + input_dict["name"] = "python" + + + mutating_function(d) + print(d) + ">>>{'name': 'python'}" + ``` + + + +Please refer to [Initial Parameters](/concepts/parameters/#initial_parameters) for more information about setting +initial parameters. + +Lets assume that the initial parameters are: + +```yaml +--8<-- "examples/concepts/parameters.yaml" +``` + +- [x] Passing parameters between steps + + +=== "Natively" + + Internally, magnus stores the parameters in serialised json format. + + ### ^^Input arguments to the function^^ + + Any arguments passed into the function should be at the root level of the json object. + Arguments with type annotations will be casted appropriately. + Arguments with no type annotation will be sent in as ```dict```. + + In the below example, in line 13 and 28, arguments ```spam``` and ```eggs``` are at the root level in + the yaml representation and also are annotated in the function signature. They are sent in to the function + as arguments with proper type conversion. + + !!! warning "Annotation" + + Without annotations, magnus cannot determine the type and can cause unexpected behavior. + + This is especially true in distributed executors (eg: argo workflows). 
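+
+    For instance, a rough sketch of such a function is shown below. The function and model
+    names are only illustrative, and ```EggsModel``` is assumed to mirror the nested
+    ```eggs``` parameter of the initial parameters.
+
+    ```python
+    from pydantic import BaseModel
+
+
+    class EggsModel(BaseModel):
+        ham: str
+
+
+    def consume(spam: str, eggs: EggsModel):
+        # "spam" and "eggs" are root level parameters of the run.
+        # "eggs" is casted into EggsModel because of the type annotation;
+        # without an annotation, it would be sent in as a dict.
+        print(spam, eggs.ham)
+    ```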
+ + + ### ^^Output arguments of function^^ + + Only pydantic models are allowed to be return types of a function. There is no need + for any type annotation for return type but is advised for a cleaner code. + + Output arguments are stored in json format by + [model_dump](https://docs.pydantic.dev/latest/concepts/serialization/#modelmodel_dump), + respecting the alias. + + The model structure of the pydantic model would be added to the root structure. This is + useful when you want to add or modify parameters at the root level. For example, line 25 + would update all the initial parameters. + + To update a subset of existing parameters at the root level, you can either create a new model or + use [DynamicModel](https://docs.pydantic.dev/latest/concepts/models/#dynamic-model-creation). + For example, lines 42-45 create a dynamic model to update the ```eggs``` parameter. + + + !!! warning "caution" + + Returning "eggs" in line 42 would result in a new parameter "ham" at the root level + as it looses the nested structure. + + + You can run this example using: ```python run examples/concepts/task_native_parameters.py``` + + ```python linenums="1" + --8<-- "examples/concepts/task_native_parameters.py" + ``` + + +=== "Using the API" + + Magnus also has [python API](/interactions) to access parameters. + + Use [get_parameter](/interactions/#magnus.get_parameter) to access a parameter at the root level. + You can optionally specify the ```type``` by using ```cast_as``` argument to the API. + For example, line 19 would cast ```eggs```parameter into ```EggsModel```. + Native python types do not need any explicit ```cast_as``` argument. + + Use [set_parameter](/interactions/#magnus.set_parameter) to set parameters at the root level. + Multiple parameters can be set at the same time, for example, line 26 would set both the ```spam``` + and ```eggs``` in a single call. + + The pydantic models would be serialised to json format using + [model_dump](https://docs.pydantic.dev/latest/concepts/serialization/#modelmodel_dump), respecting the alias. + + + You can run this example by: ```python run examples/concepts/task_api_parameters.py``` + + ```python linenums="1" + --8<-- "examples/concepts/task_api_parameters.py" + ``` + +=== "Using environment variables" + + Any environment variable with ```MAGNUS_PRM_``` is understood to be a parameter in magnus. + + Before the execution of the ```command```, all the parameters at the root level are set as environment variables + with the key prefixed by ```MAGNUS_PRM_```. Python functions that are called during the execution of the command + can also access them as environment variables. + + After the execution of the ```command```, the environment is "scanned" again to identify changes to the existing + variables prefixed by ```MAGNUS_PRM_```. All updated variables are stored at the root level. + + Parameters set by environment variables over-ride the parameters defined by the initial parameters which can be + handy to quickly experiment without modifying code or to dynamically adjust behavior when running in + orchestrators like Argo or AWS step functions. + + You can run this example by: ```python run examples/concepts/task_env_parameters.py``` + + ```python linenums="1" + --8<-- "examples/concepts/task_env_parameters.py" + ``` + + + +!!! abstract "Verbose?" + + We acknowledge that using pydantic models as our + [Data transfer objects](https://stackoverflow.com/questions/1051182/what-is-a-data-transfer-object-dto) is verbose in comparison to using + ```dict```. 
+ + The advantages of using strongly typed DTO has long term advantages of implicit validation, typing hints + in editors. This choice is inspired from [FastAPI's](https://fastapi.tiangolo.com/features/#pydantic-features) + ways of working. + + +### Passing data and execution logs + +Please refer to [catalog](/concepts/catalog) for more details and examples on passing +data between tasks and the storage of execution logs. + +--- + +## Notebook + + +Jupyter notebooks are supported as part of a task step. We internally use +[Ploomber engine](https://github.com/ploomber/ploomber-engine) for executing notebooks. + +The output is saved to the same location as the input notebook but with ```_out``` post-fixed to +the name of the notebook. This is configurable by ```notebook_output_path``` +while defining the task either via yaml or the sdk. + +The output notebook is also saved in the ```catalog``` for logging and ease of debugging. + +### Example + +=== "Notebook" + + The below is a simple notebook for demonstration. + + Below is just a screenshot, the original notebook can be found at ```examples/concepts/simple_notebook.yaml```. + + +
+ ![Image title](/assets/screenshots/simple_notebook.png){ width="800" height="600"} +
+
+ +=== "Pipeline" + + The same pipeline can also be defined via the SDK. + + ```yaml linenums="1" + --8<-- "examples/concepts/simple_notebook.yaml" + ``` + +### Closer look + +The structure of the pipeline remains the same as with any ```task```. In the pipeline definition, + ```command_type``` in line number 19 specifies the type of the task to be a notebook + while the ```command``` in line number 20 specifies the location of the notebook relative to the project root. + +The notebook executed in the same shell session, thanks to ploomber engine, so any libraries installed in +the current project are readily available. + + +### Fields + +- ```command``` : Should be the location of the notebook relative to the project root. It should end with ```.ipynb```. +- ```command_type```: Should be ```notebook``` to execute notebooks. +- ```notebook_output_path```: the location of the executed notebook. Defaults to the +notebook name defined in ```command``` with ```_out``` post-fixed. The location should be relative +to the project root and also would be stored in catalog in the same location. +- [next](/concepts/pipeline/#linking): is required for any step of the pipeline except for success and fail steps. +- [on_failure](/concepts/pipeline/#on_failure): Name of the step to execute if the step fails. +- catalog: Optional required for data access patterns from/to the central storage. + +### ploomber arguments + +Please refer to +[ploomber arguments](https://engine.ploomber.io/en/latest/api/api.html#execute-notebook) +for available arguments. During the execution of the notebook, we set + +- input_path: To refer to command +- output_path: to refer to notebook_output_path. +- parameters: To the dictionary of available parameters. +- log_output: True. +- progress_bar: False + +You can set additional arguments or override these by sending an optional dictionary +```optional_ploomber_args```. + + + + + +### Accessing parameters + +Please refer to [Initial Parameters](/concepts/parameters/#initial_parameters) for more information about setting +initial parameters. + +Assume that the initial parameters are: + +```yaml +--8<-- "examples/concepts/parameters.yaml" +``` + +- [x] Passing parameters between steps + +=== "Natively" + + !!! note + + The actual notebook is available in examples/concepts/notebook_native_parameters.ipynb. Below are some + screenshots to show the detail. + + + === "pipeline definition" + + ```yaml title="Pipeline definition" + --8<-- "examples/concepts/notebook_native_parameters.yaml" + ``` + + === "Notebook" + +
+ ![Image title](/assets/screenshots/notebook_native_parameters.png){ width="800" height="600"} +
+
+ + + ### ^^Input parameters to the notebook^^ + + Input parameters to the notebook are "injected" into the notebook by tagging the cell as "parameters". + Please refer to + [Ploomber engine](https://engine.ploomber.io/en/latest/user-guide/running.html#parametrizing-notebooks) + for more details. + + + For example, the initial parameters will be passed to the notebook as shown below. + +
+ ![Image title](/assets/screenshots/notebook_input_parameters.png){ width="800" height="600"} +
+
+ + The cell with the ```parameters``` tag will be introspected and variables defined in that cell would be + replaced with the variables passed into the notebook during execution. + + Nested parameters will be sent in as a ```dict ```. + + ### ^^Output parameters from the notebook^^ + + Similar to the input parameters, outputs from the notebook ca be indicated by tagging the cell. Please + ensure The tagged cell should ```print``` the dictionary as the output and nothing else. + + The default ```tag``` to indicate output parameters is ```magnus_output``` but can be configured by + ```output_cell_tag``` while defining the task in both SDK and yaml. + + +
+ ![Image title](/assets/screenshots/notebook_output_parameters.png){ width="800" height="600"} +
+
+ + + +=== "Using the API" + + As seen in [python tasks](#python_functions), you can use the python API's to get and set parameters. + + + === "pipeline definition" + + The below pipeline reuses the native parameters notebook to modify the initial parameters, retrieves + them via the ```get_parameter``` API and updates the parameters by ```set_parameter``` API. + + ```yaml title="Pipeline definition" + --8<-- "examples/concepts/notebook_api_parameters.yaml" + ``` + + === "Notebook" + + Below is just a screenshot of the notebook, the original notebook can be found at + ```examples/concepts/notebook_api_parameters.ipynb``` + + +
+ ![Image title](/assets/screenshots/notebook_api_parameters.png){ width="800" height="600"} +
+
+ + + In the output notebook, you might see a cell with a tag ```injected_parameters``` at the + start of the notebook, this should not interfere with the way the notebook behaves. + + +=== "Using environment variables" + + As seen in [python tasks](#python_functions), you can get/set the parameters by using environment variables. + Any variable with prefix ```MAGNUS_PRM_``` is identified to be a parameter. + + === "pipeline definition" + + The below pipeline reuses the native parameters notebook to modify the initial parameters, by using environment variables. + + ```yaml title="Pipeline definition" + --8<-- "examples/concepts/notebook_env_parameters.yaml" + ``` + + === "Notebook" + + Below is just a screenshot of the notebook, the original notebook can be found at + ```examples/concepts/notebook_env_parameters.ipynb``` + + +
+ ![Image title](../assets/screenshots/notebook_env_parameters.png){ width="800" height="600"} +
+
+ + +### Passing data and execution logs + +Please refer to [catalog](/concepts/catalog) for more details and examples on passing +data between tasks and the storage of execution logs. + + +--- + +## Shell + +[Python functions](#python_functions) and [Jupyter notebooks](#notebook) provide a rich interface to the python +ecosystem while shell provides a interface to non-python executables. + +We internally use [Popen](https://docs.python.org/3.8/library/subprocess.html#subprocess.Popen) +to execute the command. + +### Example + +```yaml title="Pipeline definition" +--8<-- "examples/concepts/task_shell_simple.yaml" +``` + +### Fields + +- ```command``` : Should refer to the exact command to execute. Multiple commands can be run by using the ```&&``` delimiter. +- ```command_type```: Should be shell. +- [next](../pipeline/#linking): is required for any step of the pipeline except for success and fail steps. +- [on_failure](../pipeline/#on_failure): Name of the step to execute if the step fails. +- catalog: Optional required for data access patterns from/to the central storage. + + +### Accessing parameters + +Please refer to [Initial Parameters](/concepts/parameters/#initial_parameters) for more information about setting +initial parameters. + +Assuming the initial parameters are: + +```yaml +--8<-- "examples/concepts/parameters.yaml" +``` + +- [x] Passing parameters between steps + +The only way ```shell``` commands can pass parameters between steps is via the ```environment``` variables. +Any environment variable with prefix ```MAGNUS_PRM_``` should be understood as a parameter inside the shell +script/command. Nested parameters are set in json string format. + +To pass parameter to downstream steps, set/update environment variables with ```MAGNUS_PRM_``` prefix. The +execution environment is "scanned" for updated environment variables and stored for downstream steps. + +```yaml linenums="1" +--8<-- "examples/concepts/task_shell_parameters.yaml" +``` + +In the above example, the execution is specified with initial parameters by the ```-p``` option. + +In line 18, we just display the parameters prefixed by ```MAGNUS_PRM_``. The next step ```modify_initial``` +updates the parameters by setting new environment variables in line 26 and 27. + +The next step ```display_again``` displays the updated parameters and updates them for downstream steps in +lines 33-35. + + +!!! note "Output" + + You might notice that the output might have a few extra lines starting with ```MAGNUS```. You can ignore + them as they are generated by internal mechanisms of magnus. + + + + +### Passing data and execution logs + +Please refer to [catalog](/concepts/catalog) for more details and examples on passing +data between tasks and the storage of execution logs. + + +## Experiment tracking + +Please refer to [experiment tracking](/concepts/experiment-tracking) for more details and examples on experiment tracking. diff --git a/docs/concepts/the-big-picture.md b/docs/concepts/the-big-picture.md new file mode 100644 index 00000000..62bd9208 --- /dev/null +++ b/docs/concepts/the-big-picture.md @@ -0,0 +1,224 @@ +Magnus revolves around the concept of pipelines or workflows and tasks that happen within them. + +--- + +A [workflow](/concepts/pipeline) is simply a series of steps that you want to execute for a desired outcome. + +``` mermaid +%%{ init: { 'flowchart': { 'curve': 'linear' } } }%% +flowchart LR + + step1:::green + step1([Step 1]) --> step2:::green + step2([Step 2]) --> step3:::green + step3([Step .. 
]) --> step4:::green + step4([Step n]) --> suc([success]):::green + + classDef green stroke:#0f0 + +``` + +To define a workflow, we need: + +- [List of steps](/concepts/pipeline/#steps) +- [starting step](/concepts/pipeline/#start_at) +- Next step + + - [In case of success](/concepts/pipeline/#linking) + - [In case of failure](/concepts/pipeline/#on_failure) + +- [Terminating](/concepts/pipeline/#terminating) + +The workflow can be defined either in ```yaml``` or using the [```python sdk```](/sdk). + +--- + +A step in the workflow can be: + + +=== "task" + + A step in the workflow that does a logical unit work. + + The unit of work can be a [python function](/concepts/task/#python_functions), + a [shell script](/concepts/task/#shell) or a + [notebook](/concepts/task/#notebook). + + All the logs, i.e stderr and stdout or executed notebooks are stored + in [catalog](/concepts/catalog) for easier access and debugging. + + + +=== "stub" + + An [abstract step](/concepts/stub) that is not yet fully implemented. + + For example in python: + + ```python + def do_something(): + pass + ``` + + +=== "parallel" + + A step that has a defined number of [parallel workflows](/concepts/parallel) executing + simultaneously. + + In the below visualisation, the green lined steps happen in sequence and wait for the previous step to + successfully complete. + + The branches lined in yellow run in parallel to each other but sequential within the branch. + + ```mermaid + flowchart TD + + getFeatures([Get Features]):::green + trainStep(Train Models):::green + ensembleModel([Ensemble Modelling]):::green + inference([Run Inference]):::green + success([Success]):::green + + prepareXG([Prepare for XGBoost]):::yellow + trainXG([Train XGBoost]):::yellow + successXG([XGBoost success]):::yellow + prepareXG --> trainXG --> successXG + + trainRF([Train RF model]):::yellow + successRF([RF Model success]):::yellow + trainRF --> successRF + + + getFeatures --> trainStep + trainStep --> prepareXG + trainStep --> trainRF + successXG --> ensembleModel + successRF --> ensembleModel + ensembleModel --> inference + inference --> success + + + classDef yellow stroke:#FFFF00 + classDef green stroke:#0f0 + + + ``` + + +=== "map" + + A step that executes a workflow over an [iterable parameter](/concepts/map). + + The step "chunk files" identifies the number of files to process and computes the start index of every + batch of files to process for a chunk size of 10, the stride. 
+ + "Process Chunk" pipelines are then triggered in parallel to process the chunk of files between ```start index``` + and ```start index + stride``` + + ```mermaid + flowchart TD + chunkify([Chunk files]):::green + success([Success]):::green + + subgraph one[Process Chunk] + process_chunk1([Process Chunk]):::yellow + success_chunk1([Success]):::yellow + + process_chunk1 --> success_chunk1 + end + + subgraph two[Process Chunk] + process_chunk2([Process Chunk]):::yellow + success_chunk2([Success]):::yellow + + process_chunk2 --> success_chunk2 + end + + subgraph three[Process Chunk] + process_chunk3([Process Chunk]):::yellow + success_chunk3([Success]):::yellow + + process_chunk3 --> success_chunk3 + end + + subgraph four[Process Chunk] + process_chunk4([Process Chunk]):::yellow + success_chunk4([Success]):::yellow + + process_chunk4 --> success_chunk4 + end + + subgraph five[Process Chunk] + process_chunk5([Process Chunk]):::yellow + success_chunk5([Success]):::yellow + + process_chunk5 --> success_chunk5 + end + + + + chunkify -- (stride=10, start_index=0)--> one --> success + chunkify -- (stride=10, start_index=10)--> two --> success + chunkify -- (stride=10, start_index=20)--> three --> success + chunkify -- (stride=10, start_index=30)--> four --> success + chunkify -- (stride=10, start_index=40)--> five --> success + + classDef yellow stroke:#FFFF00 + classDef green stroke:#0f0 + ``` + + + +--- + +A [step type of task](/concepts/task) is the functional unit of the pipeline. + +To be useful, it can: + +- Access parameters + + - Either [defined statically](/concepts/parameters/#initial_parameters) at the start of the + pipeline + - Or by [upstream steps](/concepts/parameters/#parameters_flow) + +- [Publish or retrieve artifacts](/concepts/catalog) from/to other steps. +- [Publish metrics](/concepts/experiment-tracking) that are interesting. +- Have [access to secrets](/concepts/secrets). + +All the above functionality is possible either via: + +- Non intrusive ways: Your code does not have anything specific to magnus. + + - Application native way. + - Or via environment variables. + +- Or via the [python API](/interactions) which involves ```importing magnus``` in your code. + +--- + +All executions of the pipeline should be: + +- [Reproducible](/concepts/run-log) for audit and data lineage purposes. +- Runnable at local environments for +[debugging failed runs](/concepts/run-log/#retrying_failures). + +--- + +Executions of pipeline should be scalable and use the infrastructure at +your disposal efficiently. + +Pipelines should be portable between different infrastructure patterns. +Infrastructure patterns change all the time and +so are the demands from the infrastructure. + +We achieve this by [changing configurations](/configurations/overview), rather than +changing the application code. + +For example a pipeline should be able to run: + +- Local machines + local file system for data + database for logs + mlflow for experiment +tracking. +- argo executor + cloud accessible storage for data and logs + mlflow for experiment tracking + +without any change in the code. diff --git a/docs/configurations/catalog.md b/docs/configurations/catalog.md new file mode 100644 index 00000000..a12d5609 --- /dev/null +++ b/docs/configurations/catalog.md @@ -0,0 +1,279 @@ +Catalog provides a way to store and retrieve data generated by the individual steps of the dag to downstream +steps of the dag. Please refer to [concepts](/concepts/catalog) for more detailed information. 
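+
+As a rough sketch of how a step declares what it wants to "put" into or "get" from the catalog,
+assuming the ```Catalog``` and ```Task``` constructs of the python SDK (the step mirrors the
+"Create Content" example used elsewhere in this documentation; see the SDK reference for the
+authoritative signatures):
+
+```python
+from magnus import Catalog, Task
+
+# Ask the step to "put" the file it creates into the configured catalog,
+# so that downstream steps can "get" it.
+write_catalog = Catalog(put=["data/hello.txt"])
+
+create_content = Task(
+    name="Create Content",
+    command='echo "Hello from magnus" >> data/hello.txt',
+    command_type="shell",
+    catalog=write_catalog,
+)
+```
+
+Which backend actually stores the cataloged data is decided purely by the configurations described below.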
+ + +## do-nothing + +A noop implementation which does nothing. + + +### Configuration + +```yaml linenums="1" +catalog: + type: do-nothing +``` + + +
+ + +## file-system + +In this configuration, the local folder is used a catalog store. The default location is +```.catalog```. Every execution of the pipeline will create a new directory by the ```run_id``` +to store all the generated artifacts. + +### Configuration + + +```yaml linenums="1" +catalog: + type: file-system + config: + catalog_location: .catalog # default value +``` + +### Example + +=== "Configuration" + + ```yaml linenums="1" + --8<-- "examples/configs/fs-catalog.yaml" + ``` + + 1. Use local file-system as catalog, default location is ```.catalog``` + +=== "Pipeline" + + ```python linenums="1" + --8<-- "examples/catalog_simple.py" + ``` + +=== "Catalog structure" + + The files suffixed by ```.execution.log``` are stdout and stderr of the command. + + ``` + .catalog + └── juicy-blackwell-0625 + ├── Create_Content.execution.log + ├── Setup.execution.log + └── data + └── hello.txt + + 3 directories, 3 files + ``` + +=== "Run log entry" + + All the execution logs of steps along with files are stored in the catalog. Please + look at the highlighted lines in the run log. + + ```json linenums="1" hl_lines="38-46 77-92" + { + "run_id": "juicy-blackwell-0625", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "Setup": { + "name": "Setup", + "internal_name": "Setup", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-04 06:25:26.014967", + "end_time": "2024-02-04 06:25:26.026029", + "duration": "0:00:00.011062", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Setup.execution.log", + "data_hash": "b38eb7b5290ff433276a75fdd7a3935335aedff3ab5ee8714f6ea735d9c9492c", + "catalog_relative_path": "juicy-blackwell-0625/Setup.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "Create Content": { + "name": "Create Content", + "internal_name": "Create Content", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-04 06:25:26.092282", + "end_time": "2024-02-04 06:25:26.100095", + "duration": "0:00:00.007813", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Create_Content.execution.log", + "data_hash": "b38eb7b5290ff433276a75fdd7a3935335aedff3ab5ee8714f6ea735d9c9492c", + "catalog_relative_path": "juicy-blackwell-0625/Create_Content.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + }, + { + "name": "data/hello.txt", + "data_hash": "50e75c30352e8ef442b2b5be37dd19533f9334faaf8c4e41f2b528df57d3c20c", + "catalog_relative_path": "juicy-blackwell-0625/data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] 
+ }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-04 06:25:26.165278", + "end_time": "2024-02-04 06:25:26.165355", + "duration": "0:00:00.000077", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/fs-catalog.yaml", + "tag": "", + "run_id": "juicy-blackwell-0625", + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "Setup", + "name": "", + "description": "", + "steps": { + "Setup": { + "type": "task", + "name": "Setup", + "next": "Create Content", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command": "mkdir -p data", + "command_type": "shell", + "node_name": "Setup" + }, + "Create Content": { + "type": "task", + "name": "Create Content", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [], + "put": [ + "data/hello.txt" + ] + }, + "max_attempts": 1, + "command": "echo \"Hello from magnus\" >> data/hello.txt", + "command_type": "shell", + "node_name": "Create Content" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` diff --git a/docs/configurations/executors/argo.md b/docs/configurations/executors/argo.md new file mode 100644 index 00000000..bb732e31 --- /dev/null +++ b/docs/configurations/executors/argo.md @@ -0,0 +1,1650 @@ +[Argo workflows](https://argo-workflows.readthedocs.io/en/latest/) is a powerful +container orchestration framework for Kubernetes and it can run on any Kubernetes environment. + +**magnus** will transpile pipeline definition to argo specification during the pipeline execution which +you can then upload to the cluster either manually or via CICD (recommended). + +- [x] Execute the pipeline in any cloud environment. +- [x] Massively scalable. +- [x] Ability to provide specialized compute environments for different steps of the pipeline. +- [ ] Expects a mature cloud kubernetes environment and expertise. + +Magnus provides *sensible* defaults to most of the configuration variables but it is highly advised +to get inputs from infrastructure teams or ML engineers in defining the configuration. + + +## Configuration + +Only ```image``` is the required parameter. 
Please refer to the +[note on containers](/configurations/executors/container-environments) on building images. + + +```yaml linenums="1" +executor: + type: argo + config: + name: + annotations: + labels: + namespace: + image: + pod_gc: + max_workflow_duration_in_seconds: + node_selector: + parallelism: + service_account_name: + resources: + retry_strategy: + max_step_duration_in_seconds: + tolerations: + image_pull_policy: + expose_parameters_as_inputs: + output_file: + secrets_from_k8s: + persistent_volumes: +``` + + +### Defaults + + +!!! warning "Default values" + + Ensure that these default values fit your needs to avoid unexpected behavior. + +
+
+| Parameter | Default | Argo Field |
+| :-----------: | :-------------: | :------------: |
+| name | ```magnus-dag-``` | ```generateName``` |
+| annotations | ```{}``` | ```annotations``` of ```metadata``` |
+| labels | ```{}``` | ```labels``` |
+| pod_gc | ```OnPodCompletion``` | ```podGC``` |
+| service_account_name | ```None``` | ```serviceAccountName``` of spec |
+| secrets_from_k8s | ```[]``` | ```env``` of the container |
+| expose_parameters_as_inputs | True | NA |
+| max_workflow_duration_in_seconds | 86400 seconds = 1 day | ```activeDeadlineSeconds``` of spec |
+| node_selector | ```{}``` | ```nodeSelector``` |
+| parallelism | ```None``` | ```parallelism``` of spec |
+| resources | limits: 1Gi of memory and 250m of CPU | ```resources``` of the container |
+| retry_strategy | ```None``` | ```retryStrategy``` of the spec |
+| max_step_duration_in_seconds | 60 * 60 * 2 = 2 hours | ```activeDeadlineSeconds``` of container |
+| tolerations | ```{}``` | ```tolerations``` of the container |
+| image_pull_policy | ```""``` | ```imagePullPolicy``` of the container |
+| persistent_volumes | ```None``` | ```volumes``` of spec and ```volumeMounts``` of the container |
+
+
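+
+Since ```image``` is the only required parameter, a configuration that accepts all of the defaults
+above can be as small as the sketch below; the image name is illustrative and should point to a
+docker image built as described in the
+[note on containers](/configurations/executors/container-environments/).
+
+```yaml linenums="1"
+executor:
+  type: argo
+  config:
+    image: my-registry.example.com/my-pipeline:2024.1 # illustrative image name and tag
+```
+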
+ +### Notes + +#### The following parameters cannot be overridden at individual step level. + +- ```name```: Using a name provides a logical way to organize pipelines. +- ```pod_gc```: Defines the pod garbage collection strategy. Setting to ```OnPodCompletion``` will mark the +pod for garbage collection immediately after completion, either success or failure. +- ```annotations```: [Unstructured key value pairs](http://kubernetes.io/docs/user-guide/annotations) +that can be added to K8's resources. +- ```labels```: Dictionary of labels to apply to all the objects of the workflow. +- ```service_account_name```: Name of the service account to be used to run the workflow. +- ```max_workflow_duration_in_seconds```: The default value is 1 day for the completion of the workflow. Kubernetes +will actively try to fail the pipeline after this duration. + +!!! tip inline end "Volumes" + + As the persistent volumes are attached to the pod at specified path, it allows for ```file-system``` based + catalog or run log store to work without any modifications. + + For example, ```/mnt``` folder can be used as the + ```parent``` directory for file-system run log store and catalog. + + + +- ```persistent_volumes```: Persistent volumes from the underlying Kubernetes cluster to be assigned to the pods. +You can attach multiple persistent volumes to the pods as long as there are no clashes with mount paths. + +#### Example: + + + +The following adds the volume ```magnus-volume``` to every container of the workflow at ```/mnt``` + +```yaml +persistent_volumes: + - name: magnus-volume + mount_path: /mnt +``` + +- ```secrets_from_k8s```: List of secrets from the Kubernetes cluster to be exposed as environment variables. + +!!! tip inline end "Secrets" + + As the secrets are exposed as environment variables, the application can then be configured using + ```env-secrets-manager``` as a convenient way to access K8's secrets. + + +#### Example: +In the example below, the secret ```connection_string``` from ```postgres``` secret of K8's is exposed as +```connection_string``` to the container. + +```yaml +secrets_from_k8s: + - environment_variable: connection_string + secret_name: postgres + secret_key: connection_string +``` + +- ```expose_parameters_as_inputs```: Expose parameters of simple python data types (str, int, float) +as inputs to the workflow. This allows for changing the parameters at runtime. + +#### Example: + + +=== "Initial Parameters" + + Assumed to present at ```examples/concepts/parameters.yaml``` + + ```yaml + --8<-- "examples/concepts/parameters.yaml" + ``` + + +=== "pipeline" + + Execute the pipeline as: + ```magnus execute -f examples/concepts/task_shell_parameters.yaml -p examples/concepts/parameters.yaml -c examples/configs/argo-config.yaml``` + + ```yaml linenums="1" + --8<-- "examples/concepts/task_shell_parameters.yaml" + ``` + +=== "argo workflow" + + The initial parameter of ```spam``` is exposed and defaulted to ```Hello``` as per the parameters file. + The ```run_id``` is also a configurable run time parameter. 
+ + ```yaml linenums="1" hl_lines="151-156" + apiVersion: argoproj.io/v1alpha1 + kind: Workflow + metadata: + generateName: magnus-dag- + annotations: {} + labels: {} + spec: + activeDeadlineSeconds: 172800 + entrypoint: magnus-dag + podGC: + strategy: OnPodCompletion + retryStrategy: + limit: '0' + retryPolicy: Always + backoff: + duration: '120' + factor: 2 + maxDuration: '3600' + serviceAccountName: default-editor + templates: + - name: magnus-dag + failFast: true + dag: + tasks: + - name: access-initial-task-cybkoa + template: access-initial-task-cybkoa + depends: '' + - name: modify-initial-task-6lka8g + template: modify-initial-task-6lka8g + depends: access-initial-task-cybkoa.Succeeded + - name: display-again-task-6d1ofy + template: display-again-task-6d1ofy + depends: modify-initial-task-6lka8g.Succeeded + - name: success-success-igw6ct + template: success-success-igw6ct + depends: display-again-task-6d1ofy.Succeeded + - name: access-initial-task-cybkoa + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - access%initial + - --log-level + - WARNING + - --file + - examples/concepts/task_shell_parameters.yaml + - --config-file + - examples/configs/argo-config.yaml + - --parameters-file + - examples/concepts/parameters.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + env: + - name: MAGNUS_PRM_spam + value: '{{workflow.parameters.spam}}' + - name: modify-initial-task-6lka8g + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - modify%initial + - --log-level + - WARNING + - --file + - examples/concepts/task_shell_parameters.yaml + - --config-file + - examples/configs/argo-config.yaml + - --parameters-file + - examples/concepts/parameters.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: display-again-task-6d1ofy + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - display%again + - --log-level + - WARNING + - --file + - examples/concepts/task_shell_parameters.yaml + - --config-file + - examples/configs/argo-config.yaml + - --parameters-file + - examples/concepts/parameters.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: success-success-igw6ct + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - success + - --log-level + - WARNING + - --file + - examples/concepts/task_shell_parameters.yaml + - --config-file + - examples/configs/argo-config.yaml + - --parameters-file + - examples/concepts/parameters.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + templateDefaults: + activeDeadlineSeconds: 7200 + timeout: 10800s + arguments: + parameters: + - name: spam + value: Hello + - name: run_id + value: '{{workflow.uid}}' + volumes: + - name: executor-0 + persistentVolumeClaim: + claimName: 
magnus-volume + + ``` + + +=== "Run Submission" + +
+ ![Image](/assets/screenshots/argo-expose-parameters.png){ width="800" height="600"} +
argo workflows UI exposing the parameters
+
+ + +=== "Step Log" + + The ```step log``` of the first step, ```access initial``` receives the value of the parameter ```spam``` as + ```No-Hello``` from the UI submission. + + + ```json linenums="1" hl_lines="25-30" + { + "name": "access initial", + "internal_name": "access initial", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-01 14:44:06.023052", + "end_time": "2024-02-01 14:44:06.031187", + "duration": "0:00:00.008135", + "status": "SUCCESS", + "message": "", + "parameters": { + "spam": "No-Hello", + "eggs": { + "ham": "Yes, please!!" + } + } + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + ``` + + +#### The following parameters can be configured at step level using overrides: + +- parallelism: Controls the number of parallel tasks that can happen at once. By default, +there is no limit either for ```parallel``` or ```map``` nodes. +To control the parallelism of a ```map``` or ```parallel```, provide an ```override``` +in the overrides section. + +The parallelism constraint [only applies to the step](https://github.com/argoproj/argo-workflows/blob/main/examples/parallelism-nested.yaml), any nested steps within the step have the ```default``` parallelism. + + +#### Example: + + +=== "Without Override" + + By default, there is no limit on the number of parallel tasks that can be run. + + === "Configuration" + + The argo config is a very basic configuration. + + ```yaml linenums="1" + --8<-- "examples/configs/argo-config.yaml" + ``` + + === "Pipeline" + + This example is the same as [detailed in map](/concepts/map). + + ```yaml linenums="1" hl_lines="22-23 25-36" + --8<-- "examples/concepts/map.yaml" + ``` + + === "Workflow execution" + + From the ```gant``` chart representation of the workflow execution, we can see that all the ```execute_task``` + tasks execute simultaneously. + +
+ ![Image](/assets/screenshots/argo-parallel-map.png){ width="800" height="600"} +
argo workflows UI showing the parallel execution of the map tasks
+
+ + +=== "With Override" + + === "Configuration" + + While the global configuration has no limit on parallelism, any task using ```sequential``` override would + run sequentially. + + ```yaml linenums="1" hl_lines="9-11" + --8<-- "examples/configs/argo-config-sequential.yaml" + ``` + + + === "Pipeline" + + The pipeline defined here is nearly the same as [detailed in map](/concepts/map) with the + only exception in lines 25-26 which use the ```sequential``` override. + + ```yaml linenums="1" hl_lines="22-23 25-36" + --8<-- "examples/executors/argo-map-sequential.yaml" + ``` + + + === "Workflow execution" + + The workflow execution from the ```gant``` chart shows the execution of ```execute task``` is sequential + instead of parallel as seen in the default. + +
+ ![Image](/assets/screenshots/argo-sequential-map.png){ width="800" height="600"} +
argo workflows UI showing the sequential execution of the map tasks
+
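+
+For reference, the ```sequential``` override used in the example above might look like the sketch
+below. The override name is arbitrary and ```parallelism: 1``` is an assumption about how
+```examples/configs/argo-config-sequential.yaml``` limits the branches to one at a time; the image
+name is illustrative.
+
+```yaml linenums="1"
+executor:
+  type: argo
+  config:
+    image: my-registry.example.com/my-pipeline:2024.1 # illustrative image name and tag
+    overrides:
+      sequential:
+        parallelism: 1 # run only one branch of a map/parallel node at a time
+```
+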
+ + + +- ```node_selector``` and ```tolerations```: Gives you the ability to selectively choose a node to run your task. +See more information about [node selector](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes/) +and [tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) for more information. + + + +- resources: Has the [same structure as K8's manifest](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/). To use a GPU, you can mention the maximum number of GPUs in ```limits``` section. +The default value is 1Gi of memory and 250m of cpu with no GPU. To override the resources for a specific task, +use ```overrides``` section. + +Example: + +=== "Configuration" + ```yaml title="Argo configuration with override" + executor: + type: argo + config: + image: + overrides: + BiggerMachine: + requests: + memory: 4Gi + cpu: 4 + ``` + +=== "pipeline" + + In this example, the ```run on bigger machine``` will run on a node that can provide 4 CPU cores and 4GB memory. + + ```yaml + dag: + steps: + run on bigger machine: + type: task + override: + argo: BiggerMachine + + ``` + + +- ```max_step_duration_in_seconds```: Defines the maximum +[amount of time](https://argo-workflows.readthedocs.io/en/latest/walk-through/timeouts/) a task can +take for completion. The default value is 2 hours and an additional 1 hour is given for ```timeout```. + +- ```retry_strategy```: Defines the strategy to retry in case of failure. The default retry policy is ```Always```, +i.e in case of failure in execution of task or any other infrastructure failures. Please see +[argo workflows documentation](https://argo-workflows.readthedocs.io/en/latest/walk-through/retrying-failed-or-errored-steps/) +for more information. As with other parameters, this can be overridden for individual task nodes. + +- ```image_pull_policy```: Defaults to not setting the field. This behavior does not pull the image for any tag +[other than ```latest```](https://kubernetes.io/docs/concepts/containers/images/#imagepullpolicy-defaulting) + + +## Compatibility + +As argo workflows is a cloud based executor, not all the services are compatible with it. + +- Run log: All steps of the workflow need access to the run log as such ```buffered``` run log +store would not be compatible. ```file-system``` based run log store is compatible by +using volumes that are available for all the steps of the workflow, eg. persistent volumes. + +- catalog: Any catalog service that is available for all the steps of the workflow is compatible. +```file-system``` is compatible as long as the catalog store is mounted as a volume similar to the run log store. + +- secrets: It is possible to use ```dotenv``` secrets manager as long as the file is available +during the execution of the task. We highly recommend ```.env``` files to be excluded from the +code versioning tools. We recommend using ```secrets_from_k8s``` in the configuration. + + +## Example + + +=== "configuration" + + Assumed to be present at ```examples/configs/argo-config.yaml``` + + The docker image is a [variable](/configurations/executors/container-environments/#dynamic_name_of_the_image) and + dynamically set during execution. + + ```yaml linenums="1" hl_lines="4" + --8<-- "examples/configs/argo-config.yaml" + ``` + + 1. Use ```argo``` executor type to execute the pipeline. + 2. By default, all the tasks are executed in the docker image . 
Please + refer to [building docker images](/configurations/executors/container-environments/) + 3. Mount the persistent volume ```magnus-volume``` to all the containers as ```/mnt```. + 4. Store the run logs in the file-system. As all containers have access to ```magnus-volume``` + as ```/mnt```. We use that to mounted folder as run log store. + + +=== "python SDK" + + Running the SDK defined pipelines for any container based executions [happens in + multi-stage process](/configurations/executors/container-environments/). + + 1. Generate the ```yaml``` definition file by: + ```MAGNUS_CONFIGURATION_FILE=examples/configs/argo-config.yaml python examples/concepts/simple.py``` + 2. Build the docker image with yaml definition in it, called magnus:latest in current example. + 3. Execute the pipeline via the magnus CLI, + ```MAGNUS_VAR_argo_docker_image=magnus:latest magnus execute -f magnus-pipeline.yaml -c examples/configs/argo-config.yaml``` + + + ```python linenums="1" hl_lines="24" + --8<-- "examples/concepts/simple.py" + ``` + + 1. You can provide a configuration file dynamically by using the environment + variable ```MAGNUS_CONFIGURATION_FILE```. Please see [SDK for more details](../../sdk). + + +=== "yaml" + + For yaml based definitions, the execution order is to: + + 1. Build the docker image with the yaml definition in it, called magnus:latest in current example. + 2. Execute the pipeline via the magnus CLI: + ```MAGNUS_VAR_argo_docker_image=magnus:latest magnus execute -f examples/concepts/simple.yaml -c examples/configs/argo-config.yaml``` + + ```yaml linenums="1" + --8<-- "examples/concepts/simple.yaml" + ``` + +=== "Argo workflow definition" + + ```yaml linenums="1" + --8<-- "examples/configs/argo-pipeline.yaml" + ``` + +=== "Screenshots" + +
+ ![Image](/assets/screenshots/argo-workflows-gant.png){ width="800" height="600"} +
argo workflows UI showing the pipeline
+
+ +
+ ![Image](/assets/screenshots/argo-workflows-logs.png){ width="800" height="600"} +
argo workflows UI showing the logs
+
+ + +=== "Run Log" + + The run log structure is the same as any other executor. Any failed executions in the + workflow can be executed in ```local``` by providing this run log and any catalog files. + + ```json + { + "run_id": "bb96359d-74f0-4837-90e3-94aed85dbb8f", + "dag_hash": "d467805d7f743d459a6abce95bedbfc6c1ecab67", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "simple": { + "name": "simple", + "internal_name": "simple", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-31 06:43:01.937309", + "end_time": "2024-01-31 06:43:01.940862", + "duration": "0:00:00.003553", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": false, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-31 06:43:26.537710", + "end_time": "2024-01-31 06:43:26.544461", + "duration": "0:00:00.006751", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "argo", + "service_type": "executor", + "enable_parallel": false, + "overrides": {}, + "image": "$argo_docker_image", + "expose_parameters_as_inputs": true, + "output_file": "argo-pipeline.yaml", + "name": "magnus-dag-", + "annotations": {}, + "labels": {}, + "namespace": null, + "activeDeadlineSeconds": 172800, + "nodeSelector": null, + "parallelism": null, + "branch_parallelism": 0, + "retryStrategy": { + "limit": "0", + "retryPolicy": "Always", + "backoff": { + "duration": "120", + "factor": 2, + "maxDuration": "3600" + } + }, + "max_step_duration_in_seconds": 7200, + "tolerations": null, + "image_pull_policy": "", + "service_account_name": null, + "secrets_from_k8s": [], + "persistent_volumes": [ + { + "name": "magnus-volume", + "mount_path": "/mnt" + } + ], + "step_timeout": 14400 + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": "/mnt/run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "do-nothing", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/concepts/simple.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/argo-config.yaml", + "tag": "", + "run_id": "bb96359d-74f0-4837-90e3-94aed85dbb8f", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "simple", + "name": "", + "description": null, + "steps": { + "simple": { + 
"type": "task", + "name": "simple", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.simple.simple_function", + "command_type": "python", + "node_name": "simple" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "d467805d7f743d459a6abce95bedbfc6c1ecab67", + "execution_plan": "chained" + } + } + ``` + + +## Nesting + +Magnus compiled argo workflows support deeply nested workflows. + +### Example + +=== "Nested workflow" + + This is the same example as shown in [nested](/concepts/nesting). + + ```yaml linenums="1" + --8<-- "examples/concepts/nesting.yaml" + ``` + + +=== "Configuration" + + Assumed to be present at ```examples/configs/argo-config.yaml``` + + The docker image is a [variable](/configurations/executors/container-environments/) and + dynamically set during execution. + + ```yaml linenums="1" hl_lines="4" + --8<-- "examples/configs/argo-config.yaml" + ``` + + 1. Use ```argo``` executor type to execute the pipeline. + 2. By default, all the tasks are executed in the docker image . Please + refer to [building docker images](#container_environments) + 3. Mount the persistent volume ```magnus-volume``` to all the containers as ```/mnt```. + 4. Store the run logs in the file-system. As all containers have access to ```magnus-volume``` + as ```/mnt```. We use that to mounted folder as run log store. + + +=== "Argo workflow" + + ```yaml linenums="1" + apiVersion: argoproj.io/v1alpha1 + kind: Workflow + metadata: + generateName: magnus-dag- + annotations: {} + labels: {} + spec: + activeDeadlineSeconds: 172800 + entrypoint: magnus-dag + retryStrategy: + limit: "0" + retryPolicy: Always + backoff: + duration: "120" + factor: 2 + maxDuration: "3600" + serviceAccountName: default-editor + templates: + - name: inner-most-map-map-yeslqe-map + inputs: + parameters: + - name: xarg + - name: yarg + failFast: true + dag: + tasks: + - name: executable-stub-blnf25 + template: executable-stub-blnf25 + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: yarg + value: "{{inputs.parameters.yarg}}" + - name: success-success-trvgst + template: success-success-trvgst + depends: executable-stub-blnf25.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: yarg + value: "{{inputs.parameters.yarg}}" + - name: inner-most-map-map-yeslqe + inputs: + parameters: + - name: xarg + failFast: true + dag: + tasks: + - name: inner-most-map-map-yeslqe-fan-out + template: inner-most-map-map-yeslqe-fan-out + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: inner-most-map-map-yeslqe-map + template: inner-most-map-map-yeslqe-map + depends: inner-most-map-map-yeslqe-fan-out.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: yarg + value: "{{item}}" + withParam: "{{tasks.inner-most-map-map-yeslqe-fan-out.outputs.parameters.iterate-on}}" + - name: inner-most-map-map-yeslqe-fan-in + template: inner-most-map-map-yeslqe-fan-in + depends: inner-most-map-map-yeslqe-map.Succeeded || inner-most-map-map-yeslqe-map.Failed + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-a + inputs: + parameters: + - name: xarg + failFast: true + dag: + tasks: + - 
name: inner-most-map-map-yeslqe + template: inner-most-map-map-yeslqe + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: success-success-y1yr7v + template: success-success-y1yr7v + depends: inner-most-map-map-yeslqe.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: inner-most-map-map-b206p5-map + inputs: + parameters: + - name: xarg + - name: yarg + failFast: true + dag: + tasks: + - name: executable-stub-8ui1yv + template: executable-stub-8ui1yv + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: yarg + value: "{{inputs.parameters.yarg}}" + - name: success-success-h4j0k9 + template: success-success-h4j0k9 + depends: executable-stub-8ui1yv.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: yarg + value: "{{inputs.parameters.yarg}}" + - name: inner-most-map-map-b206p5 + inputs: + parameters: + - name: xarg + failFast: true + dag: + tasks: + - name: inner-most-map-map-b206p5-fan-out + template: inner-most-map-map-b206p5-fan-out + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: inner-most-map-map-b206p5-map + template: inner-most-map-map-b206p5-map + depends: inner-most-map-map-b206p5-fan-out.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: yarg + value: "{{item}}" + withParam: "{{tasks.inner-most-map-map-b206p5-fan-out.outputs.parameters.iterate-on}}" + - name: inner-most-map-map-b206p5-fan-in + template: inner-most-map-map-b206p5-fan-in + depends: inner-most-map-map-b206p5-map.Succeeded || inner-most-map-map-b206p5-map.Failed + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-b + inputs: + parameters: + - name: xarg + failFast: true + dag: + tasks: + - name: inner-most-map-map-b206p5 + template: inner-most-map-map-b206p5 + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: success-success-dvma7h + template: success-success-dvma7h + depends: inner-most-map-map-b206p5.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: nested-parallel-parallel-wje1o4 + inputs: + parameters: + - name: xarg + failFast: true + dag: + tasks: + - name: nested-parallel-parallel-wje1o4-fan-out + template: nested-parallel-parallel-wje1o4-fan-out + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-a + template: nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-a + depends: nested-parallel-parallel-wje1o4-fan-out.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-b + template: nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-b + depends: nested-parallel-parallel-wje1o4-fan-out.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: nested-parallel-parallel-wje1o4-fan-in + template: nested-parallel-parallel-wje1o4-fan-in + depends: + 
nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-a.Succeeded + || + nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-a.Failed + || + nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-b.Succeeded + || + nested-parallel-parallel-wje1o4-outer-most-map-map-variable-placeholder-nested-parallel-b.Failed + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: outer-most-map-map-0ukhr5-map + inputs: + parameters: + - name: xarg + failFast: true + dag: + tasks: + - name: nested-parallel-parallel-wje1o4 + template: nested-parallel-parallel-wje1o4 + depends: "" + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: success-success-e4lb2k + template: success-success-e4lb2k + depends: nested-parallel-parallel-wje1o4.Succeeded + arguments: + parameters: + - name: xarg + value: "{{inputs.parameters.xarg}}" + - name: outer-most-map-map-0ukhr5 + failFast: true + dag: + tasks: + - name: outer-most-map-map-0ukhr5-fan-out + template: outer-most-map-map-0ukhr5-fan-out + depends: "" + - name: outer-most-map-map-0ukhr5-map + template: outer-most-map-map-0ukhr5-map + depends: outer-most-map-map-0ukhr5-fan-out.Succeeded + arguments: + parameters: + - name: xarg + value: "{{item}}" + withParam: "{{tasks.outer-most-map-map-0ukhr5-fan-out.outputs.parameters.iterate-on}}" + - name: outer-most-map-map-0ukhr5-fan-in + template: outer-most-map-map-0ukhr5-fan-in + depends: outer-most-map-map-0ukhr5-map.Succeeded || outer-most-map-map-0ukhr5-map.Failed + - name: magnus-dag + failFast: true + dag: + tasks: + - name: generate-list-task-s7za4e + template: generate-list-task-s7za4e + depends: "" + - name: outer-most-map-map-0ukhr5 + template: outer-most-map-map-0ukhr5 + depends: generate-list-task-s7za4e.Succeeded + - name: success-success-2v62uq + template: success-success-2v62uq + depends: outer-most-map-map-0ukhr5.Succeeded + - name: generate-list-task-s7za4e + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - generate_list + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: outer-most-map-map-0ukhr5-fan-out + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map + - --mode + - out + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + outputs: + parameters: + - name: iterate-on + valueFrom: + path: /tmp/output.txt + - name: outer-most-map-map-0ukhr5-fan-in + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map + - --mode + - in + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + 
memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: nested-parallel-parallel-wje1o4-fan-out + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel + - --mode + - out + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: nested-parallel-parallel-wje1o4-fan-in + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel + - --mode + - in + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: inner-most-map-map-yeslqe-fan-out + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.a.inner%most%map + - --mode + - out + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + outputs: + parameters: + - name: iterate-on + valueFrom: + path: /tmp/output.txt + inputs: + parameters: + - name: xarg + - name: inner-most-map-map-yeslqe-fan-in + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.a.inner%most%map + - --mode + - in + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: executable-stub-blnf25 + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.a.inner%most%map.map_variable_placeholder.executable + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}", "yarg": "{{inputs.parameters.yarg}}"}' + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - 
name: yarg + - name: success-success-trvgst + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.a.inner%most%map.map_variable_placeholder.success + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}", "yarg": "{{inputs.parameters.yarg}}"}' + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: yarg + - name: success-success-y1yr7v + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.a.success + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: inner-most-map-map-b206p5-fan-out + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.b.inner%most%map + - --mode + - out + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + outputs: + parameters: + - name: iterate-on + valueFrom: + path: /tmp/output.txt + inputs: + parameters: + - name: xarg + - name: inner-most-map-map-b206p5-fan-in + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - fan + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.b.inner%most%map + - --mode + - in + - --file + - examples/concepts/nesting.yaml + - --log-level + - WARNING + - --config-file + - examples/configs/argo-config.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: executable-stub-8ui1yv + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.b.inner%most%map.map_variable_placeholder.executable + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}", "yarg": "{{inputs.parameters.yarg}}"}' + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + 
parameters: + - name: xarg + - name: yarg + - name: success-success-h4j0k9 + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.b.inner%most%map.map_variable_placeholder.success + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}", "yarg": "{{inputs.parameters.yarg}}"}' + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: yarg + - name: success-success-dvma7h + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.nested%parallel.b.success + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: success-success-e4lb2k + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - outer%most%map.map_variable_placeholder.success + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --map-variable + - '{"xarg": "{{inputs.parameters.xarg}}"}' + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + inputs: + parameters: + - name: xarg + - name: success-success-2v62uq + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - success + - --log-level + - WARNING + - --file + - examples/concepts/nesting.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + templateDefaults: + activeDeadlineSeconds: 7200 + timeout: 10800s + arguments: + parameters: + - name: run_id + value: "{{workflow.uid}}" + volumes: + - name: executor-0 + persistentVolumeClaim: + claimName: magnus-volume + + ``` + + +=== "In argo UI" + +
+ ![Image](/assets/screenshots/argo-nested.png){ width="800" height="600"} +
argo workflows UI showing the deeply nested workflows.
+
+
+
+## Kubeflow
+
+Kubeflow pipelines compiles workflows defined in its SDK to Argo workflows and therefore
+supports uploading Argo workflow definitions directly. Below is a screenshot of the
+[map](/concepts/map) pipeline uploaded to Kubeflow.
+
+
+ ![Image](/assets/screenshots/argo-kubeflow-ui.png){ width="800" height="600"} +
Kubeflow UI showing the map workflow definition.
+
+ +
+ ![Image](/assets/screenshots/argo-kubeflow-exec.png){ width="800" height="600"} +
Kubeflow UI showing the map workflow execution.
+
diff --git a/docs/configurations/executors/container-environments.md b/docs/configurations/executors/container-environments.md new file mode 100644 index 00000000..32d18f9b --- /dev/null +++ b/docs/configurations/executors/container-environments.md @@ -0,0 +1,71 @@ +## Pipeline definition + +Executing pipelines in containers needs a ```yaml``` based definition of the pipeline which is +referred during the [task execution](/concepts/executor/#step_execution). + + +Any execution of the pipeline [defined by SDK](/sdk) generates the pipeline +definition in```yaml``` format for all executors apart from the [```local``` executor](../local). + + +Follow the below steps to execute the pipeline defined by SDK. + + +
+
+1. Execute the pipeline by running the python script as you normally would to generate the
+```yaml``` based definition.
+2. Optionally (but highly recommended) version your code using git.
+3. Build the docker image with the ```yaml``` file-based definition as part of the image. We recommend
+tagging the docker image with the short git sha to uniquely identify the docker image (1).
+4. Define a [variable to temporarily hold](https://docs.python.org/3/library/string.html#template-strings) the docker image name in the
+pipeline definition, if the docker image name is not known.
+5. Execute the pipeline using the [magnus CLI](/usage/#usage).
+
+ +1. Avoid using generic tags such as [```latest```](https://docs.docker.com/develop/dev-best-practices/). + +## Dynamic name of the image + + +All containerized executors have a circular dependency problem. + +- The docker image tag is only known after the creation of the image with the ```yaml``` based definition. +- But the ```yaml``` based definition needs the docker image tag as part of the definition. + + + +!!! warning inline end + + Not providing the required environment variable will raise an exception. + +To resolve this, magnus supports ```variables``` in the configuration of executors, both global and in step +overrides. Variables should follow the +[python template strings](https://docs.python.org/3/library/string.html#template-strings) +syntax and are replaced with environment variable prefixed by ```MAGNUS_VAR_```. + +Concretely, ```$identifier``` is replaced by ```MAGNUS_VAR_```. + + +## Dockerfile + +magnus should be installed in the docker image and available in the path. An example dockerfile is provided +below. + +!!! note inline end "non-native orchestration" + + Having magnus to be part of the docker image adds additional dependencies for python to be present in the docker + image. In that sense, magnus is technically non-native container orchestration tool. + + Facilitating native container orchestration, without magnus as part of the docker image, results in a complicated + specification of files/parameters/experiment tracking losing the value of native interfaces to these essential + orchestration concepts. + + With the improvements in python packaging ecosystem, it should be possible to distribute magnus as a + self-contained binary and reducing the dependency on the docker image. + +#### TODO: Change this to a proper example. +```dockerfile linenums="1" +--8<-- "examples/Dockerfile" +``` diff --git a/docs/configurations/executors/local-container.md b/docs/configurations/executors/local-container.md new file mode 100644 index 00000000..b40ccaf5 --- /dev/null +++ b/docs/configurations/executors/local-container.md @@ -0,0 +1,352 @@ + +Execute all the steps of the pipeline in containers. Please refer to the +[note on containers](/configurations/executors/container-environments/) on building images. + +- [x] Provides a way to test the containers and the execution of the pipeline in local environment. +- [x] Any failure in cloud native container environments can be replicated in local environments. +- [x] Ability to provide specialized compute environments for different steps of the pipeline. +- [ ] The scalability is still constrained by the resources in local environment. + + +!!! warning inline end "parallel executions" + + Run logs that use a single json (eg. file-system) are not compatible with parallel + executions due to race conditions to write the same file by different processes. + + Use ```chunked``` run log stores (eg. chunked-fs). + + + +## Configuration + +```yaml +executor: local-container +config: + docker_image: + enable_parallel: false # (1) + auto_remove_container: true # (2) + run_in_local: false # (3) + environment: # (4) + ... + overrides: # (5) + ... +``` + + 1. By default, all tasks are sequentially executed. Provide ```true``` to enable tasks within +[parallel](/concepts/parallel) or [map](/concepts/map) to be executed in parallel. +2. Set it to false, to debug a failed container. +3. Setting it to true will behave exactly like a [local executor](/configurations/executors/local/). +4. Pass any environment variables into the container. +5. 
Please refer to [step overrides](#step_override) for more details. + +The ```docker_image``` field is required and default image to execute tasks +of the pipeline. Individual [tasks](/concepts/task) can +[override](#step_override) the global defaults of executor by providing ```overrides``` + + +!!! tip "Debugging" + + ```auto_remove_container``` allows you to run the failed container independently to + identify the issue that caused the failure. + + ```run_in_local``` allows you to execute a few tasks in local environment to allow + debugging and also selectively choose which step to run in container. + + +## Example + +Nearly all the examples seen in concepts can be executed using +the ```local-container``` configuration. Below is one simple example to concretely show +the patterns. + +=== "Configuration" + + Assumed to be present at ```examples/configs/local-container.yaml``` + + The docker image is a [variable](/configurations/executors/container-environments/#dynamic_name_of_the_image) and + dynamically set during execution. + + ```yaml linenums="1" hl_lines="4" + --8<-- "examples/configs/local-container.yaml" + ``` + + 1. Use local-container executor type to execute the pipeline. + 2. By default, all the tasks are executed in the docker image . Please + refer to [building docker images](/configurations/executors/container-environments/#dynamic_name_of_the_image) + 3. Pass any environment variables that are needed for the container. + 4. Store the run logs in the file-system. Magnus will handle the access to them + by mounting the file system into the container. + + +=== "python sdk" + + Running the SDK defined pipelines for any container based executions [happens in + multi-stage process](/configurations/executors/container-environments/). + + 1. Generate the ```yaml``` definition file by: + ```MAGNUS_CONFIGURATION_FILE=examples/configs/local-container.yaml python examples/concepts/simple.py``` + 2. Build the docker image with yaml definition in it, called magnus:demo in current example. + 3. Execute the pipeline via the magnus CLI, + ```MAGNUS_VAR_default_docker_image=magnus:demo magnus execute -f magnus-pipeline.yaml -c examples/configs/local-container.yaml``` + + + ```python linenums="1" hl_lines="24" + --8<-- "examples/concepts/simple.py" + ``` + + 1. You can provide a configuration file dynamically by using the environment + variable ```MAGNUS_CONFIGURATION_FILE```. Please see [SDK for more details](sdk). + + + +=== "yaml" + + For yaml based definitions, the execution order is to: + + 1. Build the docker image with the yaml definition in it, called magnus:demo in current example. + 2. Execute the pipeline via the magnus CLI: + ```MAGNUS_VAR_default_docker_image=magnus:demo magnus execute -f examples/concepts/simple.yaml -c examples/configs/local-container.yaml``` + + ```yaml linenums="1" + --8<-- "examples/concepts/simple.yaml" + ``` + +=== "Run log" + + The run log structure is the same as any other ```local``` executions apart from + an additional code identity with the information about the docker image. 
+ + + ```json linenums="1" hl_lines="24-30" + { + "run_id": "shortest-stallman-2113", + "dag_hash": "d467805d7f743d459a6abce95bedbfc6c1ecab67", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "simple": { + "name": "simple", + "internal_name": "simple", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ef142998dc315ddbd9aa10e016128c872de6e6e1", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + }, + { + "code_identifier": "sha256:e5cc0936aad4d3cacb3075290729ce834dd2d9c89ea24eea609d7664f99ce50f", + "code_identifier_type": "docker", + "code_identifier_dependable": true, + "code_identifier_url": "local docker host", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-22 21:13:53.676698", + "end_time": "2024-01-22 21:13:53.678976", + "duration": "0:00:00.002278", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "simple.execution.log", + "data_hash": "03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340", + "catalog_relative_path": "shortest-stallman-2113/simple.execution.log", + "catalog_handler_location": "/tmp/catalog/", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ef142998dc315ddbd9aa10e016128c872de6e6e1", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-22 21:13:53.807381", + "end_time": "2024-01-22 21:13:53.807834", + "duration": "0:00:00.000453", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local-container", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/concepts/simple.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/local-container.yaml", + "tag": "", + "run_id": "shortest-stallman-2113", + "variables": { + "default_docker_image": "magnus:demo" + }, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "simple", + "name": "", + "description": null, + "steps": { + "simple": { + "type": "task", + "name": "simple", + "next": "success", + "on_failure": "", + "executor_config": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.simple.simple_function", + "command_type": "python", + "node_name": "simple" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + 
} + }, + "dag_hash": "d467805d7f743d459a6abce95bedbfc6c1ecab67", + "execution_plan": "chained" + } + } + ``` + +## Compatibility + + +## Step override + +Individual steps of the pipeline can over-ride the default configuration by referring to the +specific ```override``` defined in ```overrides``` section of the executor configuration. + +```override``` should be defined per executor and is only applicable for that specific +executor. + +### Example + + +=== "Configuration" + + Assumed to be present at ```examples/executors/local-container-override.yaml``` + + In the example below, we define the default configuration in the executor configuration. + We also provide a override ```custom_docker_image``` which overrides some of the default + configuration parameters. + + + ```yaml linenums="1" hl_lines="7-11" + --8<-- "examples/executors/local-container-override.yaml" + ``` + +=== "python sdk" + + As seen in the above example, + running the SDK defined pipelines for any container based executions [happens in + multi-stage process](/configurations/executors/container-environments/). + + 1. Generate the ```yaml``` definition file by: + ```MAGNUS_CONFIGURATION_FILE=examples/executors/local-container-override.yaml python examples/executors/step_overrides_container.py``` + 2. Build the docker image with yaml definition in it. In this example, we build + two docker images. + + * magnus:3.8 as the default_docker_image. + * magnus:3.9 as the custom_docker_image. + + Both the docker images are same except for the python version. + + 3. Execute the pipeline via the magnus CLI, + ```MAGNUS_VAR_default_docker_image=magnus:3.8 MAGNUS_VAR_custom_docker_image=magnus:3.9 magnus execute -f magnus-pipeline.yaml -c examples/executors/local-container-override.yaml``` + + + You should see the console output of the ```step 1``` to be ```3.8``` and key to be "value" + while the python version for ```step 2``` to be 3.9 and key to be "not-value". + + ```python linenums="1" hl_lines="26" + --8<-- "examples/executors/step_overrides_container.py" + ``` + + +=== "yaml" + + For yaml based definitions, the execution order is to: + + 1. Build the docker image with the yaml definition in it. In this example, we build + two docker images. + + + * magnus:3.8 as the default_docker_image. + * magnus:3.9 as the custom_docker_image. + + + Both the docker images are same except for the python version. + + + 2. Execute the pipeline via the magnus CLI: + ```MAGNUS_VAR_default_docker_image=magnus:3.8 MAGNUS_VAR_custom_docker_image=magnus:3.9 magnus execute -f examples/executors/step_overrides_container.yaml -c examples/executors/local-container-override.yaml``` + + You should see the console output of the ```step 1``` to be ```3.8``` and key to be "value" + while the python version for ```step 2``` to be 3.9 and key to be "not-value". + + ```yaml linenums="1" hl_lines="29-30" + --8<-- "examples/executors/step_overrides_container.yaml" + ``` diff --git a/docs/configurations/executors/local.md b/docs/configurations/executors/local.md new file mode 100644 index 00000000..f1299126 --- /dev/null +++ b/docs/configurations/executors/local.md @@ -0,0 +1,31 @@ +All the steps of the pipeline are executed in the local compute environment in the same shell +as it was triggered. + +- [x] Provides the most comfortable environment for experimentation and development. +- [ ] The scalability is constrained by the local compute environment. +- [ ] Not possible to provide specialized compute environments for different steps of the pipeline. + + +!!! 
warning inline end "parallel executions" + + Run logs that use a single json (eg. file-system) are not compatible with parallel + executions due to race conditions to write the same file by different processes. + + Use ```chunked``` run log stores (eg. chunked-fs). + + + +## Configuration + +```yaml +executor: local +config: + enable_parallel: false # (1) +``` + +1. By default, all tasks are sequentially executed. Provide ```true``` to enable tasks within +[parallel](/concepts/parallel) or [map](/concepts/map) to be executed in parallel. + + + +All the examples in the concepts section are executed using ```local``` executors. diff --git a/docs/configurations/executors/mocked.md b/docs/configurations/executors/mocked.md new file mode 100644 index 00000000..84d26f83 --- /dev/null +++ b/docs/configurations/executors/mocked.md @@ -0,0 +1,1014 @@ +Mocked executors provide a way to control the behavior of ```task``` node types to be either +pass through or execute a alternate command with modified configurations. + +- [x] Runs the pipeline only in local environment. +- [x] Enables unit testing of the pipeline in both yaml and SDK definitions. +- [x] Isolates specific node(s) from the execution for further analysis. +- [ ] Not meant to be used for production deployments + +### Options + +```yaml +executor: mocked +config: + patches: + name of the name: + command_configuration: +``` + +By default, all the ```task``` steps are passed through without an execution. +By providing ```patches```, indexed by the name of the node, gives control on the command +to run and the configuration of the command. + +#### Command configuration for notebook nodes + +```python``` and ```shell``` based tasks have no configuration options apart from the ```command```. +Notebook nodes have additional configuration options [detailed in concepts](/concepts/task/#notebook). Ploomber engine provides [rich options](https://engine.ploomber.io/en/docs/user-guide/debugging/debuglater.html) in debugging failed notebooks. + + +## Example + +### Mocking nodes + +The following example shows the simple case of mocking all the steps of the pipeline. + +=== "pipeline in yaml" + + You can execute the mocked pipeline by: + ```magnus execute -f examples/concepts/simple.yaml -c examples/configs/mocked-config-simple.yaml``` + + ```yaml linenums="1" + --8<-- "examples/concepts/simple.yaml" + ``` + +=== "python sdk" + + You can execute the mocked pipeline by: + + ```MAGNUS_CONFIGURATION_FILE=examples/configs/mocked-config-simple.yaml python examples/concepts/simple.py``` + + ```python linenums="1" + --8<-- "examples/concepts/simple.py" + ``` + +=== "Mocked configuration" + + ```yaml linenums="1" + --8<-- "examples/configs/mocked-config-simple.yaml" + ``` + +=== "Run log" + + The flag ```mock``` is set to be ```true``` for the execution of node simple which + denotes that the task was mocked. 
+ + ```json linenums="1" hl_lines="15" + { + "run_id": "minty-goodall-0528", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "simple": { + "name": "simple", + "internal_name": "simple", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": true, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:28:40.812597", + "end_time": "2024-02-11 05:28:40.812627", + "duration": "0:00:00.000030", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:28:40.883909", + "end_time": "2024-02-11 05:28:40.884310", + "duration": "0:00:00.000401", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "mocked", + "service_type": "executor", + "enable_parallel": false, + "overrides": {}, + "patches": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": ".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/mocked-config-simple.yaml", + "tag": "", + "run_id": "minty-goodall-0528", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "simple", + "name": "", + "description": "", + "steps": { + "simple": { + "type": "task", + "name": "simple", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.simple.simple_function", + "command_type": "python", + "node_name": "simple" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + +### Patching nodes for unit testing + +Pipelines are themselves code and should be testable. In the below example, we +take an example pipeline to test the behavior of the traversal. + + +The below pipeline is designed to follow: ```step 1 >> step 2 >> step 3``` in case of no failures +and ```step 1 >> step3``` in case of failure. The traversal is +[shown in concepts](/concepts/pipeline/#on_failure). + +!!! 
tip "Asserting Run log" + + The run log is a simple json file that can be parsed and validated against designed + behaviors. You can also create the ```RunLog``` object by deserializing + ```magnus.datastore.RunLog``` from the json. + + This can be handy when validating complex pipelines. + +=== "pipeline in yaml" + + ```yaml linenums="1" + --8<-- "examples/on-failure.yaml" + ``` + + +=== "python sdk" + + ```python linenums="1" + --8<-- "examples/on_failure.py" + ``` + +=== "Run log with no mocking" + + The ```run log``` has only ```step 1``` and ```step 3``` as part of the steps (as designed) + showing the behavior of the pipeline in case of failure. The status of ```step 1``` is + captured as ```FAIL``` due to ```exit 1``` command in the pipeline definition. + + ```json linenums="1" hl_lines="9 48 31" + { + "run_id": "selfish-pasteur-0559", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "step 1": { + "name": "step 1", + "internal_name": "step 1", + "status": "FAIL", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:59:08.382587", + "end_time": "2024-02-11 05:59:08.446642", + "duration": "0:00:00.064055", + "status": "FAIL", + "message": "Command failed", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "step_1.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "selfish-pasteur-0559/step_1.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "step 3": { + "name": "step 3", + "internal_name": "step 3", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:59:08.516318", + "end_time": "2024-02-11 05:59:08.516333", + "duration": "0:00:00.000015", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:59:08.580478", + "end_time": "2024-02-11 05:59:08.580555", + "duration": "0:00:00.000077", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + 
"enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "", + "tag": "", + "run_id": "selfish-pasteur-0559", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "step 1", + "name": "", + "description": "", + "steps": { + "step 1": { + "type": "task", + "name": "step 1", + "next": "step 2", + "on_failure": "step 3", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command": "exit 1", + "command_type": "shell", + "node_name": "step 1" + }, + "step 2": { + "type": "stub", + "name": "step 2", + "next": "step 3", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1 + }, + "step 3": { + "type": "stub", + "name": "step 3", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1 + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + +=== "Mocked configuration" + + We can patch the command of step 1 to be successful to test the behavior of traversal in case + of no failures. + + Running the pipeline with mocked configuration: + + for yaml: ```magnus execute -f examples/on-failure.yaml -c examples/configs/mocked-config-unittest.yaml``` + + for python: ```MAGNUS_CONFIGURATION_FILE=examples/configs/mocked-config-unittest.yaml python examples/on_failure.py``` + + ```yaml linenums="1" + --8<-- "examples/configs/mocked-config-unittest.yaml" + ``` + + +=== "Run log with mocking" + + As seen in the ```run log```, the steps have ```step 1```, ```step 2```, ```step 3``` as + executed and successful steps. And the status of ```step 1``` is ```SUCCESS```. 
+ + ```json linenums="1" hl_lines="9 12 48 79" + { + "run_id": "syrupy-aryabhata-0552", + "dag_hash": "026b36dd2b3507fe586f1f85ba308f817745c465", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "step 1": { + "name": "step 1", + "internal_name": "step 1", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:52:19.421358", + "end_time": "2024-02-11 05:52:19.426678", + "duration": "0:00:00.005320", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "step_1.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "syrupy-aryabhata-0552/step_1.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "step 2": { + "name": "step 2", + "internal_name": "step 2", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": true, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:52:19.500544", + "end_time": "2024-02-11 05:52:19.500559", + "duration": "0:00:00.000015", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "step 3": { + "name": "step 3", + "internal_name": "step 3", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": true, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:52:19.577734", + "end_time": "2024-02-11 05:52:19.577749", + "duration": "0:00:00.000015", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 05:52:19.649764", + "end_time": "2024-02-11 05:52:19.650318", + "duration": "0:00:00.000554", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "mocked", + "service_type": "executor", + 
"enable_parallel": false, + "overrides": {}, + "patches": { + "step 1": { + "command": "exit 0" + } + } + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": ".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/on-failure.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/mocked-config-unittest.yaml", + "tag": "", + "run_id": "syrupy-aryabhata-0552", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "step 1", + "name": "", + "description": "This is a simple pipeline to demonstrate failure in a step.\n\nThe default behavior is to traverse to step type fail and mark the run as + failed.\nBut you can control it by providing on_failure.\n\nIn this example: step 1 fails and moves to step 3 skipping step 2. The pipeline status\nis considered to be + success.\n\nstep 1 (FAIL) >> step 3 >> success\n\nYou can run this pipeline by magnus execute -f examples/on-failure.yaml\n", + "steps": { + "step 1": { + "type": "task", + "name": "step 1", + "next": "step 2", + "on_failure": "step 3", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command_type": "shell", + "command": "exit 1", + "node_name": "step 1" + }, + "step 2": { + "type": "stub", + "name": "step 2", + "next": "step 3", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1 + }, + "step 3": { + "type": "stub", + "name": "step 3", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1 + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "026b36dd2b3507fe586f1f85ba308f817745c465", + "execution_plan": "chained" + } + } + ``` + + + + + +### Debugging failed executions + +!!! tip "Using debuggers" + + For pipelines defined by the python SDK, you can create breakpoints at the + python function being executed and use [debuggers](https://docs.python.org/3/library/pdb.html). + + For ```notebook``` based tasks, + refer to [ploomber engine documentation](https://engine.ploomber.io/en/docs/user-guide/debugging/debuglater.html) for rich debuggers. + + Shell commands can be run in isolation by providing the parameters as environment variables + and catalog artifacts present in the ```compute_data_folder``` location. + +To debug a failed execution, we can use the mocked executor to mock all the steps except +for the failed step and providing the parameters and data exposed to the step during the +failure which are captured by the ```run log``` and ```catalog```. 
+ +=== "Faulty pipeline" + + ```yaml linenums="1" + --8<-- "examples/retry-fail.yaml" + ``` + +=== "Faulty run log" + + ```json linenums="1" + { + "run_id": "wrong-file-name", + "dag_hash": "7b12d64874eff2072c9dd97912a17149f2c32ed2", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "FAIL", + "steps": { + "Setup": { + "name": "Setup", + "internal_name": "Setup", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 23:03:00.417889", + "end_time": "2024-02-11 23:03:00.429579", + "duration": "0:00:00.011690", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Setup.execution.log", + "data_hash": "d2dd9105fa3c62c35d89182c44fbd1ec992d8d408e38f0350d582fa29ed88074", + "catalog_relative_path": "wrong-file-name/Setup.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "Create Content": { + "name": "Create Content", + "internal_name": "Create Content", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 23:03:00.507067", + "end_time": "2024-02-11 23:03:00.514757", + "duration": "0:00:00.007690", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Create_Content.execution.log", + "data_hash": "d2dd9105fa3c62c35d89182c44fbd1ec992d8d408e38f0350d582fa29ed88074", + "catalog_relative_path": "wrong-file-name/Create_Content.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + }, + { + "name": "data/hello.txt", + "data_hash": "2ac8edfe4eb5d0d9392cb070664c31c45eecca78c43cb99d2d9c6f5a8c813932", + "catalog_relative_path": "wrong-file-name/data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "Retrieve Content": { + "name": "Retrieve Content", + "internal_name": "Retrieve Content", + "status": "FAIL", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 23:03:00.595992", + "end_time": "2024-02-11 23:03:00.645752", + "duration": "0:00:00.049760", + "status": "FAIL", + "message": "Command failed", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "data/hello.txt", + "data_hash": "2ac8edfe4eb5d0d9392cb070664c31c45eecca78c43cb99d2d9c6f5a8c813932", + "catalog_relative_path": "data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "get" + }, 
+ { + "name": "Retrieve_Content.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "wrong-file-name/Retrieve_Content.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "fail": { + "name": "fail", + "internal_name": "fail", + "status": "SUCCESS", + "step_type": "fail", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "d76cf865af2f8e03b6c1205403351cbe42e6cdc4", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-11 23:03:00.727316", + "end_time": "2024-02-11 23:03:00.727911", + "duration": "0:00:00.000595", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": ".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/retry-fail.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/fs-catalog-run_log.yaml", + "tag": "", + "run_id": "wrong-file-name", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "Setup", + "name": "", + "description": "This is a simple pipeline that demonstrates retrying failures.\n\n1. Setup: We setup a data folder, we ignore if it is already present\n2. Create Content: We create a \"hello.txt\" and \"put\" the file in catalog\n3. Retrieve Content: We \"get\" the file \"hello.txt\" from the catalog and show the contents\n5. Cleanup: We remove the data folder. 
Note that this is stubbed to prevent accidental deletion.\n\n\nYou can run this pipeline by:\n magnus execute -f examples/retry-fail.yaml -c examples/configs/fs-catalog-run_log.yaml \\\n --run-id wrong-file-name\n", + "steps": { + "Setup": { + "type": "task", + "name": "Setup", + "next": "Create Content", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command_type": "shell", + "command": "mkdir -p data", + "node_name": "Setup" + }, + "Create Content": { + "type": "task", + "name": "Create Content", + "next": "Retrieve Content", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [], + "put": [ + "data/hello.txt" + ] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "echo \"Hello from magnus\" >> data/hello.txt\n", + "node_name": "Create Content" + }, + "Retrieve Content": { + "type": "task", + "name": "Retrieve Content", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [ + "data/hello.txt" + ], + "put": [] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "cat data/hello1.txt", + "node_name": "Retrieve Content" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "7b12d64874eff2072c9dd97912a17149f2c32ed2", + "execution_plan": "chained" + } + } + ``` + + + +=== "mocked configuration" + + ```yaml linenums="1" + --8<-- "examples/configs/mocked-config-debug.yaml" + ``` + +=== "Debugging failed executions" + + Copy the catalog during the failed execution to the debugging execution and + retry the step. We give it a run_id ```debug-pipeline``` + + cp .catalog/wrong-file-name debug-pipeline + + and retry with the fix: + + ```magnus execute -f examples/retry-fail.yaml -c examples/configs/mocked-config-debug.yaml + --run-id debug-pipeline``` diff --git a/tests/scenarios/archive/test_data/only_for_testing.dat b/docs/configurations/experiment-tracking.md similarity index 100% rename from tests/scenarios/archive/test_data/only_for_testing.dat rename to docs/configurations/experiment-tracking.md diff --git a/docs/configurations/overview.md b/docs/configurations/overview.md new file mode 100644 index 00000000..892f8c31 --- /dev/null +++ b/docs/configurations/overview.md @@ -0,0 +1,49 @@ +**Magnus** is designed to make effective collaborations between data scientists/researchers +and infrastructure engineers. + +All the features described in the [concepts](/concepts/the-big-picture) are +aimed at the *research* side of data science projects while configurations add *scaling* features to them. + + +Configurations are presented during the execution: + +For ```yaml``` based pipeline, use the ```--config-file, -c``` option in the [magnus CLI](/usage/#usage). + +For [python SDK](/sdk/#magnus.Pipeline.execute), use the ```configuration_file``` option or via +environment variable ```MAGNUS_CONFIGURATION_FILE``` + +## Default configuration + +```yaml +--8<-- "examples/configs/default.yaml" +``` + +1. Execute the pipeline in the local compute environment. +2. The run log is not persisted but present in-memory and flushed at the end of execution. +3. No catalog functionality, all catalog operations are effectively no-op. +4. No secrets functionality, all secrets are effectively no-op. +5. No experiment tracking tools, all interactions with experiment tracking tools are effectively no-op. +Run log still captures the metrics, but are not passed to the experiment tracking tools. 
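+
+For the SDK route, the sketch below shows a configuration file being supplied during execution. The
+pipeline is modeled on ```examples/concepts/simple.py```; the constructor arguments are assumptions
+based on the SDK examples in these docs and may differ slightly across versions.
+
+```python
+from magnus import Pipeline, Task
+
+simple = Task(
+    name="simple",
+    command="examples.concepts.simple.simple_function",
+    terminate_with_success=True,
+)
+pipeline = Pipeline(steps=[simple], start_at=simple, add_terminal_nodes=True)
+
+# Point the execution at a configuration file explicitly ...
+pipeline.execute(configuration_file="examples/configs/default.yaml")
+# ... or export MAGNUS_CONFIGURATION_FILE before running the script instead.
+```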
+
+The default configuration for all the pipeline executions runs on the
+[local compute](/configurations/executors/local), using a
+[buffered run log](/configurations/run-log/#buffered) store with
+[no catalog](/configurations/catalog/#do-nothing) or
+[secrets](/configurations/secrets/#do-nothing) or
+[experiment tracking functionality](/configurations/experiment-tracking/).
+
+
+
+## Format
+
+The configuration file is in yaml format and the typical structure is:
+
+```yaml
+service:
+  type: service provider
+  config:
+    ...
+```
+
+where service is one of ```executor```, ```catalog```, ```experiment_tracker```,
+```secrets``` or ```run_log_store```.
diff --git a/docs/configurations/run-log.md b/docs/configurations/run-log.md
new file mode 100644
index 00000000..9a728a10
--- /dev/null
+++ b/docs/configurations/run-log.md
@@ -0,0 +1,427 @@
+Along with tracking the progress and status of the execution of the pipeline, the run log
+also keeps track of parameters, experiment tracking metrics, data flowing through
+the pipeline and any reproducibility metrics emitted by the tasks of the pipeline.
+
+Please refer here for detailed [information about the run log](/concepts/run-log).
+
+
+## buffered
+
+Stores the entire run log in-memory. The run log is not persisted and is destroyed immediately
+after the execution is complete.
+
+!!! warning inline end "Parallel execution"
+
+    ```buffered``` run log stores suffer from race conditions when two tasks
+    need to update status concurrently.
+
+
+### Configuration
+
+```yaml linenums="1"
+run_log_store:
+  type: buffered
+```
+
+ +## file-system + +Stores the run log as a ```json``` file in the file-system accessible by all the steps +of the pipeline. + + +!!! warning inline end "Parallel execution" + + ```file-system``` based run log stores suffers from race conditions when two tasks + need to update status concurrently. Use ```chunked``` version to avoid this behavior + or disable parallelism. + + + +### Configuration + +```yaml linenums="1" +run_log_store: + type: file-system + config: + log_folder: # defaults to ".run_log_store" +``` + +### Example + +=== "Configuration" + + Assumed to be present at ```examples/configs/fs-run_log.yaml``` + + ```yaml linenums="1" + --8<-- "examples/configs/fs-run_log.yaml" + ``` + + +=== "sdk pipeline" + + The configuration can be provided dynamically by setting the environment variable + ```MAGNUS_CONFIGURATION_FILE```. + + Executing the pipeline with: + + ```MAGNUS_CONFIGURATION_FILE=examples/configs/fs-run_log.yaml python examples/concepts/simple.py``` + + ```python linenums="1" + --8<-- "examples/concepts/simple.py" + ``` + +=== "Run log" + + The structure of the run log is [detailed in concepts](/concepts/run-log). + + ```json linenums="1" + { + "run_id": "blocking-shaw-0538", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "simple": { + "name": "simple", + "internal_name": "simple", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-02 05:38:07.973392", + "end_time": "2024-02-02 05:38:07.977228", + "duration": "0:00:00.003836", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "simple.execution.log", + "data_hash": "03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340", + "catalog_relative_path": "blocking-shaw-0538/simple.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-02 05:38:08.056864", + "end_time": "2024-02-02 05:38:08.057359", + "duration": "0:00:00.000495", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": ".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, 
+ "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/fs-run_log.yaml", + "tag": "", + "run_id": "blocking-shaw-0538", + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "simple", + "name": "", + "description": "", + "steps": { + "simple": { + "type": "task", + "name": "simple", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.simple.simple_function", + "command_type": "python", + "node_name": "simple" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + +=== "folder structure" + + All the run logs are stored in .run_log_store with the filename being the ```run_id```. + + ``` + >>> tree .run_log_store + .run_log_store + └── blocking-shaw-0538.json + + 1 directory, 1 file + ``` + +
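+
+Because the run log is a single ```json``` file, it can be parsed and asserted against directly, for
+example in a test. Below is a minimal sketch, assuming the ```blocking-shaw-0538``` run id from the
+example above and deserializing into ```magnus.datastore.RunLog```; the attribute access mirrors the
+json structure shown earlier.
+
+```python
+import json
+from pathlib import Path
+
+from magnus.datastore import RunLog
+
+# Deserialize the run log written by the file-system run log store.
+run_log_file = Path(".run_log_store") / "blocking-shaw-0538.json"
+run_log = RunLog(**json.loads(run_log_file.read_text()))
+
+# Assert the designed behaviour of the pipeline.
+assert run_log.status == "SUCCESS"
+assert set(run_log.steps) == {"simple", "success"}
+assert run_log.steps["simple"].attempts[-1].status == "SUCCESS"
+```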
+ + +## chunked-fs + +Chunked file system is similar to the ```file-system``` but stores concents of the run log +that have concurrency blocks in separate files. + + +### Configuration + +```yaml linenums="1" +run_log_store: + type: chunked-fs + config: + log_folder: # defaults to ".run_log_store" +``` + +=== "Configuration" + + Assumed to be present at ```examples/configs/chunked-fs-run_log.yaml``` + + ```yaml linenums="1" + --8<-- "examples/configs/chunked-fs-run_log.yaml" + ``` + + +=== "sdk pipeline" + + The configuration can be provided dynamically by setting the environment variable + ```MAGNUS_CONFIGURATION_FILE```. + + Executing the pipeline with: + + ```MAGNUS_CONFIGURATION_FILE=examples/configs/chunked-fs-run_log.yaml python examples/concepts/simple.py``` + + ```python linenums="1" + --8<-- "examples/concepts/simple.py" + ``` + +=== "Run log" + + The structure of the run log is [detailed in concepts](/concepts/run-log). + + === "RunLog.json" + + Stores only the metadata of the run log. The contents of this are safe for concurrent + executions. + + ```json linenums="1" + { + "run_id": "pleasant-lamarr-0549", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": {}, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "chunked-fs", + "service_type": "run_log_store", + "log_folder": ".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/chunked-fs-run_log.yaml", + "tag": "", + "run_id": "pleasant-lamarr-0549", + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "simple", + "name": "", + "description": "", + "steps": { + "simple": { + "type": "task", + "name": "simple", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command": "examples.concepts.simple.simple_function", + "command_type": "python", + "node_name": "simple" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + === "StepLog-simple-1706852981689005000.json" + + Contains only the information of the single step ```simple```. + The name of the file follows the pattern: + + ```StepLog--.json```. The timestamp allows magnus to infer + the order of execution of the steps. 
+ + ```json linenums="1" + { + "name": "simple", + "internal_name": "simple", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-02 05:49:41.697142", + "end_time": "2024-02-02 05:49:41.702983", + "duration": "0:00:00.005841", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "simple.execution.log", + "data_hash": "03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340", + "catalog_relative_path": "pleasant-lamarr-0549/simple.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + } + ``` + + +=== "folder structure" + + All the run logs are stored in .run_log_store with the directory name being the ```run_id```. + + Instead of storing a single ```json``` file, the contents are stored in the folder + by the name of the ```run_id``. + + ``` + .run_log_store + └── pleasant-lamarr-0549 + ├── RunLog.json + ├── StepLog-simple-1706852981689005000.json + └── StepLog-success-1706852981779002000.json + + 2 directories, 3 files + ``` diff --git a/docs/configurations/secrets.md b/docs/configurations/secrets.md new file mode 100644 index 00000000..d767ab34 --- /dev/null +++ b/docs/configurations/secrets.md @@ -0,0 +1,140 @@ +**Magnus** provides an interface to secrets managers +[via the API](/interactions/#magnus.get_secret). + +Please refer to [Secrets in concepts](/concepts/secrets) for more information. + +## do-nothing + +A no-op implementation of a secret manager. This is useful when you do not have need for +secrets in your application. + +### configuration + +```yaml +secrets: + type: do-nothing + +``` + +Note that this is the default configuration if nothing is specified. + + +
+
+## Environment Secret Manager
+
+A secrets manager to access secrets from environment variables. Many cloud-based executors, especially
+K8s, can inject secrets as environment variables, and this secrets provider can be used in those
+environments.
+
+### Configuration
+
+```yaml
+secrets:
+  type: env-secrets-manager
+  config:
+    prefix: "" # default value
+    suffix: "" # default value
+```
+
+Use ```suffix``` and ```prefix``` to uniquely identify the secrets.
+The actual environment variable resolved when calling the secrets manager via the API,
+```get_secret(secret_key)```, is ```<prefix><secret_key><suffix>```.
+
+### Example
+
+
+=== "Pipeline"
+
+    Below is a simple pipeline to demonstrate the use of secrets.
+
+    The configuration file to use can be dynamically specified via the environment variable
+    ```MAGNUS_CONFIGURATION_FILE```.
+
+    The example can be found in ```examples/secrets_env.py```
+
+    ```python
+    --8<-- "examples/secrets_env.py"
+    ```
+
+=== "Default Configuration"
+
+    We can execute the pipeline using this configuration by:
+    ```secret="secret_value" MAGNUS_CONFIGURATION_FILE=examples/configs/secrets-env-default.yaml python examples/secrets_env.py```
+
+    The configuration file is located at ```examples/configs/secrets-env-default.yaml```
+
+    ```yaml
+    --8<-- "examples/configs/secrets-env-default.yaml"
+    ```
+
+=== "Prefixed and Suffixed Configuration"
+
+    We can execute the pipeline using this configuration by:
+    ```magnus_secret="secret_value" MAGNUS_CONFIGURATION_FILE=examples/configs/secrets-env-ps.yaml python examples/secrets_env.py```
+
+    The configuration file is located at ```examples/configs/secrets-env-ps.yaml```
+
+    ```yaml
+    --8<-- "examples/configs/secrets-env-ps.yaml"
+    ```
+
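+
+As an illustration, a task reading the secret through the API might look like the sketch below. It
+assumes the prefixed configuration uses ```prefix: "magnus_"``` with an empty suffix, so that
+```get_secret("secret")``` resolves the environment variable ```magnus_secret```; the actual values
+live in ```examples/configs/secrets-env-ps.yaml```.
+
+```python
+from magnus import get_secret
+
+
+def access_secret():
+    # With the env-secrets-manager configured, this resolves <prefix>secret<suffix>,
+    # i.e. "magnus_secret" under the assumed prefixed configuration.
+    secret_value = get_secret("secret")
+    print(secret_value)
+```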
+ +## dotenv + +```.env``` files are routinely used to provide configuration parameters and secrets during development phase. Magnus can dotenv files as a secret store and can surface them to tasks. + + +### Configuration + + +```yaml +secrets: + type: dotenv + config: + location: .env # default value + +``` + +The format of the ```.env``` file is ```key=value``` pairs. Any content after ```#``` is considered +as a comment and will be ignored. Using ```export``` or ```set```, case insensitive, as used +for shell scripts are allowed. + +### Example + +=== ".env file" + + Assumed to be present at ```examples/secrets.env``` + + ```shell linenums="1" + --8<-- "examples/secrets.env" + ``` + + 1. Shell scripts style are supported. + 2. Key value based format is also supported. + + +=== "Example configuration" + + Configuration to use the dotenv format file. + + Assumed to be present at ```examples/configs/dotenv.yaml``` + + ```yaml linenums="1" + --8<-- "examples/configs/dotenv.yaml" + ``` + + 1. Use dotenv secrets manager. + 2. Location of the dotenv file, defaults to ```.env``` in project root. + + +=== "Pipeline in python" + + The example is present in ```examples/secrets.py``` + + ```python linenums="1" hl_lines="12-13" + --8<-- "examples/secrets.py" + ``` + + 1. The key of the secret that you want to retrieve. diff --git a/docs/css/extra.css b/docs/css/extra.css new file mode 100644 index 00000000..74a8cdad --- /dev/null +++ b/docs/css/extra.css @@ -0,0 +1,3 @@ +:root>* { + --md-tooltip-width: 600px; +} diff --git a/docs/example/dataflow.md b/docs/example/dataflow.md new file mode 100644 index 00000000..039620c5 --- /dev/null +++ b/docs/example/dataflow.md @@ -0,0 +1,223 @@ +In **magnus**, we distinguish between 2 types of data that steps can communicate with each other. + +[`Parameters`](#flow_of_parameters) + +: Parameters can be thought of input and output arguments of functions. Magnus supports +pydantic models both as input and return types of functions. + +[`Files`](#flow_of_files) + +: Data files or objects created by individual tasks of the pipeline can be passed to downstream tasks +using catalog. This can be controlled either by the configuration or by python API. + + +## Flow of Parameters + +The [initial parameters](/concepts/parameters) of the pipeline can set by using a ```yaml``` file and presented +during execution + +```--parameters-file, -parameters``` while using the [magnus CLI](/usage/#usage) + +or by using ```parameters_file``` with [the sdk](/sdk/#magnus.Pipeline.execute). + +=== "Initial Parameters" + + ```yaml title="Defining initial parameters" + # The below is assumed to be examples/parameters_initial.yaml # (2) + simple: 1 + inner: # (1) + x: 3 + y: "hello" + ``` + + 1. You can create deeply nested parameter structures. + 2. You can name it as you want. + +=== "Pydantic model representation" + + The parameter structure can be represented as a pydantic model within your code. + + ```python title="Pydantic model representation" + + from pydantic import BaseModel + + class InnerModel(BaseModel): # (1) + x: int + y: str + + class NestedModel(BaseModel): # (2) + simple: int + inner: InnerModel + + ``` + + 1. Represents the ```inner``` nested model of parameters. + 2. Represents all parameters defined in initial parameters. + + +### Accessing parameters + + +=== "Application native way" + + !!! info annotate inline end "No ```import magnus``` !!!" + + A lot of design emphasis is to avoid "import magnus" and keep the function signature native to the application. 
+ Magnus also has API's get_parameter and set_parameter if they are handy. + + + + ```python linenums="1" hl_lines="34-53" + --8<-- "examples/parameters.py" + ``` + + 1. Create a pydantic model to represent the parameters. + 2. Access those parameters by name. The annotations are used to cast to correct models. + 3. Return the modified parameters for downstream steps. The return type should be always a pydantic model. + + +=== "Using the python API" + + !!! info annotate inline end "Using API" + + Using the python API gives you access to the parameters without changing the + signature of the functions. Also, this the preferred way to access the parameters in + notebooks. (1) + + 1. We use parameters in notebooks but they can only support simple types while the + API supports rich pydantic models. + + + ```python linenums="1" hl_lines="45-72" + --8<-- "examples/parameters_api.py" + ``` + + 1. To get the parameters as pydantic models, you can hint the type using ```cast_as``` + 2. Downstream steps could access the modified parameters. + + +=== "Using environment variables" + + !!! info annotate inline end "Using Env" + + Tasks of type shell use this mechanism to access parameters. + + There are richer ways to pass parameters in magnus if you are using only + python in your application. This mechanism helps when you have non-python code + as part of your application. + + + ```yaml title="Using shell to access parameters" linenums="1" + --8<-- "examples/parameters_env.yaml" + ``` + + 1. Show all the parameters prefixed by MAGNUS_PRM_ + 2. Set new values of the parameters as environment variables prefixed by MAGNUS_PRM_ + 3. Consume the parameters like you would using python. + + + +## Flow of Files + + +**Magnus** stores all the artifacts/files/logs generated by ```task``` nodes in a central storage called +[catalog](/concepts/catalog). +The catalog is indexed by the ```run_id``` of the pipeline and is unique for every execution of the pipeline. + +Any ```task``` of the pipeline can interact with the ```catalog``` to get and put artifacts/files +as part of the execution. + +Conceptually, the flow is: + +
+```mermaid +flowchart LR + subgraph Task + direction LR + get("get + 📁 data folder") + exe(Execute code) + put("put + 📁 data folder") + end + + subgraph Catalog + direction BT + Data[📁 run id] + end +Data --> get +put --> Data +get --> exe +exe --> put +``` +
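+
+The same get, execute, put cycle shown above can also be driven from task code through the python API.
+The sketch below assumes the API functions are named ```put_in_catalog``` and ```get_from_catalog```;
+please refer to ```examples/catalog_api.py``` for the canonical usage.
+
+```python
+from pathlib import Path
+
+from magnus import get_from_catalog, put_in_catalog
+
+
+def create_content():
+    # Execute: produce a file in the data folder ...
+    Path("data").mkdir(parents=True, exist_ok=True)
+    Path("data/hello.txt").write_text("hello from magnus")
+    # ... and put it into the catalog for downstream steps.
+    put_in_catalog("data/hello.txt")
+
+
+def retrieve_content():
+    # Get the file from this run's catalog before reading it.
+    get_from_catalog("data/hello.txt")
+    print(Path("data/hello.txt").read_text())
+```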
+ +The ```catalog``` for an execution has the same structure as the ```root``` of the project. +You can access content as if you are accessing files relative to the project root. + +=== "Example Configuration" + + ``` yaml + --8<-- "examples/configs/fs-catalog.yaml" + ``` + + 1. Use local file system as a central catalog, defaults to ```.catalog``` + 2. By default, magnus uses ```data``` folder as the directory containing the user data. + +=== "pipeline in yaml" + + !!! info annotate "Python functions" + + We have used shell for these operations for convenience but you can use python functions to + create content and retrieve content. + + For example, the below functions can be used in steps Create Content and Retrieve Content. + ```python + def create_content(): + with open("data/hello.txt") as f: + f.write("hello from magnus") + + def retrieve_content(): + with open("data/hello.txt") as f: + print(f.read()) + ``` + + + ``` yaml linenums="1" + --8<-- "examples/catalog.yaml" + ``` + + 1. Make a ```data``` folder if it does not already exist. + 2. As the ```compute_data_folder``` is defined to ```.```, all paths should be relative to ```.```. Put the file ```hello.txt``` in ```data``` folder into the catalog. + 3. We have intentionally made this ```stub``` node to prevent accidentally deleting your content. Please make it a ```task``` to actually delete the ```data``` folder. + 4. Should print "Hello from magnus" as the content of the ```hello.txt```. + 5. Override the default ```.``` as ```compute_data_folder``` to ```data```. All interactions should then be relative to ```data``` folder. + 6. Same as above, make it a ```task``` to actually delete the ```data``` folder + +=== "python sdk" + + !!! info annotate "Python functions" + + We have used shell for these operations for convenience but you can use python functions to + create content and retrieve content. + + For example, the below functions can be used in steps create and retrieve. + ```python + def create_content(): + with open("data/hello.txt") as f: + f.write("hello from magnus") + + def retrieve_content(): + with open("data/hello.txt") as f: + print(f.read()) + ``` + + ```python linenums="1" + --8<-- "examples/catalog.py" + ``` + +=== "python API" + + ```python linenums="1" + --8<-- "examples/catalog_api.py" + ``` diff --git a/docs/example/example.md b/docs/example/example.md new file mode 100644 index 00000000..ca52d9c1 --- /dev/null +++ b/docs/example/example.md @@ -0,0 +1,397 @@ + + +Magnus revolves around the concept of [pipelines or workflows](/concepts/pipeline). +Pipelines defined in magnus are translated into +other workflow engine definitions like [Argo workflows](https://argoproj.github.io/workflows/) or +[AWS step functions](https://aws.amazon.com/step-functions/). + +## Example Pipeline definition + +A contrived example of data science workflow without any implementation. + +!!! info annotate inline end "Simple pipeline" + + In this extremely reduced example, we acquire data from different sources, clean it and shape it for analysis. + Features are then engineered from the clean data to run data science modelling. 
+ + +``` mermaid +%%{ init: { 'flowchart': { 'curve': 'linear' } } }%% +flowchart TD + + step1:::green + step1([Acquire data]) --> step2:::green + step2([Prepare data]) --> step3:::green + step3([Extract features]) --> step4:::green + step4([Model]) --> suc([success]):::green + + classDef green stroke:#0f0 + +``` + + +This pipeline can be represented in **magnus** as below: + + +=== "yaml" + + ``` yaml linenums="1" + --8<-- "examples/contrived.yaml" + ``` + + 1. ```stub``` nodes are mock nodes and always succeed. + 2. Execute the ```next``` node if it succeeds. + 3. This marks the pipeline to be be successfully completed. + 4. Any failure in the execution of the node will, by default, reach this step. + +=== "python" + + ``` python linenums="1" + --8<-- "examples/contrived.py" + ``` + + 1. You can specify dependencies by using the ```next``` while creating the node or defer it for later. + 2. ```terminate_with_success``` indicates the pipeline to be successfully complete. + 3. Alternative ways to define dependencies, ```>>```, ```<<```, ```depends_on```. Choose the style that you + prefer. + 4. ```add_terminal_nodes``` adds success and fail states to the pipeline. + 5. A very rich run log that captures different properties of the run for maximum reproducibility. + + +=== "Run log" + + Please see [Run log](/concepts/run-log) for more detailed information about the structure. + + ```json linenums="1" + { + "run_id": "vain-hopper-0731", // (1) + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", / (2) + "steps": { + "Acquire Data": { + "name": "Acquire Data", // (3) + "internal_name": "Acquire Data", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "399b0d42f4f28aaeeb2e062bb0b938d50ff1595c", // (4) + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-11-16 07:31:39.929797", + "end_time": "2023-11-16 07:31:39.929815", + "duration": "0:00:00.000018", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, // (5) + "branches": {}, + "data_catalog": [] // (6) + }, + "Prepare Data": { + "name": "Prepare Data", + "internal_name": "Prepare Data", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "399b0d42f4f28aaeeb2e062bb0b938d50ff1595c", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-11-16 07:31:39.993807", + "end_time": "2023-11-16 07:31:39.993828", + "duration": "0:00:00.000021", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Extract Features": { + "name": "Extract Features", + "internal_name": "Extract Features", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "399b0d42f4f28aaeeb2e062bb0b938d50ff1595c", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } 
+ ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-11-16 07:31:40.056403", + "end_time": "2023-11-16 07:31:40.056420", + "duration": "0:00:00.000017", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Model": { + "name": "Model", + "internal_name": "Model", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "399b0d42f4f28aaeeb2e062bb0b938d50ff1595c", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-11-16 07:31:40.118268", + "end_time": "2023-11-16 07:31:40.118285", + "duration": "0:00:00.000017", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "399b0d42f4f28aaeeb2e062bb0b938d50ff1595c", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-11-16 07:31:40.176718", + "end_time": "2023-11-16 07:31:40.176774", + "duration": "0:00:00.000056", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { // (7) + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "compute_data_folder": "data" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "", + "tag": "", + "run_id": "vain-hopper-0731", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { // (8) + "start_at": "Acquire Data", + "name": "", + "description": "", + "max_time": 86400, + "internal_branch_name": "", + "steps": { + "Acquire Data": { + "type": "stub", + "name": "Acquire Data", + "internal_name": "Acquire Data", + "internal_branch_name": "", + "is_composite": false + }, + "Prepare Data": { + "type": "stub", + "name": "Prepare Data", + "internal_name": "Prepare Data", + "internal_branch_name": "", + "is_composite": false + }, + "Extract Features": { + "type": "stub", + "name": "Extract Features", + "internal_name": "Extract Features", + "internal_branch_name": "", + "is_composite": false + }, + "Model": { + "type": "stub", + "name": "Model", + "internal_name": "Model", + "internal_branch_name": "", + "is_composite": false + }, + "success": { + "type": "success", + "name": "success", + "internal_name": "success", + "internal_branch_name": "", + "is_composite": false + }, + "fail": { + "type": "fail", + "name": "fail", + "internal_name": "fail", + 
"internal_branch_name": "", + "is_composite": false + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + 1. Unique execution id or run id for every run of the pipeline. + 2. The status of the execution, one of success, fail or processing. + 3. Steps as defined in the pipeline configuration. + 4. git hash of the code that was used to run the pipeline. + 5. Optional user defined metrics during the step execution. These are also made available to the experiment tracking + tool, if they are configured. + 6. Data files that are ```get``` or ```put``` into a central storage during execution of the step. + 7. The configuration used to run the pipeline. + 8. The pipeline definition. + + +Independent of the platform it is run on, + + +- [x] The [pipeline definition](/concepts/pipeline) remains the same from an author point of view. +The data scientists are always part of the process and contribute to the development even in production environments. + +- [x] The [run log](/concepts/run-log) remains the same except for the execution configuration enabling users +to debug the pipeline execution in lower environments for failed executions or to validate the +expectation of the execution. + + + + +## Example configuration + +To run the pipeline in different environments, we just provide the +[required configuration](/configurations/overview). + +=== "Default Configuration" + + ``` yaml linenums="1" + --8<-- "examples/configs/default.yaml" + ``` + + 1. Run the pipeline in local environment. + 2. Use the buffer as run log, this will not persist the run log to disk. + 3. Do not move any files to central storage. + 4. Do not use any secrets manager. + 5. Do not integrate with any experiment tracking tools + +=== "Argo Configuration" + + To render the pipeline in [argo specification](/configurations/executors/argo/), mention the + configuration during execution. + + yaml: + + ```magnus execute -f examples/contrived.yaml -c examples/configs/argo-config.yaml``` + + + python: + + Please refer to [containerised environments](/configurations/executors/container-environments/) for more information. + + MAGNUS_CONFIGURATION_FILE=examples/configs/argo-config.yaml python examples/contrived.py && magnus execute -f magnus-pipeline.yaml -c examples/configs/argo-config.yaml + + ``` yaml linenums="1" title="Argo Configuration" + --8<-- "examples/configs/argo-config.yaml" + ``` + + 1. Use argo workflows as the execution engine to run the pipeline. + 2. Run this docker image for every step of the pipeline. Please refer to + [containerised environments](/configurations/executors/container-environments/) for more details. + 3. Mount the volume from Kubernetes persistent volumes (magnus-volume) to /mnt directory. + 4. Resource constraints for the container runtime. + 5. Since every step runs in a container, the run log should be persisted. Here we are using the file-system as our + run log store. + 6. Kubernetes PVC is mounted to every container as ```/mnt```, use that to surface the run log to every step. + + +=== "Transpiled Workflow" + + The below is the same workflow definition in argo specification. 
+ + ```yaml linenums="1" + --8<-- "examples/generated-argo-pipeline.yaml" + ``` diff --git a/docs/example/experiment-tracking.md b/docs/example/experiment-tracking.md new file mode 100644 index 00000000..e63e04ca --- /dev/null +++ b/docs/example/experiment-tracking.md @@ -0,0 +1,202 @@ +Metrics in data science projects summarize important information about the execution and performance of the +experiment. + +Magnus captures [this information as part of the run log](/concepts/experiment-tracking) and also provides +an [interface to experiment tracking tools](/concepts/experiment-tracking/#experiment_tracking_tools) +like [mlflow](https://mlflow.org/docs/latest/tracking.html) or +[Weights and Biases](https://wandb.ai/site/experiment-tracking). + + +### Example + + +=== "python" + + ```python linenums="1" + --8<-- "examples/experiment_tracking_api.py" + ``` + + 1. Nested metrics are possible as pydantic models. + 2. Using mlflow as experiment tracking tool. + +=== "yaml" + + ```yaml linenums="1" + --8<-- "examples/experiment_tracking_env.yaml" + ``` + +=== "configuration" + + Assumed to be present in ```examples/configs/mlflow-config.yaml``` + + ```yaml linenums="1" + --8<-- "examples/configs/mlflow-config.yaml" + ``` + +=== "Run log" + + The captured metrics as part of the run log are highlighted. + + ```json linenums="1" hl_lines="36-43" + { + "run_id": "clean-ride-1048", + "dag_hash": "", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "SUCCESS", + "steps": { + "Emit Metrics": { + "name": "Emit Metrics", + "internal_name": "Emit Metrics", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "0b62e4c661a4b4a2187afdf44a7c64520374202d", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-10 10:48:10.089266", + "end_time": "2024-01-10 10:48:10.092541", + "duration": "0:00:00.003275", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": { + "spam": "hello", + "eggs": { + "ham": "world" + }, + "answer": 42.0, + "is_it_true": false + }, + "branches": {}, + "data_catalog": [ + { + "name": "Emit_Metrics.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "clean-ride-1048/Emit_Metrics.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "0b62e4c661a4b4a2187afdf44a7c64520374202d", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-01-10 10:48:10.585832", + "end_time": "2024-01-10 10:48:10.585937", + "duration": "0:00:00.000105", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { + 
"service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog" + }, + "experiment_tracker": { + "service_name": "mlflow", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", + "parameters_file": "", + "configuration_file": "examples/configs/mlflow-config.yaml", + "tag": "", + "run_id": "clean-ride-1048", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "Emit Metrics", + "name": "", + "description": "", + "internal_branch_name": "", + "steps": { + "Emit Metrics": { + "type": "task", + "name": "Emit Metrics", + "internal_name": "Emit Metrics", + "internal_branch_name": "", + "is_composite": false + }, + "success": { + "type": "success", + "name": "success", + "internal_name": "success", + "internal_branch_name": "", + "is_composite": false + }, + "fail": { + "type": "fail", + "name": "fail", + "internal_name": "fail", + "internal_branch_name": "", + "is_composite": false + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + } + ``` + + +=== "mlflow" + + The metrics are also sent to mlflow. + +
+ ![Image](/assets/screenshots/mlflow_example.png){ width="800" height="600"} +
mlflow UI for the execution. The run_id remains the same as the run_id of the magnus execution.
+
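+
+The same metrics can also be emitted from python code. Below is a minimal sketch of what such a step could
+look like; it assumes magnus exposes a ```track_this``` style helper as described in
+[experiment tracking](/concepts/experiment-tracking) and is not a verbatim copy of
+```examples/experiment_tracking_api.py```.
+
+```python
+from pydantic import BaseModel
+
+from magnus import track_this  # assumed helper; see /concepts/experiment-tracking
+
+
+class EggsModel(BaseModel):
+    ham: str
+
+
+def emit_metrics():
+    # Each key/value lands in the step's user_defined_metrics section of the run log and is
+    # forwarded to the configured experiment tracker (mlflow in this example).
+    track_this(spam="hello", eggs=EggsModel(ham="world"))
+    track_this(answer=42.0, is_it_true=False)
+```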
diff --git a/docs/example/reproducibility.md b/docs/example/reproducibility.md
new file mode 100644
index 00000000..47ef34cb
--- /dev/null
+++ b/docs/example/reproducibility.md
@@ -0,0 +1,231 @@
+Magnus stores a variety of information about the current execution in the [run log](/concepts/run-log).
+The run log is internally used
+for keeping track of the execution (status of different steps, parameters, etc.) but it also holds rich information
+for reproducing the state at the time of pipeline execution.
+
+
+The following are "invisibly" captured as part of the run log:
+
+- Code: The ```git``` commit hash of the code used to run a pipeline is stored as part of the run log against
+every step.
+- Data hash: The data hash of the file passing through the catalog is stored as part of the run log. Since the
+catalog itself is indexed against the execution id, it is easy to recreate the exact state of the data used
+in the pipeline execution.
+- Configuration: The configuration of the pipeline (dag definition, execution configuration) is also stored
+as part of the run log.
+
+
+
+!!! info annotate "Invisible?"
+
+    Reproducibility should not be a "nice to have" but a must in data science projects. We believe that
+    it should not be left to the data scientist to be conscious of it but should be done without any active
+    intervention.
+
+
+Below we show an example pipeline and the different layers of the run log.
+
+
+=== "Example pipeline"
+
+    !!! info annotate "Example"
+
+        This example pipeline is the same as the data flow pipeline showcasing the flow of files.
+        The create content step writes a new file which is stored in the catalog, and the retrieve content step
+        gets it from the catalog.
+
+
+    ```python title="simple data passing pipeline" linenums="1"
+    --8<-- "examples/catalog_api.py"
+    ```
+
+=== "General run log attributes"
+
+    !!! info annotate
+
+        This section of the run log is about the overall status of the execution. It has information
+        about the run_id, the execution status, re-run indicators and the final state of the parameters.
+
+
+    ```json linenums="1"
+    {
+        "run_id": "greedy-yonath-1608", // (1)
+        "dag_hash": "",
+        "use_cached": false,
+        "tag": "",
+        "original_run_id": "",
+        "status": "SUCCESS",
+        ...
+        "parameters": {}, // (2)
+    }
+    ```
+
+    1. The unique run_id of the execution.
+    2. The parameters at the end of the pipeline.
+
+
+=== "Logs captured against a step"
+
+    !!! info annotate
+
+        The information stored against an execution of a step. We capture the git commit ids, data hashes, and
+        parameters at the point of execution. The execution logs are also stored in the catalog against the
+        run id.
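+
+    The ```data_hash``` values in the step log below can be recomputed from the files themselves. A minimal
+    sketch, assuming the file-system catalog hashes the file contents with SHA-256 (the hash recorded for an
+    empty execution log matches the well known SHA-256 digest of empty input):
+
+    ```python
+    import hashlib
+    from pathlib import Path
+
+
+    def data_hash(path: str) -> str:
+        """Hex digest of the file contents, comparable with data_hash entries in the run log."""
+        return hashlib.sha256(Path(path).read_bytes()).hexdigest()
+
+
+    print(data_hash("data/hello.txt"))  # compare with the data/hello.txt entry below
+    ```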
+ + + ```json linenums="1" + "create_content": { // (1) + "name": "create_content", + "internal_name": "create_content", + "status": "SUCCESS", // (2) + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "ff60e7fa379c38adaa03755977057cd10acc4baa", // (3) + "code_identifier_type": "git", + "code_identifier_dependable": true, // (4) + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2023-12-15 16:08:51.869129", + "end_time": "2023-12-15 16:08:51.878428", + "duration": "0:00:00.009299", + "status": "SUCCESS", + "message": "", + "parameters": {} // (5) + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "data/hello.txt", // (6) + "data_hash": "c2e6b3d23c045731bf40a036aa6f558c9448da247e0cbb4ee3fcf10d3660ef18", // (7) + "catalog_relative_path": "greedy-yonath-1608/data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "put" + }, + { + "name": "create_content", // (8) + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "greedy-yonath-1608/create_content", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + ``` + + 1. The name of step. + 2. The status of the execution of the step. + 3. The git sha of the code at the point of execution of the pipeline. + 4. is True if the branch is clean, false otherwise. + 5. The parameters at the point of execution of the step. + 6. The name of the file that was "put" in the catalog by the step. + 7. The hash of the dataset put in the catalog. + 8. The execution logs of the step put in the catalog. + + +=== "Captured configuration" + + !!! info annotate + + The information about the configuration used to run the pipeline. It includes the configuration of the + different ```services``` used, the pipeline definition and state of variables used at the time of + execution of the pipeline. + + + ```json linenums="1" + "run_config": { + "executor": { // (1) + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "placeholders": {} + }, + "run_log_store": { // (2) + "service_name": "buffered", + "service_type": "run_log_store" + }, + "secrets_handler": { // (3) + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { // (4) + "service_name": "file-system", + "service_type": "catalog", + "compute_data_folder": "." 
+ }, + "experiment_tracker": { // (5) + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "", // (6 + "parameters_file": "", // (7) + "configuration_file": "examples/configs/fs-catalog.yaml", // (8) + "tag": "", + "run_id": "greedy-yonath-1608", + "variables": {}, + "use_cached": false, + "original_run_id": "", + "dag": { // (9) + "start_at": "create_content", + "name": "", + "description": "", + "max_time": 86400, + "internal_branch_name": "", + "steps": { + "create_content": { + "type": "task", + "name": "create_content", + "internal_name": "create_content", + "internal_branch_name": "", + "is_composite": false + }, + "retrieve_content": { + "type": "task", + "name": "retrieve_content", + "internal_name": "retrieve_content", + "internal_branch_name": "", + "is_composite": false + }, + "success": { + "type": "success", + "name": "success", + "internal_name": "success", + "internal_branch_name": "", + "is_composite": false + }, + "fail": { + "type": "fail", + "name": "fail", + "internal_name": "fail", + "internal_branch_name": "", + "is_composite": false + } + } + }, + "dag_hash": "", + "execution_plan": "chained" + } + ``` + + 1. The configuration of the ```executor``` + 2. The configuration of ```run log store```. The location where these logs are stored. + 3. The configuration of the secrets manager. + 4. The configuration of the catalog manager. + 5. The configuration of experiment tracker. + 6. The pipeline definition file, empty in this case as we use the SDK. + 7. The initial parameters file used for the execution. + 8. The configuration file used for the execution. + 9. The definition of the DAG being executed. + + + +This [structure of the run log](/concepts/run-log) is the same independent of where the pipeline was executed. +This enables you to reproduce a failed execution in complex environments on local environments for easier debugging. diff --git a/docs/example/retry-after-failure.md b/docs/example/retry-after-failure.md new file mode 100644 index 00000000..69060f29 --- /dev/null +++ b/docs/example/retry-after-failure.md @@ -0,0 +1,593 @@ +Magnus allows you to [debug and recover](/concepts/run-log/#retrying_failures) from a +failure during the execution of pipeline. The pipeline can be +restarted in any suitable environment for debugging. + + +!!! example annotate + + A pipeline that is transpiled to argo workflows can be re-run on your local compute + for debugging purposes. The only caveat is that, your local compute should have access to run log of the failed + execution (1), generated catalog artifacts (2) from the the failed execution. + +1. Access to the run log can be as simple as copy the json file to your local compute. +2. Generated catalog artifacts can be sourced from ```file-system``` which is your local folder. + + + +Below is an example of retrying a pipeline that failed. + + +=== "Failed pipeline" + + !!! note + + You can run this pipeline on your local machine by + + ```magnus execute -f examples/retry-fail.yaml -c examples/configs/fs-catalog-run_log.yaml --run-id wrong-file-name``` + + Note that we have specified the ```run_id``` to be something we can use later. + The execution logs of the steps in the catalog will show the reason of the failure. + + ```yaml title="Pipeline that fails" + --8<-- "examples/retry-fail.yaml" + ``` + + 1. We make a data folder to store content. + 2. Puts a file in the data folder and catalogs it for downstream steps. + 3. 
It will fail here as there is no file called ```hello1.txt``` in the data folder. + 4. Get the file, ```hello.txt``` generated from previous steps into data folder. + + +=== "Failed run log" + + Please note the overall status of the pipeline in line #7 to be ```FAIL```. + The step log of the failed step is also marked with status ```FAIL```. + + ```json linenums="1" hl_lines="7 94-139" + { + "run_id": "wrong-file-name", + "dag_hash": "13f7c1b29ebb07ce058305253171ceae504e1683", + "use_cached": false, + "tag": "", + "original_run_id": "", + "status": "FAIL", + "steps": { + "Setup": { + "name": "Setup", + "internal_name": "Setup", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-07 06:08:45.330918", + "end_time": "2024-02-07 06:08:45.348227", + "duration": "0:00:00.017309", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Setup.execution.log", + "data_hash": "e1f8eaa5d49d88fae21fd8a34ff9774bcd4136bdbc3aa613f88a986261bac694", + "catalog_relative_path": "wrong-file-name/Setup.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "Create Content": { + "name": "Create Content", + "internal_name": "Create Content", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-07 06:08:45.422420", + "end_time": "2024-02-07 06:08:45.438199", + "duration": "0:00:00.015779", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "Create_Content.execution.log", + "data_hash": "e1f8eaa5d49d88fae21fd8a34ff9774bcd4136bdbc3aa613f88a986261bac694", + "catalog_relative_path": "wrong-file-name/Create_Content.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + }, + { + "name": "data/hello.txt", + "data_hash": "108ecead366a67c2bb17e223032e12629bcc21b4ab0fff77cf48a5b784f208c7", + "catalog_relative_path": "wrong-file-name/data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "Retrieve Content": { + "name": "Retrieve Content", + "internal_name": "Retrieve Content", + "status": "FAIL", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-07 06:08:45.525924", + "end_time": "2024-02-07 06:08:45.605381", + "duration": "0:00:00.079457", + "status": "FAIL", + "message": "Command failed", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": 
{}, + "data_catalog": [ + { + "name": "data/hello.txt", + "data_hash": "108ecead366a67c2bb17e223032e12629bcc21b4ab0fff77cf48a5b784f208c7", + "catalog_relative_path": "data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "get" + }, + { + "name": "Retrieve_Content.execution.log", + "data_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "catalog_relative_path": "wrong-file-name/Retrieve_Content.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "fail": { + "name": "fail", + "internal_name": "fail", + "status": "SUCCESS", + "step_type": "fail", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-07 06:08:45.701371", + "end_time": "2024-02-07 06:08:45.701954", + "duration": "0:00:00.000583", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + "log_folder": ".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/retry-fail.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/fs-catalog-run_log.yaml", + "tag": "", + "run_id": "wrong-file-name", + "variables": { + "argo_docker_image": "harbor.csis.astrazeneca.net/mlops/magnus:latest" + }, + "use_cached": false, + "original_run_id": "", + "dag": { + "start_at": "Setup", + "name": "", + "description": "This is a simple pipeline that demonstrates retrying failures.\n\n1. Setup: We setup a data folder, we ignore if it is already present\n2. Create Content: We create a \"hello.txt\" and \"put\" the file in catalog\n3. Retrieve Content: We \"get\" the file \"hello.txt\" from the catalog and show the contents\n5. Cleanup: We remove the data folder. 
Note that this is stubbed to prevent accidental deletion.\n\n\nYou can run this pipeline by:\n magnus execute -f examples/catalog.yaml -c examples/configs/fs-catalog.yaml\n", + "steps": { + "Setup": { + "type": "task", + "name": "Setup", + "next": "Create Content", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command_type": "shell", + "command": "mkdir -p data", + "node_name": "Setup" + }, + "Create Content": { + "type": "task", + "name": "Create Content", + "next": "Retrieve Content", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [], + "put": [ + "data/hello.txt" + ] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "echo \"Hello from magnus\" >> data/hello.txt\n", + "node_name": "Create Content" + }, + "Retrieve Content": { + "type": "task", + "name": "Retrieve Content", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [ + "data/hello.txt" + ], + "put": [] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "cat data/hello1.txt", + "node_name": "Retrieve Content" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "13f7c1b29ebb07ce058305253171ceae504e1683", + "execution_plan": "chained" + } + } + ``` + + +=== "Fixed pipeline" + + !!! note + + You can run this pipeline on your local machine by + + ```magnus execute -f examples/retry-fixed.yaml -c examples/configs/fs-catalog-run_log.yaml --use-cached wrong-file-name``` + + Note that we have specified the run_id of the failed execution to be ```use-cached``` for the new execution. + + + ```yaml title="Pipeline that restarts" + --8<-- "examples/retry-fixed.yaml" + ``` + + 1. Though this step is identical to the failed pipeline, this step does not execute in retry. + 2. We mark this step to be stub to demonstrate a re-run using cached does not execute the + successful task. + + + +=== "Fixed Run log" + + The retry pipeline is executed with success state. + + Note the execution of step ```Setup``` has been marked as ```mock: true```, this step + has not been executed but passed through. + + The step ```Create Content``` has been modified to ```stub``` to prevent execution in the + fixed pipeline. 
+ + ```json linenums="1" hl_lines="15 34 51-96" + { + "run_id": "naive-wilson-0625", + "dag_hash": "148de99f96565bb1b276db2baf23eba682615c76", + "use_cached": true, + "tag": "", + "original_run_id": "wrong-file-name", + "status": "SUCCESS", + "steps": { + "Setup": { + "name": "Setup", + "internal_name": "Setup", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": true, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Create Content": { + "name": "Create Content", + "internal_name": "Create Content", + "status": "SUCCESS", + "step_type": "stub", + "message": "", + "mock": true, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + }, + "Retrieve Content": { + "name": "Retrieve Content", + "internal_name": "Retrieve Content", + "status": "SUCCESS", + "step_type": "task", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-07 06:25:13.506657", + "end_time": "2024-02-07 06:25:13.527603", + "duration": "0:00:00.020946", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [ + { + "name": "data/hello.txt", + "data_hash": "108ecead366a67c2bb17e223032e12629bcc21b4ab0fff77cf48a5b784f208c7", + "catalog_relative_path": "data/hello.txt", + "catalog_handler_location": ".catalog", + "stage": "get" + }, + { + "name": "Retrieve_Content.execution.log", + "data_hash": "bd8e06cb7432666dc3b1b0db8034966c034397863c7ff629c98ffd13966681d7", + "catalog_relative_path": "naive-wilson-0625/Retrieve_Content.execution.log", + "catalog_handler_location": ".catalog", + "stage": "put" + } + ] + }, + "success": { + "name": "success", + "internal_name": "success", + "status": "SUCCESS", + "step_type": "success", + "message": "", + "mock": false, + "code_identities": [ + { + "code_identifier": "f94e49a4fcecebac4d5eecbb5b691561b08e45c0", + "code_identifier_type": "git", + "code_identifier_dependable": true, + "code_identifier_url": "https://github.com/AstraZeneca/magnus-core.git", + "code_identifier_message": "" + } + ], + "attempts": [ + { + "attempt_number": 1, + "start_time": "2024-02-07 06:25:13.597125", + "end_time": "2024-02-07 06:25:13.597694", + "duration": "0:00:00.000569", + "status": "SUCCESS", + "message": "", + "parameters": {} + } + ], + "user_defined_metrics": {}, + "branches": {}, + "data_catalog": [] + } + }, + "parameters": {}, + "run_config": { + "executor": { + "service_name": "local", + "service_type": "executor", + "enable_parallel": false, + "overrides": {} + }, + "run_log_store": { + "service_name": "file-system", + "service_type": "run_log_store", + 
"log_folder": ".run_log_store" + }, + "secrets_handler": { + "service_name": "do-nothing", + "service_type": "secrets" + }, + "catalog_handler": { + "service_name": "file-system", + "service_type": "catalog", + "catalog_location": ".catalog" + }, + "experiment_tracker": { + "service_name": "do-nothing", + "service_type": "experiment_tracker" + }, + "pipeline_file": "examples/retry-fixed.yaml", + "parameters_file": null, + "configuration_file": "examples/configs/fs-catalog-run_log.yaml", + "tag": "", + "run_id": "naive-wilson-0625", + "variables": { + "argo_docker_image": "harbor.csis.astrazeneca.net/mlops/magnus:latest" + }, + "use_cached": true, + "original_run_id": "wrong-file-name", + "dag": { + "start_at": "Setup", + "name": "", + "description": "This is a simple pipeline that demonstrates passing data between steps.\n\n1. Setup: We setup a data folder, we ignore if it is already + present\n2. Create Content: We create a \"hello.txt\" and \"put\" the file in catalog\n3. Clean up to get again: We remove the data folder. Note that this is stubbed + to prevent\n accidental deletion of your contents. You can change type to task to make really run.\n4. Retrieve Content: We \"get\" the file \"hello.txt\" from the + catalog and show the contents\n5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion.\n\n\nYou can run this pipeline by:\n + magnus execute -f examples/catalog.yaml -c examples/configs/fs-catalog.yaml\n", + "steps": { + "Setup": { + "type": "stub", + "name": "Setup", + "next": "Create Content", + "on_failure": "", + "overrides": {}, + "catalog": null, + "max_attempts": 1, + "command_type": "shell", + "command": "mkdir -p data" + }, + "Create Content": { + "type": "stub", + "name": "Create Content", + "next": "Retrieve Content", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [], + "put": [ + "data/hello.txt" + ] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "echo \"Hello from magnus\" >> data/hello.txt\n" + }, + "Retrieve Content": { + "type": "task", + "name": "Retrieve Content", + "next": "success", + "on_failure": "", + "overrides": {}, + "catalog": { + "get": [ + "data/hello.txt" + ], + "put": [] + }, + "max_attempts": 1, + "command_type": "shell", + "command": "cat data/hello.txt", + "node_name": "Retrieve Content" + }, + "success": { + "type": "success", + "name": "success" + }, + "fail": { + "type": "fail", + "name": "fail" + } + } + }, + "dag_hash": "148de99f96565bb1b276db2baf23eba682615c76", + "execution_plan": "chained" + } + } + ``` + + +Magnus also supports [```mocked``` executor](/configurations/executors/mocked) which can +patch and mock tasks to isolate and focus on the failed task. Since python functions and notebooks +are run in the same shell, it is possible to use +[python debugger](https://docs.python.org/3/library/pdb.html) and +[ploomber debugger](https://engine.ploomber.io/en/docs/user-guide/debugging/debuglater.html) +to debug failed tasks. diff --git a/docs/example/secrets.md b/docs/example/secrets.md new file mode 100644 index 00000000..6ecdea06 --- /dev/null +++ b/docs/example/secrets.md @@ -0,0 +1,46 @@ +Secrets are required assets as the complexity of the application increases. Magnus provides a +[python API](/interactions/#magnus.get_secret) to get secrets from various sources. + +!!! info annotate inline end "from magnus import get_secret" + + Secrets is the only interface that you are required to "import magnus" in your python application. 
+
+    Native python and Jupyter notebooks can use this API. We currently do not support shell tasks with
+    secrets from this interface. (1)
+
+1. Using environment variables to access secrets is one pattern that works in all environments.
+
+=== "dotenv format"
+
+    The dotenv format for providing secrets. Ideally, this file should not be part of
+    version control but should be present during the development phase.
+
+    The file is assumed to be present in ```examples/secrets.env``` for this example.
+
+    ```shell linenums="1"
+    --8<-- "examples/secrets.env"
+    ```
+
+    1. Shell script style is supported.
+    2. Key-value based format is also supported.
+
+
+=== "Example configuration"
+
+    Configuration to use the dotenv format file.
+
+    ```yaml linenums="1"
+    --8<-- "examples/configs/dotenv.yaml"
+    ```
+
+    1. Use the dotenv secrets manager.
+    2. Location of the dotenv file, defaults to ```.env``` in the project root.
+
+
+=== "Pipeline in python"
+
+    ```python linenums="1" hl_lines="12-13"
+    --8<-- "examples/secrets.py"
+    ```
+
+    1. The key of the secret that you want to retrieve.
diff --git a/docs/example/steps.md b/docs/example/steps.md
new file mode 100644
index 00000000..9b2360c3
--- /dev/null
+++ b/docs/example/steps.md
@@ -0,0 +1,78 @@
+Magnus provides a rich definition of step types.
+
+ +- [stub](/concepts/stub): A mock step which is handy during designing and debugging pipelines. +- [task](/concepts/task): To execute python functions, jupyter notebooks, shell scripts. +- [parallel](/concepts/parallel): To execute many tasks in parallel. +- [map](/concepts/map): To execute the same task over a list of parameters. (1) + +
+ +1. Similar to ```map``` state in AWS step functions or ```loops``` in Argo workflows. + + +## stub + +Used as a mock node or a placeholder before the actual implementation (1). +{ .annotate } + +1. :raised_hand: Equivalent to ```pass``` or ```...``` in python. + + +=== "yaml" + + ``` yaml + --8<-- "examples/mocking.yaml" + ``` + +=== "python" + + ```python + --8<-- "examples/mocking.py" + ``` + + 1. The name of the node can be as descriptive as you want. Only ```.``` or ```%``` are not allowed. + 2. Stub nodes can take arbitrary parameters; useful to temporarily mock a node. You can define the dependency on step1 using ```depends_on``` + 3. ```terminate_with_success``` indicates that the dag is completed successfully. You can also use ```terminate_with_failure``` to indicate the dag failed. + 4. Add ```success``` and ```fail``` nodes to the dag. + + +## task + +Used to execute a single unit of work. You can use [python](/concepts/task/#python_functions), +[shell](/concepts/task/#shell), [notebook](/concepts/task/#notebook) as command types. + +!!! note annotate "Execution logs" + + You can view the execution logs of the tasks in the [catalog](/concepts/catalog) without digging through the + logs from the underlying executor. + + +=== "Example functions" + + The below content is assumed to be ```examples/functions.py``` + + ```python + --8<-- "examples/functions.py" + ``` + +=== "yaml" + + ``` yaml + --8<-- "examples/python-tasks.yaml" + ``` + + 1. Note that the ```command``` is the [path to the python function](/concepts/task/#python_functions). + 2. ```python``` is default command type, you can use ```shell```, ```notebook``` too. + +=== "python" + + ```python + --8<-- "examples/python-tasks.py" + ``` + + 1. Note that the command is the [path to the function](/concepts/task/#python_functions). + 2. There are many ways to define dependencies within nodes, step1 >> step2, step1 << step2 or during the definition of step1, we can define a next step. + 3. ```terminate_with_success``` indicates that the dag is completed successfully. You can also use ```terminate_with_failure``` to indicate the dag failed. + 4. Add ```success``` and ```fail``` nodes to the dag. diff --git a/docs/examples.md b/docs/examples.md deleted file mode 100644 index effbd933..00000000 --- a/docs/examples.md +++ /dev/null @@ -1,1885 +0,0 @@ -# Examples - -## Executing a notebook - -You can execute a Jupyter notebook by: - -```shell -magnus execute_notebook my_notebook.ipynb -``` - -The notebook file should have an extension of ```ipynb```. - -This execution would run on the local machine and the output notebook would be put in the ```.catalog``` folder called -```my_notebook_output.ipynb```. - -To change the compute environment, please provide the relevant configuration file. - - ---- - -## Executing a python function - -You can execute a python function defined in my_module: - -```python -# In my_module.py - -def my_function(): - print('In the function, my_function of my_module') - -``` - -by invoking magnus as follows: - -```shell -magnus execute_function my_module.my_function -``` - -This execution would run on the local machine and the captured output of the function would be added to the catalog -folder, ```.catalog``` in this case, as my_module.my_function.log - -To change the compute environment, please provide the relevant configuration file. 
- ---- - -## A single node pipeline - - -Assuming you have one simple function call as part of a pipeline defined below: - -```python -# In my_module.py - -def my_function(): - print('In the function, my_function of my_module') -``` - -You can define the single node pipeline either by: - -### YAML definition - -Every pipeline defined via YAML in magnus should have a ```success``` node and ```fail``` node. -The starting node of the pipeline is denoted by ```start_at``` and every node needs to define the next -node to traverse during successful execution of the current node using ```next```. - -Nodes can optionally mention the node to traverse during failure using ```on_failure```. - -The pipeline which contains one node to call the above function. - -```yaml -dag: - description: A single node pipeline - start_at: step 1 - steps: - step 1: - type: task - next: success - command: my_module.my_function - command_type: python - success: - type: success - failure: - type: fail -``` - -### Python SDK - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='step 1', command='my_module.my_function') - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - -You can execute it via ```python pipeline.py```. - ---- - -## Mocking a node in pipeline - -In magnus, you can skip execution of a node or mock using a node of type ```as-is```. -This functionality is useful when you want to focus on designing the flow of code but not the specific implementation. - -Example: - -```yaml -dag: - description: A single node pipeline with mock - start_at: step 1 - steps: - step 1: - type: as-is # The function would not execute as this is as-is node - next: success - command: my_module.my_function # arbitrary config can be passed - command_type: python - success: - type: success - failure: - type: fail -``` - -or via python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline, AsIs - -def pipeline(): - first = AsIs(name='step 1', command='my_module.my_function') # The function would not execute - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - - ---- - -## Using shell commands as part of the pipeline - -In magnus, a pipeline can have shell commands as part of the pipeline. The only caveat in doing so is magnus -would not be able to support returning ```parameters```, ```secrets``` or any of the built-in functions. The cataloging -functionality of magnus still would work via the configuration file. - -Parameters can be accessed by looking for environment variables with a prefix of ```MAGNUS_PRM_```. - -Example: Step 1 of the below pipeline would - -- Get all the files from the catalog to the ```compute_data_folder```. -- Execute the command python my_module.my_function in the shell. -- Put all the files from the ```compute_data_folder``` to the catalog. 
- -```yaml -dag: - description: A single node pipeline with shell - start_at: step 1 - steps: - step 1: - type: task - next: success - command: python -m my_module.my_function # You can use this to call some executable in the PATH - command_type: shell - catalog: - get: - - "*" - put: - - "*" - success: - type: success - failure: - type: fail -``` - -or via python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - catalog_config = { - 'get' : ['*'], - 'put' : ['*'], - } - first = Task(name='step 1', command='python -m my_module.my_function', command_type='shell', catalog=catalog_config) - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - - ---- -## Using python lambda expressions in pipeline - -You can use python lambda expressions as a task type. Please note that you cannot have ```_``` or ```__``` as part of -the expression. This is to prevent any malicious code to be passed into the expression. In the example below, -```step 1``` takes in a parameter ```x``` and returns the integer ```x + 1```. - -Example: - -```yaml -dag: - description: A single node pipeline with python lambda - start_at: step 1 - steps: - step 1: - command_type: python-lambda - command: "lambda x: {'x': int(x) + 1}" - next: success - success: - type: success - failure: - type: fail -``` - -or via python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='step 1', command='lambda x: {'x': int(x) + 1}', command_type='python-lambda') - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - - ---- - -## Using notebook in pipeline - -You can use notebooks as a ```command_type``` of a step in the pipeline. Any of the functionality from python functions -is available via notebook too. - -We use [ploomber](https://ploomber.io/) to inspect the parameters and send them dynamically from the parameter space. - -The command refers to the notebook that you want to use as a task and it should point to the notebook. -The output notebook naming could be provided by using the ```command_config``` section or would be defaulted to the -notebook mentioned in ```command``` section post-fixed with ```_out```. - - -```yaml -dag: - description: A single node pipeline with notebook - start_at: step 1 - steps: - step 1: - command_type: notebook - command: pre_processing.iypnb - next: success - command_config: - notebook_output_path: notebooks/output.ipynb - success: - type: success - failure: - type: fail -``` - -or via python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - command_config = { - 'notebook_output_path': 'notebooks/output.ipynb - } - first = Task(name='step 1', command='pre_processing.iypnb', command_type='notebook', command_config=command_config) - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - -The file name should end with ```.ipynb```. - ---- - -## A multi node pipeline - -A pipeline can have many nodes as part of its execution. 
- -Example: - -```python -# In my_module.py - -def first_function(): - print('In the function, first_function of my_module') - - -def second_function(): - print('In the function, second_function of my_module') - -``` - - -The pipeline which calls first_function of the above module and then to the call the second_function is given below. - -```yaml -dag: - description: A multi node pipeline - start_at: step 1 - steps: - step 1: - type: task - next: step 2 - command: my_module.first_function - command_type: python - step 2: - type: task - next: success - command: my_module.second_function - command_type: python - success: - type: success - failure: - type: fail -``` - - -or via python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='step 1', command='my_module.first_function', next_node='step 2') - second = Task(name='step 2', command='my_module.second_function') - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first, second]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - - - ---- - -## Using on-failure to handle errors - -You can instruct magnus to traverse to a different node of the dag if the current node fails to execute. -A non-zero exit status of the python function or shell command is considered a failure. - -The default behavior in case of a failure of a node is, if no ```on_failure``` is defined, is to -traverse to the ```fail``` node of the graph and mark the execution of the dag as failure. - -The execution of a dag is considered failure if and only if the ```fail``` node of the graph is reached. - -```python -# In my_module.py - -def first_function(): - print('In the function, first_function of my_module') - - -def second_function(): - print('In the function, second_function of my_module') - - -def handle_error(): - print('Send an email notification') - ## Some logic to send error notification - ... - -``` - -The pipeline definition to call ```my_module.handle_error``` in case of a failure of any node is defined below. - - -```yaml -dag: - description: A multi node pipeline with on_failure - start_at: step 1 - steps: - step 1: - type: task - next: step 2 - command: my_module.first_function - command_type: python - on_failure: graceful exit - step 2: - type: task - next: success - command: my_module.second_function - command_type: python - on_failure: graceful exit - graceful exit: - type: task - next: fail - command: my_module.handle_error - command_type: python - success: - type: success - failure: - type: fail -``` - -or via python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='step 1', command='my_module.first_function', next_node='step 2', on_failure='graceful exit') - second = Task(name='step 2', command='my_module.second_function') - third = Task(name='graceful exit', command='my_module.handle_error', next_node='fail') - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first, second, third]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - ---- -## Passing parameters between nodes - -There are several ways we can pass parameters between nodes. Please note that this functionality is only for simple -python data types which can be JSON serializable. Use the catalog functionality to pass files across to different -nodes of the graph. - -You can choose any of the methods to pass the parameters from below. 
All are compatible with each other. - -The example pipeline to call all the below functions is given here: - - -```yaml -dag: - description: A multi node pipeline to pass parameters - start_at: step 1 - steps: - step 1: - type: task - next: step 2 - command: my_module.first_function - command_type: python - step 2: - type: task - next: success - command: my_module.second_function - command_type: python - success: - type: success - failure: - type: fail -``` - -or via python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='step 1', command='my_module.first_function', next_node='step 2') - second = Task(name='step 2', command='my_module.second_function') - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first, second]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - -``` - - -### Pythonically - -```python -# In my_module.py - -def first_function(): - print('In the function, first_function of my_module') - return {'a': 4} - - -def second_function(a): - print('In the function, second_function of my_module') - print(a) - -``` - -In the above code, ```first_function``` is returning a dictionary setting ```a``` to be 4. If the function was called -as a step in the magnus pipeline, magnus adds the key-value pair of ```a=4``` to the parameter space. Note that -```first_function``` can return a dictionary containing as many key-value pairs as needed, magnus would add all of them -to the parameter space. - -```second_function``` is expecting a ```named``` argument ```a```. If the function was called as a step in the magnus -pipeline, magnus would look for a parameter ```a``` in the parameter space and assign it. - -Very loosely, the whole process can be thought of as: ```second_function(**first_function())```. Since magnus holds -parameter space, the functions need not be consecutive and magnus handles the passing only the required arguments into -the function. - - -### Using in-built functions -You can also use the built-in functions that magnus provides to ```store``` and ```get``` parameters. - -```python -# In my_module.py -from magnus import store_parameter, get_parameter - -def first_function(): - print('In the function, first_function of my_module') - store_parameter(a=4) - - -def second_function(): - print('In the function, second_function of my_module') - a = get_parameter('a') # Get parameter with name provides only the named parameter. - parameters = get_parameter() # Returns a dictionary of all the parameters - print(a) # prints 4 - print(parameters) # prints {'a': 4} - -``` - -### Using environment variables -The parameters can also be accessed by using environment variables. All magnus specific parameters would be prefixed -by ```MAGNUS_PRM_```. Any environment variable that is prefixed by ```MAGNUS_PRM_``` is also added to the parameter -space. - -```python -# In my_module.py -import os - -def first_function(): - print('In the function, first_function of my_module') - os.environ['MAGNUS_PRM_a']=4 - - -def second_function(): - print('In the function, second_function of my_module') - a = os.environ['MAGNUS_PRM_a'] - print(a) - -``` - ---- - -## Passing parameters to the first node of the pipeline - -There are several ways to set parameters at the start of the execution of the pipeline. Please choose one that fits -your situation. 
- -### During execution of pipeline by magnus - -The step ```step parameters``` of the below pipeline expects a parameter ```x``` in the lambda expression. - -```yaml -# in getting-started.yaml -dag: - description: Getting started - start_at: step parameters - steps: - step parameters: - type: task - command_type: python-lambda - command: "lambda x: {'x': int(x) + 1}" - next: success - success: - type: success - fail: - type: fail -``` - -or via Python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='step 1', command='lambda x: {'x': int(x) + 1}', command_type='python-lambda') - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() -``` - -You can pass the parameter during the execution of the run like below. - -```shell -magnus execute --file getting-started.yaml --parameters-file parameters.yaml -``` - -```yaml -# in parameters.yaml -x: 3 -``` - -### Using environment variables - -For the same pipeline defined in ```getting-started.yaml```, you can also pass the parameters as environment variables -prefixed by ```MAGNUS_PRM_x```. - -The below command does the same job of passing ```x``` as 3. - -```shell -MAGNUS_PRM_x=3; magnus execute --file getting-started.yaml -``` - -You can pass in as many parameters as you want by prefixing them with ```MAGNUS_PRM_```. All parameters would be read -as ```string``` and have to casted appropriately by the code. - -This method of sending parameters by environmental variables is independent of who does the pipeline execution. - ---- -## Using the catalog to pass artifacts between nodes - -While parameters are used to transfer simple and JSON serializable data types, catalog can be used to make larger files -or artifacts available to down stream nodes. A typical configuration of catalog provider would be: - -```yaml -catalog: - type: #defaults to file-system - config: - compute_data_folder: # defaults to data/ -``` - -If no config is provided, magnus defaults to ```file-system```. - -Logically magnus does the following: - -- ```get``` files from the catalog before the execution to a specific ```compute data folder``` -- execute the command -- ```put``` any files from the ```compute data folder``` back to the catalog. - -### Using the configuration. - -```yaml -dag: - description: Getting started - start_at: step shell make data - steps: - step shell make data: - type: task - command_type: shell - command: mkdir data ; env >> data/data.txt - next: step shell ls data - catalog: - put: - - "*" - step shell ls data: - type: task - command_type: shell - command: ls data/ - next: success - catalog: - compute_data_folder: data/ # This is the default value too. 
- get: - - "*" - success: - type: success - fail: - type: fail -``` - -or via Python SDK: - -```python - -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - catalog_get_all = { - 'get' : ['*'] - } - - catalog_put_all = { - 'put': ['*'] - } - - first = Task(name='step shell make data', command='mkdir data ; env >> data/data.txt', command_type='shell', - catalog=catalog_put_all) - second = Task(name='step shell ls data', command='ls data/', command_type='shell', - catalog=catalog_get_all) - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first, second]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() - - -``` - - -In the above dag definition, ```step shell make data``` makes a data folder and dumps the environmental variables into -```data.txt``` file and instructs the catalog to ```put``` all (i.e '*') files into the catalog for downstream nodes. - -While the step ```step shell ls data``` instructs the catalog to ```get``` (i.e '*') files from the catalog and put -them in ```compute_data_folder``` which is ```data``` and executes the command to see the contents of the directory. - -You can over-ride the ```compute_data_folder``` of a single step to any folder that you want as shown. - -Glob patterns are perfectly allowed and you can it to selectively ```get``` or ```put``` files in the catalog. - -### Using the in-built functions - -You can interact with the catalog from the python code too if that is convenient. - -```python -# In my_module.py -from pathlib import Path - -from magnus import put_in_catalog, get_from_catalog - -def first_function(): - print('In the function, first_function of my_module') - Path('data').mkdir(parents=True, exist_ok=True) - - with open('data/data.txt', 'w') as fw: - fw.write('something interesting) - - # filepath is required and can be a glob pattern - put_in_catalog(filepath='data/data.txt') - -def second_function(): - print('In the function, second_function of my_module') - - # name is required and can be a glob pattern. - # destination_folder is defaulted to the compute_data_folder as defined in the config - get_from_catalog(name='data.txt', destination_folder='data/') - -``` - -The python function ```first_function``` makes the ```compute_data_folder``` and instructs the catalog to put it the -catalog. The python function ```second_function``` instructs the catalog to get the file by name ```data.txt``` from -the catalog and put it in the folder ```data/```. You can use glob patterns both in ```put_in_catalog``` or -```get_from_catalog```. - -The corresponding pipeline definition need not even aware of the cataloging happening by the functions. - -```yaml -dag: - description: A multi node pipeline - start_at: step 1 - steps: - step 1: - type: task - next: step 2 - command: my_module.first_function - command_type: python - step 2: - type: task - next: success - command: my_module.second_function - command_type: python - success: - type: success - failure: - type: fail -``` - ---- -## Using the catalog to source external data - -In magnus, you can only ```get``` from catalog if the catalog location already exists. Calling ```put``` in catalog, -which safely makes the catalog location if it does not exist, before you are trying to ```get``` from the catalog -ensures that the catalog location is always present. - -But there are situations where you want to call ```get``` before you ```put``` data in the catalog location by the -steps of the pipeline. 
For example, you want to source a data file generated by external processes and transform them -in your pipeline. You can achieve that by the fact all catalog providers (eg. file-system and extensions) use -```run_id``` as the directory (or partition) of the catalog. - -To source data from external sources for a particular run, - -- Create a ```run_id``` that you want to use for pipeline execution. -- Create the directory (or partition) in the catalog location by that ```run_id``` -- Copy the contents that you want the pipeline steps to access in the catalog location. -- Run the magnus pipeline by providing the ```run_id``` i.e ```magnus execute --run-id run_id --file <>``` - -Since the catalog location already exists, ```get``` from the catalog will source the external data. - ---- -## Accessing secrets within code. - -Secrets are the only service that magnus provides where you need to ```import magnus``` in your source code. This is -to ensure that the integrity of the secrets are held and handled safely. - -A typical configuration of the secrets is: - -```yaml -secrets: - type: #defaults to do-nothing - config: -``` - -By default, magnus chooses a ```do-nothing``` secrets provider which holds no secrets. For local development, -```dotenv``` secrets manager is useful and the config is as below. - -```yaml -secrets: - type: dotenv - config: - location: # defaults to .env -``` - -Example: - -``` -#Inside .env file -secret_name=secret_value#Any comment that you want to pass - -``` - -Any content after # is ignored and the format is ```key=value``` pairs. - -```python -# In my_module.py -from magnus import get_secret - -def first_function(): - print('In the function, first_function of my_module') - secret_value = get_secret('secret_name') - print(secret_value) # Should print secret_value - - secrets = get_secret() - print(secrets) # Should print {'secret_name': 'secret_value'} -``` - -The pipeline to run the above function as a step of the pipeline. - -```yaml -# in config.yaml -secrets: - type: dotenv - config: - location: # defaults to .env - -# in pipeline.yaml -dag: - description: Demo of secrets - start_at: step 1 - steps: - step 1: - type: task - next: success - command: my_module.first_function - command_type: python - success: - type: success - failure: - type: fail -``` - -or via Python SDK: - -```python -#in pipeline.py - -from magnus import Task, Pipeline - -def pipeline(): - first = Task(name='step 1', command='my_module.my_function') - pipeline = Pipeline(start_at=first, name='my first pipeline') - pipeline.construct([first]) - pipeline.execute() - -if __name__ == '__main__': - pipeline() -``` - ---- -## Parallel node - -We will be using ```as-is``` nodes as part of the examples to keep it simple but the concepts of nesting/branching -remain the same even in the case of actual tasks. 
- -Example of a parallel node: - -```yaml -# In config.yaml -run_log_store: - type: file-system # Use chunked-fs when using parallel - -# In pipeline.yaml -dag: - description: DAG for testing with as-is and parallel - start_at: step1 - steps: - step1: - type: as-is - next: step2 - step2: - type: parallel - next: success - branches: - branch_1: - start_at: step_1 - steps: - step_1: - type: as-is - next: success - success: - type: success - fail: - type: fail - branch_2: - start_at: step_1 - steps: - step_1: - type: as-is - next: success - success: - type: success - fail: - type: fail - success: - type: success - fail: - type: fail -``` - -You can execute the above dag by: - -```magnus execute --file example-parallel.yaml``` - -The above run should produce a ```run_log``` in the ```.run_log_store``` directory with the ```run_id``` as filename. - -The contents of the log should be similar to this: - -
- Click to show the run log - - -```json - -{ - "run_id": "20220120131257", - "dag_hash": "cf5cc7df88d4af3bc0936a9a8a3c4572ce4e11bc", - "use_cached": false, - "tag": null, - "original_run_id": "", - "status": "SUCCESS", - "steps": { - "step1": { - "name": "step1", - "internal_name": "step1", - "status": "SUCCESS", - "step_type": "as-is", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": , - "code_identifier_message": - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 13:12:57.999265", - "end_time": "2022-01-20 13:12:57.999287", - "duration": "0:00:00.000022", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step2": { - "name": "step2", - "internal_name": "step2", - "status": "SUCCESS", - "step_type": "parallel", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": , - "code_identifier_message": - } - ], - "attempts": [], - "user_defined_metrics": {}, - "branches": { - "step2.branch_1": { - "internal_name": "step2.branch_1", - "status": "SUCCESS", - "steps": { - "step2.branch_1.step_1": { - "name": "step_1", - "internal_name": "step2.branch_1.step_1", - "status": "SUCCESS", - "step_type": "as-is", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": , - "code_identifier_message": - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 13:12:58.090461", - "end_time": "2022-01-20 13:12:58.090476", - "duration": "0:00:00.000015", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step2.branch_1.success": { - "name": "success", - "internal_name": "step2.branch_1.success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": , - "code_identifier_message": - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 13:12:58.135551", - "end_time": "2022-01-20 13:12:58.135732", - "duration": "0:00:00.000181", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - } - }, - "step2.branch_2": { - "internal_name": "step2.branch_2", - "status": "SUCCESS", - "steps": { - "step2.branch_2.step_1": { - "name": "step_1", - "internal_name": "step2.branch_2.step_1", - "status": "SUCCESS", - "step_type": "as-is", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": , - "code_identifier_message": - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 13:12:58.187648", - "end_time": "2022-01-20 13:12:58.187661", - "duration": "0:00:00.000013", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": 
{}, - "branches": {}, - "data_catalog": [] - }, - "step2.branch_2.success": { - "name": "success", - "internal_name": "step2.branch_2.success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": , - "code_identifier_message": - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 13:12:58.233479", - "end_time": "2022-01-20 13:12:58.233681", - "duration": "0:00:00.000202", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - } - } - }, - "data_catalog": [] - }, - "success": { - "name": "success", - "internal_name": "success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": , - "code_identifier_message": - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 13:12:58.280538", - "end_time": "2022-01-20 13:12:58.280597", - "duration": "0:00:00.000059", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - }, - "parameters": {}, - "run_config": { - "executor": { - "type": "local", - "config": {} - }, - "run_log_store": { - "type": "file-system", - "config": {} - }, - "catalog": { - "type": "file-system", - "config": {} - }, - "secrets": { - "type": "do-nothing", - "config": {} - } - } -} - -``` -
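Before moving on, here is a rough, plain-Python sketch of what the ```parallel``` node above represents: two independent branches that can run at the same time. It is only an illustration using the standard library and not how magnus schedules branches; the actual parallelism is controlled by the executor configuration described below.

```python
from concurrent.futures import ThreadPoolExecutor

# Each branch of the parallel node is an independent sub-graph.
def branch_1():
    return "branch_1 finished"

def branch_2():
    return "branch_2 finished"

# Run both branches concurrently; in the run log above, magnus records them
# under step2.branch_1 and step2.branch_2.
with ThreadPoolExecutor(max_workers=2) as pool:
    futures = [pool.submit(branch_1), pool.submit(branch_2)]
    print([f.result() for f in futures])
```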
- -The individual steps of the dag are named in [```dot path convention```](../concepts/run-log/#naming_step_log) - -You can nest a ```parallel``` node, ```dag``` or a ```map``` node within parallel node to enable modular dag designs. - -### Enabling parallel execution - -Though the dag definition defines a ```parallel``` node, the execution of the dag and the parallelism is actually -controlled by the executor. In ```local``` execution, you can enable parallel branch execution by modifying the config. - -```yaml -executor: - type: local - config: - enable_parallel: True -``` - -Point to note: - -- Run log stores which use a single file as their log source (eg. file-system) cannot reliably run parallel executions - as race conditions to modify the same file can happen leaving the run log in inconsistent state. The logs of the - execution would also warn the same. Partitioned run log stores (eg. db) can be reliable run log stores. - ---- -## Embedding dag within dag - -You can embed dag's defined elsewhere into your dag. - -For example, we can define a dag which works all by itself in sub-dag.yaml - -```yaml -# in sub-dag.yaml -dag: - description: sub dag - start_at: step1 - steps: - step1: - type: as-is - next: step2 - step2: - type: as-is - next: success - success: - type: success - fail: - type: fail - -``` - -We can embed this dag into another dag as a node like below. - -```yaml -dag: - description: DAG for nested dag - start_at: step_dag_within_dag - steps: - step_dag_within_dag: - type: dag - dag_definition: sub-dag.yaml # Should be the filepath to the dag you want to embed. - next: success - success: - type: success - fail: - type: fail - -``` - -Nested dag's should allow for a very modular design where individual dag's do well defined tasks but the nested dag -can stitch them to complete the whole task. - -As with parallel execution, the individual steps of the dag are named in -[```dot path convention```](../concepts/run-log/#naming_step_log) - ---- -## Looping a branch over an iterable parameter - -Often, you would need to do the same repetitive tasks over a list and magnus allows you to do that. - -Example of dynamic branch looping is below. - -```yaml -# in map-state.yaml -dag: - description: DAG for map - start_at: step1 - steps: - step1: - type: task - command: "lambda : {'variables' : ['a', 'b', 'c']}" - command_type: python-lambda - next: step2 - step2: - type: map - iterate_on: variables - iterate_as: x - next: success - branch: - start_at: step_1 - steps: - step_1: - type: task - command: "lambda x : {'state_' + str(x) : 5}" - command_type: python-lambda - next: success - success: - type: success - fail: - type: fail - success: - type: success - fail: - type: fail - -``` - -In the above dag, step1 sets the parameters ```variables``` as list ```['a', 'b', 'c']```. -step2 is a node of type map which will iterate on ```variables``` and execute the ```branch``` defined as part of the -definition of step2 for every value in the iterable ```variables```. - -The ```branch``` definition of the step2 basically creates one more parameter ```state_=5``` by the lambda -expression. You can see these parameters as part of the run log show below. - -
- Click to show the run log - -``` json -{ - "run_id": "20220120150813", - "dag_hash": "c0492a644b4f28f8441d669d9f0efb0f6d6be3d3", - "use_cached": false, - "tag": null, - "original_run_id": "", - "status": "SUCCESS", - "steps": { - "step1": { - "name": "step1", - "internal_name": "step1", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 15:08:14.069919", - "end_time": "2022-01-20 15:08:14.070484", - "duration": "0:00:00.000565", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step2": { - "name": "step2", - "internal_name": "step2", - "status": "SUCCESS", - "step_type": "map", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [], - "user_defined_metrics": {}, - "branches": { - "step2.a": { - "internal_name": "step2.a", - "status": "SUCCESS", - "steps": { - "step2.a.step_1": { - "name": "step_1", - "internal_name": "step2.a.step_1", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 15:08:14.162440", - "end_time": "2022-01-20 15:08:14.162882", - "duration": "0:00:00.000442", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step2.a.success": { - "name": "success", - "internal_name": "step2.a.success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 15:08:14.209895", - "end_time": "2022-01-20 15:08:14.210106", - "duration": "0:00:00.000211", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - } - }, - "step2.b": { - "internal_name": "step2.b", - "status": "SUCCESS", - "steps": { - "step2.b.step_1": { - "name": "step_1", - "internal_name": "step2.b.step_1", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 
15:08:14.258519", - "end_time": "2022-01-20 15:08:14.258982", - "duration": "0:00:00.000463", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step2.b.success": { - "name": "success", - "internal_name": "step2.b.success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 15:08:14.305524", - "end_time": "2022-01-20 15:08:14.305754", - "duration": "0:00:00.000230", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - } - }, - "step2.c": { - "internal_name": "step2.c", - "status": "SUCCESS", - "steps": { - "step2.c.step_1": { - "name": "step_1", - "internal_name": "step2.c.step_1", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 15:08:14.353182", - "end_time": "2022-01-20 15:08:14.353603", - "duration": "0:00:00.000421", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step2.c.success": { - "name": "success", - "internal_name": "step2.c.success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": "INTENTIONALLY_REMOVED" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 15:08:14.401043", - "end_time": "2022-01-20 15:08:14.401304", - "duration": "0:00:00.000261", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - } - } - }, - "data_catalog": [] - }, - "success": { - "name": "success", - "internal_name": "success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "2a5b33bdf60c4f0d38cae04ab3f988b3d1c6ed59", - "code_identifier_type": "git", - "code_identifier_dependable": false, - "code_identifier_url": "INTENTIONALLY_REMOVED", - "code_identifier_message": `"INTENTIONALLY_REMOVED"` - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2022-01-20 15:08:14.449759", - "end_time": "2022-01-20 15:08:14.449826", - "duration": "0:00:00.000067", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - }, - "parameters": { - "variables": [ - "a", - "b", - "c" - ], - "state_a": 5, - "state_b": 5, - "state_c": 5 - }, - "run_config": { - "executor": { - "type": "local", - "config": {} - }, - "run_log_store": { - "type": "buffered", - "config": {} - }, - "catalog": { - "type": "file-system", - "config": {} - }, - 
"secrets": { - "type": "do-nothing", - "config": {} - } - } -} -``` -
- -The individual steps of the dag are named in [```dot path convention```](../concepts/run-log/#naming_step_log). - -### Enabling parallel execution - -Though the dag definition defines a ```map``` node where the branches can be executed in parallel, -the execution of the dag and the parallelism is actually -controlled by the executor. In ```local``` execution, you can enable parallel branch execution by modifying the config. - -```yaml -executor: - type: local - config: - enable_parallel: True -``` - -Point to note: - -- Run log stores which use a single file as their log source (eg. file-system) cannot reliably run parallel executions - as race conditions to modify the same file can happen leaving the run log in inconsistent state. The logs of the - execution would also warn the same. Partitioned run log stores (eg. db) can be reliable run log stores. - ---- -## Nesting and complex dags - -Magnus does not limit you at all in nesting at any level. You have construct deep nesting levels easily and magnus -would execute them as you designed. - -As a general coding practice, having deeply nested branches could be hard to read and maintain. - -***NOTE***: There is a possibility that you can nest the same dag within the dag definition resulting in a infinite -loop. We are actively finding ways to detect these situations and warn you. - ---- -## Advanced use as-is - -Node type ```as-is``` defined in magnus can be a very powerful tool in some deployment patterns. - -For example in the below dag definition, the step ```step echo``` does nothing as part of ```local``` execution. - -```yaml -# In config.yaml -executor: - type: demo-renderer - -run_log_store: - type: file-system - -# In pipeline.yaml -dag: - description: Getting started - start_at: step parameters - steps: - step parameters: - type: task - command_type: python-lambda - command: "lambda x: {'x': int(x) + 1}" - next: step shell - step shell: - type: task - command_type: shell - command: mkdir data ; env >> data/data.txt - next: step echo - catalog: - put: - - "*" - step echo: - type: as-is - command_type: shell - command_config: - render_string: echo hello - next: success - success: - type: success - fail: - type: fail -``` - -But a deployment pattern, like ```demo-renderer```, can use it to inject a command into the bash script. To test it out, -uncomment the config to change to executor to ```demo-renderer``` and the run log store to be ```file-system``` and -execute it like below. - -```magnus execute --file getting-started.yaml``` - -should generate a bash script as show below in ```demo-bash.sh```. - -```shell -for ARGUMENT in "${@:2}" -do - KEY=$(echo $ARGUMENT | cut -f1 -d=) - VALUE=$(echo $ARGUMENT | cut -f2 -d=) - export "MAGNUS_PRM_$KEY"=$VALUE -done -magnus execute_single_node $1 step%parameters --file getting-started.yaml -exit_code=$? -echo $exit_code -if [ $exit_code -ne 0 ]; -then - $(magnus execute_single_node $1 fail --file getting-started.yaml) - exit 1 -fi -magnus execute_single_node $1 step%shell --file getting-started.yaml -exit_code=$? -echo $exit_code -if [ $exit_code -ne 0 ]; -then - $(magnus execute_single_node $1 fail --file getting-started.yaml) - exit 1 -fi -echo hello -exit_code=$? 
-echo $exit_code -if [ $exit_code -ne 0 ]; -then - $(magnus execute_single_node $1 fail --file getting-started.yaml) - exit 1 -fi -magnus execute_single_node $1 success --file getting-started.yaml -``` - -The shell script is translation of the dag into a series of bash commands but notice the command ```echo hello``` as -part of the script. While the ```local``` executor interpreted that node as a stub or a mock node, the -```demo-renderer``` execution used the ```render_string``` variable of the node ```config``` to inject a script. - -This feature is very useful when you want certain few steps (may be email notifications) to be only possible in -production like environments but want to mock the during dev/experimental set up. - -***NOTE***: When trying to ```locally``` re-run a dag definition with ```as-is``` node used to inject scripts, -the run would start from ```as-is``` step onwards independent of the source of failure. You can change this -behavior by writing extensions which skip over ```as-is``` nodes during re-run. -## Controlling the log level of magnus - -The default log level of magnus is WARNING but you can change it at the point of execution to one of -```['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET]``` by using the command line argument --log-level. - -For example: - -```magnus execute --file --log-level DEBUG``` - -would set the magnus log level to DEBUG. This setting only affects magnus logs and will not alter your application log -levels. - ---- -## Order of configurations - -Magnus supports many ways of providing configurations but there is a order of preference. - -Magnus defaults to the following if no config is provided. - -```yaml -executor: - type: local - config: - enable_parallel: "false" - -run_log_store: - type: buffered - -catalog: - type: file-system - config: - compute_data_folder: data/ - catalog_location: .catalog - -secrets: - type: do-nothing - -experiment_tracking: - type: do-nothing - -``` - -But you can over-ride these defaults by providing a ```magnus-config.yaml``` in the source directory. For example, -if the ```magnus-config.yaml``` file has the following contents, even if you do not provide a config in the dag -definition file, these would taken as default service providers. - -```yaml -executor: - type: local - config: - enable_parallel: True - -run_log_store: - type: file-system - -catalog: - type: file-system - config: - compute_data_folder: data/ # default - catalog_location: .catalog # default - -secrets: - type: dotenv - config: - location: .env # default -``` - -Finally, you can also over-ride the configurations set in the dag definition file by providing a custom configuration -file containing only the configurations. - -For example, you can provide a dag definition file as above with ```do-nothing``` secrets handler but by providing -the below configurations file at the run time, you can over-ride it to ```dotenv```. - -```yaml -#in prod-configuration.yaml -secrets: - type: dotenv - -``` - -The command to execute while providing the configuration file. - -```magnus execute --file --config-file prod-configuration.yaml``` - - - -The design thought is enable switching between different configurations by different actors involved in the data science -workflow. The engineering team could provide ```magnus-config.yaml``` that should be default to the team or project -for dev/experimental phase of the work but can over-ride the configuration during production deployment. 
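The precedence described above can be summarised as: built-in defaults < ```magnus-config.yaml``` < the config in the dag definition file < the file passed via ```--config-file```. The snippet below is only a loose illustration of that ordering; magnus may merge configurations differently internally (for example, per service rather than wholesale).

```python
# Later sources win over earlier ones.
defaults = {'secrets': {'type': 'do-nothing'}}
magnus_config = {'secrets': {'type': 'dotenv'}}   # magnus-config.yaml in the source directory
dag_definition = {}                               # config block in the dag definition file, if any
cli_config = {'secrets': {'type': 'dotenv'}}      # prod-configuration.yaml via --config-file

effective = {}
for source in (defaults, magnus_config, dag_definition, cli_config):
    effective.update(source)

print(effective['secrets']['type'])  # dotenv
```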
diff --git a/docs/extensions.md b/docs/extensions.md new file mode 100644 index 00000000..1a42f8e6 --- /dev/null +++ b/docs/extensions.md @@ -0,0 +1,203 @@ +## General set up + +Magnus is built around the idea to decouple the pipeline definition and pipeline execution. + +[All the concepts](/concepts/the-big-picture/) are defined with this principle and therefore +are extendible as long as the API is satisfied. + +We internally use [stevedore](https:/pypi.org/project/stevedore/) to manage extensions. +Our [pyproject.toml](https://github.com/AstraZeneca/magnus-core/blob/main/pyproject.toml) has +plugin space for all the concepts. + +```toml +[tool.poetry.plugins."executor"] +"local" = "magnus.extensions.executor.local.implementation:LocalExecutor" +"local-container" = "magnus.extensions.executor.local_container.implementation:LocalContainerExecutor" +"argo" = "magnus.extensions.executor.argo.implementation:ArgoExecutor" + +# Plugins for Catalog +[tool.poetry.plugins."catalog"] +"do-nothing" = "magnus.catalog:DoNothingCatalog" +"file-system" = "magnus.extensions.catalog.file_system.implementation:FileSystemCatalog" + +# Plugins for Secrets +[tool.poetry.plugins."secrets"] +"do-nothing" = "magnus.secrets:DoNothingSecretManager" +"dotenv" = "magnus.extensions.secrets.dotenv.implementation:DotEnvSecrets" +"env-secrets-manager" = "magnus.extensions.secrets.env_secrets.implementation:EnvSecretsManager" + +# Plugins for Run Log store +[tool.poetry.plugins."run_log_store"] +"buffered" = "magnus.datastore:BufferRunLogstore" +"file-system" = "magnus.extensions.run_log_store.file_system.implementation:FileSystemRunLogstore" +"chunked-fs" = "magnus.extensions.run_log_store.chunked_file_system.implementation:ChunkedFileSystemRunLogStore" + +# Plugins for Experiment tracker +[tool.poetry.plugins."experiment_tracker"] +"do-nothing" = "magnus.experiment_tracker:DoNothingTracker" +"mlflow" = "magnus.extensions.experiment_tracker.mlflow.implementation:MLFlowExperimentTracker" + +# Plugins for Pickler +[tool.poetry.plugins."pickler"] +"pickle" = "magnus.pickler:NativePickler" + + +# Plugins for Integration +[tool.poetry.plugins."integration"] +# Left empty for 3rd party integrations + +# Plugins for Tasks +[tool.poetry.plugins."tasks"] +"python" = "magnus.tasks:PythonTaskType" +"shell" = "magnus.tasks:ShellTaskType" +"notebook" = "magnus.tasks:NotebookTaskType" + + +# Plugins for Nodes +[tool.poetry.plugins."nodes"] +"task" = "magnus.extensions.nodes:TaskNode" +"fail" = "magnus.extensions.nodes:FailNode" +"success" = "magnus.extensions.nodes:SuccessNode" +"parallel" = "magnus.extensions.nodes:ParallelNode" +"map" = "magnus.extensions.nodes:MapNode" +"stub" = "magnus.extensions.nodes:StubNode" +``` + + +To submit extensions to this project (pretty please!!) submit a PR with plugin name +and implementation path inserted in *pyproject.toml*. We are happy to work with you to write +them, the complexity is mostly in having access to them. + +To write extensions for your project and are not useful for wider audience, include the plugin +within your pyproject.toml or [setuptools entry points](https://setuptools.pypa.io/en/latest/ +pkg_resources.html#entry-points). During the execution of the pipeline, +magnus would automatically pick up the extension if it registered to the correct namespace. + + +The below section shows the base class implementation for all the concepts. All the base classes +are extended from pydantic BaseModel. 
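For a quick sanity check that an extension is discoverable after installation, the entry points can be inspected directly with the standard library; stevedore builds on the same mechanism. A minimal sketch, assuming Python 3.10+ for the ```group=``` keyword (older versions index the result of ```entry_points()``` by group name):

```python
from importlib.metadata import entry_points

# List everything registered under one of the namespaces shown above.
for ep in entry_points(group="secrets"):
    print(ep.name, "->", ep.value)
# e.g. dotenv -> magnus.extensions.secrets.dotenv.implementation:DotEnvSecrets
```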
+ + +## Executor + +Register to namespace: [tool.poetry.plugins."executor"] + +Examples: [local](/configurations/executors/local), +[local-container](/configurations/executors/local-container), +[argo](/configurations/executors/argo) + +::: magnus.executor.BaseExecutor + options: + show_root_heading: true + show_source: true + show_symbol_type_heading: true + members: None + heading_level: 3 + + +## Run Log + +Register to namespace: [tool.poetry.plugins."run_log_store"] + +Examples: [buffered](/configurations/run-log/#buffered), +[file-system](/configurations/run-log/#file-system), + [chunked-fs](/configurations/run-log/#chunked-fs) + +::: magnus.datastore.BaseRunLogStore + options: + show_root_heading: true + show_source: true + show_symbol_type_heading: true + members: None + heading_level: 3 + +The ```RunLog``` is a nested pydantic model and is located in ```magnus.datastore.RunLog```. + + + +## Catalog + +Register to namespace: [tool.poetry.plugins."catalog"] + +Example: +[do-nothing](/configurations/catalog/#do-nothing), + [file-system](/configurations/catalog/#file-system) + +::: magnus.catalog.BaseCatalog + options: + show_root_heading: true + show_source: true + show_symbol_type_heading: true + members: None + heading_level: 3 + + +## Secrets + +Register to namespace: [tool.poetry.plugins."secrets"] + +Example: +[do-nothing](/configurations/secrets/#do-nothing), + [env-secrets-manager](/configurations/secrets/#environment_secret_manager), + [dotenv](/configurations/secrets/#dotenv) + +::: magnus.secrets.BaseSecrets + options: + show_root_heading: true + show_source: true + show_symbol_type_heading: true + members: None + heading_level: 3 + + +## Experiment tracking + +Register to namespace: [tool.poetry.plugins."experiment_tracker"] + +Example: +[do-nothing](/configurations/experiment-tracking), ```mlflow``` + +::: magnus.experiment_tracker.BaseExperimentTracker + options: + show_root_heading: true + show_source: true + show_symbol_type_heading: true + members: None + heading_level: 3 + +## Nodes + +Register to namespace: [tool.poetry.plugins."nodes"] + +Example: +[task](/concepts/task), +[stub](/concepts/stub), +[parallel](/concepts/parallel), +[map](/concepts/map) + +::: magnus.nodes.BaseNode + options: + show_root_heading: true + show_source: true + show_symbol_type_heading: true + members: None + heading_level: 3 + + + +## Tasks + +Register to namespace: [tool.poetry.plugins."tasks"] + +Example: +[python](/concepts/task/#python_functions), +[shell](/concepts/task/#shell), +[notebook](/concepts/task/#notebook) + +::: magnus.tasks.BaseTaskType + options: + show_root_heading: true + show_source: true + show_symbol_type_heading: true + members: None + heading_level: 3 diff --git a/docs/extensions/extensions.md b/docs/extensions/extensions.md deleted file mode 100644 index e5bf0926..00000000 --- a/docs/extensions/extensions.md +++ /dev/null @@ -1,116 +0,0 @@ -# Guide to Extensions - -The idea behind magnus, in simple terms, is to decouple ```what``` should be done to ```how``` it is implemented. -So while dag only defines the ```what``` part of the equation while different compute modes (along with services) -define how to make it happen. - -All the services (compute modes, run log store, secrets, catalog, experiment tracking) are written to -align to the principle. All the interactions with the services only happen via defined API's that all -implementations of the service should implement. 
-The ```Base``` class of all the services are given the most general implementations to make extensions as easy as -possible. - -Please find supported extensions in [magnus extensions](https://github.com/AstraZeneca/magnus-extensions) - -## Technical insights - -Any dag execution has two distinct phases - -- Traversal of the dag: In this phase, we are only interested in traversal rules of the dag. -- Execution of the node: In this phase, we are only interested in executing a specific node. - -We can characterize a pipeline execution engine by asking two questions - -- Who is responsible for the dag traversal? -- Is the compute environment same as traversal environment? - -Taking the example of AWS step functions, AWS Step function is responsible for traversal and the compute environment is -not the same as traversal environment as most of the states relate to some kind of compute provided by AWS. The state -machine or workflow engine keeps track of the jobs in the compute environment or has some event based mechanism to -trigger the traversal after the node finishes execution. - -Asking the same questions in the context of magnus, gives us 4 possible choices. - -- Magnus traverses, execution environment same as traversal. -- Magnus traverses, execution environment not same as traversal. -- Magnus does not traverse, execution environment not same as traversal. -- Magnus does not traverse, execution environment same as traversal. - -Magnus is designed to handle all the above 4 choices which makes the decoupling possible. - -### ***Magnus traverses, execution environment same as traversal.*** - -There is only one possible way this can happen with magnus, i.e with ```local``` compute mode. Since both the traversal -and execution are the same, there is no change in configuration of services for execution and traversal. - -### ***Magnus traverses, the execution environment != traversal*** - -In this mode, magnus is responsible for traversal of the graph but triggers the actual execution to happen in a -different environment to the traversal of the graph. For example, ```local-container``` mode, magnus is responsible -for traversal of the graph but spins up a container with the instruction to execute the node. Since the traversal -and execution environments are different, the configuration of services have to modified for execution and traversal. -This is implemented by using an [*Integration* pattern](../../concepts/integration/) that can be provided to control -the configuration during both phases. - -Nearly all the other dag execution engines fall in this space. For example, AWS step functions or argo have a central -server that traverses the graph but the execution of the nodes happen in some containers or compute of the AWS. -We call this as *centralized executor*. - -Interestingly, in magnus there are two ways to handle this scenario: - -- Just like AWS Step functions or argo workflows, we can have a *centralized executor* which triggers the execution of - nodes in the environment that the user wants. For example, ```local-container```. -- Since the dag definition is part of the source code, every node of the graph is fully aware of the whole graph. This - enables some compute modes to let the execution environment decide the next job to trigger based on the status of - the execution of the current node. We call this as *decentralized executor*. 
- - Detailed use case: We have internally tested an *magnus-extension*, that - - - Traverses the graph and triggers an AWS Batch job for the first node from the local computer. - - The AWS Batch job role is given enough privileges to trigger another AWS Batch job from within the batch job. - - After the execution of the first node in AWS Batch, read the dag definition to find the next node to trigger - and sets up the AWS batch job accordingly. - - The graph traversal ends when one of ```success``` nodes or ```fail``` nodes have reached. - - -In our opinion, *decentralized executors* are ideal for experimentation phase as there could as many dag definitions as -needed by the team without blocking one another or causing merge conflicts. Since the compute can also be off-loaded to -compute providers, it does not block their local computers. - - -### ***Magnus does not traverse, execution environment not same as traversal.*** - -In this mode, magnus does not traverse the graph but translates the dag definition to something that the ```executor``` -of user's choice. For example, ```demo-renderer``` mode available as part of the ```magnus-core``` package translates -a dag definition to a bash script, although technically the execution environment is same as the traversal in this -specific example. - -Since the traversal and execution environments are different, the configuration of services have to modified for -execution and traversal. This is implemented by using an [*Integration* pattern](../../concepts/integration/) -that can be provided to control the configuration during both phases. - -The actual execution of the step is still wrapped around by magnus, like in -[```demo renderer```](../../getting_started/example-deployment/). - -The design process behind this is abstract the infrastructure or engineering processes behind production grade -deployments from the data science teams. This abstraction also lets the engineering teams continuously improve/test -different deployment patterns without disturbing the data science team. - -The compute extension, ```aws-step-functions``` is planned to be released along with other *magnus-extensions*. - -### ***Magnus does not traverse, execution environment same as traversal*** - -In this mode, the dag definition is translated into something that the ```executor``` of user's choice and we use -the ```as-is``` node to inject scripts that are beyond the control of magnus. An example of this behavior is shown -[here](../../examples/#advanced_use_as-is), where the ```render_string``` of ```as-is``` is used to inject scripts. - -The design process is to provide the best possible chance for the dag definition to remain the same independent upon -the mode of execution. - -## Submitting Community Extensions - -We absolutely love community extensions to magnus and would also provide support in cases of complex extensions. - -For all the extensions, you should also provide integration pattern between some of the magnus core compute patterns. -As of this writing, we consider ```local```, ```local-container``` and ```demo-renderer``` as core compute patterns and -we would be adding more to the list as we get more mature. 
diff --git a/docs/getting_started/brief-concepts-input.md b/docs/getting_started/brief-concepts-input.md deleted file mode 100644 index 2d6f16d7..00000000 --- a/docs/getting_started/brief-concepts-input.md +++ /dev/null @@ -1,129 +0,0 @@ -# Closer look at input - ---- - ---8<-- -README.md:exampleInput ---8<-- - - -## dag - -A [directed acyclic graph (dag)](../../concepts/dag) is the definition of the work you want to perform. -It defines a series of [nodes](../../concepts/nodes) and the rules of traversal between them. - -## Traversal of the dag - -### yaml definition: -In magnus yaml, the order of steps in the dag definition is not important. The traversal is as follows: - -1. We start at start_at of the dag, which is "step parameters". -2. If "step parameters" successfully completed we move to *next* of "step parameters", which is "step shell". -3. if "step parameters" failed, we move to the failure node of the dag (fail). The step definition can over-ride this. -4. We stop traversing once we reach one of success or fail nodes. - -All dag definitions should have a *success* node and *fail* node. A step/node in the dag defines the next node to -visit in the *next* section of the definition. A step/node can also define the next node to visit *on failure* -of the node, if one is not provided we default to the fail node of the dag. - -### Python SDK: - -There is an equivalence of structure even in python SDK. There is no need for an explicit ```success``` or ```fail``` -node as they are added implicitly. - - -You can also provide the maximum run time for a step or the entire dag in the definition. More information of all -the features is [available here](../../concepts/dag). - - -## Step/Node - -A Step/Node defines a single *logical unit of work* in the dag. - -In the example, we use three different type of nodes: - -### task - - Is some callable/executable code. - Python functions are default and fully [supported tasks](../../concepts/nodes/#task). - - As shown in the example, you can also use python lambda expressions with task type of python-lambda. - - Or shell with a caveat that [any interactions](../brief-concepts-output/#interaction_in_code) with magnus or - secret management within magnus is not possible. - -### success - - A [node](../../concepts/nodes/#success) that marks the graph/sub-graph as success. - -### fail - - A [node](../../concepts/nodes/#fail) that marks the graph/sub-graph as fail. - - -You can define more [complex node types (parallel, embedded dag, map) too](../../concepts/nodes/#nodes). - -## Parameters - - - - -Initial parameters to the pipeline could be sent by sending in a parameters file during execution. - - - -The lambda expression, ```lambda x: {'x': int(x) + 1}```, then can use the parameter and update it -(in this case, x = x + 1 = 4) by returning a dictionary. The [parameter space](../../concepts/nodes/#passing_data) -is updated with the key-value pair. Parameters can be passed to python functions using a similar fashion. - -Shell executions have access to the parameters too with key being prefixed by MAGNUS_PRM_. Any JSON serializable -key-value pairs can be used. You can confirm this by searching for ```MAGNUS_PRM_``` in ```data/data.txt```. - -For larger content/files, please use the data [catalog](../../concepts/catalog) -functionality. - ---- -!!! Note - - All parameter keys are case insensitive and the case is changed to lower to support Windows. 
- Please read more information [here](https://stackoverflow.com/questions/19023238/why-python-uppercases-all-environment-variables-in-windows). - ---- - - -## Catalog - -Catalog is a way to pass data files across nodes and also serves as a way to track data used/generated as part of the -execution. In the following instruction: - -```yaml -step shell: - type: task - command_type: shell - command: mkdir data ; env >> data/data.txt # For Linux/macOS - #command: mkdir data - next: success - catalog: - put: - - "*" -``` - -or in Python SDK: -```python - -second = Task(name='step shell', command='mkdir data ; env >> data/data.txt', - command_type='shell', catalog={'put': '*'}) - -``` - -we are instructing magnus to create a ```data``` folder and echo the environmental variables into ```data.txt``` in -the command section while asking magnus to put the files the catalog after execution. - -Logically, you can instruct magnus to: - -- ```get``` files from the catalog before the execution to a specific ```compute data folder``` -- execute the command -- ```put``` the files from the ```compute data folder``` to the catalog. - -By default, magnus would look into ```data``` folder but you can over-ride this by providing ```compute_folder``` in the -config. Glob patterns for file searching are allowed. Please read more about the catalog [here](../../concepts/catalog). diff --git a/docs/getting_started/brief-concepts-output.md b/docs/getting_started/brief-concepts-output.md deleted file mode 100644 index 22bcc160..00000000 --- a/docs/getting_started/brief-concepts-output.md +++ /dev/null @@ -1,120 +0,0 @@ -# Closer look at output - ---- - -While the dag defines the work that has to be done, it is only a piece of the whole puzzle. - -As clearly explained in [this paper by Sculley et al.](https://papers.nips.cc/paper/2015/file/86df7dcfd896fcaf2674f757a2463eba-Paper.pdf), -the actual machine learning/data science related code is only fraction of all the systems that have to be in place -to make it work. - -We implemented magnus with a clear understanding of the complexity while keeping the interface to the -data scientists/ML researchers as simple as possible. - ---- - -Though the example pipeline we just ran did nothing useful, it helps in understanding the different *systems* in place. - ---8<-- -README.md:exampleOutput ---8<-- - -## Run id - -Every run of magnus has a unique identifier called run_id. Magnus by default creates one based on timestamp but you -can provide one at run time for better control. - -```magnus execute --file getting-started.yaml --run-id my_first --x 3``` - -## Reproducibility - -All code breaks at some point and being able to replicate the exact cause of error is essential for a quick resolution. -Magnus tracks four possible sources of changes that could have led to a different outcome of an experiment. - -* dag: The [dag_hash](../../concepts/run-log/#dag_hash) in the log is the SHA id of the actual dag. -* code: If the code is git versioned, magnus tracks the [code commit id](../../concepts/run-log/#code_identity) -and modified files as part of the logs. If the run is containerized, magnus also tracks the -docker image digest as part of the log. -* data: Any data generated as part of the nodes can be cataloged along with the -[SHA identity](../../concepts/run-log/#data_catalog) of the file. -* config: The run config used to make the run is also stored as part of the run logs along with the pipeline definition. 
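As a small illustration of the data point above: the catalog records a content hash for every file it stores (see the ```data_hash``` entries in the step log below). The sketch assumes SHA-256, inferred from the 64-character hashes in the logs, and is not a statement about magnus internals.

```python
import hashlib
from pathlib import Path

def file_digest(path: str) -> str:
    # Hash the file contents so artifacts from two runs can be compared byte-for-byte.
    return hashlib.sha256(Path(path).read_bytes()).hexdigest()

print(file_digest('data/data.txt'))
```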
- - -The [run log structure](../../concepts/run-log) of the output is exactly the same independent of where the -actual run happens. This should enable to replicate a run that happened in an K8 environment, -for example, in your local computer to debug. - -## Step Log - -Every step of the dag, has a [corresponding block](../../concepts/run-log/#structure_of_step_log) in the run log. The -name of the ```step``` is name of key in ```steps```. - -Here is the step log for ```step shell``` of the example run - -``` json -"steps": { - ..., - "step shell": { - "name": "step shell", - "internal_name": "step shell", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "e15d1374aac217f649972d11fe772e61b5a2478d", - "code_identifier_type": "git", - "code_identifier_dependable": true, - "code_identifier_url": "INTENTIONALLY REMOVED", - "code_identifier_message": "" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2023-01-31 19:56:55.128697", - "end_time": "2023-01-31 19:56:55.150878", - "duration": "0:00:00.022181", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [ - { - "name": "data/data.txt", - "data_hash": "7e91b0a9ff8841a3b5bf2c711f58bcc0cbb6a7f85b9bc92aa65e78cdda59a96e", - "catalog_relative_path": "20230131195647/data/data.txt", - "catalog_handler_location": ".catalog", - "stage": "put" - } - ] - }, - ... -} -``` - -## Attempts - -As part of the attempt, we capture the start time, end time and the duration of the execution. Only task, success, fail -and as-is nodes have this block as it refers to the actual compute time used. In case of failure, magnus tries to -capture the exception message in the ```message``` block. - -## Code identity - -The git SHA id of the [code commit]((../../concepts/run-log/#code_identity)) is captured, -if the code is versioned using git. If the current branch was unclean, -magnus will warn the user about the dependability of the code id and lists the files that are different from the commit. - -If the execution was in a container, magnus also adds the docker image digest as a code identity along with git sha id. - -## Data catalog - -```Step shell``` of the example run creates a file ```data.txt``` as part of the run in the data folder. As per the -configuration of the pipeline, we have instructed magnus to store all (*) contents of the ```data``` folder for -downstream steps using the catalog. The data catalog section of the step log captures the hash of the data and the -metadata related to it. - -You can read more about catalog [here]((../../concepts/run-log/#data_catalog)). diff --git a/docs/getting_started/example-deployment.md b/docs/getting_started/example-deployment.md deleted file mode 100644 index 4b8c94b7..00000000 --- a/docs/getting_started/example-deployment.md +++ /dev/null @@ -1,335 +0,0 @@ -# Example Deployment - -While the previous two sections were about introducing magnus pipelines and different features, we can use the same -trivial example to showcase the features of magnus in deployment patterns. - -To recap, here is the pipeline that we ran as an example: - ---8<-- -README.md:exampleInput ---8<-- - - -The pipeline is simple and demonstrates the core concepts of data catalog, dag traversal, passing data between -nodes and task types. - -To demonstrate the strength of magnus, let us try to "deploy" the pipeline via a Bash shell script. 
This demonstration, -though trivial, is very similar in process to *transpile* a dag into something that argo or AWS step functions -understands. - -Let us create a configuration file which changes the behavior of magnus to transpile. - -```yaml -# config.yaml -executor: - type: demo-renderer - -run_log_store: - type: file-system - -catalog: - type: file-system - -``` - -**Points to note**: - -- We have not changed the dag definition at all. - -- We added a config file which instructs the execution type to "demo-renderer". -[Demo renderer](../../concepts/modes-implementations/demo-renderer) translates the dag definition into a bash script. - -- The buffered run log store that we have so far used in the example is not suitable anymore. -[File system](../../concepts/run-log-implementations/file-system/) run log store persists the logs -on physical folder and therefore more suitable. - -There are other ways to change the configurations which are detailed [here](../../concepts/configurations). - -## Transpilation - - -We can execute the pipeline, just like we did it previously, by the following command. - -```magnus execute --file getting-started.yaml --parameters-file parameters.yaml -config-file config.yaml``` - -or in python SDK: - -```python -#in pipeline.py -from magnus import Pipeline, Task - -def pipeline(): - first = Task(name='step parameters', command="lambda x: {'x': int(x) + 1}", command_type='python-lambda', - next_node='step shell') - second = Task(name='step shell', command='mkdir data ; env >> data/data.txt', - command_type='shell', catalog={'put': '*'}) - - pipeline = Pipeline(start_at=first, name='getting_started') - pipeline.construct([first, second]) - pipeline.execute(parameters_file='parameters.yaml', configuration_file='config.yaml') - -if __name__ == '__main__': - pipeline() -``` - -This run is different from the previous execution - -1. There is no output or run_id generated by magus. This is because the current execution only performs a translation -of the dag into a bash script and not actual function calls. - -2. You should also notice a file called ```demo-bash.sh``` created in the working directory which is a -translation of the dag into a bash script. - - -Let us have a closer look at the contents of the ```demo-bash.sh```. - -```shell -magnus execute_single_node $1 step%parameters --log-level WARNING --file pipeline.yaml --config-file config.yaml --parameters-file parameters.yaml -exit_code=$? -echo $exit_code -if [ $exit_code -ne 0 ]; -then - $(magnus execute_single_node $1 fail --log-level WARNING --file pipeline.yaml --config-file config.yaml --parameters-file parameters.yaml) - exit 1 -fi -magnus execute_single_node $1 step%shell --log-level WARNING --file pipeline.yaml --config-file config.yaml --parameters-file parameters.yaml -exit_code=$? -echo $exit_code -if [ $exit_code -ne 0 ]; -then - $(magnus execute_single_node $1 fail --log-level WARNING --file pipeline.yaml --config-file config.yaml --parameters-file parameters.yaml) - exit 1 -fi -magnus execute_single_node $1 success --log-level WARNING --file pipeline.yaml --config-file config.yaml --parameters-file parameters.yaml``` -``` - -The shell script does the following - -- Capture the command line arguments passed to the bash script as magnus parameters, i.e prefixed by MAGNUS_PRM_. - -- Execute the first node and capture the exit code. If the exit code is successful, move to ```next``` node as defined -in the dag. - -- If the exit code is failure, move to the failure node of the dag. 
This is as per the dag definition. - -## Execution - -We can execute the pipeline defined in the ```demo-bash.sh``` by - -```shell -chmod 755 demo-bash.sh - -./demo-bash.sh my_first_bash -``` - -**Points to note** - -1. run_id, my_first_bash, is no longer optional parameter and should be provided as the first positional parameter. - -2). The parameters file was part of the translation step and is provided to the shell script. - -Since the run log store is ```file-system```, there should be a directory, ```.run_log_store```, created with a single -run log in it by the name ```my_first_bash.json```. - -
- Click to show the run log - -```json -{ - "run_id": "demo-bash6", - "dag_hash": "ce0676d63e99c34848484f2df1744bab8d45e33a", - "use_cached": false, - "tag": "", - "original_run_id": "", - "status": "SUCCESS", - "steps": { - "step parameters": { - "name": "step parameters", - "internal_name": "step parameters", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "6ae3f4700fd07d529385148c34ed5c0b9a1c0727", - "code_identifier_type": "git", - "code_identifier_dependable": true, - "code_identifier_url": "INTENTIONALLY REMOVED", - "code_identifier_message": "" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2023-02-01 12:12:26.533528", - "end_time": "2023-02-01 12:12:26.534091", - "duration": "0:00:00.000563", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - }, - "step shell": { - "name": "step shell", - "internal_name": "step shell", - "status": "SUCCESS", - "step_type": "task", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "6ae3f4700fd07d529385148c34ed5c0b9a1c0727", - "code_identifier_type": "git", - "code_identifier_dependable": true, - "code_identifier_url": "INTENTIONALLY REMOVED", - "code_identifier_message": "" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2023-02-01 12:12:29.287087", - "end_time": "2023-02-01 12:12:29.302014", - "duration": "0:00:00.014927", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [ - { - "name": "data/data.txt", - "data_hash": "474c6f64a8bbbb97a7f01fb1207db9b27db04212ab437d4f495e2ac3f4be7388", - "catalog_relative_path": "demo-bash6/data/data.txt", - "catalog_handler_location": ".catalog", - "stage": "put" - } - ] - }, - "success": { - "name": "success", - "internal_name": "success", - "status": "SUCCESS", - "step_type": "success", - "message": "", - "mock": false, - "code_identities": [ - { - "code_identifier": "6ae3f4700fd07d529385148c34ed5c0b9a1c0727", - "code_identifier_type": "git", - "code_identifier_dependable": true, - "code_identifier_url": "INTENTIONALLY REMOVED", - "code_identifier_message": "" - } - ], - "attempts": [ - { - "attempt_number": 0, - "start_time": "2023-02-01 12:12:32.083047", - "end_time": "2023-02-01 12:12:32.084351", - "duration": "0:00:00.001304", - "status": "SUCCESS", - "message": "" - } - ], - "user_defined_metrics": {}, - "branches": {}, - "data_catalog": [] - } - }, - "parameters": { - "x": 4 - }, - "run_config": { - "executor": { - "type": "demo-renderer", - "config": { - "enable_parallel": false, - "placeholders": {} - } - }, - "run_log_store": { - "type": "file-system", - "config": { - "log_folder": ".run_log_store" - } - }, - "catalog": { - "type": "file-system", - "config": { - "compute_data_folder": "data", - "catalog_location": ".catalog" - } - }, - "secrets": { - "type": "env-secrets-manager", - "config": {} - }, - "experiment_tracker": { - "type": "mlflow", - "config": { - "server_url": "http://127.0.0.1:5000/", - "autolog": true - } - }, - "variables": {}, - "pipeline": { - "start_at": "step parameters", - "name": "", - "description": "Getting started", - "max_time": 86400, - "steps": { - "step parameters": { - "mode_config": {}, - "next_node": "step shell", - "command": "lambda x: {'x': int(x) + 1}", - "command_type": "python-lambda", - "command_config": {}, - "catalog": {}, - "retry": 1, - "on_failure": "", - "type": "task" - }, - 
"step shell": { - "mode_config": {}, - "next_node": "success", - "command": "mkdir data ; env >> data/data.txt", - "command_type": "shell", - "command_config": {}, - "catalog": { - "put": [ - "*" - ] - }, - "retry": 1, - "on_failure": "", - "type": "task" - }, - "success": { - "mode_config": {}, - "type": "success" - }, - "fail": { - "mode_config": {}, - "type": "fail" - } - } - } - } -} -``` - -
- -While the original run was in one single python process, the run via the bash uses a different python process for each -step of the dag. To extrapolate the idea, this is very similar to AWS step function execution or Argo dag execution that -every step of the pipeline executes either a AWS compute or a container. - -Even though the process of execution of the nodes is different, the structure of run log/catalog is exactly identical -to ```local``` execution. This feature should enable you to debug/re-run a failed run in any other environments -in ```local``` environments. diff --git a/docs/getting_started/example.md b/docs/getting_started/example.md deleted file mode 100644 index f8266dce..00000000 --- a/docs/getting_started/example.md +++ /dev/null @@ -1,17 +0,0 @@ -# Example Run - ---8<-- -README.md:exampleRun -README.md:exampleInput -README.md:exampleOutput ---8<-- - -You should see that ```data``` folder being created with a file called ```data.txt``` in it. -This is according to the command in ```step shell```. - -You should also see a folder ```.catalog``` being created with a single folder corresponding to the run_id of this run. - - - - -Let's take a closer look at the input and output in the next sections. diff --git a/docs/getting_started/installation.md b/docs/getting_started/installation.md deleted file mode 100644 index 18b86f1e..00000000 --- a/docs/getting_started/installation.md +++ /dev/null @@ -1,34 +0,0 @@ -# Installation - ---8<-- -README.md:installation ---8<-- -## Optional capabilities - -### Docker - -To run the pipelines/functions/notebooks in a container, you need to install magnus with docker functionality. - -```shell -pip install "magnus[docker]" -``` - -or if you are using poetry - -```shell -poetry add "magnus[docker]" -``` - -### Notebook - -To use notebook functionality, you need to install magnus with notebook functionality. - -```shell -pip install "magnus[notebook]" -``` - -or if you are using poetry - -```shell -poetry add "magnus[notebook]" -``` diff --git a/docs/getting_started/why-magnus.md b/docs/getting_started/why-magnus.md deleted file mode 100644 index 7683425a..00000000 --- a/docs/getting_started/why-magnus.md +++ /dev/null @@ -1,66 +0,0 @@ -# Why Magnus - -Magnus is never set out to replace production grade orchestrators like AWS Step functions or argo. These -orchestrators are proven to be robust and are constantly improved to align to best practices. We agree that, we should -always use these tools for production grade deployments. - -But the same tools, seem to over-engineered and extremely complex for experiments and local development where the actual -data science teams thrive. The farther the operational world is from the developers, the longer it takes to -operationalize projects - lesson learnt from DevOps. Magnus was developed to bring the data science team closer to the -production infrastructure and practices while abstracting a lof of underlying complexity. - - -Magnus treats the *dag* definition as a contract between the data science team and the engineering team. While the dag -could be run on local computers or in cloud by the data science team during the development/experiment phase, the dag -is translated to chosen orchestrators language during deployment by the engineering team. This also enables the data -science team to think along the lines of pipelines and orchestration without infrastructure complexities. 
- -We also found that, a few implementations in magnus to be more convenient than the counterparts it tries to -emulate. For example: passing variables between steps in AWS Step function is complex and not even possible when -using containers as one of the steps. The same step when wrapped around magnus before step function makes it easier. - - -Here are some of the key points on choosing magnus. - -## Reproducibility of Experiments - -Data science experiments and projects are notorious for being difficult to replicate. In our opinion, a data science -experiment is [code](../../concepts/run-log/#code_identity) + [data](../../concepts/run-log/#data_catalog) + -configuration. -Magnus tracks all three of them in the run logs and makes it easier to -reproduce/highlight differences between experiments. - -If the default tracking provided by Magnus is not suitable, you can easily integrate your application with one of -your liking or extend magnus to fit your needs. - -## Easy re-run - -Along the same lines as reproducibility, a pipeline run with magnus can be re-run on any other environment as long as -the run log/catalog are available. Magnus would skip the steps that were successfully executed in the older -run and start execution from the point of failure. - - -## Extensibility - -A lot of design principles while writing magnus was to promote [extensibility](../../extensions/extensions). -Its easy to write extensions to include -new - -- [compute environments](../../concepts/executor/#extensions) (k8s, on-prem clusters etc) -- [run log store](../../concepts/run-log/#extensions) (databases, file systems etc) -- [data cataloging](../../concepts/catalog/#extensions) (feature stores, object storage etc) -- [secret managers](../../concepts/secrets/#extensions) (vault, azure secrets) -- [experiment tracking](../../concepts/experiment-tracking/#extensions) (mlflow, Weights & Biases) - -## Near Zero code change from local to production - -Magnus was designed to make data science teams closer to operational world. The code and orchestration are ready to -be productionized as soon as you are ready. The only change to enable that would be a -[config](../../concepts/configurations/). - -## Easy switch - -The technological decisions made today for your project may not be correct one in a few months for a lot of varied -reasons. You might want to change your cloud provider or orchestrating tools or secrets manager and it should be easy -to do so. With magnus, you can easily switch without touching your project code/practices. Since the configuration -could also be parameterized, switching might be as simple as changing one file. For more details check here. diff --git a/docs/getting_started/wrap-up.md b/docs/getting_started/wrap-up.md deleted file mode 100644 index f61c2e79..00000000 --- a/docs/getting_started/wrap-up.md +++ /dev/null @@ -1,59 +0,0 @@ -# Wrapping up - -To summarize the journey so-far, - -- We have defined a simple pipeline to show the features of magnus. - - - DAG definition and traversal rules. - - Passing data between nodes. - - Basic task types (task, success, fail) and execution environments (shell, python-lambda). - - Data catalogs. - -- We have executed the pipeline in ```local``` environment to demonstrate - - - The run log structure and its relation to the dag steps. - - The config identities (run_config, dag hash). - - The code identities (code commits). - - The data catalog and data identity (data hash). 
- -- We also "deployed" the pipeline as a bash script to demonstrate - - - Translation of the dag definition into language that compute environments understands. - - proven the identical structure of run log/catalog independent of the environment. - - proven the only change required to deploy is a config i.e no change in code/dag definition. - -## Design - -The design thought behind magnus has always been to **not** disturb the coding/engineering practices of the data teams -or the infrastructure teams. We found the right abstraction layer to make the communication between these teams to be -the DAG definition i.e - -- The data teams should focus on delivering and proving the correctness of the dag in environments that are friendly -to them. These could be ```local``` or any other environments that are experiment-friendly. - -- The infrastructure teams should focus on deploying the dag definition in production grade environments as per their -team practices or capabilities. - -While both teams are looking at the same dag definition, their interpretation of it is different and should be -decoupled. While the [example shown](../example-deployment/) is trivial, the rationale and the process of translating dag definitions is not very -far away from real world examples. - -## Testing - -We also agree with dagster's observation of ["Data applications are notoriously difficult to test and are therefore -often un- or under-tested."](https://docs.dagster.io/tutorial/intro-tutorial/testable) - -In magnus, ```python``` commands are just regular functions that can be unit tested as the data teams chose to. - -Magnus itself is unit tested with a test coverage closer to 80% and with a lot of scenarios tested where we have noticed -failures in the past. - -## Conclusion - -We hope you got a good introduction to magnus and its features. We did not complicate the pipeline to keep it simple -but there are many features that are interesting and might be of use to you in writing a robust pipeline. - -You can read about them in [concepts](../../concepts/nodes) or see [examples](../../examples/). - -You can even write [extensions](../../extensions/extensions) to magnus to see a feature that we -have not implemented. diff --git a/docs/how-do-i.md b/docs/how-do-i.md deleted file mode 100644 index 7c32042a..00000000 --- a/docs/how-do-i.md +++ /dev/null @@ -1,41 +0,0 @@ -# How do I - -## Pass parameters between steps? - ---8<-- -docs/concepts/nodes.md:how-do-i-pass-simple ---8<-- - -## Pass data files between steps? - -In magnus, data files are passed to downstream steps using the concept of [catalog](../concepts/catalog). The catalog -settings and behavior can be completely controlled by the pipeline definition but can also be controlled via code if -its convenient. - ---8<-- -docs/concepts/catalog.md:how-do-i-pass-data ---8<-- - - -## Pass data objects between steps? - -In magnus, data are passed to downstream steps using the concept of [catalog](../concepts/catalog). While this is -good for files, it is inconvenient to dump and load the object into files for the cataloging to happen. Magnus provides -utility functions to make it easier. - ---8<-- -docs/concepts/catalog.md:how-do-i-pass-objects ---8<-- - -## Define variables? - ---8<-- -docs/concepts/dag.md:how-do-i-parameterize ---8<-- - - -## Track experiments? 
- ---8<-- -docs/concepts/experiment-tracking.md:how-do-i-track ---8<-- diff --git a/docs/index.md b/docs/index.md index 56a6f361..90b7df8f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,15 +3,84 @@ title: Welcome sidebarDepth: 0 --- - -![logo](assets/logo1.png){ width="400" height="300" style="display: block; margin: 0 auto" } +
+ ![Magnus logo](assets/logo1.png){ width="400" height="300"} +
+
--- ---8<-- -README.md:intro ---8<-- +Magnus is a simplified workflow definition language that helps in: + +- **Streamlined Design Process:** Magnus enables users to efficiently plan their pipelines with +[stubbed nodes](concepts/stub), along with offering support for various structures such as +[tasks](../concepts/task), [parallel branches](concepts/parallel), and [loops or map branches](concepts/map) +in both [yaml](concepts/pipeline) or a [python SDK](sdk) for maximum flexibility. + +- **Incremental Development:** Build your pipeline piece by piece with Magnus, which allows for the +implementation of tasks as [python functions](concepts/task/#python_functions), +[notebooks](concepts/task/#notebooks), or [shell scripts](concepts/task/#shell), +adapting to the developer's preferred tools and methods. + +- **Robust Testing:** Ensure your pipeline performs as expected with the ability to test using sampled data. Magnus +also provides the capability to [mock and patch tasks](configurations/executors/mocked) +for thorough evaluation before full-scale deployment. + +- **Seamless Deployment:** Transition from the development stage to production with ease. +Magnus simplifies the process by requiring [only configuration changes](configurations/overview) +to adapt to different environments, including support for [argo workflows](configurations/executors/argo). + +- **Efficient Debugging:** Quickly identify and resolve issues in pipeline execution with Magnus's local +debugging features. Retrieve data from failed tasks and [retry failures](concepts/run-log/#retrying_failures) +using your chosen debugging tools to maintain a smooth development experience. + + +Along with the developer friendly features, magnus also acts as an interface to production grade concepts +such as [data catalog](concepts/catalog), [reproducibility](concepts/run-log), +[experiment tracking](concepts/experiment-tracking) +and secure [access to secrets](concepts/secrets). + +## Motivation + +Successful data science projects require a varied set of skills from data scientists, ML engineers, and infrastructure +teams. Often, the roles and responsibilities of these personas are blurred leading to projects that are difficult to +maintain, test, reproduce or run at scale. + +We build __**Magnus**__ to separate those concerns and create a clear boundary of the personas. + +## Design principles + +- [x] Code should not be mixed with implementation details of underlying platform. + +**Example**: Data and parameters are often shared between different steps of the pipeline. +The platform implementation should not add additional code to make this happen. + + + +- [x] Interactive development/debugging should be a first-class citizen. + + +**Example**: Data science teams thrive in environments with quick debug loop. Able to use their preferred tools +and iterate without constraints of the platform aids development/debugging. + + +- [x] Align the best practices even during development phase. + +**Example**: All projects require secrets to access secure content. The concept of secret should be +available even during development phase and there should be no change in code when it is run in production set up. + + + + +## What does it do? + +Magnus is a thin abstraction layer over the services typically provided by production grade infrastructures. Independent +of the provider, it exposes a consistent interface to those services, **this holds true even for the local environment**. -## Extensions +
+ ![Magnus as a consistent interface to services](assets/whatdo.png){ width="1200" height="800"} +
+
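+As a small illustration of that interface: the same SDK definition runs unchanged in every environment,
+and only the configuration passed to ```execute``` changes. This is a sketch; the configuration file name below is a placeholder.
+
+```python
+from magnus import Pipeline, Task
+
+hello = Task(name="hello", command='echo "hello"', command_type="shell", terminate_with_success=True)
+pipeline = Pipeline(steps=[hello], start_at=hello, add_terminal_nodes=True)
+
+# Runs against the default, local services when no configuration is given.
+# Switching to, say, an argo backed set up is only a matter of passing
+# configuration_file="configs/argo-config.yaml" (a placeholder path); the code stays the same.
+pipeline.execute()
+```
+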
-Magnus is extensible by design and [are being actively developed.](https://github.com/AstraZeneca/magnus-extensions)
+The scope of magnus is intentionally limited to aid during the model development phase.
+It does not aim to be an end-to-end development platform; the heavy lifting is always done by the providers.
diff --git a/docs/interactions.md b/docs/interactions.md
new file mode 100644
index 00000000..d296adec
--- /dev/null
+++ b/docs/interactions.md
@@ -0,0 +1,71 @@
+# API Reference of Magnus functions
+
+
+::: magnus.get_parameter
+    options:
+      show_root_heading: true
+
+ +::: magnus.set_parameter + options: + show_root_heading: true + +
+ +::: magnus.get_from_catalog + options: + show_root_heading: true + +
+ +::: magnus.put_in_catalog + options: + show_root_heading: true + +
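+
+A minimal usage sketch of the file based catalog API, following the pattern in ```examples/concepts/catalog_api.py```:
+
+```python
+from pathlib import Path
+
+from magnus import get_from_catalog, put_in_catalog
+
+
+def create_content():
+    Path("data").mkdir(parents=True, exist_ok=True)
+    with open(Path("data") / "hello.txt", "w") as f:
+        f.write("Hello from magnus!!")
+    # sync the created file to the catalog for downstream steps
+    put_in_catalog("data/hello.txt")
+
+
+def retrieve_content():
+    # bring the file back from the catalog; glob patterns like "**/*" also work
+    get_from_catalog("data/hello.txt")
+    with open("data/hello.txt", "r") as f:
+        print(f.read())
+```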
+ +::: magnus.get_object + options: + show_root_heading: true + +
+ +::: magnus.put_object + options: + show_root_heading: true + + + +
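+
+A minimal sketch of passing python objects between steps, following ```examples/concepts/catalog_object.py```:
+
+```python
+from pydantic import BaseModel
+
+from magnus import get_object, put_object
+
+
+class EggsModel(BaseModel):
+    ham: str
+
+
+def producer():
+    # store the object in the catalog for downstream steps
+    put_object(EggsModel(ham="Yes, please!!"), name="eggs")
+
+
+def consumer():
+    # retrieve the object by the name it was stored under
+    eggs = get_object("eggs")
+    print(eggs.ham)
+```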
+ +::: magnus.get_secret + options: + show_root_heading: true + + +
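+
+A sketch of retrieving a secret from the configured secrets manager, assuming the secret is referenced by name; the
+secret name ```api_token``` is a placeholder for illustration:
+
+```python
+from magnus import get_secret
+
+
+def access_secure_service():
+    # "api_token" is a hypothetical secret name resolved by the configured secrets manager
+    token = get_secret("api_token")
+    return token
+```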
+ +::: magnus.get_run_log + options: + show_root_heading: true + + +
+ +::: magnus.get_run_id + options: + show_root_heading: true + +
+ +::: magnus.track_this + options: + show_root_heading: true + + +
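+
+A minimal sketch of emitting metrics, following ```examples/concepts/experiment_tracking_api.py```; the metric values
+are illustrative:
+
+```python
+from magnus import track_this
+
+
+def emit_metrics():
+    # plain key/value metrics
+    track_this(answer=42.0)
+    # per-step metrics, as in examples/concepts/experiment_tracking_step.py
+    track_this(step=1, answer=20.0)
+    track_this(step=2, answer=40.0)
+```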
+
+::: magnus.get_experiment_tracker_context
+    options:
+      show_root_heading: true
diff --git a/docs/roadmap.md b/docs/roadmap.md
new file mode 100644
index 00000000..eeaf8a40
--- /dev/null
+++ b/docs/roadmap.md
@@ -0,0 +1,25 @@
+## AWS environments
+
+Bring in native AWS services to orchestrate workflows. The stack should be:
+
+- AWS step functions.
+- Sagemaker jobs for compute - they can take a dynamic image name, whereas AWS Batch needs a job definition and can be tricky.
+- S3 for Run log and Catalog: already tested with a working prototype.
+- AWS secrets manager: access to secrets via the RBAC of the execution role.
+
+
+## HPC environment using a SLURM executor
+
+- Without native orchestration tools, the preferred way is to run as local but use SLURM to schedule the jobs.
+
+## Database backed run log store
+
+## Better integrations with experiment tracking tools
+
+Currently, the implementation of experiment tracking tools within magnus is limited. It might be better to
+choose a good open source implementation and stick with it.
+
+
+## Model registry service
+
+It could be interesting to bring in a model registry to catalog models.
diff --git a/docs/sdk.md b/docs/sdk.md
new file mode 100644
index 00000000..bb345437
--- /dev/null
+++ b/docs/sdk.md
@@ -0,0 +1,59 @@
+::: magnus.Catalog
+    options:
+      show_root_heading: true
+      show_bases: false
+
+ +::: magnus.Stub + options: + show_root_heading: true + show_bases: false + +
+ +::: magnus.Task + options: + show_root_heading: true + show_bases: false + show_docstring_description: true + +
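+
+A small sketch of composing ```Task``` objects into a ```Pipeline```, mirroring the examples directory:
+
+```python
+from magnus import Pipeline, Task
+
+create = Task(name="create content", command='echo "hello" >> hello.txt', command_type="shell")
+show = Task(name="show content", command="cat hello.txt", command_type="shell", terminate_with_success=True)
+
+create >> show  # link the steps
+
+pipeline = Pipeline(steps=[create, show], start_at=create, add_terminal_nodes=True)
+pipeline.execute()
+```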
+ +::: magnus.Parallel + options: + show_root_heading: true + show_bases: false + show_docstring_description: true + +
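+
+A sketch of a ```Parallel``` step with two branches, along the lines of ```examples/concepts/nesting.py```:
+
+```python
+from magnus import Parallel, Pipeline, Stub
+
+stub = Stub(name="do nothing", terminate_with_success=True)
+branch = Pipeline(steps=[stub], start_at=stub, add_terminal_nodes=True)
+
+# both branches reuse the same branch definition, as in the nesting example
+parallel = Parallel(name="fan out", branches={"a": branch, "b": branch}, terminate_with_success=True)
+pipeline = Pipeline(steps=[parallel], start_at=parallel, add_terminal_nodes=True)
+```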
+ +::: magnus.Map + options: + show_root_heading: true + show_bases: false + show_docstring_description: true + +
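+
+A sketch of a ```Map``` step that repeats a branch over an iterable parameter, following ```examples/concepts/map.py```:
+
+```python
+from magnus import Map, Pipeline, Task
+
+execute = Task(
+    name="execute",
+    command="examples.concepts.map.process_chunk",
+    terminate_with_success=True,
+)
+branch = Pipeline(steps=[execute], start_at=execute, add_terminal_nodes=True)
+
+# repeat the branch over the "chunks" parameter, exposing each value as "start_index"
+looped = Map(
+    name="iterate and execute",
+    branch=branch,
+    iterate_on="chunks",
+    iterate_as="start_index",
+    terminate_with_success=True,
+)
+```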
+ +::: magnus.Success + options: + show_root_heading: true + show_bases: false + show_docstring_description: true + +
+ +::: magnus.Fail + options: + show_root_heading: true + show_bases: false + show_docstring_description: true + +
+
+::: magnus.Pipeline
+    options:
+      show_root_heading: true
+      show_bases: false
+      show_docstring_description: true
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 00000000..44b553e1
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,63 @@
+
+## Installation
+
+**magnus** is a python package and should be installed like any other python package. The minimum python version
+is ```3.8```.
+
+```shell
+pip install magnus
+```
+
+We recommend installing into a virtual environment using ```poetry``` or any other package manager.
+
+### Extras
+
+#### Docker
+
+To run the pipelines/functions/notebooks in a container, install magnus with docker functionality.
+
+```shell
+pip install "magnus[docker]"
+```
+
+#### Notebook
+
+To use notebooks as tasks, install magnus with ```notebook``` functionality.
+
+```shell
+pip install "magnus[notebook]"
+```
+
+#### mlflow
+
+To enable ```mlflow``` as the experiment tracker, install magnus with ```mlflow``` functionality.
+
+```shell
+pip install "magnus[mlflow]"
+```
+
+
+## Usage
+
+Pipelines in **magnus** can be defined either via the [python sdk](/sdk) or as ```yaml``` based definitions.
+
+To execute a pipeline defined in ```yaml```, use the **magnus** cli.
+The options are detailed below:
+
+- ```-f, --file``` (str): The pipeline definition file, defaults to pipeline.yaml
+- ```-c, --config-file``` (str): [config file](/configurations/overview) to be used for the run [default: None]
+- ```-p, --parameters-file``` (str): [Parameters](/concepts/parameters) accessible by the application [default: None]
+- ```--log-level```: The log level, one of ```INFO | DEBUG | WARNING | ERROR | FATAL``` [default: INFO]
+- ```--tag``` (str): A tag attached to the run [default: ]
+- ```--run-id``` (str): An optional run_id; one would be generated if not provided
+- ```--use-cached``` (str): Provide the previous run_id to re-run.
+
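+For pipelines defined via the [python sdk](/sdk), there is no separate cli invocation: the definition file is run
+with python and ```execute``` accepts the configuration and parameters files directly. A minimal sketch follows; the
+file names ```pipeline.py```, ```config.yaml``` and ```parameters.yaml``` are illustrative.
+
+```python
+# pipeline.py
+from magnus import Pipeline, Task
+
+hello = Task(name="hello", command='echo "hello"', command_type="shell", terminate_with_success=True)
+pipeline = Pipeline(steps=[hello], start_at=hello, add_terminal_nodes=True)
+
+# roughly the counterpart of: magnus execute -f pipeline.yaml -c config.yaml -p parameters.yaml
+pipeline.execute(configuration_file="config.yaml", parameters_file="parameters.yaml")
+```
+
+Run it with ```python pipeline.py```.
+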
+ +## Examples + +All the examples in the documentation are present in the ```examples``` directory of +[the repo](https://github.com/AstraZeneca/magnus-core) with instructions on how to run them. + +All the examples are tested, with multiple configurations, as part of our CI test suite. diff --git a/docs/why-magnus.md b/docs/why-magnus.md new file mode 100644 index 00000000..90aa747c --- /dev/null +++ b/docs/why-magnus.md @@ -0,0 +1,139 @@ +# Why Magnus + +The scope of **magnus** is intentionally limited as an aid to author workflows for +production grade orchestrators like AWS Step functions or Argo Workflows. It is designed +to complement them, **NOT** replace them. + +### Simplified flow of data + +Production-grade orchestrators excel at managing a series of independent tasks, offering +straightforward implementation for task orchestration. Nonetheless, due to their general-purpose +design, orchestrating the flow of data—whether parameters or artifacts—can introduce complexity and +require careful handling. + +Magnus simplifies this aspect by introducing an [intuitive mechanism for data flow](/example/dataflow), +thereby streamlining data management. This approach allows the orchestrators to focus on their core +competency: allocating the necessary computational resources for task execution. + +### Local first + +In the context of the project's proof-of-concept (PoC) phase, the utilization of production-level + orchestration systems is not optimal due to their complexity and potential constraints on rapid + experimentation. Data scientists require an environment that aligns with their established workflows, + which is most effectively achieved through the use of local development tools. + +Magnus serves as an intermediary stage, simulating the production environment by offering [local +versions](/configurations/overview/) of essential services—such as execution engines, data catalogs, secret management, and +experiment tracking—without necessitating intricate configuration. As the project transitions into the +production phase, these local stand-ins are replaced with their robust, production-grade counterparts. + +### Reduce refactoring + +Transitioning from the proof of concept (PoC) phase to production often necessitates extensive code +refactoring, which presents significant challenges: + +1. Refactoring demands considerable engineering resources to dissect the existing codebase and +reconstruct it in a form that is both maintainable and amenable to testing. + +2. The engineered solutions that result from this process tend to exclude researchers from further +experimentation, thus impeding iterative research and development. + + +Magnus is engineered to minimize the need for such extensive refactoring when operationalizing +projects. It achieves this by allowing tasks to be defined as [simple Python functions](/concepts/task/#python_functions) +or [Jupyter notebooks](/concepts/task/#notebook). This means that the research-centric components of the code +can remain unchanged, avoiding +the need for immediate refactoring and allowing for the postponement of these efforts until they +become necessary for the long-term maintenance of the product. + +### Decouple implementations + +In the rapidly evolving realm of technology, the solutions and tools selected today can +quickly become the technical debt of tomorrow. Magnus addresses this inevitability by +abstracting the implementation details from the underlying concepts. 
This decoupling
+enables a seamless transition to new technical architectures, reducing the process to a
+mere modification of configuration settings. Thus, Magnus facilitates adaptability
+in the face of changing technological landscapes, ensuring that updates or replacements
+of the technical stack can be implemented with minimal disruption.
+
+### Non-intrusive implementation
+
+A lot of the design of magnus is about letting the task definitions, python functions or notebooks,
+remain agnostic of the orchestration process. Most often, the task orchestration can be
+achieved by writing native "driver" functions, which keeps the implementation completely
+within the control of data scientists.
+
+In most cases, it should be possible to remove magnus from the tech stack entirely if necessary.
+
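+As an illustration, a plain python "driver" can exercise the same task functions with no magnus imports at all; the
+module and function names below are hypothetical stand-ins for project code:
+
+```python
+# driver.py - runs the tasks directly, outside of magnus
+from my_project.tasks import clean_data, train_model  # hypothetical project functions
+
+
+def main():
+    cleaned = clean_data("data/raw.csv")  # placeholder input path
+    train_model(cleaned)
+
+
+if __name__ == "__main__":
+    main()
+```
+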
+ +## Alternatives + +[Kedro](https://github.com/kedro-org/kedro) and [metaflow](https://metaflow.org/) are also +based on similar ideas and have established presence in this field. We took a lot of +inspiration from these excellent projects when writing magnus. + +!!! note "Caveat" + + The scope of magnus is limited in comparison to metaflow. The below points are on + the design philosophy rather that implementation specifics. + + The highlighted differences are subjective opinions and should be taken as preferences + rather than criticisms. + + + + +### Infrastructure + +Metaflow stipulates [infrastructure prerequisites](https://docs.metaflow.org/getting-started/infrastructure) that are established and validated across numerous scenarios. + +In contrast, Magnus empowers engineering teams to define infrastructure specifications through a configuration file tailored to the stack they maintain. This versatility enables specialized teams to leverage their domain expertise, thereby enhancing the project's overall efficacy. + +As magnus is mostly responsible for translating workflows to infrastructure patterns, it can +adapt to different environments. + +### Project structure + +Kedro and metaflow come with their own predefined project structures, which might be +appealing to some users while others might find them restrictive. + +Magnus, on the other hand, offers a more flexible approach. It doesn't impose a specific +structure on your project. Whether you're working with Python functions, Jupyter notebooks, +or shell scripts, Magnus allows you to organize your work as you see fit. Even the location +of the data folder can be tailored for each step, avoiding a one-size-fits-all design and +providing the freedom to structure your project in a way that suits your preferences and +requirements. + + +### Notebook support + +Both metaflow and kedro do not support notebooks as tasks. Notebooks are great during the iterative +phase of the project allowing for interactive development. + +Magnus supports notebooks as tasks and has the ability to pass data/parameters between them +to allow orchestrating notebooks. + +### Testing pipelines + +Magnus supports patching and mocking tasks to test the end to end execution of the +pipeline. It is not clear on how to achieve the same in kedro or metaflow. + +### Learning curve + +Magnus allows tasks to stand on their own, separate from the orchestration system. Explaining and +understanding these tasks is made easy through the use of simple "driver" functions. This approach +makes it easier for anyone working on the project to get up to speed and maintain it, as the +orchestration part of Magnus remains distinct and straightforward. + +In contrast, learning to use Kedro and Metaflow can take more time because they have their own +specific ways of structuring projects and code that users need to learn. + +### Language support + +Kedro and metaflow only support python based pipeline definitions. It is possible to +run the non-python tasks as ```subprocesses``` in the pipeline tasks but the definition +is only possible using the python API. + +Magnus supports ```yaml``` based pipeline definitions and has ```shell``` tasks which +can be used for non-python tasks. 
diff --git a/examples/Dockerfile b/examples/Dockerfile new file mode 100755 index 00000000..9d77f5f1 --- /dev/null +++ b/examples/Dockerfile @@ -0,0 +1,31 @@ +# Python 3.8 Image without Dependecies +FROM python:3.8-slim + +LABEL maintainer="vijay.vammi@astrazeneca.com" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + + +RUN pip config set global.trusted-host \ + "pypi.org files.pythonhosted.org pypi.python.org" \ + --trusted-host=pypi.python.org \ + --trusted-host=pypi.org \ + --trusted-host=files.pythonhosted.org + +RUN pip install --upgrade pip \ + && pip install poetry + +ENV VIRTUAL_ENV=/opt/venv +RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +COPY . /app +WORKDIR /app + +RUN poetry config repositories.FPHO https://files.pythonhosted.org \ + && poetry config certificates.FPHO.cert false + +RUN poetry install --all-extras --without dev diff --git a/examples/Dockerfile.39 b/examples/Dockerfile.39 new file mode 100755 index 00000000..f116853d --- /dev/null +++ b/examples/Dockerfile.39 @@ -0,0 +1,31 @@ +# Python 3.8 Image without Dependecies +FROM python:3.9-slim + +LABEL maintainer="vijay.vammi@astrazeneca.com" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + + +RUN pip config set global.trusted-host \ + "pypi.org files.pythonhosted.org pypi.python.org" \ + --trusted-host=pypi.python.org \ + --trusted-host=pypi.org \ + --trusted-host=files.pythonhosted.org + +RUN pip install --upgrade pip \ + && pip install poetry + +ENV VIRTUAL_ENV=/opt/venv +RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +COPY . /app +WORKDIR /app + +RUN poetry config repositories.FPHO https://files.pythonhosted.org \ + && poetry config certificates.FPHO.cert false + +RUN poetry install --all-extras --without dev diff --git a/examples/catalog.py b/examples/catalog.py new file mode 100644 index 00000000..1f502407 --- /dev/null +++ b/examples/catalog.py @@ -0,0 +1,72 @@ +""" +Example pipeline to demonstrate passing data files between tasks. + +You can run this pipeline by: + python run examples/catalog.py +""" + +from magnus import Catalog, Pipeline, Stub, Task + + +def main(): + # Make the data folder if it does not exist + set_up = Task(name="Setup", command="mkdir -p data", command_type="shell") + + # create a catalog instruction to put a file into the catalog + create_catalog = Catalog(put=["data/hello.txt"]) + # This task will create a file in the data folder and attaches the instruction + # to put the file into the catalog. + create = Task( + name="Create Content", + command='echo "Hello from magnus" >> data/hello.txt', + command_type="shell", + catalog=create_catalog, + ) + + # We remove the data folder to ensure that the data folder is cleaned up. + # This is to show that the retrieve step just does not read from existing data + # This step is stubbed to prevent any accidental deletion, make it a Task + first_clean = Stub( + name="Clean up to get again", + command="rm -rf data", + command_type="shell", + ) + + # We create a catalog instruction to retrieve a file from the catalog + # Here we use "compute_folder_name" to point to the directory of interest. + # You can alteratively ignore compute_folder_name and get "data/hello.txt" + # You can use wild card following glob patterns to retrieve multiple files. 
+ get_catalog = Catalog(get=["data/hello.txt"]) + # This task will retrieve the file from the catalog and attach the instruction + # to retrieve the file from the catalog before execution. + retrieve = Task( + name="Retrieve Content", + command="cat data/hello.txt", + command_type="shell", + catalog=get_catalog, + ) + + # We clean up. Note that this step is stubbed to prevent any accidental deletion, + # Make it a Task to actually clean up. + clean_up = Stub( + name="Clean up", + command="rm -rf data", + command_type="shell", + terminate_with_success=True, + ) + + # link all the steps of the pipeline + set_up >> create >> first_clean >> retrieve >> clean_up + + pipeline = Pipeline( + steps=[set_up, create, first_clean, retrieve, clean_up], + start_at=set_up, + add_terminal_nodes=True, + ) + + # override the default configuration to use file-system catalog. + pipeline.execute(configuration_file="examples/configs/fs-catalog.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/catalog.yaml b/examples/catalog.yaml new file mode 100644 index 00000000..4f34e52d --- /dev/null +++ b/examples/catalog.yaml @@ -0,0 +1,52 @@ +dag: + description: | + This is a simple pipeline that demonstrates passing data between steps. + + 1. Setup: We setup a data folder, we ignore if it is already present + 2. Create Content: We create a "hello.txt" and "put" the file in catalog + 3. Clean up to get again: We remove the data folder. Note that this is stubbed to prevent + accidental deletion of your contents. You can change type to task to make really run. + 4. Retrieve Content: We "get" the file "hello.txt" from the catalog and show the contents + 5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion. + + + You can run this pipeline by: + magnus execute -f examples/catalog.yaml -c examples/configs/fs-catalog.yaml + start_at: Setup + steps: + Setup: + type: task + command_type: shell + command: mkdir -p data # (1) + next: Create Content + Create Content: + type: task + command_type: shell + command: | + echo "Hello from magnus" >> data/hello.txt + next: Clean up to get again + catalog: # (2) + put: + - data/hello.txt + Clean up to get again: + type: stub # (3) + command_type: shell + command: rm -rf data + next: Retrieve Content + Retrieve Content: + type: task + command_type: shell + command: cat data/hello.txt # (4) + catalog: + get: + - "data/hello.txt" # You can use wild cards following glob pattern + next: Clean up + Clean up: + type: stub # (6) + command_type: shell + command: rm -rf data + next: success + success: + type: success + fail: + type: fail diff --git a/examples/catalog_api.py b/examples/catalog_api.py new file mode 100644 index 00000000..c670ad98 --- /dev/null +++ b/examples/catalog_api.py @@ -0,0 +1,53 @@ +""" +This example demonstrates how to use the catalog API. +You can use the python API for fine grained control if configurational specification +does not fit your needs. 
+ +You can run this pipeline by: python examples/catalog_api.py +""" + +from pathlib import Path + +from magnus import Pipeline, Task, get_from_catalog, put_in_catalog + + +def create_content(): + dir_path = Path("data") + dir_path.mkdir(parents=True, exist_ok=True) # Make data folder if it doesn't exist + + with open(dir_path / "hello.txt", "w") as f: + f.write("Hello from magnus!!") + + # Put the created file in the catalog + put_in_catalog("data/hello.txt") + + +def retrieve_content(): + # Get the file from the catalog + get_from_catalog("data/hello.txt") + + with open("data/hello.txt", "r") as f: + f.read() + + +def main(): + # This step creates a file and syncs it to the catalog. + create = Task(name="create_content", command="examples.catalog_api.create_content") + # This step retrieves the file from the catalog and prints its content. + retrieve = Task( + name="retrieve_content", + command="examples.catalog_api.retrieve_content", + terminate_with_success=True, + ) + + create >> retrieve + + pipeline = Pipeline(steps=[create, retrieve], start_at=create, add_terminal_nodes=True) + + # Override the default configuration file with the one that has file-system as the catalog. + run_log = pipeline.execute(configuration_file="examples/configs/fs-catalog.yaml") + print(run_log) + + +if __name__ == "__main__": + main() diff --git a/examples/catalog_simple.py b/examples/catalog_simple.py new file mode 100644 index 00000000..b63bdb10 --- /dev/null +++ b/examples/catalog_simple.py @@ -0,0 +1,41 @@ +""" +An example pipeline to demonstrate the use of file-system catalog. + +Run this pipeline by: + python examples/concepts/catalog_simple.py + +""" + +from magnus import Catalog, Pipeline, Task + + +def main(): + # Make the data folder if it does not exist + set_up = Task(name="Setup", command="mkdir -p data", command_type="shell") + + # create a catalog instruction to put a file into the catalog + create_catalog = Catalog(put=["data/hello.txt"]) + # This task will create a file in the data folder and attaches the instruction + # to put the file into the catalog. + create = Task( + name="Create Content", + command='echo "Hello from magnus" >> data/hello.txt', + command_type="shell", + catalog=create_catalog, + terminate_with_success=True, + ) + + set_up >> create + + pipeline = Pipeline( + steps=[set_up, create], + start_at=set_up, + add_terminal_nodes=True, + ) + + # override the default configuration to use file-system catalog. + pipeline.execute(configuration_file="examples/configs/fs-catalog.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/catalog.py b/examples/concepts/catalog.py new file mode 100644 index 00000000..db0aefe7 --- /dev/null +++ b/examples/concepts/catalog.py @@ -0,0 +1,89 @@ +""" +A pipeline to demonstrate using the catalog service to create and retrieve content. + +You can run this pipeline by: + python run examples/concepts/catalog.py +""" + +from pathlib import Path + + +def create_content_in_data_folder(): + """ + Create a data directory and write a file "hello.txt" in the data folder. + """ + Path("data").mkdir(parents=True, exist_ok=True) + with open(Path("data") / "hello.txt", "w") as f: + f.write("Hello from data folder!!") + + +def create_content_in_another_folder(): + """ + Create a "another" directory and write a file "world.txt" in it. 
+ """ + Path("another").mkdir(parents=True, exist_ok=True) + with open(Path("another") / "world.txt", "w") as f: + f.write("Hello from another folder!!") + + +def retrieve_content_from_both(): + """ + Display the contents of the files in data and "another" folder + """ + with open(Path("data") / "hello.txt", "r") as f: + print(f.read()) + + with open(Path("another") / "world.txt", "r") as f: + print(f.read()) + + +def main(): + from magnus import Catalog, Pipeline, Task + + # This step creates a file in the data folder and syncs it to the catalog. + data_catalog = Catalog(put=["data/hello.txt"]) + data_create = Task( + name="create_content_in_data_folder", + command="examples.concepts.catalog.create_content_in_data_folder", + catalog=data_catalog, + ) + + # This step creates a file in the another folder and syncs it to the catalog. + another_catalog = Catalog(put=["another/world.txt"]) + another_create = Task( + name="create_content_in_another_folder", + command="examples.concepts.catalog.create_content_in_another_folder", + catalog=another_catalog, + ) + + # Delete the another folder to showcase that the folder will be recreated + # when we run the retrieve task. + delete_another_folder = Task( + name="delete_another_folder", + command="rm -rf another/", + command_type="shell", + ) + + # This step retrieves the file from the catalog and prints its content. + all_catalog = Catalog(get=["**/*"]) + retrieve = Task( + name="retrieve_content_from_both", + command="examples.concepts.catalog.retrieve_content_from_both", + catalog=all_catalog, + terminate_with_success=True, + ) + + data_create >> another_create >> delete_another_folder >> retrieve + + pipeline = Pipeline( + steps=[data_create, another_create, retrieve, delete_another_folder], + start_at=data_create, + add_terminal_nodes=True, + ) + + # Override the default configuration file with the one that has file-system as the catalog. + _ = pipeline.execute(configuration_file="examples/configs/fs-catalog.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/catalog.yaml b/examples/concepts/catalog.yaml new file mode 100644 index 00000000..3a2b6a80 --- /dev/null +++ b/examples/concepts/catalog.yaml @@ -0,0 +1,45 @@ +dag: + description: | + An example pipeline to showcase catalog functionality. + + The pipeline consists of four steps: + create_content_in_data_folder: Creates a file in "data" folder and syncs it to catalog + create_content_in_another_folder: Creates another file in "another" folder and syncs it to catalog + delete_another_folder: Deletes the another folder to showcase that it is recreated later. 
+ retrieve_content_from_both: Retrieves the content from both "data" and "another + + You can run this pipeline by: + magnus execute -f examples/concepts/catalog.yaml -c examples/configs/fs-catalog.yaml + + start_at: create_content_in_data_folder + steps: + create_content_in_data_folder: + type: task + command: examples.concepts.catalog.create_content_in_data_folder + catalog: + put: + - "data/hello.txt" + next: create_content_in_another_folder + create_content_in_another_folder: + type: task + command: examples.concepts.catalog.create_content_in_another_folder + catalog: + put: + - "another/world.txt" + next: delete_another_folder + delete_another_folder: + type: task + command_type: shell + command: rm -rf another + next: retrieve_content_from_both + retrieve_content_from_both: + type: task + command: examples.concepts.catalog.retrieve_content_from_both + catalog: + get: + - "**/*" + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/catalog_api.py b/examples/concepts/catalog_api.py new file mode 100644 index 00000000..a64077d6 --- /dev/null +++ b/examples/concepts/catalog_api.py @@ -0,0 +1,97 @@ +""" +A pipeline to demonstrate using the catalog service to create and retrieve content. +Here we use the python API get and put in the catalog. + +You can run this pipeline by: + python run examples/concepts/catalog_api.py +""" + +from pathlib import Path + +from magnus import get_from_catalog, put_in_catalog + + +def create_content_in_data_folder(): + """ + Create a data directory and write a file "hello.txt" in the data folder. + Use the python API put_in_catalog to put the file in the catalog. + """ + Path("data").mkdir(parents=True, exist_ok=True) + with open(Path("data") / "hello.txt", "w") as f: + f.write("Hello from data folder!!") + + put_in_catalog("data/hello.txt") + + +def create_content_in_another_folder(): + """ + Create a "another" directory and write a file "world.txt" in it. + Use the python API put_in_catalog to put the file in the catalog. + """ + Path("another").mkdir(parents=True, exist_ok=True) + with open(Path("another") / "world.txt", "w") as f: + f.write("Hello from another folder!!") + + put_in_catalog("another/world.txt") + + +def retrieve_content_from_both(): + """ + Retrieve the contents of the files from the catalog using the python + API get_from_catalog. + Display the contents of the files in data and "another" folder + """ + + get_from_catalog("**/*") + + with open(Path("data") / "hello.txt", "r") as f: + print(f.read()) + + with open(Path("another") / "world.txt", "r") as f: + print(f.read()) + + +def main(): + from magnus import Pipeline, Task + + # This step creates a file in the data folder and syncs it to the catalog. + data_create = Task( + name="create_content_in_data_folder", + command="examples.concepts.catalog_api.create_content_in_data_folder", + ) + + # This step creates a file in the another folder and syncs it to the catalog. + another_create = Task( + name="create_content_in_another_folder", + command="examples.concepts.catalog_api.create_content_in_another_folder", + ) + + # Delete the another folder to showcase that the folder will be recreated + # when we run the retrieve task. + delete_another_folder = Task( + name="delete_another_folder", + command="rm -rf another/", + command_type="shell", + ) + + # This step retrieves the file from the catalog and prints its content. 
+ retrieve = Task( + name="retrieve_content_from_both", + command="examples.concepts.catalog_api.retrieve_content_from_both", + terminate_with_success=True, + ) + + data_create >> another_create >> delete_another_folder >> retrieve + + pipeline = Pipeline( + steps=[data_create, another_create, retrieve, delete_another_folder], + start_at=data_create, + add_terminal_nodes=True, + ) + + # Override the default configuration file with the one that has file-system as the catalog. + _ = pipeline.execute(configuration_file="examples/configs/fs-catalog.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/catalog_object.py b/examples/concepts/catalog_object.py new file mode 100644 index 00000000..60b96001 --- /dev/null +++ b/examples/concepts/catalog_object.py @@ -0,0 +1,75 @@ +""" +A simple example of using catalog service to create and retrieve objects. + +You can run this pipeline by: + python run examples/concepts/catalog_object.py +""" + +from pydantic import BaseModel + +from magnus import get_object, put_object + + +class EggsModel(BaseModel): + ham: str + + +class EverythingModel(BaseModel): + spam: str + eggs: EggsModel + + +def put_data_object(): + """ + Create a pydantic object that we want to pass between steps + Store the object in the catalog for downstream steps. + """ + + data_model = EverythingModel(spam="Hello", eggs=EggsModel(ham="Yes, please!!")) + + put_object(data_model, name="everything_model") + + +def retrieve_object(): + """ + Retrieve the pydantic object from the catalog. + """ + + data_model = get_object("everything_model") + + assert data_model == EverythingModel(spam="Hello", eggs=EggsModel(ham="Yes, please!!")) + + print(data_model) + ">>>spam='Hello' eggs=EggsModel(ham='Yes, please!!')" + + +def main(): + from magnus import Pipeline, Task + + # This step creates an object and stores it in the catalog. + object_put = Task( + name="create_content_in_data_folder", + command="examples.concepts.catalog_object.put_data_object", + ) + + # This step retrieves the object from the catalog and prints its content. + object_get = Task( + name="retrieve_content_from_both", + command="examples.concepts.catalog_object.retrieve_object", + terminate_with_success=True, + ) + + object_put >> object_get + + pipeline = Pipeline( + steps=[object_put, object_get], + start_at=object_put, + add_terminal_nodes=True, + ) + + # Override the default configuration file with the one that has file-system as the catalog. + _ = pipeline.execute(configuration_file="examples/configs/fs-catalog.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/experiment_tracking_api.py b/examples/concepts/experiment_tracking_api.py new file mode 100644 index 00000000..9a1cb8d9 --- /dev/null +++ b/examples/concepts/experiment_tracking_api.py @@ -0,0 +1,46 @@ +""" +A simple example of using experiment tracking service to track experiments. + +You can run this pipeline by: + python run examples/concepts/experiment_tracking_api.py +""" + +from pydantic import BaseModel + +from magnus import Pipeline, Task, track_this + + +class EggsModel(BaseModel): + ham: str + + +def emit_metrics(): + """ + A function that populates experiment tracker with metrics. + + track_this can take many keyword arguments. + Nested structures are supported by pydantic models. 
+ """ + track_this(spam="hello", eggs=EggsModel(ham="world")) + track_this(answer=42.0) + track_this(is_it_true=False) + + +def main(): + metrics = Task( + name="Emit Metrics", + command="examples.concepts.experiment_tracking_api.emit_metrics", + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[metrics], + start_at=metrics, + add_terminal_nodes=True, + ) + + pipeline.execute() # (1) + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/experiment_tracking_env.py b/examples/concepts/experiment_tracking_env.py new file mode 100644 index 00000000..944b75ca --- /dev/null +++ b/examples/concepts/experiment_tracking_env.py @@ -0,0 +1,47 @@ +import json +import os + +from pydantic import BaseModel + +from magnus import Pipeline, Task + + +class EggsModel(BaseModel): + ham: str + + +def emit_metrics(): + """ + A function that populates environment variables with metrics. + + Any environment variable with prefix "MAGNUS_TRACK_" will be + understood as a metric. + + Numeric metrics can be set as strings but would be stored to + int/float. Boolean metrics are not supported. + """ + os.environ["MAGNUS_TRACK_spam"] = "hello" + os.environ["MAGNUS_TRACK_eggs"] = json.dumps( + EggsModel(ham="world").model_dump(by_alias=True), + ) + os.environ["MAGNUS_TRACK_answer"] = "42.0" # Would be stored as float(42) + + +def main(): + metrics = Task( + name="Emit Metrics", + command="examples.concepts.experiment_tracking_env.emit_metrics", + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[metrics], + start_at=metrics, + add_terminal_nodes=True, + ) + + pipeline.execute() # (1) + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/experiment_tracking_env.yaml b/examples/concepts/experiment_tracking_env.yaml new file mode 100644 index 00000000..0837bac1 --- /dev/null +++ b/examples/concepts/experiment_tracking_env.yaml @@ -0,0 +1,23 @@ +dag: + description: | + An example pipeline to demonstrate setting experiment tracking metrics + using environment variables. Any environment variable with prefix + 'MAGNUS_TRACK_' will be recorded as a metric captured during the step. + + You can run this pipeline as: + magnus execute -f examples/concepts/experiment_tracking_env.yaml + + start_at: shell + steps: + shell: + type: task + command_type: shell + command: | + export MAGNUS_TRACK_spam="hello" + export MAGNUS_TRACK_eggs='{"ham": "world"}' + export MAGNUS_TRACK_answer="42.0" + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/experiment_tracking_env_step.yaml b/examples/concepts/experiment_tracking_env_step.yaml new file mode 100644 index 00000000..7ec032d6 --- /dev/null +++ b/examples/concepts/experiment_tracking_env_step.yaml @@ -0,0 +1,25 @@ +dag: + description: | + An example pipeline to demonstrate setting experiment tracking metrics + using environment variables. Any environment variable with prefix + 'MAGNUS_TRACK_' will be recorded as a metric captured during the step. 
+ + You can run this pipeline as: + magnus execute -f examples/concepts/experiment_tracking_env_step.yaml + + start_at: shell + steps: + shell: + type: task + command_type: shell + command: | + export MAGNUS_TRACK_spam="hello" + export MAGNUS_TRACK_eggs='{"ham": "world"}' + export MAGNUS_TRACK_spam_STEP_1="hey" + export MAGNUS_TRACK_eggs_STEP_1='{"ham": "universe"}' + export MAGNUS_TRACK_answer="42.0" + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/experiment_tracking_integration.py b/examples/concepts/experiment_tracking_integration.py new file mode 100644 index 00000000..a5078f8e --- /dev/null +++ b/examples/concepts/experiment_tracking_integration.py @@ -0,0 +1,53 @@ +""" +A simple example of using experiment tracking service to track experiments. +In this example, we integrate with mlflow as our experiment tracking service. + +The mlflow server is expected to be running at: http://127.0.0.1:8080 + +You can run this pipeline by: + python run examples/concepts/experiment_tracking_integration.py +""" + +from pydantic import BaseModel + +from magnus import Pipeline, Task, track_this + + +class EggsModel(BaseModel): + ham: str + + +def emit_metrics(): + """ + A function that populates experiment tracker with metrics. + + track_this can take many keyword arguments. + Nested structures are supported by pydantic models. + """ + track_this(spam="hello", eggs=EggsModel(ham="world")) + track_this(is_it_true=False) + + track_this(answer=0.0) + track_this(step=1, answer=20.0) + track_this(step=2, answer=40.0) + track_this(step=3, answer=60.0) + + +def main(): + metrics = Task( + name="Emit Metrics", + command="examples.concepts.experiment_tracking_integration.emit_metrics", + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[metrics], + start_at=metrics, + add_terminal_nodes=True, + ) + + pipeline.execute(configuration_file="examples/configs/mlflow-config.yaml", tag="demo-magnus") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/experiment_tracking_step.py b/examples/concepts/experiment_tracking_step.py new file mode 100644 index 00000000..f693c6fb --- /dev/null +++ b/examples/concepts/experiment_tracking_step.py @@ -0,0 +1,48 @@ +""" +A simple example of using experiment tracking service to track experiments. +In this example, we are emitting metrics per step. + +You can run this pipeline by: + python run examples/concepts/experiment_tracking_step.py +""" + +from pydantic import BaseModel + +from magnus import Pipeline, Task, track_this + + +class EggsModel(BaseModel): + ham: str + + +def emit_metrics(): + """ + A function that populates experiment tracker with metrics. + + track_this can take many keyword arguments. + Nested structures are supported by pydantic models. 
+ """ + track_this(step=0, spam="hello", eggs=EggsModel(ham="world")) + track_this(step=1, spam="hey", eggs=EggsModel(ham="universe")) + track_this(answer=42.0) + track_this(is_it_true=False) + + +def main(): + metrics = Task( + name="Emit Metrics", + command="examples.concepts.experiment_tracking_step.emit_metrics", + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[metrics], + start_at=metrics, + add_terminal_nodes=True, + ) + + pipeline.execute() # (1) + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/map.py b/examples/concepts/map.py new file mode 100644 index 00000000..769ff2fb --- /dev/null +++ b/examples/concepts/map.py @@ -0,0 +1,84 @@ +""" +An example pipeline of using "map" to process a sequence of nodes repeatedly over a iterable +parameter. + +The start_index argument for the function process_chunk is dynamically set by iterating over chunks. + +If the argument start_index is not provided, you can still access the current value by +MAGNUS_MAP_VARIABLE environment variable. The environment variable MAGNUS_MAP_VARIABLE +is a dictionary with keys as iterate_as. + +Run this pipeline by: + python examples/concepts/map.py +""" + +from typing import List + +from pydantic import create_model + + +def chunk_files(): + """ + Identify the number of chunks and files to execute per batch. + + Set the parameter "chunks" to be the start indexes of batch. + Set the parameter "stride" to be the number of files to + execute per batch. + """ + return create_model( + "DynamicModel", + chunks=(List[int], list(range(0, 50, 10))), + stride=(int, 10), + )() + + +def process_chunk(stride: int, start_index: int): + """ + The function processes a chunk of files. + The files between the start_index and the start_index + stride + are processed per chunk. + """ + print("stride", stride, type(stride)) + print("start_index", start_index, type(start_index)) + for i in range(start_index, start_index + stride, stride): + pass + + +def main(): + """ + The pythonic equivalent of the following pipeline. + + chunks = chunk_files() + + for start_index in chunks.chunks: + process_chunk(chunks.stride, start_index) + + """ + from magnus import Map, Pipeline, Task + + execute = Task( + name="execute", + command="examples.concepts.map.process_chunk", + terminate_with_success=True, + ) + + execute_branch = Pipeline(steps=[execute], start_at=execute, add_terminal_nodes=True) + + generate = Task(name="chunk files", command="examples.concepts.map.chunk_files") + iterate_and_execute = Map( + name="iterate and execute", + branch=execute_branch, + iterate_on="chunks", # iterate on chunks parameter set by execute step + iterate_as="start_index", # expose the current start_index as the iterate_as parameter + terminate_with_success=True, + ) + + generate >> iterate_and_execute + + pipeline = Pipeline(steps=[generate, iterate_and_execute], start_at=generate, add_terminal_nodes=True) + + _ = pipeline.execute(configuration_file="examples/configs/fs-catalog-run_log.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/map.yaml b/examples/concepts/map.yaml new file mode 100644 index 00000000..b997e443 --- /dev/null +++ b/examples/concepts/map.yaml @@ -0,0 +1,43 @@ +dag: + description: | + This pipeline demonstrates the usage of map state to dynamically + execute workflows in parallel. 
+ + The step "chunk files" identifies the total number of batches to + execute in parallel and sets the parameters + - start_index of every batch to process, chunks + - number of files to process per batch, stride. + + The step "iterate and execute" iterates on "chunks" and the + parameter name per chunk is set to be "start_index". + + Run this example by: + magnus execute -f examples/concepts/map.yaml + start_at: chunk files + steps: + chunk files: + type: task + command_type: python + command: "examples.concepts.map.chunk_files" + next: iterate and execute + iterate and execute: + type: map + iterate_on: chunks + iterate_as: start_index + next: success + branch: + start_at: execute + steps: + execute: + type: task + command_type: python + command: "examples.concepts.map.process_chunk" + next: success + success: + type: success + fail: + type: fail + success: + type: success + fail: + type: fail diff --git a/examples/concepts/map_shell.yaml b/examples/concepts/map_shell.yaml new file mode 100644 index 00000000..64fe8f6e --- /dev/null +++ b/examples/concepts/map_shell.yaml @@ -0,0 +1,52 @@ +dag: + description: | + This pipeline demonstrates the usage of map state to dynamically + execute workflows in parallel. + + The step "chunk files" identifies the total number of batches to + execute in parallel and sets the parameters + - start_index of every batch to process, chunks + - number of files to process per batch, stride. + + The step "iterate and execute" iterates on "chunks" and the + parameter name per chunk is set to be "start_index". + + The shell script can access the start_index as + MAGNUS_MAP_VARIABLE environment variable. + + Run this pipeline by: + magnus execute -f examples/concepts/map_shell.yaml + + start_at: chunk files + steps: + chunk files: + type: task + command_type: shell + command: | + export MAGNUS_PRM_stride=10 && + export MAGNUS_PRM_chunks="[0, 10, 20, 30, 40]" + next: iterate and execute + iterate and execute: + type: map + iterate_on: chunks + iterate_as: start_index + next: success + branch: + start_at: execute + steps: + execute: + type: task + command_type: shell + command: | + echo $MAGNUS_PRM_stride + echo $MAGNUS_MAP_VARIABLE + # prints 10 and {"start_index": } + next: success + success: + type: success + fail: + type: fail + success: + type: success + fail: + type: fail diff --git a/examples/concepts/nesting.py b/examples/concepts/nesting.py new file mode 100644 index 00000000..8e2b0fea --- /dev/null +++ b/examples/concepts/nesting.py @@ -0,0 +1,73 @@ +""" +An example to demonstrate nesting workflows within workflows. + + +Run this pipeline by: + python examples/concepts/nesting.py + +""" + +from typing import List + +from pydantic import create_model + +from magnus import Map, Parallel, Pipeline, Stub, Task + + +def generate_list(): + return create_model( + "DynamicModel", + array=(List[int], list(range(2))), + )() + + +def main(): + stub = Stub(name="executable", terminate_with_success=True) + # A dummy pipeline that does nothing interesting + stubbed_pipeline = Pipeline(steps=[stub], start_at=stub, add_terminal_nodes=True) + + # A map step that executes the stubbed pipeline dynamically + # This step represents 2 parallel workflows when executed. + inner_most_map = Map( + name="inner most", + branch=stubbed_pipeline, + iterate_on="array", # Parameter defined in line #20 + iterate_as="y", + terminate_with_success=True, + ) + + # A pipeline with map state. 
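+ # This pipeline is reused below as both branches of the parallel step.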
+ map_pipeline = Pipeline(steps=[inner_most_map], start_at=inner_most_map, add_terminal_nodes=True) + + # A parallel step that executes a map_pipeline and stubbed pipeline + # By nesting a map within the parallel step, the total number of workflows is 4 (2 X 2 = 4) + nested_parallel = Parallel( + name="nested parallel", + branches={"a": map_pipeline, "b": map_pipeline}, + terminate_with_success=True, + ) + + # A pipeline with one nested parallel step + nested_parallel_pipeline = Pipeline(steps=[nested_parallel], start_at=nested_parallel, add_terminal_nodes=True) + + list_generator = Task(name="generate list", command="examples.concepts.nesting.generate_list") + + # A map step that iterates over array and executes nested_parallel_pipeline + # The total number of workflows is 50 by this time (2 X 2 X 2 = 8) + outer_most_map = Map( + name="outer most", + branch=nested_parallel_pipeline, + iterate_on="array", + iterate_as="x", + terminate_with_success=True, + ) + + list_generator >> outer_most_map + + root_pipeline = Pipeline(steps=[list_generator, outer_most_map], start_at=list_generator, add_terminal_nodes=True) + + _ = root_pipeline.execute(configuration_file="examples/configs/fs-catalog-run_log.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/nesting.yaml b/examples/concepts/nesting.yaml new file mode 100644 index 00000000..b8ced8d0 --- /dev/null +++ b/examples/concepts/nesting.yaml @@ -0,0 +1,78 @@ +dag: + description: | + An example of nesting pipelines within pipelines. + + Run this pipeline by: + magnus execute -f example/concepts/nesting.yaml + + start_at: generate_list + steps: + generate_list: + type: task + command_type: shell + command: export MAGNUS_PRM_array="[0, 1]" + next: outer most map + outer most map: + type: map + iterate_on: array + iterate_as: xarg + next: success + branch: + start_at: nested parallel + steps: + nested parallel: + type: parallel + next: success + branches: + a: + start_at: inner most map + steps: + inner most map: + type: map + iterate_on: array + iterate_as: yarg + next: success + branch: + start_at: executable + steps: + executable: + type: stub + next: success + success: + type: success + fail: + type: fail + success: + type: success + fail: + type: fail + b: + start_at: inner most map + steps: + inner most map: + type: map + iterate_on: array + iterate_as: yarg + next: success + branch: + start_at: executable + steps: + executable: + type: stub + next: success + success: + type: success + fail: + type: fail + success: + type: success + fail: + type: fail + success: + type: success + fail: + type: fail + success: + type: success + fail: + type: fail diff --git a/examples/concepts/notebook_api_parameters.ipynb b/examples/concepts/notebook_api_parameters.ipynb new file mode 100644 index 00000000..1ee97a19 --- /dev/null +++ b/examples/concepts/notebook_api_parameters.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "4377a9c8", + "metadata": {}, + "outputs": [], + "source": [ + "from pydantic import BaseModel" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "from magnus import get_parameter, set_parameter\n", + "\n", + "class EggsModel(BaseModel):\n", + " ham: str\n", + "\n", + "\n", + "class EverythingModel(BaseModel):\n", + " spam: str\n", + " eggs: EggsModel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e04f11a", + "metadata": {}, + 
"outputs": [], + "source": [ + "eggs = get_parameter(\"eggs\", cast_as=EggsModel)\n", + "spam = get_parameter(\"spam\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": {}, + "outputs": [], + "source": [ + "def modify_from_native(spam: str, eggs: EggsModel):\n", + " \"\"\"\n", + " Access initial parameters by the keys.\n", + " Type annotation helps in casting to the right model type.\n", + " \"\"\"\n", + " print(spam) # as returned by native parameters notebook\n", + " \">>> World\"\n", + " print(eggs) # as returned by native parameters notebook\n", + " \">>> ham='No, Thank you!!'\"\n", + "\n", + " \n", + " spam =\"Universe?\"\n", + " eggs = EggsModel(ham=\"Maybe, one more!!\")\n", + " set_parameter(spam=spam, eggs=eggs)\n", + "\n", + "\n", + "modified = modify_from_native(spam=spam, eggs=eggs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/notebook_api_parameters.yaml b/examples/concepts/notebook_api_parameters.yaml new file mode 100644 index 00000000..04836c0d --- /dev/null +++ b/examples/concepts/notebook_api_parameters.yaml @@ -0,0 +1,26 @@ +dag: + description: | + This is a sample pipeline with two steps that executes a notebook. + + The notebook is executed in the same environment as the current + project, you can import any module that was installed for the project. 
+ + You can run this pipeline as: + magnus execute -f examples/concepts/notebook_api_parameters.yaml -p examples/concepts/parameters.yaml + + start_at: native notebook + steps: + native notebook: + type: task + command_type: notebook + command: examples/concepts/notebook_native_parameters.ipynb + next: api notebook + api notebook: + type: task + command_type: notebook + command: examples/concepts/notebook_api_parameters.ipynb + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/notebook_api_parameters_out.ipynb b/examples/concepts/notebook_api_parameters_out.ipynb new file mode 100644 index 00000000..3d4446c5 --- /dev/null +++ b/examples/concepts/notebook_api_parameters_out.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "1b6e62ed", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.255937, + "timestamp_start": 1707918790.255617 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n", + "spam = \"World\"\n", + "eggs = {\"ham\": \"No, Thank you!!\"}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4377a9c8", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.256169, + "timestamp_start": 1707918790.25596 + } + }, + "outputs": [], + "source": [ + "from pydantic import BaseModel" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.257516, + "timestamp_start": 1707918790.256187 + } + }, + "outputs": [], + "source": [ + "from magnus import get_parameter, set_parameter\n", + "\n", + "class EggsModel(BaseModel):\n", + " ham: str\n", + "\n", + "\n", + "class EverythingModel(BaseModel):\n", + " spam: str\n", + " eggs: EggsModel" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0e04f11a", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.257781, + "timestamp_start": 1707918790.257534 + } + }, + "outputs": [], + "source": [ + "eggs = get_parameter(\"eggs\", cast_as=EggsModel)\n", + "spam = get_parameter(\"spam\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.25824, + "timestamp_start": 1707918790.257795 + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "World\n", + "ham='No, Thank you!!'\n" + ] + } + ], + "source": [ + "def modify_from_native(spam: str, eggs: EggsModel):\n", + " \"\"\"\n", + " Access initial parameters by the keys.\n", + " Type annotation helps in casting to the right model type.\n", + " \"\"\"\n", + " print(spam) # as returned by native parameters notebook\n", + " \">>> World\"\n", + " print(eggs) # as returned by native parameters notebook\n", + " \">>> ham='No, Thank you!!'\"\n", + "\n", + " \n", + " spam =\"Universe?\"\n", + " eggs = EggsModel(ham=\"Maybe, one more!!\")\n", + " set_parameter(spam=spam, eggs=eggs)\n", + "\n", + "\n", + "modified = modify_from_native(spam=spam, eggs=eggs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + 
"nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/notebook_env_parameters.ipynb b/examples/concepts/notebook_env_parameters.ipynb new file mode 100644 index 00000000..cd230c69 --- /dev/null +++ b/examples/concepts/notebook_env_parameters.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "4377a9c8", + "metadata": {}, + "outputs": [], + "source": [ + "from pydantic import BaseModel\n", + "import os\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "class EggsModel(BaseModel):\n", + " ham: str\n", + "\n", + "\n", + "class EverythingModel(BaseModel):\n", + " spam: str\n", + " eggs: EggsModel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e04f11a", + "metadata": {}, + "outputs": [], + "source": [ + "spam = os.environ[\"MAGNUS_PRM_spam\"]\n", + "eggs = EggsModel.model_validate_json(os.environ[\"MAGNUS_PRM_eggs\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": {}, + "outputs": [], + "source": [ + "def modify_from_native(spam: str, eggs: EggsModel):\n", + " print(spam) # as returned by native parameters notebook\n", + " \">>> World\"\n", + " print(eggs) # as returned by native parameters notebook\n", + " \">>> ham='No, Thank you!!'\"\n", + "\n", + " \n", + " os.environ[\"MAGNUS_PRM_spam\"] = \"Universe\"\n", + " eggs = EggsModel(ham=\"Maybe, one more!!\")\n", + " os.environ[\"MAGNUS_PRM_eggs\"] = json.dumps(eggs.model_dump(by_alias=True))\n", + "\n", + "\n", + "modified = modify_from_native(spam=spam, eggs=eggs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/notebook_env_parameters.yaml b/examples/concepts/notebook_env_parameters.yaml new file mode 100644 index 00000000..0df1d105 --- /dev/null +++ b/examples/concepts/notebook_env_parameters.yaml @@ -0,0 +1,26 @@ +dag: + description: | + This is a sample pipeline with two steps that executes a notebook. + + The notebook is executed in the same environment as the current + project, you can import any module that was installed for the project. 
+ + You can run this pipeline as: + magnus execute -f examples/concepts/notebook_env_parameters.yaml -p examples/concepts/parameters.yaml + + start_at: native notebook + steps: + native notebook: + type: task + command_type: notebook + command: examples/concepts/notebook_native_parameters.ipynb + next: api notebook + api notebook: + type: task + command_type: notebook + command: examples/concepts/notebook_env_parameters.ipynb + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/notebook_env_parameters_out.ipynb b/examples/concepts/notebook_env_parameters_out.ipynb new file mode 100644 index 00000000..52d75e9d --- /dev/null +++ b/examples/concepts/notebook_env_parameters_out.ipynb @@ -0,0 +1,135 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "91076f2e", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.591766, + "timestamp_start": 1707918790.591444 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n", + "spam = \"World\"\n", + "eggs = {\"ham\": \"No, Thank you!!\"}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4377a9c8", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.591986, + "timestamp_start": 1707918790.591793 + } + }, + "outputs": [], + "source": [ + "from pydantic import BaseModel\n", + "import os\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.593172, + "timestamp_start": 1707918790.592001 + } + }, + "outputs": [], + "source": [ + "class EggsModel(BaseModel):\n", + " ham: str\n", + "\n", + "\n", + "class EverythingModel(BaseModel):\n", + " spam: str\n", + " eggs: EggsModel" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0e04f11a", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.593867, + "timestamp_start": 1707918790.593187 + } + }, + "outputs": [], + "source": [ + "spam = os.environ[\"MAGNUS_PRM_spam\"]\n", + "eggs = EggsModel.model_validate_json(os.environ[\"MAGNUS_PRM_eggs\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.594819, + "timestamp_start": 1707918790.593886 + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"World\"\n", + "ham='No, Thank you!!'\n" + ] + } + ], + "source": [ + "def modify_from_native(spam: str, eggs: EggsModel):\n", + " print(spam) # as returned by native parameters notebook\n", + " \">>> World\"\n", + " print(eggs) # as returned by native parameters notebook\n", + " \">>> ham='No, Thank you!!'\"\n", + "\n", + " \n", + " os.environ[\"MAGNUS_PRM_spam\"] = \"Universe\"\n", + " eggs = EggsModel(ham=\"Maybe, one more!!\")\n", + " os.environ[\"MAGNUS_PRM_eggs\"] = json.dumps(eggs.model_dump(by_alias=True))\n", + "\n", + "\n", + "modified = modify_from_native(spam=spam, eggs=eggs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/examples/concepts/notebook_native_parameters.ipynb b/examples/concepts/notebook_native_parameters.ipynb new file mode 100644 index 00000000..4d9f7833 --- /dev/null +++ b/examples/concepts/notebook_native_parameters.ipynb @@ -0,0 +1,111 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "from pydantic import BaseModel\n", + "\n", + "class EggsModel(BaseModel):\n", + " ham: str\n", + "\n", + "\n", + "class EverythingModel(BaseModel):\n", + " spam: str\n", + " eggs: EggsModel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7f0aab2", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Note the tag parameters\n", + "spam = \"Change me\" \n", + "eggs =\"Change me\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e04f11a", + "metadata": {}, + "outputs": [], + "source": [ + "eggs = EggsModel.model_validate(eggs) # Cast the dict to EggsModel object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": {}, + "outputs": [], + "source": [ + "def modify_initial(spam: str, eggs: EggsModel):\n", + " \"\"\"\n", + " Access initial parameters by the keys.\n", + " Type annotation helps in casting to the right model type.\n", + " \"\"\"\n", + " print(spam)\n", + " \">>> Hello\"\n", + " print(eggs)\n", + " \">>> ham='Yes, please!!'\"\n", + "\n", + " \n", + " spam =\"World\"\n", + " eggs = EggsModel(ham=\"No, Thank you!!\")\n", + " return EverythingModel(spam=spam, eggs=eggs)\n", + "\n", + "\n", + "modified = modify_initial(spam=spam, eggs=eggs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a58a4492", + "metadata": { + "tags": [ + "magnus_output" + ] + }, + "outputs": [], + "source": [ + "\n", + "# Note the tag magnus_output\n", + "print(modified.model_dump(by_alias=True))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/notebook_native_parameters.yaml b/examples/concepts/notebook_native_parameters.yaml new file mode 100644 index 00000000..e4d08b72 --- /dev/null +++ b/examples/concepts/notebook_native_parameters.yaml @@ -0,0 +1,25 @@ +dag: + description: | + This is a sample pipeline with one step that executes a notebook. + + The step name "notebook" has the "command_type" to be notebook to + let magnus know to execute a notebook while the command is the + path to the notebook relative to the project root. + + The notebook is executed in the same environment as the current + project, you can import any module that was installed for the project. 
+ + You can run this pipeline as: + magnus execute -f examples/concepts/notebook_native_parameters.yaml -p examples/concepts/parameters.yaml + + start_at: notebook + steps: + notebook: + type: task + command_type: notebook + command: examples/concepts/notebook_native_parameters.ipynb + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/notebook_native_parameters_out.ipynb b/examples/concepts/notebook_native_parameters_out.ipynb new file mode 100644 index 00000000..b6382d45 --- /dev/null +++ b/examples/concepts/notebook_native_parameters_out.ipynb @@ -0,0 +1,171 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.857198, + "timestamp_start": 1707918790.85559 + } + }, + "outputs": [], + "source": [ + "from pydantic import BaseModel\n", + "\n", + "class EggsModel(BaseModel):\n", + " ham: str\n", + "\n", + "\n", + "class EverythingModel(BaseModel):\n", + " spam: str\n", + " eggs: EggsModel" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7f0aab2", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.857372, + "timestamp_start": 1707918790.857225 + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Note the tag parameters\n", + "spam = \"Change me\" \n", + "eggs =\"Change me\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "214344ac", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.857516, + "timestamp_start": 1707918790.857386 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n", + "spam = \"Hello\"\n", + "eggs = {\"ham\": \"Yes, please!!\"}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0e04f11a", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.857676, + "timestamp_start": 1707918790.857529 + } + }, + "outputs": [], + "source": [ + "eggs = EggsModel.model_validate(eggs) # Cast the dict to EggsModel object" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.858087, + "timestamp_start": 1707918790.857688 + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello\n", + "ham='Yes, please!!'\n" + ] + } + ], + "source": [ + "def modify_initial(spam: str, eggs: EggsModel):\n", + " \"\"\"\n", + " Access initial parameters by the keys.\n", + " Type annotation helps in casting to the right model type.\n", + " \"\"\"\n", + " print(spam)\n", + " \">>> Hello\"\n", + " print(eggs)\n", + " \">>> ham='Yes, please!!'\"\n", + "\n", + " \n", + " spam =\"World\"\n", + " eggs = EggsModel(ham=\"No, Thank you!!\")\n", + " return EverythingModel(spam=spam, eggs=eggs)\n", + "\n", + "\n", + "modified = modify_initial(spam=spam, eggs=eggs)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a58a4492", + "metadata": { + "ploomber": { + "timestamp_end": 1707918790.858388, + "timestamp_start": 1707918790.858251 + }, + "tags": [ + "magnus_output" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'spam': 'World', 'eggs': {'ham': 'No, Thank you!!'}}\n" + ] + } + ], + "source": [ + "\n", + "# Note the tag magnus_output\n", + "print(modified.model_dump(by_alias=True))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/parallel.py b/examples/concepts/parallel.py new file mode 100644 index 00000000..0443bdc8 --- /dev/null +++ b/examples/concepts/parallel.py @@ -0,0 +1,64 @@ +""" +This is a stubbed pipeline that demonstrates parallel + pipeline execution. + Note that the branches schema is same as dag schema. + + All the steps are mocked and they will just pass through. + Use this pattern to define the skeleton of your pipeline and + flesh out the steps later. + + You can run this pipeline by: + python examples/concepts/parallel.py +""" + +from magnus import Parallel, Pipeline, Stub + + +def main(): + # The steps in XGBoost training pipeline + prepare_xgboost = Stub(name="Prepare for XGBoost") + train_xgboost = Stub(name="Train XGBoost", terminate_with_success=True) + + prepare_xgboost >> train_xgboost + + # The pipeline for XGBoost training + xgboost = Pipeline( + name="XGBoost", + steps=[prepare_xgboost, train_xgboost], + start_at=prepare_xgboost, + add_terminal_nodes=True, + ) + + # The steps and pipeline in Random Forest training + train_rf = Stub(name="Train RF", terminate_with_success=True) + rfmodel = Pipeline( + steps=[train_rf], + start_at=train_rf, + add_terminal_nodes=True, + ) + + # The steps in parent pipeline + get_features = Stub(name="Get Features") + # The parallel step definition. + # Branches are just pipelines themselves + train_models = Parallel( + name="Train Models", + branches={"XGBoost": xgboost, "RF Model": rfmodel}, + ) + ensemble_model = Stub(name="Ensemble Modelling") + run_inference = Stub(name="Run Inference", terminate_with_success=True) + + get_features >> train_models >> ensemble_model >> run_inference + + # The parent pipeline + pipeline = Pipeline( + steps=[get_features, train_models, ensemble_model, run_inference], + start_at=get_features, + add_terminal_nodes=True, + ) + + _ = pipeline.execute(configuration_file="examples/configs/fs-catalog-run_log.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/parallel.yaml b/examples/concepts/parallel.yaml new file mode 100644 index 00000000..dd623966 --- /dev/null +++ b/examples/concepts/parallel.yaml @@ -0,0 +1,54 @@ +dag: + description: | + This is a stubbed pipeline that demonstrates parallel + pipeline execution. + Note that the branches schema is same as dag schema. + + All the steps are mocked and they will just pass through. + Use this pattern to define the skeleton of your pipeline and + flesh out the steps later. 
+ + You can run this pipeline by: + magnus execute -f examples/concepts/parallel.yaml + start_at: Get Features + steps: + Get Features: + type: stub + next: Train Models + Train Models: + type: parallel + next: Ensemble Modelling + branches: + XGBoost: + start_at: Prepare for XGBoost + steps: + Prepare for XGBoost: + type: stub + next: Train XGBoost + Train XGBoost: + type: stub + next: XGBoost success + XGBoost success: + type: success + fail: + type: fail + RF model: + start_at: Train RF Model + steps: + Train RF Model: + type: stub + next: RF Model Success + RF Model Success: + type: success + fail: + type: fail + Ensemble Modelling: + type: stub + next: Run Inference + Run Inference: + type: stub + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/parameters.yaml b/examples/concepts/parameters.yaml new file mode 100644 index 00000000..4bc1a8ac --- /dev/null +++ b/examples/concepts/parameters.yaml @@ -0,0 +1,3 @@ +spam: "Hello" +eggs: + ham: "Yes, please!!" diff --git a/examples/concepts/simple.py b/examples/concepts/simple.py new file mode 100644 index 00000000..b3932846 --- /dev/null +++ b/examples/concepts/simple.py @@ -0,0 +1,35 @@ +""" +A simple pipeline with a simple function that just prints "Hello World!". + +Run this pipeline by: + python examples/concepts/simple.py +""" + +from magnus import Pipeline, Task + + +def simple_function(): + """ + A simple function that just prints "Hello World!". + """ + print("Hello World!") + + +def main(): + simple_task = Task( + name="simple", + command="examples.concepts.simple.simple_function", + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[simple_task], + start_at=simple_task, + add_terminal_nodes=True, + ) + + pipeline.execute() # (1) + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/simple.yaml b/examples/concepts/simple.yaml new file mode 100644 index 00000000..97b241ce --- /dev/null +++ b/examples/concepts/simple.yaml @@ -0,0 +1,17 @@ +dag: + description: | + A simple pipeline with a simple function that just prints "Hello World!". 
+ + Run this pipeline by: + magnus execute -f examples/concepts/simple.yaml + start_at: simple + steps: + simple: + type: task + command: "examples.concepts.simple.simple_function" + command_type: python + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/simple_notebook.ipynb b/examples/concepts/simple_notebook.ipynb new file mode 100644 index 00000000..0aa468a5 --- /dev/null +++ b/examples/concepts/simple_notebook.ipynb @@ -0,0 +1,67 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "def add(x, y):\n", + " return x + y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": {}, + "outputs": [], + "source": [ + "def multiply(x, y):\n", + " return x * y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dcadc93-aa77-4a0a-9465-2e33eef4da44", + "metadata": {}, + "outputs": [], + "source": [ + "a = add(40, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b872cdf-820b-47b5-8f22-15c4b69c8637", + "metadata": {}, + "outputs": [], + "source": [ + "b = multiply(2, 100)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/simple_notebook.yaml b/examples/concepts/simple_notebook.yaml new file mode 100644 index 00000000..69acada8 --- /dev/null +++ b/examples/concepts/simple_notebook.yaml @@ -0,0 +1,25 @@ +dag: + description: | + This is a sample pipeline with one step that executes a notebook. + + The step name "notebook" has the "command_type" to be notebook to + let magnus know to execute a notebook while the command is the + path to the notebook relative to the project root. + + The notebook is executed in the same environment as the current + project, you can import any module that was installed for the project. 
+ + You can run this pipeline as: + magnus execute -f examples/concepts/simple_notebook.yaml + + start_at: notebook + steps: + notebook: + type: task + command_type: notebook + command: examples/concepts/simple_notebook.ipynb + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/simple_notebook_out.ipynb b/examples/concepts/simple_notebook_out.ipynb new file mode 100644 index 00000000..6517aa9d --- /dev/null +++ b/examples/concepts/simple_notebook_out.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "fde0ddbb", + "metadata": { + "ploomber": { + "timestamp_end": 1707918791.855813, + "timestamp_start": 1707918791.855612 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1707918791.856004, + "timestamp_start": 1707918791.855832 + } + }, + "outputs": [], + "source": [ + "def add(x, y):\n", + " return x + y" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", + "metadata": { + "ploomber": { + "timestamp_end": 1707918791.856139, + "timestamp_start": 1707918791.856017 + } + }, + "outputs": [], + "source": [ + "def multiply(x, y):\n", + " return x * y" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9dcadc93-aa77-4a0a-9465-2e33eef4da44", + "metadata": { + "ploomber": { + "timestamp_end": 1707918791.856272, + "timestamp_start": 1707918791.856151 + } + }, + "outputs": [], + "source": [ + "a = add(40, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7b872cdf-820b-47b5-8f22-15c4b69c8637", + "metadata": { + "ploomber": { + "timestamp_end": 1707918791.856388, + "timestamp_start": 1707918791.856284 + } + }, + "outputs": [], + "source": [ + "b = multiply(2, 100)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/task_api_parameters.py b/examples/concepts/task_api_parameters.py new file mode 100644 index 00000000..998dcc27 --- /dev/null +++ b/examples/concepts/task_api_parameters.py @@ -0,0 +1,75 @@ +""" +An example pipeline of accessing initial parameters and passing parameters between tasks +using the python API. + +You can run this pipeline by: + python examples/concepts/task_api_parameters.py + +""" + +from pydantic import BaseModel + +from magnus import Pipeline, Task, get_parameter, set_parameter + + +class EggsModel(BaseModel): + ham: str + + +class EverythingModel(BaseModel): + spam: str + eggs: EggsModel + + +def modify_initial(): + """ + Access initial parameters by the keys. + """ + spam = get_parameter("spam") + eggs = get_parameter("eggs", cast_as=EggsModel) + print(spam) + ">>> Hello" + print(eggs) + ">>> ham='Yes, please!!'" + + # modify parameters + set_parameter(spam="World", eggs=EggsModel(ham="No, Thank you!!")) + + +def consume(): + """ + Access only a subset of the parameters. + """ + # the value is set by the modify_initial function. 
+ # Use cast_as to type hint the return value. + eggs = get_parameter("eggs", cast_as=EggsModel) + print(eggs) + ">>> ham='No, Thank you!!'" + + set_parameter(eggs=EggsModel(ham="May be one more!!")) + + +def main(): + modify = Task( + name="Modify", + command="examples.concepts.task_api_parameters.modify_initial", + ) + + consume = Task( + name="Consume", + command="examples.concepts.task_api_parameters.consume", + terminate_with_success=True, + ) + + modify >> consume + + pipeline = Pipeline( + steps=[modify, consume], + start_at=modify, + add_terminal_nodes=True, + ) + pipeline.execute(parameters_file="examples/concepts/parameters.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/task_env_parameters.py b/examples/concepts/task_env_parameters.py new file mode 100644 index 00000000..0fc1d409 --- /dev/null +++ b/examples/concepts/task_env_parameters.py @@ -0,0 +1,79 @@ +""" +An example pipeline of accessing initial parameters and passing parameters between tasks +using environment variables. + +You can run this pipeline by: + python examples/concepts/task_env_parameters.py + +""" + +import json +import os + +from pydantic import BaseModel + +from magnus import Pipeline, Task + + +class EggsModel(BaseModel): + ham: str + + +class EverythingModel(BaseModel): + spam: str + eggs: EggsModel + + +def modify_initial(): + """ + Access initial parameters by the keys. + """ + spam = os.environ["MAGNUS_PRM_spam"] + eggs = EggsModel.model_validate_json(os.environ["MAGNUS_PRM_eggs"]) + print(spam) + ">>> Hello" + print(eggs) + ">>> ham='Yes, please!!'" + + # modify parameters + os.environ["MAGNUS_PRM_spam"] = "World" + os.environ["MAGNUS_PRM_eggs"] = json.dumps(eggs.model_dump(by_alias=True)) + + +def consume(): + """ + Access only a subset of the parameters. + """ + # the value is set by the modify_initial function. + # Use cast_as to type hint the return value. + eggs = EggsModel.model_validate_json(os.environ["MAGNUS_PRM_eggs"]) + print(eggs) + ">>> ham='No, Thank you!!'" + + os.environ["MAGNUS_PRM_eggs"] = json.dumps(EggsModel(ham="May be one more!!").model_dump_json(by_alias=True)) + + +def main(): + modify = Task( + name="Modify", + command="examples.concepts.task_api_parameters.modify_initial", + ) + + consume = Task( + name="Consume", + command="examples.concepts.task_api_parameters.consume", + terminate_with_success=True, + ) + + modify >> consume + + pipeline = Pipeline( + steps=[modify, consume], + start_at=modify, + add_terminal_nodes=True, + ) + pipeline.execute(parameters_file="examples/concepts/parameters.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/task_native_parameters.py b/examples/concepts/task_native_parameters.py new file mode 100644 index 00000000..e5623e3a --- /dev/null +++ b/examples/concepts/task_native_parameters.py @@ -0,0 +1,81 @@ +""" +An example pipeline of accessing initial parameters and passing parameters between tasks. + +You can run this pipeline by: + python examples/concepts/task_native_parameters.py + +""" + +from pydantic import BaseModel, create_model + + +class EggsModel(BaseModel): + ham: str + + +class EverythingModel(BaseModel): + spam: str + eggs: EggsModel + + +def modify_initial(spam: str, eggs: EggsModel): + """ + Access initial parameters by the keys. + Type annotation helps in casting to the right model type. + """ + print(spam) + ">>> Hello" + print(eggs) + ">>> ham='Yes, please!!'" + + # Return modified parameters + # Use this pattern to create or modify parameters at the root level. 
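+ # Each field of the returned pydantic model becomes a root-level parameter for downstream steps.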
+ return EverythingModel(spam="World", eggs=EggsModel(ham="No, Thank you!!")) + + +def consume(eggs: EggsModel): + """ + Access only a subset of the parameters. + """ + # the value is set by the modify_initial function. + print(eggs) + ">>> ham='No, Thank you!!'" + + # Magnus supports only pydantic models as return types. + # You can modify a subset of the parameters by creating a dynamic pydantic model. + # https://docs.pydantic.dev/latest/concepts/models/#dynamic-model-creation + + # CAUTION: Returning "eggs" would result in a new parameter "ham" at the root level + # as it looses the nested structure. + return create_model( + "DynamicModel", + eggs=(EggsModel, EggsModel(ham="May be one more!!")), + )() + + +def main(): + from magnus import Pipeline, Task + + modify = Task( + name="Modify", + command="examples.concepts.task_native_parameters.modify_initial", + ) + + consume = Task( + name="Consume", + command="examples.concepts.task_native_parameters.consume", + terminate_with_success=True, + ) + + modify >> consume + + pipeline = Pipeline( + steps=[modify, consume], + start_at=modify, + add_terminal_nodes=True, + ) + pipeline.execute(parameters_file="examples/concepts/parameters.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/task_shell_parameters.yaml b/examples/concepts/task_shell_parameters.yaml new file mode 100644 index 00000000..a014a135 --- /dev/null +++ b/examples/concepts/task_shell_parameters.yaml @@ -0,0 +1,44 @@ +dag: + description: | + This is a sample pipeline to show the parameter flow for shell types. + + The step "access initial" just displays the initial parameters defined in examples/concepts/parameters.yaml + The step modify_initial updates the parameters and sets them back as environment variables. + The step display_again displays the updated parameters from modify_initial and updates them. + + You can run this pipeline as: + magnus execute -f examples/concepts/task_shell_parameters.yaml -p examples/concepts/parameters.yaml + + start_at: access initial + steps: + access initial: + type: task + command_type: shell + command: | + env | grep 'MAGNUS_PRM_' + # MAGNUS_PRM_spam="Hello" + # MAGNUS_PRM_eggs={"ham": "Yes, please!!"} + next: modify initial + modify initial: + type: task + command_type: shell + command: | + export MAGNUS_PRM_spam='World' && \ + export MAGNUS_PRM_eggs='{"ham": "No, Thank you!!"}' + next: display again + display again: + type: task + command_type: shell + command: | + env | grep 'MAGNUS_PRM_' && \ + export MAGNUS_PRM_spam='Universe' && \ + export MAGNUS_PRM_eggs='{"ham": "Maybe, one more.."}' + # prints + # MAGNUS_PRM_spam="World" + # MAGNUS_PRM_eggs={"ham": "No, Thank you!!"} + # and sets new values + next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/task_shell_simple.yaml b/examples/concepts/task_shell_simple.yaml new file mode 100644 index 00000000..4109fc54 --- /dev/null +++ b/examples/concepts/task_shell_simple.yaml @@ -0,0 +1,22 @@ +dag: + description: | + This is a sample pipeline with one step that executes a shell command. + + The step name "shell" has the "command_type" to be shell to + let magnus know to execute a shell while the command is directly + executed in the current environment. + + You can run this pipeline as: + magnus execute -f examples/concepts/task_shell_simple.yaml + + start_at: shell + steps: + shell: + type: task + command_type: shell + command: echo "Hello world!!" 
+ next: success + success: + type: success + fail: + type: fail diff --git a/examples/concepts/traversal.py b/examples/concepts/traversal.py new file mode 100644 index 00000000..b648857b --- /dev/null +++ b/examples/concepts/traversal.py @@ -0,0 +1,43 @@ +""" +This is a stubbed pipeline that does 3 steps in sequence. +All the steps are mocked and they will just pass through. +Use this pattern to define the skeleton of your pipeline and +flesh out the steps later. + +You can run this pipeline by: python examples/concepts/traversal.py +""" + +from magnus import Pipeline, Stub + + +def main(): + step_1 = Stub(name="Step 1") + + step_2 = Stub(name="Step 2") + + step_3 = Stub(name="Step 3", terminate_with_success=True) + + # link nodes + step_1 >> step_2 >> step_3 + + """ + or + step_1 << step_2 << step_3 + or + + step_2.depends_on(step_1) + step_3.depends_on(step_2) + """ + + pipeline = Pipeline( + steps=[step_1, step_2, step_3], + start_at=step_1, + add_terminal_nodes=True, + ) + + run_log = pipeline.execute() + print(run_log) + + +if __name__ == "__main__": + main() diff --git a/examples/concepts/traversal.yaml b/examples/concepts/traversal.yaml new file mode 100644 index 00000000..2b6af4d9 --- /dev/null +++ b/examples/concepts/traversal.yaml @@ -0,0 +1,25 @@ +dag: + description: | + This is a stubbed pipeline that does 3 steps in sequence. + All the steps are mocked and they will just pass through. + Use this pattern to define the skeleton of your pipeline + and flesh out the steps later. + + You can run this pipeline by: + magnus execute -f examples/concepts/traversal.yaml + + start_at: step 1 + steps: + step 1: + type: stub + next: step 2 + step 2: + type: stub + next: step 3 + step 3: + type: stub + next: success + success: + type: success + fail: + type: fail diff --git a/examples/configs/argo-config-catalog.yaml b/examples/configs/argo-config-catalog.yaml new file mode 100644 index 00000000..6b58d7ae --- /dev/null +++ b/examples/configs/argo-config-catalog.yaml @@ -0,0 +1,24 @@ +executor: + type: "argo" # (1) + config: + image: $argo_docker_image # (2) + service_account_name: default-editor + persistent_volumes: # (3) + - name: magnus-volume + mount_path: /mnt + +run_log_store: # (4) + type: file-system + config: + log_folder: /mnt/run_log_store + +catalog: + type: file-system + config: + catalog_location: /mnt/catalog + +secrets: + type: do-nothing + +experiment_tracker: + type: do-nothing diff --git a/examples/configs/argo-config-full.yaml b/examples/configs/argo-config-full.yaml new file mode 100644 index 00000000..911dd88c --- /dev/null +++ b/examples/configs/argo-config-full.yaml @@ -0,0 +1,48 @@ +executor: + type: argo + config: + image: $argo_docker_image + max_workflow_duration_in_seconds: 86400 # Apply to spec + node_selector: + parallelism: 0 #apply to spec + service_account_name: pipeline-runner + resources: + limits: + memory: 140Mi + cpu: 100m + requests: + retry_strategy: + limit: 0 #int + retryPolicy: "always" + backoff: + duration: + factor: + max_duration: + max_step_duration_in_seconds: 60 # apply to templateDefaults + tolerations: # apply to spec + image_pull_policy: "" # apply to template + persistent_volumes: # (3) + - name: magnus-volume + mount_path: /mnt + + # overrides: + # override: + # tolerations: # template + # image: # container + # max_step_duration: # template + # step_timeout: #template + # nodeSelector: #template + # parallelism: # this needs to be applied for map + # resources: # container + # imagePullPolicy: #container + # retryStrategy: # template
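+ # A hypothetical filled-in override, following the keys sketched above (the name and values are illustrative only): + # overrides: + # bigger-memory: + # resources: + # limits: + # memory: 1Gi + # cpu: 500m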
+ +run_log_store: # (5) + type: chunked-fs + config: + log_folder: /mnt/run_log_store # (6) + +catalog: # (5) + type: file-system + config: + log_folder: /mnt/catalog # (6) diff --git a/examples/configs/argo-config-sequential.yaml b/examples/configs/argo-config-sequential.yaml new file mode 100644 index 00000000..9fdf8c13 --- /dev/null +++ b/examples/configs/argo-config-sequential.yaml @@ -0,0 +1,25 @@ +executor: + type: "argo" # (1) + config: + image: $argo_docker_image # (2) + service_account_name: default-editor + persistent_volumes: # (3) + - name: magnus-volume + mount_path: /mnt + overrides: + sequential: + parallelism: 1 + +run_log_store: # (4) + type: chunked-fs + config: + log_folder: /mnt/run_log_store + +catalog: + type: do-nothing + +secrets: + type: do-nothing + +experiment_tracker: + type: do-nothing diff --git a/examples/configs/argo-config.yaml b/examples/configs/argo-config.yaml new file mode 100644 index 00000000..b964e0b2 --- /dev/null +++ b/examples/configs/argo-config.yaml @@ -0,0 +1,22 @@ +executor: + type: "argo" # (1) + config: + image: magnus:demo # (2) + service_account_name: default-editor + persistent_volumes: # (3) + - name: magnus-volume + mount_path: /mnt + +run_log_store: # (4) + type: file-system + config: + log_folder: /mnt/run_log_store + +catalog: + type: do-nothing + +secrets: + type: do-nothing + +experiment_tracker: + type: do-nothing diff --git a/examples/configs/argo-pipeline.yaml b/examples/configs/argo-pipeline.yaml new file mode 100644 index 00000000..ffaa437e --- /dev/null +++ b/examples/configs/argo-pipeline.yaml @@ -0,0 +1,90 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: magnus-dag- + annotations: {} + labels: {} +spec: + activeDeadlineSeconds: 172800 + entrypoint: magnus-dag + podGC: + strategy: OnPodCompletion + retryStrategy: + limit: "0" + retryPolicy: Always + backoff: + duration: "120" + factor: 2 + maxDuration: "3600" + templates: + - name: magnus-dag + failFast: true + dag: + tasks: + - name: simple-task-6mn2ll + template: simple-task-6mn2ll + depends: "" + - name: success-success-0uvo9r + template: success-success-0uvo9r + depends: simple-task-6mn2ll.Succeeded + - name: simple-task-6mn2ll + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - simple + - --log-level + - WARNING + - --file + - examples/concepts/simple.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: success-success-0uvo9r + container: + image: harbor.csis.astrazeneca.net/mlops/magnus:latest + command: + - magnus + - execute_single_node + - "{{workflow.parameters.run_id}}" + - success + - --log-level + - WARNING + - --file + - examples/concepts/simple.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: "" + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + templateDefaults: + activeDeadlineSeconds: 7200 + timeout: 10800s + arguments: + parameters: + - name: run_id + value: "{{workflow.uid}}" + volumes: + - name: executor-0 + persistentVolumeClaim: + claimName: magnus-volume diff --git a/examples/configs/chunked-fs-run_log.yaml b/examples/configs/chunked-fs-run_log.yaml new file mode 100644 index 00000000..acaecb6e --- /dev/null +++ 
b/examples/configs/chunked-fs-run_log.yaml @@ -0,0 +1,2 @@ +run_log_store: + type: chunked-fs diff --git a/examples/configs/default.yaml b/examples/configs/default.yaml new file mode 100644 index 00000000..c29165bb --- /dev/null +++ b/examples/configs/default.yaml @@ -0,0 +1,14 @@ +executor: + type: local # (1) + +run_log_store: + type: buffered # (2) + +catalog: + type: do-nothing # (3) + +secrets: + type: do-nothing # (4) + +experiment_tracker: # (5) + type: do-nothing diff --git a/examples/configs/dotenv.yaml b/examples/configs/dotenv.yaml new file mode 100644 index 00000000..82e4b4a5 --- /dev/null +++ b/examples/configs/dotenv.yaml @@ -0,0 +1,4 @@ +secrets: + type: dotenv # (1) + config: + location: examples/secrets.env # (2) diff --git a/examples/configs/fs-catalog-run_log.yaml b/examples/configs/fs-catalog-run_log.yaml new file mode 100644 index 00000000..03487500 --- /dev/null +++ b/examples/configs/fs-catalog-run_log.yaml @@ -0,0 +1,5 @@ +catalog: + type: file-system # (1) + +run_log_store: + type: file-system # (1) diff --git a/examples/configs/fs-catalog.yaml b/examples/configs/fs-catalog.yaml new file mode 100644 index 00000000..48597735 --- /dev/null +++ b/examples/configs/fs-catalog.yaml @@ -0,0 +1,2 @@ +catalog: + type: file-system # (1) diff --git a/examples/configs/fs-run_log.yaml b/examples/configs/fs-run_log.yaml new file mode 100644 index 00000000..896955d3 --- /dev/null +++ b/examples/configs/fs-run_log.yaml @@ -0,0 +1,2 @@ +run_log_store: + type: file-system diff --git a/examples/configs/local-container.yaml b/examples/configs/local-container.yaml new file mode 100644 index 00000000..2592a30c --- /dev/null +++ b/examples/configs/local-container.yaml @@ -0,0 +1,18 @@ +executor: + type: "local-container" # (1) + config: + docker_image: $default_docker_image # (2) + environment: + key: value # (3) + +run_log_store: # (4) + type: file-system + +catalog: + type: file-system + +secrets: + type: do-nothing + +experiment_tracker: + type: do-nothing diff --git a/examples/configs/mlflow-config.yaml b/examples/configs/mlflow-config.yaml new file mode 100644 index 00000000..2b02e58c --- /dev/null +++ b/examples/configs/mlflow-config.yaml @@ -0,0 +1,4 @@ +experiment_tracker: + type: mlflow + config: + server_url: http://127.0.0.1:8080 diff --git a/examples/configs/mocked-config-debug.yaml b/examples/configs/mocked-config-debug.yaml new file mode 100644 index 00000000..ea9d2ab3 --- /dev/null +++ b/examples/configs/mocked-config-debug.yaml @@ -0,0 +1,12 @@ +catalog: + type: file-system # (1) + +run_log_store: + type: file-system # (1) + +executor: + type: mocked + config: + patches: + Retrieve Content: + command: cat data/hello.txt diff --git a/examples/configs/mocked-config-simple.yaml b/examples/configs/mocked-config-simple.yaml new file mode 100644 index 00000000..12fb444d --- /dev/null +++ b/examples/configs/mocked-config-simple.yaml @@ -0,0 +1,8 @@ +catalog: + type: file-system # (1) + +run_log_store: + type: file-system # (1) + +executor: + type: mocked diff --git a/examples/configs/mocked-config-unittest.yaml b/examples/configs/mocked-config-unittest.yaml new file mode 100644 index 00000000..1cd29997 --- /dev/null +++ b/examples/configs/mocked-config-unittest.yaml @@ -0,0 +1,12 @@ +catalog: + type: file-system # (1) + +run_log_store: + type: file-system # (1) + +executor: + type: mocked + config: + patches: + step 1: + command: exit 0 diff --git a/examples/configs/mocked-config.yaml b/examples/configs/mocked-config.yaml new file mode 100644 index 00000000..a6228a12 --- 
/dev/null +++ b/examples/configs/mocked-config.yaml @@ -0,0 +1,12 @@ +catalog: + type: file-system # (1) + +run_log_store: + type: file-system # (1) + +executor: + type: mocked + config: + patches: + chunk files: + command: examples.concepts.map.chunk_files diff --git a/examples/configs/mocked_map_parameters.yaml b/examples/configs/mocked_map_parameters.yaml new file mode 100644 index 00000000..8b59320b --- /dev/null +++ b/examples/configs/mocked_map_parameters.yaml @@ -0,0 +1,7 @@ +chunks: + - 0 + - 10 + - 20 + - 30 + - 40 +stride: 10 diff --git a/examples/configs/secrets-env-default.yaml b/examples/configs/secrets-env-default.yaml new file mode 100644 index 00000000..33975b9b --- /dev/null +++ b/examples/configs/secrets-env-default.yaml @@ -0,0 +1,2 @@ +secrets: + type: env-secrets-manager diff --git a/examples/configs/secrets-env-ps.yaml b/examples/configs/secrets-env-ps.yaml new file mode 100644 index 00000000..1534ece1 --- /dev/null +++ b/examples/configs/secrets-env-ps.yaml @@ -0,0 +1,4 @@ +secrets: + type: env-secrets-manager + config: + prefix: "magnus_" diff --git a/examples/contrived.py b/examples/contrived.py new file mode 100644 index 00000000..c1d86fb2 --- /dev/null +++ b/examples/contrived.py @@ -0,0 +1,34 @@ +""" +This is a stubbed pipeline that does 4 steps in sequence. +All the steps are mocked and they will just pass through. +Use this pattern to define the skeleton of your pipeline and flesh out the steps later. + +You can run this pipeline by python run examples/contrived.py +""" + +from magnus import Pipeline, Stub + + +def main(): + acquire_data = Stub(name="Acquire Data", next="Prepare Data") # (1) + + prepare_data = Stub(name="Prepare Data") + + extract_features = Stub(name="Extract Features").depends_on(prepare_data) + + modelling = Stub(name="Model", terminate_with_success=True) # (2) + + extract_features >> modelling # (3) + + pipeline = Pipeline( + steps=[acquire_data, prepare_data, extract_features, modelling], + start_at=acquire_data, + add_terminal_nodes=True, + ) # (4) + + run_log = pipeline.execute() # (5) + print(run_log) + + +if __name__ == "__main__": + main() diff --git a/examples/contrived.yaml b/examples/contrived.yaml new file mode 100644 index 00000000..209ddfc3 --- /dev/null +++ b/examples/contrived.yaml @@ -0,0 +1,26 @@ +dag: + description: | + This is a stubbed pipeline that does 4 steps in sequence. + All the steps are mocked and they will just pass through. + Use this pattern to define the skeleton of your pipeline and flesh out the steps later. + + You can run this pipeline by magnus execute -f examples/contrived.yaml + + start_at: Acquire data + steps: + Acquire data: + type: stub # (1) + next: Prepare data # (2) + Prepare data: + type: stub + next: Extract features + Extract features: + type: stub + next: Model + Model: + type: stub + next: success + success: # (3) + type: success + fail: # (4) + type: fail diff --git a/examples/default-fail.yaml b/examples/default-fail.yaml new file mode 100644 index 00000000..283b480b --- /dev/null +++ b/examples/default-fail.yaml @@ -0,0 +1,28 @@ +dag: + description: | + This is a simple pipeline to demonstrate failure in a step. + + step 1 >> step 2 (FAIL) >> fail + + The default behavior is to traverse to step type fail and mark the run as failed. 
+ + You can control the flow by using on_failure, please check example/on-failure.yaml + + You can run this pipeline by magnus execute -f examples/default-fail.yaml + start_at: step 1 + steps: + step 1: + type: stub + next: step 2 + step 2: + type: task + command_type: shell + command: exit 1 # This will fail + next: step 3 + step 3: + type: stub + next: success + success: + type: success + fail: + type: fail diff --git a/examples/executors/argo-map-sequential.yaml b/examples/executors/argo-map-sequential.yaml new file mode 100644 index 00000000..cfb8af2c --- /dev/null +++ b/examples/executors/argo-map-sequential.yaml @@ -0,0 +1,42 @@ +dag: + description: | + This pipeline demonstrates the usage of map state to dynamically + execute workflows in parallel. + + The step "chunk files" identifies the total number of batches to + execute in parallel and sets the parameters + - start_index of every batch to process, chunks + - number of files to process per batch, stride. + + The step "iterate and execute" iterates on "chunks" and the + parameter name per chunk is set to be "start_index". + start_at: chunk files + steps: + chunk files: + type: task + command_type: python + command: "examples.concepts.map.chunk_files" + next: iterate and execute + iterate and execute: + type: map + iterate_on: chunks + iterate_as: start_index + next: success + overrides: + argo: sequential + branch: + start_at: execute + steps: + execute: + type: task + command_type: python + command: "examples.concepts.map.process_chunk" + next: success + success: + type: success + fail: + type: fail + success: + type: success + fail: + type: fail diff --git a/examples/executors/local-container-override.yaml b/examples/executors/local-container-override.yaml new file mode 100644 index 00000000..a97bde83 --- /dev/null +++ b/examples/executors/local-container-override.yaml @@ -0,0 +1,23 @@ +executor: + type: "local-container" + config: + docker_image: $default_docker_image + environment: + key: value + overrides: + custom_docker_image: + docker_image: $custom_docker_image + environment: + key: not-value + +run_log_store: + type: file-system + +catalog: + type: file-system + +secrets: + type: do-nothing + +experiment_tracker: + type: do-nothing diff --git a/examples/executors/step_overrides_container.py b/examples/executors/step_overrides_container.py new file mode 100644 index 00000000..e29b5b11 --- /dev/null +++ b/examples/executors/step_overrides_container.py @@ -0,0 +1,56 @@ +""" +An example to demonstrate overriding global configuration for a step. + + step 1 runs in the docker image specified in the executor config and uses the environment + value for key to be "value" + + step 2 overrides the config and executes the command in the configuration defined + in overrides section of executor config. + + You can run this example using two steps: + Generates yaml file: + + MAGNUS_CONFIGURATION_FILE=examples/executors/local-container-override.yaml \ + python examples/executors/step_overrides_container.py + + # Create the docker image with the pipeline magnus-pipeline.yaml as part of it. 
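The argo-map-sequential pipeline above iterates on the parameter "chunks" and exposes each value as "start_index". The referenced module examples.concepts.map is not part of this diff, so the following is a hypothetical sketch of what such tasks could look like, based only on the parameter names in examples/configs/mocked_map_parameters.yaml and the convention that Python tasks return pydantic models:

from typing import List

from pydantic import BaseModel


class ChunkParams(BaseModel):
    # Parameter names mirror examples/configs/mocked_map_parameters.yaml.
    chunks: List[int]
    stride: int


def chunk_files() -> ChunkParams:
    # Hypothetical stand-in: sets the start index of every batch ("chunks")
    # and the number of files per batch ("stride") for the map node to iterate on.
    return ChunkParams(chunks=[0, 10, 20, 30, 40], stride=10)


def process_chunk(start_index: int, stride: int) -> None:
    # Hypothetical stand-in: each map branch receives one entry of "chunks"
    # under the name "start_index" (iterate_as), plus any parameters it declares.
    print(f"processing files {start_index} to {start_index + stride}")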
+ + Execute the pipeline using the CLI: + + MAGNUS_VAR_default_docker_image=magnus:3.8 \ + MAGNUS_VAR_custom_docker_image=magnus:3.9 \ + magnus execute -f magnus-pipeline.yaml -c examples/executors/local-container-override.yaml + +""" + +from magnus import Pipeline, Task + + +def main(): + step1 = Task( + name="step1", + command="python --version && env | grep key", + command_type="shell", + ) + + step2 = Task( + name="step2", + command="python --version && env | grep key", + command_type="shell", + terminate_with_success=True, + overrides={"local-container": "custom_docker_image"}, + ) + + step1 >> step2 + + pipeline = Pipeline( + start_at=step1, + steps=[step1, step2], + add_terminal_nodes=True, + ) + + pipeline.execute() + + +if __name__ == "__main__": + main() diff --git a/examples/executors/step_overrides_container.yaml b/examples/executors/step_overrides_container.yaml new file mode 100644 index 00000000..6ee00502 --- /dev/null +++ b/examples/executors/step_overrides_container.yaml @@ -0,0 +1,35 @@ +dag: + description: | + An example to demonstrate overriding global configuration for a step. + + step 1 runs in the docker image specified in the executor config and uses the environment + value for key to be "value" + + step 2 overrides the config and executes the command in the configuration defined + in overrides section of executor config. + + You can execute the pipeline by: + magnus execute -f examples/executors/step_overrides_container.yaml \ + -c examples/executors/local-container-override.yaml + start_at: step 1 + steps: + step 1: + type: task + command_type: shell # (2) + command: | + python --version && + env | grep key + next: step 2 + step 2: + type: task + command_type: shell + command: | + python --version && + env | grep key + overrides: + local-container: custom_docker_image + next: success + success: + type: success + fail: + type: fail diff --git a/examples/experiment_tracking_api.py b/examples/experiment_tracking_api.py new file mode 100644 index 00000000..1d88faaf --- /dev/null +++ b/examples/experiment_tracking_api.py @@ -0,0 +1,51 @@ +""" +An example pipeline to demonstrate setting experiment tracking metrics + using environment variables. Any environment variable with prefix + 'MAGNUS_TRACK_' will be recorded as a metric captured during the step. + + You can run this pipeline as: + python examples/experiment_tracking_api.py + + The mlflow server is expected to be running at http://127.0.0.1:8080 +""" + + +from pydantic import BaseModel + +from magnus import Pipeline, Task, track_this + + +class EggsModel(BaseModel): + ham: str + + +def emit_metrics(): + """ + A function that populates experiment tracker with metrics. + + track_this can take many keyword arguments. + Nested structures are supported by pydantic models. 
+ """ + track_this(spam="hello", eggs=EggsModel(ham="world")) # (1) + track_this(answer=42.0) + track_this(is_it_true=False) + + +def main(): + metrics = Task( + name="Emit Metrics", + command="examples.experiment_tracking_api.emit_metrics", + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[metrics], + start_at=metrics, + add_terminal_nodes=True, + ) + + pipeline.execute(configuration_file="examples/configs/mlflow-config.yaml") # (2) + + +if __name__ == "__main__": + main() diff --git a/examples/experiment_tracking_env.yaml b/examples/experiment_tracking_env.yaml new file mode 100644 index 00000000..92663515 --- /dev/null +++ b/examples/experiment_tracking_env.yaml @@ -0,0 +1,26 @@ +dag: + description: | + An example pipeline to demonstrate setting experiment tracking metrics + using environment variables. Any environment variable with prefix + 'MAGNUS_TRACK_' will be recorded as a metric captured during the step. + + You can run this pipeline as: + magnus execute -f examples/concepts/experiment_tracking_env.yaml \ + -c examples/configs/mlflow-config.yaml + + The mlflow server is expected to be running at http://127.0.0.1:8080 + + start_at: shell + steps: + shell: + type: task + command_type: shell + command: | + export MAGNUS_TRACK_spam="hello" + export MAGNUS_TRACK_eggs='{"ham": "world"}' + export MAGNUS_TRACK_answer="42.0" + next: success + success: + type: success + fail: + type: fail diff --git a/examples/functions.py b/examples/functions.py new file mode 100644 index 00000000..8b8cff97 --- /dev/null +++ b/examples/functions.py @@ -0,0 +1,80 @@ +""" +Utility functions used in examples. +""" + +import logging + +from pydantic import BaseModel + +# Magnus logging levels are different to your logging levels. +logger = logging.getLogger("application") +logger.setLevel(logging.DEBUG) + + +class InnerModel(BaseModel): + """ + A pydantic model representing a group of related parameters. + """ + + foo: int + bar: str + + +class Parameter(BaseModel): + """ + A pydantic model representing the parameters of the whole pipeline. + """ + + x: int + y: InnerModel + + +def return_parameter() -> Parameter: + """ + A example python task that does something interesting and returns + a parameter to be used in downstream steps. + + The annotation of the return type of the function is not mandatory + but it is a good practice. + + Returns: + Parameter: The parameters that should be used in downstream steps. + """ + # Return type of a function should be a pydantic model + return Parameter(x=1, y=InnerModel(foo=10, bar="hello world")) + + +def display_parameter(x: int, y: InnerModel): + """ + An example python task that does something interesting with input parameters. + + Annotating the arguments of the function is important for + magnus to understand the type of parameters you want. + + Without annotations, magnus would return a python dictionary. + + Input args can be a pydantic model or the individual attributes of the non-nested model + """ + print(x) + # >>> prints 1 + print(y) + # >>> prints InnerModel(foo=10, bar="hello world") + logger.info(f"I got a parameter: {x}") + logger.info(f"I got another parameter: {y}") + + +""" +Without any framework, the "driver" code would be the main function. +""" + + +def main(): + """ + This is not required for magnus to run! 
+ """ + my_param = return_parameter() + display_parameter(my_param.x, my_param.y) + + +if __name__ == "__main__": + main() diff --git a/examples/generated-argo-pipeline.yaml b/examples/generated-argo-pipeline.yaml new file mode 100644 index 00000000..290dd5a2 --- /dev/null +++ b/examples/generated-argo-pipeline.yaml @@ -0,0 +1,175 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: magnus-dag- + annotations: {} + labels: {} +spec: + activeDeadlineSeconds: 172800 + entrypoint: magnus-dag + podGC: + strategy: OnPodCompletion + retryStrategy: + limit: '0' + retryPolicy: Always + backoff: + duration: '120' + factor: 2 + maxDuration: '3600' + serviceAccountName: default-editor + templates: + - name: magnus-dag + failFast: true + dag: + tasks: + - name: Acquire-data-stub-zl7utt + template: Acquire-data-stub-zl7utt + depends: '' + - name: Prepare-data-stub-jkn77g + template: Prepare-data-stub-jkn77g + depends: Acquire-data-stub-zl7utt.Succeeded + - name: Extract-features-stub-jdonf3 + template: Extract-features-stub-jdonf3 + depends: Prepare-data-stub-jkn77g.Succeeded + - name: Model-stub-42qnma + template: Model-stub-42qnma + depends: Extract-features-stub-jdonf3.Succeeded + - name: success-success-mk4nqv + template: success-success-mk4nqv + depends: Model-stub-42qnma.Succeeded + - name: Acquire-data-stub-zl7utt + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - Acquire%data + - --log-level + - WARNING + - --file + - examples/contrived.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: Prepare-data-stub-jkn77g + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - Prepare%data + - --log-level + - WARNING + - --file + - examples/contrived.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: Extract-features-stub-jdonf3 + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - Extract%features + - --log-level + - WARNING + - --file + - examples/contrived.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: Model-stub-42qnma + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - Model + - --log-level + - WARNING + - --file + - examples/contrived.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + requests: + memory: 1Gi + cpu: 250m + - name: success-success-mk4nqv + container: + image: magnus:demo + command: + - magnus + - execute_single_node + - '{{workflow.parameters.run_id}}' + - success + - --log-level + - WARNING + - --file + - examples/contrived.yaml + - --config-file + - examples/configs/argo-config.yaml + volumeMounts: + - name: executor-0 + mountPath: /mnt + imagePullPolicy: '' + resources: + limits: + memory: 1Gi + cpu: 250m + 
requests: + memory: 1Gi + cpu: 250m + templateDefaults: + activeDeadlineSeconds: 7200 + timeout: 10800s + arguments: + parameters: + - name: run_id + value: '{{workflow.uid}}' + volumes: + - name: executor-0 + persistentVolumeClaim: + claimName: magnus-volume diff --git a/examples/logging.yaml b/examples/logging.yaml new file mode 100644 index 00000000..fdc9f6f0 --- /dev/null +++ b/examples/logging.yaml @@ -0,0 +1,29 @@ +dag: + description: | + This is a simple pipeline that does 3 steps in sequence. + + step 1 >> step 2 >> success + + Log levels: + magnus log level is different from your application log level. By default, it is set to WARNING but you can + control it by using --log-level while calling application. + + To view only your application logs, you can execute this pipeline with: + magnus execute logging.yaml --log-level ERROR + + start_at: step 1 + steps: + step 1: + type: task + command_type: python + command: examples.functions.return_parameter + next: step 2 + step 2: + type: task + command_type: python + command: examples.functions.display_parameter + next: success + success: + type: success + fail: + type: fail diff --git a/examples/mocking.py b/examples/mocking.py new file mode 100644 index 00000000..ffa1fc7a --- /dev/null +++ b/examples/mocking.py @@ -0,0 +1,41 @@ +""" +This is a simple pipeline that does 3 steps in sequence. + + step 1 >> step 2 >> step 3 >> success + + All the steps are mocked and they will just pass through. + Use this pattern to define the skeleton of your pipeline and flesh out the steps later. + + Note that you can give any arbitrary keys to the steps (like step 2). This is handy + to mock steps within mature pipelines. + + You can run this pipeline by: + python examples/mocking.py +""" + + +from magnus import Pipeline, Stub + + +def main(): + step1 = Stub(name="step1") # (1) + step2 = Stub(name="step2", what="is this thing").depends_on(step1) # (2) + + step3 = Stub(name="step3", terminate_with_success=True) # (3) + + step2 >> step3 + """ + Equivalents: + step3.depends_on(step2) + step3 << step2 + + Choose the definition that you prefer + """ + + pipeline = Pipeline(start_at=step1, steps=[step1, step2, step3], add_terminal_nodes=True) # (4) + + pipeline.execute() + + +if __name__ == "__main__": + main() diff --git a/examples/mocking.yaml b/examples/mocking.yaml new file mode 100644 index 00000000..fe27f2b8 --- /dev/null +++ b/examples/mocking.yaml @@ -0,0 +1,31 @@ +dag: + description: | + This is a simple pipeline that does 3 steps in sequence. + + step 1 >> step 2 >> step 3 >> success + + All the steps are mocked and they will just pass through. + Use this pattern to define the skeleton of your pipeline and flesh out the steps later. + + Note that you can give any arbitrary keys to the steps (like step 2). This is handy + to mock steps within mature pipelines. + + You can run this pipeline by: + magnus execute -f examples/mocking.yaml + start_at: step 1 + steps: + step 1: + type: stub + next: step 2 + step 2: + type: stub + what: is this thing? + It: does not matter!! + next: step 3 + step 3: + type: stub + next: success + success: + type: success + fail: + type: fail diff --git a/examples/on-failure.yaml b/examples/on-failure.yaml new file mode 100644 index 00000000..7249c695 --- /dev/null +++ b/examples/on-failure.yaml @@ -0,0 +1,31 @@ +dag: + description: | + This is a simple pipeline to demonstrate failure in a step. + + The default behavior is to traverse to step type fail and mark the run as failed. 
+ But you can control it by providing on_failure. + + In this example: step 1 fails and moves to step 3 skipping step 2. The pipeline status + is considered to be success. + + step 1 (FAIL) >> step 3 >> success + + You can run this pipeline by magnus execute -f examples/on-failure.yaml + start_at: step 1 + steps: + step 1: + type: task + command_type: shell + command: exit 1 # This will fail! + next: step 2 + on_failure: step 3 + step 2: + type: stub # This step will never reach + next: step 3 + step 3: + type: stub + next: success + success: + type: success + fail: + type: fail diff --git a/examples/on_failure.py b/examples/on_failure.py new file mode 100644 index 00000000..40c96f4b --- /dev/null +++ b/examples/on_failure.py @@ -0,0 +1,38 @@ +""" +This is a simple pipeline to demonstrate failure in a step. + + The default behavior is to traverse to step type fail and mark the run as failed. + But you can control it by providing on_failure. + + In this example: step 1 fails and moves to step 3 skipping step 2. The pipeline status + is considered to be success. + + step 1 (FAIL) >> step 3 >> success + + You can run this example by: + python examples/on_failure.py +""" + +from magnus import Pipeline, Stub, Task + + +def main(): + step_1 = Task(name="step 1", command="exit 1", command_type="shell") + step_2 = Stub(name="step 2") + + step_3 = Stub(name="step 3", terminate_with_success=True) + + step_1.on_failure = step_3.name + + step_1 >> step_2 >> step_3 + + pipeline = Pipeline( + steps=[step_1, step_2, step_3], + start_at=step_1, + add_terminal_nodes=True, + ) + pipeline.execute() + + +if __name__ == "__main__": + main() diff --git a/examples/parallel-fail.yaml b/examples/parallel-fail.yaml new file mode 100644 index 00000000..e622d9e2 --- /dev/null +++ b/examples/parallel-fail.yaml @@ -0,0 +1,53 @@ +dag: + description: | + This is a simple pipeline that does 2 parallel branches at step 2. + step1 inside branch_a fails resulting in step2 to fail and eventually the graph. + + Note that the branches schema is same as dag schema. + + You can run this pipeline by: + magnus execute -f examples/parallel-fail.yaml + start_at: step 1 + steps: + step 1: + type: stub + next: step 2 + step 2: + type: parallel + next: step 3 + branches: + branch_a: + start_at: step 1 + steps: + step 1: + type: task + command_type: shell + command: exit 1 # This will fail + next: step 2 + step 2: + type: stub + next: success + success: + type: success + fail: + type: fail + branch_b: + start_at: step 1 + steps: + step 1: + type: stub + next: step 2 + step 2: + type: stub + next: success + success: + type: success + fail: + type: fail + step 3: + type: stub + next: success + success: + type: success + fail: + type: fail diff --git a/examples/parameters.py b/examples/parameters.py new file mode 100644 index 00000000..1c7aeb20 --- /dev/null +++ b/examples/parameters.py @@ -0,0 +1,91 @@ +""" +The initial parameters defined in the parameters file are: +simple: 1 +inner: + x: 10 + y: "hello" + +You can execute this pipeline by: python examples/parameters.py + +""" + +from pydantic import BaseModel + + +class InnerModel(BaseModel): + """ + Captures the "inner" part of the parameters. + The model definition can be as nested as you want. + """ + + x: int + y: str + + +class NestedModel(BaseModel): # (1) + """ + Captures the whole parameter space of the application. + """ + + simple: int + inner: InnerModel + + +def display(simple: int, inner: InnerModel): # (2) + """ + The parameter "simple" and "inner" can be accessed by name. 
+ Magnus understands the parameter "inner" as a pydantic model from + annotation and casts it as a pydantic model. + """ + print(simple) + print(inner) + + +def return_parameters(simple: int, inner: InnerModel) -> NestedModel: # (3) + """ + The parameter "simple" and "inner" can be accessed by name. + You can redefine the parameters by returning a pydantic model. + """ + simple = 2 + inner.x = 30 + inner.y = "world!!" + + return NestedModel(simple=simple, inner=inner) + + +""" +The below code is only to provide a full working example. + +In the real world, you can "box magnus" in pipeline definition either in +python or yaml without cluttering your application code. +""" + + +def main(): + from magnus import Pipeline, Task + + display = Task(name="display", command="examples.parameters.display") + return_parameters = Task( + name="return_parameters", + command="examples.parameters.return_parameters", + terminate_with_success=True, + ) + + display >> return_parameters + + pipeline = Pipeline( + start_at=display, + steps=[display, return_parameters], + add_terminal_nodes=True, + ) + + run_log = pipeline.execute(parameters_file="examples/parameters_initial.yaml") + params = run_log.parameters + + ## Reflects the changes done by "return_parameters" function call. + assert params["simple"] == 2 + assert params["inner"] == {"x": 30, "y": "world!!"} + + +if __name__ == "__main__": + main() diff --git a/examples/parameters_api.py b/examples/parameters_api.py new file mode 100644 index 00000000..26c27ea2 --- /dev/null +++ b/examples/parameters_api.py @@ -0,0 +1,106 @@ +""" +The initial parameters defined in the parameters file are: +simple: 1 +inner: + x: 10 + y: "hello" + + +You can run this pipeline by: python examples/parameters_api.py + +""" + +from pydantic import BaseModel + + +class InnerModel(BaseModel): + """ + Captures the "inner" part of the parameters. + The model definition can be as nested as you want. + """ + + x: int + y: str + + +class NestedModel(BaseModel): + """ + Captures the whole parameter space of the application. + """ + + simple: int + inner: InnerModel + + +def display(simple: int, inner: InnerModel): + """ + The parameter "simple" and "inner" can be accessed by name. + Magnus understands the parameter "inner" as a pydantic model + from annotation and returns a pydantic model + """ + print(simple) + print(inner) + + +def set_and_get(): + """ + You can also use the python API for fine grained control if functional + specification does not fit your needs. + + get_parameter can be used to either + - return a specific parameter/model if a key is provided. + - return the whole parameter space casted as a + pydantic model or as a dictionary. + + set_parameter can be used to set a parameter/model. + + """ + from magnus import get_parameter, set_parameter + + # You can also get all the parameters as a pydantic model. + all_parameters = get_parameter(cast_as=NestedModel) # (1) + print(all_parameters) + ">>> # simple=1 inner=InnerModel(x=10, y='hello')" + + # get the parameter "inner" and cast it as InnerModel + b = get_parameter(key="inner", cast_as=InnerModel) + + b.x = 100 + b.y = "world" + + # set the parameter "inner" to the new value + set_parameter(inner=b) # (2) + + +""" +The below code is only to provide a full working example. + +In the real world, you can "box magnus" in pipeline definition +either in python or yaml without cluttering your application code. 
+""" + + +def main(): + from magnus import Pipeline, Task + + display = Task(name="display", command="examples.parameters.display") + + set_and_get = Task( + name="set_and_get", + command="examples.parameters.set_and_get", + terminate_with_success=True, + ) + + display >> set_and_get + + pipeline = Pipeline( + start_at=display, + steps=[display, set_and_get], + add_terminal_nodes=True, + ) + + pipeline.execute(parameters_file="examples/parameters_initial.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/parameters_env.yaml b/examples/parameters_env.yaml new file mode 100644 index 00000000..e8c1567b --- /dev/null +++ b/examples/parameters_env.yaml @@ -0,0 +1,37 @@ +dag: + description: | + This is a simple pipeline that demonstrates how to use + environment variables to access parameters. + + All parameters are prefixed by MAGNUS_PRM_ in json serialized form. + To set a parameter, you need to set the environment variable with the prefix + + You can run this example: + magnus execute -f examples/parameters_env.yaml -p examples/parameters_initial.yaml + + start_at: display + steps: + display: + type: task + command_type: shell + command: env | grep "MAGNUS_PRM" # (1) + # prints MAGNUS_PRM_simple=1 + # prints MAGNUS_PRM_inner={"x": 10, "y": "hello"} + next: update params + update params: + type: task + command_type: shell + next: display again + command: | # (2) + export MAGNUS_PRM_simple=10 && + export MAGNUS_PRM_inner='{"x": 100, "y": "world"}' + display again: + type: task + command: examples.parameters.display # (3) + # prints MAGNUS_PRM_simple=10 + # prints MAGNUS_PRM_inner={"x": 100, "y": "world"} + next: success + success: + type: success + fail: + type: fail diff --git a/examples/parameters_flow.yaml b/examples/parameters_flow.yaml new file mode 100644 index 00000000..61e3412d --- /dev/null +++ b/examples/parameters_flow.yaml @@ -0,0 +1,21 @@ +dag: + description: | + Setting and accessing parameters + start_at: access initial parameters + steps: + access initial parameters: + type: task + command: examples.parameters.display + next: return parameters + return parameters: + type: task + command: examples.parameters.return_parameters + next: show final parameters + show final parameters: + type: task + command: examples.parameters.display + next: success + success: + type: success + fail: + type: fail diff --git a/examples/parameters_initial.yaml b/examples/parameters_initial.yaml new file mode 100644 index 00000000..fb0a5d9e --- /dev/null +++ b/examples/parameters_initial.yaml @@ -0,0 +1,4 @@ +simple: 1 +inner: + x: 10 + y: "hello" diff --git a/examples/python-tasks-argo.py b/examples/python-tasks-argo.py new file mode 100644 index 00000000..dcb48122 --- /dev/null +++ b/examples/python-tasks-argo.py @@ -0,0 +1,38 @@ +""" +This is a simple pipeline that does 2 steps in sequence. + In this example: + 1. First step: returns a "parameter" x as a Pydantic model + 2. Second step: Consumes that parameter and prints it + + This pipeline demonstrates one way to pass small data from one step to another. 
+ + You can run this pipeline by: python examples/python-tasks.py +""" + +from magnus import Pipeline, Task + + +def main(): + step1 = Task( + name="step1", + command="examples.functions.return_parameter", + ) # (1) + step2 = Task( + name="step2", + command="examples.functions.display_parameter", + terminate_with_success=True, + ).depends_on( + step1 + ) # (2), (3) + + pipeline = Pipeline( + start_at=step1, + steps=[step1, step2], + add_terminal_nodes=True, + ) # (4) + + pipeline.execute(configuration_file="examples/configs/argo-config.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/python-tasks.py b/examples/python-tasks.py new file mode 100644 index 00000000..d5a67311 --- /dev/null +++ b/examples/python-tasks.py @@ -0,0 +1,37 @@ +""" +This is a simple pipeline that does 2 steps in sequence. + In this example: + 1. First step: returns a "parameter" x as a Pydantic model + 2. Second step: Consumes that parameter and prints it + + This pipeline demonstrates one way to pass small data from one step to another. + + You can run this pipeline by: python examples/python-tasks.py +""" +from magnus import Pipeline, Task + + +def main(): + step1 = Task( + name="step1", + command="examples.functions.return_parameter", + ) # (1) + step2 = Task( + name="step2", + command="examples.functions.display_parameter", + terminate_with_success=True, + ).depends_on( + step1 + ) # (2), (3) + + pipeline = Pipeline( + start_at=step1, + steps=[step1, step2], + add_terminal_nodes=True, + ) # (4) + + pipeline.execute() + + +if __name__ == "__main__": + main() diff --git a/examples/python-tasks.yaml b/examples/python-tasks.yaml new file mode 100644 index 00000000..de805b3e --- /dev/null +++ b/examples/python-tasks.yaml @@ -0,0 +1,26 @@ +dag: + description: | + This is a simple pipeline that does 3 steps in sequence. + In this example: + 1. First step: returns a "parameter" x as a Pydantic model + 2. Second step: Consumes that parameter and prints it + + This pipeline demonstrates one way to pass small data from one step to another. + + You can run this pipeline by: magnus execute -f examples/python-tasks.yaml + start_at: step 1 + steps: + step 1: + type: task + command_type: python # (2) + command: examples.functions.return_parameter # (1) + next: step 2 + step 2: + type: task + command_type: python + command: examples.functions.display_parameter + next: success + success: + type: success + fail: + type: fail diff --git a/examples/retry-fail.yaml b/examples/retry-fail.yaml new file mode 100644 index 00000000..777895ef --- /dev/null +++ b/examples/retry-fail.yaml @@ -0,0 +1,41 @@ +dag: + description: | + This is a simple pipeline that demonstrates retrying failures. + + 1. Setup: We setup a data folder, we ignore if it is already present + 2. Create Content: We create a "hello.txt" and "put" the file in catalog + 3. Retrieve Content: We "get" the file "hello.txt" from the catalog and show the contents + 5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion. 
+ + + You can run this pipeline by: + magnus execute -f examples/retry-fail.yaml -c examples/configs/fs-catalog-run_log.yaml \ + --run-id wrong-file-name + start_at: Setup + steps: + Setup: + type: task + command_type: shell + command: mkdir -p data # (1) + next: Create Content + Create Content: + type: task + command_type: shell + command: | + echo "Hello from magnus" >> data/hello.txt + next: Retrieve Content + catalog: # (2) + put: + - data/hello.txt + Retrieve Content: + type: task + command_type: shell + command: cat data/hello1.txt # (3) + catalog: + get: + - "data/hello.txt" # You can use wild cards following glob pattern + next: success + success: + type: success + fail: + type: fail diff --git a/examples/retry-fixed.yaml b/examples/retry-fixed.yaml new file mode 100644 index 00000000..5c09574d --- /dev/null +++ b/examples/retry-fixed.yaml @@ -0,0 +1,44 @@ +dag: + description: | + This is a simple pipeline that demonstrates passing data between steps. + + 1. Setup: We setup a data folder, we ignore if it is already present + 2. Create Content: We create a "hello.txt" and "put" the file in catalog + 3. Clean up to get again: We remove the data folder. Note that this is stubbed to prevent + accidental deletion of your contents. You can change type to task to make really run. + 4. Retrieve Content: We "get" the file "hello.txt" from the catalog and show the contents + 5. Cleanup: We remove the data folder. Note that this is stubbed to prevent accidental deletion. + + + You can run this pipeline by: + magnus execute -f examples/retry-fixed.yaml -c examples/configs/fs-catalog-run_log.yaml \ + --use-cached wrong-file-name + + start_at: Setup + steps: + Setup: + type: task # (1) + command_type: shell + command: mkdir -p data + next: Create Content + Create Content: + type: stub # (2) + command_type: shell + command: | + echo "Hello from magnus" >> data/hello.txt + next: Retrieve Content + catalog: + put: + - data/hello.txt + Retrieve Content: + type: task + command_type: shell + command: cat data/hello.txt + catalog: + get: + - "data/hello.txt" # You can use wild cards following glob pattern + next: success + success: + type: success + fail: + type: fail diff --git a/examples/secrets.env b/examples/secrets.env new file mode 100644 index 00000000..62cbd7c5 --- /dev/null +++ b/examples/secrets.env @@ -0,0 +1,2 @@ +export shell_type="shell type secret" # (1) +kv_style=value # A key value secret type. # (2) diff --git a/examples/secrets.py b/examples/secrets.py new file mode 100644 index 00000000..9fa7fa9d --- /dev/null +++ b/examples/secrets.py @@ -0,0 +1,34 @@ +""" +An example pipeline to demonstrate how to use the secrets manager. 
+ +You can run this pipeline by: + python run examples/secrets.py +""" + +from magnus import get_secret + + +def show_secret(): + shell_variable = get_secret("shell_type") # (1) + key_value_type = get_secret("kv_style") + + assert shell_variable == "shell type secret" + assert key_value_type == "value" + + +def main(): + from magnus import Pipeline, Task + + show = Task( + name="show secret", + command="examples.secrets.show_secret", + terminate_with_success=True, + ) + + pipeline = Pipeline(steps=[show], start_at=show, add_terminal_nodes=True) + + pipeline.execute(configuration_file="examples/configs/dotenv.yaml") + + +if __name__ == "__main__": + main() diff --git a/examples/secrets_env.py b/examples/secrets_env.py new file mode 100644 index 00000000..9852aaae --- /dev/null +++ b/examples/secrets_env.py @@ -0,0 +1,35 @@ +""" +An example pipeline to demonstrate how to use the secrets manager. + +Run this pipeline by: + secret="secret_value" MAGNUS_CONFIGURATION_FILE=examples/configs/secrets-env-default.yaml \ + python examples/secrets_env.py + +""" + + +from magnus import get_secret + + +def show_secret(): + secret = get_secret("secret") + + assert secret == "secret_value" + + +def main(): + from magnus import Pipeline, Task + + show = Task( + name="show secret", + command="examples.secrets_env.show_secret", + terminate_with_success=True, + ) + + pipeline = Pipeline(steps=[show], start_at=show, add_terminal_nodes=True) + + pipeline.execute() + + +if __name__ == "__main__": + main() diff --git a/magnus/__init__.py b/magnus/__init__.py index 29bdb406..98c1aee8 100644 --- a/magnus/__init__.py +++ b/magnus/__init__.py @@ -1,10 +1,13 @@ # ruff: noqa +# TODO: Might need to add Rich to pyinstaller part import logging +from logging.config import dictConfig -from yachalk import chalk +from magnus import defaults -chalk_colors = {"debug": chalk.grey, "info": chalk.green, "warning": chalk.yellow_bright, "error": chalk.bold.red} +dictConfig(defaults.LOGGING_CONFIG) +logger = logging.getLogger(defaults.LOGGER_NAME) from magnus.interaction import ( end_interactive_session, @@ -13,30 +16,19 @@ get_object, get_parameter, get_run_id, + get_run_log, get_secret, put_in_catalog, put_object, start_interactive_session, - store_parameter, + set_parameter, track_this, ) # noqa -from magnus.sdk import AsIs, Pipeline, Task # noqa +from magnus.sdk import Stub, Pipeline, Task, Parallel, Map, Catalog, Success, Fail # noqa -class ColorFormatter(logging.Formatter): - """ - Custom class to get colors to logs - """ +# TODO: Think of model registry as a central place to store models. +# TODO: Implement Sagemaker pipelines as a executor. - def __init__(self, *args, **kwargs): - # can't do super(...) 
here because Formatter is an old school class - logging.Formatter.__init__(self, *args, **kwargs) # pragma: no cover - def format(self, record): # pragma: no cover - levelname = record.levelname - color = chalk_colors[levelname.lower()] - message = logging.Formatter.format(self, record) - return color(message) - - -logging.ColorFormatter = ColorFormatter # type: ignore +# TODO: Think of way of generating dag hash without executor configuration diff --git a/magnus/catalog.py b/magnus/catalog.py index c0b11d51..3daf3e9a 100644 --- a/magnus/catalog.py +++ b/magnus/catalog.py @@ -1,83 +1,40 @@ import logging -import os -import shutil -from pathlib import Path -from typing import List +from abc import ABC, abstractmethod +from typing import List, Optional -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict -from magnus import defaults, utils +import magnus.context as context +from magnus import defaults +from magnus.datastore import DataCatalog -logger = logging.getLogger(defaults.NAME) - - -def get_run_log_store(): - """ - This method should be called after the executor module has been populated with all the systems. - - This method retrieves the run log store from the global executor. - - Returns: - object: The run log store - """ - from magnus import context - - return context.executor.run_log_store - - -def is_catalog_out_of_sync(catalog, synced_catalogs=None) -> bool: - """ - Check if the catalog items are out of sync from already cataloged objects. - If they are, return False. - If the object does not exist or synced catalog does not exist, return True - """ - if not synced_catalogs: - return True # If nothing has been synced in the past - - for synced_catalog in synced_catalogs: - if synced_catalog.catalog_relative_path == catalog.catalog_relative_path: - if synced_catalog.data_hash == catalog.data_hash: - return False - return True - - return True # The object does not exist, sync it +logger = logging.getLogger(defaults.LOGGER_NAME) # --8<-- [start:docs] -class BaseCatalog: +class BaseCatalog(ABC, BaseModel): """ Base Catalog class definition. All implementations of the catalog handler should inherit and extend this class. - - Note: As a general guideline, do not extract anything from the config to set class level attributes. - Integration patterns modify the config after init to change behaviors. - Access config properties using getters/property of the class. """ - service_name = "" + service_name: str = "" + service_type: str = "catalog" + model_config = ConfigDict(extra="forbid") - class Config(BaseModel): - compute_data_folder: str = defaults.COMPUTE_DATA_FOLDER - - def __init__(self, config: dict, **kwargs): # pylint: disable=unused-argument - config = config or {} - self.config = self.Config(**config) + @property + def _context(self): + return context.run_context @property def compute_data_folder(self) -> str: - """ - Returns the compute data folder defined as per the config of the catalog. + return defaults.COMPUTE_DATA_FOLDER - Returns: - [str]: The compute data folder as defined or defaults to magnus default 'data/' - """ - return self.config.compute_data_folder - - def get(self, name: str, run_id: str, compute_data_folder=None, **kwargs) -> List[object]: - # pylint: disable=unused-argument + @abstractmethod + def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]: """ Get the catalog item by 'name' for the 'run id' and store it in compute data folder. 
@@ -96,15 +53,15 @@ def get(self, name: str, run_id: str, compute_data_folder=None, **kwargs) -> Lis """ raise NotImplementedError + @abstractmethod def put( self, name: str, run_id: str, - compute_data_folder=None, - synced_catalogs=None, + compute_data_folder: str = "", + synced_catalogs: Optional[List[DataCatalog]] = None, **kwargs, - ) -> List[object]: - # pylint: disable=unused-argument + ) -> List[DataCatalog]: """ Put the file by 'name' from the 'compute_data_folder' in the catalog for the run_id. @@ -124,6 +81,7 @@ def put( """ raise NotImplementedError + @abstractmethod def sync_between_runs(self, previous_run_id: str, run_id: str): """ Given run_id of a previous run, sync them to the catalog of the run given by run_id @@ -152,9 +110,9 @@ class DoNothingCatalog(BaseCatalog): """ - service_name = "do-nothing" + service_name: str = "do-nothing" - def get(self, name: str, run_id: str, compute_data_folder=None, **kwargs) -> List[object]: + def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]: """ Does nothing """ @@ -165,10 +123,10 @@ def put( self, name: str, run_id: str, - compute_data_folder=None, - synced_catalogs=None, + compute_data_folder: str = "", + synced_catalogs: Optional[List[DataCatalog]] = None, **kwargs, - ) -> List[object]: + ) -> List[DataCatalog]: """ Does nothing """ @@ -181,211 +139,3 @@ def sync_between_runs(self, previous_run_id: str, run_id: str): """ logger.info("Using a do-nothing catalog, doing nothing while sync between runs") ... - - -class FileSystemCatalog(BaseCatalog): - """ - A Catalog handler that uses the local file system for cataloging. - - Note: Do not use this if the steps of the pipeline run on different compute environments. - - Example config: - - catalog: - type: file-system - config: - catalog_location: The location to store the catalog. - compute_data_folder: The folder to source the data from. - - """ - - service_name = "file-system" - - class Config(BaseCatalog.Config): - catalog_location: str = defaults.CATALOG_LOCATION_FOLDER - - @property - def catalog_location(self) -> str: - """ - Get the catalog location from the config. 
- If its not defined, use the magnus default - - Returns: - str: The catalog location as defined by the config or magnus default '.catalog' - """ - return self.config.catalog_location # type: ignore - - def get(self, name: str, run_id: str, compute_data_folder=None, **kwargs) -> List[object]: - """ - Get the file by matching glob pattern to the name - - Args: - name ([str]): A glob matching the file name - run_id ([str]): The run id - - Raises: - Exception: If the catalog location does not exist - - Returns: - List(object) : A list of catalog objects - """ - logger.info(f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}") - - copy_to = self.compute_data_folder - if compute_data_folder: - copy_to = compute_data_folder - - copy_to = Path(copy_to) # type: ignore - - catalog_location = self.catalog_location - run_catalog = Path(catalog_location) / run_id / copy_to - - logger.debug(f"Copying objects to {copy_to} from the run catalog location of {run_catalog}") - - if not utils.does_dir_exist(run_catalog): - msg = ( - f"Expected Catalog to be present at: {run_catalog} but not found.\n" - "Note: Please make sure that some data was put in the catalog before trying to get from it.\n" - ) - raise Exception(msg) - - # Iterate through the contents of the run_catalog and copy the files that fit the name pattern - # We should also return a list of data hashes - glob_files = run_catalog.glob(name) - logger.debug(f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}") - - data_catalogs = [] - run_log_store = get_run_log_store() - for file in glob_files: - if file.is_dir(): - # Need not add a data catalog for the folder - continue - - relative_file_path = file.relative_to(run_catalog) - - data_catalog = run_log_store.create_data_catalog(str(relative_file_path)) - data_catalog.catalog_handler_location = catalog_location - data_catalog.catalog_relative_path = str(relative_file_path) - data_catalog.data_hash = utils.get_data_hash(str(file)) - data_catalog.stage = "get" - data_catalogs.append(data_catalog) - - # Make the directory in the data folder if required - Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True) - shutil.copy(file, copy_to / relative_file_path) - - logger.info(f"Copied {file} from {run_catalog} to {copy_to}") - - return data_catalogs - - def put( - self, - name: str, - run_id: str, - compute_data_folder=None, - synced_catalogs=None, - **kwargs, - ) -> List[object]: - """ - Put the files matching the glob pattern into the catalog. - - If previously synced catalogs are provided, and no changes were observed, we do not sync them. - - Args: - name (str): The glob pattern of the files to catalog - run_id (str): The run id of the run - compute_data_folder (str, optional): The compute data folder to sync from. Defaults to settings default. - synced_catalogs (dict, optional): dictionary of previously synced catalogs. Defaults to None. - - Raises: - Exception: If the compute data folder does not exist. 
- - Returns: - List(object) : A list of catalog objects - """ - logger.info(f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}") - - copy_from = self.compute_data_folder - if compute_data_folder: - copy_from = compute_data_folder - copy_from = Path(copy_from) # type: ignore - - catalog_location = self.catalog_location - run_catalog = Path(catalog_location) / run_id - utils.safe_make_dir(run_catalog) - - logger.debug(f"Copying objects from {copy_from} to the run catalog location of {run_catalog}") - - if not utils.does_dir_exist(copy_from): - msg = ( - f"Expected compute data folder to be present at: {compute_data_folder} but not found. \n" - "Note: Magnus does not create the compute data folder for you. Please ensure that the folder exists.\n" - ) - raise Exception(msg) - - # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog - # We should also return a list of datastore.DataCatalog items - - glob_files = copy_from.glob(name) # type: ignore - logger.debug(f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}") - - data_catalogs = [] - run_log_store = get_run_log_store() - for file in glob_files: - if file.is_dir(): - # Need not add a data catalog for the folder - continue - - relative_file_path = file.relative_to(".") - - data_catalog = run_log_store.create_data_catalog(str(relative_file_path)) - data_catalog.catalog_handler_location = catalog_location - data_catalog.catalog_relative_path = run_id + os.sep + str(relative_file_path) - data_catalog.data_hash = utils.get_data_hash(str(file)) - data_catalog.stage = "put" - data_catalogs.append(data_catalog) - - if is_catalog_out_of_sync(data_catalog, synced_catalogs): - logger.info(f"{data_catalog.name} was found to be changed, syncing") - - # Make the directory in the catalog if required - Path(run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True) - shutil.copy(file, run_catalog / relative_file_path) - else: - logger.info(f"{data_catalog.name} was found to be unchanged, ignoring syncing") - return data_catalogs - - def sync_between_runs(self, previous_run_id: str, run_id: str): - """ - Given the previous run id, sync the catalogs between the current one and previous - - Args: - previous_run_id (str): The previous run id to sync the catalogs from - run_id (str): The run_id to which the data catalogs should be synced to. 
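With this refactor BaseCatalog becomes an abstract pydantic model, so catalog handlers subclass it and implement get, put and sync_between_runs against the new signatures. A minimal sketch of such an implementation follows; the class name is hypothetical, and how a plugin is registered with magnus is outside this diff:

from typing import List, Optional

from magnus.catalog import BaseCatalog
from magnus.datastore import DataCatalog


class LoggingCatalog(BaseCatalog):
    """A hypothetical catalog handler that only logs what it is asked to do."""

    service_name: str = "logging-catalog"

    def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
        print(f"get {name} for run {run_id}")
        return []

    def put(
        self,
        name: str,
        run_id: str,
        compute_data_folder: str = "",
        synced_catalogs: Optional[List[DataCatalog]] = None,
        **kwargs,
    ) -> List[DataCatalog]:
        print(f"put {name} for run {run_id}")
        return []

    def sync_between_runs(self, previous_run_id: str, run_id: str):
        print(f"sync catalog from {previous_run_id} to {run_id}")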
- - Raises: - Exception: If the previous run log does not exist in the catalog - - """ - logger.info( - f"Using the {self.service_name} catalog and syncing catalogs" - "between old: {previous_run_id} to new: {run_id}" - ) - - catalog_location = Path(self.catalog_location) - run_catalog = catalog_location / run_id - utils.safe_make_dir(run_catalog) - - if not utils.does_dir_exist(catalog_location / previous_run_id): - msg = ( - f"Catalogs from previous run : {previous_run_id} are not found.\n" - "Note: Please provision the catalog objects generated by previous run in the same catalog location" - " as the current run, even if the catalog handler for the previous run was different" - ) - raise Exception(msg) - - cataloged_files = (catalog_location / previous_run_id).glob("**/**") - - for cataloged_file in cataloged_files: - shutil.copy(cataloged_file, run_catalog) - logger.info(f"Copied file from: {cataloged_file} to {run_catalog}") diff --git a/magnus/cli.py b/magnus/cli.py index 31cd1246..be04f932 100644 --- a/magnus/cli.py +++ b/magnus/cli.py @@ -1,14 +1,12 @@ import logging -from logging.config import fileConfig import click from click_plugins import with_plugins -from pkg_resources import iter_entry_points, resource_filename +from pkg_resources import iter_entry_points -from magnus import defaults, docker_utils, pipeline +from magnus import defaults, entrypoints -fileConfig(resource_filename(__name__, "log_config.ini")) -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) @with_plugins(iter_entry_points("magnus.cli_plugins")) @@ -41,19 +39,30 @@ def cli(): show_default=True, type=click.Choice(["INFO", "DEBUG", "WARNING", "ERROR", "FATAL"]), ) -@click.option("--tag", help="A tag attached to the run") +@click.option("--tag", default="", help="A tag attached to the run") @click.option("--run-id", help="An optional run_id, one would be generated if not provided") @click.option("--use-cached", help="Provide the previous run_id to re-run.", show_default=True) def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cached): # pragma: no cover """ - External entry point to executing a pipeline. This command is most commonly used - either to execute a pipeline or to translate the pipeline definition to another language. - - You can re-run an older run by providing the run_id of the older run in --use-cached. - Ensure that the catalogs and run logs are accessible by the present configuration. + Execute a pipeline + + Usage: magnus execute [OPTIONS] + + Options: + -f, --file TEXT The pipeline definition file [default: pipeline.yaml] + -c, --config-file TEXT config file, in yaml, to be used for the run [default: None] + -p, --parameters-file TEXT Parameters, in yaml, accessible by the application [default: None] + --log-level One of [INFO|DEBUG|WARNING|ERROR|FATAL] + The log level + [default: INFO] + --tag TEXT A tag attached to the run + [default: ] + --run-id TEXT An optional run_id, one would be generated if not + provided + --use-cached TEXT Provide the previous run_id to re-run. 
""" logger.setLevel(log_level) - pipeline.execute( + entrypoints.execute( configuration_file=config_file, pipeline_file=file, tag=tag, @@ -63,53 +72,6 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach ) -@cli.command("execute_step", short_help="Execute a single step of the pipeline") -@click.argument("step_name") -@click.option("-f", "--file", default="pipeline.yaml", help="The pipeline definition file", show_default=True) -@click.option( - "-c", "--config-file", default=None, help="config file, in yaml, to be used for the run", show_default=True -) -@click.option( - "-p", - "--parameters-file", - default=None, - help="Parameters, in yaml, accessible by the application", - show_default=True, -) -@click.option( - "--log-level", - default=defaults.LOG_LEVEL, - help="The log level", - show_default=True, - type=click.Choice(["INFO", "DEBUG", "WARNING", "ERROR", "FATAL"]), -) -@click.option("--tag", help="A tag attached to the run") -@click.option("--run-id", help="An optional run_id, one would be generated if not provided") -@click.option("--use-cached", help="Provide the previous run_id to re-run.", show_default=True) -def execute_step(step_name, file, config_file, parameters_file, log_level, tag, run_id, use_cached): # pragma: no cover - """ - External entry point to executing a single step of the pipeline. - - This command is helpful to run only one step of the pipeline in isolation. - Only the steps of the parent dag could be invoked using this method. - - You can re-run an older run by providing the run_id of the older run in --use-cached. - Ensure that the catalogs and run logs are accessible by the present configuration. - - When running map states, ensure that the parameter to iterate on is available in parameter space. - """ - logger.setLevel(log_level) - pipeline.execute_single_step( - configuration_file=config_file, - pipeline_file=file, - step_name=step_name, - tag=tag, - run_id=run_id, - parameters_file=parameters_file, - use_cached=use_cached, - ) - - @cli.command("execute_single_node", short_help="Internal entry point to execute a single node", hidden=True) @click.argument("run_id") @click.argument("step_name") @@ -143,7 +105,7 @@ def execute_single_node(run_id, step_name, map_variable, file, config_file, para logger.setLevel(log_level) # Execute the node as part of the graph execution. - pipeline.execute_single_node( + entrypoints.execute_single_node( configuration_file=config_file, pipeline_file=file, step_name=step_name, @@ -154,39 +116,9 @@ def execute_single_node(run_id, step_name, map_variable, file, config_file, para ) -@cli.command("execute_single_branch", short_help="Internal entry point to execute a single branch", hidden=True) -@click.argument("run_id") -@click.argument("branch_name") -@click.option("--map-variable", default="", help="The map variable dictionary in str", show_default=True) -@click.option("-f", "--file", default="pipeline.yaml", help="The pipeline definition file", show_default=True) -@click.option( - "-c", "--config-file", default=None, help="config file, in yaml, to be used for the run", show_default=True -) -@click.option( - "--log-level", - default=defaults.LOG_LEVEL, - help="The log level", - show_default=True, - type=click.Choice(["INFO", "DEBUG", "WARNING", "ERROR", "FATAL"]), -) -def execute_single_branch(run_id, branch_name, map_variable, file, config_file, log_level): - """ - Internal entrypoint for magnus to execute a single branch. - Currently it is only being used by local during parallel executions. 
- """ - logger.setLevel(log_level) - - pipeline.execute_single_brach( - configuration_file=config_file, - pipeline_file=file, - branch_name=branch_name, - map_variable=map_variable, - run_id=run_id, - ) - - @cli.command("execute_notebook", short_help="Entry point to execute a notebook") @click.argument("filename") +@click.option("--entrypoint", default=defaults.ENTRYPOINT.USER.value, hidden=True) @click.option( "-c", "--config-file", default=None, help="config file, in yaml, to be used for the run", show_default=True ) @@ -206,9 +138,21 @@ def execute_single_branch(run_id, branch_name, map_variable, file, config_file, ) @click.option("--data-folder", "-d", default="data/", help="The catalog data folder") @click.option("--put-in-catalog", "-put", default=None, multiple=True, help="The data to put from the catalog") +@click.option("--notebook-output-path", default="", help="The output path for the notebook") @click.option("--tag", help="A tag attached to the run") @click.option("--run-id", help="An optional run_id, one would be generated if not provided") -def execute_notebook(filename, config_file, parameters_file, log_level, data_folder, put_in_catalog, tag, run_id): +def execute_notebook( + filename, + entrypoint, + config_file, + parameters_file, + log_level, + data_folder, + put_in_catalog, + notebook_output_path, + tag, + run_id, +): """ External entry point to execute a Jupyter notebook in isolation. @@ -220,11 +164,13 @@ def execute_notebook(filename, config_file, parameters_file, log_level, data_fol if not filename.endswith(".ipynb"): raise Exception("A notebook should always have ipynb as the extension") - pipeline.execute_notebook( + entrypoints.execute_notebook( + entrypoint=entrypoint, notebook_file=filename, catalog_config=catalog_config, configuration_file=config_file, parameters_file=parameters_file, + notebook_output_path=notebook_output_path, tag=tag, run_id=run_id, ) @@ -232,6 +178,7 @@ def execute_notebook(filename, config_file, parameters_file, log_level, data_fol @cli.command("execute_function", short_help="Entry point to execute a python function") @click.argument("command") +@click.option("--entrypoint", default=defaults.ENTRYPOINT.USER.value, hidden=True) @click.option( "-c", "--config-file", default=None, help="config file, in yaml, to be used for the run", show_default=True ) @@ -253,7 +200,9 @@ def execute_notebook(filename, config_file, parameters_file, log_level, data_fol @click.option("--put-in-catalog", "-put", default=None, multiple=True, help="The data to put from the catalog") @click.option("--tag", help="A tag attached to the run") @click.option("--run-id", help="An optional run_id, one would be generated if not provided") -def execute_function(command, config_file, parameters_file, log_level, data_folder, put_in_catalog, tag, run_id): +def execute_function( + command, entrypoint, config_file, parameters_file, log_level, data_folder, put_in_catalog, tag, run_id +): """ External entry point to execute a python function in isolation. 
@@ -262,7 +211,8 @@ def execute_function(command, config_file, parameters_file, log_level, data_fold """ logger.setLevel(log_level) catalog_config = {"compute_data_folder": data_folder, "put": list(put_in_catalog) if put_in_catalog else None} - pipeline.execute_function( + entrypoints.execute_function( + entrypoint=entrypoint, command=command, catalog_config=catalog_config, configuration_file=config_file, @@ -272,49 +222,6 @@ def execute_function(command, config_file, parameters_file, log_level, data_fold ) -@cli.command("execute_nb_or_func", short_help="Entry point to execute a notebook or function") -@click.argument("run_id") -@click.argument("nb_or_func") -@click.option( - "-c", "--config-file", default=None, help="config file, in yaml, to be used for the run", show_default=True -) -@click.option( - "-p", - "--parameters-file", - default=None, - help="Parameters, in yaml, accessible by the application", - show_default=True, -) -@click.option( - "--log-level", - default=defaults.LOG_LEVEL, - help="The log level", - show_default=True, - type=click.Choice(["INFO", "DEBUG", "WARNING", "ERROR", "FATAL"]), -) -@click.option("--data-folder", "-d", default="data/", help="The catalog data folder") -@click.option("--put-in-catalog", "-put", default=None, multiple=True, help="The data to put from the catalog") -@click.option("--tag", help="A tag attached to the run") -def execute_nb_or_function( - run_id, nb_or_func, config_file, parameters_file, log_level, data_folder, put_in_catalog, tag -): - """ - Internal entry point to execute a notebook or function. - Executors other than local should use this entry point to execute the notebook or function in the requested - environment. - """ - logger.setLevel(log_level) - catalog_config = {"compute_data_folder": data_folder, "put": list(put_in_catalog) if put_in_catalog else None} - pipeline.execute_nb_or_func( - command=nb_or_func, - catalog_config=catalog_config, - configuration_file=config_file, - parameters_file=parameters_file, - tag=tag, - run_id=run_id, - ) - - @cli.command("fan", short_help="Internal entry point to fan in or out a composite node", hidden=True) @click.argument("run_id") @click.argument("step_name") @@ -348,7 +255,7 @@ def fan(run_id, step_name, mode, map_variable, file, config_file, parameters_fil logger.setLevel(log_level) # Fan in or out - pipeline.fan( + entrypoints.fan( configuration_file=config_file, pipeline_file=file, step_name=step_name, @@ -360,48 +267,6 @@ def fan(run_id, step_name, mode, map_variable, file, config_file, parameters_fil ) -@cli.command("build_docker", short_help="Utility tool to build docker images") -@click.argument("image_name") -@click.option("-f", "--docker-file", default=None, help="The dockerfile to be used. If None, we generate one") -@click.option("-s", "--style", default="poetry", help="The method used to get requirements", show_default=True) -@click.option("-t", "--tag", default="latest", help="The tag assigned to the image", show_default=True) -@click.option( - "-c", - "--commit-tag", - is_flag=True, - default=False, - help="Use commit id as tag. Over-rides tag option", - show_default=True, -) -@click.option( - "-d", "--dry-run", is_flag=True, default=False, help="Generate the dockerfile, but NOT the image", show_default=True -) -@click.option( - "--git-tracked/--all", - default=True, - help="Controls what should be added to image. 
All vs git-tracked", - show_default=True, -) -def build_docker(image_name, docker_file, style, tag, commit_tag, dry_run, git_tracked): - """ - A utility function to create docker images from the existing codebase. - - It is advised to provide your own dockerfile as much as possible. If you do not have one handy, you can use - --dry-run functionality to see if the auto-generated one suits your needs. - - If you are auto-generating the dockerfile: - BEWARE!! Over-riding the default options assumes you know what you are doing! BEWARE!! - - 1). By default, only git tracked files are added to the docker image. - - 2). The auto-generated dockerfile uses, python 3.8 as the default image and adds the current folder. - """ - docker_utils.build_docker( - image_name=image_name, - docker_file=docker_file, - style=style, - tag=tag, - commit_tag=commit_tag, - dry_run=dry_run, - git_tracked=git_tracked, - ) +# Needed for the binary creation +if __name__ == "__main__": + cli() diff --git a/magnus/context.py b/magnus/context.py index e0f61c93..ab17bd54 100644 --- a/magnus/context.py +++ b/magnus/context.py @@ -1,5 +1,34 @@ -from typing import Optional +from typing import Dict, Optional +from pydantic import BaseModel, SerializeAsAny + +from magnus.catalog import BaseCatalog +from magnus.datastore import BaseRunLogStore from magnus.executor import BaseExecutor +from magnus.experiment_tracker import BaseExperimentTracker +from magnus.graph import Graph +from magnus.secrets import BaseSecrets + + +class Context(BaseModel): + executor: SerializeAsAny[BaseExecutor] + run_log_store: SerializeAsAny[BaseRunLogStore] + secrets_handler: SerializeAsAny[BaseSecrets] + catalog_handler: SerializeAsAny[BaseCatalog] + experiment_tracker: SerializeAsAny[BaseExperimentTracker] + + pipeline_file: Optional[str] = "" + parameters_file: Optional[str] = "" + configuration_file: Optional[str] = "" + + tag: str = "" + run_id: str = "" + variables: Dict[str, str] = {} + use_cached: bool = False + original_run_id: str = "" + dag: Optional[Graph] = None + dag_hash: str = "" + execution_plan: str = "" + -executor: Optional[BaseExecutor] = None +run_context = None # type: Context # type: ignore diff --git a/magnus/datastore.py b/magnus/datastore.py index efd11b61..c2ae4252 100644 --- a/magnus/datastore.py +++ b/magnus/datastore.py @@ -1,24 +1,21 @@ from __future__ import annotations -import json import logging -import time -from enum import Enum -from pathlib import Path -from string import Template +from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union -from pydantic import BaseModel, Extra +from pydantic import BaseModel, Field -from magnus import defaults, exceptions, utils +import magnus.context as context +from magnus import defaults, exceptions -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) # Once defined these classes are sealed to any additions unless a default is provided # Breaking this rule might make magnus backwardly incompatible -class DataCatalog(BaseModel, extra=Extra.allow): # type: ignore +class DataCatalog(BaseModel, extra="allow"): """ The captured attributes of a catalog item. 
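# Sketch (not magnus code) of why the Context fields above are wrapped in
# SerializeAsAny: in pydantic v2, a field annotated with a base class serializes
# only the base-class fields unless SerializeAsAny opts into duck-typed dumping.
# The *Like classes below are illustrative stand-ins for the real services.
from pydantic import BaseModel, SerializeAsAny


class ExecutorLike(BaseModel):
    service_name: str = ""


class LocalExecutorLike(ExecutorLike):
    service_name: str = "local"
    enable_parallel: bool = False


class ContextLike(BaseModel):
    executor: SerializeAsAny[ExecutorLike]


print(ContextLike(executor=LocalExecutorLike()).model_dump())
# -> {'executor': {'service_name': 'local', 'enable_parallel': False}}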
""" @@ -56,10 +53,10 @@ class StepAttempt(BaseModel): duration: str = "" #  end_time - start_time status: str = "FAIL" message: str = "" - parameters: dict = {} + parameters: Dict[str, Any] = Field(default_factory=dict) -class CodeIdentity(BaseModel, extra=Extra.allow): # type: ignore +class CodeIdentity(BaseModel, extra="allow"): """ The captured attributes of a code identity of a step. """ @@ -82,11 +79,11 @@ class StepLog(BaseModel): step_type: str = "task" message: str = "" mock: bool = False - code_identities: List[CodeIdentity] = [] - attempts: List[StepAttempt] = [] - user_defined_metrics: dict = {} - branches: Dict[str, BranchLog] = {} # Keyed in by the branch key name - data_catalog: List[DataCatalog] = [] + code_identities: List[CodeIdentity] = Field(default_factory=list) + attempts: List[StepAttempt] = Field(default_factory=list) + user_defined_metrics: Dict[str, Any] = Field(default_factory=dict) + branches: Dict[str, BranchLog] = Field(default_factory=dict) + data_catalog: List[DataCatalog] = Field(default_factory=list) def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]: """ @@ -118,6 +115,7 @@ def add_data_catalogs(self, data_catalogs: List[DataCatalog]): Args: dict_catalogs ([DataCatalog]): A list of data catalog items """ + if not self.data_catalog: self.data_catalog = [] for data_catalog in data_catalogs: @@ -134,7 +132,7 @@ class BranchLog(BaseModel): internal_name: str status: str = "FAIL" - steps: OrderedDict[str, StepLog] = {} # type: ignore # StepLogs keyed by internal name + steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict) def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]: """ @@ -160,7 +158,7 @@ def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]: # Needed for BranchLog of StepLog to be referenced -StepLog.update_forward_refs() +StepLog.model_rebuild() class RunLog(BaseModel): @@ -174,9 +172,9 @@ class RunLog(BaseModel): tag: Optional[str] = "" original_run_id: Optional[str] = "" status: str = defaults.FAIL - steps: OrderedDict[str, StepLog] = {} # type: ignore # Has the steps keyed by internal_name - parameters: dict = {} - run_config: dict = {} + steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict) + parameters: Dict[str, Any] = Field(default_factory=dict) + run_config: Dict[str, Any] = Field(default_factory=dict) def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]: """ @@ -288,25 +286,19 @@ def search_step_by_internal_name(self, i_name: str) -> Tuple[StepLog, Union[Bran # If you want to customize dataclass, extend BaseRunLogStore and implement the methods as per the specification -# --8<-- [start:docs] -class BaseRunLogStore: +class BaseRunLogStore(ABC, BaseModel): """ The base class of a Run Log Store with many common methods implemented. - - Note: As a general guideline, do not extract anything from the config to set class level attributes. - Integration patterns modify the config after init to change behaviors. - Access config properties using getters/property of the class. 
""" - service_name = "" + service_name: str = "" + service_type: str = "run_log_store" - class Config(BaseModel): - pass - - def __init__(self, config): - config = config or {} - self.config = self.Config(**config) + @property + def _context(self): + return context.run_context + @abstractmethod def create_run_log( self, run_id: str, @@ -330,6 +322,7 @@ def create_run_log( raise NotImplementedError + @abstractmethod def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog: """ Retrieves a Run log from the database using the config and the run_id @@ -351,6 +344,7 @@ def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog raise NotImplementedError + @abstractmethod def put_run_log(self, run_log: RunLog, **kwargs): """ Puts the Run Log in the database as defined by the config @@ -379,7 +373,7 @@ def update_run_log_status(self, run_id: str, status: str): run_log.status = status self.put_run_log(run_log) - def get_parameters(self, run_id: str, **kwargs) -> dict: # pylint: disable=unused-argument + def get_parameters(self, run_id: str, **kwargs) -> dict: """ Get the parameters from the Run log defined by the run_id @@ -398,7 +392,7 @@ def get_parameters(self, run_id: str, **kwargs) -> dict: # pylint: disable=unus run_log = self.get_run_log_by_id(run_id=run_id) return run_log.parameters - def set_parameters(self, run_id: str, parameters: dict, **kwargs): # pylint: disable=unused-argument + def set_parameters(self, run_id: str, parameters: dict, **kwargs): """ Update the parameters of the Run log with the new parameters @@ -419,7 +413,7 @@ def set_parameters(self, run_id: str, parameters: dict, **kwargs): # pylint: di run_log.parameters.update(parameters) self.put_run_log(run_log=run_log) - def get_run_config(self, run_id: str, **kwargs) -> dict: # pylint: disable=unused-argument + def get_run_config(self, run_id: str, **kwargs) -> dict: """ Given a run_id, return the run_config used to perform the run. 
@@ -433,7 +427,7 @@ def get_run_config(self, run_id: str, **kwargs) -> dict: # pylint: disable=unus run_log = self.get_run_log_by_id(run_id=run_id) return run_log.run_config - def set_run_config(self, run_id: str, run_config: dict, **kwargs): # pylint: disable=unused-argument + def set_run_config(self, run_id: str, run_config: dict, **kwargs): """Set the run config used to run the run_id Args: @@ -445,7 +439,7 @@ def set_run_config(self, run_id: str, run_config: dict, **kwargs): # pylint: di run_log.run_config.update(run_config) self.put_run_log(run_log=run_log) - def create_step_log(self, name: str, internal_name: str, **kwargs): # pylint: disable=unused-argument + def create_step_log(self, name: str, internal_name: str, **kwargs): """ Create a step log by the name and internal name @@ -463,7 +457,7 @@ def create_step_log(self, name: str, internal_name: str, **kwargs): # pylint: d logger.info(f"{self.service_name} Creating a Step Log: {internal_name}") return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED) - def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog: # pylint: disable=unused-argument + def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog: """ Get a step log from the datastore for run_id and the internal naming of the step log @@ -490,7 +484,7 @@ def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog: # step_log, _ = run_log.search_step_by_internal_name(internal_name) return step_log - def add_step_log(self, step_log: StepLog, run_id: str, **kwargs): # pylint: disable=unused-argument + def add_step_log(self, step_log: StepLog, run_id: str, **kwargs): """ Add the step log in the run log as identified by the run_id in the datastore @@ -520,7 +514,7 @@ def add_step_log(self, step_log: StepLog, run_id: str, **kwargs): # pylint: dis branch.steps[step_log.internal_name] = step_log self.put_run_log(run_log=run_log) - def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog: # pylint: disable=unused-argument + def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog: """ Creates a uncommitted branch log object by the internal name given @@ -534,9 +528,7 @@ def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog: logger.info(f"{self.service_name} Creating a Branch Log : {internal_branch_name}") return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED) - def get_branch_log( - self, internal_branch_name: str, run_id: str, **kwargs - ) -> Union[BranchLog, RunLog]: # pylint: disable=unused-argument + def get_branch_log(self, internal_branch_name: str, run_id: str, **kwargs) -> Union[BranchLog, RunLog]: """ Returns the branch log by the internal branch name for the run id @@ -555,9 +547,7 @@ def get_branch_log( branch, _ = run_log.search_branch_by_internal_name(internal_branch_name) return branch - def add_branch_log( - self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs - ): # pylint: disable=unused-argument + def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs): """ The method should: # Get the run log @@ -589,7 +579,7 @@ def add_branch_log( step.branches[internal_branch_name] = branch_log # type: ignore self.put_run_log(run_log) - def create_attempt_log(self, **kwargs) -> StepAttempt: # pylint: disable=unused-argument + def create_attempt_log(self, **kwargs) -> StepAttempt: """ Returns an uncommitted step attempt log. 
@@ -599,7 +589,7 @@ def create_attempt_log(self, **kwargs) -> StepAttempt: # pylint: disable=unused logger.info(f"{self.service_name} Creating an attempt log") return StepAttempt() - def create_code_identity(self, **kwargs) -> CodeIdentity: # pylint: disable=unused-argument + def create_code_identity(self, **kwargs) -> CodeIdentity: """ Creates an uncommitted Code identity class @@ -609,7 +599,7 @@ def create_code_identity(self, **kwargs) -> CodeIdentity: # pylint: disable=unu logger.info(f"{self.service_name} Creating Code identity") return CodeIdentity() - def create_data_catalog(self, name: str, **kwargs) -> DataCatalog: # pylint: disable=unused-argument + def create_data_catalog(self, name: str, **kwargs) -> DataCatalog: """ Create a uncommitted data catalog object @@ -623,9 +613,6 @@ def create_data_catalog(self, name: str, **kwargs) -> DataCatalog: # pylint: di return DataCatalog(name=name) -# --8<-- [end:docs] - - class BufferRunLogstore(BaseRunLogStore): """ A in-memory run log store. @@ -646,11 +633,8 @@ class BufferRunLogstore(BaseRunLogStore): """ - service_name = "buffered" - - def __init__(self, config): - super().__init__(config) - self.run_log = None # For a buffered Run Log, this is the database + service_name: str = "buffered" + run_log: Optional[RunLog] = Field(default=None, exclude=True) # For a buffered Run Log, this is the database def create_run_log( self, @@ -700,716 +684,3 @@ def put_run_log(self, run_log: RunLog, **kwargs): """ logger.info(f"{self.service_name} Putting the run log in the DB: {run_log.run_id}") self.run_log = run_log - - -class FileSystemRunLogstore(BaseRunLogStore): - """ - In this type of Run Log store, we use a file system to store the JSON run log. - - Every single run is stored as a different file which makes it compatible across other store types. - - When to use: - When locally testing a pipeline and have the need to compare across runs. - Its fully featured and perfectly fine if your local environment is where you would do everyhing. - - Do not use: - If you need parallelization on local, this run log would not support it. - - Example config: - - run_log: - type: file-system - config: - log_folder: The folder to out the logs. Defaults to .run_log_store - """ - - service_name = "file-system" - - class Config(BaseModel): - log_folder: str = defaults.LOG_LOCATION_FOLDER - - @property - def log_folder_name(self) -> str: - """ - Returns: - str: The name of the log folder - """ - return self.config.log_folder - - def write_to_folder(self, run_log: RunLog): - """ - Write the run log to the folder - - Args: - run_log (RunLog): The run log to be added to the database - """ - write_to = self.log_folder_name - utils.safe_make_dir(write_to) - - write_to_path = Path(write_to) - run_id = run_log.run_id - json_file_path = write_to_path / f"{run_id}.json" - - with json_file_path.open("w") as fw: - json.dump(run_log.dict(), fw, ensure_ascii=True, indent=4) # pylint: disable=no-member - - def get_from_folder(self, run_id: str) -> RunLog: - """ - Look into the run log folder for the run log for the run id. - - If the run log does not exist, raise an exception. If it does, decode it - as a RunLog and return it - - Args: - run_id (str): The requested run id to retrieve the run log store - - Raises: - FileNotFoundError: If the Run Log has not been found. 
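# Sketch of the run log store API flow, using the in-memory buffered store shown
# above and the method contracts documented on the base class; the run id and
# parameter values are illustrative.
from magnus.datastore import BufferRunLogstore

store = BufferRunLogstore()

run_log = store.create_run_log(run_id="demo-run")
store.set_parameters(run_id="demo-run", parameters={"learning_rate": 0.01})

step_log = store.create_step_log(name="train", internal_name="train")
store.add_step_log(step_log, run_id="demo-run")

print(store.get_run_log_by_id(run_id="demo-run").parameters)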
- - Returns: - RunLog: The decoded Run log - """ - write_to = self.log_folder_name - - read_from_path = Path(write_to) - json_file_path = read_from_path / f"{run_id}.json" - - if not json_file_path.exists(): - raise FileNotFoundError(f"Expected {json_file_path} is not present") - - with json_file_path.open("r") as fr: - json_str = json.load(fr) - run_log = RunLog(**json_str) # pylint: disable=no-member - return run_log - - def create_run_log( - self, - run_id: str, - dag_hash: str = "", - use_cached: bool = False, - tag: str = "", - original_run_id: str = "", - status: str = defaults.CREATED, - **kwargs, - ) -> RunLog: - """ - # Creates a Run log - # Adds it to the db - """ - - try: - self.get_run_log_by_id(run_id=run_id, full=False) - raise exceptions.RunLogExistsError(run_id=run_id) - except exceptions.RunLogNotFoundError: - pass - - logger.info(f"{self.service_name} Creating a Run Log for : {run_id}") - run_log = RunLog( - run_id=run_id, - dag_hash=dag_hash, - use_cached=use_cached, - tag=tag, - original_run_id=original_run_id, - status=status, - ) - self.write_to_folder(run_log) - return run_log - - def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog: - """ - # Returns the run_log defined by id - # Raises Exception if not found - """ - try: - logger.info(f"{self.service_name} Getting a Run Log for : {run_id}") - run_log = self.get_from_folder(run_id) - return run_log - except FileNotFoundError as e: - raise exceptions.RunLogNotFoundError(run_id) from e - - def put_run_log(self, run_log: RunLog, **kwargs): - """ - # Puts the run_log into the database - """ - logger.info(f"{self.service_name} Putting the run log in the DB: {run_log.run_id}") - self.write_to_folder(run_log) - - -class ChunkedFileSystemRunLogStore(BaseRunLogStore): - """ - File system run log store but chunks the run log into thread safe chunks. - This enables executions to be parallel. - """ - - service_name = "chunked-fs" - - class Config(BaseModel): - log_folder: str = defaults.LOG_LOCATION_FOLDER - - class LogTypes(Enum): - RUN_LOG: str = "RunLog" - PARAMETER: str = "Parameter" - STEP_LOG: str = "StepLog" - BRANCH_LOG: str = "BranchLog" - - class ModelTypes(Enum): - RUN_LOG = RunLog - PARAMETER = dict - STEP_LOG = StepLog - BRANCH_LOG = BranchLog - - def naming_pattern(self, log_type: LogTypes, name: str = "") -> str: - """ - Naming pattern to store RunLog, Parameter, StepLog or BranchLog. - - The reasoning for name to be defaulted to empty string: - Its actually conditionally empty. For RunLog and Parameter it is empty. - For StepLog and BranchLog it should be provided. - - Args: - log_type (LogTypes): One of RunLog, Parameter, StepLog or BranchLog - name (str, optional): The name to be included or left. Defaults to ''. 
- - Raises: - Exception: If log_type is not recognized - - Returns: - str: The naming pattern - """ - if log_type == self.LogTypes.RUN_LOG: - return f"{self.LogTypes.RUN_LOG.value}" - - if log_type == self.LogTypes.PARAMETER: - return "-".join([self.LogTypes.PARAMETER.value, name]) - - if not name: - raise Exception(f"Name should be provided for naming pattern for {log_type}") - - if log_type == self.LogTypes.STEP_LOG: - return "-".join([self.LogTypes.STEP_LOG.value, name, "${creation_time}"]) - - if log_type == self.LogTypes.BRANCH_LOG: - return "-".join([self.LogTypes.BRANCH_LOG.value, name, "${creation_time}"]) - - raise Exception("Unexpected log type sent") - - def get_matches(self, run_id: str, name: str, multiple_allowed: bool = False) -> Optional[Union[List[Path], Path]]: - """ - Get contents of files matching the pattern name* - - Args: - run_id (str): The run id - name (str): The suffix of the file name to check in the run log store. - """ - log_folder = self.log_folder_with_run_id(run_id=run_id) - - sub_name = Template(name).safe_substitute({"creation_time": ""}) - - matches = list(log_folder.glob(f"{sub_name}*")) - if matches: - if not multiple_allowed: - if len(matches) > 1: - msg = f"Multiple matches found for {name} while multiple is not allowed" - raise Exception(msg) - return matches[0] - return matches - - return None - - @property - def log_folder_name(self) -> str: - """ - Returns the log folder name - """ - return self.config.log_folder - - def log_folder_with_run_id(self, run_id: str) -> Path: - """ - Utility function to get the log folder for a run id. - - Args: - run_id (str): The run id - - Returns: - Path: The path to the log folder with the run id - """ - return Path(self.log_folder_name) / run_id - - def safe_suffix_json(self, name: Path) -> str: - """ - Safely attach a suffix to a json file. - - Args: - name (Path): The name of the file with or without suffix of json - - Returns: - str : The name of the file with .json - """ - if str(name).endswith("json"): - return str(name) - - return str(name) + ".json" - - def _store(self, run_id: str, contents: dict, name: Path): - """ - Store the contents against the name in the folder. - - Args: - run_id (str): The run id - contents (dict): The dict to store - name (str): The name to store as - """ - utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id)) - - with open(self.safe_suffix_json(name), "w") as fw: - json.dump(contents, fw, ensure_ascii=True, indent=4) - - def _retrieve(self, name: Path) -> dict: - """ - Does the job of retrieving from the folder. - - Args: - name (str): the name of the file to retrieve - - Returns: - dict: The contents - """ - contents: dict = {} - - with open(self.safe_suffix_json(name), "r") as fr: - contents = json.load(fr) - - return contents - - def store(self, run_id: str, log_type: LogTypes, contents: dict, name: str = ""): - """Store a SINGLE log type in the file system - - Args: - run_id (str): The run id to store against - log_type (LogTypes): The type of log to store - contents (dict): The dict of contents to store - name (str, optional): The name against the contents have to be stored. Defaults to ''. - """ - naming_pattern = self.naming_pattern(log_type=log_type, name=name) - match = self.get_matches(run_id=run_id, name=naming_pattern, multiple_allowed=False) - # The boolean multiple allowed confuses mypy a lot! 
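# Standalone illustration of the ${creation_time} placeholder in the chunked
# store's naming pattern: safe_substitute either blanks it (when globbing for
# existing chunks) or stamps it with a nanosecond timestamp (when writing a new
# chunk). The step name is illustrative.
import time
from string import Template

pattern = "StepLog-traversal.train-${creation_time}"

# get_matches blanks the placeholder and globs for "StepLog-traversal.train-*"
print(Template(pattern).safe_substitute({"creation_time": ""}))

# a new chunk gets stamped with the current time in nanoseconds
print(Template(pattern).safe_substitute({"creation_time": str(time.time_ns())}))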
- name_to_give: Path = None # type: ignore - if match: - existing_contents = self._retrieve(name=match) # type: ignore - contents = dict(existing_contents, **contents) - name_to_give = match # type: ignore - else: - _name = Template(naming_pattern).safe_substitute({"creation_time": str(int(time.time_ns()))}) - name_to_give = self.log_folder_with_run_id(run_id=run_id) / _name - - self._store(run_id=run_id, contents=contents, name=name_to_give) - - def retrieve(self, run_id: str, log_type: LogTypes, name: str = "", multiple_allowed=False) -> Any: - """ - Retrieve the model given a log_type and a name. - Use multiple_allowed to control if you are expecting multiple of them. - eg: There could be multiple of Parameters- but only one of StepLog-stepname - - The reasoning for name to be defaulted to empty string: - Its actually conditionally empty. For RunLog and Parameter it is empty. - For StepLog and BranchLog it should be provided. - - Args: - run_id (str): The run id - log_type (LogTypes): One of RunLog, Parameter, StepLog, BranchLog - name (str, optional): The name to match. Defaults to ''. - multiple_allowed (bool, optional): Are multiple allowed. Defaults to False. - - Raises: - FileNotFoundError: If there is no match found - - Returns: - Any: One of StepLog, BranchLog, Parameter or RunLog - """ - # The reason of any is it could be one of Logs or dict or list of the - if not name and log_type not in [ - self.LogTypes.RUN_LOG, - self.LogTypes.PARAMETER, - ]: - raise Exception(f"Name is required during retrieval for {log_type}") - - naming_pattern = self.naming_pattern(log_type=log_type, name=name) - matches = self.get_matches(run_id=run_id, name=naming_pattern, multiple_allowed=multiple_allowed) - if matches: - if not multiple_allowed: - contents = self._retrieve(name=matches) # type: ignore - model = self.ModelTypes[log_type.name].value - return model(**contents) - - models = [] - for match in matches: # type: ignore - contents = self._retrieve(name=match) - model = self.ModelTypes[log_type.name].value - models.append(model(**contents)) - return models - - raise FileNotFoundError() - - def orderly_retrieve(self, run_id: str, log_type: LogTypes) -> dict[str, Union[StepLog, BranchLog]]: - """Should only be used by prepare full run log. - - Retrieves the StepLog or BranchLog sorted according to creation time. - - Args: - run_id (str): _description_ - log_type (LogTypes): _description_ - """ - prefix: str = self.LogTypes.STEP_LOG.value - - if log_type == self.LogTypes.BRANCH_LOG: - prefix = self.LogTypes.BRANCH_LOG.value - - matches = self.get_matches(run_id=run_id, name=prefix, multiple_allowed=True) - - if log_type == self.LogTypes.BRANCH_LOG and not matches: - # No branch logs are found - return {} - # Forcing get_matches to always return a list is a better design - epoch_created = [str(match).split("-")[-1] for match in matches] # type: ignore - - # sort matches by epoch created - epoch_created, matches = zip(*sorted(zip(epoch_created, matches))) # type: ignore - - logs: dict[str, Union[StepLog, BranchLog]] = {} - - for match in matches: # type: ignore - model = self.ModelTypes[log_type.name].value - log_model = model(**self._retrieve(match)) - logs[log_model.internal_name] = log_model # type: ignore - - return logs - - def _get_parent_branch(self, name: str) -> Union[str, None]: # pylint: disable=R0201 - """ - Returns the name of the parent branch. - If the step is part of main dag, return None. - - Args: - name (str): The name of the step. 
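# Worked example of the dot-path convention decoded by the _get_parent_branch and
# _get_parent_step helpers; the step name is illustrative.
name = "map_step.branch_1.train_model"

parent_branch = ".".join(name.split(".")[:-1])  # "map_step.branch_1"
parent_step = ".".join(name.split(".")[:-2])    # "map_step"

print(parent_branch, parent_step)
# A top-level step such as "train_model" has neither: both helpers return None.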
- - Returns: - str: The name of the branch containing the step. - """ - dot_path = name.split(".") - - if len(dot_path) == 1: - return None - # Ignore the step name - return ".".join(dot_path[:-1]) - - def _get_parent_step(self, name: str) -> Union[str, None]: # pylint: disable=R0201 - """ - Returns the step containing the step, useful when we have steps within a branch. - Returns None, if the step belongs to parent dag. - - Args: - name (str): The name of the step to find the parent step it belongs to. - - Returns: - str: The parent step the step belongs to, None if the step belongs to parent dag. - """ - dot_path = name.split(".") - - if len(dot_path) == 1: - return None - # Ignore the branch.step_name - return ".".join(dot_path[:-2]) - - def _prepare_full_run_log(self, run_log: RunLog): - """ - Populates the run log with the branches and steps. - - Args: - run_log (RunLog): The partial run log containing empty step logs - """ - run_id = run_log.run_id - run_log.parameters = self.get_parameters(run_id=run_id) - - ordered_steps = self.orderly_retrieve(run_id=run_id, log_type=self.LogTypes.STEP_LOG) - ordered_branches = self.orderly_retrieve(run_id=run_id, log_type=self.LogTypes.BRANCH_LOG) - - current_branch: Any = None # It could be str, None, RunLog - for step_internal_name in ordered_steps: - current_branch = self._get_parent_branch(step_internal_name) - step_to_add_branch = self._get_parent_step(step_internal_name) - - if not current_branch: - current_branch = run_log - else: - current_branch = ordered_branches[current_branch] # type: ignore - step_to_add_branch = ordered_steps[step_to_add_branch] # type: ignore - step_to_add_branch.branches[current_branch.internal_name] = current_branch # type: ignore - - current_branch.steps[step_internal_name] = ordered_steps[step_internal_name] - - def create_run_log( - self, - run_id: str, - dag_hash: str = "", - use_cached: bool = False, - tag: str = "", - original_run_id: str = "", - status: str = defaults.CREATED, - **kwargs, - ): - """ - Creates a Run Log object by using the config - - Logically the method should do the following: - * Creates a Run log - * Adds it to the db - * Return the log - """ - try: - self.get_run_log_by_id(run_id=run_id, full=False) - raise exceptions.RunLogExistsError(run_id=run_id) - except exceptions.RunLogNotFoundError: - pass - - logger.info(f"{self.service_name} Creating a Run Log for : {run_id}") - run_log = RunLog( - run_id=run_id, - dag_hash=dag_hash, - use_cached=use_cached, - tag=tag, - original_run_id=original_run_id, - status=status, - ) - - self.store(run_id=run_id, contents=run_log.dict(), log_type=self.LogTypes.RUN_LOG) - return run_log - - def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog: - """ - Retrieves a Run log from the database using the config and the run_id - - Args: - run_id (str): The run_id of the run - full (bool): return the full run log store or only the RunLog object - - Returns: - RunLog: The RunLog object identified by the run_id - - Logically the method should: - * Returns the run_log defined by id from the data store defined by the config - - """ - try: - logger.info(f"{self.service_name} Getting a Run Log for : {run_id}") - run_log = self.retrieve(run_id=run_id, log_type=self.LogTypes.RUN_LOG, multiple_allowed=False) - - if full: - self._prepare_full_run_log(run_log=run_log) - - return run_log - except FileNotFoundError as e: - raise exceptions.RunLogNotFoundError(run_id) from e - - def put_run_log(self, run_log: RunLog, **kwargs): - """ - Puts the 
Run Log in the database as defined by the config - - Args: - run_log (RunLog): The Run log of the run - - Logically the method should: - Puts the run_log into the database - - Raises: - NotImplementedError: This is a base class and therefore has no default implementation - """ - run_id = run_log.run_id - self.store(run_id=run_id, contents=run_log.dict(), log_type=self.LogTypes.RUN_LOG) - - def get_parameters(self, run_id: str, **kwargs) -> dict: # pylint: disable=unused-argument - """ - Get the parameters from the Run log defined by the run_id - - Args: - run_id (str): The run_id of the run - - The method should: - * Call get_run_log_by_id(run_id) to retrieve the run_log - * Return the parameters as identified in the run_log - - Returns: - dict: A dictionary of the run_log parameters - Raises: - RunLogNotFoundError: If the run log for run_id is not found in the datastore - """ - parameters = {} - try: - parameters_list = self.retrieve(run_id=run_id, log_type=self.LogTypes.PARAMETER, multiple_allowed=True) - parameters = {key: value for param in parameters_list for key, value in param.items()} - except FileNotFoundError: - # No parameters are set - pass - - return parameters - - def set_parameters(self, run_id: str, parameters: dict, **kwargs): # pylint: disable=unused-argument - """ - Update the parameters of the Run log with the new parameters - - This method would over-write the parameters, if the parameter exists in the run log already - - The method should: - * Call get_run_log_by_id(run_id) to retrieve the run_log - * Update the parameters of the run_log - * Call put_run_log(run_log) to put the run_log in the datastore - - Args: - run_id (str): The run_id of the run - parameters (dict): The parameters to update in the run log - Raises: - RunLogNotFoundError: If the run log for run_id is not found in the datastore - """ - for key, value in parameters.items(): - self.store( - run_id=run_id, - log_type=self.LogTypes.PARAMETER, - contents={key: value}, - name=key, - ) - - def get_run_config(self, run_id: str, **kwargs) -> dict: # pylint: disable=unused-argument - """ - Given a run_id, return the run_config used to perform the run. - - Args: - run_id (str): The run_id of the run - - Returns: - dict: The run config used for the run - """ - - run_log = self.get_run_log_by_id(run_id=run_id) - return run_log.run_config - - def set_run_config(self, run_id: str, run_config: dict, **kwargs): # pylint: disable=unused-argument - """Set the run config used to run the run_id - - Args: - run_id (str): The run_id of the run - run_config (dict): The run_config of the run - """ - - run_log = self.get_run_log_by_id(run_id=run_id) - run_log.run_config.update(run_config) - self.put_run_log(run_log=run_log) - - def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog: # pylint: disable=unused-argument - """ - Get a step log from the datastore for run_id and the internal naming of the step log - - The internal naming of the step log is a dot path convention. 
- - The method should: - * Call get_run_log_by_id(run_id) to retrieve the run_log - * Identify the step location by decoding the internal naming - * Return the step log - - Args: - internal_name (str): The internal name of the step log - run_id (str): The run_id of the run - - Returns: - StepLog: The step log object for the step defined by the internal naming and run_id - - Raises: - RunLogNotFoundError: If the run log for run_id is not found in the datastore - StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id - """ - logger.info(f"{self.service_name} Getting the step log: {internal_name} of {run_id}") - - step_log = self.retrieve( - run_id=run_id, - log_type=self.LogTypes.STEP_LOG, - name=internal_name, - multiple_allowed=False, - ) - - return step_log - - def add_step_log(self, step_log: StepLog, run_id: str, **kwargs): # pylint: disable=unused-argument - """ - Add the step log in the run log as identified by the run_id in the datastore - - The method should: - * Call get_run_log_by_id(run_id) to retrieve the run_log - * Identify the branch to add the step by decoding the step_logs internal name - * Add the step log to the identified branch log - * Call put_run_log(run_log) to put the run_log in the datastore - - Args: - step_log (StepLog): The Step log to add to the database - run_id (str): The run id of the run - - Raises: - RunLogNotFoundError: If the run log for run_id is not found in the datastore - BranchLogNotFoundError: If the branch of the step log for internal_name is not found in the datastore - for run_id - """ - logger.info(f"{self.service_name} Adding the step log to DB: {step_log.internal_name}") - - self.store( - run_id=run_id, - log_type=self.LogTypes.STEP_LOG, - contents=step_log.dict(), - name=step_log.internal_name, - ) - - def get_branch_log( - self, internal_branch_name: str, run_id: str, **kwargs - ) -> Union[BranchLog, RunLog]: # pylint: disable=unused-argument - """ - Returns the branch log by the internal branch name for the run id - - If the internal branch name is none, returns the run log - - Args: - internal_branch_name (str): The internal branch name to retrieve. - run_id (str): The run id of interest - - Returns: - BranchLog: The branch log or the run log as requested. 
- """ - if not internal_branch_name: - return self.get_run_log_by_id(run_id=run_id) - branch = self.retrieve(run_id=run_id, log_type=self.LogTypes.BRANCH_LOG, name=internal_branch_name) - return branch - - def add_branch_log( - self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs - ): # pylint: disable=unused-argument - """ - The method should: - # Get the run log - # Get the branch and step containing the branch - # Add the branch to the step - # Write the run_log - - The branch log could some times be a Run log and should be handled appropriately - - Args: - branch_log (BranchLog): The branch log/run log to add to the database - run_id (str): The run id to which the branch/run log is added - """ - if not isinstance(branch_log, BranchLog): - self.put_run_log(branch_log) # type: ignore # We are dealing with base dag here - return - - internal_branch_name = branch_log.internal_name - - logger.info(f"{self.service_name} Adding the branch log to DB: {branch_log.internal_name}") - self.store( - run_id=run_id, - log_type=self.LogTypes.BRANCH_LOG, - contents=branch_log.dict(), - name=internal_branch_name, - ) diff --git a/magnus/defaults.py b/magnus/defaults.py index f289a625..b0982ed6 100644 --- a/magnus/defaults.py +++ b/magnus/defaults.py @@ -1,6 +1,19 @@ +# mypy: ignore-errors +# The above should be done until https://github.com/python/mypy/issues/8823 from enum import Enum +from typing import Any, Dict, Mapping, Optional, Union + +from typing_extensions import TypeAlias + +# TODO: This is not the correct way to do this. +try: # pragma: no cover + from typing import TypedDict # type: ignore[unused-ignore] +except ImportError: # pragma: no cover + from typing_extensions import TypedDict # type: ignore[unused-ignore] + NAME = "magnus" +LOGGER_NAME = "magnus" # CLI settings LOG_LEVEL = "WARNING" @@ -16,13 +29,32 @@ class EXECUTION_PLAN(Enum): INTERACTIVE = "interactive" # used for interactive sessions +# Type definitions +class ServiceConfig(TypedDict): + type: str + config: Mapping[str, Any] + + +class MagnusConfig(TypedDict, total=False): + run_log_store: Optional[ServiceConfig] + secrets: Optional[ServiceConfig] + catalog: Optional[ServiceConfig] + executor: Optional[ServiceConfig] + experiment_tracker: Optional[ServiceConfig] + + +TypeMapVariable: TypeAlias = Optional[Dict[str, Union[str, int, float]]] + + # Config file environment variable MAGNUS_CONFIG_FILE = "MAGNUS_CONFIG_FILE" MAGNUS_RUN_TAG = "MAGNUS_RUN_TAG" # Interaction settings TRACK_PREFIX = "MAGNUS_TRACK_" +STEP_INDICATOR = "_STEP_" PARAMETER_PREFIX = "MAGNUS_PRM_" +MAP_VARIABLE = "MAGNUS_MAP_VARIABLE" VARIABLE_PREFIX = "MAGNUS_VAR_" ENV_RUN_ID = "MAGNUS_RUN_ID" ATTEMPT_NUMBER = "MAGNUS_STEP_ATTEMPT" @@ -40,13 +72,16 @@ class EXECUTION_PLAN(Enum): COMMAND_TYPE = "python" NODE_SPEC_FILE = "node_spec.yaml" COMMAND_FRIENDLY_CHARACTER = "%" +DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/magnus/" +DEFAULT_CONTAINER_DATA_PATH = "data/" +DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json" # Default services -DEFAULT_EXECUTOR = {"type": "local"} -DEFAULT_RUN_LOG_STORE = {"type": "buffered"} -DEFAULT_CATALOG = {"type": "file-system"} -DEFAULT_SECRETS = {"type": "do-nothing"} -DEFAULT_EXPERIMENT_TRACKER = {"type": "do-nothing"} +DEFAULT_EXECUTOR = ServiceConfig(type="local", config={}) +DEFAULT_RUN_LOG_STORE = ServiceConfig(type="buffered", config={}) +DEFAULT_CATALOG = ServiceConfig(type="file-system", config={}) +DEFAULT_SECRETS = ServiceConfig(type="do-nothing", config={}) +DEFAULT_EXPERIMENT_TRACKER = 
ServiceConfig(type="do-nothing", config={}) # Map state MAP_PLACEHOLDER = "map_variable_placeholder" @@ -72,13 +107,11 @@ class EXECUTION_PLAN(Enum): # Data catalog settings CATALOG_LOCATION_FOLDER = ".catalog" -COMPUTE_DATA_FOLDER = "data" +COMPUTE_DATA_FOLDER = "." # Secrets settings DOTENV_FILE_LOCATION = ".env" -# AWS settings -AWS_REGION = "eu-west-1" # Docker settings DOCKERFILE_NAME = "Dockerfile" @@ -104,3 +137,43 @@ class EXECUTION_PLAN(Enum): """ GIT_ARCHIVE_NAME = "git_tracked" LEN_SHA_FOR_TAG = 8 + + +class ENTRYPOINT(Enum): + """ + The possible container entrypoint types. + """ + + USER = "user" + SYSTEM = "system" + + +## Logging settings + +LOGGING_CONFIG = { + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "standard": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"}, + "magnus_formatter": {"format": "%(message)s", "datefmt": "[%X]"}, + }, + "handlers": { + "default": { + "formatter": "standard", + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", # Default is stderr + }, + "magnus_handler": { + "formatter": "magnus_formatter", + "class": "rich.logging.RichHandler", + "rich_tracebacks": True, + }, + }, + "loggers": { + "": { + "handlers": ["default"], + "propagate": True, + }, # Root logger + LOGGER_NAME: {"handlers": ["magnus_handler"], "propagate": False}, + }, +} diff --git a/magnus/docker_utils.py b/magnus/docker_utils.py deleted file mode 100644 index a75062f0..00000000 --- a/magnus/docker_utils.py +++ /dev/null @@ -1,100 +0,0 @@ -import logging -from string import Template -from typing import Union - -from magnus import defaults, utils - -logger = logging.getLogger(defaults.NAME) -try: - import docker -except ImportError: - logger.info("docker was not installed, docker functionality will not work") - - -def generate_docker_file(style: str = "poetry", git_tracked: bool = True): - """ - Generates a Dockerfile based on the given style. - - Args: - style (str, optional): One of python dependency style. Defaults to "poetry". - git_tracked (bool, optional): Allow only git tracked or not. Defaults to True. - - Raises: - Exception: If the current directory is not a git repository. - """ - install_style, install_requirements, copy_content = None, None, None - - if style == "poetry": - install_style = "RUN pip install poetry" - install_requirements = "RUN poetry install" - logger.info("Using poetry style for requirements") - elif style == "pipenv": - install_style = "RUN pip install pipenv" - install_requirements = "RUN pipenv install" - logger.info("Using pipenv style for requirements") - else: - install_requirements = "RUN pip install -r requirements.txt" - logger.info("Trying requirements.txt, if one exists") - - copy_content = "COPY . /app" - if git_tracked: - if not utils.is_a_git_repo(): - msg = ( - "The current project is not git versioned. Disable only git tracked to create the image. " - "Be aware over-riding this can cause leak of sensitive data if you are not careful." 
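# Sketch of applying the LOGGING_CONFIG introduced in defaults.py above through
# the standard library; it assumes rich is installed, since the magnus handler is
# rich.logging.RichHandler. How magnus itself wires this up is not shown here.
import logging
import logging.config

from magnus import defaults

logging.config.dictConfig(defaults.LOGGING_CONFIG)
logging.getLogger(defaults.LOGGER_NAME).warning("rendered by the RichHandler")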
- ) - raise Exception(msg) - copy_content = "ADD git_tracked.tar.gz /app" - utils.archive_git_tracked(defaults.GIT_ARCHIVE_NAME) - - dockerfile_content = Template(defaults.DOCKERFILE_CONTENT).safe_substitute( - { - "INSTALL_STYLE": install_style, - "INSTALL_REQUIREMENTS": install_requirements, - "COPY_CONTENT": copy_content, - } - ) - with open(defaults.DOCKERFILE_NAME, "w", encoding="utf-8") as fw: - fw.write(dockerfile_content) - - -def build_docker( - image_name: str, - docker_file: Union[str, None], - style: str, - tag: str, - commit_tag: bool, - dry_run: bool = False, - git_tracked: bool = True, -): - """ - Builds the docker image. - - Args: - image_name (str): The name of the docker image. - docker_file (Union[str, None]): The name of the dockerfile, if present or None - style (str): The style of python dependency. - tag (str): The tag of the docker image. - commit_tag (bool): Use the commit id for the docker image tag. - dry_run (bool, optional): Do not run just show what would be done. Defaults to False. - git_tracked (bool, optional): Only the git tracked files or not. Defaults to True. - - Raises: - Exception: If the current directory is not a git repository and asked for a git tag as docker image. - """ - if commit_tag: - if not utils.is_a_git_repo(): - msg = "The current project is not git versioned, cannot use commit tag option when building image" - raise Exception(msg) - - tag = utils.get_current_code_commit()[: defaults.LEN_SHA_FOR_TAG] # type: ignore - - if not docker_file: - generate_docker_file(style, git_tracked=git_tracked) - docker_file = defaults.DOCKERFILE_NAME - - if dry_run: - return - - docker_client = docker.from_env() - docker_client.images.build(path=".", dockerfile=docker_file, tag=f"{image_name}:{tag}", quiet=False) diff --git a/magnus/entrypoints.py b/magnus/entrypoints.py new file mode 100644 index 00000000..89791f26 --- /dev/null +++ b/magnus/entrypoints.py @@ -0,0 +1,484 @@ +import json +import logging +from typing import Optional, cast + +from rich import print + +import magnus.context as context +from magnus import defaults, graph, utils +from magnus.defaults import MagnusConfig, ServiceConfig + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +def get_default_configs() -> MagnusConfig: + """ + User can provide extensions as part of their code base, magnus-config.yaml provides the place to put them. + """ + user_configs = {} + if utils.does_file_exist(defaults.USER_CONFIG_FILE): + user_configs = utils.load_yaml(defaults.USER_CONFIG_FILE) + + if not user_configs: + return {} + + user_defaults = user_configs.get("defaults", {}) + if user_defaults: + return user_defaults + + return {} + + +def prepare_configurations( + run_id: str, + configuration_file: str = "", + pipeline_file: str = "", + tag: str = "", + use_cached: str = "", + parameters_file: str = "", + force_local_executor: bool = False, +) -> context.Context: + """ + Replace the placeholders in the dag/config against the variables file. + + Attach the secrets_handler, run_log_store, catalog_handler to the executor and return it. + + Args: + variables_file (str): The variables file, if used or None + pipeline_file (str): The config/dag file + run_id (str): The run id of the run. 
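# Illustrative shape of the "defaults" block that get_default_configs() above
# reads from magnus-config.yaml, written as the dict the YAML would parse to.
# The service types below are the built-in defaults from defaults.py; a real
# project would point these at whichever extensions it uses.
user_configs = {
    "defaults": {
        "executor": {"type": "local", "config": {}},
        "run_log_store": {"type": "buffered", "config": {}},
        "catalog": {"type": "file-system", "config": {}},
        "secrets": {"type": "do-nothing", "config": {}},
        "experiment_tracker": {"type": "do-nothing", "config": {}},
    }
}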
+ tag (str): If a tag is provided at the run time + use_cached (str): Provide the run_id of the older run + + Returns: + executor.BaseExecutor : A prepared executor as per the dag/config + """ + magnus_defaults = get_default_configs() + + variables = utils.gather_variables() + + templated_configuration = {} + if configuration_file: + templated_configuration = utils.load_yaml(configuration_file) or {} + + configuration: MagnusConfig = cast(MagnusConfig, templated_configuration) + + # Run log settings, configuration over-rides everything + run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None) + if not run_log_config: + run_log_config = cast(ServiceConfig, magnus_defaults.get("run_log_store", defaults.DEFAULT_RUN_LOG_STORE)) + run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config) + + # Catalog handler settings, configuration over-rides everything + catalog_config: Optional[ServiceConfig] = configuration.get("catalog", None) + if not catalog_config: + catalog_config = cast(ServiceConfig, magnus_defaults.get("catalog", defaults.DEFAULT_CATALOG)) + catalog_handler = utils.get_provider_by_name_and_type("catalog", catalog_config) + + # Secret handler settings, configuration over-rides everything + secrets_config: Optional[ServiceConfig] = configuration.get("secrets", None) + if not secrets_config: + secrets_config = cast(ServiceConfig, magnus_defaults.get("secrets", defaults.DEFAULT_SECRETS)) + secrets_handler = utils.get_provider_by_name_and_type("secrets", secrets_config) + + # experiment tracker settings, configuration over-rides everything + tracker_config: Optional[ServiceConfig] = configuration.get("experiment_tracker", None) + if not tracker_config: + tracker_config = cast( + ServiceConfig, magnus_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER) + ) + tracker_handler = utils.get_provider_by_name_and_type("experiment_tracker", tracker_config) + + # executor configurations, configuration over rides everything + executor_config: Optional[ServiceConfig] = configuration.get("executor", None) + if force_local_executor: + executor_config = ServiceConfig(type="local", config={}) + + if not executor_config: + executor_config = cast(ServiceConfig, magnus_defaults.get("executor", defaults.DEFAULT_EXECUTOR)) + configured_executor = utils.get_provider_by_name_and_type("executor", executor_config) + + # Construct the context + run_context = context.Context( + executor=configured_executor, + run_log_store=run_log_store, + catalog_handler=catalog_handler, + secrets_handler=secrets_handler, + experiment_tracker=tracker_handler, + variables=variables, + tag=tag, + run_id=run_id, + configuration_file=configuration_file, + parameters_file=parameters_file, + ) + + if pipeline_file: + # There are use cases where we are only preparing the executor + pipeline_config = utils.load_yaml(pipeline_file) + + logger.info("The input pipeline:") + logger.info(json.dumps(pipeline_config, indent=4)) + + # Create the graph + dag_config = pipeline_config["dag"] + dag_hash = utils.get_dag_hash(dag_config) + dag = graph.create_graph(dag_config) + + run_context.pipeline_file = pipeline_file + run_context.dag = dag + run_context.dag_hash = dag_hash + + run_context.use_cached = False + if use_cached: + run_context.use_cached = True + run_context.original_run_id = use_cached + + context.run_context = run_context + + return run_context + + +def execute( + configuration_file: str, + pipeline_file: str, + tag: str = "", + run_id: str = "", + use_cached: 
str = "", + parameters_file: str = "", +): + # pylint: disable=R0914,R0913 + """ + The entry point to magnus execution. This method would prepare the configurations and delegates traversal to the + executor + + Args: + pipeline_file (str): The config/dag file + run_id (str): The run id of the run. + tag (str): If a tag is provided at the run time + use_cached (str): The previous run_id to use. + parameters_file (str): The parameters being sent in to the application + """ + # Re run settings + run_id = utils.generate_run_id(run_id=run_id) + + run_context = prepare_configurations( + configuration_file=configuration_file, + pipeline_file=pipeline_file, + run_id=run_id, + tag=tag, + use_cached=use_cached, + parameters_file=parameters_file, + ) + print("Working with context:") + print(run_context) + + executor = run_context.executor + + run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value + + utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) + + # Prepare for graph execution + executor.prepare_for_graph_execution() + + logger.info("Executing the graph") + executor.execute_graph(dag=run_context.dag) # type: ignore + + executor.send_return_code() + + +def execute_single_node( + configuration_file: str, + pipeline_file: str, + step_name: str, + map_variable: str, + run_id: str, + tag: str = "", + parameters_file: str = "", +): + """ + The entry point into executing a single node of magnus. Orchestration modes should extensively use this + entry point. + + It should have similar set up of configurations to execute because orchestrator modes can initiate the execution. + + Args: + variables_file (str): The variables file, if used or None + step_name : The name of the step to execute in dot path convention + pipeline_file (str): The config/dag file + run_id (str): The run id of the run. + tag (str): If a tag is provided at the run time + parameters_file (str): The parameters being sent in to the application + + """ + from magnus import nodes + + run_context = prepare_configurations( + configuration_file=configuration_file, + pipeline_file=pipeline_file, + run_id=run_id, + tag=tag, + use_cached="", + parameters_file=parameters_file, + ) + print("Working with context:") + print(run_context) + + executor = run_context.executor + run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value + utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) + + executor.prepare_for_node_execution() + + if not run_context.dag: + # There are a few entry points that make graph dynamically and do not have a dag defined statically. + run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_id, full=False) + run_context.dag = graph.create_graph(run_log.run_config["pipeline"]) + + step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name) + + map_variable_dict = utils.json_to_ordered_dict(map_variable) + + node_to_execute, _ = graph.search_node_by_internal_name(run_context.dag, step_internal_name) + + logger.info("Executing the single node of : %s", node_to_execute) + executor.execute_node(node=node_to_execute, map_variable=map_variable_dict) + + executor.send_return_code(stage="execution") + + +def execute_single_brach( + configuration_file: str, + pipeline_file: str, + branch_name: str, + map_variable: str, + run_id: str, + tag: str, +): + """ + The entry point into executing a branch of the graph. 
Interactive modes in parallel runs use this to execute + branches in parallel. + + This entry point is never used by its own but rather from a node. So the arguments sent into this are fewer. + + Args: + variables_file (str): The variables file, if used or None + branch_name : The name of the branch to execute, in dot.path.convention + pipeline_file (str): The config/dag file + run_id (str): The run id of the run. + tag (str): If a tag is provided at the run time + """ + from magnus import nodes + + run_context = prepare_configurations( + configuration_file=configuration_file, + pipeline_file=pipeline_file, + run_id=run_id, + tag=tag, + use_cached="", + ) + print("Working with context:") + print(run_context) + + executor = run_context.executor + run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value + utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) + + branch_internal_name = nodes.BaseNode._get_internal_name_from_command_name(branch_name) + + map_variable_dict = utils.json_to_ordered_dict(map_variable) + + branch_to_execute = graph.search_branch_by_internal_name(run_context.dag, branch_internal_name) # type: ignore + + logger.info("Executing the single branch of %s", branch_to_execute) + executor.execute_graph(dag=branch_to_execute, map_variable=map_variable_dict) + + executor.send_return_code() + + +def execute_notebook( + entrypoint: str, + notebook_file: str, + catalog_config: dict, + configuration_file: str, + notebook_output_path: str = "", + tag: str = "", + run_id: str = "", + parameters_file: str = "", +): + """ + The entry point to magnus execution of a notebook. This method would prepare the configurations and + delegates traversal to the executor + """ + run_id = utils.generate_run_id(run_id=run_id) + + run_context = prepare_configurations( + configuration_file=configuration_file, + run_id=run_id, + tag=tag, + parameters_file=parameters_file, + ) + + executor = run_context.executor + run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value + utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) + + print("Working with context:") + print(run_context) + + step_config = { + "command": notebook_file, + "command_type": "notebook", + "notebook_output_path": notebook_output_path, + "type": "task", + "next": "success", + "catalog": catalog_config, + } + node = graph.create_node(name="executing job", step_config=step_config) + + if entrypoint == defaults.ENTRYPOINT.USER.value: + # Prepare for graph execution + executor.prepare_for_graph_execution() + + logger.info("Executing the job from the user. We are still in the caller's compute environment") + executor.execute_job(node=node) + + elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value: + executor.prepare_for_node_execution() + logger.info("Executing the job from the system. We are in the config's compute environment") + executor.execute_node(node=node) + + # Update the status of the run log + step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id) + run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status) + + else: + raise ValueError(f"Invalid entrypoint {entrypoint}") + + executor.send_return_code() + + +def execute_function( + entrypoint: str, + command: str, + catalog_config: dict, + configuration_file: str, + tag: str = "", + run_id: str = "", + parameters_file: str = "", +): + """ + The entry point to magnus execution of a function. 
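# Sketch of calling the notebook entry point above directly, mirroring what the
# execute_notebook CLI command passes through; the notebook name, output path and
# catalog settings are illustrative.
from magnus import defaults, entrypoints

entrypoints.execute_notebook(
    entrypoint=defaults.ENTRYPOINT.USER.value,
    notebook_file="eda.ipynb",
    catalog_config={"compute_data_folder": "data/", "put": None},
    configuration_file="",
    notebook_output_path="eda_out.ipynb",
)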
This method would prepare the configurations and + delegates traversal to the executor + """ + run_id = utils.generate_run_id(run_id=run_id) + + run_context = prepare_configurations( + configuration_file=configuration_file, + run_id=run_id, + tag=tag, + parameters_file=parameters_file, + ) + + executor = run_context.executor + + run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value + utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) + + print("Working with context:") + print(run_context) + + # Prepare the graph with a single node + step_config = { + "command": command, + "command_type": "python", + "type": "task", + "next": "success", + "catalog": catalog_config, + } + node = graph.create_node(name="executing job", step_config=step_config) + + if entrypoint == defaults.ENTRYPOINT.USER.value: + # Prepare for graph execution + executor.prepare_for_graph_execution() + + logger.info("Executing the job from the user. We are still in the caller's compute environment") + executor.execute_job(node=node) + + elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value: + executor.prepare_for_node_execution() + logger.info("Executing the job from the system. We are in the config's compute environment") + executor.execute_node(node=node) + + # Update the status of the run log + step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id) + run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status) + + else: + raise ValueError(f"Invalid entrypoint {entrypoint}") + + executor.send_return_code() + + +def fan( + configuration_file: str, + pipeline_file: str, + step_name: str, + mode: str, + map_variable: str, + run_id: str, + tag: str = "", + parameters_file: str = "", +): + """ + The entry point to either fan in or out for a composite node. Only 3rd party orchestrators should use this. + + It should have similar set up of configurations to execute because orchestrator modes can initiate the execution. + + Args: + configuration_file (str): The configuration file. + mode: in or out + step_name : The name of the step to execute in dot path convention + pipeline_file (str): The config/dag file + run_id (str): The run id of the run. 
+ tag (str): If a tag is provided at the run time + parameters_file (str): The parameters being sent in to the application + + """ + from magnus import nodes + + run_context = prepare_configurations( + configuration_file=configuration_file, + pipeline_file=pipeline_file, + run_id=run_id, + tag=tag, + use_cached="", + parameters_file=parameters_file, + ) + print("Working with context:") + print(run_context) + + executor = run_context.executor + run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value + utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) + + executor.prepare_for_node_execution() + + step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name) + node_to_execute, _ = graph.search_node_by_internal_name(run_context.dag, step_internal_name) # type: ignore + + map_variable_dict = utils.json_to_ordered_dict(map_variable) + + if mode == "in": + logger.info("Fanning in for : %s", node_to_execute) + executor.fan_in(node=node_to_execute, map_variable=map_variable_dict) + elif mode == "out": + logger.info("Fanning out for : %s", node_to_execute) + executor.fan_out(node=node_to_execute, map_variable=map_variable_dict) + else: + raise ValueError(f"Invalid mode {mode}") + + +if __name__ == "__main__": + # This is only for perf testing purposes. + prepare_configurations(run_id="abc", pipeline_file="example/mocking.yaml") diff --git a/magnus/exceptions.py b/magnus/exceptions.py index 7b8e538a..d936cfa0 100644 --- a/magnus/exceptions.py +++ b/magnus/exceptions.py @@ -70,6 +70,12 @@ def __init__(self, name): self.message = f"Branch of name {name} is not found the graph" +class TerminalNodeError(Exception): # pragma: no cover + def __init__(self): + super().__init__() + self.message = "Terminal Nodes do not have next node" + + class SecretNotFoundError(Exception): # pragma: no cover """ Exception class @@ -80,3 +86,9 @@ class SecretNotFoundError(Exception): # pragma: no cover def __init__(self, secret_name, secret_setting): super().__init__() self.message = f"No secret found by name:{secret_name} in {secret_setting}" + + +class ExecutionFailedError(Exception): # pragma: no cover + def __init__(self, run_id: str): + super().__init__() + self.message = f"Execution failed for run id: {run_id}" diff --git a/magnus/executor.py b/magnus/executor.py index b6c2b534..56a77241 100644 --- a/magnus/executor.py +++ b/magnus/executor.py @@ -1,90 +1,52 @@ from __future__ import annotations -import copy -import json import logging import os -import re -from typing import TYPE_CHECKING, Dict, List, Optional, cast +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Dict, List, Optional -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict -from magnus import defaults, exceptions, integration, interaction, utils -from magnus.catalog import BaseCatalog +import magnus.context as context +from magnus import defaults +from magnus.datastore import DataCatalog, RunLog, StepLog +from magnus.defaults import TypeMapVariable from magnus.graph import Graph -from magnus.nodes import BaseNode -if TYPE_CHECKING: - from magnus.datastore import BaseRunLogStore, DataCatalog, StepLog - from magnus.experiment_tracker import BaseExperimentTracker - from magnus.secrets import BaseSecrets +if TYPE_CHECKING: # pragma: no cover + from magnus.extensions.nodes import TaskNode + from magnus.nodes import BaseNode -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) -# 
--8<-- [start:docs] -class BaseExecutor: +class BaseExecutor(ABC, BaseModel): """ The skeleton of an executor class. Any implementation of an executor should inherit this class and over-ride accordingly. - This is a loaded base class which has a lot of methods already implemented for "typical" executions. - Look at the function docs to understand how to use them appropriately. - - For any implementation: - 1). When should the run log being set up? - 2). What should the step log being set up? + There is a extension available in magnus/extensions/executor/__init__.py + which implements the most common functionality which is easier to + extend/override in most scenarios. """ - service_name = "" - - class Config(BaseModel): - enable_parallel: bool = defaults.ENABLE_PARALLEL - placeholders: dict = {} - - def __init__(self, config: dict = None): - # pylint: disable=R0914,R0913 - config = config or {} - self.config = self.Config(**config) - # The remaining would be attached later - # The definition files - self.pipeline_file = None - self.variables_file = None - self.parameters_file = None - self.configuration_file = None - # run descriptors - self.tag: str = "" - self.run_id: str = "" - self.single_step: bool = False - self.variables: Dict[str, str] = {} - self.use_cached: bool = False - self.dag: Graph = None # type: ignore - self.dag_hash: str = "" - self.execution_plan: str = "" # Chained or unchained - # Services - self.catalog_handler: Optional[BaseCatalog] = None - self.secrets_handler: BaseSecrets = None # type: ignore - self.experiment_tracker: BaseExperimentTracker = None # type: ignore - self.run_log_store: BaseRunLogStore = None # type: ignore - self.previous_run_log = None - - self.context_step_log: Optional[StepLog] = None - self.context_node: Optional[BaseNode] = None + service_name: str = "" + service_type: str = "executor" - @property - def step_decorator_run_id(self): - """ - TODO: Experimental feature, design is not mature yet. + enable_parallel: bool = defaults.ENABLE_PARALLEL + overrides: dict = {} - This function is used by the decorator function. - The design idea is we can over-ride this method in different implementations to retrieve the run_id. - But is it really intrusive to ask to set the environmental variable MAGNUS_RUN_ID? + _previous_run_log: Optional[RunLog] = None + _single_step: str = "" - Returns: - _type_: _description_ - """ - return os.environ.get("MAGNUS_RUN_ID", None) + _context_step_log = None # type : StepLog + _context_node = None # type: BaseNode + model_config = ConfigDict(extra="forbid") + + @property + def _context(self): + return context.run_context def _is_parallel_execution(self) -> bool: """ @@ -97,56 +59,43 @@ def _is_parallel_execution(self) -> bool: Returns: bool: True if the execution allows parallel execution of branches. """ - return self.config.enable_parallel + return self.enable_parallel + + @abstractmethod + def _get_parameters(self) -> Dict[str, Any]: + """ + Get the parameters for the execution. + The parameters can be defined in parameters file and can be overridden by environment variables. + + Returns: + Dict[str, Any]: The parameters for the execution. + """ + ... + + @abstractmethod + def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None: + """ + Set up the executor for using a previous execution. + + Retrieve the older run log, error out if it does not exist. + Sync the catalogs from the previous run log with the current one. + + Update the parameters of this execution with the previous one. 
The previous one take precedence. + + Args: + parameters (Dict[str, Any]): The parameters for the current execution. + """ + @abstractmethod def _set_up_run_log(self, exists_ok=False): """ Create a run log and put that in the run log store If exists_ok, we allow the run log to be already present in the run log store. """ - try: - attempt_run_log = self.run_log_store.get_run_log_by_id(run_id=self.run_id, full=False) - if attempt_run_log.status in [defaults.FAIL, defaults.SUCCESS]: - raise Exception(f"The run log by id: {self.run_id} already exists and is {attempt_run_log.status}") - - raise exceptions.RunLogExistsError(self.run_id) - except exceptions.RunLogNotFoundError: - pass - except exceptions.RunLogExistsError: - if exists_ok: - return - raise - - run_log = {} - run_log["run_id"] = self.run_id - run_log["tag"] = self.tag - run_log["use_cached"] = False - run_log["status"] = defaults.PROCESSING - run_log["dag_hash"] = self.dag_hash - - parameters = {} - if self.parameters_file: - parameters.update(utils.load_yaml(self.parameters_file)) - - # Update these with some from the environment variables - parameters.update(utils.get_user_set_parameters()) - - if self.previous_run_log: - run_log["original_run_id"] = self.previous_run_log.run_id - # Sync the previous run log catalog to this one. - self.catalog_handler.sync_between_runs(previous_run_id=self.previous_run_log.run_id, run_id=self.run_id) - run_log["use_cached"] = True - parameters.update(self.previous_run_log.parameters) - - run_log = self.run_log_store.create_run_log(**run_log) - # Any interaction with run log store attributes should happen via API if available. - self.run_log_store.set_parameters(run_id=self.run_id, parameters=parameters) - - # Update run_config - run_config = utils.get_run_config(self) - self.run_log_store.set_run_config(run_id=self.run_id, run_config=run_config) + ... + @abstractmethod def prepare_for_graph_execution(self): """ This method should be called prior to calling execute_graph. @@ -157,21 +106,9 @@ def prepare_for_graph_execution(self): But in cases of actual rendering the job specs (eg: AWS step functions, K8's) we check if the services are OK. We do not set up a run log as its not relevant. """ + ... - integration.validate(self, self.run_log_store) - integration.configure_for_traversal(self, self.run_log_store) - - integration.validate(self, self.catalog_handler) - integration.configure_for_traversal(self, self.catalog_handler) - - integration.validate(self, self.secrets_handler) - integration.configure_for_traversal(self, self.secrets_handler) - - integration.validate(self, self.experiment_tracker) - integration.configure_for_traversal(self, self.experiment_tracker) - - self._set_up_run_log() - + @abstractmethod def prepare_for_node_execution(self): """ Perform any modifications to the services prior to execution of the node. @@ -180,22 +117,10 @@ def prepare_for_node_execution(self): node (Node): [description] map_variable (dict, optional): [description]. Defaults to None. """ + ... 
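
This change turns `BaseExecutor` into an abstract pydantic model: concrete executors declare validated configuration fields and implement the abstract hooks above. A distilled, standalone sketch of that pattern follows; the class and field names are illustrative, not the magnus API, and it only assumes pydantic v2 is installed.

```python
# A minimal sketch of the "ABC + pydantic BaseModel" service pattern this refactor adopts.
from abc import ABC, abstractmethod

from pydantic import BaseModel, ConfigDict


class BaseService(ABC, BaseModel):
    service_name: str = ""
    enable_parallel: bool = False  # illustrative config field, validated by pydantic

    model_config = ConfigDict(extra="forbid")  # unknown configuration keys fail fast

    @abstractmethod
    def prepare_for_node_execution(self) -> None:
        """Concrete implementations configure their services here."""
        ...


class LocalService(BaseService):
    service_name: str = "local"

    def prepare_for_node_execution(self) -> None:
        print(f"configuring services for {self.service_name}")


# Configuration is validated at construction time; abstract hooks are enforced by ABCMeta.
LocalService(enable_parallel=True).prepare_for_node_execution()
```
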
- integration.validate(self, self.run_log_store) - integration.configure_for_execution(self, self.run_log_store) - - integration.validate(self, self.catalog_handler) - integration.configure_for_execution(self, self.catalog_handler) - - integration.validate(self, self.secrets_handler) - integration.configure_for_execution(self, self.secrets_handler) - - integration.validate(self, self.experiment_tracker) - integration.configure_for_execution(self, self.experiment_tracker) - - def _sync_catalog( - self, node: BaseNode, step_log: StepLog, stage: str, synced_catalogs=None - ) -> Optional[List[DataCatalog]]: + @abstractmethod + def _sync_catalog(self, step_log: StepLog, stage: str, synced_catalogs=None) -> Optional[List[DataCatalog]]: """ 1). Identify the catalog settings by over-riding node settings with the global settings. 2). For stage = get: @@ -215,37 +140,9 @@ def _sync_catalog( Exception: If the stage is not in one of get/put """ - if stage not in ["get", "put"]: - msg = ( - "Catalog service only accepts get/put possible actions as part of node execution." - f"Sync catalog of the executor: {self.service_name} asks for {stage} which is not accepted" - ) - raise Exception(msg) - - node_catalog_settings = cast(BaseNode, self.context_node)._get_catalog_settings() - if not (node_catalog_settings and stage in node_catalog_settings): - # Nothing to get/put from the catalog - return None - - compute_data_folder = self.get_effective_compute_data_folder() - - if not compute_data_folder: - return None - - data_catalogs = [] - for name_pattern in cast(dict, node_catalog_settings).get(stage) or []: #  Assumes a list - data_catalogs = getattr(self.catalog_handler, stage)( - name=name_pattern, - run_id=self.run_id, - compute_data_folder=compute_data_folder, - synced_catalogs=synced_catalogs, - ) - - if data_catalogs: - step_log.add_data_catalogs(data_catalogs) - - return data_catalogs + ... + @abstractmethod def get_effective_compute_data_folder(self) -> Optional[str]: """ Get the effective compute data folder for the given stage. @@ -260,14 +157,7 @@ def get_effective_compute_data_folder(self) -> Optional[str]: Returns: Optional[str]: The compute data folder as defined by catalog handler or the node or None. """ - - catalog_settings = cast(BaseNode, self.context_node)._get_catalog_settings() - - compute_data_folder = cast(BaseCatalog, self.catalog_handler).compute_data_folder - if "compute_data_folder" in catalog_settings and catalog_settings["compute_data_folder"]: # type: ignore - compute_data_folder = catalog_settings["compute_data_folder"] # type: ignore - - return compute_data_folder + ... @property def step_attempt_number(self) -> int: @@ -280,10 +170,10 @@ def step_attempt_number(self) -> int: """ return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1)) - def _execute_node(self, node: BaseNode, map_variable: dict = None, **kwargs): + @abstractmethod + def _execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): """ This is the entry point when we do the actual execution of the function. - DO NOT Over-ride this function. While in interactive execution, we just compute, in 3rd party interactive execution, we need to reach this function. @@ -300,58 +190,13 @@ def _execute_node(self, node: BaseNode, map_variable: dict = None, **kwargs): map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable. Defaults to None. 
""" - step_log = self.run_log_store.get_step_log(node._get_step_log_name(map_variable), self.run_id) - - parameters = self.run_log_store.get_parameters(run_id=self.run_id) - # Set up environment variables for the execution - # If the key already exists, do not update it to give priority to parameters set by environment variables - interaction.store_parameter(update=False, **parameters) - - parameters_in = utils.get_user_set_parameters(remove=False) - - attempt = self.step_attempt_number - logger.info(f"Trying to execute node: {node.internal_name}, attempt : {attempt}") - - try: - self.context_step_log = step_log - self.context_node = node - - attempt_log = self.run_log_store.create_attempt_log() - data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(node, step_log, stage="get") - - attempt_log = node.execute(executor=self, mock=step_log.mock, map_variable=map_variable, **kwargs) - except Exception as e: - # Any exception here is a magnus exception as node suppresses exceptions. - msg = "This is clearly magnus fault, please report a bug and the logs" - raise Exception(msg) from e - finally: - attempt_log.attempt_number = attempt - attempt_log.parameters = parameters_in - step_log.attempts.append(attempt_log) - - tracked_data = utils.get_tracked_data() - parameters_out = utils.get_user_set_parameters(remove=True) - - if attempt_log.status == defaults.FAIL: - logger.exception(f"Node: {node} failed") - step_log.status = defaults.FAIL - else: - step_log.status = defaults.SUCCESS - self._sync_catalog(node, step_log, stage="put", synced_catalogs=data_catalogs_get) - step_log.user_defined_metrics = tracked_data - diff_parameters = utils.diff_dict(parameters_in, parameters_out) - self.run_log_store.set_parameters(self.run_id, diff_parameters) - - # Remove the step context - self.context_step_log = None - self.context_node = None - - self.run_log_store.add_step_log(step_log, self.run_id) - - def execute_node(self, node: BaseNode, map_variable: dict = None, **kwargs): + ... + + @abstractmethod + def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): """ - The exposed method to executing a node. - All implementations should implement this method. + The entry point for all executors apart from local. + We have already prepared for node execution. Args: node (BaseNode): The node to execute @@ -360,8 +205,9 @@ def execute_node(self, node: BaseNode, map_variable: dict = None, **kwargs): Raises: NotImplementedError: _description_ """ - raise NotImplementedError + ... + @abstractmethod def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs): """ Add code identities specific to the implementation. @@ -372,9 +218,10 @@ def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs): step_log (object): The step log object node (BaseNode): The node we are adding the step log for """ - step_log.code_identities.append(utils.get_git_code_identity(self.run_log_store)) + ... - def execute_from_graph(self, node: BaseNode, map_variable: dict = None, **kwargs): + @abstractmethod + def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): """ This is the entry point to from the graph execution. @@ -399,47 +246,10 @@ def execute_from_graph(self, node: BaseNode, map_variable: dict = None, **kwargs map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable. Defaults to None. 
""" - step_log = self.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable)) - - self.add_code_identities(node=node, step_log=step_log) - - step_log.step_type = node.node_type - step_log.status = defaults.PROCESSING - - # Add the step log to the database as per the situation. - # If its a terminal node, complete it now - if node.node_type in ["success", "fail"]: - self.run_log_store.add_step_log(step_log, self.run_id) - self._execute_node(node, map_variable=map_variable, **kwargs) - return - - # In single step - if self.single_step: - # If the node name does not match, we move on to the next node. - if not node.name == self.single_step: - step_log.mock = True - step_log.status = defaults.SUCCESS - self.run_log_store.add_step_log(step_log, self.run_id) - return - else: # We are not in single step mode - # If previous run was successful, move on to the next step - if not self._is_eligible_for_rerun(node, map_variable=map_variable): - step_log.mock = True - step_log.status = defaults.SUCCESS - self.run_log_store.add_step_log(step_log, self.run_id) - return - - # We call an internal function to iterate the sub graphs and execute them - if node.is_composite: - self.run_log_store.add_step_log(step_log, self.run_id) - node.execute_as_graph(self, map_variable=map_variable, **kwargs) - return - - # Executor specific way to trigger a job - self.run_log_store.add_step_log(step_log, self.run_id) - self.trigger_job(node=node, map_variable=map_variable, **kwargs) - - def trigger_job(self, node: BaseNode, map_variable: dict = None, **kwargs): + ... + + @abstractmethod + def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): """ Executor specific way of triggering jobs when magnus does both traversal and execution @@ -453,9 +263,10 @@ def trigger_job(self, node: BaseNode, map_variable: dict = None, **kwargs): NOTE: We do not raise an exception as this method is not required by many extensions """ - pass + ... - def _get_status_and_next_node_name(self, current_node: BaseNode, dag: Graph, map_variable: dict = None): + @abstractmethod + def _get_status_and_next_node_name(self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None): """ Given the current node and the graph, returns the name of the next node to execute. @@ -470,19 +281,10 @@ def _get_status_and_next_node_name(self, current_node: BaseNode, dag: Graph, map map_variable (dict): If the node belongs to a map branch. """ - step_log = self.run_log_store.get_step_log(current_node._get_step_log_name(map_variable), self.run_id) - logger.info(f"Finished executing the node {current_node} with status {step_log.status}") + ... - next_node_name = current_node._get_next_node() - - if step_log.status == defaults.FAIL: - next_node_name = dag.get_fail_node().name - if current_node._get_on_failure_node(): - next_node_name = current_node._get_on_failure_node() - - return step_log.status, next_node_name - - def execute_graph(self, dag: Graph, map_variable: dict = None, **kwargs): + @abstractmethod + def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs): """ The parallelization is controlled by the nodes and not by this function. @@ -500,48 +302,10 @@ def execute_graph(self, dag: Graph, map_variable: dict = None, **kwargs): map_variable (dict, optional): If the node if of a map state, this corresponds to the value of the iterable. Defaults to None. 
""" - current_node = dag.start_at - previous_node = None - logger.info(f"Running the execution with {current_node}") - while True: - working_on = dag.get_node_by_name(current_node) - - if previous_node == current_node: - raise Exception("Potentially running in a infinite loop") - - previous_node = current_node - - logger.info(f"Creating execution log for {working_on}") - self.execute_from_graph(working_on, map_variable=map_variable, **kwargs) - - status, next_node_name = self._get_status_and_next_node_name( - current_node=working_on, dag=dag, map_variable=map_variable - ) - - if status == defaults.TRIGGERED: - # Some nodes go into triggered state and self traverse - logger.info(f"Triggered the job to execute the node {current_node}") - break - - if working_on.node_type in ["success", "fail"]: - break - - current_node = next_node_name + ... - run_log = self.run_log_store.get_branch_log(working_on._get_branch_log_name(map_variable), self.run_id) - - branch = "graph" - if working_on.internal_branch_name: - branch = working_on.internal_branch_name - - logger.info(f"Finished execution of the {branch} with status {run_log.status}") - - # get the final run log - if branch == "graph": - run_log = self.run_log_store.get_run_log_by_id(run_id=self.run_id, full=True) - print(json.dumps(run_log.dict(), indent=4)) - - def _is_eligible_for_rerun(self, node: BaseNode, map_variable: dict = None): + @abstractmethod + def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None): """ In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is necessary. @@ -559,35 +323,9 @@ def _is_eligible_for_rerun(self, node: BaseNode, map_variable: dict = None): Returns: bool: Eligibility for re-run. True means re-run, False means skip to the next step. """ - if self.previous_run_log: - node_step_log_name = node._get_step_log_name(map_variable=map_variable) - logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}") - - previous_node_log = None - try: - ( - previous_node_log, - _, - ) = self.previous_run_log.search_step_by_internal_name(node_step_log_name) - except exceptions.StepLogNotFoundError: - logger.warning(f"Did not find the node {node.name} in previous run log") - return True # We should re-run the node. - - step_log = self.run_log_store.get_step_log(node._get_step_log_name(map_variable), self.run_id) - logger.info(f"The original step status: {previous_node_log.status}") - - if previous_node_log.status == defaults.SUCCESS: - logger.info(f"The step {node.name} is marked success, not executing it") - step_log.status = defaults.SUCCESS - step_log.message = "Node execution successful in previous run, skipping it" - self.run_log_store.add_step_log(step_log, self.run_id) - return False # We need not run the node - - #  Remove previous run log to start execution from this step - logger.info(f"The new execution should start executing graph from this node {node.name}") - self.previous_run_log = None - return True + ... + @abstractmethod def send_return_code(self, stage="traversal"): """ Convenience function used by pipeline to send return code to the caller of the cli @@ -595,17 +333,14 @@ def send_return_code(self, stage="traversal"): Raises: Exception: If the pipeline execution failed """ - run_id = self.run_id - - run_log = self.run_log_store.get_run_log_by_id(run_id=run_id, full=False) - if run_log.status == defaults.FAIL: - raise Exception("Pipeline execution failed") + ... 
+ @abstractmethod def _resolve_executor_config(self, node: BaseNode): """ - The executor_config section can contain specific over-rides to an global executor config. - To avoid too much clutter in the dag definition, we allow the configuration file to have placeholders block. - The nodes can over-ride the global config by referring to key in the placeholder. + The overrides section can contain specific over-rides to an global executor config. + To avoid too much clutter in the dag definition, we allow the configuration file to have overrides block. + The nodes can over-ride the global config by referring to key in the overrides. For example: # configuration.yaml @@ -614,14 +349,14 @@ def _resolve_executor_config(self, node: BaseNode): config: k1: v1 k3: v3 - placeholders: + overrides: k2: v2 # Could be a mapping internally. # in pipeline definition.yaml dag: steps: step1: - executor_config: + overrides: cloud-implementation: k1: value_specific_to_node k2: @@ -632,30 +367,10 @@ def _resolve_executor_config(self, node: BaseNode): node (BaseNode): The current node being processed. """ - effective_node_config = copy.deepcopy(self.config.dict()) - ctx_node_config = node._get_executor_config(self.service_name) - - placeholders = self.config.placeholders - - for key, value in ctx_node_config.items(): - if not value: - if key in placeholders: # Update via placeholder only if value is None - try: - effective_node_config.update(placeholders[key]) - except TypeError: - logger.error(f"Expected value to the {key} to be a mapping but found {type(placeholders[key])}") - continue - logger.info( - f"For key: {key} in the {node.name} mode_config, there is no value provided and no \ - corresponding placeholder was found" - ) - - effective_node_config[key] = value - effective_node_config.pop("placeholders", None) - - return effective_node_config - - def execute_job(self, node: BaseNode): + ... + + @abstractmethod + def execute_job(self, node: TaskNode): """ Executor specific way of executing a job (python function or a notebook). @@ -668,9 +383,10 @@ def execute_job(self, node: BaseNode): Raises: NotImplementedError: Executors should choose to extend this functionality or not. """ - raise NotImplementedError + ... - def fan_out(self, node: BaseNode, map_variable: dict = None): + @abstractmethod + def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None): """ This method is used to appropriately fan-out the execution of a composite node. This is only useful when we want to execute a composite node during 3rd party orchestrators. @@ -690,17 +406,10 @@ def fan_out(self, node: BaseNode, map_variable: dict = None): map_variable (dict, optional): If the node if of a map state,.Defaults to None. """ - step_log = self.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable=map_variable)) - - self.add_code_identities(node=node, step_log=step_log) - - step_log.step_type = node.node_type - step_log.status = defaults.PROCESSING - self.run_log_store.add_step_log(step_log, self.run_id) + ... - node.fan_out(executor=self, map_variable=map_variable) - - def fan_in(self, node: BaseNode, map_variable: dict = None): + @abstractmethod + def fan_in(self, node: BaseNode, map_variable: TypeMapVariable = None): """ This method is used to appropriately fan-in after the execution of a composite node. This is only useful when we want to execute a composite node during 3rd party orchestrators. 
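
The overrides mechanism documented above can be read as a two-level merge: a step supplies its own values, and an empty value points at a named block in the executor's `overrides` section, which is layered over the global executor config. The sketch below mirrors the behaviour of the removed base-class implementation; the function name and merge order are illustrative and the new abstract method may resolve things differently.

```python
import copy


def resolve_executor_config(global_config: dict, overrides: dict, node_config: dict) -> dict:
    """Layer a step's override settings over the global executor config (illustrative)."""
    effective = copy.deepcopy(global_config)
    for key, value in node_config.items():
        if not value and key in overrides:
            # An empty value refers to a named block in the executor's overrides section.
            effective.update(overrides[key])
            continue
        effective[key] = value
    return effective


global_config = {"k1": "v1", "k3": "v3"}
overrides = {"k2": {"k3": "override_from_k2"}}
node_config = {"k1": "value_specific_to_node", "k2": None}

print(resolve_executor_config(global_config, overrides, node_config))
# {'k1': 'value_specific_to_node', 'k3': 'override_from_k2'}
```
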
@@ -719,402 +428,4 @@ def fan_in(self, node: BaseNode, map_variable: dict = None): map_variable (dict, optional): If the node if of a map state,.Defaults to None. """ - node.fan_in(executor=self, map_variable=map_variable) - - step_log = self.run_log_store.get_step_log(node._get_step_log_name(map_variable=map_variable), self.run_id) - - if step_log.status == defaults.FAIL: - raise Exception(f"Step {node.name} failed") - - -# --8<-- [end:docs] - - -class LocalExecutor(BaseExecutor): - """ - In the mode of local execution, we run everything on the local computer. - - This has some serious implications on the amount of time it would take to complete the run. - Also ensure that the local compute is good enough for the compute to happen of all the steps. - - Example config: - execution: - type: local - config: - enable_parallel: True or False to enable parallel. - - """ - - service_name = "local" - - def trigger_job(self, node: BaseNode, map_variable: dict = None, **kwargs): - """ - In this mode of execution, we prepare for the node execution and execute the node - - Args: - node (BaseNode): [description] - map_variable (str, optional): [description]. Defaults to ''. - """ - self.prepare_for_node_execution() - self.execute_node(node=node, map_variable=map_variable, **kwargs) - - def execute_node(self, node: BaseNode, map_variable: dict[str, str] = None, **kwargs): - """ - For local execution, we just execute the node. - - Args: - node (BaseNode): _description_ - map_variable (dict[str, str], optional): _description_. Defaults to None. - """ - self._execute_node(node=node, map_variable=map_variable, **kwargs) - - def execute_job(self, node: BaseNode): - """ - Set up the step log and call the execute node - - Args: - node (BaseNode): _description_ - """ - step_log = self.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable=None)) - - self.add_code_identities(node=node, step_log=step_log) - - step_log.step_type = node.node_type - step_log.status = defaults.PROCESSING - self.run_log_store.add_step_log(step_log, self.run_id) - self.execute_node(node=node) - - # Update the run log status - step_log = self.run_log_store.get_step_log(node._get_step_log_name(), self.run_id) - self.run_log_store.update_run_log_status(run_id=self.run_id, status=step_log.status) - - -class LocalContainerExecutor(BaseExecutor): - """ - In the mode of local-container, we execute all the commands in a container. - - Ensure that the local compute has enough resources to finish all your jobs. - - The image of the run, could either be provided as default in the configuration of the execution engine - i.e.: - execution: - type: 'local-container' - config: - docker_image: the image you want the code to run in. - - or default image could be over-ridden for a single node by providing a docker_image in the step config. - i.e: - dag: - steps: - step: - executor_config: - local-container: - docker_image: The image that you want that single step to run in. - This image would only be used for that step only. - - This mode does not build the docker image with the latest code for you, it is still left for the user to build - and ensure that the docker image provided is the correct one. - - Example config: - execution: - type: local-container - config: - docker_image: The default docker image to use if the node does not provide one. 
- """ - - service_name = "local-container" - - class Config(BaseExecutor.Config): - docker_image: str - - def __init__(self, config): - # pylint: disable=R0914,R0913 - super().__init__(config=config) - self.container_log_location = "/tmp/run_logs/" - self.container_catalog_location = "/tmp/catalog/" - self.container_secrets_location = "/tmp/dotenv" - self.volumes = {} - - @property - def docker_image(self) -> str: - """ - Returns: - str: The default docker image to use from the config. - """ - return self.config.docker_image # type: ignore - - def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs): - """ - Call the Base class to add the git code identity and add docker identity - - Args: - node (BaseNode): The node we are adding the code identity - step_log (Object): The step log corresponding to the node - """ - - super().add_code_identities(node, step_log) - mode_config = self._resolve_executor_config(node) - - docker_image = mode_config.get("docker_image", None) - if docker_image: - code_id = self.run_log_store.create_code_identity() - - code_id.code_identifier = utils.get_local_docker_image_id(docker_image) - code_id.code_identifier_type = "docker" - code_id.code_identifier_dependable = True - code_id.code_identifier_url = "local docker host" - step_log.code_identities.append(code_id) - - def execute_node(self, node: BaseNode, map_variable: dict = None, **kwargs): - """ - We are already in the container, we just execute the node. - """ - return self._execute_node(node, map_variable, **kwargs) - - def execute_job(self, node: BaseNode): - """ - Set up the step log and call the execute node - - Args: - node (BaseNode): _description_ - """ - step_log = self.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable=None)) - - self.add_code_identities(node=node, step_log=step_log) - - step_log.step_type = node.node_type - step_log.status = defaults.PROCESSING - self.run_log_store.add_step_log(step_log, self.run_id) - - command = utils.get_job_execution_command(self, node) - self._spin_container(node=node, command=command) - - # Check the step log status and warn if necessary. Docker errors are generally suppressed. - step_log = self.run_log_store.get_step_log(node._get_step_log_name(map_variable=None), self.run_id) - if step_log.status != defaults.SUCCESS: - msg = ( - "Node execution inside the container failed. Please check the logs.\n" - "Note: If you do not see any docker issue from your side and the code works properly on local execution" - "please raise a bug report." - ) - logger.warning(msg) - - def trigger_job(self, node: BaseNode, map_variable: dict = None, **kwargs): - """ - If the config has "run_in_local: True", we compute it on local system instead of container. - In local container execution, we just spin the container to execute magnus execute_single_node. - - Args: - node (BaseNode): The node we are currently executing - map_variable (str, optional): If the node is part of the map branch. Defaults to ''. - """ - executor_config = self._resolve_executor_config(node) - - logger.debug("Here is the resolved executor config") - logger.debug(executor_config) - - from magnus.nodes import TaskNode - from magnus.tasks import ContainerTaskType - - if executor_config.get("run_in_local", None) or ( - cast(TaskNode, node).executable.task_type == ContainerTaskType.task_type - ): - # Do not change config but only validate the configuration. 
- # Trigger the job on local system instead of a container - # Or if the task type is a container, just spin the container. - integration.validate(self, self.run_log_store) - integration.validate(self, self.catalog_handler) - integration.validate(self, self.secrets_handler) - - self.execute_node(node=node, map_variable=map_variable, **kwargs) - return - - command = utils.get_node_execution_command(self, node, map_variable=map_variable) - self._spin_container(node=node, command=command, map_variable=map_variable, **kwargs) - - # Check for the status of the node log and anything apart from Success is FAIL - # This typically happens if something is wrong with magnus or settings. - step_log = self.run_log_store.get_step_log(node._get_step_log_name(map_variable), self.run_id) - if step_log.status != defaults.SUCCESS: - msg = ( - "Node execution inside the container failed. Please check the logs.\n" - "Note: If you do not see any docker issue from your side and the code works properly on local execution" - "please raise a bug report." - ) - logger.warning(msg) - step_log.status = defaults.FAIL - self.run_log_store.add_step_log(step_log, self.run_id) - - def _spin_container( - self, node: BaseNode, command: str, map_variable: dict = None, **kwargs - ): # pylint: disable=unused-argument - """ - During the flow run, we have to spin up a container with the docker image mentioned - and the right log locations - """ - # Conditional import - import docker # pylint: disable=C0415 - - try: - client = docker.from_env() - api_client = docker.APIClient() - except Exception as ex: - logger.exception("Could not get access to docker") - raise Exception("Could not get the docker socket file, do you have docker installed?") from ex - - try: - logger.info(f"Running the command {command}") - #  Overrides global config with local - executor_config = self._resolve_executor_config(node) - docker_image = executor_config.get("docker_image", None) - environment = executor_config.get("environment", {}) - environment.update(self.variables) - if not docker_image: - raise Exception( - f"Please provide a docker_image using executor_config of the step {node.name} or at global config" - ) - - # TODO: Should consider using getpass.getuser() when running the docker container? Volume permissions - container = client.containers.create( - image=docker_image, - command=command, - auto_remove=False, - volumes=self.volumes, - network_mode="host", - environment=environment, - ) - container.start() - stream = api_client.logs(container=container.id, timestamps=True, stream=True, follow=True) - while True: - try: - output = next(stream).decode("utf-8") - output = output.strip("\r\n") - logger.info(output) - except StopIteration: - logger.info("Docker Run completed") - break - exit_status = api_client.inspect_container(container.id)["State"]["ExitCode"] - container.remove(force=True) - if exit_status != 0: - msg = f"Docker command failed with exit code {exit_status}" - raise Exception(msg) - - except Exception as _e: - logger.exception("Problems with spinning/running the container") - raise _e - - -class DemoRenderer(BaseExecutor): - """ - This renderer is an example of how you can render required job specifications as per your orchestration tool. - - BaseExecutor implements many of the functionalities that are common and can be safe defaults. - In this renderer example: We just render a bash script that sequentially calls the steps. - We do not handle composite steps in this execution type. 
- - Example config: - executor: - type: demo-renderer - """ - - service_name = "demo-renderer" - - def execute_node(self, node: BaseNode, map_variable: dict = None, **kwargs): - """ - This method does the actual execution of a task, as-is, success or fail node. - """ - self._set_up_run_log(exists_ok=True) - # Need to set up the step log for the node as the entry point is different - step_log = self.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable)) - - self.add_code_identities(node=node, step_log=step_log) - - step_log.step_type = node.node_type - step_log.status = defaults.PROCESSING - self.run_log_store.add_step_log(step_log, self.run_id) - - super()._execute_node(node, map_variable=map_variable, **kwargs) - - step_log = self.run_log_store.get_step_log(node._get_step_log_name(map_variable), self.run_id) - if step_log.status == defaults.FAIL: - raise Exception(f"Step {node.name} failed") - - def send_return_code(self, stage="traversal"): - """ - Convenience function used by pipeline to send return code to the caller of the cli - - Raises: - Exception: If the pipeline execution failed - """ - if stage != "traversal": # traversal does no actual execution, so return code is pointless - run_id = self.run_id - - run_log = self.run_log_store.get_run_log_by_id(run_id=run_id, full=False) - if run_log.status == defaults.FAIL: - raise Exception("Pipeline execution failed") - - def execute_graph(self, dag: Graph, map_variable: dict = None, **kwargs): - """ - Iterate through the graph and frame the bash script. - - For more complex outputs, dataclasses might be a better option. - - NOTE: This method should be over-written to write the exact specification to the compute engine. - - """ - current_node = dag.start_at - previous_node = None - logger.info(f"Rendering job started at {current_node}") - bash_script_lines = [] - - while True: - working_on = dag.get_node_by_name(current_node) - - if working_on.is_composite: - raise NotImplementedError("In this demo version, composite nodes are not implemented") - - if previous_node == current_node: - raise Exception("Potentially running in a infinite loop") - - previous_node = current_node - - logger.info(f"Creating execution log for {working_on}") - - _execute_node_command = utils.get_node_execution_command(self, working_on, over_write_run_id="$1") - re.sub("[^A-Za-z0-9]+", "", f"{current_node}_job_id") - fail_node_command = utils.get_node_execution_command(self, dag.get_fail_node(), over_write_run_id="$1") - - if working_on.node_type not in ["success", "fail"]: - if working_on.node_type == "as-is": - command_config = working_on.config.get("command_config", {}) - if "render_string" in command_config: - bash_script_lines.append(command_config["render_string"] + "\n") - else: - bash_script_lines.append(f"{_execute_node_command}\n") - - bash_script_lines.append("exit_code=$?\necho $exit_code\n") - # Write failure node - bash_script_lines.append( - ("if [ $exit_code -ne 0 ];\nthen\n" f"\t $({fail_node_command})\n" "\texit 1\n" "fi\n") - ) - - if working_on.node_type == "success": - bash_script_lines.append(f"{_execute_node_command}") - if working_on.node_type in ["success", "fail"]: - break - - current_node = working_on._get_next_node() - - with open("demo-bash.sh", "w", encoding="utf-8") as fw: - fw.writelines(bash_script_lines) - - msg = ( - "demo-bash.sh for running the pipeline is written. To execute it \n" - "1). Activate the environment:\n" - "\t for example poetry shell or pipenv shell etc\n" - "2). 
Make the shell script executable.\n" - "\t chmod 755 demo-bash.sh\n" - "3). Run the script by: source demo-bash.sh \n" - "\t The first argument to the script is the run id you want for the run." - ) - logger.info(msg) + ... diff --git a/magnus/experiment_tracker.py b/magnus/experiment_tracker.py index 5b26360e..c4d39436 100644 --- a/magnus/experiment_tracker.py +++ b/magnus/experiment_tracker.py @@ -1,38 +1,89 @@ import contextlib +import json import logging -from typing import Any +import os +from abc import ABC, abstractmethod +from collections import defaultdict +from typing import Any, ContextManager, Dict, Tuple, Union -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict +import magnus.context as context from magnus import defaults +from magnus.utils import remove_prefix + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +def retrieve_step_details(key: str) -> Tuple[str, int]: + key = remove_prefix(key, defaults.TRACK_PREFIX) + data = key.split(defaults.STEP_INDICATOR) + + key = data[0].lower() + step = 0 + + if len(data) > 1: + step = int(data[1]) + + return key, step + + +def get_tracked_data() -> Dict[str, Any]: + tracked_data: Dict[str, Any] = defaultdict(dict) + for env_var, value in os.environ.items(): + if env_var.startswith(defaults.TRACK_PREFIX): + key, step = retrieve_step_details(env_var) + + # print(value, type(value)) + try: + value = json.loads(value) + except json.decoder.JSONDecodeError: + logger.warning(f"Tracker {key} could not be JSON decoded, adding the literal value") + + tracked_data[key][step] = value + del os.environ[env_var] + + for key, value in tracked_data.items(): + if len(value) == 1: + tracked_data[key] = value[0] + + return tracked_data -logger = logging.getLogger(defaults.NAME) # --8<-- [start:docs] -class BaseExperimentTracker: +class BaseExperimentTracker(ABC, BaseModel): """ Base Experiment tracker class definition. """ - service_name = "" + service_name: str = "" + service_type: str = "experiment_tracker" - class Config(BaseModel): - pass + @property + def _context(self): + return context.run_context - def __init__(self, config: dict = None, **kwargs): # pylint: disable=unused-argument - config = config or {} - self.config = self.Config(**config) + model_config = ConfigDict(extra="forbid") @property - def client_context(self) -> Any: + def client_context(self) -> ContextManager: """ Returns the client context. """ return contextlib.nullcontext() - def log_metric(self, key: str, value: float, step: int = 0): + def publish_data(self, tracked_data: Dict[str, Any]): + for key, value in tracked_data.items(): + if isinstance(value, dict): + for key2, value2 in value.items(): + self.log_metric(key, value2, step=key2) + continue + self.log_metric(key, value) + + @abstractmethod + def log_metric(self, key: str, value: Union[int, float], step: int = 0): """ Sets the metric in the experiment tracking. @@ -46,6 +97,7 @@ def log_metric(self, key: str, value: float, step: int = 0): """ raise NotImplementedError + @abstractmethod def log_parameter(self, key: str, value: Any): """ Logs a parameter in the experiment tracking. @@ -68,9 +120,9 @@ class DoNothingTracker(BaseExperimentTracker): A Do nothing tracker """ - service_name = "do-nothing" + service_name: str = "do-nothing" - def set_metric(self, key: str, value: float, step: int = 0): + def log_metric(self, key: str, value: Union[int, float], step: int = 0): """ Sets the metric in the experiment tracking. 
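
The tracking helpers introduced above read user metrics back from environment variables, decode them as JSON where possible, and group multi-step values by step number. A self-contained sketch of that convention follows; the literal prefix and step separator (`MAGNUS_TRACK_`, `_STEP_`) are assumptions standing in for `defaults.TRACK_PREFIX` and `defaults.STEP_INDICATOR`, whose values this diff does not show.

```python
import json
import os
from collections import defaultdict

TRACK_PREFIX, STEP_INDICATOR = "MAGNUS_TRACK_", "_STEP_"  # assumed values for illustration

os.environ["MAGNUS_TRACK_accuracy_STEP_2"] = "0.93"
os.environ["MAGNUS_TRACK_model"] = '"resnet18"'


def get_tracked_data() -> dict:
    tracked: dict = defaultdict(dict)
    for env_var, value in list(os.environ.items()):
        if not env_var.startswith(TRACK_PREFIX):
            continue
        key, _, step = env_var[len(TRACK_PREFIX):].lower().partition(STEP_INDICATOR.lower())
        try:
            value = json.loads(value)
        except json.JSONDecodeError:
            pass  # keep the literal string, as the real implementation does
        tracked[key][int(step) if step else 0] = value
        del os.environ[env_var]
    # Collapse single, step-0 entries to a plain value; keep stepped metrics as a mapping.
    return {k: (v[0] if set(v) == {0} else v) for k, v in tracked.items()}


print(get_tracked_data())  # {'accuracy': {2: 0.93}, 'model': 'resnet18'}
```
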
@@ -78,10 +130,10 @@ def set_metric(self, key: str, value: float, step: int = 0): key (str): The key against you want to store the value value (float): The value of the metric """ - pass + ... def log_parameter(self, key: str, value: Any): """ Since this is a Do nothing tracker, we don't need to log anything. """ - pass + ... diff --git a/magnus/extensions/__init__.py b/magnus/extensions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/catalog/__init__.py b/magnus/extensions/catalog/__init__.py new file mode 100644 index 00000000..3561aa6b --- /dev/null +++ b/magnus/extensions/catalog/__init__.py @@ -0,0 +1,21 @@ +from typing import List, Optional + +from magnus.datastore import DataCatalog + + +def is_catalog_out_of_sync(catalog, synced_catalogs=Optional[List[DataCatalog]]) -> bool: + """ + Check if the catalog items are out of sync from already cataloged objects. + If they are, return False. + If the object does not exist or synced catalog does not exist, return True + """ + if not synced_catalogs: + return True # If nothing has been synced in the past + + for synced_catalog in synced_catalogs: + if synced_catalog.catalog_relative_path == catalog.catalog_relative_path: + if synced_catalog.data_hash == catalog.data_hash: + return False + return True + + return True # The object does not exist, sync it diff --git a/magnus/extensions/catalog/file_system/__init__.py b/magnus/extensions/catalog/file_system/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/catalog/file_system/implementation.py b/magnus/extensions/catalog/file_system/implementation.py new file mode 100644 index 00000000..9928e067 --- /dev/null +++ b/magnus/extensions/catalog/file_system/implementation.py @@ -0,0 +1,226 @@ +import logging +import os +import shutil +from pathlib import Path +from typing import List, Optional + +from magnus import defaults, utils +from magnus.catalog import BaseCatalog +from magnus.datastore import DataCatalog +from magnus.extensions.catalog import is_catalog_out_of_sync + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class FileSystemCatalog(BaseCatalog): + """ + A Catalog handler that uses the local file system for cataloging. + + Note: Do not use this if the steps of the pipeline run on different compute environments. + + Example config: + + catalog: + type: file-system + config: + catalog_location: The location to store the catalog. + compute_data_folder: The folder to source the data from. 
+ + """ + + service_name: str = "file-system" + catalog_location: str = defaults.CATALOG_LOCATION_FOLDER + + def get_catalog_location(self): + return self.catalog_location + + def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]: + """ + Get the file by matching glob pattern to the name + + Args: + name ([str]): A glob matching the file name + run_id ([str]): The run id + + Raises: + Exception: If the catalog location does not exist + + Returns: + List(object) : A list of catalog objects + """ + logger.info(f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}") + + copy_to = self.compute_data_folder + if compute_data_folder: + copy_to = compute_data_folder + + copy_to = Path(copy_to) # type: ignore + + catalog_location = self.get_catalog_location() + run_catalog = Path(catalog_location) / run_id / copy_to + + logger.debug(f"Copying objects to {copy_to} from the run catalog location of {run_catalog}") + + if not utils.does_dir_exist(run_catalog): + msg = ( + f"Expected Catalog to be present at: {run_catalog} but not found.\n" + "Note: Please make sure that some data was put in the catalog before trying to get from it.\n" + ) + raise Exception(msg) + + # Iterate through the contents of the run_catalog and copy the files that fit the name pattern + # We should also return a list of data hashes + glob_files = run_catalog.glob(name) + logger.debug(f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}") + + data_catalogs = [] + run_log_store = self._context.run_log_store + for file in glob_files: + if file.is_dir(): + # Need not add a data catalog for the folder + continue + + if str(file).endswith(".execution.log"): + continue + + relative_file_path = file.relative_to(run_catalog) + + data_catalog = run_log_store.create_data_catalog(str(relative_file_path)) + data_catalog.catalog_handler_location = catalog_location + data_catalog.catalog_relative_path = str(relative_file_path) + data_catalog.data_hash = utils.get_data_hash(str(file)) + data_catalog.stage = "get" + data_catalogs.append(data_catalog) + + # Make the directory in the data folder if required + Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True) + shutil.copy(file, copy_to / relative_file_path) + + logger.info(f"Copied {file} from {run_catalog} to {copy_to}") + + if not data_catalogs: + raise Exception(f"Did not find any files matching {name} in {run_catalog}") + + return data_catalogs + + def put( + self, + name: str, + run_id: str, + compute_data_folder: str = "", + synced_catalogs: Optional[List[DataCatalog]] = None, + **kwargs, + ) -> List[DataCatalog]: + """ + Put the files matching the glob pattern into the catalog. + + If previously synced catalogs are provided, and no changes were observed, we do not sync them. + + Args: + name (str): The glob pattern of the files to catalog + run_id (str): The run id of the run + compute_data_folder (str, optional): The compute data folder to sync from. Defaults to settings default. + synced_catalogs (dict, optional): dictionary of previously synced catalogs. Defaults to None. + + Raises: + Exception: If the compute data folder does not exist. 
+ + Returns: + List(object) : A list of catalog objects + """ + logger.info(f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}") + + copy_from = self.compute_data_folder + if compute_data_folder: + copy_from = compute_data_folder + copy_from = Path(copy_from) # type: ignore + + catalog_location = self.get_catalog_location() + run_catalog = Path(catalog_location) / run_id + utils.safe_make_dir(run_catalog) + + logger.debug(f"Copying objects from {copy_from} to the run catalog location of {run_catalog}") + + if not utils.does_dir_exist(copy_from): + msg = ( + f"Expected compute data folder to be present at: {compute_data_folder} but not found. \n" + "Note: Magnus does not create the compute data folder for you. Please ensure that the folder exists.\n" + ) + raise Exception(msg) + + # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog + # We should also return a list of datastore.DataCatalog items + + glob_files = copy_from.glob(name) # type: ignore + logger.debug(f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}") + + data_catalogs = [] + run_log_store = self._context.run_log_store + for file in glob_files: + if file.is_dir(): + # Need not add a data catalog for the folder + continue + + relative_file_path = file.relative_to(".") + + data_catalog = run_log_store.create_data_catalog(str(relative_file_path)) + data_catalog.catalog_handler_location = catalog_location + data_catalog.catalog_relative_path = run_id + os.sep + str(relative_file_path) + data_catalog.data_hash = utils.get_data_hash(str(file)) + data_catalog.stage = "put" + data_catalogs.append(data_catalog) + + if is_catalog_out_of_sync(data_catalog, synced_catalogs): + logger.info(f"{data_catalog.name} was found to be changed, syncing") + + # Make the directory in the catalog if required + Path(run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True) + shutil.copy(file, run_catalog / relative_file_path) + else: + logger.info(f"{data_catalog.name} was found to be unchanged, ignoring syncing") + + if not data_catalogs: + raise Exception(f"Did not find any files matching {name} in {copy_from}") + + return data_catalogs + + def sync_between_runs(self, previous_run_id: str, run_id: str): + """ + Given the previous run id, sync the catalogs between the current one and previous + + Args: + previous_run_id (str): The previous run id to sync the catalogs from + run_id (str): The run_id to which the data catalogs should be synced to. 
+ + Raises: + Exception: If the previous run log does not exist in the catalog + + """ + logger.info( + f"Using the {self.service_name} catalog and syncing catalogs" + "between old: {previous_run_id} to new: {run_id}" + ) + + catalog_location = Path(self.get_catalog_location()) + run_catalog = catalog_location / run_id + utils.safe_make_dir(run_catalog) + + if not utils.does_dir_exist(catalog_location / previous_run_id): + msg = ( + f"Catalogs from previous run : {previous_run_id} are not found.\n" + "Note: Please provision the catalog objects generated by previous run in the same catalog location" + " as the current run, even if the catalog handler for the previous run was different" + ) + raise Exception(msg) + + cataloged_files = list((catalog_location / previous_run_id).glob("*")) + + for cataloged_file in cataloged_files: + if str(cataloged_file).endswith("execution.log"): + continue + print(cataloged_file.name) + if cataloged_file.is_file(): + shutil.copy(cataloged_file, run_catalog / cataloged_file.name) + else: + shutil.copytree(cataloged_file, run_catalog / cataloged_file.name) + logger.info(f"Copied file from: {cataloged_file} to {run_catalog}") diff --git a/magnus/extensions/catalog/k8s_pvc/__init__.py b/magnus/extensions/catalog/k8s_pvc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/catalog/k8s_pvc/implementation.py b/magnus/extensions/catalog/k8s_pvc/implementation.py new file mode 100644 index 00000000..5ea47dec --- /dev/null +++ b/magnus/extensions/catalog/k8s_pvc/implementation.py @@ -0,0 +1,16 @@ +import logging +from pathlib import Path + +from magnus import defaults +from magnus.extensions.catalog.file_system.implementation import FileSystemCatalog + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class K8sPVCatalog(FileSystemCatalog): + service_name: str = "k8s-pvc" + persistent_volume_name: str + mount_path: str + + def get_catalog_location(self): + return str(Path(self.mount_path) / self.catalog_location) diff --git a/magnus/extensions/catalog/k8s_pvc/integration.py b/magnus/extensions/catalog/k8s_pvc/integration.py new file mode 100644 index 00000000..e3d886c1 --- /dev/null +++ b/magnus/extensions/catalog/k8s_pvc/integration.py @@ -0,0 +1,59 @@ +import logging +from typing import cast + +from magnus import defaults +from magnus.integration import BaseIntegration + +logger = logging.getLogger(defaults.NAME) + + +class LocalCompute(BaseIntegration): + """ + Integration between local and k8's pvc + """ + + executor_type = "local" + service_type = "catalog" # One of secret, catalog, datastore + service_provider = "k8s-pvc" # The actual implementation of the service + + def validate(self, **kwargs): + msg = "We can't use the local compute k8s pvc store integration." + raise Exception(msg) + + +class LocalContainerCompute(BaseIntegration): + """ + Integration between local-container and k8's pvc + """ + + executor_type = "local-container" + service_type = "catalog" # One of secret, catalog, datastore + service_provider = "k8s-pvc" # The actual implementation of the service + + def validate(self, **kwargs): + msg = "We can't use the local-container compute k8s pvc store integration." 
+ raise Exception(msg) + + +class ArgoCompute(BaseIntegration): + """ + Integration between argo and k8's pvc + """ + + executor_type = "argo" + service_type = "catalog" # One of secret, catalog, datastore + service_provider = "k8s-pvc" # The actual implementation of the service + + def configure_for_traversal(self, **kwargs): + from magnus.extensions.catalog.k8s_pvc.implementation import K8sPVCatalog + from magnus.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts + + self.executor = cast(ArgoExecutor, self.executor) + self.service = cast(K8sPVCatalog, self.service) + + volume_mount = UserVolumeMounts( + name=self.service.persistent_volume_name, + mount_path=self.service.mount_path, + ) + + self.executor.persistent_volumes.append(volume_mount) diff --git a/magnus/extensions/executor/__init__.py b/magnus/extensions/executor/__init__.py new file mode 100644 index 00000000..45430f2e --- /dev/null +++ b/magnus/extensions/executor/__init__.py @@ -0,0 +1,714 @@ +import copy +import json +import logging +import os +from abc import abstractmethod +from typing import Any, Dict, List, Optional, cast + +from rich import print + +from magnus import context, defaults, exceptions, integration, parameters, utils +from magnus.datastore import DataCatalog, RunLog, StepLog +from magnus.defaults import TypeMapVariable +from magnus.executor import BaseExecutor +from magnus.experiment_tracker import get_tracked_data +from magnus.extensions.nodes import TaskNode +from magnus.graph import Graph +from magnus.nodes import BaseNode + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class GenericExecutor(BaseExecutor): + """ + The skeleton of an executor class. + Any implementation of an executor should inherit this class and over-ride accordingly. + + This is a loaded base class which has a lot of methods already implemented for "typical" executions. + Look at the function docs to understand how to use them appropriately. + + For any implementation: + 1). Who/when should the run log be set up? + 2). Who/When should the step log be set up? + + """ + + service_name: str = "" + service_type: str = "executor" + + @property + def _context(self): + return context.run_context + + @property + def step_decorator_run_id(self): + """ + TODO: Experimental feature, design is not mature yet. + + This function is used by the decorator function. + The design idea is we can over-ride this method in different implementations to retrieve the run_id. + But is it really intrusive to ask to set the environmental variable MAGNUS_RUN_ID? + + Returns: + _type_: _description_ + """ + return os.environ.get("MAGNUS_RUN_ID", None) + + def _get_parameters(self) -> Dict[str, Any]: + """ + Consolidate the parameters from the environment variables + and the parameters file. + + The parameters defined in the environment variables take precedence over the parameters file. 
+ + Returns: + _type_: _description_ + """ + params: Dict[str, Any] = {} + if self._context.parameters_file: + params.update(utils.load_yaml(self._context.parameters_file)) + + # Update these with some from the environment variables + params.update(parameters.get_user_set_parameters()) + return params + + def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None: + try: + attempt_run_log = self._context.run_log_store.get_run_log_by_id( + run_id=self._context.original_run_id, full=False + ) + except exceptions.RunLogNotFoundError as e: + msg = ( + f"Expected a run log with id: {self._context.original_run_id} " + "but it does not exist in the run log store. " + "If the original execution was in a different environment, ensure that it is available in the current " + "environment." + ) + logger.exception(msg) + raise Exception(msg) from e + + # Sync the previous run log catalog to this one. + self._context.catalog_handler.sync_between_runs( + previous_run_id=self._context.original_run_id, run_id=self._context.run_id + ) + + parameters.update(cast(RunLog, attempt_run_log).parameters) + + def _set_up_run_log(self, exists_ok=False): + """ + Create a run log and put that in the run log store + + If exists_ok, we allow the run log to be already present in the run log store. + """ + try: + attempt_run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=False) + + logger.warning(f"The run log by id: {self._context.run_id} already exists") + raise exceptions.RunLogExistsError( + f"The run log by id: {self._context.run_id} already exists and is {attempt_run_log.status}" + ) + except exceptions.RunLogNotFoundError: + pass + except exceptions.RunLogExistsError: + if exists_ok: + return + raise + + # Consolidate and get the parameters + parameters = self._get_parameters() + + if self._context.use_cached: + self._set_up_for_re_run(parameters=parameters) + + self._context.run_log_store.create_run_log( + run_id=self._context.run_id, + tag=self._context.tag, + status=defaults.PROCESSING, + dag_hash=self._context.dag_hash, + use_cached=self._context.use_cached, + original_run_id=self._context.original_run_id, + ) + # Any interaction with run log store attributes should happen via API if available. + self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=parameters) + + # Update run_config + run_config = utils.get_run_config() + self._context.run_log_store.set_run_config(run_id=self._context.run_id, run_config=run_config) + + def prepare_for_graph_execution(self): + """ + This method should be called prior to calling execute_graph. + Perform any steps required before doing the graph execution. + + The most common implementation is to prepare a run log for the run if the run uses local interactive compute. + + But in cases of actual rendering the job specs (eg: AWS step functions, K8's) we check if the services are OK. + We do not set up a run log as its not relevant. 
+ """ + + integration.validate(self, self._context.run_log_store) + integration.configure_for_traversal(self, self._context.run_log_store) + + integration.validate(self, self._context.catalog_handler) + integration.configure_for_traversal(self, self._context.catalog_handler) + + integration.validate(self, self._context.secrets_handler) + integration.configure_for_traversal(self, self._context.secrets_handler) + + integration.validate(self, self._context.experiment_tracker) + integration.configure_for_traversal(self, self._context.experiment_tracker) + + self._set_up_run_log() + + def prepare_for_node_execution(self): + """ + Perform any modifications to the services prior to execution of the node. + + Args: + node (Node): [description] + map_variable (dict, optional): [description]. Defaults to None. + """ + integration.validate(self, self._context.run_log_store) + integration.configure_for_execution(self, self._context.run_log_store) + + integration.validate(self, self._context.catalog_handler) + integration.configure_for_execution(self, self._context.catalog_handler) + + integration.validate(self, self._context.secrets_handler) + integration.configure_for_execution(self, self._context.secrets_handler) + + integration.validate(self, self._context.experiment_tracker) + integration.configure_for_execution(self, self._context.experiment_tracker) + + def _sync_catalog(self, step_log: StepLog, stage: str, synced_catalogs=None) -> Optional[List[DataCatalog]]: + """ + 1). Identify the catalog settings by over-riding node settings with the global settings. + 2). For stage = get: + Identify the catalog items that are being asked to get from the catalog + And copy them to the local compute data folder + 3). For stage = put: + Identify the catalog items that are being asked to put into the catalog + Copy the items from local compute folder to the catalog + 4). Add the items onto the step log according to the stage + + Args: + node (Node): The current node being processed + step_log (StepLog): The step log corresponding to that node + stage (str): One of get or put + + Raises: + Exception: If the stage is not in one of get/put + + """ + if stage not in ["get", "put"]: + msg = ( + "Catalog service only accepts get/put possible actions as part of node execution." 
+ f"Sync catalog of the executor: {self.service_name} asks for {stage} which is not accepted" + ) + raise Exception(msg) + + try: + node_catalog_settings = self._context_node._get_catalog_settings() + except exceptions.TerminalNodeError: + return None + + if not (node_catalog_settings and stage in node_catalog_settings): + logger.info("No catalog settings found for stage: %s", stage) + # Nothing to get/put from the catalog + return None + + compute_data_folder = self.get_effective_compute_data_folder() + + data_catalogs = [] + for name_pattern in node_catalog_settings.get(stage) or []: + if stage == "get": + data_catalog = self._context.catalog_handler.get( + name=name_pattern, run_id=self._context.run_id, compute_data_folder=compute_data_folder + ) + elif stage == "put": + data_catalog = self._context.catalog_handler.put( + name=name_pattern, + run_id=self._context.run_id, + compute_data_folder=compute_data_folder, + synced_catalogs=synced_catalogs, + ) + else: + raise Exception(f"Invalid stage: {stage}") + logger.info(f"Added data catalog: {data_catalog} to step log") + data_catalogs.extend(data_catalog) + + if data_catalogs: + step_log.add_data_catalogs(data_catalogs) + + return data_catalogs + + def get_effective_compute_data_folder(self) -> str: + """ + Get the effective compute data folder for the given stage. + If there is nothing to catalog, we return None. + + The default is the compute data folder of the catalog but this can be over-ridden by the node. + + Args: + stage (str): The stage we are in the process of cataloging + + + Returns: + str: The compute data folder as defined by the node defaulting to catalog handler + """ + compute_data_folder = self._context.catalog_handler.compute_data_folder + + catalog_settings = self._context_node._get_catalog_settings() + effective_compute_data_folder = catalog_settings.get("compute_data_folder", "") or compute_data_folder + + return effective_compute_data_folder + + @property + def step_attempt_number(self) -> int: + """ + The attempt number of the current step. + Orchestrators should use this step to submit multiple attempts of the job. + + Returns: + int: The attempt number of the current step. Defaults to 1. + """ + return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1)) + + def _execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + This is the entry point when we do the actual execution of the function. + DO NOT Over-ride this function. + + While in interactive execution, we just compute, in 3rd party interactive execution, we need to reach + this function. + + In most cases, + * We get the corresponding step_log of the node and the parameters. + * We sync the catalog to GET any data sets that are in the catalog + * We call the execute method of the node for the actual compute and retry it as many times as asked. + * If the node succeeds, we get any of the user defined metrics provided by the user. + * We sync the catalog to PUT any data sets that are in the catalog. + + Args: + node (Node): The node to execute + map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable. + Defaults to None. + """ + step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id) + """ + By now, all the parameters are part of the run log as a dictionary. + We set them as environment variables, serialized as json strings. 
+ """ + params = self._context.run_log_store.get_parameters(run_id=self._context.run_id) + parameters.set_user_defined_params_as_environment_variables(params) + + attempt = self.step_attempt_number + logger.info(f"Trying to execute node: {node.internal_name}, attempt : {attempt}") + + attempt_log = self._context.run_log_store.create_attempt_log() + self._context_step_log = step_log + self._context_node = node + + data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(step_log, stage="get") + try: + attempt_log = node.execute(executor=self, mock=step_log.mock, map_variable=map_variable, **kwargs) + except Exception as e: + # Any exception here is a magnus exception as node suppresses exceptions. + msg = "This is clearly magnus fault, please report a bug and the logs" + logger.exception(msg) + raise Exception(msg) from e + finally: + attempt_log.attempt_number = attempt + attempt_log.parameters = params.copy() + step_log.attempts.append(attempt_log) + + tracked_data = get_tracked_data() + + self._context.experiment_tracker.publish_data(tracked_data) + # By this point, the updated parameters are deserialized as json strings. + parameters_out = parameters.get_user_set_parameters(remove=True) + + if attempt_log.status == defaults.FAIL: + logger.exception(f"Node: {node} failed") + step_log.status = defaults.FAIL + else: + # Mock is always set to False, bad design?? + # TODO: Stub nodes should not sync back data + # TODO: Errors in catalog syncing should point to Fail step + # TODO: Even for a failed execution, the catalog can happen + step_log.status = defaults.SUCCESS + self._sync_catalog(step_log, stage="put", synced_catalogs=data_catalogs_get) + step_log.user_defined_metrics = tracked_data + diff_parameters = utils.diff_dict(params, parameters_out) + self._context.run_log_store.set_parameters(self._context.run_id, diff_parameters) + + # Remove the step context + self._context_step_log = None + self._context_node = None # type: ignore + self._context_metrics = {} # type: ignore + + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs): + """ + Add code identities specific to the implementation. + + The Base class has an implementation of adding git code identities. + + Args: + step_log (object): The step log object + node (BaseNode): The node we are adding the step log for + """ + step_log.code_identities.append(utils.get_git_code_identity()) + + def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + This is the entry point to from the graph execution. + + While the self.execute_graph is responsible for traversing the graph, this function is responsible for + actual execution of the node. + + If the node type is: + * task : We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run + * success: We can delegate to _execute_node + * fail: We can delegate to _execute_node + + For nodes that are internally graphs: + * parallel: Delegate the responsibility of execution to the node.execute_as_graph() + * dag: Delegate the responsibility of execution to the node.execute_as_graph() + * map: Delegate the responsibility of execution to the node.execute_as_graph() + + Transpilers will NEVER use this method and will NEVER call ths method. + This method should only be used by interactive executors. 
+
+ Args:
+ node (Node): The node to execute
+ map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+ Defaults to None.
+ """
+ step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+
+ self.add_code_identities(node=node, step_log=step_log)
+
+ step_log.step_type = node.node_type
+ step_log.status = defaults.PROCESSING
+
+ # Add the step log to the database as per the situation.
+ # If it's a terminal node, complete it now
+ if node.node_type in ["success", "fail"]:
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ self._execute_node(node, map_variable=map_variable, **kwargs)
+ return
+
+ # In single step
+ if (self._single_step and not node.name == self._single_step) or not self._is_step_eligible_for_rerun(
+ node, map_variable=map_variable
+ ):
+ # If the node name does not match, we move on to the next node.
+ # If the previous run was successful, move on to the next step
+ step_log.mock = True
+ step_log.status = defaults.SUCCESS
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ return
+ # We call an internal function to iterate the sub graphs and execute them
+ if node.is_composite:
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ node.execute_as_graph(map_variable=map_variable, **kwargs)
+ return
+
+ # Executor specific way to trigger a job
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ self.trigger_job(node=node, map_variable=map_variable, **kwargs)
+
+ def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+ """
+ Call this method only if we are responsible for traversing the graph via
+ execute_from_graph().
+
+ We are not prepared to execute the node as of now.
+
+ Args:
+ node (BaseNode): The node to execute
+ map_variable (str, optional): If the node is of a map state, this corresponds to the value of the iterable.
+ Defaults to ''.
+
+ NOTE: We do not raise an exception as this method is not required by many extensions
+ """
+ pass
+
+ def _get_status_and_next_node_name(self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None):
+ """
+ Given the current node and the graph, returns the name of the next node to execute.
+
+ The name is always relative to the graph that the node resides in.
+
+ If the current node succeeded, we return the next node as per the graph.
+ If the current node failed, we return the on failure node of the node (if provided) or the global one.
+
+ This method is only used by interactive executors, i.e., local and local-container.
+
+ Args:
+ current_node (BaseNode): The current node.
+ dag (Graph): The dag we are traversing.
+ map_variable (dict): If the node belongs to a map branch.
+ + """ + + step_log = self._context.run_log_store.get_step_log( + current_node._get_step_log_name(map_variable), self._context.run_id + ) + logger.info(f"Finished executing the node {current_node} with status {step_log.status}") + + try: + next_node_name = current_node._get_next_node() + except exceptions.TerminalNodeError: + next_node_name = "" + + if step_log.status == defaults.FAIL: + next_node_name = dag.get_fail_node().name + if current_node._get_on_failure_node(): + next_node_name = current_node._get_on_failure_node() + + return step_log.status, next_node_name + + def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs): + """ + The parallelization is controlled by the nodes and not by this function. + + Transpilers should over ride this method to do the translation of dag to the platform specific way. + Interactive methods should use this to traverse and execute the dag. + - Use execute_from_graph to handle sub-graphs + + Logically the method should: + * Start at the dag.start_at of the dag. + * Call the self.execute_from_graph(node) + * depending upon the status of the execution, either move to the success node or failure node. + + Args: + dag (Graph): The directed acyclic graph to traverse and execute. + map_variable (dict, optional): If the node if of a map state, this corresponds to the value of the iterable. + Defaults to None. + """ + current_node = dag.start_at + previous_node = None + logger.info(f"Running the execution with {current_node}") + + while True: + working_on = dag.get_node_by_name(current_node) + + if previous_node == current_node: + raise Exception("Potentially running in a infinite loop") + + previous_node = current_node + + logger.info(f"Creating execution log for {working_on}") + self.execute_from_graph(working_on, map_variable=map_variable, **kwargs) + + status, next_node_name = self._get_status_and_next_node_name( + current_node=working_on, dag=dag, map_variable=map_variable + ) + + if status == defaults.TRIGGERED: + # Some nodes go into triggered state and self traverse + logger.info(f"Triggered the job to execute the node {current_node}") + break + + if working_on.node_type in ["success", "fail"]: + break + + current_node = next_node_name + + run_log = self._context.run_log_store.get_branch_log( + working_on._get_branch_log_name(map_variable), self._context.run_id + ) + + branch = "graph" + if working_on.internal_branch_name: + branch = working_on.internal_branch_name + + logger.info(f"Finished execution of the {branch} with status {run_log.status}") + + # get the final run log + if branch == "graph": + run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True) + print(json.dumps(run_log.model_dump(), indent=4)) + + def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None): + """ + In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is + necessary. + * True: If its not a re-run. + * True: If its a re-run and we failed in the last run or the corresponding logs do not exist. + * False: If its a re-run and we succeeded in the last run. + + Most cases, this logic need not be touched + + Args: + node (Node): The node to check against re-run + map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable.. + Defaults to None. + + Returns: + bool: Eligibility for re-run. True means re-run, False means skip to the next step. 
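+
+ For example (illustrative): on a use_cached re-run, a step whose previous step log ended in SUCCESS is
+ skipped (mocked as successful), while a failed or missing previous step log makes the step eligible for
+ a fresh execution.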
+ """ + if self._context.use_cached: + node_step_log_name = node._get_step_log_name(map_variable=map_variable) + logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}") + + try: + previous_node_log = self._context.run_log_store.get_step_log( + internal_name=node_step_log_name, run_id=self._context.original_run_id + ) + except exceptions.StepLogNotFoundError: + logger.warning(f"Did not find the node {node.name} in previous run log") + return True # We should re-run the node. + + logger.info(f"The original step status: {previous_node_log.status}") + + if previous_node_log.status == defaults.SUCCESS: + return False # We need not run the node + + logger.info(f"The new execution should start executing graph from this node {node.name}") + return True + + return True + + def send_return_code(self, stage="traversal"): + """ + Convenience function used by pipeline to send return code to the caller of the cli + + Raises: + Exception: If the pipeline execution failed + """ + run_id = self._context.run_id + + run_log = self._context.run_log_store.get_run_log_by_id(run_id=run_id, full=False) + if run_log.status == defaults.FAIL: + raise exceptions.ExecutionFailedError(run_id=run_id) + + def _resolve_executor_config(self, node: BaseNode): + """ + The overrides section can contain specific over-rides to an global executor config. + To avoid too much clutter in the dag definition, we allow the configuration file to have overrides block. + The nodes can over-ride the global config by referring to key in the overrides. + + This function also applies variables to the effective node config. + + For example: + # configuration.yaml + execution: + type: cloud-implementation + config: + k1: v1 + k3: v3 + overrides: + custom_config: + k1: v11 + k2: v2 # Could be a mapping internally. + + # in pipeline definition.yaml + dag: + steps: + step1: + overrides: + cloud-implementation: custom_config + + This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'} + + Args: + node (BaseNode): The current node being processed. + + """ + effective_node_config = copy.deepcopy(self.model_dump()) + try: + ctx_node_config = node._get_executor_config(self.service_name) + except exceptions.TerminalNodeError: + # Some modes request for effective node config even for success or fail nodes + return utils.apply_variables(effective_node_config, self._context.variables) + + if ctx_node_config: + if ctx_node_config not in self.overrides: + raise Exception(f"No override of key: {ctx_node_config} found in the overrides section") + + effective_node_config.update(self.overrides[ctx_node_config]) + + effective_node_config = utils.apply_variables(effective_node_config, self._context.variables) + logger.debug(f"Effective node config: {effective_node_config}") + + return effective_node_config + + @abstractmethod + def execute_job(self, node: TaskNode): + """ + Executor specific way of executing a job (python function or a notebook). + + Interactive executors should execute the job. + Transpilers should write the instructions. + + Args: + node (BaseNode): The job node to execute + + Raises: + NotImplementedError: Executors should choose to extend this functionality or not. + """ + raise NotImplementedError + + def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None): + """ + This method is used to appropriately fan-out the execution of a composite node. + This is only useful when we want to execute a composite node during 3rd party orchestrators. 
+
+ Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+ step which is composite. By calling this fan-out before calling the leaf nodes, we have an opportunity to
+ do the right set up (creating the step log, exposing the parameters, etc.) for the composite step.
+
+ All 3rd party orchestrators should use this method to fan-out the execution of a composite node.
+ This ensures:
+ - The dot path notation is preserved; this method should create the step and call the node's fan out to
+ create the branch logs and let the 3rd party do the actual step execution.
+ - 3rd party orchestrators get an opportunity to set up what is required for running a composite node.
+
+ Args:
+ node (BaseNode): The node to fan-out
+ map_variable (dict, optional): If the node is of a map state. Defaults to None.
+
+ """
+ step_log = self._context.run_log_store.create_step_log(
+ node.name, node._get_step_log_name(map_variable=map_variable)
+ )
+
+ self.add_code_identities(node=node, step_log=step_log)
+
+ step_log.step_type = node.node_type
+ step_log.status = defaults.PROCESSING
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+ node.fan_out(executor=self, map_variable=map_variable)
+
+ def fan_in(self, node: BaseNode, map_variable: TypeMapVariable = None):
+ """
+ This method is used to appropriately fan-in after the execution of a composite node.
+ This is only useful when we want to execute a composite node during 3rd party orchestrators.
+
+ Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+ step which is composite. By calling this fan-in after calling the leaf nodes, we have an opportunity to
+ act depending upon the status of the individual branches.
+
+ All 3rd party orchestrators should use this method to fan-in the execution of a composite node.
+ This ensures:
+ - The renderers get control over where to go depending upon the state of the composite node.
+ - The status of the step and its underlying branches are correctly updated.
+
+ Args:
+ node (BaseNode): The node to fan-in
+ map_variable (dict, optional): If the node is of a map state. Defaults to None.
+ + """ + node.fan_in(executor=self, map_variable=map_variable) + + step_log = self._context.run_log_store.get_step_log( + node._get_step_log_name(map_variable=map_variable), self._context.run_id + ) + + if step_log.status == defaults.FAIL: + raise Exception(f"Step {node.name} failed") diff --git a/magnus/extensions/executor/argo/__init__.py b/magnus/extensions/executor/argo/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/executor/argo/implementation.py b/magnus/extensions/executor/argo/implementation.py new file mode 100644 index 00000000..2065381e --- /dev/null +++ b/magnus/extensions/executor/argo/implementation.py @@ -0,0 +1,1182 @@ +import json +import logging +import random +import shlex +import string +from abc import ABC, abstractmethod +from collections import OrderedDict +from typing import Any, Dict, List, Optional, Union, cast + +from pydantic import BaseModel, ConfigDict, Field, computed_field, field_serializer, field_validator +from pydantic.functional_serializers import PlainSerializer +from ruamel.yaml import YAML +from typing_extensions import Annotated + +from magnus import defaults, exceptions, integration, parameters, utils +from magnus.defaults import TypeMapVariable +from magnus.extensions.executor import GenericExecutor +from magnus.extensions.nodes import DagNode, MapNode, ParallelNode +from magnus.graph import Graph, create_node, search_node_by_internal_name +from magnus.integration import BaseIntegration +from magnus.nodes import BaseNode + +logger = logging.getLogger(defaults.NAME) + +# TODO: Leave the run log in consistent state. + +""" +executor: + type: argo + config: + image: # apply to template + max_workflow_duration: # Apply to spec + nodeSelector: #Apply to spec + parallelism: #apply to spec + resources: # convert to podSpecPath + limits: + requests: + retryStrategy: + max_step_duration: # apply to templateDefaults + step_timeout: # apply to templateDefaults + tolerations: # apply to spec + imagePullPolicy: # apply to template + + overrides: + override: + tolerations: # template + image: # container + max_step_duration: # template + step_timeout: #template + nodeSelector: #template + parallelism: # this need to applied for map + resources: # container + imagePullPolicy: #container + retryStrategy: # template +""" + + +class SecretEnvVar(BaseModel): + """ + Renders: + env: + - name: MYSECRETPASSWORD + valueFrom: + secretKeyRef: + name: my-secret + key: mypassword + """ + + environment_variable: str = Field(serialization_alias="name") + secret_name: str = Field(exclude=True) + secret_key: str = Field(exclude=True) + + @computed_field # type: ignore + @property + def valueFrom(self) -> Dict[str, Dict[str, str]]: + return { + "secretKeyRef": { + "name": self.secret_name, + "key": self.secret_key, + } + } + + +class EnvVar(BaseModel): + """ + Renders: + parameters: # in arguments + - name: x + value: 3 # This is optional for workflow parameters + + """ + + name: str + value: Union[str, int, float] = Field(default="") + + +class Parameter(BaseModel): + name: str + value: Optional[str] = None + + @field_serializer("name") + def serialize_name(self, name: str) -> str: + return f"{str(name)}" + + @field_serializer("value") + def serialize_value(self, value: str) -> str: + return f"{value}" + + +class OutputParameter(Parameter): + """ + Renders: + - name: step-name + valueFrom: + path: /tmp/output.txt + """ + + path: str = Field(default="/tmp/output.txt", exclude=True) + + @computed_field # type: ignore + @property + def 
valueFrom(self) -> Dict[str, str]: + return {"path": self.path} + + +class Argument(BaseModel): + """ + Templates are called with arguments, which become inputs for the template + Renders: + arguments: + parameters: + - name: The name of the parameter + value: The value of the parameter + """ + + name: str + value: str + + @field_serializer("name") + def serialize_name(self, name: str) -> str: + return f"{str(name)}" + + @field_serializer("value") + def serialize_value(self, value: str) -> str: + return f"{value}" + + +class Request(BaseModel): + """ + The default requests + """ + + memory: str = "1Gi" + cpu: str = "250m" + + +VendorGPU = Annotated[ + Optional[int], + PlainSerializer(lambda x: str(x), return_type=str, when_used="unless-none"), +] + + +class Limit(Request): + """ + The default limits + """ + + gpu: VendorGPU = Field(default=None, serialization_alias="nvidia.com/gpu") + + +class Resources(BaseModel): + limits: Limit = Field(default=Limit(), serialization_alias="limits") + requests: Request = Field(default=Request(), serialization_alias="requests") + + +class BackOff(BaseModel): + duration_in_seconds: int = Field(default=2 * 60, serialization_alias="duration") + factor: float = Field(default=2, serialization_alias="factor") + max_duration: int = Field(default=60 * 60, serialization_alias="maxDuration") + + @field_serializer("duration_in_seconds") + def cast_duration_as_str(self, duration_in_seconds: int, _info) -> str: + return str(duration_in_seconds) + + @field_serializer("max_duration") + def cast_mas_duration_as_str(self, max_duration: int, _info) -> str: + return str(max_duration) + + +class Retry(BaseModel): + limit: int = 0 + retry_policy: str = Field(default="Always", serialization_alias="retryPolicy") + back_off: BackOff = Field(default=BackOff(), serialization_alias="backoff") + + @field_serializer("limit") + def cast_limit_as_str(self, limit: int, _info) -> str: + return str(limit) + + +class Toleration(BaseModel): + effect: str + key: str + operator: str + value: str + + +class TemplateDefaults(BaseModel): + max_step_duration: int = Field( + default=60 * 60 * 2, + serialization_alias="activeDeadlineSeconds", + gt=0, + description="Max run time of a step", + ) + + @computed_field # type: ignore + @property + def timeout(self) -> str: + return f"{self.max_step_duration + 60*60}s" + + +ShlexCommand = Annotated[str, PlainSerializer(lambda x: shlex.split(x), return_type=List[str])] + + +class Container(BaseModel): + image: str + command: ShlexCommand + volume_mounts: Optional[List["ContainerVolume"]] = Field(default=None, serialization_alias="volumeMounts") + image_pull_policy: str = Field(default="", serialization_alias="imagePullPolicy") + resources: Optional[Resources] = Field(default=None, serialization_alias="resources") + + env_vars: List[EnvVar] = Field(default_factory=list, exclude=True) + secrets_from_k8s: List[SecretEnvVar] = Field(default_factory=list, exclude=True) + + @computed_field # type: ignore + @property + def env(self) -> Optional[List[Union[EnvVar, SecretEnvVar]]]: + if not self.env_vars and not self.secrets_from_k8s: + return None + + return self.env_vars + self.secrets_from_k8s + + +class DagTaskTemplate(BaseModel): + """ + dag: + tasks: + name: A + template: nested-diamond + arguments: + parameters: [{name: message, value: A}] + """ + + name: str + template: str + depends: List[str] = [] + arguments: Optional[List[Argument]] = Field(default=None) + with_param: Optional[str] = Field(default=None, serialization_alias="withParam") + + 
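+ # Note: Argo expects "depends" as a single expression string; the serializer below joins the
+ # collected entries (e.g. "step-a.Succeeded") with " || ".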
@field_serializer("depends") + def transform_depends_as_str(self, depends: List[str]) -> str: + return " || ".join(depends) + + @field_serializer("arguments", when_used="unless-none") + def empty_arguments_to_none(self, arguments: List[Argument]) -> Dict[str, List[Argument]]: + return {"parameters": arguments} + + +class ContainerTemplate(BaseModel): + # These templates are used for actual execution nodes. + name: str + active_deadline_seconds: Optional[int] = Field(default=None, serialization_alias="activeDeadlineSeconds", gt=0) + node_selector: Optional[Dict[str, str]] = Field(default=None, serialization_alias="nodeSelector") + retry_strategy: Optional[Retry] = Field(default=None, serialization_alias="retryStrategy") + tolerations: Optional[List[Toleration]] = Field(default=None, serialization_alias="tolerations") + + container: Container + + outputs: Optional[List[OutputParameter]] = Field(default=None, serialization_alias="outputs") + inputs: Optional[List[Parameter]] = Field(default=None, serialization_alias="inputs") + + def __hash__(self): + return hash(self.name) + + @field_serializer("outputs", when_used="unless-none") + def reshape_outputs(self, outputs: List[OutputParameter]) -> Dict[str, List[OutputParameter]]: + return {"parameters": outputs} + + @field_serializer("inputs", when_used="unless-none") + def reshape_inputs(self, inputs: List[Parameter]) -> Dict[str, List[Parameter]]: + return {"parameters": inputs} + + +class DagTemplate(BaseModel): + # These are used for parallel, map nodes dag definition + name: str = "magnus-dag" + tasks: List[DagTaskTemplate] = Field(default=[], exclude=True) + inputs: Optional[List[Parameter]] = Field(default=None, serialization_alias="inputs") + parallelism: Optional[int] = None + fail_fast: bool = Field(default=True, serialization_alias="failFast") + + @field_validator("parallelism") + @classmethod + def validate_parallelism(cls, parallelism: Optional[int]) -> Optional[int]: + if parallelism is not None and parallelism <= 0: + raise ValueError("Parallelism must be a positive integer greater than 0") + return parallelism + + @computed_field # type: ignore + @property + def dag(self) -> Dict[str, List[DagTaskTemplate]]: + return {"tasks": self.tasks} + + @field_serializer("inputs", when_used="unless-none") + def reshape_inputs(self, inputs: List[Parameter], _info) -> Dict[str, List[Parameter]]: + return {"parameters": inputs} + + +class Volume(BaseModel): + """ + spec config requires, name and persistentVolumeClaim + step requires name and mountPath + """ + + name: str + claim: str = Field(exclude=True) + mount_path: str = Field(serialization_alias="mountPath", exclude=True) + + @computed_field # type: ignore + @property + def persistentVolumeClaim(self) -> Dict[str, str]: + return {"claimName": self.claim} + + +class ContainerVolume(BaseModel): + name: str + mount_path: str = Field(serialization_alias="mountPath") + + +class UserVolumeMounts(BaseModel): + """ + The volume specification as user defines it. 
+ """ + + name: str # This is the name of the PVC on K8s + mount_path: str # This is mount path on the container + + +class NodeRenderer(ABC): + allowed_node_types: List[str] = [] + + def __init__(self, executor: "ArgoExecutor", node: BaseNode) -> None: + self.executor = executor + self.node = node + + @abstractmethod + def render(self, list_of_iter_values: Optional[List] = None): + pass + + +class ExecutionNode(NodeRenderer): + allowed_node_types = ["task", "stub", "success", "fail"] + + def render(self, list_of_iter_values: Optional[List] = None): + """ + Compose the map variable and create the execution command. + Create an input to the command. + create_container_template : creates an argument for the list of iter values + """ + map_variable = self.executor.compose_map_variable(list_of_iter_values) + command = utils.get_node_execution_command( + self.node, + over_write_run_id=self.executor._run_id_placeholder, + map_variable=map_variable, + ) + + inputs = [] + if list_of_iter_values: + for val in list_of_iter_values: + inputs.append(Parameter(name=val)) + + # Create the container template + container_template = self.executor.create_container_template( + working_on=self.node, + command=command, + inputs=inputs, + ) + + self.executor._container_templates.append(container_template) + + +class DagNodeRenderer(NodeRenderer): + allowed_node_types = ["dag"] + + def render(self, list_of_iter_values: Optional[List] = None): + self.node = cast(DagNode, self.node) + task_template_arguments = [] + dag_inputs = [] + if list_of_iter_values: + for value in list_of_iter_values: + task_template_arguments.append(Argument(name=value, value="{{inputs.parameters." + value + "}}")) + dag_inputs.append(Parameter(name=value)) + + clean_name = self.executor.get_clean_name(self.node) + fan_out_template = self.executor._create_fan_out_template( + composite_node=self.node, list_of_iter_values=list_of_iter_values + ) + fan_out_template.arguments = task_template_arguments if task_template_arguments else None + + fan_in_template = self.executor._create_fan_in_template( + composite_node=self.node, list_of_iter_values=list_of_iter_values + ) + fan_in_template.arguments = task_template_arguments if task_template_arguments else None + + self.executor._gather_task_templates_of_dag( + self.node.branch, + dag_name=f"{clean_name}-branch", + list_of_iter_values=list_of_iter_values, + ) + + branch_template = DagTaskTemplate( + name=f"{clean_name}-branch", + template=f"{clean_name}-branch", + arguments=task_template_arguments if task_template_arguments else None, + ) + branch_template.depends.append(f"{clean_name}-fan-out.Succeeded") + fan_in_template.depends.append(f"{clean_name}-branch.Succeeded") + fan_in_template.depends.append(f"{clean_name}-branch.Failed") + + self.executor._dag_templates.append( + DagTemplate( + tasks=[fan_out_template, branch_template, fan_in_template], + name=clean_name, + inputs=dag_inputs if dag_inputs else None, + ) + ) + + +class ParallelNodeRender(NodeRenderer): + allowed_node_types = ["parallel"] + + def render(self, list_of_iter_values: Optional[List] = None): + self.node = cast(ParallelNode, self.node) + task_template_arguments = [] + dag_inputs = [] + if list_of_iter_values: + for value in list_of_iter_values: + task_template_arguments.append(Argument(name=value, value="{{inputs.parameters." 
+ value + "}}")) + dag_inputs.append(Parameter(name=value)) + + clean_name = self.executor.get_clean_name(self.node) + fan_out_template = self.executor._create_fan_out_template( + composite_node=self.node, list_of_iter_values=list_of_iter_values + ) + fan_out_template.arguments = task_template_arguments if task_template_arguments else None + + fan_in_template = self.executor._create_fan_in_template( + composite_node=self.node, list_of_iter_values=list_of_iter_values + ) + fan_in_template.arguments = task_template_arguments if task_template_arguments else None + + branch_templates = [] + for name, branch in self.node.branches.items(): + branch_name = self.executor.sanitize_name(name) + self.executor._gather_task_templates_of_dag( + branch, + dag_name=f"{clean_name}-{branch_name}", + list_of_iter_values=list_of_iter_values, + ) + task_template = DagTaskTemplate( + name=f"{clean_name}-{branch_name}", + template=f"{clean_name}-{branch_name}", + arguments=task_template_arguments if task_template_arguments else None, + ) + task_template.depends.append(f"{clean_name}-fan-out.Succeeded") + fan_in_template.depends.append(f"{task_template.name}.Succeeded") + fan_in_template.depends.append(f"{task_template.name}.Failed") + branch_templates.append(task_template) + + executor_config = self.executor._resolve_executor_config(self.node) + + self.executor._dag_templates.append( + DagTemplate( + tasks=[fan_out_template] + branch_templates + [fan_in_template], + name=clean_name, + inputs=dag_inputs if dag_inputs else None, + parallelism=executor_config.get("parallelism", None), + ) + ) + + +class MapNodeRender(NodeRenderer): + allowed_node_types = ["map"] + + def render(self, list_of_iter_values: Optional[List] = None): + self.node = cast(MapNode, self.node) + task_template_arguments = [] + dag_inputs = [] + if list_of_iter_values: + for value in list_of_iter_values: + task_template_arguments.append(Argument(name=value, value="{{inputs.parameters." + value + "}}")) + dag_inputs.append(Parameter(name=value)) + + clean_name = self.executor.get_clean_name(self.node) + fan_out_template = self.executor._create_fan_out_template( + composite_node=self.node, list_of_iter_values=list_of_iter_values + ) + fan_out_template.arguments = task_template_arguments if task_template_arguments else None + + fan_in_template = self.executor._create_fan_in_template( + composite_node=self.node, list_of_iter_values=list_of_iter_values + ) + fan_in_template.arguments = task_template_arguments if task_template_arguments else None + + if not list_of_iter_values: + list_of_iter_values = [] + + list_of_iter_values.append(self.node.iterate_as) + + self.executor._gather_task_templates_of_dag( + self.node.branch, + dag_name=f"{clean_name}-map", + list_of_iter_values=list_of_iter_values, + ) + + task_template = DagTaskTemplate( + name=f"{clean_name}-map", + template=f"{clean_name}-map", + arguments=task_template_arguments if task_template_arguments else None, + ) + task_template.with_param = "{{tasks." + f"{clean_name}-fan-out" + ".outputs.parameters." 
+ "iterate-on" + "}}" + + argument = Argument(name=self.node.iterate_as, value="{{item}}") + if task_template.arguments is None: + task_template.arguments = [] + task_template.arguments.append(argument) + + task_template.depends.append(f"{clean_name}-fan-out.Succeeded") + fan_in_template.depends.append(f"{clean_name}-map.Succeeded") + fan_in_template.depends.append(f"{clean_name}-map.Failed") + + executor_config = self.executor._resolve_executor_config(self.node) + + self.executor._dag_templates.append( + DagTemplate( + tasks=[fan_out_template, task_template, fan_in_template], + name=clean_name, + inputs=dag_inputs if dag_inputs else None, + parallelism=executor_config.get("parallelism", None), + fail_fast=executor_config.get("fail_fast", True), + ) + ) + + +def get_renderer(node): + renderers = NodeRenderer.__subclasses__() + + for renderer in renderers: + if node.node_type in renderer.allowed_node_types: + return renderer + raise Exception("This node type is not render-able") + + +class MetaData(BaseModel): + generate_name: str = Field(default="magnus-dag-", serialization_alias="generateName") + annotations: Optional[Dict[str, str]] = Field(default_factory=dict) + labels: Optional[Dict[str, str]] = Field(default_factory=dict) + namespace: Optional[str] = Field(default=None) + + +class Spec(BaseModel): + active_deadline_seconds: int = Field(serialization_alias="activeDeadlineSeconds") + entrypoint: str = Field(default="magnus-dag") + node_selector: Optional[Dict[str, str]] = Field(default_factory=dict, serialization_alias="nodeSelector") + tolerations: Optional[List[Toleration]] = Field(default=None, serialization_alias="tolerations") + parallelism: Optional[int] = Field(default=None, serialization_alias="parallelism") + # TODO: This has to be user driven + pod_gc: Dict[str, str] = Field(default={"strategy": "OnPodCompletion"}, serialization_alias="podGC") + + retry_strategy: Retry = Field(default=Retry(), serialization_alias="retryStrategy") + service_account_name: Optional[str] = Field(default=None, serialization_alias="serviceAccountName") + + templates: List[Union[DagTemplate, ContainerTemplate]] = Field(default_factory=list) + template_defaults: Optional[TemplateDefaults] = Field(default=None, serialization_alias="templateDefaults") + + arguments: Optional[List[EnvVar]] = Field(default_factory=list) + persistent_volumes: List[UserVolumeMounts] = Field(default_factory=list, exclude=True) + + @field_validator("parallelism") + @classmethod + def validate_parallelism(cls, parallelism: Optional[int]) -> Optional[int]: + if parallelism is not None and parallelism <= 0: + raise ValueError("Parallelism must be a positive integer greater than 0") + return parallelism + + @computed_field # type: ignore + @property + def volumes(self) -> List[Volume]: + volumes: List[Volume] = [] + claim_names = {} + for i, user_volume in enumerate(self.persistent_volumes): + if user_volume.name in claim_names: + raise Exception(f"Duplicate claim name {user_volume.name}") + claim_names[user_volume.name] = user_volume.name + + volume = Volume(name=f"executor-{i}", claim=user_volume.name, mount_path=user_volume.mount_path) + volumes.append(volume) + return volumes + + @field_serializer("arguments", when_used="unless-none") + def reshape_arguments(self, arguments: List[EnvVar], _info) -> Dict[str, List[EnvVar]]: + return {"parameters": arguments} + + +class Workflow(BaseModel): + api_version: str = Field( + default="argoproj.io/v1alpha1", + serialization_alias="apiVersion", + ) + kind: str = "Workflow" + metadata: 
MetaData = Field(default=MetaData()) + spec: Spec + + +class Override(BaseModel): + model_config = ConfigDict(extra="ignore") + + image: str + tolerations: Optional[List[Toleration]] = Field(default=None) + + max_step_duration_in_seconds: int = Field( + default=2 * 60 * 60, # 2 hours + gt=0, + ) + + node_selector: Optional[Dict[str, str]] = Field( + default=None, + serialization_alias="nodeSelector", + ) + + parallelism: Optional[int] = Field( + default=None, + serialization_alias="parallelism", + ) + + resources: Resources = Field( + default=Resources(), + serialization_alias="resources", + ) + + image_pull_policy: str = Field(default="") + + retry_strategy: Retry = Field( + default=Retry(), + serialization_alias="retryStrategy", + description="Common across all templates", + ) + + @field_validator("parallelism") + @classmethod + def validate_parallelism(cls, parallelism: Optional[int]) -> Optional[int]: + if parallelism is not None and parallelism <= 0: + raise ValueError("Parallelism must be a positive integer greater than 0") + return parallelism + + +class ArgoExecutor(GenericExecutor): + service_name: str = "argo" + + model_config = ConfigDict(extra="forbid") + + image: str + expose_parameters_as_inputs: bool = True + secrets_from_k8s: List[SecretEnvVar] = Field(default_factory=list) + output_file: str = "argo-pipeline.yaml" + + # Metadata related fields + name: str = Field(default="magnus-dag-", description="Used as an identifier for the workflow") + annotations: Dict[str, str] = Field(default_factory=dict) + labels: Dict[str, str] = Field(default_factory=dict) + + max_workflow_duration_in_seconds: int = Field( + 2 * 24 * 60 * 60, # 2 days + serialization_alias="activeDeadlineSeconds", + gt=0, + ) + node_selector: Optional[Dict[str, str]] = Field( + default=None, + serialization_alias="nodeSelector", + ) + parallelism: Optional[int] = Field( + default=None, + serialization_alias="parallelism", + ) + resources: Resources = Field( + default=Resources(), + serialization_alias="resources", + exclude=True, + ) + retry_strategy: Retry = Field( + default=Retry(), + serialization_alias="retryStrategy", + description="Common across all templates", + ) + max_step_duration_in_seconds: int = Field( + default=2 * 60 * 60, # 2 hours + gt=0, + ) + tolerations: Optional[List[Toleration]] = Field(default=None) + image_pull_policy: str = Field(default="") + service_account_name: Optional[str] = None + persistent_volumes: List[UserVolumeMounts] = Field(default_factory=list) + + _run_id_placeholder: str = "{{workflow.parameters.run_id}}" + _container_templates: List[ContainerTemplate] = [] + _dag_templates: List[DagTemplate] = [] + _clean_names: Dict[str, str] = {} + _container_volumes: List[ContainerVolume] = [] + + @field_validator("parallelism") + @classmethod + def validate_parallelism(cls, parallelism: Optional[int]) -> Optional[int]: + if parallelism is not None and parallelism <= 0: + raise ValueError("Parallelism must be a positive integer greater than 0") + return parallelism + + @computed_field # type: ignore + @property + def step_timeout(self) -> int: + """ + Maximum time the step can take to complete, including the pending state. 
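+
+ This is computed as max_step_duration_in_seconds plus a fixed two hour buffer (see below).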
+ """ + return self.max_step_duration_in_seconds + 2 * 60 * 60 # 2 hours + max_step_duration_in_seconds + + @property + def metadata(self) -> MetaData: + return MetaData( + generate_name=self.name, + annotations=self.annotations, + labels=self.labels, + ) + + @property + def spec(self) -> Spec: + return Spec( + active_deadline_seconds=self.max_workflow_duration_in_seconds, + node_selector=self.node_selector, + tolerations=self.tolerations, + parallelism=self.parallelism, + retry_strategy=self.retry_strategy, + service_account_name=self.service_account_name, + persistent_volumes=self.persistent_volumes, + template_defaults=TemplateDefaults(max_step_duration=self.max_step_duration_in_seconds), + ) + + def prepare_for_graph_execution(self): + """ + This method should be called prior to calling execute_graph. + Perform any steps required before doing the graph execution. + + The most common implementation is to prepare a run log for the run if the run uses local interactive compute. + + But in cases of actual rendering the job specs (eg: AWS step functions, K8's) we check if the services are OK. + We do not set up a run log as its not relevant. + """ + + integration.validate(self, self._context.run_log_store) + integration.configure_for_traversal(self, self._context.run_log_store) + + integration.validate(self, self._context.catalog_handler) + integration.configure_for_traversal(self, self._context.catalog_handler) + + integration.validate(self, self._context.secrets_handler) + integration.configure_for_traversal(self, self._context.secrets_handler) + + integration.validate(self, self._context.experiment_tracker) + integration.configure_for_traversal(self, self._context.experiment_tracker) + + def prepare_for_node_execution(self): + """ + Perform any modifications to the services prior to execution of the node. + + Args: + node (Node): [description] + map_variable (dict, optional): [description]. Defaults to None. 
+ """ + + super().prepare_for_node_execution() + self._set_up_run_log(exists_ok=True) + + def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable)) + + self.add_code_identities(node=node, step_log=step_log) + + step_log.step_type = node.node_type + step_log.status = defaults.PROCESSING + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + super()._execute_node(node, map_variable=map_variable, **kwargs) + + # Implicit fail + if self._context.dag: + # functions and notebooks do not have dags + _, current_branch = search_node_by_internal_name(dag=self._context.dag, internal_name=node.internal_name) + _, next_node_name = self._get_status_and_next_node_name(node, current_branch, map_variable=map_variable) + if next_node_name: + # Terminal nodes do not have next node name + next_node = current_branch.get_node_by_name(next_node_name) + + if next_node.node_type == defaults.FAIL: + self.execute_node(next_node, map_variable=map_variable) + + step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id) + if step_log.status == defaults.FAIL: + raise Exception(f"Step {node.name} failed") + + def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None): + super().fan_out(node, map_variable) + + # If its a map node, write the list values to "/tmp/output.txt" + if node.node_type == "map": + node = cast(MapNode, node) + iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[node.iterate_on] + + with open("/tmp/output.txt", mode="w", encoding="utf-8") as myfile: + json.dump(iterate_on, myfile, indent=4) + + def _get_parameters(self) -> Dict[str, Any]: + params = {} + if self._context.parameters_file: + # Parameters from the parameters file if defined + params.update(utils.load_yaml(self._context.parameters_file)) + # parameters from environment variables supersede file based + params.update(parameters.get_user_set_parameters()) + + return params + + def sanitize_name(self, name): + return name.replace(" ", "-").replace(".", "-").replace("_", "-") + + def get_clean_name(self, node: BaseNode): + # Cache names for the node + if node.internal_name not in self._clean_names: + sanitized = self.sanitize_name(node.name) + tag = "".join(random.choices(string.ascii_lowercase + string.digits, k=6)) + self._clean_names[node.internal_name] = f"{sanitized}-{node.node_type}-{tag}" + + return self._clean_names[node.internal_name] + + def compose_map_variable(self, list_of_iter_values: Optional[List] = None) -> TypeMapVariable: + map_variable = OrderedDict() + + # If we are inside a map node, compose a map_variable + # The values of "iterate_as" are sent over as inputs to the container template + if list_of_iter_values: + for var in list_of_iter_values: + map_variable[var] = "{{inputs.parameters." 
+ str(var) + "}}" + + return map_variable # type: ignore + + def create_container_template( + self, + working_on: BaseNode, + command: str, + inputs: Optional[List] = None, + outputs: Optional[List] = None, + overwrite_name: str = "", + ): + effective_node_config = self._resolve_executor_config(working_on) + + override: Override = Override(**effective_node_config) + + container = Container( + command=command, + image=override.image, + volume_mounts=self._container_volumes, + image_pull_policy=override.image_pull_policy, + resources=override.resources, + secrets_from_k8s=self.secrets_from_k8s, + ) + + if working_on.name == self._context.dag.start_at and self.expose_parameters_as_inputs: + for key, value in self._get_parameters().items(): + # Get the value from work flow parameters for dynamic behavior + if isinstance(value, int) or isinstance(value, float) or isinstance(value, str): + env_var = EnvVar( + name=defaults.PARAMETER_PREFIX + key, + value="{{workflow.parameters." + key + "}}", + ) + container.env_vars.append(env_var) + + clean_name = self.get_clean_name(working_on) + if overwrite_name: + clean_name = overwrite_name + + container_template = ContainerTemplate( + name=clean_name, + active_deadline_seconds=( + override.max_step_duration_in_seconds + if self.max_step_duration_in_seconds != override.max_step_duration_in_seconds + else None + ), + container=container, + retry_strategy=override.retry_strategy if self.retry_strategy != override.retry_strategy else None, + tolerations=override.tolerations if self.tolerations != override.tolerations else None, + node_selector=override.node_selector if self.node_selector != override.node_selector else None, + ) + + # inputs are the "iterate_as" value map variables in the same order as they are observed + # We need to expose the map variables in the command of the container + if inputs: + if not container_template.inputs: + container_template.inputs = [] + container_template.inputs.extend(inputs) + + # The map step fan out would create an output that we should propagate via Argo + if outputs: + if not container_template.outputs: + container_template.outputs = [] + container_template.outputs.extend(outputs) + + return container_template + + def _create_fan_out_template(self, composite_node, list_of_iter_values: Optional[List] = None): + clean_name = self.get_clean_name(composite_node) + inputs = [] + # If we are fanning out already map state, we need to send the map variable inside + # The container template also should be accepting an input parameter + map_variable = None + if list_of_iter_values: + map_variable = self.compose_map_variable(list_of_iter_values=list_of_iter_values) + + for val in list_of_iter_values: + inputs.append(Parameter(name=val)) + + command = utils.get_fan_command( + mode="out", + node=composite_node, + run_id=self._run_id_placeholder, + map_variable=map_variable, + ) + + outputs = [] + # If the node is a map node, we have to set the output parameters + # Output is always the step's internal name + iterate-on + if composite_node.node_type == "map": + output_parameter = OutputParameter(name="iterate-on") + outputs.append(output_parameter) + + # Create the node now + step_config = {"command": command, "type": "task", "next": "dummy"} + node = create_node(name=f"{clean_name}-fan-out", step_config=step_config) + + container_template = self.create_container_template( + working_on=node, + command=command, + outputs=outputs, + inputs=inputs, + overwrite_name=f"{clean_name}-fan-out", + ) + + 
self._container_templates.append(container_template) + return DagTaskTemplate(name=f"{clean_name}-fan-out", template=f"{clean_name}-fan-out") + + def _create_fan_in_template(self, composite_node, list_of_iter_values: Optional[List] = None): + clean_name = self.get_clean_name(composite_node) + inputs = [] + # If we are fanning in already map state, we need to send the map variable inside + # The container template also should be accepting an input parameter + map_variable = None + if list_of_iter_values: + map_variable = self.compose_map_variable(list_of_iter_values=list_of_iter_values) + + for val in list_of_iter_values: + inputs.append(Parameter(name=val)) + + command = utils.get_fan_command( + mode="in", + node=composite_node, + run_id=self._run_id_placeholder, + map_variable=map_variable, + ) + + step_config = {"command": command, "type": "task", "next": "dummy"} + node = create_node(name=f"{clean_name}-fan-in", step_config=step_config) + container_template = self.create_container_template( + working_on=node, + command=command, + inputs=inputs, + overwrite_name=f"{clean_name}-fan-in", + ) + self._container_templates.append(container_template) + clean_name = self.get_clean_name(composite_node) + return DagTaskTemplate(name=f"{clean_name}-fan-in", template=f"{clean_name}-fan-in") + + def _gather_task_templates_of_dag( + self, dag: Graph, dag_name="magnus-dag", list_of_iter_values: Optional[List] = None + ): + current_node = dag.start_at + previous_node = None + previous_node_template_name = None + + templates: Dict[str, DagTaskTemplate] = {} + + if not list_of_iter_values: + list_of_iter_values = [] + + while True: + working_on = dag.get_node_by_name(current_node) + if previous_node == current_node: + raise Exception("Potentially running in a infinite loop") + + render_obj = get_renderer(working_on)(executor=self, node=working_on) + render_obj.render(list_of_iter_values=list_of_iter_values.copy()) + + clean_name = self.get_clean_name(working_on) + + # If a task template for clean name exists, retrieve it (could have been created by on_failure) + template = templates.get(clean_name, DagTaskTemplate(name=clean_name, template=clean_name)) + + # Link the current node to previous node, if the previous node was successful. + if previous_node: + template.depends.append(f"{previous_node_template_name}.Succeeded") + + templates[clean_name] = template + + # On failure nodes + if working_on.node_type not in ["success", "fail"] and working_on._get_on_failure_node(): + failure_node = dag.get_node_by_name(working_on._get_on_failure_node()) + + failure_template_name = self.get_clean_name(failure_node) + # If a task template for clean name exists, retrieve it + failure_template = templates.get( + failure_template_name, + DagTaskTemplate(name=failure_template_name, template=failure_template_name), + ) + failure_template.depends.append(f"{clean_name}.Failed") + + templates[failure_template_name] = failure_template + + # If we are in a map node, we need to add the values as arguments + template = templates[clean_name] + if list_of_iter_values: + if not template.arguments: + template.arguments = [] + for value in list_of_iter_values: + template.arguments.append(Argument(name=value, value="{{inputs.parameters." 
+ value + "}}")) + + # Move ahead to the next node + previous_node = current_node + previous_node_template_name = self.get_clean_name(working_on) + + if working_on.node_type in ["success", "fail"]: + break + + current_node = working_on._get_next_node() + + # Add the iteration values as input to dag template + dag_template = DagTemplate(tasks=list(templates.values()), name=dag_name) + if list_of_iter_values: + if not dag_template.inputs: + dag_template.inputs = [] + dag_template.inputs.extend([Parameter(name=val) for val in list_of_iter_values]) + + # Add the dag template to the list of templates + self._dag_templates.append(dag_template) + + def _get_template_defaults(self) -> TemplateDefaults: + user_provided_config = self.model_dump(by_alias=False) + + return TemplateDefaults(**user_provided_config) + + def execute_graph(self, dag: Graph, map_variable: Optional[dict] = None, **kwargs): + # TODO: Add metadata + arguments = [] + # Expose "simple" parameters as workflow arguments for dynamic behavior + if self.expose_parameters_as_inputs: + for key, value in self._get_parameters().items(): + if isinstance(value, dict) or isinstance(value, list): + continue + env_var = EnvVar(name=key, value=value) + arguments.append(env_var) + + run_id_var = EnvVar(name="run_id", value="{{workflow.uid}}") + arguments.append(run_id_var) + + # # TODO: Experimental feature + + # original_run_id_var = EnvVar(name="original_run_id") + # arguments.append(original_run_id_var) + + for volume in self.spec.volumes: + self._container_volumes.append(ContainerVolume(name=volume.name, mount_path=volume.mount_path)) + + # Container specifications are globally collected and added at the end. + # Dag specifications are added as part of the dag traversal. + templates: List[Union[DagTemplate, ContainerTemplate]] = [] + self._gather_task_templates_of_dag(dag=dag, list_of_iter_values=[]) + templates.extend(self._dag_templates) + templates.extend(self._container_templates) + + spec = self.spec + spec.templates = templates + spec.arguments = arguments + workflow = Workflow(metadata=self.metadata, spec=spec) + + yaml = YAML() + with open(self.output_file, "w") as f: + yaml.indent(mapping=2, sequence=4, offset=2) + + yaml.dump(workflow.model_dump(by_alias=True, exclude_none=True), f) + + def execute_job(self, node: BaseNode): + """ + Use K8's job instead + """ + raise NotImplementedError("Use K8's job instead") + + def send_return_code(self, stage="traversal"): + """ + Convenience function used by pipeline to send return code to the caller of the cli + + Raises: + Exception: If the pipeline execution failed + """ + if stage != "traversal": # traversal does no actual execution, so return code is pointless + run_id = self._context.run_id + + run_log = self._context.run_log_store.get_run_log_by_id(run_id=run_id, full=False) + if run_log.status == defaults.FAIL: + raise exceptions.ExecutionFailedError(run_id) + + +class FileSystemRunLogStore(BaseIntegration): + """ + Only local execution mode is possible for Buffered Run Log store + """ + + executor_type = "argo" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "file-system" # The actual implementation of the service + + def validate(self, **kwargs): + msg = ( + "Argo cannot run work with file-system run log store. " + "Unless you have made a mechanism to use volume mounts." 
+ "Using this run log store if the pipeline has concurrent tasks might lead to unexpected results" + ) + logger.warning(msg) + + +class ChunkedFileSystemRunLogStore(BaseIntegration): + """ + Only local execution mode is possible for Buffered Run Log store + """ + + executor_type = "argo" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "chunked-fs" # The actual implementation of the service + + def validate(self, **kwargs): + msg = ( + "Argo cannot run work with chunked file-system run log store. " + "Unless you have made a mechanism to use volume mounts" + ) + logger.warning(msg) + + +class FileSystemCatalog(BaseIntegration): + """ + Only local execution mode is possible for Buffered Run Log store + """ + + executor_type = "argo" + service_type = "catalog" # One of secret, catalog, datastore + service_provider = "file-system" # The actual implementation of the service + + def validate(self, **kwargs): + msg = ( + "Argo cannot run work with file-system run log store. Unless you have made a mechanism to use volume mounts" + ) + logger.warning(msg) diff --git a/magnus/extensions/executor/argo/specification.yaml b/magnus/extensions/executor/argo/specification.yaml new file mode 100644 index 00000000..9c126361 --- /dev/null +++ b/magnus/extensions/executor/argo/specification.yaml @@ -0,0 +1,51 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: magnus-dag +spec: + activeDeadlineSeconds: int # max run time of the workflow + entrypoint: str + nodeSelector: Dict[str, str] # global node selector + parallelism: # global level + podGC: OnPodCompletion + resources: # Should be converted to podSpecPath + limits: + requests: + podSpecPatch: json str representation of resources for defaults + retryStrategy: # global level for all templates + limit: int + retryPolicy: # global level for all templates + backoff: + duration: str + factor: int + maxDuration: str + serviceAccountName: str # Optionally required + templateDefaults: + activeDeadlineSeconds: int, for a template + timeout: str # max time including the wait time + failFast: true + volumes: + templates: + activeDeadlineSeconds: # override + nodeSelector: # override + retryStrategy: # override + tolerations: # override + container: + command: + env: + image: + imagePullPolicy: + volumeMounts: + resources: + limits: + requests: + dag: + tasks: + depends: + continueOn: + tolerations: # global level for all templates + effect: str + key: str + operator: str + value: str + volumes: diff --git a/magnus/extensions/executor/k8s_job/__init__.py b/magnus/extensions/executor/k8s_job/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/executor/k8s_job/implementation_FF.py b/magnus/extensions/executor/k8s_job/implementation_FF.py new file mode 100644 index 00000000..40b69612 --- /dev/null +++ b/magnus/extensions/executor/k8s_job/implementation_FF.py @@ -0,0 +1,259 @@ +# import logging +# import shlex +# from typing import Dict, List, Optional + +# from pydantic import BaseModel + +# from magnus import defaults, integration, utils +# from magnus.executor import BaseExecutor +# from magnus.graph import Graph +# from magnus.nodes import BaseNode + +# logger = logging.getLogger(defaults.NAME) + +# try: +# from kubernetes import client +# from kubernetes.client import V1EnvVar, V1EnvVarSource, V1PersistentVolumeClaimVolumeSource, V1SecretKeySelector +# except ImportError as _e: +# msg = "Kubernetes Dependencies have not been installed!!" 
+# # raise Exception(msg) from _e + + +# class Toleration(BaseModel): +# effect: str +# key: str +# operator: str +# value: str + + +# class K8sJobExecutor(BaseExecutor): +# service_name = "k8s-job" + +# # TODO: move this to K8's style config. +# class ContextConfig(BaseModel): +# docker_image: str +# config_path: str = "" # Let the client decide on the path to the config file. +# namespace: str = "default" +# cpu_limit: str = "250m" +# memory_limit: str = "1G" +# gpu_limit: int = 0 +# gpu_vendor: str = "nvidia.com/gpu" +# cpu_request: str = "" +# memory_request: str = "" +# active_deadline_seconds: int = 60 * 60 * 2 # 2 hours +# ttl_seconds_after_finished: int = 60 #  1 minute +# image_pull_policy: str = "Always" +# secrets_from_k8s: dict = {} # EnvVar=SecretName:Key +# persistent_volumes: dict = {} # volume-name:mount_path +# node_selector: Dict[str, str] = {} +# tolerations: List[Toleration] = [] +# labels: Dict[str, str] = {} + +# def __init__(self, config: Optional[dict] = None): +# self.config = self.ContextConfig(**(config or {})) +# self.persistent_volumes = {} + +# for i, (claim, mount_path) in enumerate(self.config.persistent_volumes.items()): +# self.persistent_volumes[f"executor-{i}"] = (claim, mount_path) + +# def prepare_for_graph_execution(self): +# """ +# This method would be called prior to calling execute_graph. +# Perform any steps required before doing the graph execution. + +# The most common implementation is to prepare a run log for the run if the run uses local interactive compute. + +# But in cases of actual rendering the job specs (eg: AWS step functions, K8's) we need not do anything. +# """ + +# integration.validate(self, self.run_log_store) +# integration.configure_for_traversal(self, self.run_log_store) + +# integration.validate(self, self.catalog_handler) +# integration.configure_for_traversal(self, self.catalog_handler) + +# integration.validate(self, self.secrets_handler) +# integration.configure_for_traversal(self, self.secrets_handler) + +# integration.validate(self, self.experiment_tracker) +# integration.configure_for_traversal(self, self.experiment_tracker) + +# def prepare_for_node_execution(self): +# """ +# Perform any modifications to the services prior to execution of the node. + +# Args: +# node (Node): [description] +# map_variable (dict, optional): [description]. Defaults to None. 
+# """ + +# integration.validate(self, self.run_log_store) +# integration.configure_for_execution(self, self.run_log_store) + +# integration.validate(self, self.catalog_handler) +# integration.configure_for_execution(self, self.catalog_handler) + +# integration.validate(self, self.secrets_handler) +# integration.configure_for_execution(self, self.secrets_handler) + +# integration.validate(self, self.experiment_tracker) +# integration.configure_for_execution(self, self.experiment_tracker) + +# self._set_up_run_log(exists_ok=True) + +# @property +# def _client(self): +# from kubernetes import config as k8s_config + +# if self.config.config_path: +# k8s_config.load_kube_config(kube_config_path=self.config.config_path) +# else: +# # https://github.com/kubernetes-client/python/blob/master/kubernetes/base/config/__init__.py +# k8s_config.load_config() +# return client + +# @property +# def tolerations(self): +# return [toleration.dict() for toleration in self.config.tolerations] + +# def execute_job(self, node: BaseNode): +# command = utils.get_job_execution_command(self, node) +# logger.info(f"Triggering a kubernetes job with : {command}") + +# self.config.labels["job_name"] = self.run_id + +# k8s_batch = self._client.BatchV1Api() + +# cpu_limit = self.config.cpu_limit +# memory_limit = self.config.memory_limit + +# cpu_request = self.config.cpu_request or cpu_limit +# memory_request = self.config.memory_request or memory_limit + +# gpu_limit = str(self.config.gpu_limit) # Should be something like nvidia -etc + +# limits = { +# "cpu": cpu_limit, +# "memory": memory_limit, +# self.config.gpu_vendor: gpu_limit, +# } +# requests = {"cpu": cpu_request, "memory": memory_request} +# resources = {"limits": limits, "requests": requests} + +# environment_variables = [] +# for secret_env, k8_secret in self.config.secrets_from_k8s.items(): +# try: +# secret_name, key = k8_secret.split(":") +# except Exception as _e: +# msg = "K8's secret should be of format EnvVar=SecretName:Key" +# raise Exception(msg) from _e +# secret_as_env = V1EnvVar( +# name=secret_env, +# value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name=secret_name, key=key)), +# ) +# environment_variables.append(secret_as_env) + +# overridden_params = utils.get_user_set_parameters() +# # The parameters present in the environment override the parameters present in the parameters file +# # The values are coerced to be strings, hopefully they will be fine on the other side. 
+# for k, v in overridden_params.items(): +# environment_variables.append(V1EnvVar(name=defaults.PARAMETER_PREFIX + k, value=str(v))) + +# pod_volumes = [] +# volume_mounts = [] +# for claim_name, (claim, mount_path) in self.persistent_volumes.items(): +# pod_volumes.append( +# self._client.V1Volume( +# name=claim_name, +# persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(claim_name=claim), +# ) +# ) +# volume_mounts.append(self._client.V1VolumeMount(name=claim_name, mount_path=mount_path)) + +# base_container = self._client.V1Container( +# name=self.run_id, +# image=self.config.docker_image, +# command=shlex.split(command), +# resources=resources, +# env=environment_variables, +# image_pull_policy="Always", +# volume_mounts=volume_mounts or None, +# ) + +# pod_spec = self._client.V1PodSpec( +# volumes=pod_volumes or None, +# restart_policy="Never", +# containers=[base_container], +# node_selector=self.config.node_selector, +# tolerations=self.tolerations, +# ) + +# pod_template = self._client.V1PodTemplateSpec( +# metadata=client.V1ObjectMeta( +# labels=self.config.labels, +# annotations={"sidecar.istio.io/inject": "false"}, +# ), +# spec=pod_spec, +# ) + +# job_spec = client.V1JobSpec( +# template=pod_template, +# backoff_limit=2, +# ttl_seconds_after_finished=self.config.ttl_seconds_after_finished, +# ) +# job_spec.active_deadline_seconds = self.config.active_deadline_seconds + +# job = client.V1Job( +# api_version="batch/v1", +# kind="Job", +# metadata=client.V1ObjectMeta(name=self.run_id), +# spec=job_spec, +# ) + +# logger.debug(f"Submitting kubernetes job: {job}") + +# try: +# response = k8s_batch.create_namespaced_job( +# body=job, +# namespace=self.config.namespace, +# _preload_content=False, +# pretty=True, +# ) +# print(f"Kubernetes job {self.run_id} created") +# logger.debug(f"Kubernetes job response: {response}") +# except Exception as e: +# logger.exception(e) +# raise + +# def execute_node(self, node: BaseNode, map_variable: Optional[dict] = None, **kwargs): +# step_log = self.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable)) + +# self.add_code_identities(node=node, step_log=step_log) + +# step_log.step_type = node.node_type +# step_log.status = defaults.PROCESSING +# self.run_log_store.add_step_log(step_log, self.run_id) + +# super()._execute_node(node, map_variable=map_variable, **kwargs) + +# step_log = self.run_log_store.get_step_log(node._get_step_log_name(map_variable), self.run_id) +# if step_log.status == defaults.FAIL: +# raise Exception(f"Step {node.name} failed") + +# def execute_graph(self, dag: Graph, map_variable: Optional[dict] = None, **kwargs): +# msg = "This executor is not supported to execute any graphs but only jobs (functions or notebooks)" +# raise NotImplementedError(msg) + +# def send_return_code(self, stage="traversal"): +# """ +# Convenience function used by pipeline to send return code to the caller of the cli + +# Raises: +# Exception: If the pipeline execution failed +# """ +# if stage != "traversal": # traversal does no actual execution, so return code is pointless +# run_id = self.run_id + +# run_log = self.run_log_store.get_run_log_by_id(run_id=run_id, full=False) +# if run_log.status == defaults.FAIL: +# raise Exception("Pipeline execution failed") diff --git a/magnus/extensions/executor/k8s_job/integration_FF.py b/magnus/extensions/executor/k8s_job/integration_FF.py new file mode 100644 index 00000000..d8e21f2a --- /dev/null +++ b/magnus/extensions/executor/k8s_job/integration_FF.py @@ -0,0 +1,69 @@ 
+import logging + +from magnus import defaults +from magnus.integration import BaseIntegration + +logger = logging.getLogger(defaults.NAME) + + +class BufferedRunLogStore(BaseIntegration): + """ + Only local execution mode is possible for Buffered Run Log store + """ + + executor_type = "k8s-job" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "buffered" # The actual implementation of the service + + def validate(self, **kwargs): + raise Exception("K8s job cannot run work with buffered run log store") + + +class FileSystemRunLogStore(BaseIntegration): + """ + Only local execution mode is possible for Buffered Run Log store + """ + + executor_type = "k8s-job" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "file-system" # The actual implementation of the service + + def validate(self, **kwargs): + msg = ( + "K8s job cannot run work with file-system run log store." + "Unless you have made a mechanism to use volume mounts" + ) + logger.warning(msg) + + +class ChunkedFSRunLogStore(BaseIntegration): + """ + Only local execution mode is possible for Buffered Run Log store + """ + + executor_type = "k8s-job" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "chunked-fs" # The actual implementation of the service + + def validate(self, **kwargs): + msg = ( + "K8s job cannot run work with chunked-fs run log store." + "Unless you have made a mechanism to use volume mounts" + ) + logger.warning(msg) + + +class FileSystemCatalog(BaseIntegration): + """ + Only local execution mode is possible for Buffered Run Log store + """ + + executor_type = "k8s-job" + service_type = "catalog" # One of secret, catalog, datastore + service_provider = "file-system" # The actual implementation of the service + + def validate(self, **kwargs): + msg = ( + "K8s Job cannot run work with file-system catalog." "Unless you have made a mechanism to use volume mounts" + ) + logger.warning(msg) diff --git a/magnus/extensions/executor/local/__init__.py b/magnus/extensions/executor/local/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/executor/local/implementation.py b/magnus/extensions/executor/local/implementation.py new file mode 100644 index 00000000..49de2863 --- /dev/null +++ b/magnus/extensions/executor/local/implementation.py @@ -0,0 +1,69 @@ +import logging + +from magnus import defaults +from magnus.defaults import TypeMapVariable +from magnus.extensions.executor import GenericExecutor +from magnus.extensions.nodes import TaskNode +from magnus.nodes import BaseNode + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class LocalExecutor(GenericExecutor): + """ + In the mode of local execution, we run everything on the local computer. + + This has some serious implications on the amount of time it would take to complete the run. + Also ensure that the local compute is good enough for the compute to happen of all the steps. + + Example config: + execution: + type: local + config: + enable_parallel: True or False to enable parallel. + + """ + + service_name: str = "local" + + def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + In this mode of execution, we prepare for the node execution and execute the node + + Args: + node (BaseNode): [description] + map_variable (str, optional): [description]. Defaults to ''. 
+ """ + self.prepare_for_node_execution() + self.execute_node(node=node, map_variable=map_variable, **kwargs) + + def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + For local execution, we just execute the node. + + Args: + node (BaseNode): _description_ + map_variable (dict[str, str], optional): _description_. Defaults to None. + """ + self._execute_node(node=node, map_variable=map_variable, **kwargs) + + def execute_job(self, node: TaskNode): + """ + Set up the step log and call the execute node + + Args: + node (BaseNode): _description_ + """ + + step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable=None)) + + self.add_code_identities(node=node, step_log=step_log) + + step_log.step_type = node.node_type + step_log.status = defaults.PROCESSING + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + self.execute_node(node=node) + + # Update the run log status + step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(), self._context.run_id) + self._context.run_log_store.update_run_log_status(run_id=self._context.run_id, status=step_log.status) diff --git a/magnus/extensions/executor/local_container/__init__.py b/magnus/extensions/executor/local_container/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/executor/local_container/implementation.py b/magnus/extensions/executor/local_container/implementation.py new file mode 100644 index 00000000..9e38e27a --- /dev/null +++ b/magnus/extensions/executor/local_container/implementation.py @@ -0,0 +1,367 @@ +import logging +from pathlib import Path +from typing import Dict, cast + +from pydantic import Field +from rich import print + +from magnus import defaults, integration, utils +from magnus.datastore import StepLog +from magnus.defaults import TypeMapVariable +from magnus.extensions.executor import GenericExecutor +from magnus.extensions.nodes import TaskNode +from magnus.integration import BaseIntegration +from magnus.nodes import BaseNode + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class LocalContainerExecutor(GenericExecutor): + """ + In the mode of local-container, we execute all the commands in a container. + + Ensure that the local compute has enough resources to finish all your jobs. + + The image of the run, could either be provided as default in the configuration of the execution engine + i.e.: + execution: + type: 'local-container' + config: + docker_image: the image you want the code to run in. + + or default image could be over-ridden for a single node by providing a docker_image in the step config. + i.e: + dag: + steps: + step: + executor_config: + local-container: + docker_image: The image that you want that single step to run in. + This image would only be used for that step only. + + This mode does not build the docker image with the latest code for you, it is still left for the user to build + and ensure that the docker image provided is the correct one. + + Example config: + execution: + type: local-container + config: + docker_image: The default docker image to use if the node does not provide one. 
+ """ + + service_name: str = "local-container" + docker_image: str + auto_remove_container: bool = True + run_in_local: bool = False + environment: Dict[str, str] = Field(default_factory=dict) + + _container_log_location = "/tmp/run_logs/" + _container_catalog_location = "/tmp/catalog/" + _container_secrets_location = "/tmp/dotenv" + _volumes: Dict[str, Dict[str, str]] = {} + + def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs): + """ + Call the Base class to add the git code identity and add docker identity + + Args: + node (BaseNode): The node we are adding the code identity + step_log (Object): The step log corresponding to the node + """ + + super().add_code_identities(node, step_log) + + if node.node_type in ["success", "fail"]: + # Need not add code identities if we are in a success or fail node + return + + executor_config = self._resolve_executor_config(node) + + docker_image = executor_config.get("docker_image", None) + if docker_image: + code_id = self._context.run_log_store.create_code_identity() + + code_id.code_identifier = utils.get_local_docker_image_id(docker_image) + code_id.code_identifier_type = "docker" + code_id.code_identifier_dependable = True + code_id.code_identifier_url = "local docker host" + step_log.code_identities.append(code_id) + + def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + We are already in the container, we just execute the node. + The node is already prepared for execution. + """ + return self._execute_node(node, map_variable, **kwargs) + + def execute_job(self, node: TaskNode): + """ + Set up the step log and call the execute node + + Args: + node (BaseNode): _description_ + """ + + step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable=None)) + + self.add_code_identities(node=node, step_log=step_log) + + step_log.step_type = node.node_type + step_log.status = defaults.PROCESSING + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + command = utils.get_job_execution_command(node) + self._spin_container(node=node, command=command) + + # Check the step log status and warn if necessary. Docker errors are generally suppressed. + step_log = self._context.run_log_store.get_step_log( + node._get_step_log_name(map_variable=None), self._context.run_id + ) + if step_log.status != defaults.SUCCESS: + msg = ( + "Node execution inside the container failed. Please check the logs.\n" + "Note: If you do not see any docker issue from your side and the code works properly on local execution" + "please raise a bug report." + ) + logger.warning(msg) + + def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + We come into this step via execute from graph, use trigger job to spin up the container. + + + If the config has "run_in_local: True", we compute it on local system instead of container. + In local container execution, we just spin the container to execute magnus execute_single_node. + + Args: + node (BaseNode): The node we are currently executing + map_variable (str, optional): If the node is part of the map branch. Defaults to ''. + """ + executor_config = self._resolve_executor_config(node) + auto_remove_container = executor_config.get("auto_remove_container", True) + + logger.debug("Here is the resolved executor config") + logger.debug(executor_config) + + if executor_config.get("run_in_local", False): + # Do not change config but only validate the configuration. 
+ # Trigger the job on local system instead of a container + integration.validate(self, self._context.run_log_store) + integration.validate(self, self._context.catalog_handler) + integration.validate(self, self._context.secrets_handler) + + self.execute_node(node=node, map_variable=map_variable, **kwargs) + return + + command = utils.get_node_execution_command(node, map_variable=map_variable) + + self._spin_container( + node=node, + command=command, + map_variable=map_variable, + auto_remove_container=auto_remove_container, + **kwargs, + ) + + step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id) + if step_log.status != defaults.SUCCESS: + msg = ( + "Node execution inside the container failed. Please check the logs.\n" + "Note: If you do not see any docker issue from your side and the code works properly on local execution" + "please raise a bug report." + ) + logger.warning(msg) + step_log.status = defaults.FAIL + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + def _spin_container( + self, + node: BaseNode, + command: str, + map_variable: TypeMapVariable = None, + auto_remove_container: bool = True, + **kwargs, + ): + """ + During the flow run, we have to spin up a container with the docker image mentioned + and the right log locations + """ + # Conditional import + import docker # pylint: disable=C0415 + + try: + client = docker.from_env() + api_client = docker.APIClient() + except Exception as ex: + logger.exception("Could not get access to docker") + raise Exception("Could not get the docker socket file, do you have docker installed?") from ex + + try: + logger.info(f"Running the command {command}") + #  Overrides global config with local + executor_config = self._resolve_executor_config(node) + + docker_image = executor_config.get("docker_image", None) + environment = executor_config.get("environment", {}) + environment.update(self._context.variables) + if not docker_image: + raise Exception( + f"Please provide a docker_image using executor_config of the step {node.name} or at global config" + ) + + # TODO: Should consider using getpass.getuser() when running the docker container? 
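The container creation that follows uses the docker SDK directly. As an aside, a stripped-down sketch of the same flow in isolation, assuming a local docker daemon; the image, command, paths, and environment variable are placeholders, not the executor's real values:

import docker  # requires the docker SDK and a running docker daemon

client = docker.from_env()
container = client.containers.create(
    image="python:3.8-slim",                    # placeholder; the executor uses the resolved docker_image
    command="python -c 'print(\"hello\")'",     # placeholder command
    volumes={"/tmp/run_logs": {"bind": "/tmp/run_logs/", "mode": "rw"}},
    environment={"EXAMPLE_VAR": "1"},           # placeholder environment
    network_mode="host",
)
container.start()
for line in container.logs(stream=True, follow=True):
    print(line.decode("utf-8").rstrip())

exit_code = container.wait()["StatusCode"]
container.remove(force=True)
if exit_code != 0:
    raise Exception(f"Docker command failed with exit code {exit_code}")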
Volume permissions + container = client.containers.create( + image=docker_image, + command=command, + auto_remove=False, + volumes=self._volumes, + network_mode="host", + environment=environment, + ) + + # print(container.__dict__) + + container.start() + stream = api_client.logs(container=container.id, timestamps=True, stream=True, follow=True) + while True: + try: + output = next(stream).decode("utf-8") + output = output.strip("\r\n") + logger.info(output) + print(output) + except StopIteration: + logger.info("Docker Run completed") + break + + exit_status = api_client.inspect_container(container.id)["State"]["ExitCode"] + + if auto_remove_container: + container.remove(force=True) + + if exit_status != 0: + msg = f"Docker command failed with exit code {exit_status}" + raise Exception(msg) + + except Exception as _e: + logger.exception("Problems with spinning/running the container") + raise _e + + +class LocalContainerComputeFileSystemRunLogstore(BaseIntegration): + """ + Integration between local container and file system run log store + """ + + executor_type = "local-container" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "file-system" # The actual implementation of the service + + def validate(self, **kwargs): + if self.executor._is_parallel_execution(): # pragma: no branch + msg = ( + "Run log generated by file-system run log store are not thread safe. " + "Inconsistent results are possible because of race conditions to write to the same file.\n" + "Consider using partitioned run log store like database for consistent results." + ) + logger.warning(msg) + + def configure_for_traversal(self, **kwargs): + from magnus.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(FileSystemRunLogstore, self.service) + + write_to = self.service.log_folder_name + self.executor._volumes[str(Path(write_to).resolve())] = { + "bind": f"{self.executor._container_log_location}", + "mode": "rw", + } + + def configure_for_execution(self, **kwargs): + from magnus.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(FileSystemRunLogstore, self.service) + + self.service.log_folder = self.executor._container_log_location + + +class LocalContainerComputeFileSystemCatalog(BaseIntegration): + """ + Integration pattern between Local container and File System catalog + """ + + executor_type = "local-container" + service_type = "catalog" # One of secret, catalog, datastore + service_provider = "file-system" # The actual implementation of the service + + def configure_for_traversal(self, **kwargs): + from magnus.extensions.catalog.file_system.implementation import FileSystemCatalog + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(FileSystemCatalog, self.service) + + catalog_location = self.service.catalog_location + self.executor._volumes[str(Path(catalog_location).resolve())] = { + "bind": f"{self.executor._container_catalog_location}", + "mode": "rw", + } + + def configure_for_execution(self, **kwargs): + from magnus.extensions.catalog.file_system.implementation import FileSystemCatalog + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(FileSystemCatalog, self.service) + + self.service.catalog_location = self.executor._container_catalog_location + + +class 
LocalContainerComputeDotEnvSecrets(BaseIntegration): + """ + Integration between local container and dot env secrets + """ + + executor_type = "local-container" + service_type = "secrets" # One of secret, catalog, datastore + service_provider = "dotenv" # The actual implementation of the service + + def validate(self, **kwargs): + logger.warning("Using dot env for non local deployments is not ideal, consider options") + + def configure_for_traversal(self, **kwargs): + from magnus.extensions.secrets.dotenv.implementation import DotEnvSecrets + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(DotEnvSecrets, self.service) + + secrets_location = self.service.secrets_location + self.executor._volumes[str(Path(secrets_location).resolve())] = { + "bind": f"{self.executor._container_secrets_location}", + "mode": "ro", + } + + def configure_for_execution(self, **kwargs): + from magnus.extensions.secrets.dotenv.implementation import DotEnvSecrets + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(DotEnvSecrets, self.service) + + self.service.location = self.executor._container_secrets_location + + +class LocalContainerComputeEnvSecretsManager(BaseIntegration): + """ + Integration between local container and env secrets manager + """ + + executor_type = "local-container" + service_type = "secrets" # One of secret, catalog, datastore + service_provider = "env-secrets-manager" # The actual implementation of the service + + def validate(self, **kwargs): + msg = ( + "Local container executions cannot be used with environment secrets manager. " + "Please use a supported secrets manager" + ) + logger.exception(msg) + raise Exception(msg) diff --git a/magnus/extensions/executor/mocked/__init__.py b/magnus/extensions/executor/mocked/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/executor/mocked/implementation.py b/magnus/extensions/executor/mocked/implementation.py new file mode 100644 index 00000000..b7e9d4f6 --- /dev/null +++ b/magnus/extensions/executor/mocked/implementation.py @@ -0,0 +1,220 @@ +import copy +import logging +from typing import Any, Dict, Type, cast + +from pydantic import ConfigDict, Field + +from magnus import context, defaults +from magnus.defaults import TypeMapVariable +from magnus.extensions.executor import GenericExecutor +from magnus.extensions.nodes import TaskNode +from magnus.integration import BaseIntegration +from magnus.nodes import BaseNode +from magnus.tasks import BaseTaskType + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +def create_executable(params: Dict[str, Any], model: Type[BaseTaskType], node_name: str) -> BaseTaskType: + class EasyModel(model): # type: ignore + model_config = ConfigDict(extra="ignore") + + swallow_all = EasyModel(**params, node_name=node_name) + return swallow_all + + +class MockedExecutor(GenericExecutor): + service_name: str = "mocked" + + enable_parallel: bool = defaults.ENABLE_PARALLEL + + patches: Dict[str, Any] = Field(default_factory=dict) + + @property + def _context(self): + return context.run_context + + def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None: + raise Exception("MockedExecutor does not support re-run") + + def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + This is the entry point to from the graph execution. 
+
+        While self.execute_graph is responsible for traversing the graph, this function is responsible for
+        the actual execution of the node.
+
+        If the node type is:
+            * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
+            * success: We can delegate to _execute_node
+            * fail: We can delegate to _execute_node
+
+        For nodes that are internally graphs:
+            * parallel: Delegate the responsibility of execution to node.execute_as_graph()
+            * dag: Delegate the responsibility of execution to node.execute_as_graph()
+            * map: Delegate the responsibility of execution to node.execute_as_graph()
+
+        Transpilers will NEVER use this method and will NEVER call this method.
+        This method should only be used by interactive executors.
+
+        Args:
+            node (Node): The node to execute
+            map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the
+                iterable. Defaults to None.
+        """
+        step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+
+        self.add_code_identities(node=node, step_log=step_log)
+
+        step_log.step_type = node.node_type
+        step_log.status = defaults.PROCESSING
+
+        # Add the step log to the database as per the situation.
+        # If it is a terminal node, complete it now
+        if node.node_type in ["success", "fail"]:
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            self._execute_node(node, map_variable=map_variable, **kwargs)
+            return
+
+        # We call an internal function to iterate the sub graphs and execute them
+        if node.is_composite:
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            node.execute_as_graph(map_variable=map_variable, **kwargs)
+            return
+
+        node_to_send: TaskNode = cast(TaskNode, node).model_copy(deep=True)
+        if node.name not in self.patches:
+            # The node is not patched, so mock it
+            step_log.mock = True
+        else:
+            # The node is patched, so swap the executable for one built from the patch value
+            executable_type = node_to_send.executable.__class__
+            executable = create_executable(
+                self.patches[node.name],
+                executable_type,
+                node_name=node.name,
+            )
+            node_to_send.executable = executable
+
+        # Executor specific way to trigger a job
+        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+        self.trigger_job(node=node_to_send, map_variable=map_variable, **kwargs)
+
+    def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+        """
+        Call this method only if we are responsible for traversing the graph via
+        execute_from_graph().
+
+        We are not prepared to execute the node as of now.
+
+        Args:
+            node (BaseNode): The node to execute
+            map_variable (str, optional): If the node is of a map state, this corresponds to the value of the
+                iterable. Defaults to ''.
+
+        NOTE: We do not raise an exception as this method is not required by many extensions
+        """
+        self.prepare_for_node_execution()
+        self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+    def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
+        """
+        In case of a re-run, this method checks the previous run's step status to determine whether a re-run is
+        necessary.
+            * True: If it is not a re-run.
+            * True: If it is a re-run and we failed in the last run or the corresponding logs do not exist.
+            * False: If it is a re-run and we succeeded in the last run.
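The override resolution described in the _resolve_executor_config docstring below amounts to layering a node's chosen override block over the global executor config. As an aside, a minimal sketch of that merge using the docstring's own example values; the key names are taken from that example:

global_config = {"k1": "v1", "k3": "v3"}
overrides = {"custom_config": {"k1": "v11", "k2": "v2"}}

node_override_key = "custom_config"  # what the step refers to under overrides
effective_config = {**global_config, **overrides.get(node_override_key, {})}
assert effective_config == {"k1": "v11", "k2": "v2", "k3": "v3"}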
+ + Most cases, this logic need not be touched + + Args: + node (Node): The node to check against re-run + map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable.. + Defaults to None. + + Returns: + bool: Eligibility for re-run. True means re-run, False means skip to the next step. + """ + return True + + def _resolve_executor_config(self, node: BaseNode): + """ + The overrides section can contain specific over-rides to an global executor config. + To avoid too much clutter in the dag definition, we allow the configuration file to have overrides block. + The nodes can over-ride the global config by referring to key in the overrides. + + This function also applies variables to the effective node config. + + For example: + # configuration.yaml + execution: + type: cloud-implementation + config: + k1: v1 + k3: v3 + overrides: + custom_config: + k1: v11 + k2: v2 # Could be a mapping internally. + + # in pipeline definition.yaml + dag: + steps: + step1: + overrides: + cloud-implementation: custom_config + + This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'} + + Args: + node (BaseNode): The current node being processed. + + """ + effective_node_config = copy.deepcopy(self.model_dump()) + + return effective_node_config + + def execute_job(self, node: TaskNode): + pass + + def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + For local execution, we just execute the node. + + Args: + node (BaseNode): _description_ + map_variable (dict[str, str], optional): _description_. Defaults to None. + """ + self._execute_node(node=node, map_variable=map_variable, **kwargs) + + +class LocalContainerComputeFileSystemRunLogstore(BaseIntegration): + """ + Integration between local container and file system run log store + """ + + executor_type = "local-container" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "file-system" # The actual implementation of the service + + def validate(self, **kwargs): + if self.executor._is_parallel_execution(): # pragma: no branch + msg = "Mocked executor does not support parallel execution. " + logger.warning(msg) + + +class LocalContainerComputeChunkedFSRunLogstore(BaseIntegration): + """ + Integration between local container and file system run log store + """ + + executor_type = "local-container" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "chunked-fs" # The actual implementation of the service + + def validate(self, **kwargs): + if self.executor._is_parallel_execution(): # pragma: no branch + msg = "Mocked executor does not support parallel execution. 
" + logger.warning(msg) diff --git a/magnus/extensions/experiment_tracker/__init__.py b/magnus/extensions/experiment_tracker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/experiment_tracker/mlflow/__init__.py b/magnus/extensions/experiment_tracker/mlflow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/experiment_tracker/mlflow/implementation.py b/magnus/extensions/experiment_tracker/mlflow/implementation.py new file mode 100644 index 00000000..4cce45c0 --- /dev/null +++ b/magnus/extensions/experiment_tracker/mlflow/implementation.py @@ -0,0 +1,94 @@ +import functools +import logging +from typing import Any, Union + +from pydantic import ConfigDict, PrivateAttr + +from magnus import defaults +from magnus.experiment_tracker import BaseExperimentTracker + +logger = logging.getLogger(defaults.NAME) + + +class MLFlowExperimentTracker(BaseExperimentTracker): + """ + A MLFlow experiment tracker. + + TODO: Need to set up credentials from secrets + """ + + service_name: str = "mlflow" + + server_url: str + autolog: bool = False + + _default_experiment_name: str = PrivateAttr(default="Default") + _active_run_id: str = PrivateAttr(default="") + _client: Any = PrivateAttr(default=None) + + model_config = ConfigDict(extra="forbid") + + def model_post_init(self, __context: Any) -> None: + try: + import mlflow + except ImportError: + raise Exception("You need to install mlflow to use MLFlowExperimentTracker.") + + self._client = mlflow + + self._client.set_tracking_uri(self.server_url) + + if self.autolog: + self._client.autolog(log_models=False) + + @functools.cached_property + def experiment_id(self): + experiment_name = self._default_experiment_name + + # If a tag is provided, we should create that as our experiment + if self._context.tag: + experiment_name = self._context.tag + + experiment = self._client.get_experiment_by_name(experiment_name) + if not experiment: + # Create the experiment and get it. + experiment = self._client.create_experiment(experiment_name) + experiment = self._client.get_experiment(experiment) + + return experiment.experiment_id + + @functools.cached_property + def run_name(self): + return self._context.run_id + + @property + def client_context(self): + if self._active_run_id: + return self._client.start_run( + run_id=self._active_run_id, experiment_id=self.experiment_id, run_name=self.run_name + ) + + active_run = self._client.start_run(run_name=self.run_name, experiment_id=self.experiment_id) + self._active_run_id = active_run.info.run_id + return active_run + + def log_metric(self, key: str, value: Union[int, float], step: int = 0): + """ + Sets the metric in the experiment tracking. + + Args: + key (str): The key against you want to store the value + value (Any): The value of the metric + """ + if not isinstance(value, float) or isinstance(value, int): + msg = f"Only float/int values are accepted as metrics. 
Setting the metric {key} as parameter {key}_{step}" + logger.warning(msg) + self.log_parameter(key=key, value=value, step=step) + return + + with self.client_context as _: + self._client.log_metric(key, float(value), step=step or None) + + def log_parameter(self, key: str, value: Any, step: int = 0): + with self.client_context as _: + self._client.log_param(key + f"_{str(step)}", value) diff --git a/magnus/extensions/nodes.py b/magnus/extensions/nodes.py new file mode 100644 index 00000000..17f0a981 --- /dev/null +++ b/magnus/extensions/nodes.py @@ -0,0 +1,675 @@ +import json +import logging +import multiprocessing +from collections import OrderedDict +from copy import deepcopy +from datetime import datetime +from typing import Any, Dict, cast + +from pydantic import ConfigDict, Field, ValidationInfo, field_serializer, field_validator +from typing_extensions import Annotated + +from magnus import defaults, utils +from magnus.datastore import StepAttempt +from magnus.defaults import TypeMapVariable +from magnus.graph import Graph, create_graph +from magnus.nodes import CompositeNode, ExecutableNode, TerminalNode +from magnus.tasks import BaseTaskType, create_task + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class TaskNode(ExecutableNode): + """ + A node of type Task. + + This node does the actual function execution of the graph in all cases. + """ + + executable: BaseTaskType = Field(exclude=True) + node_type: str = Field(default="task", serialization_alias="type") + + # It is technically not allowed as parse_from_config filters them. + # This is just to get the task level configuration to be present during serialization. + model_config = ConfigDict(extra="allow") + + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "TaskNode": + # separate task config from node config + task_config = {k: v for k, v in config.items() if k not in TaskNode.model_fields.keys()} + node_config = {k: v for k, v in config.items() if k in TaskNode.model_fields.keys()} + + task_config["node_name"] = config.get("name") + + executable = create_task(task_config) + return cls(executable=executable, **node_config, **task_config) + + def execute(self, mock=False, map_variable: TypeMapVariable = None, **kwargs) -> StepAttempt: + """ + All that we do in magnus is to come to this point where we actually execute the command. + + Args: + executor (_type_): The executor class + mock (bool, optional): If we should just mock and not execute. Defaults to False. + map_variable (dict, optional): If the node is part of internal branch. Defaults to None. 
+ + Returns: + StepAttempt: The attempt object + """ + print("Executing task:", self._context.executor._context_node) + # Here is where the juice is + attempt_log = self._context.run_log_store.create_attempt_log() + try: + attempt_log.start_time = str(datetime.now()) + attempt_log.status = defaults.SUCCESS + if not mock: + # Do not run if we are mocking the execution, could be useful for caching and dry runs + self.executable.execute_command(map_variable=map_variable) + except Exception as _e: # pylint: disable=W0703 + logger.exception("Task failed") + attempt_log.status = defaults.FAIL + attempt_log.message = str(_e) + finally: + attempt_log.end_time = str(datetime.now()) + attempt_log.duration = utils.get_duration_between_datetime_strings( + attempt_log.start_time, attempt_log.end_time + ) + return attempt_log + + +class FailNode(TerminalNode): + """ + A leaf node of the graph that represents a failure node + """ + + node_type: str = Field(default="fail", serialization_alias="type") + + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "FailNode": + return cast("FailNode", super().parse_from_config(config)) + + def execute(self, mock=False, map_variable: TypeMapVariable = None, **kwargs) -> StepAttempt: + """ + Execute the failure node. + Set the run or branch log status to failure. + + Args: + executor (_type_): the executor class + mock (bool, optional): If we should just mock and not do the actual execution. Defaults to False. + map_variable (dict, optional): If the node belongs to internal branches. Defaults to None. + + Returns: + StepAttempt: The step attempt object + """ + attempt_log = self._context.run_log_store.create_attempt_log() + try: + attempt_log.start_time = str(datetime.now()) + attempt_log.status = defaults.SUCCESS + #  could be a branch or run log + run_or_branch_log = self._context.run_log_store.get_branch_log( + self._get_branch_log_name(map_variable), self._context.run_id + ) + run_or_branch_log.status = defaults.FAIL + self._context.run_log_store.add_branch_log(run_or_branch_log, self._context.run_id) + except BaseException: # pylint: disable=W0703 + logger.exception("Fail node execution failed") + finally: + attempt_log.status = defaults.SUCCESS # This is a dummy node, so we ignore errors and mark SUCCESS + attempt_log.end_time = str(datetime.now()) + attempt_log.duration = utils.get_duration_between_datetime_strings( + attempt_log.start_time, attempt_log.end_time + ) + return attempt_log + + +class SuccessNode(TerminalNode): + """ + A leaf node of the graph that represents a success node + """ + + node_type: str = Field(default="success", serialization_alias="type") + + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "SuccessNode": + return cast("SuccessNode", super().parse_from_config(config)) + + def execute(self, mock=False, map_variable: TypeMapVariable = None, **kwargs) -> StepAttempt: + """ + Execute the success node. + Set the run or branch log status to success. + + Args: + executor (_type_): The executor class + mock (bool, optional): If we should just mock and not perform anything. Defaults to False. + map_variable (dict, optional): If the node belongs to an internal branch. Defaults to None. 
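The attempt logs above store start and end times as str(datetime.now()) and derive the duration through utils.get_duration_between_datetime_strings. That helper's internals are not shown in this diff; a plausible equivalent, offered only as an assumption about its behavior, is simply:

from datetime import datetime

start_time = str(datetime.now())  # e.g. "2024-01-01 12:00:00.123456"
end_time = str(datetime.now())

# datetime.fromisoformat parses the default str() representation back into a datetime
duration = str(datetime.fromisoformat(end_time) - datetime.fromisoformat(start_time))
print(duration)  # e.g. "0:00:00.000042", a stringified timedelta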
+ + Returns: + StepAttempt: The step attempt object + """ + attempt_log = self._context.run_log_store.create_attempt_log() + try: + attempt_log.start_time = str(datetime.now()) + attempt_log.status = defaults.SUCCESS + #  could be a branch or run log + run_or_branch_log = self._context.run_log_store.get_branch_log( + self._get_branch_log_name(map_variable), self._context.run_id + ) + run_or_branch_log.status = defaults.SUCCESS + self._context.run_log_store.add_branch_log(run_or_branch_log, self._context.run_id) + except BaseException: # pylint: disable=W0703 + logger.exception("Success node execution failed") + finally: + attempt_log.status = defaults.SUCCESS # This is a dummy node and we make sure we mark it as success + attempt_log.end_time = str(datetime.now()) + attempt_log.duration = utils.get_duration_between_datetime_strings( + attempt_log.start_time, attempt_log.end_time + ) + return attempt_log + + +class ParallelNode(CompositeNode): + """ + A composite node containing many graph objects within itself. + + The structure is generally: + ParallelNode: + Branch A: + Sub graph definition + Branch B: + Sub graph definition + . . . + + """ + + node_type: str = Field(default="parallel", serialization_alias="type") + branches: Dict[str, Graph] + is_composite: bool = Field(default=True, exclude=True) + + @field_serializer("branches") + def ser_branches(self, branches: Dict[str, Graph]) -> Dict[str, Graph]: + ret: Dict[str, Graph] = {} + + for branch_name, branch in branches.items(): + ret[branch_name.split(".")[-1]] = branch + + return ret + + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "ParallelNode": + internal_name = cast(str, config.get("internal_name")) + + config_branches = config.pop("branches", {}) + branches = {} + for branch_name, branch_config in config_branches.items(): + sub_graph = create_graph( + deepcopy(branch_config), + internal_branch_name=internal_name + "." + branch_name, + ) + branches[internal_name + "." + branch_name] = sub_graph + + if not branches: + raise Exception("A parallel node should have branches") + return cls(branches=branches, **config) + + def _get_branch_by_name(self, branch_name: str) -> Graph: + if branch_name in self.branches: + return self.branches[branch_name] + + raise Exception(f"Branch {branch_name} does not exist") + + def fan_out(self, map_variable: TypeMapVariable = None, **kwargs): + """ + The general fan out method for a node of type Parallel. + This method assumes that the step log has already been created. + + 3rd party orchestrators should create the step log and use this method to create the branch logs. + + Args: + executor (BaseExecutor): The executor class as defined by the config + map_variable (dict, optional): If the node is part of a map node. Defaults to None. + """ + # Prepare the branch logs + for internal_branch_name, _ in self.branches.items(): + effective_branch_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable) + + branch_log = self._context.run_log_store.create_branch_log(effective_branch_name) + branch_log.status = defaults.PROCESSING + self._context.run_log_store.add_branch_log(branch_log, self._context.run_id) + + def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs): + """ + This function does the actual execution of the sub-branches of the parallel node. + + From a design perspective, this function should not be called if the execution is 3rd party orchestrated. 
+ + The modes that render the job specifications, do not need to interact with this node at all as they have their + own internal mechanisms of handing parallel states. + If they do not, you can find a way using as-is nodes as hack nodes. + + The execution of a dag, could result in + * The dag being completely executed with a definite (fail, success) state in case of + local or local-container execution + * The dag being in a processing state with PROCESSING status in case of local-aws-batch + + Only fail state is considered failure during this phase of execution. + + Args: + executor (Executor): The Executor as per the use config + **kwargs: Optional kwargs passed around + """ + from magnus import entrypoints + + self.fan_out(map_variable=map_variable, **kwargs) + + jobs = [] + # Given that we can have nesting and complex graphs, controlling the number of processes is hard. + # A better way is to actually submit the job to some process scheduler which does resource management + for internal_branch_name, branch in self.branches.items(): + if self._context.executor._is_parallel_execution(): + # Trigger parallel jobs + action = entrypoints.execute_single_brach + kwargs = { + "configuration_file": self._context.configuration_file, + "pipeline_file": self._context.pipeline_file, + "branch_name": internal_branch_name.replace(" ", defaults.COMMAND_FRIENDLY_CHARACTER), + "run_id": self._context.run_id, + "map_variable": json.dumps(map_variable), + "tag": self._context.tag, + } + process = multiprocessing.Process(target=action, kwargs=kwargs) + jobs.append(process) + process.start() + + else: + # If parallel is not enabled, execute them sequentially + self._context.executor.execute_graph(branch, map_variable=map_variable, **kwargs) + + for job in jobs: + job.join() # Find status of the branches + + self.fan_in(map_variable=map_variable, **kwargs) + + def fan_in(self, map_variable: TypeMapVariable = None, **kwargs): + """ + The general fan in method for a node of type Parallel. + + 3rd party orchestrators should use this method to find the status of the composite step. + + Args: + executor (BaseExecutor): The executor class as defined by the config + map_variable (dict, optional): If the node is part of a map. Defaults to None. + """ + step_success_bool = True + for internal_branch_name, _ in self.branches.items(): + effective_branch_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable) + branch_log = self._context.run_log_store.get_branch_log(effective_branch_name, self._context.run_id) + if branch_log.status != defaults.SUCCESS: + step_success_bool = False + + # Collate all the results and update the status of the step + effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable) + step_log = self._context.run_log_store.get_step_log(effective_internal_name, self._context.run_id) + + if step_success_bool: #  If none failed + step_log.status = defaults.SUCCESS + else: + step_log.status = defaults.FAIL + + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + +class MapNode(CompositeNode): + """ + A composite node that contains ONE graph object within itself that has to be executed with an iterable. + + The structure is generally: + MapNode: + branch + + The config is expected to have a variable 'iterate_on' and iterate_as which are looked for in the parameters. 
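When parallel execution is enabled, the composite nodes above fan each branch out into its own process and join them before fanning in. As an aside, a generic sketch of that pattern; the branch runner here is a stand-in, whereas the real code re-enters magnus through the entrypoints module:

import multiprocessing


def run_branch(branch_name: str, run_id: str) -> None:
    # Stand-in for the real branch execution, which re-enters magnus.
    print(f"executing {branch_name} for {run_id}")


if __name__ == "__main__":
    jobs = []
    for branch_name in ["branch_a", "branch_b"]:
        process = multiprocessing.Process(target=run_branch, kwargs={"branch_name": branch_name, "run_id": "demo"})
        jobs.append(process)
        process.start()

    for job in jobs:
        job.join()  # wait for every branch before fanning in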
+ for iter_variable in parameters['iterate_on']: + Execute the Branch by sending {'iterate_as': iter_variable} + + The internal naming convention creates branches dynamically based on the iteration value + """ + + node_type: str = Field(default="map", serialization_alias="type") + iterate_on: str + iterate_as: str + branch: Graph + is_composite: bool = True + + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "MapNode": + internal_name = cast(str, config.get("internal_name")) + + config_branch = config.pop("branch", {}) + if not config_branch: + raise Exception("A map node should have a branch") + + branch = create_graph( + deepcopy(config_branch), + internal_branch_name=internal_name + "." + defaults.MAP_PLACEHOLDER, + ) + return cls(branch=branch, **config) + + def _get_branch_by_name(self, branch_name: str) -> Graph: + """ + Retrieve a branch by name. + + In the case of a Map Object, the branch naming is dynamic as it is parameterized on iterable. + This method takes no responsibility in checking the validity of the naming. + + Returns a Graph Object + + Args: + branch_name (str): The name of the branch to retrieve + + Raises: + Exception: If the branch by that name does not exist + """ + return self.branch + + def fan_out(self, map_variable: TypeMapVariable = None, **kwargs): + """ + The general method to fan out for a node of type map. + This method assumes that the step log has already been created. + + 3rd party orchestrators should call this method to create the individual branch logs. + + Args: + executor (BaseExecutor): The executor class as defined by the config + map_variable (dict, optional): If the node is part of map. Defaults to None. + """ + iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on] + + # Prepare the branch logs + for iter_variable in iterate_on: + effective_branch_name = self._resolve_map_placeholders( + self.internal_name + "." + str(iter_variable), map_variable=map_variable + ) + branch_log = self._context.run_log_store.create_branch_log(effective_branch_name) + branch_log.status = defaults.PROCESSING + self._context.run_log_store.add_branch_log(branch_log, self._context.run_id) + + def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs): + """ + This function does the actual execution of the branch of the map node. + + From a design perspective, this function should not be called if the execution is 3rd party orchestrated. + + The modes that render the job specifications, do not need to interact with this node at all as + they have their own internal mechanisms of handing map states or dynamic parallel states. + If they do not, you can find a way using as-is nodes as hack nodes. + + The actual logic is : + * We iterate over the iterable as mentioned in the config + * For every value in the iterable we call the executor.execute_graph(branch, iterate_as: iter_variable) + + The execution of a dag, could result in + * The dag being completely executed with a definite (fail, success) state in case of local + or local-container execution + * The dag being in a processing state with PROCESSING status in case of local-aws-batch + + Only fail state is considered failure during this phase of execution. 
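Concretely, the iteration described above turns a list-valued parameter into one map_variable per branch run. A small sketch with made-up parameter values; the names follow the iterate_on/iterate_as convention but are otherwise hypothetical:

from collections import OrderedDict

parameters = {"chunks": ["a", "b", "c"]}    # hypothetical run log parameters
iterate_on, iterate_as = "chunks", "chunk"  # as configured on the map node

for iter_variable in parameters[iterate_on]:
    map_variable = OrderedDict()
    map_variable[iterate_as] = iter_variable
    # each branch execution receives {"chunk": "a"}, {"chunk": "b"}, ...
    print(dict(map_variable))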
+ + Args: + executor (Executor): The Executor as per the use config + map_variable (dict): The map variables the graph belongs to + **kwargs: Optional kwargs passed around + """ + from magnus import entrypoints + + iterate_on = None + try: + iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on] + except KeyError: + raise Exception( + f"Expected parameter {self.iterate_on} not present in Run Log parameters, was it ever set before?" + ) + + if not isinstance(iterate_on, list): + raise Exception("Only list is allowed as a valid iterator type") + + self.fan_out(map_variable=map_variable, **kwargs) + + jobs = [] + # Given that we can have nesting and complex graphs, controlling the number of processess is hard. + # A better way is to actually submit the job to some process scheduler which does resource management + for iter_variable in iterate_on: + effective_map_variable = map_variable or OrderedDict() + effective_map_variable[self.iterate_as] = iter_variable + + if self._context.executor._is_parallel_execution(): + # Trigger parallel jobs + action = entrypoints.execute_single_brach + kwargs = { + "configuration_file": self._context.configuration_file, + "pipeline_file": self._context.pipeline_file, + "branch_name": self.branch.internal_branch_name.replace(" ", defaults.COMMAND_FRIENDLY_CHARACTER), + "run_id": self._context.run_id, + "map_variable": json.dumps(effective_map_variable), + "tag": self._context.tag, + } + process = multiprocessing.Process(target=action, kwargs=kwargs) + jobs.append(process) + process.start() + + else: + # If parallel is not enabled, execute them sequentially + self._context.executor.execute_graph(self.branch, map_variable=effective_map_variable, **kwargs) + + for job in jobs: + job.join() + + self.fan_in(map_variable=map_variable, **kwargs) + + def fan_in(self, map_variable: TypeMapVariable = None, **kwargs): + """ + The general method to fan in for a node of type map. + + 3rd party orchestrators should call this method to find the status of the step log. + + Args: + executor (BaseExecutor): The executor class as defined by the config + map_variable (dict, optional): If the node is part of map node. Defaults to None. + """ + iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on] + # # Find status of the branches + step_success_bool = True + + for iter_variable in iterate_on: + effective_branch_name = self._resolve_map_placeholders( + self.internal_name + "." + str(iter_variable), map_variable=map_variable + ) + branch_log = self._context.run_log_store.get_branch_log(effective_branch_name, self._context.run_id) + if branch_log.status != defaults.SUCCESS: + step_success_bool = False + + # Collate all the results and update the status of the step + effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable) + step_log = self._context.run_log_store.get_step_log(effective_internal_name, self._context.run_id) + + if step_success_bool: #  If none failed and nothing is waiting + step_log.status = defaults.SUCCESS + else: + step_log.status = defaults.FAIL + + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + +class DagNode(CompositeNode): + """ + A composite node that internally holds a dag. + + The structure is generally: + DagNode: + dag_definition: A YAML file that holds the dag in 'dag' block + + The config is expected to have a variable 'dag_definition'. 
+ """ + + node_type: str = Field(default="dag", serialization_alias="type") + dag_definition: str + branch: Graph + is_composite: bool = True + internal_branch_name: Annotated[str, Field(validate_default=True)] = "" + + @field_validator("internal_branch_name") + @classmethod + def validate_internal_branch_name(cls, internal_branch_name: str, info: ValidationInfo): + internal_name = info.data["internal_name"] + return internal_name + "." + defaults.DAG_BRANCH_NAME + + @field_validator("dag_definition") + @classmethod + def validate_dag_definition(cls, value): + if not value.endswith(".yaml"): # TODO: Might have a problem with the SDK + raise ValueError("dag_definition must be a YAML file") + return value + + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "DagNode": + internal_name = cast(str, config.get("internal_name")) + + if "dag_definition" not in config: + raise Exception(f"No dag definition found in {config}") + + dag_config = utils.load_yaml(config["dag_definition"]) + if "dag" not in dag_config: + raise Exception("No DAG found in dag_definition, please provide it in dag block") + + branch = create_graph(dag_config["dag"], internal_branch_name=internal_name + "." + defaults.DAG_BRANCH_NAME) + + return cls(branch=branch, **config) + + def _get_branch_by_name(self, branch_name: str): + """ + Retrieve a branch by name. + The name is expected to follow a dot path convention. + + Returns a Graph Object + + Args: + branch_name (str): The name of the branch to retrieve + + Raises: + Exception: If the branch_name is not 'dag' + """ + if branch_name != self.internal_branch_name: + raise Exception(f"Node of type {self.node_type} only allows a branch of name {defaults.DAG_BRANCH_NAME}") + + return self.branch + + def fan_out(self, map_variable: TypeMapVariable = None, **kwargs): + """ + The general method to fan out for a node of type dag. + The method assumes that the step log has already been created. + + Args: + executor (BaseExecutor): The executor class as defined by the config + map_variable (dict, optional): _description_. Defaults to None. + """ + effective_branch_name = self._resolve_map_placeholders(self.internal_branch_name, map_variable=map_variable) + + branch_log = self._context.run_log_store.create_branch_log(effective_branch_name) + branch_log.status = defaults.PROCESSING + self._context.run_log_store.add_branch_log(branch_log, self._context.run_id) + + def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs): + """ + This function does the actual execution of the branch of the dag node. + + From a design perspective, this function should not be called if the execution is 3rd party orchestrated. + + The modes that render the job specifications, do not need to interact with this node at all + as they have their own internal mechanisms of handling sub dags. + If they do not, you can find a way using as-is nodes as hack nodes. + + The actual logic is : + * We just execute the branch as with any other composite nodes + * The branch name is called 'dag' + + The execution of a dag, could result in + * The dag being completely executed with a definite (fail, success) state in case of + local or local-container execution + * The dag being in a processing state with PROCESSING status in case of local-aws-batch + + Only fail state is considered failure during this phase of execution. 
+ + Args: + executor (Executor): The Executor as per the use config + **kwargs: Optional kwargs passed around + """ + self.fan_out(map_variable=map_variable, **kwargs) + self._context.executor.execute_graph(self.branch, map_variable=map_variable, **kwargs) + self.fan_in(map_variable=map_variable, **kwargs) + + def fan_in(self, map_variable: TypeMapVariable = None, **kwargs): + """ + The general method to fan in for a node of type dag. + + 3rd party orchestrators should call this method to find the status of the step log. + + Args: + executor (BaseExecutor): The executor class as defined by the config + map_variable (dict, optional): If the node is part of type dag. Defaults to None. + """ + step_success_bool = True + effective_branch_name = self._resolve_map_placeholders(self.internal_branch_name, map_variable=map_variable) + effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable) + + branch_log = self._context.run_log_store.get_branch_log(effective_branch_name, self._context.run_id) + if branch_log.status != defaults.SUCCESS: + step_success_bool = False + + step_log = self._context.run_log_store.get_step_log(effective_internal_name, self._context.run_id) + step_log.status = defaults.PROCESSING + + if step_success_bool: #  If none failed and nothing is waiting + step_log.status = defaults.SUCCESS + else: + step_log.status = defaults.FAIL + + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + +class StubNode(ExecutableNode): + """ + Stub is a convenience design node. + + It always returns success in the attempt log and does nothing. + + This node is very similar to pass state in Step functions. + + This node type could be handy when designing the pipeline and stubbing functions + """ + + node_type: str = Field(default="stub", serialization_alias="type") + model_config = ConfigDict(extra="allow") + + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "StubNode": + return cls(**config) + + def execute(self, mock=False, map_variable: TypeMapVariable = None, **kwargs) -> StepAttempt: + """ + Do Nothing node. + We just send an success attempt log back to the caller + + Args: + executor ([type]): [description] + mock (bool, optional): [description]. Defaults to False. + map_variable (str, optional): [description]. Defaults to ''. 
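# Editor's aside (illustrative step names): stub steps are handy placeholders while
# sketching a pipeline; a step can be mocked simply as
#     "clean data": {"type": "stub", "next": "train model"}
# and swapped for a real task later. Extra keys are tolerated (extra="allow" above),
# so the eventual task configuration can stay in place while the step is stubbed.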
+ + Returns: + [type]: [description] + """ + attempt_log = self._context.run_log_store.create_attempt_log() + + attempt_log.start_time = str(datetime.now()) + attempt_log.status = defaults.SUCCESS # This is a dummy node and always will be success + + attempt_log.end_time = str(datetime.now()) + attempt_log.duration = utils.get_duration_between_datetime_strings(attempt_log.start_time, attempt_log.end_time) + return attempt_log diff --git a/magnus/extensions/run_log_store/__init__.py b/magnus/extensions/run_log_store/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/run_log_store/chunked_file_system/__init__.py b/magnus/extensions/run_log_store/chunked_file_system/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/run_log_store/chunked_file_system/implementation.py b/magnus/extensions/run_log_store/chunked_file_system/implementation.py new file mode 100644 index 00000000..425b42e4 --- /dev/null +++ b/magnus/extensions/run_log_store/chunked_file_system/implementation.py @@ -0,0 +1,106 @@ +import json +import logging +from pathlib import Path +from string import Template +from typing import Optional, Sequence, Union + +from magnus import defaults, utils +from magnus.extensions.run_log_store.generic_chunked import ChunkedRunLogStore + +logger = logging.getLogger(defaults.LOGGER_NAME) + +T = Union[str, Path] + + +class ChunkedFileSystemRunLogStore(ChunkedRunLogStore): + """ + File system run log store but chunks the run log into thread safe chunks. + This enables executions to be parallel. + """ + + service_name: str = "chunked-fs" + log_folder: str = defaults.LOG_LOCATION_FOLDER + + def get_matches(self, run_id: str, name: str, multiple_allowed: bool = False) -> Optional[Union[Sequence[T], T]]: + """ + Get contents of files matching the pattern name* + + Args: + run_id (str): The run id + name (str): The suffix of the file name to check in the run log store. + """ + log_folder = self.log_folder_with_run_id(run_id=run_id) + + sub_name = Template(name).safe_substitute({"creation_time": ""}) + + matches = list(log_folder.glob(f"{sub_name}*")) + if matches: + if not multiple_allowed: + if len(matches) > 1: + msg = f"Multiple matches found for {name} while multiple is not allowed" + raise Exception(msg) + return matches[0] + return matches + + return None + + def log_folder_with_run_id(self, run_id: str) -> Path: + """ + Utility function to get the log folder for a run id. + + Args: + run_id (str): The run id + + Returns: + Path: The path to the log folder with the run id + """ + return Path(self.log_folder) / run_id + + def safe_suffix_json(self, name: Union[Path, str]) -> str: + """ + Safely attach a suffix to a json file. + + Args: + name (Path): The name of the file with or without suffix of json + + Returns: + str : The name of the file with .json + """ + if str(name).endswith("json"): + return str(name) + + return str(name) + ".json" + + def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False): + """ + Store the contents against the name in the folder. 
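# Editor's note (illustrative, hypothetical run id and step name): the chunked store keeps one
# JSON file per logical entity under <log_folder>/<run_id>/, e.g.
#   .run_log_store/my-run/RunLog.json
#   .run_log_store/my-run/Parameter-chunks.json
#   .run_log_store/my-run/StepLog-extract data-1689073200000000000.json
# get_matches blanks out the creation_time placeholder and globs for the rest, so the
# timestamp suffix never has to be known by the caller:
from string import Template

pattern = Template("StepLog-extract data-${creation_time}").safe_substitute({"creation_time": ""})
# pattern == "StepLog-extract data-"; the store then globs for "StepLog-extract data-*"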
+ + Args: + run_id (str): The run id + contents (dict): The dict to store + name (str): The name to store as + """ + if insert: + name = self.log_folder_with_run_id(run_id=run_id) / name + + utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id)) + + with open(self.safe_suffix_json(name), "w") as fw: + json.dump(contents, fw, ensure_ascii=True, indent=4) + + def _retrieve(self, name: Union[str, Path]) -> dict: + """ + Does the job of retrieving from the folder. + + Args: + name (str): the name of the file to retrieve + + Returns: + dict: The contents + """ + contents: dict = {} + + with open(self.safe_suffix_json(name), "r") as fr: + contents = json.load(fr) + + return contents diff --git a/magnus/extensions/run_log_store/chunked_k8s_pvc/__init__.py b/magnus/extensions/run_log_store/chunked_k8s_pvc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/run_log_store/chunked_k8s_pvc/implementation.py b/magnus/extensions/run_log_store/chunked_k8s_pvc/implementation.py new file mode 100644 index 00000000..1de5cdb2 --- /dev/null +++ b/magnus/extensions/run_log_store/chunked_k8s_pvc/implementation.py @@ -0,0 +1,21 @@ +import logging +from pathlib import Path + +from magnus import defaults +from magnus.extensions.run_log_store.chunked_file_system.implementation import ChunkedFileSystemRunLogStore + +logger = logging.getLogger(defaults.NAME) + + +class ChunkedK8PersistentVolumeRunLogstore(ChunkedFileSystemRunLogStore): + """ + Uses the K8s Persistent Volumes to store run logs. + """ + + service_name: str = "chunked-k8s-pvc" + persistent_volume_name: str + mount_path: str + + @property + def log_folder_name(self) -> str: + return str(Path(self.mount_path) / self.log_folder) diff --git a/magnus/extensions/run_log_store/chunked_k8s_pvc/integration.py b/magnus/extensions/run_log_store/chunked_k8s_pvc/integration.py new file mode 100644 index 00000000..233e3c26 --- /dev/null +++ b/magnus/extensions/run_log_store/chunked_k8s_pvc/integration.py @@ -0,0 +1,59 @@ +import logging +from typing import cast + +from magnus import defaults +from magnus.integration import BaseIntegration + +logger = logging.getLogger(defaults.NAME) + + +class LocalCompute(BaseIntegration): + """ + Integration between local and k8's pvc + """ + + executor_type = "local" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "chunked-k8s-pvc" # The actual implementation of the service + + def validate(self, **kwargs): + msg = "We can't use the local compute k8s pvc store integration." + raise Exception(msg) + + +class LocalContainerCompute(BaseIntegration): + """ + Integration between local-container and k8's pvc + """ + + executor_type = "local-container" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "chunked-k8s-pvc" # The actual implementation of the service + + def validate(self, **kwargs): + msg = "We can't use the local-container compute k8s pvc store integration." 
+ raise Exception(msg) + + +class ArgoCompute(BaseIntegration): + """ + Integration between argo and k8's pvc + """ + + executor_type = "argo" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "chunked-k8s-pvc" # The actual implementation of the service + + def configure_for_traversal(self, **kwargs): + from magnus.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts + from magnus.extensions.run_log_store.chunked_k8s_pvc.implementation import ChunkedK8PersistentVolumeRunLogstore + + self.executor = cast(ArgoExecutor, self.executor) + self.service = cast(ChunkedK8PersistentVolumeRunLogstore, self.service) + + volume_mount = UserVolumeMounts( + name=self.service.persistent_volume_name, + mount_path=self.service.mount_path, + ) + + self.executor.persistent_volumes.append(volume_mount) diff --git a/magnus/extensions/run_log_store/db/implementation_FF.py b/magnus/extensions/run_log_store/db/implementation_FF.py new file mode 100644 index 00000000..33e6acea --- /dev/null +++ b/magnus/extensions/run_log_store/db/implementation_FF.py @@ -0,0 +1,157 @@ +import datetime +import json +import logging +from pathlib import Path +from string import Template +from typing import Any, Dict, List, Optional, Union, cast + +from magnus import defaults, utils +from magnus.extensions.run_log_store.generic_chunked import ChunkedRunLogStore + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class DBRunLogStore(ChunkedRunLogStore): + """ + File system run log store but chunks the run log into thread safe chunks. + This enables executions to be parallel. + """ + + service_name: str = "chunked-fs" + connection_string: str + db_name: str + + _DB_LOG: Any = None + _engine: Any = None + _session: Any = None + _connection_string: str = "" + _base: Any = None + + def model_post_init(self, _: Any) -> None: + run_context = self._context + + secrets = cast(Dict[str, str], run_context.secrets_handler.get()) + connection_string = Template(self.connection_string).safe_substitute(**secrets) + + try: + import sqlalchemy + from sqlalchemy import Column, DateTime, Integer, Sequence, Text + from sqlalchemy.orm import declarative_base, sessionmaker + + Base = declarative_base() + + class DBLog(Base): + """ + Base table for storing run logs in database. + + In this model, we fragment the run log into logical units that are concurrent safe. + """ + + __tablename__ = self.db_name + pk = Column(Integer, Sequence("id_seq"), primary_key=True) + run_id = Column(Text, index=True) + attribute_key = Column(Text) # run_log, step_internal_name, parameter_key etc + attribute_type = Column(Text) # RunLog, Step, Branch, Parameter + attribute_value = Column(Text) # The JSON string + created_at = Column(DateTime, default=datetime.datetime.utcnow) + + self._engine = sqlalchemy.create_engine(connection_string, pool_pre_ping=True) + self._session = sessionmaker(bind=self._engine) + self._DB_LOG = DBLog + self._connection_string = connection_string + self._base = Base + + except ImportError as _e: + logger.exception("Unable to import SQLalchemy, is it installed?") + msg = "SQLAlchemy is required for this extension. 
Please install it" + raise Exception(msg) from _e + + def create_tables(self): + import sqlalchemy + + engine = sqlalchemy.create_engine(self._connection_string) + self._base.metadata.create_all(engine) + + def get_matches(self, run_id: str, name: str, multiple_allowed: bool = False) -> Optional[Union[List[Path], Path]]: + """ + Get contents of files matching the pattern name* + + Args: + run_id (str): The run id + name (str): The suffix of the file name to check in the run log store. + """ + log_folder = self.log_folder_with_run_id(run_id=run_id) + + sub_name = Template(name).safe_substitute({"creation_time": ""}) + + matches = list(log_folder.glob(f"{sub_name}*")) + if matches: + if not multiple_allowed: + if len(matches) > 1: + msg = f"Multiple matches found for {name} while multiple is not allowed" + raise Exception(msg) + return matches[0] + return matches + + return None + + def log_folder_with_run_id(self, run_id: str) -> Path: + """ + Utility function to get the log folder for a run id. + + Args: + run_id (str): The run id + + Returns: + Path: The path to the log folder with the run id + """ + return Path(self.log_folder) / run_id + + def safe_suffix_json(self, name: Union[Path, str]) -> str: + """ + Safely attach a suffix to a json file. + + Args: + name (Path): The name of the file with or without suffix of json + + Returns: + str : The name of the file with .json + """ + if str(name).endswith("json"): + return str(name) + + return str(name) + ".json" + + def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False): + """ + Store the contents against the name in the folder. + + Args: + run_id (str): The run id + contents (dict): The dict to store + name (str): The name to store as + """ + if insert: + name = self.log_folder_with_run_id(run_id=run_id) / name + + utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id)) + + with open(self.safe_suffix_json(name), "w") as fw: + json.dump(contents, fw, ensure_ascii=True, indent=4) + + def _retrieve(self, name: Path) -> dict: + """ + Does the job of retrieving from the folder. + + Args: + name (str): the name of the file to retrieve + + Returns: + dict: The contents + """ + contents: dict = {} + + with open(self.safe_suffix_json(name), "r") as fr: + contents = json.load(fr) + + return contents diff --git a/magnus/extensions/run_log_store/db/integration_FF.py b/magnus/extensions/run_log_store/db/integration_FF.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/run_log_store/file_system/__init__.py b/magnus/extensions/run_log_store/file_system/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/run_log_store/file_system/implementation.py b/magnus/extensions/run_log_store/file_system/implementation.py new file mode 100644 index 00000000..00211ef3 --- /dev/null +++ b/magnus/extensions/run_log_store/file_system/implementation.py @@ -0,0 +1,136 @@ +import json +import logging +from pathlib import Path + +from magnus import defaults, exceptions, utils +from magnus.datastore import BaseRunLogStore, RunLog + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class FileSystemRunLogstore(BaseRunLogStore): + """ + In this type of Run Log store, we use a file system to store the JSON run log. + + Every single run is stored as a different file which makes it compatible across other store types. + + When to use: + When locally testing a pipeline and have the need to compare across runs. 
+ Its fully featured and perfectly fine if your local environment is where you would do everything. + + Do not use: + If you need parallelization on local, this run log would not support it. + + Example config: + + run_log: + type: file-system + config: + log_folder: The folder to out the logs. Defaults to .run_log_store + + """ + + service_name: str = "file-system" + log_folder: str = defaults.LOG_LOCATION_FOLDER + + @property + def log_folder_name(self): + return self.log_folder + + def write_to_folder(self, run_log: RunLog): + """ + Write the run log to the folder + + Args: + run_log (RunLog): The run log to be added to the database + """ + write_to = self.log_folder_name + utils.safe_make_dir(write_to) + + write_to_path = Path(write_to) + run_id = run_log.run_id + json_file_path = write_to_path / f"{run_id}.json" + + with json_file_path.open("w") as fw: + json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4) # pylint: disable=no-member + + def get_from_folder(self, run_id: str) -> RunLog: + """ + Look into the run log folder for the run log for the run id. + + If the run log does not exist, raise an exception. If it does, decode it + as a RunLog and return it + + Args: + run_id (str): The requested run id to retrieve the run log store + + Raises: + FileNotFoundError: If the Run Log has not been found. + + Returns: + RunLog: The decoded Run log + """ + write_to = self.log_folder_name + + read_from_path = Path(write_to) + json_file_path = read_from_path / f"{run_id}.json" + + if not json_file_path.exists(): + raise FileNotFoundError(f"Expected {json_file_path} is not present") + + with json_file_path.open("r") as fr: + json_str = json.load(fr) + run_log = RunLog(**json_str) # pylint: disable=no-member + return run_log + + def create_run_log( + self, + run_id: str, + dag_hash: str = "", + use_cached: bool = False, + tag: str = "", + original_run_id: str = "", + status: str = defaults.CREATED, + **kwargs, + ) -> RunLog: + """ + # Creates a Run log + # Adds it to the db + """ + + try: + self.get_run_log_by_id(run_id=run_id, full=False) + raise exceptions.RunLogExistsError(run_id=run_id) + except exceptions.RunLogNotFoundError: + pass + + logger.info(f"{self.service_name} Creating a Run Log for : {run_id}") + run_log = RunLog( + run_id=run_id, + dag_hash=dag_hash, + use_cached=use_cached, + tag=tag, + original_run_id=original_run_id, + status=status, + ) + self.write_to_folder(run_log) + return run_log + + def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog: + """ + # Returns the run_log defined by id + # Raises Exception if not found + """ + try: + logger.info(f"{self.service_name} Getting a Run Log for : {run_id}") + run_log = self.get_from_folder(run_id) + return run_log + except FileNotFoundError as e: + raise exceptions.RunLogNotFoundError(run_id) from e + + def put_run_log(self, run_log: RunLog, **kwargs): + """ + # Puts the run_log into the database + """ + logger.info(f"{self.service_name} Putting the run log in the DB: {run_log.run_id}") + self.write_to_folder(run_log) diff --git a/magnus/extensions/run_log_store/generic_chunked.py b/magnus/extensions/run_log_store/generic_chunked.py new file mode 100644 index 00000000..2156dcf0 --- /dev/null +++ b/magnus/extensions/run_log_store/generic_chunked.py @@ -0,0 +1,541 @@ +import logging +import time +from abc import abstractmethod +from enum import Enum +from pathlib import Path +from string import Template +from typing import Any, Dict, Optional, Sequence, Union + +from magnus import defaults, 
exceptions +from magnus.datastore import BaseRunLogStore, BranchLog, RunLog, StepLog + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +T = Union[str, Path] # Holds str, path + + +class EntityNotFoundError(Exception): + pass + + +class ChunkedRunLogStore(BaseRunLogStore): + """ + A generic implementation of a RunLogStore that stores RunLogs in chunks. + """ + + service_name: str = "" + + class LogTypes(Enum): + RUN_LOG = "RunLog" + PARAMETER = "Parameter" + STEP_LOG = "StepLog" + BRANCH_LOG = "BranchLog" + + class ModelTypes(Enum): + RUN_LOG = RunLog + PARAMETER = dict + STEP_LOG = StepLog + BRANCH_LOG = BranchLog + + def naming_pattern(self, log_type: LogTypes, name: str = "") -> str: + """ + Naming pattern to store RunLog, Parameter, StepLog or BranchLog. + + The reasoning for name to be defaulted to empty string: + Its actually conditionally empty. For RunLog and Parameter it is empty. + For StepLog and BranchLog it should be provided. + + Args: + log_type (LogTypes): One of RunLog, Parameter, StepLog or BranchLog + name (str, optional): The name to be included or left. Defaults to ''. + + Raises: + Exception: If log_type is not recognized + + Returns: + str: The naming pattern + """ + if log_type == self.LogTypes.RUN_LOG: + return f"{self.LogTypes.RUN_LOG.value}" + + if log_type == self.LogTypes.PARAMETER: + return "-".join([self.LogTypes.PARAMETER.value, name]) + + if not name: + raise Exception(f"Name should be provided for naming pattern for {log_type}") + + if log_type == self.LogTypes.STEP_LOG: + return "-".join([self.LogTypes.STEP_LOG.value, name, "${creation_time}"]) + + if log_type == self.LogTypes.BRANCH_LOG: + return "-".join([self.LogTypes.BRANCH_LOG.value, name, "${creation_time}"]) + + raise Exception("Unexpected log type") + + @abstractmethod + def get_matches(self, run_id: str, name: str, multiple_allowed: bool = False) -> Optional[Union[Sequence[T], T]]: + """ + Get contents of persistence layer matching the pattern name* + + Args: + run_id (str): The run id + name (str): The suffix of the entity name to check in the run log store. + """ + ... + + @abstractmethod + def _store(self, run_id: str, contents: dict, name: T, insert: bool = False): + """ + Store the contents against the name in the persistence layer. + + Args: + run_id (str): The run id + contents (dict): The dict to store + name (str): The name to store as + """ + ... + + @abstractmethod + def _retrieve(self, name: T) -> dict: + """ + Does the job of retrieving from the persistent layer. + + Args: + name (str): the name of the file to retrieve + + Returns: + dict: The contents + """ + ... + + def store(self, run_id: str, log_type: LogTypes, contents: dict, name: str = ""): + """Store a SINGLE log type in the file system + + Args: + run_id (str): The run id to store against + log_type (LogTypes): The type of log to store + contents (dict): The dict of contents to store + name (str, optional): The name against the contents have to be stored. Defaults to ''. + """ + naming_pattern = self.naming_pattern(log_type=log_type, name=name) + match = self.get_matches(run_id=run_id, name=naming_pattern, multiple_allowed=False) + # The boolean multiple allowed confuses mypy a lot! 
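# Editor's note (illustrative field values): store() behaves like an upsert. If a chunk for the
# entity already exists, the new contents are merged over the old ones instead of a second file
# being written, i.e. roughly
existing_contents = {"name": "extract data", "status": "PROCESSING"}
contents = {"status": "SUCCESS"}
merged = dict(existing_contents, **contents)
# merged == {"name": "extract data", "status": "SUCCESS"}
# Only on a miss is a new name minted, with the current time.time_ns() as its suffix.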
+ name_to_give: str = "" + insert = False + + if match: + existing_contents = self._retrieve(name=match) # type: ignore + contents = dict(existing_contents, **contents) + name_to_give = match # type: ignore + else: + name_to_give = Template(naming_pattern).safe_substitute({"creation_time": str(int(time.time_ns()))}) + insert = True + + self._store(run_id=run_id, contents=contents, name=name_to_give, insert=insert) + + def retrieve(self, run_id: str, log_type: LogTypes, name: str = "", multiple_allowed=False) -> Any: + """ + Retrieve the model given a log_type and a name. + Use multiple_allowed to control if you are expecting multiple of them. + e.g. there could be multiple Parameter- entries but only one StepLog-stepname. + + The reasoning for name to be defaulted to empty string: + It is actually conditionally empty. For RunLog and Parameter it is empty. + For StepLog and BranchLog it should be provided. + + Args: + run_id (str): The run id + log_type (LogTypes): One of RunLog, Parameter, StepLog, BranchLog + name (str, optional): The name to match. Defaults to ''. + multiple_allowed (bool, optional): Are multiple allowed. Defaults to False. + + Raises: + EntityNotFoundError: If there is no match found + + Returns: + Any: One of StepLog, BranchLog, Parameter or RunLog + """ + # The return type is Any as it could be one of the log models, a dict or a list of them + if not name and log_type not in [ + self.LogTypes.RUN_LOG, + self.LogTypes.PARAMETER, + ]: + raise Exception(f"Name is required during retrieval for {log_type}") + + naming_pattern = self.naming_pattern(log_type=log_type, name=name) + matches = self.get_matches(run_id=run_id, name=naming_pattern, multiple_allowed=multiple_allowed) + if matches: + if not multiple_allowed: + contents = self._retrieve(name=matches) # type: ignore + model = self.ModelTypes[log_type.name].value + return model(**contents) + + models = [] + for match in matches: # type: ignore + contents = self._retrieve(name=match) + model = self.ModelTypes[log_type.name].value + models.append(model(**contents)) + return models + + raise EntityNotFoundError() + + def orderly_retrieve(self, run_id: str, log_type: LogTypes) -> Dict[str, Union[StepLog, BranchLog]]: + """Should only be used by prepare full run log. + + Retrieves the StepLog or BranchLog sorted according to creation time. + + Args: + run_id (str): The run id + log_type (LogTypes): One of StepLog or BranchLog + """ + prefix: str = self.LogTypes.STEP_LOG.value + + if log_type == self.LogTypes.BRANCH_LOG: + prefix = self.LogTypes.BRANCH_LOG.value + + matches = self.get_matches(run_id=run_id, name=prefix, multiple_allowed=True) + + if log_type == self.LogTypes.BRANCH_LOG and not matches: + # No branch logs are found + return {} + # Forcing get_matches to always return a list is a better design + epoch_created = [str(match).split("-")[-1] for match in matches] # type: ignore + + # sort matches by epoch created + epoch_created, matches = zip(*sorted(zip(epoch_created, matches))) # type: ignore + + logs: Dict[str, Union[StepLog, BranchLog]] = {} + + for match in matches: + model = self.ModelTypes[log_type.name].value + log_model = model(**self._retrieve(match)) + logs[log_model.internal_name] = log_model # type: ignore + + return logs + + def _get_parent_branch(self, name: str) -> Union[str, None]: + """ + Returns the name of the parent branch. + If the step is part of main dag, return None. + + Args: + name (str): The name of the step. + + Returns: + str: The name of the branch containing the step.
+ """ + dot_path = name.split(".") + + if len(dot_path) == 1: + return None + # Ignore the step name + return ".".join(dot_path[:-1]) + + def _get_parent_step(self, name: str) -> Union[str, None]: + """ + Returns the step containing the step, useful when we have steps within a branch. + Returns None, if the step belongs to parent dag. + + Args: + name (str): The name of the step to find the parent step it belongs to. + + Returns: + str: The parent step the step belongs to, None if the step belongs to parent dag. + """ + dot_path = name.split(".") + + if len(dot_path) == 1: + return None + # Ignore the branch.step_name + return ".".join(dot_path[:-2]) + + def _prepare_full_run_log(self, run_log: RunLog): + """ + Populates the run log with the branches and steps. + + Args: + run_log (RunLog): The partial run log containing empty step logs + """ + run_id = run_log.run_id + run_log.parameters = self.get_parameters(run_id=run_id) + + ordered_steps = self.orderly_retrieve(run_id=run_id, log_type=self.LogTypes.STEP_LOG) + ordered_branches = self.orderly_retrieve(run_id=run_id, log_type=self.LogTypes.BRANCH_LOG) + + current_branch: Any = None # It could be str, None, RunLog + for step_internal_name in ordered_steps: + current_branch = self._get_parent_branch(step_internal_name) + step_to_add_branch = self._get_parent_step(step_internal_name) + + if not current_branch: + current_branch = run_log + else: + current_branch = ordered_branches[current_branch] + step_to_add_branch = ordered_steps[step_to_add_branch] # type: ignore + step_to_add_branch.branches[current_branch.internal_name] = current_branch # type: ignore + + current_branch.steps[step_internal_name] = ordered_steps[step_internal_name] + + def create_run_log( + self, + run_id: str, + dag_hash: str = "", + use_cached: bool = False, + tag: str = "", + original_run_id: str = "", + status: str = defaults.CREATED, + **kwargs, + ): + """ + Creates a Run Log object by using the config + + Logically the method should do the following: + * Creates a Run log + * Adds it to the db + * Return the log + """ + try: + self.get_run_log_by_id(run_id=run_id, full=False) + raise exceptions.RunLogExistsError(run_id=run_id) + except exceptions.RunLogNotFoundError: + pass + + logger.info(f"{self.service_name} Creating a Run Log for : {run_id}") + run_log = RunLog( + run_id=run_id, + dag_hash=dag_hash, + use_cached=use_cached, + tag=tag, + original_run_id=original_run_id, + status=status, + ) + + self.store(run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG) + return run_log + + def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog: + """ + Retrieves a Run log from the database using the config and the run_id + + Args: + run_id (str): The run_id of the run + full (bool): return the full run log store or only the RunLog object + + Returns: + RunLog: The RunLog object identified by the run_id + + Logically the method should: + * Returns the run_log defined by id from the data store defined by the config + + """ + try: + logger.info(f"{self.service_name} Getting a Run Log for : {run_id}") + run_log = self.retrieve(run_id=run_id, log_type=self.LogTypes.RUN_LOG, multiple_allowed=False) + + if full: + self._prepare_full_run_log(run_log=run_log) + + return run_log + except EntityNotFoundError as e: + raise exceptions.RunLogNotFoundError(run_id) from e + + def put_run_log(self, run_log: RunLog, **kwargs): + """ + Puts the Run Log in the database as defined by the config + + Args: + run_log (RunLog): The Run log of the 
run + + Logically the method should: + Puts the run_log into the database + + Raises: + NotImplementedError: This is a base class and therefore has no default implementation + """ + run_id = run_log.run_id + self.store(run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG) + + def get_parameters(self, run_id: str, **kwargs) -> dict: + """ + Get the parameters from the Run log defined by the run_id + + Args: + run_id (str): The run_id of the run + + The method should: + * Call get_run_log_by_id(run_id) to retrieve the run_log + * Return the parameters as identified in the run_log + + Returns: + dict: A dictionary of the run_log parameters + Raises: + RunLogNotFoundError: If the run log for run_id is not found in the datastore + """ + parameters = {} + try: + parameters_list = self.retrieve(run_id=run_id, log_type=self.LogTypes.PARAMETER, multiple_allowed=True) + parameters = {key: value for param in parameters_list for key, value in param.items()} + except EntityNotFoundError: + # No parameters are set + pass + + return parameters + + def set_parameters(self, run_id: str, parameters: dict, **kwargs): + """ + Update the parameters of the Run log with the new parameters + + This method would over-write the parameters, if the parameter exists in the run log already + + The method should: + * Call get_run_log_by_id(run_id) to retrieve the run_log + * Update the parameters of the run_log + * Call put_run_log(run_log) to put the run_log in the datastore + + Args: + run_id (str): The run_id of the run + parameters (dict): The parameters to update in the run log + Raises: + RunLogNotFoundError: If the run log for run_id is not found in the datastore + """ + for key, value in parameters.items(): + self.store( + run_id=run_id, + log_type=self.LogTypes.PARAMETER, + contents={key: value}, + name=key, + ) + + def get_run_config(self, run_id: str, **kwargs) -> dict: + """ + Given a run_id, return the run_config used to perform the run. + + Args: + run_id (str): The run_id of the run + + Returns: + dict: The run config used for the run + """ + + run_log = self.get_run_log_by_id(run_id=run_id) + return run_log.run_config + + def set_run_config(self, run_id: str, run_config: dict, **kwargs): + """Set the run config used to run the run_id + + Args: + run_id (str): The run_id of the run + run_config (dict): The run_config of the run + """ + + run_log = self.get_run_log_by_id(run_id=run_id) + run_log.run_config.update(run_config) + self.put_run_log(run_log=run_log) + + def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog: + """ + Get a step log from the datastore for run_id and the internal naming of the step log + + The internal naming of the step log is a dot path convention. 
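# Editor's note (illustrative step names): internal names encode nesting as a dot path,
# which is what _get_parent_branch / _get_parent_step above rely on, e.g.
name = "outer map.iteration_a.inner task"
# _get_parent_branch(name) -> "outer map.iteration_a"  (drop the step name)
# _get_parent_step(name)   -> "outer map"              (drop branch.step_name)
# while a top level step such as "extract data" returns None from both helpers.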
+ + The method should: + * Call get_run_log_by_id(run_id) to retrieve the run_log + * Identify the step location by decoding the internal naming + * Return the step log + + Args: + internal_name (str): The internal name of the step log + run_id (str): The run_id of the run + + Returns: + StepLog: The step log object for the step defined by the internal naming and run_id + + Raises: + RunLogNotFoundError: If the run log for run_id is not found in the datastore + StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id + """ + logger.info(f"{self.service_name} Getting the step log: {internal_name} of {run_id}") + + step_log = self.retrieve( + run_id=run_id, + log_type=self.LogTypes.STEP_LOG, + name=internal_name, + multiple_allowed=False, + ) + + return step_log + + def add_step_log(self, step_log: StepLog, run_id: str, **kwargs): + """ + Add the step log in the run log as identified by the run_id in the datastore + + The method should: + * Call get_run_log_by_id(run_id) to retrieve the run_log + * Identify the branch to add the step by decoding the step_logs internal name + * Add the step log to the identified branch log + * Call put_run_log(run_log) to put the run_log in the datastore + + Args: + step_log (StepLog): The Step log to add to the database + run_id (str): The run id of the run + + Raises: + RunLogNotFoundError: If the run log for run_id is not found in the datastore + BranchLogNotFoundError: If the branch of the step log for internal_name is not found in the datastore + for run_id + """ + logger.info(f"{self.service_name} Adding the step log to DB: {step_log.internal_name}") + + self.store( + run_id=run_id, + log_type=self.LogTypes.STEP_LOG, + contents=step_log.model_dump(), + name=step_log.internal_name, + ) + + def get_branch_log(self, internal_branch_name: str, run_id: str, **kwargs) -> Union[BranchLog, RunLog]: + """ + Returns the branch log by the internal branch name for the run id + + If the internal branch name is none, returns the run log + + Args: + internal_branch_name (str): The internal branch name to retrieve. + run_id (str): The run id of interest + + Returns: + BranchLog: The branch log or the run log as requested. 
+ """ + if not internal_branch_name: + return self.get_run_log_by_id(run_id=run_id) + branch = self.retrieve(run_id=run_id, log_type=self.LogTypes.BRANCH_LOG, name=internal_branch_name) + return branch + + def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs): + """ + The method should: + # Get the run log + # Get the branch and step containing the branch + # Add the branch to the step + # Write the run_log + + The branch log could some times be a Run log and should be handled appropriately + + Args: + branch_log (BranchLog): The branch log/run log to add to the database + run_id (str): The run id to which the branch/run log is added + """ + if not isinstance(branch_log, BranchLog): + self.put_run_log(branch_log) + return + + internal_branch_name = branch_log.internal_name + + logger.info(f"{self.service_name} Adding the branch log to DB: {branch_log.internal_name}") + self.store( + run_id=run_id, + log_type=self.LogTypes.BRANCH_LOG, + contents=branch_log.model_dump(), + name=internal_branch_name, + ) diff --git a/magnus/extensions/run_log_store/k8s_pvc/__init__.py b/magnus/extensions/run_log_store/k8s_pvc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/run_log_store/k8s_pvc/implementation.py b/magnus/extensions/run_log_store/k8s_pvc/implementation.py new file mode 100644 index 00000000..943489b4 --- /dev/null +++ b/magnus/extensions/run_log_store/k8s_pvc/implementation.py @@ -0,0 +1,21 @@ +import logging +from pathlib import Path + +from magnus import defaults +from magnus.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore + +logger = logging.getLogger(defaults.NAME) + + +class K8PersistentVolumeRunLogstore(FileSystemRunLogstore): + """ + Uses the K8s Persistent Volumes to store run logs. + """ + + service_name: str = "k8s-pvc" + persistent_volume_name: str + mount_path: str + + @property + def log_folder_name(self) -> str: + return str(Path(self.mount_path) / self.log_folder) diff --git a/magnus/extensions/run_log_store/k8s_pvc/integration.py b/magnus/extensions/run_log_store/k8s_pvc/integration.py new file mode 100644 index 00000000..4fa4f63a --- /dev/null +++ b/magnus/extensions/run_log_store/k8s_pvc/integration.py @@ -0,0 +1,56 @@ +import logging +from typing import cast + +from magnus import defaults +from magnus.integration import BaseIntegration + +logger = logging.getLogger(defaults.NAME) + + +class LocalCompute(BaseIntegration): + """ + Integration between local and k8's pvc + """ + + executor_type = "local" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "k8s-pvc" # The actual implementation of the service + + def validate(self, **kwargs): + msg = "We can't use the local compute k8s pvc store integration." + raise Exception(msg) + + +class LocalContainerCompute(BaseIntegration): + """ + Integration between local-container and k8's pvc + """ + + executor_type = "local-container" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "k8s-pvc" # The actual implementation of the service + + def validate(self, **kwargs): + msg = "We can't use the local-container compute k8s pvc store integration." 
+ raise Exception(msg) + + +class ArgoCompute(BaseIntegration): + """ + Integration between argo and k8's pvc + """ + + executor_type = "argo" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "k8s-pvc" # The actual implementation of the service + + def configure_for_traversal(self, **kwargs): + from magnus.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts + from magnus.extensions.run_log_store.k8s_pvc.implementation import K8PersistentVolumeRunLogstore + + self.executor = cast(ArgoExecutor, self.executor) + self.service = cast(K8PersistentVolumeRunLogstore, self.service) + + volume_mount = UserVolumeMounts(name=self.service.persistent_volume_name, mount_path=self.service.mount_path) + + self.executor.persistent_volumes.append(volume_mount) diff --git a/magnus/extensions/secrets/__init__.py b/magnus/extensions/secrets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/secrets/dotenv/__init__.py b/magnus/extensions/secrets/dotenv/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/secrets/dotenv/implementation.py b/magnus/extensions/secrets/dotenv/implementation.py new file mode 100644 index 00000000..d1d8a637 --- /dev/null +++ b/magnus/extensions/secrets/dotenv/implementation.py @@ -0,0 +1,100 @@ +import logging +import os + +from magnus import defaults, exceptions, utils +from magnus.secrets import BaseSecrets + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class DotEnvSecrets(BaseSecrets): + """ + A secret manager which uses .env files for secrets. + + We recommend this secrets manager only for local development and should not be used for anything close to + production. + """ + + service_name: str = "dotenv" + location: str = defaults.DOTENV_FILE_LOCATION + secrets: dict = {} + + @property + def secrets_location(self): + """ + Return the location of the .env file. + If the user has not over-ridden it, it defaults to .env file in the project root. + + Returns: + str: The location of the secrets file + """ + return self.location + + def _load_secrets(self): + """ + We assume that a dotenv file is of format, + key=value -> secrets[key]='value' + key=value# comment -> secrets[key1]='value1' + key=value2 # comment. -> secrets[key2]='value2' + + Any of the above formats with export or set in front of them. + + We strip the secret value of any empty spaces at the start and end. + + Raises: + Exception: If the file at secrets_location is not found. + Exception: If the secrets are not formatted correctly. + """ + # It was loaded in the previous call and need not to be reloaded + if self.secrets: + return + + secrets_location = self.secrets_location + if not utils.does_file_exist(secrets_location): + raise Exception(f"Did not find the secrets file in {secrets_location}") + + with open(secrets_location, "r") as fr: + for secret_line in fr: + # The order of removing fluff around the expression + # the new line + # the comment + # the white space + # Any export or set in front of the key any spaces after that. 
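# Editor's note (illustrative file contents): a .env file such as
#   export API_KEY="abc123"  # key for a third party service
#   DB_PASSWORD=secret
# is reduced by the stripping below to {"API_KEY": "abc123", "DB_PASSWORD": "secret"}.
# The format is strictly key=value; a value that itself contains '=' is rejected by
# the length check on the split further down.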
+ + secret_line = secret_line.strip(os.linesep).split("#")[0].strip() + + if secret_line == "": + continue + + secret_line = utils.remove_prefix(secret_line, prefix="export").strip() + secret_line = utils.remove_prefix(secret_line, prefix="EXPORT").strip() + secret_line = utils.remove_prefix(secret_line, prefix="set").strip() + secret_line = utils.remove_prefix(secret_line, prefix="SET").strip() + + data = secret_line.split("=") + if len(data) != 2: + raise Exception("A secret should be of format, secret_name=secret_value[# any comment]") + + key, value = data + self.secrets[key] = value.strip().strip('"').strip(os.linesep) + + def get(self, name: str = "", **kwargs) -> str: + """ + Get a secret of name from the secrets file. + + + Args: + name (str): The name of the secret to retrieve + + Raises: + Exception: If the secret by the name is not found. + + Returns: + str: The value of the secret + """ + self._load_secrets() + + if name in self.secrets: + return self.secrets[name] + + raise exceptions.SecretNotFoundError(secret_name=name, secret_setting=self.secrets_location) diff --git a/magnus/extensions/secrets/env_secrets/__init__.py b/magnus/extensions/secrets/env_secrets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/magnus/extensions/secrets/env_secrets/implementation.py b/magnus/extensions/secrets/env_secrets/implementation.py new file mode 100644 index 00000000..d0f2c319 --- /dev/null +++ b/magnus/extensions/secrets/env_secrets/implementation.py @@ -0,0 +1,42 @@ +import logging +import os + +from magnus import defaults, exceptions +from magnus.secrets import BaseSecrets + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +class EnvSecretsManager(BaseSecrets): + """ + A secret manager via environment variables. + + This secret manager raises an exception if the key is not found in the environment. + """ + + service_name: str = "env-secrets-manager" + prefix: str = "" + suffix: str = "" + + def get(self, name: str = "", **kwargs) -> str: + """ + If a name is provided, we look for that in the environment. + If an environment variable by that name is not found, we raise an exception. + + The name is looked up after applying the configured prefix and suffix. + + Args: + name (str): The name of the secret to retrieve + + Raises: + Exception: If the secret by the name is not found. + + Returns: + str: The value of the secret + """ + + try: + return os.environ[f"{self.prefix}{name}{self.suffix}"] + except KeyError as _e: + logger.exception(f"Secret {self.prefix}{name}{self.suffix} not found in environment") + raise exceptions.SecretNotFoundError(secret_name=name, secret_setting="environment") from _e diff --git a/magnus/graph.py b/magnus/graph.py index d70d7c87..2bf46cd7 100644 --- a/magnus/graph.py +++ b/magnus/graph.py @@ -1,17 +1,18 @@ +from __future__ import annotations + import logging -from typing import TYPE_CHECKING, Dict, List +from typing import Any, Dict, List, Optional, cast +from pydantic import BaseModel, Field, SerializeAsAny from stevedore import driver from magnus import defaults, exceptions -if TYPE_CHECKING: - from magnus.nodes import BaseNode - -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) +logging.getLogger("stevedore").setLevel(logging.CRITICAL) -class Graph: +class Graph(BaseModel): """ A class representing a graph.
@@ -19,35 +20,11 @@ class Graph: We have nodes and traversal is based on start_at and on_failure definition of individual nodes of the graph """ - def __init__( - self, - start_at, - name: str = "", - description: str = "", - max_time: int = 86400, - internal_branch_name: str = "", - ): - self.start_at = start_at - self.name = name - self.description = description - self.max_time = max_time - self.internal_branch_name = internal_branch_name - self.nodes: List[BaseNode] = [] - - def _to_dict(self) -> dict: - """ - Return a dict representation of the graph - """ - dag = {} - dag["start_at"] = self.start_at - dag["name"] = self.name - dag["description"] = self.description - dag["max_time"] = self.max_time - dag["steps"] = {} - for node in self.nodes: - dag["steps"][node.name] = node._to_dict() - - return dag + start_at: str + name: str = "" + description: Optional[str] = "" + internal_branch_name: str = Field(default="", exclude=True) + nodes: SerializeAsAny[Dict[str, "BaseNode"]] = Field(default_factory=dict, serialization_alias="steps") def get_node_by_name(self, name: str) -> "BaseNode": """ @@ -63,9 +40,9 @@ def get_node_by_name(self, name: str) -> "BaseNode": Returns: Node: The Node object by name """ - for node in self.nodes: - if node.name == name: - return node + for key, value in self.nodes.items(): + if key == name: + return value raise exceptions.NodeNotFoundError(name) def get_node_by_internal_name(self, internal_name: str) -> "BaseNode": @@ -83,17 +60,17 @@ def get_node_by_internal_name(self, internal_name: str) -> "BaseNode": Returns: Node: The Node object by the name """ - for node in self.nodes: - if node.internal_name == internal_name: - return node + for _, value in self.nodes.items(): + if value.internal_name == internal_name: + return value raise exceptions.NodeNotFoundError(internal_name) def __str__(self): # pragma: no cover """ Return a string representation of the graph """ - node_str = ", ".join([x.name for x in self.nodes]) - return f"Starts at: {self.start_at} and has a max run time of {self.max_time} and {node_str}" + node_str = ", ".join([x.name for x in list(self.nodes.values())]) + return f"Starts at: {self.start_at} and {node_str}" def add_node(self, node: "BaseNode"): """ @@ -102,9 +79,9 @@ def add_node(self, node: "BaseNode"): Args: node (object): The node to add """ - self.nodes.append(node) + self.nodes[node.name] = node - def validate(self): + def check_graph(self): """ Validate the graph to make sure, 1). All the neighbors of nodes are present. 
@@ -160,9 +137,9 @@ def get_success_node(self) -> "BaseNode": Returns: object: The success node """ - for node in self.nodes: - if node.node_type == "success": - return node + for _, value in self.nodes.items(): + if value.node_type == "success": + return value raise Exception("No success node defined") def get_fail_node(self) -> "BaseNode": @@ -175,9 +152,9 @@ def get_fail_node(self) -> "BaseNode": Returns: object: The fail node of the graph """ - for node in self.nodes: - if node.node_type == "fail": - return node + for _, value in self.nodes.items(): + if value.node_type == "fail": + return value raise Exception("No fail node defined") def is_start_node_present(self) -> bool: @@ -202,8 +179,8 @@ def success_node_validation(self) -> bool: bool: True if there is only one, false otherwise """ node_count = 0 - for node in self.nodes: - if node.node_type == "success": + for _, value in self.nodes.items(): + if value.node_type == "success": node_count += 1 if node_count == 1: return True @@ -217,8 +194,8 @@ def fail_node_validation(self) -> bool: bool: true if there is one and only one fail node, false otherwise """ node_count = 0 - for node in self.nodes: - if node.node_type == "fail": + for _, value in self.nodes.items(): + if value.node_type == "fail": node_count += 1 if node_count == 1: return True @@ -231,11 +208,11 @@ def is_dag(self) -> bool: Returns: bool: Returns True if it is directed and acyclic. """ - visited = {n.name: False for n in self.nodes} - recstack = {n.name: False for n in self.nodes} + visited = {n: False for n in self.nodes.keys()} + recstack = {n: False for n in self.nodes.keys()} - for node in self.nodes: - if not visited[node.name]: + for name, node in self.nodes.items(): + if not visited[name]: if self.is_cyclic_util(node, visited, recstack): return False return True @@ -267,7 +244,7 @@ def is_cyclic_util(self, node: "BaseNode", visited: Dict[str, bool], recstack: D recstack[node.name] = False return False - def missing_neighbors(self) -> List["BaseNode"]: + def missing_neighbors(self) -> List[str]: """ Iterates through nodes and gets their connecting neighbors and checks if they exist in the graph. @@ -275,13 +252,13 @@ def missing_neighbors(self) -> List["BaseNode"]: list: List of the missing nodes. Empty list if all neighbors are in the graph. """ missing_nodes = [] - for node in self.nodes: + for _, node in self.nodes.items(): neighbors = node._get_neighbors() for neighbor in neighbors: try: self.get_node_by_name(neighbor) except exceptions.NodeNotFoundError: - logger.exception("Could not find the node") + logger.exception(f"Could not find the node {neighbor}") if neighbor not in missing_nodes: missing_nodes.append(neighbor) return missing_nodes @@ -290,7 +267,7 @@ def add_terminal_nodes( self, success_node_name: str = "success", failure_node_name: str = "fail", - internal_branch_name: str = None, + internal_branch_name: str = "", ): """ Add the success and fail nodes to the graph @@ -315,8 +292,12 @@ def add_terminal_nodes( self.add_node(fail_node) -def create_graph(dag_config: dict, internal_branch_name: str = "") -> Graph: - # pylint: disable=R0914,R0913 +from magnus.nodes import BaseNode # noqa: E402 + +Graph.model_rebuild() + + +def create_graph(dag_config: Dict[str, Any], internal_branch_name: str = "") -> Graph: """ Creates a dag object from the dag definition. 
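# Editor's sketch (hypothetical step names; assumes, as the validations above suggest, that a
# graph needs exactly one success and one fail node): the dict handed to create_graph mirrors
# the `dag` block of a pipeline definition, e.g.
dag_config = {
    "start_at": "extract",
    "description": "a tiny illustrative pipeline",
    "steps": {
        "extract": {"type": "stub", "next": "success"},
        "success": {"type": "success"},
        "fail": {"type": "fail"},
    },
}
graph = create_graph(dag_config)  # builds nodes via create_node and runs check_graph()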
@@ -333,36 +314,28 @@ def create_graph(dag_config: dict, internal_branch_name: str = "") -> Graph: Returns: Graph: The created graph object """ - description = dag_config.get("description", None) - max_time = dag_config.get("max_time", defaults.MAX_TIME) - start_at = dag_config.get("start_at") # Let the start_at be relative to the graph + description: str = dag_config.get("description", None) + start_at: str = cast(str, dag_config.get("start_at")) # Let the start_at be relative to the graph graph = Graph( start_at=start_at, description=description, - max_time=max_time, internal_branch_name=internal_branch_name, ) - logger.info(f"Initialized a graph object that starts at {start_at} and runs for maximum of {max_time} secs") - messages = [] - for step in dag_config.get("steps", []): - step_config = dag_config["steps"][step] - logger.info(f"Adding node {step} with :{step_config}") + logger.info(f"Initialized a graph object that starts at {start_at}") + for name, step_config in dag_config.get("steps", {}).items(): + logger.info(f"Adding node {name} with :{step_config}") - node = create_node(step, step_config=step_config, internal_branch_name=internal_branch_name) - messages.extend(node.validate()) + node = create_node(name, step_config=step_config, internal_branch_name=internal_branch_name) graph.add_node(node) - if messages: - raise Exception(", ".join(messages)) - - graph.validate() + graph.check_graph() return graph -def create_node(name: str, step_config: dict, internal_branch_name: str = None): +def create_node(name: str, step_config: dict, internal_branch_name: Optional[str] = ""): """ Creates a node object from the step configuration. @@ -383,27 +356,31 @@ def create_node(name: str, step_config: dict, internal_branch_name: str = None): try: node_type = step_config.pop("type") # Remove the type as it is not used in node creation. - node_mgr = driver.DriverManager( - namespace="nodes", - name=node_type, - invoke_on_load=True, - invoke_kwds={ - "name": name, - "internal_name": internal_name, - "config": step_config, - "internal_branch_name": internal_branch_name, - }, - ) - return node_mgr.driver + node_mgr: BaseNode = driver.DriverManager(namespace="nodes", name=node_type).driver + + next_node = step_config.pop("next", None) + + if next_node: + step_config["next_node"] = next_node + + invoke_kwds = { + "name": name, + "internal_name": internal_name, + "internal_branch_name": internal_branch_name, + **step_config, + } + node = node_mgr.parse_from_config(config=invoke_kwds) + return node except KeyError: + # type is missing!! msg = "The node configuration does not contain the required key 'type'." logger.exception(step_config) raise Exception(msg) except Exception as _e: msg = ( - f"Could not find the node type {step_config['type']}. Please ensure you have installed " + f"Could not find the node type {node_type}. Please ensure you have installed " "the extension that provides the node type." 
- "\nCore supports: task, success, fail, parallel, dag, map, as-is" + "\nCore supports: task, success, fail, parallel, dag, map, stub" ) raise Exception(msg) from _e diff --git a/magnus/integration.py b/magnus/integration.py index a9699962..1ae9a436 100644 --- a/magnus/integration.py +++ b/magnus/integration.py @@ -1,19 +1,11 @@ import logging -from pathlib import Path -from typing import TYPE_CHECKING - -from magnus import defaults -from magnus.catalog import BaseCatalog -from magnus.datastore import BaseRunLogStore -from magnus.experiment_tracker import BaseExperimentTracker -from magnus.secrets import BaseSecrets - -if TYPE_CHECKING: - from magnus.executor import BaseExecutor from stevedore import extension -logger = logging.getLogger(defaults.NAME) +from magnus import defaults +from magnus.executor import BaseExecutor + +logger = logging.getLogger(defaults.LOGGER_NAME) logging.getLogger("stevedore").setLevel(logging.CRITICAL) # --8<-- [start:docs] @@ -57,31 +49,6 @@ def configure_for_execution(self, **kwargs): # --8<-- [end:docs] -def get_service_type(service_provider: object) -> str: - """ - Given a service provider, identify the type of service. - - Args: - service_provider (object): The service provider object - - Raises: - Exception: If the service provider is not inherited from one of BaseSecret, BaseCatalog, BaseRunLogStore - - Returns: - [str]: Returns either 'secret', 'catalog', 'run_log_store' according to the service provider. - """ - if isinstance(service_provider, BaseSecrets): - return "secrets" - if isinstance(service_provider, BaseCatalog): - return "catalog" - if isinstance(service_provider, BaseRunLogStore): - return "run_log_store" - if isinstance(service_provider, BaseExperimentTracker): - return "experiment_tracker" - - raise Exception("Service Provider is not a inherited from any of the Base Service providers") - - def get_integration_handler(executor: "BaseExecutor", service: object) -> BaseIntegration: """ Return the integration handler between executor and the service. 
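# Editor's sketch (hypothetical executor and store names): a third party integration is just a
# BaseIntegration subclass carrying the three matching attributes used by get_integration_handler;
# it is discovered either through the "integration" entry point namespace or, for in-package
# classes, via BaseIntegration.__subclasses__() as the lookup below shows.
class MyExecutorMyStoreIntegration(BaseIntegration):
    executor_type = "my-executor"  # must equal executor.service_name
    service_type = "run_log_store"  # e.g. secrets, catalog, run_log_store
    service_provider = "my-store"  # must equal the service's service_name

    def validate(self, **kwargs):
        # raise here if the executor/service combination is not supported
        pass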
@@ -98,10 +65,11 @@ def get_integration_handler(executor: "BaseExecutor", service: object) -> BaseIn Raises: Exception: If multiple integrations are found for the executor and service """ - service_type = get_service_type(service) + service_type = service.service_type # type: ignore service_name = getattr(service, "service_name") integrations = [] + # Get all the integrations defined by the 3rd party in their pyproject.toml mgr = extension.ExtensionManager( namespace="integration", invoke_on_load=True, @@ -109,25 +77,35 @@ def get_integration_handler(executor: "BaseExecutor", service: object) -> BaseIn ) for _, kls in mgr.items(): if ( - kls.obj.service_type == service_type - and kls.obj.executor_type == executor.service_name # type: ignore - and kls.obj.service_provider == service_name # type: ignore + kls.obj.executor_type == executor.service_name + and kls.obj.service_type == service_type + and kls.obj.service_provider == service_name ): logger.info(f"Identified an integration pattern {kls.obj}") integrations.append(kls.obj) + # Get all the implementations defined by the magnus package + for kls in BaseIntegration.__subclasses__(): + # Match the exact service type + if kls.service_type == service_type and kls.service_provider == service_name: + # Match either all executor or specific ones provided + if kls.executor_type == "" or kls.executor_type == executor.service_name: + integrations.append(kls(executor=executor, integration_service=service)) + if len(integrations) > 1: msg = ( f"Multiple integrations between {executor.service_name} and {service_name} of type {service_type} found. " - "This is not correct. Please raise a bug report to fix this." + "If you defined an integration pattern, please ensure it is specific and does not conflict with magnus " + " implementations." ) logger.exception(msg) raise Exception(msg) if not integrations: logger.warning( - f"Could not find an integration pattern for {executor.service_name} and {service_name} for {service_type}" - ) # type: ignore + f"Could not find an integration pattern for {executor.service_name} and {service_name} for {service_type}." + " This implies that there is no need to change the configurations." + ) return BaseIntegration(executor, service) return integrations[0] @@ -169,16 +147,18 @@ def configure_for_execution(executor: "BaseExecutor", service: object, **kwargs) integration_handler.configure_for_execution(**kwargs) -class LocalComputeBufferedRunLogStore(BaseIntegration): +class BufferedRunLogStore(BaseIntegration): """ - Local compute and buffered + Integration between any executor and buffered run log store """ - executor_type = "local" service_type = "run_log_store" # One of secret, catalog, datastore service_provider = "buffered" # The actual implementation of the service def validate(self, **kwargs): + if not self.executor.service_name == "local": + raise Exception("Buffered run log store is only supported for local executor") + msg = ( "Run log generated by buffered run log store are not persisted. 
" "Re-running this run, in case of a failure, is not possible" @@ -186,114 +166,11 @@ def validate(self, **kwargs): logger.warning(msg) -class LocalComputeFileSystemRunLogStore(BaseIntegration): - """ - Local compute and File system run log store - """ - - executor_type = "local" - service_type = "run_log_store" # One of secret, catalog, datastore - service_provider = "file-system" # The actual implementation of the service - - def validate(self, **kwargs): - if self.executor._is_parallel_execution(): - msg = ( - "Run log generated by file-system run log store are not thread safe. " - "Inconsistent results are possible because of race conditions to write to the same file.\n" - "Consider using partitioned run log store like database for consistent results." - ) - logger.warning(msg) - - -class LocalContainerComputeBufferedRunLogStore(BaseIntegration): - """ - Only local executions is possible for Buffered Run Log store - """ - - executor_type = "local-container" - service_type = "run_log_store" # One of secret, catalog, datastore - service_provider = "buffered" # The actual implementation of the service - - def validate(self, **kwargs): - raise Exception("Only local compute executions is possible for buffered run log store") - - -class LocalContainerComputeFileSystemRunLogstore(BaseIntegration): - """ - Integration between local container and file system run log store - """ - - executor_type = "local-container" - service_type = "run_log_store" # One of secret, catalog, datastore - service_provider = "file-system" # The actual implementation of the service - - def validate(self, **kwargs): - if self.executor._is_parallel_execution(): - msg = ( - "Run log generated by file-system run log store are not thread safe. " - "Inconsistent results are possible because of race conditions to write to the same file.\n" - "Consider using partitioned run log store like database for consistent results." 
- ) - logger.warning(msg) - - def configure_for_traversal(self, **kwargs): - write_to = self.service.log_folder_name - self.executor.volumes[str(Path(write_to).resolve())] = { - "bind": f"{self.executor.container_log_location}", - "mode": "rw", - } - - def configure_for_execution(self, **kwargs): - self.service.config.log_folder = self.executor.container_log_location - - -class LocalContainerComputeDotEnvSecrets(BaseIntegration): - """ - Integration between local container and dot env secrets - """ - - executor_type = "local-container" - service_type = "secrets" # One of secret, catalog, datastore - service_provider = "dotenv" # The actual implementation of the service - - def validate(self, **kwargs): - logger.warning("Using dot env for non local deployments is not ideal, consider options") - - def configure_for_traversal(self, **kwargs): - secrets_location = self.service.secrets_location - self.executor.volumes[str(Path(secrets_location).resolve())] = { - "bind": f"{self.executor.container_secrets_location}", - "mode": "ro", - } - - def configure_for_execution(self, **kwargs): - self.service.config.location = self.executor.container_secrets_location - - -class LocalContainerComputeEnvSecretsManager(BaseIntegration): +class DoNothingCatalog(BaseIntegration): """ - Integration between local container and env secrets manager + Integration between any executor and do nothing catalog """ - executor_type = "local-container" - service_type = "secrets" # One of secret, catalog, datastore - service_provider = "env-secrets-manager" # The actual implementation of the service - - def validate(self, **kwargs): - msg = ( - "Local container executions cannot be used with environment secrets manager. " - "Please use a supported secrets manager" - ) - logger.exception(msg) - raise Exception(msg) - - -class LocalContainerDoNothingCatalog(BaseIntegration): - """ - Integration between local container and do nothing catalog - """ - - executor_type = "local-container" service_type = "catalog" # One of secret, catalog, datastore service_provider = "do-nothing" # The actual implementation of the service @@ -302,53 +179,27 @@ def validate(self, **kwargs): logger.warning(msg) -class LocalDoNothingCatalog(BaseIntegration): +class DoNothingSecrets(BaseIntegration): """ - Integration between local and do nothing catalog + Integration between any executor and do nothing secrets """ - executor_type = "local" - service_type = "catalog" # One of secret, catalog, datastore + service_type = "secrets" # One of secret, catalog, datastore service_provider = "do-nothing" # The actual implementation of the service def validate(self, **kwargs): - msg = "A do-nothing catalog does not hold any data and therefore cannot pass data between nodes." + msg = "A do-nothing secrets does not hold any secrets and therefore cannot return you any secrets." 
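To illustrate the second lookup path added above (scanning BaseIntegration subclasses in addition to entry-point integrations), a hedged sketch of a custom integration pattern; the class name and executor name are made up for the example.

class MyExecutorFileSystemCatalog(BaseIntegration):
    """Hypothetical integration between a custom executor and the file-system catalog."""

    executor_type = "my-executor"   # "" would match any executor
    service_type = "catalog"
    service_provider = "file-system"

    def validate(self, **kwargs):
        logger.warning("Ensure the catalog folder is reachable from my-executor")

# get_integration_handler() now collects entry-point integrations first, then any
# BaseIntegration subclass matching the (executor, service) pair, and raises if the
# two sources together produce more than one match.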
logger.warning(msg) -class LocalContainerComputeFileSystemCatalog(BaseIntegration): +class DoNothingExperimentTracker(BaseIntegration): """ - Integration pattern between Local container and File System catalog + Integration between any executor and do nothing experiment tracker """ - executor_type = "local-container" - service_type = "catalog" # One of secret, catalog, datastore - service_provider = "file-system" # The actual implementation of the service - - def configure_for_traversal(self, **kwargs): - catalog_location = self.service.catalog_location - self.executor.volumes[str(Path(catalog_location).resolve())] = { - "bind": f"{self.executor.container_catalog_location}", - "mode": "rw", - } - - def configure_for_execution(self, **kwargs): - self.service.config.catalog_location = self.executor.container_catalog_location - - -class DemoRenderBufferedRunLogStore(BaseIntegration): - """ - Demo rendered and buffered - """ - - executor_type = "demo-renderer" - service_type = "run_log_store" # One of secret, catalog, datastore - service_provider = "buffered" # The actual implementation of the service + service_type = "experiment_tracker" # One of secret, catalog, datastore + service_provider = "do-nothing" # The actual implementation of the service def validate(self, **kwargs): - msg = ( - "Run log generated by buffered run log store are not persisted. " - "Demo renderer cannot use buffered as steps are executed as individual commands." - ) - logger.exception(msg) - raise Exception(msg) + msg = "A do-nothing experiment tracker does nothing and therefore cannot track anything." + logger.warning(msg) diff --git a/magnus/interaction.py b/magnus/interaction.py index 3dafa21f..01534c73 100644 --- a/magnus/interaction.py +++ b/magnus/interaction.py @@ -3,204 +3,240 @@ import json import logging import os -from pathlib import Path -from typing import Any, Union, cast +from functools import wraps +from typing import Any, ContextManager, Dict, Optional, TypeVar, Union, cast, overload -from magnus import defaults, exceptions, pickler, pipeline, utils +from pydantic import BaseModel -logger = logging.getLogger(defaults.NAME) +import magnus.context as context +from magnus import defaults, exceptions, parameters, pickler, utils +from magnus.datastore import RunLog, StepLog +logger = logging.getLogger(defaults.LOGGER_NAME) -def track_this(step: int = 0, **kwargs): - """ - Set up the keyword args as environment variables for tracking purposes as - part of the run. +CastT = TypeVar("CastT") - For every key-value pair found in kwargs, we set up an environmental variable of - MAGNUS_TRACK_key_{step} = json.dumps(value) - If step=0, we ignore step for magnus purposes. +def check_context(func): + @wraps(func) + def wrapper(*args, **kwargs): + if not context.run_context.executor: + msg = ( + "There are no active executor and services. This should not have happened and is a bug." + " Please raise a bug report." + ) + raise Exception(msg) + result = func(*args, **kwargs) + return result - Args: - kwargs (dict): The dictionary of key value pairs to track. + return wrapper + + +@check_context +def track_this(step: int = 0, **kwargs): """ - from magnus import context # pylint: disable=import-outside-toplevel + Tracks key-value pairs to the experiment tracker. - if not context.executor: - msg = ( - "There are no active executor and services. This should not have happened and is a bug." - " Please raise a bug report." - ) - raise Exception(msg) + The value is dumped as a dict, by alias, if it is a pydantic model. 
- prefix = defaults.TRACK_PREFIX + Args: + step (int, optional): The step to track the data at. Defaults to 0. + **kwargs (dict): The key-value pairs to track. - if step: - prefix += f"{str(step)}_" + Examples: + >>> track_this(step=0, my_int_param=123, my_float_param=123.45, my_str_param='hello world') + >>> track_this(step=1, my_int_param=456, my_float_param=456.78, my_str_param='goodbye world') + """ + prefix = defaults.TRACK_PREFIX for key, value in kwargs.items(): logger.info(f"Tracking {key} with value: {value}") - os.environ[prefix + key] = json.dumps(value) - context.executor.experiment_tracker.set_metric(key, value, step=step) # type: ignore + if isinstance(value, BaseModel): + value = value.model_dump(by_alias=True) -def store_parameter(update: bool = True, **kwargs: dict): + os.environ[prefix + key + f"{defaults.STEP_INDICATOR}{step}"] = json.dumps(value) + + +@check_context +def set_parameter(**kwargs) -> None: """ - Set up the keyword args as environment variables for parameters tracking - purposes as part pf the run. + Store a set of parameters. + + !!! note + The parameters are not stored in run log at this point in time. + They are collected now and stored in the run log after completion of the task. - If update_existing is True, we override the current value if the parameter already exists. + Parameters: + **kwargs (dict): A dictionary of key-value pairs to store as parameters. + + Returns: + None + + Examples: + >>> set_parameter(my_int_param=123, my_float_param=123.45, my_bool_param=True, my_str_param='hello world') + >>> get_parameter('my_int_param', int) + 123 + >>> get_parameter('my_float_param', float) + 123.45 + >>> get_parameter('my_bool_param', bool) + True + >>> get_parameter('my_str_param', str) + 'hello world' + + >>> # Example of using Pydantic models + >>> class MyModel(BaseModel): + ... field1: str + ... field2: int + >>> set_parameter(my_model_param=MyModel(field1='value1', field2=2)) + >>> get_parameter('my_model_param', MyModel) + MyModel(field1='value1', field2=2) - For every key-value pair found in kwargs, we set up an environmental variable of - MAGNUS_PRM_key = json.dumps(value) """ - for key, value in kwargs.items(): - logger.info(f"Storing parameter {key} with value: {value}") - environ_key = defaults.PARAMETER_PREFIX + key + parameters.set_user_defined_params_as_environment_variables(kwargs) + - if environ_key in os.environ and not update: - continue +@overload +def get_parameter(key: str, cast_as: Optional[CastT]) -> CastT: + ... - os.environ[environ_key] = json.dumps(value) +@overload +def get_parameter(cast_as: Optional[CastT]) -> CastT: + ... -def get_parameter(key=None) -> Union[str, dict]: + +@check_context +def get_parameter(key: Optional[str] = None, cast_as: Optional[CastT] = None) -> Union[Dict[str, Any], CastT]: """ - Get the parameter set as part of the user convenience function. + Get a parameter by its key. + If the key is not provided, all parameters will be returned. - We do not remove the parameter from the environment in this phase as - as the function execution has not been completed. + cast_as is not required for JSON supported type (int, float, bool, str). + For complex nested parameters, cast_as could package them into a pydantic model. + If cast_as is not provided, the type will remain as dict for nested structures. - Returns all the parameters, if no key was provided. + Note that the cast_as pydantic model is the class, not an instance. Args: - key (str, optional): The parameter key to retrieve. Defaults to None. 
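Since the docstring above notes that pydantic models are dumped by alias before tracking, a small hedged example of tracking one; the model and metric names are illustrative.

from pydantic import BaseModel

class Metrics(BaseModel):
    auc: float
    f1: float

# Inside a running task: the model is converted with model_dump(by_alias=True)
# and stored under the tracking prefix for step 1.
track_this(step=1, validation=Metrics(auc=0.91, f1=0.88))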
+ key (str, optional): The key of the parameter to retrieve. If not provided, all parameters will be returned. + cast_as (Type, optional): The type to cast the parameter to. If not provided, the type will remain as it is + for simple data types (int, float, bool, str). For nested parameters, it would be a dict. Raises: - Exception: If the mentioned key was not part of the paramters + Exception: If the parameter does not exist and key is not provided. + ValidationError: If the parameter cannot be cast as pydantic model, when cast_as is provided. + + Examples: + >>> get_parameter('my_int_param', int) + 123 + >>> get_parameter('my_float_param', float) + 123.45 + >>> get_parameter('my_bool_param', bool) + True + >>> get_parameter('my_str_param', str) + 'hello world' + >>> get_parameter('my_model_param', MyModel) + MyModel(field1='value1', field2=2) + >>> get_parameter(cast_as=MyModel) + MyModel(field1='value1', field2=2) - Returns: - Union[str, dict]: A single value of str or a dictionary if no key was specified """ - parameters = utils.get_user_set_parameters(remove=False) + params = parameters.get_user_set_parameters(remove=False) + if not key: - return parameters - if key not in parameters: + # Return all parameters + return cast(CastT, parameters.cast_parameters_as_type(params, cast_as)) # type: ignore + + if key not in params: raise Exception(f"Parameter {key} is not set before") - return parameters[key] + + # Return the parameter value, casted as asked. + return cast(CastT, parameters.cast_parameters_as_type(params[key], cast_as)) # type: ignore -def get_secret(secret_name: str = None) -> str: +@check_context +def get_secret(secret_name: str) -> str: """ - Get a secret by the name from the secrets manager + Retrieve a secret from the secret store. Args: - secret_name (str): The name of the secret to get. Defaults to None. - - Returns: - str: The secret from the secrets manager, if exists. If the requested secret was None, we return all. - Otherwise, raises exception. + secret_name (str): The name of the secret to retrieve. Raises: - exceptions.SecretNotFoundError: Secret not found in the secrets manager. - """ - from magnus import context # pylint: disable=import-outside-toplevel - - if not context.executor: - msg = ( - "There are no active executor and services. This should not have happened and is a bug." - " Please raise a bug report." - ) - raise Exception(msg) + SecretNotFoundError: If the secret does not exist in the store. - secrets_handler = context.executor.secrets_handler # type: ignore + Returns: + str: The secret value. + """ + secrets_handler = context.run_context.secrets_handler try: - return secrets_handler.get(name=secret_name) # type: ignore + return secrets_handler.get(name=secret_name) except exceptions.SecretNotFoundError: logger.exception(f"No secret by the name {secret_name} found in the store") raise -def get_from_catalog(name: str, destination_folder: str = None): +@check_context +def get_from_catalog(name: str, destination_folder: str = ""): """ - A convenience interaction function to get file from the catalog and place it in the destination folder + Get data from the catalog. - Note: We do not perform any kind of serialization/deserialization in this way. - Args: - name (str): The name of the file to get from the catalog - destination_folder (None): The place to put the file. defaults to compute data folder + The name can be a wildcard pattern following globing rules. + Args: + name (str): The name of the data catalog entry. 
+ destination_folder (str, optional): The destination folder to download the data to. + If not provided, the default destination folder set in the catalog will be used. """ - from magnus import context # pylint: disable=import-outside-toplevel - from magnus.catalog import BaseCatalog - - if not context.executor: - msg = ( - "There are no active executor and services. This should not have happened and is a bug." - " Please raise a bug report." - ) - raise Exception(msg) - if not destination_folder: - destination_folder = context.executor.catalog_handler.compute_data_folder # type: ignore + destination_folder = context.run_context.catalog_handler.compute_data_folder - data_catalog = cast(BaseCatalog, context.executor.catalog_handler).get( + data_catalog = context.run_context.catalog_handler.get( name, - run_id=context.executor.run_id, # type: ignore - compute_data_folder=destination_folder, + run_id=context.run_context.run_id, ) - if not data_catalog: - logger.warn(f"No catalog was obtained by the {name}") - - if context.executor.context_step_log: # type: ignore - context.executor.context_step_log.add_data_catalogs(data_catalog) # type: ignore + if context.run_context.executor._context_step_log: + context.run_context.executor._context_step_log.add_data_catalogs(data_catalog) else: logger.warning("Step log context was not found during interaction! The step log will miss the record") +@check_context def put_in_catalog(filepath: str): """ - A convenience interaction function to put the file in the catalog. - - Note: We do not perform any kind of serialization/deserialization in this way. + Add a file or folder to the data catalog. + You can use wild cards following globing rules. Args: - filepath (str): The path of the file to put in the catalog + filepath (str): The path to the file or folder added to the catalog """ - from magnus import context # pylint: disable=import-outside-toplevel - from magnus.catalog import BaseCatalog - - if not context.executor: - msg = ( - "There are no active executor and services. This should not have happened and is a bug." - " Please raise a bug report." - ) - raise Exception(msg) - - file_path = Path(filepath) - - data_catalog = cast(BaseCatalog, context.executor.catalog_handler).put( - file_path.name, - run_id=context.executor.run_id, # type: ignore - compute_data_folder=file_path.parent, + + data_catalog = context.run_context.catalog_handler.put( + filepath, + run_id=context.run_context.run_id, ) if not data_catalog: - logger.warn(f"No catalog was done by the {filepath}") + logger.warning(f"No catalog was done by the {filepath}") - if context.executor.context_step_log: # type: ignore - context.executor.context_step_log.add_data_catalogs(data_catalog) # type: ignore + if context.run_context.executor._context_step_log: + context.run_context.executor._context_step_log.add_data_catalogs(data_catalog) else: logger.warning("Step log context was not found during interaction! The step log will miss the record") +@check_context def put_object(data: Any, name: str): """ - A convenient interaction function to serialize and store the object in catalog. + Serialize and store a python object in the data catalog. + + This function behaves the same as `put_in_catalog` + but with python objects. Args: - data (Any): The data object to add to catalog - name (str): The name to give to the object + data (Any): The python data object to store. + name (str): The name to store it against. 
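A short, hedged sketch of the catalog helpers above used together inside a task; the paths and names are illustrative.

# Files and folders, with glob patterns:
put_in_catalog("data/processed/*.parquet")
get_from_catalog("processed_*.parquet", destination_folder="data/inputs")

# Python objects, serialized with the native pickler:
put_object({"rows": 1000, "columns": 25}, name="dataset_profile")
profile = get_object("dataset_profile")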
""" native_pickler = pickler.NativePickler() @@ -211,12 +247,13 @@ def put_object(data: Any, name: str): os.remove(f"{name}{native_pickler.extension}") +@check_context def get_object(name: str) -> Any: """ - A convenient interaction function to deserialize and retrieve the object from the catalog. + Retrieve and deserialize a python object from the data catalog. - Args: - name (str): The name of the object to retrieve + This function behaves the same as `get_from_catalog` but with + python objects. Returns: Any : The object @@ -237,13 +274,30 @@ def get_object(name: str) -> Any: raise e +@check_context def get_run_id() -> str: """ - Returns the run_id of the current run + Returns the run_id of the current run. + + You can also access this from the environment variable `MAGNUS_RUN_ID`. + """ + return context.run_context.run_id + + +@check_context +def get_run_log() -> RunLog: """ - return os.environ.get(defaults.ENV_RUN_ID, "") + Returns the run_log of the current run. + The return is a deep copy of the run log to prevent any modification. + """ + return context.run_context.run_log_store.get_run_log_by_id( + context.run_context.run_id, + full=True, + ).copy(deep=True) + +@check_context def get_tag() -> str: """ Returns the tag from the environment. @@ -251,32 +305,17 @@ def get_tag() -> str: Returns: str: The tag if provided for the run, otherwise None """ - return os.environ.get(defaults.MAGNUS_RUN_TAG, "") + return context.run_context.tag -def get_experiment_tracker_context(): +@check_context +def get_experiment_tracker_context() -> ContextManager: """ Return a context session of the experiment tracker. - You can start to use the context with the python with statement. - - eg: - with get_experiment_tracker_context() as ctx: - pass - - Returns: - _type_: _description_ + You can start to use the context with the python ```with``` statement. """ - from magnus import context # pylint: disable=import-outside-toplevel - - if not context.executor: - msg = ( - "There are no active executor and services. This should not have happened and is a bug." - " Please raise a bug report." - ) - raise Exception(msg) - - experiment_tracker = context.executor.experiment_tracker + experiment_tracker = context.run_context.experiment_tracker return experiment_tracker.client_context @@ -293,17 +332,15 @@ def start_interactive_session(run_id: str = "", config_file: str = "", tag: str tag (str, optional): The tag to attach to the run. Defaults to "". parameters_file (str, optional): The parameters file to use. Defaults to "". 
""" - from magnus import ( - context, # pylint: disable=import-outside-toplevel - graph, - ) - if context.executor: + from magnus import entrypoints, graph # pylint: disable=import-outside-toplevel + + if context.run_context.executor: logger.warn("This is not an interactive session or a session has already been activated.") return run_id = utils.generate_run_id(run_id=run_id) - executor = pipeline.prepare_configurations( + context.run_context = entrypoints.prepare_configurations( configuration_file=config_file, run_id=run_id, tag=tag, @@ -311,9 +348,11 @@ def start_interactive_session(run_id: str = "", config_file: str = "", tag: str force_local_executor=True, ) + executor = context.run_context.executor + utils.set_magnus_environment_variables(run_id=run_id, configuration_file=config_file, tag=tag) - executor.execution_plan = defaults.EXECUTION_PLAN.INTERACTIVE.value + context.run_context.execution_plan = defaults.EXECUTION_PLAN.INTERACTIVE.value executor.prepare_for_graph_execution() step_config = { "command": "interactive", @@ -323,12 +362,12 @@ def start_interactive_session(run_id: str = "", config_file: str = "", tag: str } node = graph.create_node(name="interactive", step_config=step_config) - step_log = executor.run_log_store.create_step_log("interactive", node._get_step_log_name()) + step_log = context.run_context.run_log_store.create_step_log("interactive", node._get_step_log_name()) executor.add_code_identities(node=node, step_log=step_log) step_log.step_type = node.node_type step_log.status = defaults.PROCESSING - executor.context_step_log = step_log + executor._context_step_log = step_log def end_interactive_session(): @@ -337,25 +376,24 @@ def end_interactive_session(): Does nothing if the executor is not interactive. """ - from magnus import context # pylint: disable=import-outside-toplevel - if not context.executor: + if not context.run_context.executor: logger.warn("There is no active session in play, doing nothing!") return - if context.executor.execution_plan != defaults.EXECUTION_PLAN.INTERACTIVE.value: + if context.run_context.execution_plan != defaults.EXECUTION_PLAN.INTERACTIVE.value: logger.warn("There is not an interactive session, doing nothing!") return tracked_data = utils.get_tracked_data() - parameters = utils.get_user_set_parameters(remove=True) + set_parameters = parameters.get_user_set_parameters(remove=True) - step_log = context.executor.context_step_log + step_log = cast(StepLog, context.run_context.executor._context_step_log) step_log.user_defined_metrics = tracked_data - context.executor.run_log_store.add_step_log(step_log, context.executor.run_id) + context.run_context.run_log_store.add_step_log(step_log, context.run_context.run_id) - context.executor.run_log_store.set_parameters(context.executor.run_id, parameters) + context.run_context.run_log_store.set_parameters(context.run_context.run_id, set_parameters) - context.executor.context_step_log = None - context.executor.execution_plan = "" - context.executor = None + context.run_context.executor._context_step_log = None + context.run_context.execution_plan = "" + context.run_context.executor = None # type: ignore diff --git a/magnus/log_config.ini b/magnus/log_config.ini deleted file mode 100644 index 0dae015a..00000000 --- a/magnus/log_config.ini +++ /dev/null @@ -1,29 +0,0 @@ -[loggers] -keys=magnus,root - -[handlers] -keys=consoleHandler - -[formatters] -keys=simpleFormatter - -[logger_root] -level=DEBUG -handlers=consoleHandler - -[logger_magnus] -level=DEBUG -handlers=consoleHandler -qualname=magnus 
-propagate=0 - -[handler_consoleHandler] -class=StreamHandler -level=DEBUG -formatter=simpleFormatter -args=(sys.stdout,) - -[formatter_simpleFormatter] -class=logging.ColorFormatter -format=%(asctime)s - [%(module)s %(funcName)s] - %(levelname)s - %(message)s -datefmt= diff --git a/magnus/nodes.py b/magnus/nodes.py index a555912c..e28b06fd 100644 --- a/magnus/nodes.py +++ b/magnus/nodes.py @@ -1,25 +1,20 @@ -import json import logging -import multiprocessing -from collections import OrderedDict -from copy import deepcopy -from datetime import datetime -from typing import List, Optional, Union +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Extra +from pydantic import BaseModel, ConfigDict, Field, field_validator -import magnus -from magnus import defaults, utils +import magnus.context as context +from magnus import defaults, exceptions from magnus.datastore import StepAttempt -from magnus.graph import create_graph -from magnus.tasks import create_task +from magnus.defaults import TypeMapVariable -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) # --8<-- [start:docs] -class BaseNode: +class BaseNode(ABC, BaseModel): """ Base class with common functionality provided for a Node of a graph. @@ -37,50 +32,24 @@ class BaseNode: The internal branch name should always be even when split against dot. """ - node_type = "" + node_type: str = Field(serialization_alias="type") + name: str + internal_name: str = Field(exclude=True) + internal_branch_name: str = Field(default="", exclude=True) + is_composite: bool = Field(default=False, exclude=True) - class Config(BaseModel): - class Config: - extra = Extra.forbid - - executor_config: dict = {} - - def __init__(self, *args, **kwargs): - next_node = kwargs.get("next", "") - if next_node: - del kwargs["next"] - kwargs["next_node"] = next_node - - super().__init__(*args, **kwargs) - - def __init__(self, name, internal_name, config, internal_branch_name=None): - # pylint: disable=R0914,R0913 - self.name = name - self.internal_name = internal_name #  Dot notation naming of the steps - self.config = self.Config(**config) - self.internal_branch_name = internal_branch_name # parallel, map, dag only have internal names - self.is_composite = False - - def validate(self) -> List[str]: - """ - Return a list of validation errors. - """ - messages = [] - if "." in self.name: - messages.append("Node names cannot have . in them") - - if "%" in self.name: - messages.append("Node names cannot have '%' in them") + @property + def _context(self): + return context.run_context - return messages + model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=False) - def _to_dict(self) -> dict: - """ - Return a dict representation of the node. - """ - config_dict = dict(self.config.dict()) - config_dict["type"] = self.node_type - return config_dict + @field_validator("name") + @classmethod + def validate_name(cls, name: str): + if "." in name or "%" in name: + raise ValueError("Node names cannot have . 
or '%' in them") + return name def _command_friendly_name(self, replace_with=defaults.COMMAND_FRIENDLY_CHARACTER) -> str: """ @@ -107,7 +76,7 @@ def _get_internal_name_from_command_name(cls, command_name: str) -> str: return command_name.replace(defaults.COMMAND_FRIENDLY_CHARACTER, " ") @classmethod - def _resolve_map_placeholders(cls, name: str, map_variable: dict = None) -> str: + def _resolve_map_placeholders(cls, name: str, map_variable: TypeMapVariable = None) -> str: """ If there is no map step used, then we just return the name as we find it. @@ -153,11 +122,11 @@ def _resolve_map_placeholders(cls, name: str, map_variable: dict = None) -> str: return name for _, value in map_variable.items(): - name = name.replace(defaults.MAP_PLACEHOLDER, value, 1) + name = name.replace(defaults.MAP_PLACEHOLDER, str(value), 1) return name - def _get_step_log_name(self, map_variable: dict = None) -> str: + def _get_step_log_name(self, map_variable: TypeMapVariable = None) -> str: """ For every step in the dag, there is a corresponding step log name. This method returns the step log name in dot path convention. @@ -174,7 +143,7 @@ def _get_step_log_name(self, map_variable: dict = None) -> str: """ return self._resolve_map_placeholders(self.internal_name, map_variable=map_variable) - def _get_branch_log_name(self, map_variable: dict = None) -> str: + def _get_branch_log_name(self, map_variable: TypeMapVariable = None) -> str: """ For nodes that are internally branches, this method returns the branch log name. The branch log name is in dot path convention. @@ -200,7 +169,8 @@ def __str__(self) -> str: # pragma: no cover """ return f"Node of type {self.node_type} and name {self.internal_name}" - def _get_on_failure_node(self) -> Optional[str]: + @abstractmethod + def _get_on_failure_node(self) -> str: """ If the node defines a on_failure node in the config, return this or None. @@ -210,49 +180,54 @@ def _get_on_failure_node(self) -> Optional[str]: str: The on_failure node defined by the dag or '' This is a base implementation which the BaseNode does not satisfy """ + ... + + @abstractmethod + def _get_next_node(self) -> str: + """ + Return the next node as defined by the config. + + Returns: + str: The node name, relative to the dag, as defined by the config + """ + ... + + @abstractmethod + def _is_terminal_node(self) -> bool: + """ + Returns whether a node has a next node - return self.config.on_failure + Returns: + bool: True or False of whether there is next node. + """ + ... - def _get_catalog_settings(self) -> Optional[dict]: + @abstractmethod + def _get_catalog_settings(self) -> Dict[str, Any]: """ If the node defines a catalog settings, return it or None Returns: dict: catalog settings defined as per the node or None - This is a base implementation which the BaseNode does not satisfy """ - return self.config.catalog + ... + @abstractmethod def _get_branch_by_name(self, branch_name: str): """ Retrieve a branch by name. The name is expected to follow a dot path convention. - This method will raise an exception if the node does not have any branches. - i.e: task, success, fail and as-is would raise an exception - Args: branch_name (str): [description] Raises: Exception: [description] """ - raise Exception(f"Node of type {self.node_type} does not have any branches") - - def _is_terminal_node(self) -> bool: - """ - Returns whether a node has a next node - - Returns: - bool: True or False of whether there is next node. 
- """ - if self.node_type in ["success", "fail"]: - return True - - return False + ... - def _get_neighbors(self) -> List: + def _get_neighbors(self) -> List[str]: """ Gets the connecting neighbor nodes, either the "next" node or "on_failure" node. @@ -260,29 +235,23 @@ def _get_neighbors(self) -> List: list: List of connected neighbors for a given node. Empty if terminal node. """ neighbors = [] - next_node = self._get_next_node() - if next_node: + try: + next_node = self._get_next_node() neighbors += [next_node] + except exceptions.TerminalNodeError: + pass - fail_node = self._get_on_failure_node() - if fail_node: - neighbors += [fail_node] + try: + fail_node = self._get_on_failure_node() + if fail_node: + neighbors += [fail_node] + except exceptions.TerminalNodeError: + pass return neighbors - def _get_next_node(self) -> Union[str, None]: - """ - Return the next node as defined by the config. - - Returns: - str: The node name, relative to the dag, as defined by the config - This is a base implementation which the BaseNode does not satisfy - """ - if not self._is_terminal_node(): - return self.config.next_node - return None - - def _get_executor_config(self, executor_type) -> dict: + @abstractmethod + def _get_executor_config(self, executor_type: str) -> str: """ Return the executor config of the node, if defined, or empty dict @@ -291,21 +260,21 @@ def _get_executor_config(self, executor_type) -> dict: Returns: dict: The executor config, if defined or an empty dict - This is a base implementation which the BaseNode does not satisfy """ - return self.config.executor_config.get(executor_type, {}) + ... + @abstractmethod def _get_max_attempts(self) -> int: """ The number of max attempts as defined by the config or 1. Returns: int: The number of maximum retries as defined by the config or 1. - This is a base implementation which the BaseNode does not satisfy """ - return self.config.retry + ... - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs) -> StepAttempt: + @abstractmethod + def execute(self, mock=False, map_variable: TypeMapVariable = None, **kwargs) -> StepAttempt: """ The actual function that does the execution of the command in the config. @@ -321,9 +290,10 @@ def execute(self, executor, mock=False, map_variable: dict = None, **kwargs) -> Raises: NotImplementedError: Base class, hence not implemented. """ - raise NotImplementedError + ... - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): + @abstractmethod + def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs): """ This function would be called to set up the execution of the individual branches of a composite node. @@ -336,9 +306,10 @@ def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): Raises: NotImplementedError: Base class, hence not implemented. """ - raise NotImplementedError + ... - def fan_out(self, executor, map_variable: dict = None, **kwargs): + @abstractmethod + def fan_out(self, map_variable: TypeMapVariable = None, **kwargs): """ This function would be called to set up the execution of the individual branches of a composite node. @@ -352,10 +323,10 @@ def fan_out(self, executor, map_variable: dict = None, **kwargs): Raises: Exception: If the node is not a composite node. """ - if not self.is_composite: - raise Exception(f"Node of type {self.node_type} is not a composite node. This is a bug.") + ... 
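As a concrete reading of the dot-path naming used above: _resolve_map_placeholders substitutes each map value into the internal name, one placeholder per value. The literal value of the placeholder constant is an assumption in this sketch.

# Assuming defaults.MAP_PLACEHOLDER == "map_variable_placeholder":
BaseNode._resolve_map_placeholders(
    "pipeline.chunks.map_variable_placeholder.process",
    map_variable={"chunk": 3},
)
# -> "pipeline.chunks.3.process"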
- def fan_in(self, executor, map_variable: dict = None, **kwargs): + @abstractmethod + def fan_in(self, map_variable: TypeMapVariable = None, **kwargs): """ This function would be called to tear down the execution of the individual branches of a composite node. @@ -369,880 +340,150 @@ def fan_in(self, executor, map_variable: dict = None, **kwargs): Raises: Exception: If the node is not a composite node. """ - if not self.is_composite: - raise Exception(f"Node of type {self.node_type} is not a composite node. This is a bug.") - - -# --8<-- [end:docs] - - -class TaskNode(BaseNode): - """ - A node of type Task. - - This node does the actual function execution of the graph in all cases. - """ - - node_type = "task" - - class Config(BaseNode.Config, extra=Extra.allow): # type: ignore - next_node: str - catalog: dict = {} - retry: int = 1 - on_failure: str = "" - - @classmethod - def get_field_names(cls) -> List[str]: - field_names = [] - for k, _ in cls.__fields__.items(): - field_names.append(k) - - return field_names - - def __init__(self, name, internal_name, config, internal_branch_name=None): - super().__init__(name, internal_name, config, internal_branch_name) - - kwargs_for_command = { - "node_name": self.name, - } - - for key, value in self.config.dict().items(): - if key not in TaskNode.Config.get_field_names(): - # Ignore all the fields that are used by node itself - kwargs_for_command[key] = value - - self.executable = create_task(kwargs_for_command) - - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs) -> StepAttempt: - """ - All that we do in magnus is to come to this point where we actually execute the command. + ... - Args: - executor (_type_): The executor class - mock (bool, optional): If we should just mock and not execute. Defaults to False. - map_variable (dict, optional): If the node is part of internal branch. Defaults to None. - - Returns: - StepAttempt: The attempt object - """ - # Here is where the juice is - attempt_log = executor.run_log_store.create_attempt_log() - try: - attempt_log.start_time = str(datetime.now()) - attempt_log.status = defaults.SUCCESS - if not mock: - # Do not run if we are mocking the execution, could be useful for caching and dry runs - self.executable.execute_command(map_variable=map_variable) - except Exception as _e: # pylint: disable=W0703 - logger.exception("Task failed") - attempt_log.status = defaults.FAIL - attempt_log.message = str(_e) - finally: - attempt_log.end_time = str(datetime.now()) - attempt_log.duration = utils.get_duration_between_datetime_strings( - attempt_log.start_time, attempt_log.end_time - ) - return attempt_log - - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): + @classmethod + @abstractmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "BaseNode": """ - Should not be implemented for a single node. + Parse the config from the user and create the corresponding node. Args: - executor ([type]): [description] - - Raises: - Exception: Not a composite node, always raises an exception - """ - raise Exception("Node is not a composite node, invalid traversal rule") - - -class FailNode(BaseNode): - """ - A leaf node of the graph that represents a failure node - """ - - node_type = "fail" - - def _get_on_failure_node(self) -> Optional[str]: - """ - The on_failure node as defined by the config. - Which is nothing as failure nodes do not have an on_failure node. + config (Dict[str, Any]): The config of the node from the yaml or from the sdk. 
Returns: - Optional[str]: Returns an empty string. + BaseNode: The corresponding node. """ - return "" + ... - def _get_max_attempts(self) -> int: - """ - The number of max attempts as defined by the config or 1. - Returns: - int: The number of maximum retries as defined by the config or 1. - """ - return 1 - - def _get_catalog_settings(self) -> Optional[dict]: - """ - There are no catalog settings for failure nodes. - - Returns: - Optional[dict]: Any empty dict - """ - return {} - - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs) -> StepAttempt: - """ - Execute the failure node. - Set the run or branch log status to failure. - - Args: - executor (_type_): the executor class - mock (bool, optional): If we should just mock and not do the actual execution. Defaults to False. - map_variable (dict, optional): If the node belongs to internal branches. Defaults to None. - - Returns: - StepAttempt: The step attempt object - """ - attempt_log = executor.run_log_store.create_attempt_log() - try: - attempt_log.start_time = str(datetime.now()) - attempt_log.status = defaults.SUCCESS - #  could be a branch or run log - run_or_branch_log = executor.run_log_store.get_branch_log( - self._get_branch_log_name(map_variable), executor.run_id - ) - run_or_branch_log.status = defaults.FAIL - executor.run_log_store.add_branch_log(run_or_branch_log, executor.run_id) - except BaseException: # pylint: disable=W0703 - logger.exception("Fail node execution failed") - finally: - attempt_log.status = defaults.SUCCESS # This is a dummy node, so we ignore errors and mark SUCCESS - attempt_log.end_time = str(datetime.now()) - attempt_log.duration = utils.get_duration_between_datetime_strings( - attempt_log.start_time, attempt_log.end_time - ) - return attempt_log - - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): - """ - Should not be implemented for a single node. - - Args: - executor ([type]): [description] +# --8<-- [end:docs] +class TraversalNode(BaseNode): + next_node: str = Field(serialization_alias="next") + on_failure: str = Field(default="") + overrides: Dict[str, str] = Field(default_factory=dict) - Raises: - Exception: Not a composite node, always raises an exception + def _get_on_failure_node(self) -> str: """ - raise Exception("Node is not a composite node, invalid traversal rule") - - -class SuccessNode(BaseNode): - """ - A leaf node of the graph that represents a success node - """ - - node_type = "success" + If the node defines a on_failure node in the config, return this or None. - def _get_on_failure_node(self) -> Optional[str]: - """ - The on_failure node as defined by the config. - Which is nothing as success nodes do not have an on_failure node. + The naming is relative to the dag, the caller is supposed to resolve it to the correct graph Returns: - Optional[str]: Returns an empty string. + str: The on_failure node defined by the dag or '' + This is a base implementation which the BaseNode does not satisfy """ - return "" + return self.on_failure - def _get_max_attempts(self) -> int: + def _get_next_node(self) -> str: """ - The number of max attempts as defined by the config or 1. + Return the next node as defined by the config. Returns: - int: The number of maximum retries as defined by the config or 1. - """ - return 1 - - def _get_catalog_settings(self) -> Optional[dict]: + str: The node name, relative to the dag, as defined by the config """ - There are no catalog settings for success nodes. - Returns an empty dict. 
- """ - return {} + return self.next_node - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs) -> StepAttempt: + def _is_terminal_node(self) -> bool: """ - Execute the success node. - Set the run or branch log status to success. - - Args: - executor (_type_): The executor class - mock (bool, optional): If we should just mock and not perform anything. Defaults to False. - map_variable (dict, optional): If the node belongs to an internal branch. Defaults to None. + Returns whether a node has a next node Returns: - StepAttempt: The step attempt object - """ - attempt_log = executor.run_log_store.create_attempt_log() - try: - attempt_log.start_time = str(datetime.now()) - attempt_log.status = defaults.SUCCESS - #  could be a branch or run log - run_or_branch_log = executor.run_log_store.get_branch_log( - self._get_branch_log_name(map_variable), executor.run_id - ) - run_or_branch_log.status = defaults.SUCCESS - executor.run_log_store.add_branch_log(run_or_branch_log, executor.run_id) - except BaseException: # pylint: disable=W0703 - logger.exception("Success node execution failed") - finally: - attempt_log.status = defaults.SUCCESS # This is a dummy node and we make sure we mark it as success - attempt_log.end_time = str(datetime.now()) - attempt_log.duration = utils.get_duration_between_datetime_strings( - attempt_log.start_time, attempt_log.end_time - ) - return attempt_log - - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): - """ - Should not be implemented for a single node. - - Args: - executor ([type]): [description] - - Raises: - Exception: Not a composite node, always raises an exception + bool: True or False of whether there is next node. """ - raise Exception("Node is not a composite node, invalid traversal rule") - + return False -class ParallelNode(BaseNode): - """ - A composite node containing many graph objects within itself. + def _get_executor_config(self, executor_type) -> str: + return self.overrides.get(executor_type) or "" - The structure is generally: - ParallelNode: - Branch A: - Sub graph definition - Branch B: - Sub graph definition - . . . - """ +class CatalogStructure(BaseModel): + model_config = ConfigDict(extra="forbid") # Need to forbid - node_type = "parallel" + get: List[str] = Field(default_factory=list) + put: List[str] = Field(default_factory=list) - class Config(BaseNode.Config): - next_node: str - branches: dict - on_failure: str = "" - def __init__(self, name, internal_name, config, internal_branch_name=None): - # pylint: disable=R0914,R0913 - super().__init__(name, internal_name, config, internal_branch_name=internal_branch_name) - self.branches = self.get_sub_graphs() - self.is_composite = True +class ExecutableNode(TraversalNode): + catalog: Optional[CatalogStructure] = Field(default=None) + max_attempts: int = Field(default=1, ge=1) - def get_sub_graphs(self): + def _get_catalog_settings(self) -> Dict[str, Any]: """ - For the branches mentioned in the config['branches'], create a graph object. 
- The graph object is also given an internal naming convention following a dot path convention + If the node defines a catalog settings, return it or None Returns: - dict: A branch_name: dag for every branch mentioned in the branches + dict: catalog settings defined as per the node or None """ + if self.catalog: + return self.catalog.model_dump() + return {} - branches = {} - for branch_name, branch_config in self.config.branches.items(): - sub_graph = create_graph( - deepcopy(branch_config), - internal_branch_name=self.internal_name + "." + branch_name, - ) - branches[self.internal_name + "." + branch_name] = sub_graph - - if not branches: - raise Exception("A parallel node should have branches") - return branches + def _get_max_attempts(self) -> int: + return self.max_attempts def _get_branch_by_name(self, branch_name: str): - """ - Retrieve a branch by name. - The name is expected to follow a dot path convention. - - Returns a Graph Object - - Args: - branch_name (str): The name of the branch to retrieve - - Raises: - Exception: If the branch by that name does not exist - """ - if branch_name in self.branches: - return self.branches[branch_name] - - raise Exception(f"No branch by name: {branch_name} is present in {self.name}") - - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs): - """ - This method should never be called for a node of type Parallel - - Args: - executor (BaseExecutor): The Executor class as defined by the config - mock (bool, optional): If the operation is just a mock. Defaults to False. - - Raises: - NotImplementedError: This method should never be called for a node of type Parallel - """ - raise Exception("Node is of type composite, error in traversal rules") - - def fan_out(self, executor, map_variable: dict = None, **kwargs): - """ - The general fan out method for a node of type Parallel. - This method assumes that the step log has already been created. - - 3rd party orchestrators should create the step log and use this method to create the branch logs. - - Args: - executor (BaseExecutor): The executor class as defined by the config - map_variable (dict, optional): If the node is part of a map node. Defaults to None. - """ - # Prepare the branch logs - for internal_branch_name, _ in self.branches.items(): - effective_branch_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable) + raise Exception("This is an executable node and does not have branches") - branch_log = executor.run_log_store.create_branch_log(effective_branch_name) - branch_log.status = defaults.PROCESSING - executor.run_log_store.add_branch_log(branch_log, executor.run_id) + def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs): + raise Exception("This is an executable node and does not have a graph") - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): - """ - This function does the actual execution of the sub-branches of the parallel node. - - From a design perspective, this function should not be called if the execution is 3rd party orchestrated. - - The modes that render the job specifications, do not need to interact with this node at all as they have their - own internal mechanisms of handing parallel states. - If they do not, you can find a way using as-is nodes as hack nodes. 
- - The execution of a dag, could result in - * The dag being completely executed with a definite (fail, success) state in case of - local or local-container execution - * The dag being in a processing state with PROCESSING status in case of local-aws-batch - - Only fail state is considered failure during this phase of execution. - - Args: - executor (Executor): The Executor as per the use config - **kwargs: Optional kwargs passed around - """ - self.fan_out(executor, map_variable=map_variable, **kwargs) - - jobs = [] - # Given that we can have nesting and complex graphs, controlling the number of processes is hard. - # A better way is to actually submit the job to some process scheduler which does resource management - for internal_branch_name, branch in self.branches.items(): - if executor._is_parallel_execution(): - # Trigger parallel jobs - action = magnus.pipeline.execute_single_brach - kwargs = { - "configuration_file": executor.configuration_file, - "pipeline_file": executor.pipeline_file, - "branch_name": internal_branch_name.replace(" ", defaults.COMMAND_FRIENDLY_CHARACTER), - "run_id": executor.run_id, - "map_variable": json.dumps(map_variable), - "tag": executor.tag, - } - process = multiprocessing.Process(target=action, kwargs=kwargs) - jobs.append(process) - process.start() - - else: - # If parallel is not enabled, execute them sequentially - executor.execute_graph(branch, map_variable=map_variable, **kwargs) - - for job in jobs: - job.join() # Find status of the branches - - self.fan_in(executor, map_variable=map_variable, **kwargs) - - def fan_in(self, executor, map_variable: dict = None, **kwargs): - """ - The general fan in method for a node of type Parallel. - - 3rd party orchestrators should use this method to find the status of the composite step. - - Args: - executor (BaseExecutor): The executor class as defined by the config - map_variable (dict, optional): If the node is part of a map. Defaults to None. - """ - step_success_bool = True - for internal_branch_name, _ in self.branches.items(): - effective_branch_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable) - branch_log = executor.run_log_store.get_branch_log(effective_branch_name, executor.run_id) - if branch_log.status != defaults.SUCCESS: - step_success_bool = False - - # Collate all the results and update the status of the step - effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable) - step_log = executor.run_log_store.get_step_log(effective_internal_name, executor.run_id) - - if step_success_bool: #  If none failed - step_log.status = defaults.SUCCESS - else: - step_log.status = defaults.FAIL - - executor.run_log_store.add_step_log(step_log, executor.run_id) - - -class MapNode(BaseNode): - """ - A composite node that contains ONE graph object within itself that has to be executed with an iterable. - - The structure is generally: - MapNode: - branch - - The config is expected to have a variable 'iterate_on' and iterate_as which are looked for in the parameters. 
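With ExecutableNode filling in the branch, catalog and retry hooks, a concrete node only has to provide execute and parse_from_config. A hedged sketch; EchoNode and its message field are illustrative and not part of magnus.

class EchoNode(ExecutableNode):
    node_type: str = "echo"
    message: str = "hello"

    def execute(self, mock=False, map_variable: TypeMapVariable = None, **kwargs) -> StepAttempt:
        attempt_log = self._context.run_log_store.create_attempt_log()
        if not mock:
            print(self.message)
        attempt_log.status = defaults.SUCCESS
        return attempt_log

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "EchoNode":
        return cls(**config)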
- for iter_variable in parameters['iterate_on']: - Execute the Branch by sending {'iterate_as': iter_variable} - - The internal naming convention creates branches dynamically based on the iteration value - """ - - node_type = "map" - - class Config(BaseNode.Config): - next_node: str - branch: dict - iterate_on: str - iterate_as: str - on_failure: str = "" - - def __init__(self, name, internal_name, config, internal_branch_name=None): - # pylint: disable=R0914,R0913 - super().__init__(name, internal_name, config, internal_branch_name=internal_branch_name) - self.is_composite = True - self.branch_placeholder_name = defaults.MAP_PLACEHOLDER - self.branch = self.get_sub_graph() - - @property - def iterate_as(self) -> str: - """ - The name to give the variable in the iteration. - - For example: for i in range(10): - Here "i" is the iterate_as - - Returns: - str: The name to give the variable in the iteration - """ - return self.config.iterate_as + def fan_in(self, map_variable: TypeMapVariable = None, **kwargs): + raise Exception("This is an executable node and does not have a fan in") - @property - def iterate_on(self) -> str: - """ - The parameter to be iterated on. + def fan_out(self, map_variable: TypeMapVariable = None, **kwargs): + raise Exception("This is an executable node and does not have a fan out") - Returns: - str: The name of the parameter to be iterated on - """ - return self.config.iterate_on - def get_sub_graph(self): +class CompositeNode(TraversalNode): + def _get_catalog_settings(self) -> Dict[str, Any]: """ - Create a sub-dag from the config['branch'] - - The graph object has an internal branch name, that is equal to the name of the step. - And the sub-dag nodes follow an dot path naming convention + If the node defines a catalog settings, return it or None Returns: - Graph: A graph object - """ - - branch_config = self.config.branch - branch = create_graph( - deepcopy(branch_config), - internal_branch_name=self.internal_name + "." + self.branch_placeholder_name, - ) - return branch - - def _get_branch_by_name(self, branch_name: str): - """ - Retrieve a branch by name. - - In the case of a Map Object, the branch naming is dynamic as it is parameterized on iterable. - This method takes no responsibility in checking the validity of the naming. - - Returns a Graph Object - - Args: - branch_name (str): The name of the branch to retrieve - - Raises: - Exception: If the branch by that name does not exist - """ - return self.branch - - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs): - """ - This method should never be called for a node of type map - - Args: - executor (BaseExecutor): The Executor class as defined by the config - mock (bool, optional): If the operation is just a mock. Defaults to False. - - Raises: - NotImplementedError: This method should never be called for a node of type map. - """ - raise Exception("Node is of type composite, error in traversal rules") - - def fan_out(self, executor, map_variable: dict = None, **kwargs): - """ - The general method to fan out for a node of type map. - This method assumes that the step log has already been created. - - 3rd party orchestrators should call this method to create the individual branch logs. - - Args: - executor (BaseExecutor): The executor class as defined by the config - map_variable (dict, optional): If the node is part of map. Defaults to None. 
- """ - iterate_on = executor.run_log_store.get_parameters(executor.run_id)[self.iterate_on] - - # Prepare the branch logs - for iter_variable in iterate_on: - effective_branch_name = self._resolve_map_placeholders( - self.internal_name + "." + str(iter_variable), map_variable=map_variable - ) - branch_log = executor.run_log_store.create_branch_log(effective_branch_name) - branch_log.status = defaults.PROCESSING - executor.run_log_store.add_branch_log(branch_log, executor.run_id) - - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): - """ - This function does the actual execution of the branch of the map node. - - From a design perspective, this function should not be called if the execution is 3rd party orchestrated. - - The modes that render the job specifications, do not need to interact with this node at all as - they have their own internal mechanisms of handing map states or dynamic parallel states. - If they do not, you can find a way using as-is nodes as hack nodes. - - The actual logic is : - * We iterate over the iterable as mentioned in the config - * For every value in the iterable we call the executor.execute_graph(branch, iterate_as: iter_variable) - - The execution of a dag, could result in - * The dag being completely executed with a definite (fail, success) state in case of local - or local-container execution - * The dag being in a processing state with PROCESSING status in case of local-aws-batch - - Only fail state is considered failure during this phase of execution. - - Args: - executor (Executor): The Executor as per the use config - map_variable (dict): The map variables the graph belongs to - **kwargs: Optional kwargs passed around - """ - iterate_on = None - try: - iterate_on = executor.run_log_store.get_parameters(executor.run_id)[self.iterate_on] - except KeyError: - raise Exception( - f"Expected parameter {self.iterate_on} not present in Run Log parameters, was it ever set before?" - ) - - if not isinstance(iterate_on, list): - raise Exception("Only list is allowed as a valid iterator type") - - self.fan_out(executor, map_variable=map_variable, **kwargs) - - jobs = [] - # Given that we can have nesting and complex graphs, controlling the number of processess is hard. - # A better way is to actually submit the job to some process scheduler which does resource management - for iter_variable in iterate_on: - effective_map_variable = map_variable or OrderedDict() - effective_map_variable[self.iterate_as] = iter_variable - - if executor._is_parallel_execution(): - # Trigger parallel jobs - action = magnus.pipeline.execute_single_brach - kwargs = { - "configuration_file": executor.configuration_file, - "pipeline_file": executor.pipeline_file, - "branch_name": self.branch.internal_branch_name.replace(" ", defaults.COMMAND_FRIENDLY_CHARACTER), - "run_id": executor.run_id, - "map_variable": json.dumps(effective_map_variable), - "tag": executor.tag, - } - process = multiprocessing.Process(target=action, kwargs=kwargs) - jobs.append(process) - process.start() - - else: - # If parallel is not enabled, execute them sequentially - executor.execute_graph(self.branch, map_variable=effective_map_variable, **kwargs) - - for job in jobs: - job.join() - - self.fan_in(executor, map_variable=map_variable, **kwargs) - - def fan_in(self, executor, map_variable: dict = None, **kwargs): - """ - The general method to fan in for a node of type map. - - 3rd party orchestrators should call this method to find the status of the step log. 
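# Editor's note: an illustrative sketch, not part of the patch. It condenses the
# fan-out / execute / fan-in control flow that the (removed) interactive-executor
# code above implements for a map node. `executor` and `node` are assumed to look
# like the objects used in that code (run_log_store, iterate_on, iterate_as,
# branch, fan_out, fan_in); the real removed code launches a fresh
# magnus.pipeline entry point per parallel branch rather than calling
# executor.execute_graph directly.
import multiprocessing


def run_map_node(executor, node, map_variable=None):
    # The iterable is a parameter produced by an upstream step.
    iterate_on = executor.run_log_store.get_parameters(executor.run_id)[node.iterate_on]

    node.fan_out(executor, map_variable=map_variable)  # one PROCESSING branch log per value

    jobs = []
    for iter_variable in iterate_on:
        effective = dict(map_variable or {}, **{node.iterate_as: iter_variable})
        if executor._is_parallel_execution():
            # Parallel execution: one process per value of the iterable.
            job = multiprocessing.Process(
                target=executor.execute_graph,
                args=(node.branch,),
                kwargs={"map_variable": effective},
            )
            jobs.append(job)
            job.start()
        else:
            # Sequential fallback.
            executor.execute_graph(node.branch, map_variable=effective)

    for job in jobs:
        job.join()

    node.fan_in(executor, map_variable=map_variable)  # SUCCESS only if every branch log succeeded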
- - Args: - executor (BaseExecutor): The executor class as defined by the config - map_variable (dict, optional): If the node is part of map node. Defaults to None. + dict: catalog settings defined as per the node or None """ - iterate_on = executor.run_log_store.get_parameters(executor.run_id)[self.iterate_on] - # # Find status of the branches - step_success_bool = True - - for iter_variable in iterate_on: - effective_branch_name = self._resolve_map_placeholders( - self.internal_name + "." + str(iter_variable), map_variable=map_variable - ) - branch_log = executor.run_log_store.get_branch_log(effective_branch_name, executor.run_id) - if branch_log.status != defaults.SUCCESS: - step_success_bool = False - - # Collate all the results and update the status of the step - effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable) - step_log = executor.run_log_store.get_step_log(effective_internal_name, executor.run_id) - - if step_success_bool: #  If none failed and nothing is waiting - step_log.status = defaults.SUCCESS - else: - step_log.status = defaults.FAIL - - executor.run_log_store.add_step_log(step_log, executor.run_id) - - -class DagNode(BaseNode): - """ - A composite node that internally holds a dag. - - The structure is genrally: - DagNode: - dag_definition: A YAML file that holds the dag in 'dag' block + raise Exception("This is a composite node and does not have a catalog settings") - The config is expected to have a variable 'dag_definition'. - """ - - node_type = "dag" - - class Config(BaseNode.Config): - next_node: str - dag_definition: str - on_failure: str = "" - - def __init__(self, name, internal_name, config, internal_branch_name=None): - # pylint: disable=R0914,R0913 - super().__init__(name, internal_name, config, internal_branch_name=internal_branch_name) - self.sub_dag_file = self.config.dag_definition - self.is_composite = True - self.branch = self.get_sub_graph() - - @property - def _internal_branch_name(self): - """ - THe internal branch name in dot path convention + def _get_max_attempts(self) -> int: + raise Exception("This is a composite node and does not have a max_attempts") - Returns: - [type]: [description] - """ - return self.internal_name + "." + defaults.DAG_BRANCH_NAME + def execute(self, mock=False, map_variable: TypeMapVariable = None, **kwargs) -> StepAttempt: + raise Exception("This is a composite node and does not have an execute function") - def get_sub_graph(self): - """ - Create a sub-dag from the config['dag_definition'] - The graph object has an internal branch name, that is equal to the name of the step. - And the sub-dag nodes follow an dot path naming convention +class TerminalNode(BaseNode): + def _get_on_failure_node(self) -> str: + raise exceptions.TerminalNodeError() - Returns: - Graph: A graph object - """ + def _get_next_node(self) -> str: + raise exceptions.TerminalNodeError() - dag_config = utils.load_yaml(self.sub_dag_file) - if "dag" not in dag_config: - raise Exception(f"No DAG found in {self.sub_dag_file}, please provide it in dag block") + def _is_terminal_node(self) -> bool: + return True - branch = create_graph(dag_config["dag"], internal_branch_name=self._internal_branch_name) - return branch + def _get_catalog_settings(self) -> Dict[str, Any]: + raise exceptions.TerminalNodeError() def _get_branch_by_name(self, branch_name: str): - """ - Retrieve a branch by name. - The name is expected to follow a dot path convention. 
- - Returns a Graph Object - - Args: - branch_name (str): The name of the branch to retrieve - - Raises: - Exception: If the branch_name is not 'dag' - """ - if branch_name != self._internal_branch_name: - raise Exception(f"Node of type {self.node_type} only allows a branch of name {defaults.DAG_BRANCH_NAME}") - - return self.branch - - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs): - """ - This method should never be called for a node of type dag - - Args: - executor (BaseExecutor): The Executor class as defined by the config - mock (bool, optional): If the operation is just a mock. Defaults to False. - - Raises: - NotImplementedError: This method should never be called for a node of type Parallel - """ - raise Exception("Node is of type composite, error in traversal rules") - - def fan_out(self, executor, map_variable: dict = None, **kwargs): - """ - The general method to fan out for a node of type dag. - The method assumes that the step log has already been created. + raise exceptions.TerminalNodeError() - Args: - executor (BaseExecutor): The executor class as defined by the config - map_variable (dict, optional): _description_. Defaults to None. - """ - effective_branch_name = self._resolve_map_placeholders(self._internal_branch_name, map_variable=map_variable) - - branch_log = executor.run_log_store.create_branch_log(effective_branch_name) - branch_log.status = defaults.PROCESSING - executor.run_log_store.add_branch_log(branch_log, executor.run_id) - - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): - """ - This function does the actual execution of the branch of the dag node. - - From a design perspective, this function should not be called if the execution is 3rd party orchestrated. + def _get_executor_config(self, executor_type) -> str: + raise exceptions.TerminalNodeError() - The modes that render the job specifications, do not need to interact with this node at all - as they have their own internal mechanisms of handling sub dags. - If they do not, you can find a way using as-is nodes as hack nodes. - - The actual logic is : - * We just execute the branch as with any other composite nodes - * The branch name is called 'dag' - - The execution of a dag, could result in - * The dag being completely executed with a definite (fail, success) state in case of - local or local-container execution - * The dag being in a processing state with PROCESSING status in case of local-aws-batch - - Only fail state is considered failure during this phase of execution. - - Args: - executor (Executor): The Executor as per the use config - **kwargs: Optional kwargs passed around - """ - self.fan_out(executor, map_variable=map_variable, **kwargs) - executor.execute_graph(self.branch, map_variable=map_variable, **kwargs) - self.fan_in(executor, map_variable=map_variable, **kwargs) - - def fan_in(self, executor, map_variable: dict = None, **kwargs): - """ - The general method to fan in for a node of type dag. - - 3rd party orchestrators should call this method to find the status of the step log. - - Args: - executor (BaseExecutor): The executor class as defined by the config - map_variable (dict, optional): If the node is part of type dag. Defaults to None. 
- """ - step_success_bool = True - effective_branch_name = self._resolve_map_placeholders(self._internal_branch_name, map_variable=map_variable) - effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable) - - branch_log = executor.run_log_store.get_branch_log(effective_branch_name, executor.run_id) - if branch_log.status != defaults.SUCCESS: - step_success_bool = False - - step_log = executor.run_log_store.get_step_log(effective_internal_name, executor.run_id) - step_log.status = defaults.PROCESSING - - if step_success_bool: #  If none failed and nothing is waiting - step_log.status = defaults.SUCCESS - else: - step_log.status = defaults.FAIL - - executor.run_log_store.add_step_log(step_log, executor.run_id) - - -class AsISNode(BaseNode): - """ - AsIs is a convenience design node. - - It always returns success in the attempt log and does nothing during interactive compute. - - The command given to execute is ignored but it does do the syncing of the catalog. - This node is very akin to pass state in Step functions. - - This node type could be handy when designing the pipeline and stubbing functions - - But in render mode for job specification of a 3rd party orchestrator, this node comes handy. - """ - - node_type = "as-is" - - class Config(BaseNode.Config, extra=Extra.allow): # type: ignore - next_node: str - on_failure: str = "" - retry: int = 1 - - def _get_catalog_settings(self) -> Optional[dict]: - """ - Get the catalog settings from the config. - - As it is as-is node, we do not need to sync the catalog. - - Returns: - dict: The catalog settings - """ - return {} - - def execute(self, executor, mock=False, map_variable: dict = None, **kwargs) -> StepAttempt: - """ - Do Nothing node. - We just send an success attempt log back to the caller - - Args: - executor ([type]): [description] - mock (bool, optional): [description]. Defaults to False. - map_variable (str, optional): [description]. Defaults to ''. - - Returns: - [type]: [description] - """ - attempt_log = executor.run_log_store.create_attempt_log() - - attempt_log.start_time = str(datetime.now()) - attempt_log.status = defaults.SUCCESS # This is a dummy node and always will be success + def _get_max_attempts(self) -> int: + return 1 - attempt_log.end_time = str(datetime.now()) - attempt_log.duration = utils.get_duration_between_datetime_strings(attempt_log.start_time, attempt_log.end_time) - return attempt_log + def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs): + raise exceptions.TerminalNodeError() - def execute_as_graph(self, executor, map_variable: dict = None, **kwargs): - """ - Should not be implemented for a single node. 
+ def fan_in(self, map_variable: TypeMapVariable = None, **kwargs): + raise exceptions.TerminalNodeError() - Args: - executor ([type]): [description] + def fan_out(self, map_variable: TypeMapVariable = None, **kwargs): + raise exceptions.TerminalNodeError() - Raises: - Exception: Not a composite node, always raises an exception - """ - raise Exception("Node is not a composite node, invalid traversal rule") + @classmethod + def parse_from_config(cls, config: Dict[str, Any]) -> "TerminalNode": + return cls(**config) diff --git a/magnus/parameters.py b/magnus/parameters.py new file mode 100644 index 00000000..59953dca --- /dev/null +++ b/magnus/parameters.py @@ -0,0 +1,183 @@ +import inspect +import json +import logging +import os +from typing import Any, Dict, Optional, Type, Union + +from pydantic import BaseModel, ConfigDict +from typing_extensions import Callable + +from magnus import defaults +from magnus.defaults import TypeMapVariable +from magnus.utils import remove_prefix + +logger = logging.getLogger(defaults.LOGGER_NAME) + + +def get_user_set_parameters(remove: bool = False) -> Dict[str, Any]: + """ + Scans the environment variables for any user returned parameters that have a prefix MAGNUS_PRM_. + + This function does not deal with any type conversion of the parameters. + It just deserializes the parameters and returns them as a dictionary. + + Args: + remove (bool, optional): Flag to remove the parameter if needed. Defaults to False. + + Returns: + dict: The dictionary of found user returned parameters + """ + parameters = {} + for env_var, value in os.environ.items(): + if env_var.startswith(defaults.PARAMETER_PREFIX): + key = remove_prefix(env_var, defaults.PARAMETER_PREFIX) + try: + parameters[key.lower()] = json.loads(value) + except json.decoder.JSONDecodeError: + logger.error(f"Parameter {key} could not be JSON decoded, adding the literal value") + parameters[key.lower()] = value + + if remove: + del os.environ[env_var] + return parameters + + +def set_user_defined_params_as_environment_variables(params: Dict[str, Any]): + """ + Sets the user set parameters as environment variables. + + At this point in time, the params are already in Dict or some kind of literal + + Args: + parameters (Dict[str, Any]): The parameters to set as environment variables + update (bool, optional): Flag to update the environment variables. Defaults to True. + + """ + for key, value in params.items(): + logger.info(f"Storing parameter {key} with value: {value}") + environ_key = defaults.PARAMETER_PREFIX + key + + os.environ[environ_key] = serialize_parameter_as_str(value) + + +def cast_parameters_as_type(value: Any, newT: Optional[Type] = None) -> Union[Any, BaseModel, Dict[str, Any]]: + """ + Casts the environment variable to the given type. + + Note: Only pydantic models special, everything else just goes through. + + Args: + value (Any): The value to cast + newT (T): The type to cast to + + Returns: + T: The casted value + + Examples: + >>> class MyBaseModel(BaseModel): + ... a: int + ... b: str + >>> + >>> class MyDict(dict): + ... 
pass + >>> + >>> cast_parameters_as_type({"a": 1, "b": "2"}, MyBaseModel) + MyBaseModel(a=1, b="2") + >>> cast_parameters_as_type({"a": 1, "b": "2"}, MyDict) + MyDict({'a': 1, 'b': '2'}) + >>> cast_parameters_as_type(MyBaseModel(a=1, b="2"), MyBaseModel) + MyBaseModel(a=1, b="2") + >>> cast_parameters_as_type(MyDict({"a": 1, "b": "2"}), MyBaseModel) + MyBaseModel(a=1, b="2") + >>> cast_parameters_as_type({"a": 1, "b": "2"}, MyDict[str, int]) + MyDict({'a': 1, 'b': '2'}) + >>> cast_parameters_as_type({"a": 1, "b": "2"}, Dict[str, int]) + MyDict({'a': 1, 'b': '2'}) + >>> with pytest.warns(UserWarning): + ... cast_parameters_as_type(1, MyBaseModel) + MyBaseModel(a=1, b=None) + >>> with pytest.raises(TypeError): + ... cast_parameters_as_type(1, MyDict) + """ + if not newT: + return value + + if issubclass(newT, BaseModel): + return newT(**value) + + if issubclass(newT, Dict): + return dict(value) + + if type(value) != newT: + logger.warning(f"Casting {value} of {type(value)} to {newT} seems wrong!!") + + return newT(value) + + +def serialize_parameter_as_str(value: Any) -> str: + if isinstance(value, BaseModel): + return json.dumps(value.model_dump()) + + return json.dumps(value) + + +def filter_arguments_for_func( + func: Callable[..., Any], params: Dict[str, Any], map_variable: TypeMapVariable = None +) -> Dict[str, Any]: + """ + Inspects the function to be called as part of the pipeline to find the arguments of the function. + Matches the function arguments to the parameters available either by command line or by up stream steps. + + + Args: + func (Callable): The function to inspect + parameters (dict): The parameters available for the run + + Returns: + dict: The parameters matching the function signature + """ + function_args = inspect.signature(func).parameters + + # Update parameters with the map variables + params.update(map_variable or {}) + + unassigned_params = set(params.keys()) + bound_args = {} + for name, value in function_args.items(): + if name not in params: + # No parameter of this name was provided + if value.default == inspect.Parameter.empty: + # No default value is given in the function signature. error as parameter is required. + raise ValueError(f"Parameter {name} is required for {func.__name__} but not provided") + # default value is given in the function signature, nothing further to do. + continue + + if issubclass(value.annotation, BaseModel): + # We try to cast it as a pydantic model. + named_param = params[name] + + if not isinstance(named_param, dict): + # A case where the parameter is a one attribute model + named_param = {name: named_param} + + bound_model = bind_args_for_pydantic_model(named_param, value.annotation) + bound_args[name] = bound_model + unassigned_params = unassigned_params.difference(bound_model.model_fields.keys()) + else: + # simple python data type. 
+ bound_args[name] = cast_parameters_as_type(params[name], value.annotation) # type: ignore + + unassigned_params.remove(name) + + params = {key: params[key] for key in unassigned_params} # remove keys from params if they are assigned + + return bound_args + + +def bind_args_for_pydantic_model(params: Dict[str, Any], model: Type[BaseModel]) -> BaseModel: + class EasyModel(model): # type: ignore + model_config = ConfigDict(extra="ignore") + + swallow_all = EasyModel(**params) + bound_model = model(**swallow_all.model_dump()) + return bound_model diff --git a/magnus/pickler.py b/magnus/pickler.py index 03f9b805..66166031 100644 --- a/magnus/pickler.py +++ b/magnus/pickler.py @@ -1,8 +1,13 @@ import pickle +from abc import ABC, abstractmethod from typing import Any +from pydantic import BaseModel, ConfigDict -class BasePickler: +import magnus.context as context + + +class BasePickler(ABC, BaseModel): """ The base class for all picklers. @@ -10,9 +15,16 @@ class BasePickler: For now, we are just going to use pickle. """ - extension = "" - service_name = "" + extension: str = "" + service_name: str = "" + service_type: str = "pickler" + model_config = ConfigDict(extra="forbid") + + @property + def _context(self): + return context.run_context + @abstractmethod def dump(self, data: Any, path: str): """ Dump an object to the specified path. @@ -30,6 +42,7 @@ def dump(self, data: Any, path: str): """ raise NotImplementedError + @abstractmethod def load(self, path: str) -> Any: """ Load the object from the specified path. @@ -51,8 +64,8 @@ class NativePickler(BasePickler): Uses native python pickle to load and dump files """ - extension = ".pickle" - service_name = "pickle" + extension: str = ".pickle" + service_name: str = "pickle" def dump(self, data: Any, path: str): """ diff --git a/magnus/pipeline.py b/magnus/pipeline.py deleted file mode 100644 index b801b072..00000000 --- a/magnus/pipeline.py +++ /dev/null @@ -1,556 +0,0 @@ -import json -import logging -from typing import Union - -from magnus import context, defaults, exceptions, graph, utils - -logger = logging.getLogger(defaults.NAME) - - -def get_default_configs() -> dict: - """ - User can provide extensions as part of their code base, magnus-config.yaml provides the place to put them. - """ - user_configs = {} - if utils.does_file_exist(defaults.USER_CONFIG_FILE): - user_configs = utils.load_yaml(defaults.USER_CONFIG_FILE) - - if not user_configs: - return {} - - user_defaults = user_configs.get("defaults", {}) - if user_defaults: - return user_defaults - - return {} - - -def prepare_configurations( - configuration_file: str = None, - pipeline_file: str = None, - run_id: str = None, - tag: Union[str, None] = None, - use_cached: Union[str, None] = "", - parameters_file: str = None, - force_local_executor: bool = False, -): - # pylint: disable=R0914 - """ - Replace the placeholders in the dag/config against the variables file. - - Attach the secrets_handler, run_log_store, catalog_handler to the executor and return it. - - Args: - variables_file (str): The variables file, if used or None - pipeline_file (str): The config/dag file - run_id (str): The run id of the run. 
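# Editor's note: an illustrative sketch, not part of the patch, of how the new
# magnus/parameters.py module above is meant to be used. It assumes the magnus
# package from this patch is importable; the parameter names ("model", "epochs",
# "lr", "layers") and the train() function are invented for the example.
from pydantic import BaseModel

from magnus.parameters import (
    filter_arguments_for_func,
    get_user_set_parameters,
    set_user_defined_params_as_environment_variables,
)


class ModelParams(BaseModel):
    lr: float
    layers: int


def train(model: ModelParams, epochs: int = 1) -> str:
    return f"training {model.layers} layers for {epochs} epochs at lr={model.lr}"


# Parameters travel between steps as JSON-serialised MAGNUS_PRM_* environment
# variables (defaults.PARAMETER_PREFIX, per the docstring above) ...
set_user_defined_params_as_environment_variables({"model": {"lr": 0.01, "layers": 4}, "epochs": 5})

# ... and are deserialised back into a plain dict on the receiving side.
params = get_user_set_parameters()  # {"model": {"lr": 0.01, "layers": 4}, "epochs": 5}

# filter_arguments_for_func matches the dict against train()'s signature,
# binding "model" into the ModelParams pydantic model and casting "epochs" to int.
kwargs = filter_arguments_for_func(train, params)
print(train(**kwargs))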
- tag (str): If a tag is provided at the run time - use_cached (str): Provide the run_id of the older run - - Returns: - executor.BaseExecutor : A prepared executor as per the dag/config - """ - magnus_defaults = get_default_configs() - - variables = utils.gather_variables() - - configuration = {} - if configuration_file: - configuration = utils.load_yaml(configuration_file) or {} - - # apply variables - configuration = utils.apply_variables(configuration, variables) - - # Run log settings, configuration over-rides everything - run_log_config = configuration.get("run_log_store", {}) - if not run_log_config: - run_log_config = magnus_defaults.get("run_log_store", defaults.DEFAULT_RUN_LOG_STORE) - run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config) - - # Catalog handler settings, configuration over-rides everything - catalog_config = configuration.get("catalog", {}) - if not catalog_config: - catalog_config = magnus_defaults.get("catalog", defaults.DEFAULT_CATALOG) - catalog_handler = utils.get_provider_by_name_and_type("catalog", catalog_config) - - # Secret handler settings, configuration over-rides everything - secrets_config = configuration.get("secrets", {}) - if not secrets_config: - secrets_config = magnus_defaults.get("secrets", defaults.DEFAULT_SECRETS) - secrets_handler = utils.get_provider_by_name_and_type("secrets", secrets_config) - - # experiment tracker settings, configuration over-rides everything - tracker_config = configuration.get("experiment_tracker", {}) - if not tracker_config: - tracker_config = magnus_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER) - tracker_handler = utils.get_provider_by_name_and_type("experiment_tracker", tracker_config) - - # executor configurations, configuration over rides everything - executor_config = configuration.get("executor", {}) - if force_local_executor: - executor_config = {"type": "local"} - - if not executor_config: - executor_config = magnus_defaults.get("executor", defaults.DEFAULT_EXECUTOR) - configured_executor = utils.get_provider_by_name_and_type("executor", executor_config) - - if pipeline_file: - # There are use cases where we are only preparing the executor - pipeline_config = utils.load_yaml(pipeline_file) - pipeline_config = utils.apply_variables(pipeline_config, variables=variables) - - logger.info("The input pipeline:") - logger.info(json.dumps(pipeline_config, indent=4)) - - # Create the graph - dag_config = pipeline_config["dag"] - dag_hash = utils.get_dag_hash(dag_config) - # TODO: Dag nodes should not self refer themselves - dag = graph.create_graph(dag_config) - - configured_executor.pipeline_file = pipeline_file - configured_executor.dag = dag - configured_executor.dag_hash = dag_hash - - configured_executor.run_id = run_id - configured_executor.tag = tag - configured_executor.use_cached = use_cached - - # Set a global executor for inter-module access later - context.executor = configured_executor - - configured_executor.run_log_store = run_log_store - configured_executor.catalog_handler = catalog_handler - configured_executor.secrets_handler = secrets_handler - configured_executor.experiment_tracker = tracker_handler - configured_executor.configuration_file = configuration_file - configured_executor.parameters_file = parameters_file - configured_executor.variables = variables - - return configured_executor - - -def execute( - configuration_file: str, - pipeline_file: str, - tag: str = None, - run_id: str = None, - use_cached: str = None, - parameters_file: 
str = None, -): - # pylint: disable=R0914,R0913 - """ - The entry point to magnus execution. This method would prepare the configurations and delegates traversal to the - executor - - Args: - pipeline_file (str): The config/dag file - run_id (str): The run id of the run. - tag (str): If a tag is provided at the run time - use_cached (str): The previous run_id to use. - parameters_file (str): The parameters being sent in to the application - """ - # Re run settings - run_id = utils.generate_run_id(run_id=run_id) - - mode_executor = prepare_configurations( - configuration_file=configuration_file, - pipeline_file=pipeline_file, - run_id=run_id, - tag=tag, - use_cached=use_cached, - parameters_file=parameters_file, - ) - mode_executor.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value - - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - - previous_run_log = None - if use_cached: - try: - previous_run_log = mode_executor.run_log_store.get_run_log_by_id(run_id=use_cached, full=True) - except exceptions.RunLogNotFoundError as _e: - msg = ( - f"There is no run by {use_cached} in the current run log store " - f"{mode_executor.run_log_store.service_name}. Please ensure that that run log exists to re-run.\n" - "Note: Even if the previous run used a different run log store, provide the run log store in the format" - " accepted by the current run log store." - ) - raise Exception(msg) from _e - - if previous_run_log.dag_hash != mode_executor.dag_hash: - logger.warning("The previous dag does not match to the current one!") - mode_executor.previous_run_log = previous_run_log - logger.info("Found a previous run log and using it as cache") - - # Prepare for graph execution - mode_executor.prepare_for_graph_execution() - - logger.info("Executing the graph") - mode_executor.execute_graph(dag=mode_executor.dag) - - mode_executor.send_return_code() - - -def execute_single_step( - configuration_file: str, - pipeline_file: str, - step_name: str, - run_id: str, - tag: str = None, - parameters_file: str = None, - use_cached: str = None, -): - # pylint: disable=R0914,R0913 - """ - The entry point into executing a single step of magnus. - - It should have similar set up of configurations to execute because orchestrator modes can initiate the execution. - - Args: - variables_file (str): The variables file, if used or None - step_name : The name of the step to execute in dot path convention - pipeline_file (str): The config/dag file - run_id (str): The run id of the run. - tag (str): If a tag is provided at the run time - parameters_file (str): The parameters being sent in to the application - - """ - run_id = utils.generate_run_id(run_id=run_id) - - mode_executor = prepare_configurations( - configuration_file=configuration_file, - pipeline_file=pipeline_file, - run_id=run_id, - tag=tag, - use_cached="", - parameters_file=parameters_file, - ) - mode_executor.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - try: - _ = mode_executor.dag.get_node_by_name(step_name) - except exceptions.NodeNotFoundError as e: - msg = f"The node by name {step_name} is not found in the graph. 
Please provide a valid node name" - raise Exception(msg) from e - - previous_run_log = None - if use_cached: - try: - previous_run_log = mode_executor.run_log_store.get_run_log_by_id(run_id=use_cached, full=True) - except exceptions.RunLogNotFoundError as _e: - msg = ( - f"There is no run by {use_cached} in the current run log store " - f"{mode_executor.run_log_store.service_name}. Please ensure that that run log exists to re-run.\n" - "Note: Even if the previous run used a different run log store, provide the run log store in the format" - " accepted by the current run log store." - ) - raise Exception(msg) from _e - - if previous_run_log.dag_hash != mode_executor.dag_hash: - logger.warning("The previous dag does not match to the current one!") - mode_executor.previous_run_log = previous_run_log - logger.info("Found a previous run log and using it as cache") - - mode_executor.single_step = step_name - mode_executor.prepare_for_graph_execution() - - logger.info("Executing the graph") - mode_executor.execute_graph(dag=mode_executor.dag) - - mode_executor.send_return_code() - - -def execute_single_node( - configuration_file: str, - pipeline_file: str, - step_name: str, - map_variable: str, - run_id: str, - tag: str = None, - parameters_file: str = None, -): - # pylint: disable=R0914,R0913 - """ - The entry point into executing a single node of magnus. Orchestration modes should extensively use this - entry point. - - It should have similar set up of configurations to execute because orchestrator modes can initiate the execution. - - Args: - variables_file (str): The variables file, if used or None - step_name : The name of the step to execute in dot path convention - pipeline_file (str): The config/dag file - run_id (str): The run id of the run. - tag (str): If a tag is provided at the run time - parameters_file (str): The parameters being sent in to the application - - """ - from magnus import nodes - - mode_executor = prepare_configurations( - configuration_file=configuration_file, - pipeline_file=pipeline_file, - run_id=run_id, - tag=tag, - use_cached="", - parameters_file=parameters_file, - ) - mode_executor.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - - mode_executor.prepare_for_node_execution() - - if not mode_executor.dag: - # There are a few entry points that make graph dynamically and do not have a dag defined statically. - run_log = mode_executor.run_log_store.get_run_log_by_id(run_id=run_id, full=False) - mode_executor.dag = graph.create_graph(run_log.run_config["pipeline"]) - - step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name) - - map_variable_dict = utils.json_to_ordered_dict(map_variable) - - node_to_execute, _ = graph.search_node_by_internal_name(mode_executor.dag, step_internal_name) - - logger.info("Executing the single node of : %s", node_to_execute) - mode_executor.execute_node(node=node_to_execute, map_variable=map_variable_dict) - - mode_executor.send_return_code(stage="execution") - - -def execute_single_brach( - configuration_file: str, - pipeline_file: str, - branch_name: str, - map_variable: str, - run_id: str, - tag: str, -): - # pylint: disable=R0914,R0913 - """ - The entry point into executing a branch of the graph. Interactive modes in parallel runs use this to execute - branches in parallel. - - This entry point is never used by its own but rather from a node. So the arguments sent into this are fewer. 
- - Args: - variables_file (str): The variables file, if used or None - branch_name : The name of the branch to execute, in dot.path.convention - pipeline_file (str): The config/dag file - run_id (str): The run id of the run. - tag (str): If a tag is provided at the run time - """ - from magnus import nodes - - mode_executor = prepare_configurations( - configuration_file=configuration_file, - pipeline_file=pipeline_file, - run_id=run_id, - tag=tag, - use_cached="", - ) - mode_executor.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - - branch_internal_name = nodes.BaseNode._get_internal_name_from_command_name(branch_name) - - map_variable_dict = utils.json_to_ordered_dict(map_variable) - - branch_to_execute = graph.search_branch_by_internal_name(mode_executor.dag, branch_internal_name) - - logger.info("Executing the single branch of %s", branch_to_execute) - mode_executor.execute_graph(dag=branch_to_execute, map_variable=map_variable_dict) - - mode_executor.send_return_code() - - -def execute_notebook( - notebook_file: str, - catalog_config: dict, - configuration_file: str, - tag: str = None, - run_id: str = None, - parameters_file: str = None, -): - # pylint: disable=R0914,R0913 - """ - The entry point to magnus execution of a notebook. This method would prepare the configurations and - delegates traversal to the executor - """ - run_id = utils.generate_run_id(run_id=run_id) - - mode_executor = prepare_configurations( - configuration_file=configuration_file, - run_id=run_id, - tag=tag, - parameters_file=parameters_file, - ) - - mode_executor.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - - step_config = { - "command": notebook_file, - "command_type": "notebook", - "type": "task", - "next": "success", - "catalog": catalog_config, - } - node = graph.create_node(name="executing job", step_config=step_config) - - # Prepare for graph execution - mode_executor.prepare_for_graph_execution() - - logger.info("Executing the job") - mode_executor.execute_job(node=node) - - mode_executor.send_return_code() - - -def execute_function( - command: str, - catalog_config: dict, - configuration_file: str, - tag: str = None, - run_id: str = None, - parameters_file: str = None, -): - # pylint: disable=R0914,R0913 - """ - The entry point to magnus execution of a function. 
This method would prepare the configurations and - delegates traversal to the executor - """ - run_id = utils.generate_run_id(run_id=run_id) - - mode_executor = prepare_configurations( - configuration_file=configuration_file, - run_id=run_id, - tag=tag, - parameters_file=parameters_file, - ) - - mode_executor.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - - # Prepare the graph with a single node - step_config = { - "command": command, - "command_type": "python", - "type": "task", - "next": "success", - "catalog": catalog_config, - } - node = graph.create_node(name="executing job", step_config=step_config) - - # Prepare for graph execution - mode_executor.prepare_for_graph_execution() - - logger.info("Executing the job") - mode_executor.execute_job(node=node) - - mode_executor.send_return_code() - - -def execute_nb_or_func( - run_id, - command: str, - catalog_config: dict, - configuration_file: str, - parameters_file: str = "", - tag: str = "", -): - """ - Internal function to execute a notebook or function. - This function is called by executors who do not execute in the same environment as the traversal. - - Examples include local-container or any transpilers or K8's job. - """ - mode_executor = prepare_configurations( - configuration_file=configuration_file, - run_id=run_id, - tag=tag, - parameters_file=parameters_file, - ) - - mode_executor.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - - command_type = "python" - if command.endswith(".ipynb"): - command_type = "notebook" - - step_config = { - "command": command, - "command_type": command_type, - "type": "task", - "next": "success", - "catalog": catalog_config, - } - node = graph.create_node(name="executing job", step_config=step_config) - - # Prepare for graph execution - mode_executor.prepare_for_node_execution() - logger.info("Executing the job") - mode_executor.execute_node(node=node) - - # Update the status of the run log - step_log = mode_executor.run_log_store.get_step_log(node._get_step_log_name(), run_id) - mode_executor.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status) - - mode_executor.send_return_code() - - -def fan( - configuration_file: str, - pipeline_file: str, - step_name: str, - mode: str, - map_variable: str, - run_id: str, - tag: str = None, - parameters_file: str = None, -): - # pylint: disable=R0914,R0913 - """ - The entry point to either fan in or out for a composite node. Only 3rd party orchestrators should use this. - - It should have similar set up of configurations to execute because orchestrator modes can initiate the execution. - - Args: - configuration_file (str): The configuration file. - mode: in or out - step_name : The name of the step to execute in dot path convention - pipeline_file (str): The config/dag file - run_id (str): The run id of the run. 
- tag (str): If a tag is provided at the run time - parameters_file (str): The parameters being sent in to the application - - """ - from magnus import nodes - - mode_executor = prepare_configurations( - configuration_file=configuration_file, - pipeline_file=pipeline_file, - run_id=run_id, - tag=tag, - use_cached="", - parameters_file=parameters_file, - ) - mode_executor.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value - utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - - mode_executor.prepare_for_node_execution() - - step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name) - node_to_execute, _ = graph.search_node_by_internal_name(mode_executor.dag, step_internal_name) - - map_variable_dict = utils.json_to_ordered_dict(map_variable) - - if mode == "in": - logger.info("Fanning in for : %s", node_to_execute) - mode_executor.fan_in(node=node_to_execute, map_variable=map_variable_dict) - elif mode == "out": - logger.info("Fanning out for : %s", node_to_execute) - mode_executor.fan_out(node=node_to_execute, map_variable=map_variable_dict) - else: - raise ValueError(f"Invalid mode {mode}") diff --git a/magnus/sdk.py b/magnus/sdk.py index 0f46ac31..e9603c58 100644 --- a/magnus/sdk.py +++ b/magnus/sdk.py @@ -1,239 +1,389 @@ +from __future__ import annotations + import logging -from logging.config import fileConfig -from types import FunctionType -from typing import Dict, List, Optional, Union - -from pkg_resources import resource_filename - -from magnus import defaults, graph, nodes, pipeline, utils - -logger = logging.getLogger(defaults.NAME) - - -# class step(object): - -# def __init__( -# self, name: Union[str, FunctionType], -# catalog_config: dict = None, magnus_config: str = None, -# parameters_file: str = None): -# """ -# This decorator could be used to make the function within the scope of magnus. - -# Since we are not orchestrating, it is expected that resource management happens outside this scope. - -# Args: -# name (str, callable): The name of the step. The step log would have the same name -# catalog_config (dict): The configuration of the catalog per step. -# magnus_config (str): The name of the file having the magnus config, defaults to None. -# """ -# if isinstance(name, FunctionType): -# name = name() - -# self.name = name -# self.catalog_config = catalog_config -# self.active = True # Check if we are executing the function via pipeline - -# if pipeline.global_executor \ -# and pipeline.global_executor.execution_plan == defaults.EXECUTION_PLAN.CHAINED.value: -# self.active = False -# return - -# self.executor = pipeline.prepare_configurations( -# configuration_file=magnus_config, parameters_file=parameters_file) - -# self.executor.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value -# run_id = self.executor.step_decorator_run_id -# if not run_id: -# msg = ( -# f'Step decorator expects run id from environment.' -# ) -# raise Exception(msg) - -# self.executor.run_id = run_id -# utils.set_magnus_environment_variables(run_id=run_id, configuration_file=magnus_config, tag=get_tag()) - -# try: -# # Try to get it if previous steps have created it -# # TODO: Can call the set_up_runlog now. -# run_log = self.executor.run_log_store.get_run_log_by_id(self.executor.run_id) -# if run_log.status in [defaults.FAIL, defaults.SUCCESS]: # TODO: Remove this in preference to defaults -# """ -# This check is mostly useless as we do not know when the graph ends as they are created dynamically. 
-# This only prevents from using a run_id which has reached a final state. -# #TODO: There is a need to create a status called step_success -# """ -# msg = ( -# f'The run_log for run_id: {run_id} already exists and is in {run_log.status} state.' -# ' Make sure that this was not run before.' -# ) -# raise Exception(msg) -# except exceptions.RunLogNotFoundError: -# # Create one if they are not created -# self.executor._set_up_run_log() - -# def __call__(self, func): -# """ -# The function is converted into a node and called via the magnus framework. -# """ -# @functools.wraps(func) -# def wrapped_f(*args, **kwargs): -# if not self.active: -# # If we are not running via decorator, execute the function -# return func(*args, **kwargs) - -# step_config = { -# 'command': func, -# 'command_type': 'python-function', -# 'type': 'task', -# 'next': 'not defined', -# 'catalog': self.catalog_config -# } -# node = graph.create_node(name=self.name, step_config=step_config) -# self.executor.execute_from_graph(node=node) -# run_log = self.executor.run_log_store.get_run_log_by_id(run_id=self.executor.run_id, full=False) -# # TODO: If the previous step succeeded, make the status of the run log step_success -# print(json.dumps(run_log.dict(), indent=4)) -# return wrapped_f - - -class Task: - """A exposed magnus task to be used in SDK.""" - - def __init__( - self, - name: str, - command: Union[str, FunctionType], - command_type: str = defaults.COMMAND_TYPE, - command_config: Optional[dict] = None, - catalog: Optional[dict] = None, - executor_config: Optional[dict] = None, - retry: int = 1, - on_failure: str = "", - next_node: str = "", - ): - self.name = name - self.command = command - self.command_type = command_type - self.command_config = command_config or {} - self.catalog = catalog or {} - self.executor_config = executor_config or {} - self.retry = retry - self.on_failure = on_failure - self.next_node = next_node or "success" - self.node: Optional[nodes.BaseNode] = None - - def _construct_node(self): - """Construct a node of the graph.""" - # TODO: The below has issues if the function and the pipeline are in the same module - # Something to do with __main__ being present - if isinstance(self.command, FunctionType): - self.command = utils.get_module_and_func_from_function(self.command) - - node_config = { - "type": "task", - "next_node": self.next_node, - "command": self.command, - "command_type": self.command_type, - "command_config": self.command_config, - "catalog": self.catalog, - "executor_config": self.executor_config, - "retry": self.retry, - "on_failure": self.on_failure, - } - # The node will temporarily have invalid branch names - self.node = graph.create_node(name=self.name, step_config=node_config, internal_branch_name="") - - def _fix_internal_name(self): - """Should be done after the parallel's are implemented.""" - pass - - -class AsIs: - """An exposed magnus as-is task to be used in SDK.""" - - def __init__( - self, - name: str, - mode_config: Optional[dict] = None, - retry: int = 1, - on_failure: str = "", - next_node: str = "", - **kwargs - ): - self.name = name - self.mode_config = mode_config or {} - self.retry = retry - self.on_failure = on_failure - self.next_node = next_node or "success" - self.additional_kwargs = kwargs or {} - self.node: Optional[nodes.BaseNode] = None - - def _construct_node(self): - node_config = { - "type": "as-is", - "next_node": self.next_node, - "mode_config": self.mode_config, - "retry": self.retry, - "on_failure": self.on_failure, - } - 
node_config.update(self.additional_kwargs) - # The node will temporarily have invalid branch names - self.node = graph.create_node(name=self.name, step_config=node_config, internal_branch_name="") - - def _fix_internal_name(self): - """Should be done after the parallel's are implemented.""" - pass - - -class Pipeline: - # A way for the user to define a pipeline - # TODO: Allow for nodes other than Task, AsIs - """An exposed magnus pipeline to be used in SDK.""" - - def __init__( - self, - start_at: Union[Task, AsIs], - name: str = "", - description: str = "", - max_time: int = defaults.MAX_TIME, - internal_branch_name: str = "", - ): - self.start_at = start_at - self.name = name - self.description = description - self.max_time = max_time - self.internal_branch_name = internal_branch_name - self.dag: Optional[graph.Graph] = None - - def construct(self, steps: List[Task]): - """Construct a pipeline from a list of tasks.""" - graph_config: Dict[str, Union[str, int]] = { - "description": self.description, - "name": self.name, - "max_time": self.max_time, - "internal_branch_name": self.internal_branch_name, - } - messages: List[str] = [] - for step in steps: - step._construct_node() - print(step.node.__dict__) - messages.extend(step.node.validate()) # type: ignore - - if not steps: - raise Exception("A dag needs at least one step") - - if messages: - raise Exception(", ".join(messages)) - - graph_config["start_at"] = self.start_at.node.name # type: ignore - - dag = graph.Graph(**graph_config) # type: ignore - dag.nodes = [step.node for step in steps] # type: ignore - - dag.add_terminal_nodes() - - dag.validate() - self.dag = dag +import os +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union + +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, field_validator, model_validator +from rich import print +from ruamel.yaml import YAML +from typing_extensions import Self + +from magnus import defaults, entrypoints, graph, utils +from magnus.extensions.nodes import FailNode, MapNode, ParallelNode, StubNode, SuccessNode, TaskNode +from magnus.nodes import TraversalNode + +logger = logging.getLogger(defaults.LOGGER_NAME) + +StepType = Union["Stub", "Task", "Success", "Fail", "Parallel", "Map"] +TraversalTypes = Union["Stub", "Task", "Parallel", "Map"] + + +ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"] + + +class Catalog(BaseModel): + """ + Use to instruct a task to sync data from/to the central catalog. + Please refer to [concepts](../../concepts/catalog) for more information. + + Attributes: + get (List[str]): List of glob patterns to get from central catalog to the compute data folder. + put (List[str]): List of glob patterns to put into central catalog from the compute data folder. + + Examples: + >>> from magnus import Catalog, Task + >>> catalog = Catalog(compute_data_folder="/path/to/data", get=["*.csv"], put=["*.csv"]) + + >>> task = Task(name="task", catalog=catalog, command="echo 'hello'") + + """ + + model_config = ConfigDict(extra="forbid") # Need to be for command, would be validated later + # Note: compute_data_folder was confusing to explain, might be introduced later. 
+ # compute_data_folder: str = Field(default="", alias="compute_data_folder") + get: List[str] = Field(default_factory=list, alias="get") + put: List[str] = Field(default_factory=list, alias="put") + + +class BaseTraversal(ABC, BaseModel): + name: str + next_node: str = Field(default="", alias="next") + terminate_with_success: bool = Field(default=False, exclude=True) + terminate_with_failure: bool = Field(default=False, exclude=True) + on_failure: str = Field(default="", alias="on_failure") + + model_config = ConfigDict(extra="forbid") + + @computed_field # type: ignore + @property + def internal_name(self) -> str: + return self.name + + def __rshift__(self, other: StepType) -> StepType: + if self.next_node: + raise Exception(f"The node {self} already has a next node: {self.next_node}") + self.next_node = other.name + + return other + + def __lshift__(self, other: TraversalNode) -> TraversalNode: + if other.next_node: + raise Exception(f"The {other} node already has a next node: {other.next_node}") + other.next_node = self.name + + return other + + def depends_on(self, node: StepType) -> Self: + assert not isinstance(node, Success) + assert not isinstance(node, Fail) + + if node.next_node: + raise Exception(f"The {node} node already has a next node: {node.next_node}") + + node.next_node = self.name + return self + + @model_validator(mode="after") + def validate_terminations(self) -> Self: + if self.terminate_with_failure and self.terminate_with_success: + raise AssertionError("A node cannot terminate with success and failure") + + if self.terminate_with_failure or self.terminate_with_success: + if self.next_node and self.next_node not in ["success", "fail"]: + raise AssertionError("A node being terminated cannot have a user defined next node") + + if self.terminate_with_failure: + self.next_node = "fail" + + if self.terminate_with_success: + self.next_node = "success" + + return self + + @abstractmethod + def create_node(self) -> TraversalNode: + ... + + +class Task(BaseTraversal): + """ + An execution node of the pipeline. + Please refer to [concepts](../../concepts/task) for more information. + + Attributes: + name (str): The name of the node. + command (str): The command to execute. + + - For python functions, [dotted path](../../concepts/task/#python_functions) to the function. + - For shell commands: command to execute in the shell. + - For notebooks: path to the notebook. + command_type (str): The type of command to execute. + Can be one of "shell", "python", or "notebook". + catalog (Optional[Catalog]): The catalog to sync data from/to. + Please see Catalog about the structure of the catalog. + overrides (Dict[str, Any]): Any overrides to the command. + Individual tasks can override the global configuration config by referring to the + specific override. + + For example, + ### Global configuration + ```yaml + executor: + type: local-container + config: + docker_image: "magnus/magnus:latest" + overrides: + custom_docker_image: + docker_image: "magnus/magnus:custom" + ``` + ### Task specific configuration + ```python + task = Task(name="task", command="echo 'hello'", command_type="shell", + overrides={'local-container': custom_docker_image}) + ``` + notebook_output_path (Optional[str]): The path to save the notebook output. + Only used when command_type is 'notebook', defaults to command+_out.ipynb + optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args. 
+ Only used when command_type is 'notebook', defaults to {} + output_cell_tag (Optional[str]): The tag of the output cell. + Only used when command_type is 'notebook', defaults to "magnus_output" + terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node. + terminate_with_success (bool): Whether to terminate the pipeline with a success after this node. + on_failure (str): The name of the node to execute if the step fails. + + """ + + command: str = Field(alias="command") + command_type: str = Field(default="python") + catalog: Optional[Catalog] = Field(default=None, alias="catalog") + overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides") + + notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path") + optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args") + output_cell_tag: Optional[str] = Field(default=None, alias="output_cell_tag") + + @field_validator("command_type", mode="before") + @classmethod + def validate_command_type(cls, value: str) -> str: + if value not in ALLOWED_COMMAND_TYPES: + raise ValueError(f"Invalid command_type: {value}") + return value + + @model_validator(mode="after") + def check_notebook_args(self) -> "Task": + if self.command_type != "notebook": + assert ( + self.notebook_output_path is None + ), "Only command_types of 'notebook' can be used with notebook_output_path" + + assert ( + self.optional_ploomber_args is None + ), "Only command_types of 'notebook' can be used with optional_ploomber_args" + + assert self.output_cell_tag is None, "Only command_types of 'notebook' can be used with output_cell_tag" + return self + + def create_node(self) -> TaskNode: + if not self.next_node: + if not (self.terminate_with_failure or self.terminate_with_success): + raise AssertionError("A node not being terminated must have a user defined next node") + return TaskNode.parse_from_config(self.model_dump(exclude_none=True)) + + +class Stub(BaseTraversal): + """ + A node that does nothing. + + A stub node can tak arbitrary number of arguments. + Please refer to [concepts](../../concepts/stub) for more information. + + Attributes: + name (str): The name of the node. + terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node. + terminate_with_success (bool): Whether to terminate the pipeline with a success after this node. + + """ + + model_config = ConfigDict(extra="allow") + catalog: Optional[Catalog] = Field(default=None, alias="catalog") + + def create_node(self) -> StubNode: + if not self.next_node: + if not (self.terminate_with_failure or self.terminate_with_success): + raise AssertionError("A node not being terminated must have a user defined next node") + + return StubNode.parse_from_config(self.model_dump(exclude_none=True)) + + +class Parallel(BaseTraversal): + """ + A node that executes multiple branches in parallel. + Please refer to [concepts](../../concepts/parallel) for more information. + + Attributes: + name (str): The name of the node. + branches (Dict[str, Pipeline]): A dictionary of branches to execute in parallel. + terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node. + terminate_with_success (bool): Whether to terminate the pipeline with a success after this node. + on_failure (str): The name of the node to execute if any of the branches fail. 
+ """ + + branches: Dict[str, "Pipeline"] + + @computed_field # type: ignore + @property + def graph_branches(self) -> Dict[str, graph.Graph]: + return {name: pipeline._dag.model_copy() for name, pipeline in self.branches.items()} + + def create_node(self) -> ParallelNode: + if not self.next_node: + if not (self.terminate_with_failure or self.terminate_with_success): + raise AssertionError("A node not being terminated must have a user defined next node") + + node = ParallelNode(name=self.name, branches=self.graph_branches, internal_name="", next_node=self.next_node) + return node + + +class Map(BaseTraversal): + """ + A node that iterates over a list of items and executes a pipeline for each item. + Please refer to [concepts](../../concepts/map) for more information. + + Attributes: + branch: The pipeline to execute for each item. + + iterate_on: The name of the parameter to iterate over. + The parameter should be defined either by previous steps or statically at the start of execution. + + iterate_as: The name of the iterable to be passed to functions. + + + overrides (Dict[str, Any]): Any overrides to the command. + + """ + + branch: "Pipeline" + iterate_on: str + iterate_as: str + overrides: Dict[str, Any] = Field(default_factory=dict) + + @computed_field # type: ignore + @property + def graph_branch(self) -> graph.Graph: + return self.branch._dag.model_copy() + + def create_node(self) -> MapNode: + if not self.next_node: + if not (self.terminate_with_failure or self.terminate_with_success): + raise AssertionError("A node not being terminated must have a user defined next node") + + node = MapNode( + name=self.name, + branch=self.graph_branch, + internal_name="", + next_node=self.next_node, + iterate_on=self.iterate_on, + iterate_as=self.iterate_as, + overrides=self.overrides, + ) + + return node + + +class Success(BaseModel): + """ + A node that represents a successful execution of the pipeline. + + Most often, there is no need to use this node as nodes can be instructed to + terminate_with_success and pipeline with add_terminal_nodes=True. + + Attributes: + name (str): The name of the node. + """ + + name: str = "success" + + @computed_field # type: ignore + @property + def internal_name(self) -> str: + return self.name + + def create_node(self) -> SuccessNode: + return SuccessNode.parse_from_config(self.model_dump()) + + +class Fail(BaseModel): + """ + A node that represents a failed execution of the pipeline. + + Most often, there is no need to use this node as nodes can be instructed to + terminate_with_failure and pipeline with add_terminal_nodes=True. + + Attributes: + name (str): The name of the node. + """ + + name: str = "fail" + + @computed_field # type: ignore + @property + def internal_name(self) -> str: + return self.name + + def create_node(self) -> FailNode: + return FailNode.parse_from_config(self.model_dump()) + + +class Pipeline(BaseModel): + """ + A Pipeline is a directed acyclic graph of Steps that define a workflow. + + Attributes: + steps (List[Stub | Task | Parallel | Map | Success | Fail]): A list of Steps that make up the Pipeline. + start_at (Stub | Task | Parallel | Map): The name of the first Step in the Pipeline. + name (str, optional): The name of the Pipeline. Defaults to "". + description (str, optional): A description of the Pipeline. Defaults to "". + add_terminal_nodes (bool, optional): Whether to add terminal nodes to the Pipeline. Defaults to True. + + The default behavior is to add "success" and "fail" nodes to the Pipeline. 
+        To add custom success and fail nodes, set add_terminal_nodes=False and create success
+        and fail nodes manually.
+
+    """
+
+    steps: List[StepType]
+    start_at: TraversalTypes
+    name: str = ""
+    description: str = ""
+    add_terminal_nodes: bool = True  # Adds "success" and "fail" nodes
+
+    internal_branch_name: str = ""
+
+    _dag: graph.Graph = PrivateAttr()
+    model_config = ConfigDict(extra="forbid")
+
+    def model_post_init(self, __context: Any) -> None:
+        self.steps = [model.model_copy(deep=True) for model in self.steps]
+
+        self._dag = graph.Graph(
+            start_at=self.start_at.name,
+            description=self.description,
+            internal_branch_name=self.internal_branch_name,
+        )
+
+        for step in self.steps:
+            if step.name == self.start_at.name:
+                if isinstance(step, Success) or isinstance(step, Fail):
+                    raise Exception("A success or fail node cannot be the start_at of the graph")
+                assert step.next_node
+            self._dag.add_node(step.create_node())
+
+        if self.add_terminal_nodes:
+            self._dag.add_terminal_nodes()
+
+        self._dag.check_graph()

     def execute(
         self,
@@ -241,32 +391,80 @@ def execute(
         run_id: str = "",
         tag: str = "",
         parameters_file: str = "",
+        use_cached: str = "",
         log_level: str = defaults.LOG_LEVEL,
+        output_pipeline_definition: str = "magnus-pipeline.yaml",
     ):
-        """Execute the pipeline.
+        """
+        *Execute* the Pipeline.
+
+        Execution of the pipeline can either:
+
+        Traverse and execute all the steps of the pipeline, e.g. [local execution](../../configurations/executors/local).
+
+        Or create the ```yaml``` representation of the pipeline for other executors.
+
+        Please refer to [concepts](../../concepts/executor) for more information.
+
+        Args:
+            configuration_file (str, optional): The path to the configuration file. Defaults to "".
+                The configuration file can be overridden by the environment variable MAGNUS_CONFIGURATION_FILE.
+
+            run_id (str, optional): The ID of the run. Defaults to "".
+            tag (str, optional): The tag of the run. Defaults to "".
+                Used to group multiple runs.

-        This method should be beefed up as the use cases grow.
+            parameters_file (str, optional): The path to the parameters file. Defaults to "".
+            use_cached (str, optional): The run_id of an older execution to recover from. Defaults to "",
+                which disables recovery from a cached run.
+
+            log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
+            output_pipeline_definition (str, optional): The path to the output pipeline definition file.
+                Defaults to "magnus-pipeline.yaml".
+
+                Only applicable for execution via the SDK with non ```local``` executors.
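+
+        Example (an illustrative sketch; assumes ``pipeline`` is an already constructed Pipeline
+        object and that the file names below are placeholders):
+
+            pipeline.execute(configuration_file="config.yaml", parameters_file="parameters.yaml")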
""" - fileConfig(resource_filename(__name__, "log_config.ini")) - logger = logging.getLogger(defaults.NAME) + from magnus.extensions.executor.local.implementation import LocalExecutor + from magnus.extensions.executor.mocked.implementation import MockedExecutor + logger.setLevel(log_level) run_id = utils.generate_run_id(run_id=run_id) - mode_executor = pipeline.prepare_configurations( + configuration_file = os.environ.get("MAGNUS_CONFIGURATION_FILE", configuration_file) + run_context = entrypoints.prepare_configurations( configuration_file=configuration_file, run_id=run_id, tag=tag, parameters_file=parameters_file, + use_cached=use_cached, ) - mode_executor.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value + run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) - mode_executor.dag = self.dag + dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True) + + run_context.dag = graph.create_graph(dag_definition) + + print("Working with context:") + print(run_context) + + if not (isinstance(run_context.executor, LocalExecutor) or isinstance(run_context.executor, MockedExecutor)): + logger.debug(run_context.dag.model_dump(by_alias=True)) + yaml = YAML() + + with open(output_pipeline_definition, "w", encoding="utf-8") as f: + yaml.dump( + {"dag": run_context.dag.model_dump(by_alias=True, exclude_none=True)}, + f, + ) + + return + # Prepare for graph execution - mode_executor.prepare_for_graph_execution() + run_context.executor.prepare_for_graph_execution() logger.info("Executing the graph") - mode_executor.execute_graph(dag=mode_executor.dag) + run_context.executor.execute_graph(dag=run_context.dag) - mode_executor.send_return_code() + return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id) diff --git a/magnus/secrets.py b/magnus/secrets.py index 3b3d86e0..a0a49716 100644 --- a/magnus/secrets.py +++ b/magnus/secrets.py @@ -1,47 +1,44 @@ import logging import os -from typing import Union +from abc import ABC, abstractmethod -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict -from magnus import defaults, exceptions, utils +import magnus.context as context +from magnus import defaults, exceptions -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) # --8<-- [start:docs] -class BaseSecrets: +class BaseSecrets(ABC, BaseModel): """ A base class for Secrets Handler. All implementations should extend this class. - Note: As a general guideline, do not extract anything from the config to set class level attributes. - Integration patterns modify the config after init to change behaviors. - Access config properties using getters/property of the class. - Raises: NotImplementedError: Base class and not implemented """ - service_name = "" - - class Config(BaseModel): - pass + service_name: str = "" + service_type: str = "secrets" + model_config = ConfigDict(extra="forbid") - def __init__(self, config: dict, **kwargs): # pylint: disable=unused-argument - config = config or {} - self.config = self.Config(**config) + @property + def _context(self): + return context.run_context - def get(self, name: str = None, **kwargs) -> Union[str, dict]: + @abstractmethod + def get(self, name: str, **kwargs) -> str: """ Return the secret by name. - If no name is give, return all the secrets. Args: name (str): The name of the secret to return. Raises: NotImplementedError: Base class and hence not implemented. 
+ exceptions.SecretNotFoundError: Secret not found in the secrets manager. """ raise NotImplementedError @@ -54,13 +51,9 @@ class DoNothingSecretManager(BaseSecrets): Does nothing secret manager """ - service_name = "do-nothing" + service_name: str = "do-nothing" - def __init__(self, config, **kwargs): - super().__init__(config, **kwargs) - self.secrets = {} - - def get(self, name: str = None, **kwargs) -> Union[str, dict]: + def get(self, name: str, **kwargs) -> str: """ If a name is provided, return None else return empty dict. @@ -68,128 +61,35 @@ def get(self, name: str = None, **kwargs) -> Union[str, dict]: name (str): The name of the secret to retrieve Raises: - Exception: If the secret by the name is not found. + exceptions.SecretNotFoundError: Secret not found in the secrets manager. Returns: - [type]: [description] + [str]: The value of the secret """ - if name: - return "" - return {} + return "" class EnvSecretsManager(BaseSecrets): """ - A secret manager via environment variables. - - This secret manager returns nothing if the key does not match + A secret manager which uses environment variables for secrets. """ - service_name = "env-secrets-manager" - - def __init__(self, config, **kwargs): - super().__init__(config, **kwargs) + service_name: str = "env-secrets" - def get(self, name: str = None, **kwargs) -> Union[str, dict]: + def get(self, name: str, **kwargs) -> str: """ - If a name is provided, we look for that in the environment. - If a environment variable by that name is not found, we raise an Exception. - - If a name is not provided, we return an empty dictionary. - - Args: - name (str): The name of the secret to retrieve - - Raises: - Exception: If the secret by the name is not found. - - Returns: - [type]: [description] - """ - if name: - try: - return os.environ[name] - except KeyError: - raise exceptions.SecretNotFoundError(secret_name=name, secret_setting="environment") - - return {} - - -class DotEnvSecrets(BaseSecrets): - """ - A secret manager which uses .env files for secrets. - - We recommend this secrets manager only for local development and should not be used for anything close to - production. - """ - - service_name = "dotenv" - - class Config(BaseModel): - location: str = defaults.DOTENV_FILE_LOCATION - - def __init__(self, config, **kwargs): - super().__init__(config, **kwargs) - self.secrets = {} - - @property - def secrets_location(self): - """ - Return the location of the .env file. - If the user has not over-ridden it, it defaults to .env file in the project root. - - Returns: - str: The location of the secrets file - """ - return self.config.location - - def _load_secrets(self): - """ - We assume that a dotenv file is of format, - key=value -> secrets[key]='value' - key1=value1# comment -> secrets[key1]='value1' - key2=value2 # comment. -> secrets[key2]='value2' - - We strip the secret value of any empty spaces at the start and end. - - Raises: - Exception: If the file at secrets_location is not found. - Exception: If the secrets are not formatted correctly. 
- """ - secrets_location = self.secrets_location - if not utils.does_file_exist(secrets_location): - raise Exception(f"Did not find the secrets file in {secrets_location}") - - with open(secrets_location, "r") as fr: - for secret_line in fr: - secret_line = secret_line.split("#")[0] #  To remove any comments the user might have put - data = secret_line.split("=") - if len(data) != 2: - raise Exception("A secret should be of format, secret_name=secret_value[# any comment]") - key, value = data - self.secrets[key] = value.strip("\n") - - def get(self, name: str = None, **kwargs) -> Union[str, dict]: - """ - Get a secret of name from the secrets file. - - If no name is provided, we return all + If a name is provided, return None else return empty dict. Args: name (str): The name of the secret to retrieve Raises: - Exception: If the secret by the name is not found. + exceptions.SecretNotFoundError: Secret not found in the secrets manager. Returns: - [type]: [description] + [str]: The value of the secret """ - self._load_secrets() - if not name: - return self.secrets - - if name in self.secrets: - return self.secrets[name] - - secrets_location = self.secrets_location - raise exceptions.SecretNotFoundError(secret_name=name, secret_setting=secrets_location) + try: + return os.environ[name] + except KeyError: + raise exceptions.SecretNotFoundError(secret_name=name, secret_setting="environment variables") diff --git a/magnus/tasks.py b/magnus/tasks.py index b63e4045..6fee16e9 100644 --- a/magnus/tasks.py +++ b/magnus/tasks.py @@ -1,39 +1,59 @@ +import ast import contextlib import importlib import io import json import logging import os -import shutil import subprocess import sys -import tempfile -from pathlib import Path -from typing import ClassVar, List, cast +from typing import Any, Dict, Tuple -from pydantic import BaseModel, Extra, validator +from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator +from pydantic._internal._model_construction import ModelMetaclass from stevedore import driver -from magnus import defaults, utils +import magnus.context as context +from magnus import defaults, parameters, utils +from magnus.defaults import TypeMapVariable -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) +logging.getLogger("stevedore").setLevel(logging.CRITICAL) -# --8<-- [start:docs] +# TODO: Can we add memory peak, cpu usage, etc. to the metrics? -class BaseTaskType(BaseModel): # pylint: disable=too-few-public-methods +class BaseTaskType(BaseModel): """A base task class which does the execution of command defined by the user.""" - task_type: ClassVar[str] = "" + task_type: str = Field(serialization_alias="command_type") + node_name: str = Field(exclude=True) - node_name: str + model_config = ConfigDict(extra="forbid") - class Config: - extra = Extra.forbid + @property + def _context(self): + return context.run_context - def _get_parameters(self, map_variable: dict = None, **kwargs) -> dict: - """Return the parameters in scope for the execution. + def get_cli_options(self) -> Tuple[str, dict]: + """ + Key is the name of the cli option and value is the value of the cli option. + This should always be in sync with the cli options defined in execute_*. + + Returns: + str: The name of the cli option. + dict: The dict of cli options for the task. 
+ + Raises: + NotImplementedError: Base class, not implemented + """ + raise NotImplementedError() + + def _get_parameters(self, map_variable: TypeMapVariable = None, **kwargs) -> Dict[str, Any]: + """ + By this step, all the parameters are present as environment variables as json strings. + Return the parameters in scope for the execution. Args: map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None. @@ -41,9 +61,9 @@ def _get_parameters(self, map_variable: dict = None, **kwargs) -> dict: Returns: dict: The parameters dictionary in-scope for the task execution """ - return utils.get_user_set_parameters(remove=False) + return parameters.get_user_set_parameters(remove=False) - def execute_command(self, map_variable: dict = None, **kwargs): + def execute_command(self, map_variable: TypeMapVariable = None, **kwargs): """The function to execute the command. And map_variable is sent in as an argument into the function. @@ -56,30 +76,23 @@ def execute_command(self, map_variable: dict = None, **kwargs): """ raise NotImplementedError() - def _set_parameters(self, parameters: dict = None, **kwargs): + def _set_parameters(self, params: BaseModel, **kwargs): """Set the parameters back to the environment variables. Args: parameters (dict, optional): The parameters to set back as env variables. Defaults to None. """ # Nothing to do - if not parameters: + if not params: return - if not isinstance(parameters, dict): - msg = ( - f"call to function {self.command} returns of type: {type(parameters)}. " - "Only dictionaries are supported as return values for functions as part part of magnus pipeline." - ) - logger.warn(msg) - return + if not isinstance(params, BaseModel) or isinstance(params, ModelMetaclass): + raise ValueError("Output variable of a function can only be a pydantic model or dynamic model.") - for key, value in parameters.items(): - logger.info(f"Setting User defined parameter {key} with value: {value}") - os.environ[defaults.PARAMETER_PREFIX + key] = json.dumps(value) + parameters.set_user_defined_params_as_environment_variables(params.model_dump(by_alias=True)) @contextlib.contextmanager - def output_to_file(self, map_variable: dict = None): + def output_to_file(self, map_variable: TypeMapVariable = None): """Context manager to put the output of a function execution to catalog. 
Args: @@ -88,7 +101,7 @@ def output_to_file(self, map_variable: dict = None): """ from magnus import put_in_catalog # Causing cyclic imports - log_file_name = self.node_name.replace(" ", "_") + log_file_name = self.node_name.replace(" ", "_") + ".execution.log" if map_variable: for _, value in map_variable.items(): log_file_name += "_" + str(value) @@ -109,35 +122,52 @@ def output_to_file(self, map_variable: dict = None): os.remove(log_file.name) -# --8<-- [end:docs] +class EasyModel(BaseModel): + model_config = ConfigDict(extra="allow") + + +def make_pydantic_model( + variables: Dict[str, Any], + prefix: str = "", +) -> BaseModel: + prefix_removed = {utils.remove_prefix(k, prefix): v for k, v in variables.items()} + return EasyModel(**prefix_removed) class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods """The task class for python command.""" - task_type: ClassVar[str] = "python" - + task_type: str = Field(default="python", serialization_alias="command_type") command: str - @validator("command") + @field_validator("command") + @classmethod def validate_command(cls, command: str): if not command: raise Exception("Command cannot be empty for shell task") return command - def execute_command(self, map_variable: dict = None, **kwargs): + def get_cli_options(self) -> Tuple[str, dict]: + """Return the cli options for the task. + + Returns: + dict: The cli options for the task + """ + return "function", {"command": self.command} + + def execute_command(self, map_variable: TypeMapVariable = None, **kwargs): """Execute the notebook as defined by the command.""" - module, func = utils.get_module_and_func_names(self.command) + module, func = utils.get_module_and_attr_names(self.command) sys.path.insert(0, os.getcwd()) # Need to add the current directory to path imported_module = importlib.import_module(module) f = getattr(imported_module, func) - parameters = self._get_parameters() - filtered_parameters = utils.filter_arguments_for_func(f, parameters, map_variable) + params = self._get_parameters() + filtered_parameters = parameters.filter_arguments_for_func(f, params, map_variable) if map_variable: - os.environ[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] = json.dumps(map_variable) + os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable) logger.info(f"Calling {func} from {module} with {filtered_parameters}") @@ -151,74 +181,24 @@ def execute_command(self, map_variable: dict = None, **kwargs): raise if map_variable: - del os.environ[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] + del os.environ[defaults.MAP_VARIABLE] self._set_parameters(user_set_parameters) -class PythonLambdaTaskType(BaseTaskType): # pylint: disable=too-few-public-methods - """The task class for python-lambda command.""" - - task_type: ClassVar[str] = "python-lambda" - - command: str - - @validator("command") - def validate_command(cls, command: str): - if not command: - raise Exception("Command cannot be empty for shell task") - - return command - - def execute_command(self, map_variable: dict = None, **kwargs): - """Execute the lambda function as defined by the command. - - Args: - map_variable (dict, optional): If the node is part of an internal branch. Defaults to None. - - Raises: - Exception: If the lambda function has _ or __ in it that can cause issues. - """ - if "_" in self.command or "__" in self.command: - msg = ( - f"Command given to {self.task_type} cannot have _ or __ in them. " - "The string is supposed to be for simple expressions only." 
- ) - raise Exception(msg) - - f = eval(self.command) - - parameters = self._get_parameters() - filtered_parameters = utils.filter_arguments_for_func(f, parameters, map_variable) - - if map_variable: - os.environ[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] = json.dumps(map_variable) - - logger.info(f"Calling lambda function: {self.command} with {filtered_parameters}") - try: - user_set_parameters = f(**filtered_parameters) - except Exception as _e: - msg = f"Call to the function {self.command} with {filtered_parameters} did not succeed.\n" - logger.exception(msg) - logger.exception(_e) - raise - - if map_variable: - del os.environ[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] - - self._set_parameters(user_set_parameters) - - class NotebookTaskType(BaseTaskType): """The task class for Notebook based execution.""" - task_type: ClassVar[str] = "notebook" - + task_type: str = Field(default="notebook", serialization_alias="command_type") command: str - notebook_output_path: str = "" + notebook_output_path: str = Field(default="", validate_default=True) + output_cell_tag: str = Field(default="magnus_output", validate_default=True) optional_ploomber_args: dict = {} - @validator("command") + _output_tag: str = "magnus_output" + + @field_validator("command") + @classmethod def notebook_should_end_with_ipynb(cls, command: str): if not command: raise Exception("Command should point to the ipynb file") @@ -228,14 +208,36 @@ def notebook_should_end_with_ipynb(cls, command: str): return command - @validator("notebook_output_path") - def correct_notebook_output_path(cls, notebook_output_path: str, values: dict): + @field_validator("notebook_output_path") + @classmethod + def correct_notebook_output_path(cls, notebook_output_path: str, info: ValidationInfo): if notebook_output_path: return notebook_output_path - return "".join(values["command"].command.split(".")[:-1]) + "_out.ipynb" + command = info.data["command"] + return "".join(command.split(".")[:-1]) + "_out.ipynb" + + def get_cli_options(self) -> Tuple[str, dict]: + return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path} + + def _parse_notebook_for_output(self, notebook: Any): + collected_params = {} + + for cell in notebook.cells: + d = cell.dict() + # identify the tags attached to the cell. + tags = d.get("metadata", {}).get("tags", {}) + if self.output_cell_tag in tags: + # There is a tag that has output + outputs = d["outputs"] + + for out in outputs: + params = out.get("text", "{}") + collected_params.update(ast.literal_eval(params)) + + return collected_params - def execute_command(self, map_variable: dict = None, **kwargs): + def execute_command(self, map_variable: TypeMapVariable = None, **kwargs): """Execute the python notebook as defined by the command. 
Args: @@ -256,12 +258,12 @@ def execute_command(self, map_variable: dict = None, **kwargs): notebook_output_path = self.notebook_output_path if map_variable: - os.environ[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] = json.dumps(map_variable) + os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable) for _, value in map_variable.items(): notebook_output_path += "_" + str(value) - ploomber_optional_args = self.optional_ploomber_args # type: ignore + ploomber_optional_args = self.optional_ploomber_args kwds = { "input_path": self.command, @@ -270,15 +272,19 @@ def execute_command(self, map_variable: dict = None, **kwargs): "log_output": True, "progress_bar": False, } - kwds.update(ploomber_optional_args) + collected_params: Dict[str, Any] = {} with self.output_to_file(map_variable=map_variable) as _: - pm.execute_notebook(**kwds) + out = pm.execute_notebook(**kwds) + collected_params = self._parse_notebook_for_output(out) + + collected_params_model = make_pydantic_model(collected_params) + self._set_parameters(collected_params_model) put_in_catalog(notebook_output_path) if map_variable: - del os.environ[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] + del os.environ[defaults.MAP_VARIABLE] except ImportError as e: msg = ( @@ -288,20 +294,22 @@ def execute_command(self, map_variable: dict = None, **kwargs): class ShellTaskType(BaseTaskType): - """The task class for shell based commands.""" - - task_type: ClassVar[str] = "shell" + """ + The task class for shell based commands. + """ + task_type: str = Field(default="shell", serialization_alias="command_type") command: str - @validator("command") + @field_validator("command") + @classmethod def validate_command(cls, command: str): if not command: raise Exception("Command cannot be empty for shell task") return command - def execute_command(self, map_variable: dict = None, **kwargs): + def execute_command(self, map_variable: TypeMapVariable = None, **kwargs): # Using shell=True as we want to have chained commands to be executed in the same shell. """Execute the shell command as defined by the command. @@ -311,155 +319,46 @@ def execute_command(self, map_variable: dict = None, **kwargs): subprocess_env = os.environ.copy() if map_variable: - subprocess_env[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] = json.dumps(map_variable) + subprocess_env[defaults.MAP_VARIABLE] = json.dumps(map_variable) + + command = self.command.strip() + " && env | grep MAGNUS" + logger.info(f"Executing shell command: {command}") + + output_parameters = {} with subprocess.Popen( - self.command, + command, shell=True, env=subprocess_env, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=True, + stderr=subprocess.PIPE, + text=True, ) as proc, self.output_to_file(map_variable=map_variable) as _: for line in proc.stdout: # type: ignore logger.info(line) print(line) + if line.startswith(defaults.PARAMETER_PREFIX): + key, value = line.strip().split("=", 1) + try: + output_parameters[key] = json.loads(value) + except json.JSONDecodeError: + output_parameters[key] = value # simple data types + + if line.startswith(defaults.TRACK_PREFIX): + key, value = line.split("=", 1) + os.environ[key] = value.strip() + proc.wait() if proc.returncode != 0: raise Exception("Command failed") - -class ContainerTaskType(BaseTaskType): - """ - TODO: This is not fully done - The task class for container based execution. 
- """ - - task_type: ClassVar[str] = "container" - - image: str - context_path: str = "/opt/magnus" - command: str = "" # Would be defaulted to the entrypoint of the container - data_folder: str = "data" # Would be relative to the context_path - output_parameters_file: str = "parameters.json" # would be relative to the context_path - secrets: List[str] = [] - - _temp_dir: str = "" - - class Config: - underscore_attrs_are_private = True - - def execute_command(self, map_variable: dict = None, **kwargs): - # Conditional import - from magnus.context import executor as context_executor - - try: - import docker # pylint: disable=C0415 - - client = docker.from_env() - api_client = docker.APIClient() - except ImportError as e: - msg = "Task type of container requires docker to be installed. Please install via optional: docker" - logger.exception(msg) - raise Exception(msg) from e - except Exception as ex: - logger.exception("Could not get access to docker") - raise Exception("Could not get the docker socket file, do you have docker installed?") from ex - - container_env_variables = {} - - for key, value in self._get_parameters().items(): - container_env_variables[defaults.PARAMETER_PREFIX + key] = value - - if map_variable: - container_env_variables[defaults.PARAMETER_PREFIX + "MAP_VARIABLE"] = json.dumps(map_variable) - - for secret_name in self.secrets: - secret_value = context_executor.secrets_handler.get(secret_name) # type: ignore - container_env_variables[secret_name] = secret_value - - mount_volumes = self.get_mount_volumes() - - executor_config = context_executor._resolve_executor_config(context_executor.context_node) # type: ignore - optional_docker_args = executor_config.get("optional_docker_args", {}) - - try: - container = client.containers.create( - self.image, - command=self.command, - auto_remove=False, - network_mode="host", - environment=container_env_variables, - volumes=mount_volumes, - **optional_docker_args, + self._set_parameters( + params=make_pydantic_model( + output_parameters, + defaults.PARAMETER_PREFIX, ) - - container.start() - stream = api_client.logs(container=container.id, timestamps=True, stream=True, follow=True) - while True: - try: - output = next(stream).decode("utf-8") - output = output.strip("\r\n") - logger.info(output) - except StopIteration: - logger.info("Docker Run completed") - break - - exit_status = api_client.inspect_container(container.id)["State"]["ExitCode"] - container.remove(force=True) - - if exit_status != 0: - msg = f"Docker command failed with exit code {exit_status}" - raise Exception(msg) - - if self._temp_dir: - parameters_file = Path(self._temp_dir) / self.output_parameters_file - container_return_parameters = {} - if parameters_file.is_file(): - with open(parameters_file, "r") as f: - container_return_parameters = json.load(f) - - self._set_parameters(container_return_parameters) - except Exception as _e: - logger.exception("Problems with spinning up the container") - raise _e - finally: - if self._temp_dir: - shutil.rmtree(self._temp_dir) - - def get_mount_volumes(self) -> dict: - """ - Get the required mount volumes from the configuration. - We need to mount both the catalog and the parameter.json files. - - Returns: - dict: The mount volumes in the format that docker expects. 
- """ - from magnus.context import executor as context_executor - from magnus.executor import BaseExecutor - - compute_data_folder = cast(BaseExecutor, context_executor).get_effective_compute_data_folder() - mount_volumes = {} - - # Create temporary directory for parameters.json and map it to context_path - self._temp_dir = tempfile.mkdtemp() - mount_volumes[str(Path(self._temp_dir).resolve())] = { - "bind": f"{str(Path(self.context_path).resolve())}/", - "mode": "rw", - } - logger.info(f"Mounting {str(Path(self._temp_dir).resolve())} to {str(Path(self.context_path).resolve())}/") - - # Map the data folder to context_path/data_folder - if compute_data_folder: - path_to_data = Path(self.context_path) / self.data_folder - mount_volumes[str(Path(compute_data_folder).resolve())] = { - "bind": f"{str(path_to_data)}/", - "mode": "rw", - } - logger.info(f"Mounting {compute_data_folder} to {str(path_to_data)}/") - - return mount_volumes + ) def create_task(kwargs_for_init) -> BaseTaskType: @@ -473,17 +372,16 @@ def create_task(kwargs_for_init) -> BaseTaskType: Returns: tasks.BaseTaskType: The command object """ - command_type = kwargs_for_init.pop("command_type", defaults.COMMAND_TYPE) - - command_config = kwargs_for_init.pop("command_config", {}) - kwargs_for_init.update(command_config) + # The dictionary cannot be modified + kwargs = kwargs_for_init.copy() + command_type = kwargs.pop("command_type", defaults.COMMAND_TYPE) try: task_mgr = driver.DriverManager( namespace="tasks", name=command_type, invoke_on_load=True, - invoke_kwds=kwargs_for_init, + invoke_kwds=kwargs, ) return task_mgr.driver except Exception as _e: diff --git a/magnus/utils.py b/magnus/utils.py index 174ceeea..b00c212e 100644 --- a/magnus/utils.py +++ b/magnus/utils.py @@ -5,26 +5,26 @@ import logging import os import subprocess -import sys from collections import OrderedDict from datetime import datetime -from inspect import signature from pathlib import Path from string import Template as str_template -from types import FunctionType -from typing import TYPE_CHECKING, Callable, List, Mapping, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, Dict, Mapping, Tuple, Union -from ruamel.yaml import YAML # type: ignore +from ruamel.yaml import YAML from stevedore import driver +import magnus.context as context from magnus import defaults, names +from magnus.defaults import TypeMapVariable -if TYPE_CHECKING: - from magnus.executor import BaseExecutor +if TYPE_CHECKING: # pragma: no cover + from magnus.extensions.nodes import TaskNode from magnus.nodes import BaseNode -logger = logging.getLogger(defaults.NAME) +logger = logging.getLogger(defaults.LOGGER_NAME) +logging.getLogger("stevedore").setLevel(logging.CRITICAL) def does_file_exist(file_path: str) -> bool: @@ -65,7 +65,7 @@ def safe_make_dir(directory: Union[str, Path]): Path(directory).mkdir(parents=True, exist_ok=True) -def generate_run_id(run_id: str = None) -> str: +def generate_run_id(run_id: str = "") -> str: """Generate a new run_id. If the input run_id is none, we create one based on time stamp. @@ -84,7 +84,7 @@ def generate_run_id(run_id: str = None) -> str: return run_id -def apply_variables(apply_to: dict, variables: dict) -> dict: +def apply_variables(apply_to: Dict[str, Any], variables: Dict[str, str]) -> Dict[str, Any]: """Safely applies the variables to a config. 
For example: For config: @@ -106,11 +106,11 @@ def apply_variables(apply_to: dict, variables: dict) -> dict: raise Exception("Argument Variables should be dict") json_d = json.dumps(apply_to) - transformed = str_template(json_d).safe_substitute(**variables) + transformed = str_template(json_d).substitute(**variables) return json.loads(transformed) -def get_module_and_func_names(command: str) -> Tuple[str, str]: +def get_module_and_attr_names(command: str) -> Tuple[str, str]: """Given a string of module.function, this functions returns the module name and func names. It also checks to make sure that the string is of expected 'module.func' format @@ -132,21 +132,7 @@ def get_module_and_func_names(command: str) -> Tuple[str, str]: return module, func -def get_module_and_func_from_function(command: FunctionType) -> str: - """Given a function, this function returns the module name and func names. - - Args: - command (FunctionType): The function to extract the module and func names from - - Returns: - str: the module and function in module.func format. - """ - module_name = sys.modules[command.__module__] - func_name = command.__name__ - return f"{module_name}.{func_name}" - - -def get_dag_hash(dag: dict) -> str: +def get_dag_hash(dag: Dict[str, Any]) -> str: """Generates the hash of the dag definition. Args: @@ -159,7 +145,7 @@ def get_dag_hash(dag: dict) -> str: return hashlib.sha1(dag_str.encode("utf-8")).hexdigest() -def load_yaml(file_path: str, load_type: str = "safe") -> dict: +def load_yaml(file_path: str, load_type: str = "safe") -> Dict[str, Any]: """Loads an yaml and returns the dictionary. Args: @@ -291,7 +277,7 @@ def get_local_docker_image_id(image_name: str) -> str: return "" -def get_git_code_identity(run_log_store): +def get_git_code_identity(): """Returns a code identity object for version controlled code. Args: @@ -300,7 +286,7 @@ def get_git_code_identity(run_log_store): Returns: magnus.datastore.CodeIdentity: The code identity used by the run log store. """ - code_identity = run_log_store.create_code_identity() + code_identity = context.run_context.run_log_store.create_code_identity() try: code_identity.code_identifier = get_current_code_commit() code_identity.code_identifier_type = "git" @@ -329,7 +315,7 @@ def remove_prefix(text: str, prefix: str) -> str: return text # or whatever is given -def get_tracked_data() -> dict: +def get_tracked_data() -> Dict[str, str]: """Scans the environment variables to find any user tracked variables that have a prefix MAGNUS_TRACK_ Removes the environment variable to prevent any clashes in the future steps. @@ -340,37 +326,19 @@ def get_tracked_data() -> dict: for env_var, value in os.environ.items(): if env_var.startswith(defaults.TRACK_PREFIX): key = remove_prefix(env_var, defaults.TRACK_PREFIX) - tracked_data[key.lower()] = json.loads(value) - del os.environ[env_var] - return tracked_data - - -def get_user_set_parameters(remove: bool = False) -> dict: - """Scans the environment variables for any user returned parameters that have a prefix MAGNUS_PRM_. - - Args: - remove (bool, optional): Flag to remove the parameter if needed. Defaults to False. 
- - Returns: - dict: The dictionary of found user returned parameters - """ - parameters = {} - for env_var, value in os.environ.items(): - if env_var.startswith(defaults.PARAMETER_PREFIX): - key = remove_prefix(env_var, defaults.PARAMETER_PREFIX) try: - parameters[key.lower()] = json.loads(value) + tracked_data[key.lower()] = json.loads(value) except json.decoder.JSONDecodeError: - logger.error(f"Parameter {key} could not be JSON decoded, adding the literal value") - parameters[key.lower()] = value + logger.warning(f"Tracker {key} could not be JSON decoded, adding the literal value") + tracked_data[key.lower()] = value - if remove: - del os.environ[env_var] - return parameters + del os.environ[env_var] + return tracked_data -def diff_dict(d1: dict, d2: dict) -> dict: - """Given two dicts d1 and d2, return a new dict that has upsert items from d1. +def diff_dict(d1: Dict[str, Any], d2: Dict[str, Any]) -> Dict[str, Any]: + """ + Given two dicts d1 and d2, return a new dict that has upsert items from d1. Args: d1 (reference): The reference dict. @@ -422,58 +390,9 @@ def get_data_hash(file_name: str): return hash_bytestr_iter(file_as_blockiter(open(file_name, "rb")), hashlib.sha256()) # pragma: no cover -def filter_arguments_for_func(func: Callable, parameters: dict, map_variable: dict = None) -> dict: - """Inspects the function to be called as part of the pipeline to find the arguments of the function. - Matches the function arguments to the parameters available either by command line or by up stream steps. - - Args: - func (Callable): The function to inspect - parameters (dict): The parameters available for the run - - Returns: - dict: The parameters matching the function signature - """ - sign = signature(func) - return filter_arguments_from_parameters( - parameters=parameters, - signature_parameters=sign.parameters, - map_variable=map_variable, - ) - - -def filter_arguments_from_parameters( - parameters: dict, - signature_parameters: Union[List, Mapping], - map_variable: dict = None, -) -> dict: - """Filters the given parameters based on the signature of the function. - - Args: - parameters (dict): All the parameters available for the run - signature_parameters (Union[List, Mapping]): The arguments of the function signature - map_variable (dict, optional): If the function is part of a map step. Defaults to None. - - Returns: - dict: The filtered parameters of the function. - """ - arguments = {} - - for param, value in parameters.items(): - if param in signature_parameters: - arguments[param] = value - - if map_variable: - for iterate_as, value in map_variable.items(): - if iterate_as in signature_parameters: - arguments[iterate_as] = value - - return arguments - - def get_node_execution_command( - executor: BaseExecutor, node: BaseNode, - map_variable: dict = None, + map_variable: TypeMapVariable = None, over_write_run_id: str = "", ) -> str: """A utility function to standardize execution call to a node via command line. @@ -486,7 +405,7 @@ def get_node_execution_command( Returns: str: The execution command to run a node via command line. 
""" - run_id = executor.run_id + run_id = context.run_context.run_id if over_write_run_id: run_id = over_write_run_id @@ -495,30 +414,29 @@ def get_node_execution_command( action = f"magnus execute_single_node {run_id} " f"{node._command_friendly_name()}" f" --log-level {log_level}" - if executor.pipeline_file: - action = action + f" --file {executor.pipeline_file}" + if context.run_context.pipeline_file: + action = action + f" --file {context.run_context.pipeline_file}" if map_variable: action = action + f" --map-variable '{json.dumps(map_variable)}'" - if executor.configuration_file: - action = action + f" --config-file {executor.configuration_file}" + if context.run_context.configuration_file: + action = action + f" --config-file {context.run_context.configuration_file}" - if executor.parameters_file: - action = action + f" --parameters-file {executor.parameters_file}" + if context.run_context.parameters_file: + action = action + f" --parameters-file {context.run_context.parameters_file}" - if executor.tag: - action = action + f" --tag {executor.tag}" + if context.run_context.tag: + action = action + f" --tag {context.run_context.tag}" return action def get_fan_command( - executor: BaseExecutor, mode: str, node: BaseNode, run_id: str, - map_variable: dict = None, + map_variable: TypeMapVariable = None, ) -> str: """ An utility function to return the fan "in or out" command @@ -538,25 +456,25 @@ def get_fan_command( f"magnus fan {run_id} " f"{node._command_friendly_name()} " f"--mode {mode} " - f"--file {executor.pipeline_file} " + f"--file {context.run_context.pipeline_file} " f"--log-level {log_level} " ) - if executor.configuration_file: - action = action + f" --config-file {executor.configuration_file} " + if context.run_context.configuration_file: + action = action + f" --config-file {context.run_context.configuration_file} " - if executor.parameters_file: - action = action + f" --parameters-file {executor.parameters_file}" + if context.run_context.parameters_file: + action = action + f" --parameters-file {context.run_context.parameters_file}" if map_variable: action = action + f" --map-variable '{json.dumps(map_variable)}'" - if executor.tag: - action = action + f" --tag {executor.tag}" + if context.run_context.tag: + action = action + f" --tag {context.run_context.tag}" return action -def get_job_execution_command(executor: BaseExecutor, node: BaseNode, over_write_run_id: str = "") -> str: +def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> str: """Get the execution command to run a job via command line. This function should be used by all executors to submit jobs in remote environment @@ -569,40 +487,36 @@ def get_job_execution_command(executor: BaseExecutor, node: BaseNode, over_write Returns: str: The execution command to run a job via command line. 
""" - run_id = executor.run_id + + run_id = context.run_context.run_id if over_write_run_id: run_id = over_write_run_id log_level = logging.getLevelName(logger.getEffectiveLevel()) - action = f"magnus execute_nb_or_func {run_id} " f" --log-level {log_level}" - - action = action + f" {node.config.command}" + cli_command, cli_options = node.executable.get_cli_options() - if executor.configuration_file: - action = action + f" --config-file {executor.configuration_file}" + action = f"magnus execute_{cli_command} {run_id} " f" --log-level {log_level}" - if executor.parameters_file: - action = action + f" --parameters-file {executor.parameters_file}" + action = action + f" --entrypoint {defaults.ENTRYPOINT.SYSTEM.value}" - if executor.tag: - action = action + f" --tag {executor.tag}" + if context.run_context.configuration_file: + action = action + f" --config-file {context.run_context.configuration_file}" - catalog_config = node._get_catalog_settings() or {} + if context.run_context.parameters_file: + action = action + f" --parameters-file {context.run_context.parameters_file}" - data_folder = catalog_config.get("compute_data_folder", None) - if data_folder: - action = action + f" --data-folder {data_folder}" + if context.run_context.tag: + action = action + f" --tag {context.run_context.tag}" - put_in_catalog = catalog_config.get("put", []) or [] # The put itself can be None - for every_put in put_in_catalog: - action = action + f" --put-in-catalog {every_put}" + for key, value in cli_options.items(): + action = action + f" --{key} {value}" return action -def get_provider_by_name_and_type(service_type: str, service_details: dict): +def get_provider_by_name_and_type(service_type: str, service_details: defaults.ServiceConfig): """Given a service type, one of executor, run_log_store, catalog, secrets and the config return the exact child class implementing the service. We use stevedore to do the work for us. @@ -620,7 +534,7 @@ def get_provider_by_name_and_type(service_type: str, service_details: dict): namespace = service_type service_name = service_details["type"] - service_config = {} + service_config: Mapping[str, Any] = {} if "config" in service_details: service_config = service_details.get("config", {}) @@ -630,7 +544,7 @@ def get_provider_by_name_and_type(service_type: str, service_details: dict): namespace=namespace, name=service_name, invoke_on_load=True, - invoke_kwds={"config": service_config}, + invoke_kwds={**service_config}, ) return mgr.driver except Exception as _e: @@ -652,7 +566,7 @@ def get_duration_between_datetime_strings(start_time: str, end_time: str) -> str return str(end - start) -def get_run_config(executor: BaseExecutor) -> dict: +def get_run_config() -> dict: """Given an executor with assigned services, return the run_config. Args: @@ -661,41 +575,12 @@ def get_run_config(executor: BaseExecutor) -> dict: Returns: dict: The run_config. 
""" - from magnus.catalog import BaseCatalog - - run_config = {} - - run_config["executor"] = {"type": executor.service_name, "config": executor.config} - - run_config["run_log_store"] = { - "type": executor.run_log_store.service_name, - "config": executor.run_log_store.config, - } - - run_config["catalog"] = { - "type": cast(BaseCatalog, executor.catalog_handler).service_name, - "config": cast(BaseCatalog, executor.catalog_handler).config, - } - - run_config["secrets"] = { - "type": executor.secrets_handler.service_name, - "config": executor.secrets_handler.config, - } - - run_config["experiment_tracker"] = { - "type": executor.experiment_tracker.service_name, - "config": executor.experiment_tracker.config, - } - run_config["variables"] = executor.variables # type: ignore - - if executor.dag: - # Some executions do not define a dag - run_config["pipeline"] = executor.dag._to_dict() + run_config = context.run_context.model_dump(by_alias=True) return run_config -def json_to_ordered_dict(json_str: str) -> OrderedDict: +def json_to_ordered_dict(json_str: str) -> TypeMapVariable: """Decode a JSON str into OrderedDict. Args: @@ -710,7 +595,7 @@ def json_to_ordered_dict(json_str: str) -> OrderedDict: return OrderedDict() -def set_magnus_environment_variables(run_id: str = None, configuration_file: str = None, tag: str = None): +def set_magnus_environment_variables(run_id: str = "", configuration_file: str = "", tag: str = "") -> None: """Set the environment variables used by magnus. This function should be called during the prepare configurations by all executors. diff --git a/mkdocs.yml b/mkdocs.yml index a75f37f3..c587c87e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -3,6 +3,8 @@ site_description: "Pipelines made easy" strict: true repo_url: https://github.com/AstraZeneca/magnus-core +# TODO: Set up versioning + docs_dir: "docs" theme: logo: assets/logo.png @@ -30,89 +32,122 @@ theme: icon: material/lightbulb name: Switch to light mode features: - - tabs + - content.code.copy + - content.code.annotate + - content.tabs.link + - header.autohide - navigation.top - toc.integrate + - toc.follow - search.suggest - navigation.tabs - navigation.tabs.sticky + - navigation.sections + - navigation.expand + - navigation.instant + - navigation.instant.progress + - navigation.tracking # Extensions markdown_extensions: - - pymdownx.snippets: - base_path: "." + - mkdocs-click - admonition - - codehilite + - def_list + - attr_list + - md_in_html + - pymdownx.details - footnotes - - pymdownx.highlight - - pymdownx.superfences + - tables + - pymdownx.tasklist: + custom_checkbox: true + - pymdownx.tabbed: + alternate_style: true + - pymdownx.snippets: + base_path: "." 
+ dedent_subsections: true + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.caret + - pymdownx.mark + - pymdownx.tilde - pymdownx.emoji: - emoji_index: !!python/name:materialx.emoji.twemoji - emoji_generator: !!python/name:materialx.emoji.to_svg + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg - toc: permalink: true separator: "_" toc_depth: 2 - - attr_list - - md_in_html plugins: - search - section-index + - mkdocstrings: + enabled: !ENV [ENABLE_MKDOCSTRINGS, true] + default_handler: python + handlers: + python: + options: + show_source: false + docstring_section_style: list + allow_inspection: false + show_signature_annotations: true + separate_signature: true + +# extra_javascript: +# - https://unpkg.com/mermaid@9.2/dist/mermaid.min.js + +extra_css: + - css/extra.css nav: - - "Home": "index.md" - - "Getting started": - - "getting_started/installation.md" - - "Example Run": "getting_started/example.md" - - "Explanation - input": "getting_started/brief-concepts-input.md" - - "Explanation - output": "getting_started/brief-concepts-output.md" - - "Example Deployment": "getting_started/example-deployment.md" - - "Wrap up": "getting_started/wrap-up.md" - - "Why magnus": "getting_started/why-magnus.md" - # - "Tutorial": - # - "Overview": "tutorial/overview.md" - # - "Single notebook": "tutorial/single-notebook.md" - # - "Modular notebooks": "tutorial/modular-notebooks.md" - # - "Switching configs": "tutorial/switching-configs.md" - # - "Using parallels": "tutorial/using-parallel.md" - # - "Dynamic looping": "tutorial/dynamic-looping.md" - # - "Modular pipelines": "tutorial/modular-dags.md" - # - "Script based": "tutorial/script-based.md" - # - "What next?": "tutorial/wrap-up.md" + - "Magnus": + - "Introduction": "index.md" + - "Usage": "usage.md" + - "Example": + - "Pipeline Definition": "example/example.md" + - "Steps": "example/steps.md" + - "Flow of data": "example/dataflow.md" + - "Reproducibility": "example/reproducibility.md" + - "Experiment tracking": "example/experiment-tracking.md" + - "Secrets": "example/secrets.md" + - "Retry failures": "example/retry-after-failure.md" + - "Why magnus?": "why-magnus.md" - "Concepts": - - "Node": "concepts/nodes.md" - - "Command types": "concepts/command-types.md" - - "Dag": "concepts/dag.md" - - "Catalog": - - "concepts/catalog.md" - - "Do Nothing Catalog": "concepts/catalog-implementations/do-nothing.md" - - "File System": "concepts/catalog-implementations/file-system.md" - - "Secrets": - - "concepts/secrets.md" - - "Dot Env": "concepts/secrets-implementations/dot-env.md" - - "Do Nothing": "concepts/secrets-implementations/do-nothing.md" - - "Env Secrets Manager": "concepts/secrets-implementations/env-secrets-manager.md" - - "Run Log": - - "concepts/run-log.md" - - "Buffered": "concepts/run-log-implementations/bufferred.md" - - "File System": "concepts/run-log-implementations/file-system.md" - - "Chunked File System": "concepts/run-log-implementations/chunked-fs.md" - - "Experiment Tracking": - - "concepts/experiment-tracking.md" - - "Integration": - - "concepts/integration.md" - - "Executors": - - "concepts/executor.md" - - "Local": "concepts/executor-implementations/local.md" - - "Local Container": 
"concepts/executor-implementations/local-container.md" - - "Demo Renderer": "concepts/executor-implementations/demo-renderer.md" - - "Command Line": - - "command-line.md" - - "How do I?": - - "how-do-i.md" - - "Examples": - - "examples.md" - - "Extensions": - - "extensions/extensions.md" - - "Releases": "RELEASES.md" + - "tl;dr": "concepts/the-big-picture.md" + - "Pipeline": "concepts/pipeline.md" + - "Executor": "concepts/executor.md" + - "Parameters": "concepts/parameters.md" + - "Run log": "concepts/run-log.md" + - "Catalog": "concepts/catalog.md" + - "Experiment tracking": "concepts/experiment-tracking.md" + - "Secrets": "concepts/secrets.md" + - "Nodes": + - "Stub": "concepts/stub.md" + - "Task": "concepts/task.md" + - "Parallel": "concepts/parallel.md" + - "Map": "concepts/map.md" + - "Nesting": "concepts/nesting.md" + - "Configurations": + - "Overview": "configurations/overview.md" + - "Executor": + - "local": "configurations/executors/local.md" + - "mocked": "configurations/executors/mocked.md" + - "local-container": "configurations/executors/local-container.md" + - "argo workflows": "configurations/executors/argo.md" + - "container environments": "configurations/executors/container-environments.md" + - "Run log": "configurations/run-log.md" + - "Catalog": "configurations/catalog.md" + - "Secrets": "configurations/secrets.md" + - "Experiment tracking": "configurations/experiment-tracking.md" + - "Python API": "interactions.md" + - "Python SDK": "sdk.md" + - "Extensions": "extensions.md" + - "Roadmap": "roadmap.md" diff --git a/poetry.lock b/poetry.lock index a1925cb2..a7c028eb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,56 +1,114 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
+ +[[package]] +name = "altgraph" +version = "0.17.4" +description = "Python graph (network) package" +optional = false +python-versions = "*" +files = [ + {file = "altgraph-0.17.4-py2.py3-none-any.whl", hash = "sha256:642743b4750de17e655e6711601b077bc6598dbfa3ba5fa2b2a35ce12b508dff"}, + {file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"}, +] + +[[package]] +name = "annotated-types" +version = "0.6.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, + {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} [[package]] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" -optional = true +optional = false python-versions = "*" files = [ {file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"}, {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, ] +[[package]] +name = "arrow" +version = "1.2.3" +description = "Better dates & times for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "arrow-1.2.3-py3-none-any.whl", hash = "sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2"}, + {file = "arrow-1.2.3.tar.gz", hash = "sha256:3934b30ca1b9f292376d9db15b19446088d12ec58629bc3f0da28fd55fb633a1"}, +] + +[package.dependencies] +python-dateutil = ">=2.7.0" + [[package]] name = "asttokens" -version = "2.2.1" +version = "2.4.1" description = "Annotate AST trees with source code positions" -optional = true +optional = false python-versions = "*" files = [ - {file = "asttokens-2.2.1-py2.py3-none-any.whl", hash = "sha256:6b0ac9e93fb0335014d382b8fa9b3afa7df546984258005da0b9e7095b3deb1c"}, - {file = "asttokens-2.2.1.tar.gz", hash = "sha256:4622110b2a6f30b77e1473affaa97e711bc2f07d3f10848420ff1898edbe94f3"}, + {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, + {file = "asttokens-2.4.1.tar.gz", hash = "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0"}, ] [package.dependencies] -six = "*" +six = ">=1.12.0" [package.extras] -test = ["astroid", "pytest"] +astroid = ["astroid (>=1,<2)", "astroid (>=2,<4)"] +test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] [[package]] name = "attrs" -version = "23.1.0" +version = "23.2.0" description = "Classes Without Boilerplate" -optional = true +optional = false python-versions = ">=3.7" files = [ - {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, - {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, + {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, + {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, ] [package.extras] cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]", "pre-commit"] +dev = ["attrs[tests]", 
"pre-commit"] docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] + +[[package]] +name = "babel" +version = "2.14.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Babel-2.14.0-py3-none-any.whl", hash = "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287"}, + {file = "Babel-2.14.0.tar.gz", hash = "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363"}, +] + +[package.dependencies] +pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] [[package]] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" -optional = true +optional = false python-versions = "*" files = [ {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, @@ -61,45 +119,63 @@ files = [ name = "backoff" version = "2.2.1" description = "Function decoration for backoff and retry" -optional = true +optional = false python-versions = ">=3.7,<4.0" files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" -version = "23.3.0" +version = "23.12.1" description = "The uncompromising code formatter." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"}, - {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"}, - {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"}, - {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"}, - {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"}, - {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"}, - {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"}, - {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"}, - {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"}, - {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"}, - {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"}, - {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"}, - {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"}, - {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"}, - {file = "black-23.3.0-py3-none-any.whl", hash = 
"sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"}, - {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"}, + {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, + {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, + {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, + {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, + {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, + {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, + {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, + {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, + {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, + {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, + {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, + {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, + {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"}, + {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"}, + {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"}, + {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"}, + {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, + {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, + {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, + {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, + {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, + {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, ] [package.dependencies] @@ -109,24 +185,23 @@ packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" tomli = {version = ">=1.1.0", markers = 
"python_version < \"3.11\""} -typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""} -typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "bleach" -version = "6.0.0" +version = "6.1.0" description = "An easy safelist-based HTML-sanitizing tool." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "bleach-6.0.0-py3-none-any.whl", hash = "sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4"}, - {file = "bleach-6.0.0.tar.gz", hash = "sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414"}, + {file = "bleach-6.1.0-py3-none-any.whl", hash = "sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6"}, + {file = "bleach-6.1.0.tar.gz", hash = "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe"}, ] [package.dependencies] @@ -134,90 +209,78 @@ six = ">=1.9.0" webencodings = "*" [package.extras] -css = ["tinycss2 (>=1.1.0,<1.2)"] +css = ["tinycss2 (>=1.1.0,<1.3)"] [[package]] name = "certifi" -version = "2023.7.22" +version = "2023.11.17" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, - {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, + {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, + {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, ] [[package]] name = "cffi" -version = "1.15.1" +version = "1.16.0" description = "Foreign Function Interface for Python calling C code." 
optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, - {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, - {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, - {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, - {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, - {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, - {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, - {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, - {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, - {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, - {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, - {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, - {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, - {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, - {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, - {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, - {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, - {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, - {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = 
"cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + 
{file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, ] [package.dependencies] @@ -225,127 +288,127 @@ pycparser = "*" [[package]] name = "cfgv" -version = "3.3.1" +version = "3.4.0" description = "Validate configuration and produce human readable error messages." optional = false -python-versions = ">=3.6.1" +python-versions = ">=3.8" files = [ - {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, - {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] [[package]] name = "charset-normalizer" -version = "3.2.0" +version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" files = [ - {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, - {file = 
"charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, - {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + 
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", 
hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = 
"sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] [[package]] name = "click" -version = "8.1.4" +version = "8.1.3" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.4-py3-none-any.whl", hash = "sha256:2739815aaa5d2c986a88f1e9230c55e17f0caad3d958a5e13ad0797c166db9e3"}, - {file = "click-8.1.4.tar.gz", hash = "sha256:b97d0c74955da062a7d4ef92fadb583806a585b2ea81958a81bd72726cbb8e37"}, + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} - -[[package]] -name = "click-log" -version = "0.4.0" -description = "Logging integration for Click" -optional = false -python-versions = "*" -files = [ - {file = "click-log-0.4.0.tar.gz", hash = "sha256:3970f8570ac54491237bcdb3d8ab5e3eef6c057df29f8c3d1151a51a9c23b975"}, - {file = "click_log-0.4.0-py2.py3-none-any.whl", hash = "sha256:a43e394b528d52112af599f2fc9e4b7cf3c15f94e53581f74fa6867e68c91756"}, -] - -[package.dependencies] -click = "*" [[package]] name = "click-plugins" @@ -364,6 +427,17 @@ click = ">=4.0" [package.extras] dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] +[[package]] +name = "cloudpickle" +version = "3.0.0" +description = "Pickler class to extend the standard pickle.Pickler functionality" +optional = true +python-versions = ">=3.8" +files = [ + {file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"}, + {file = "cloudpickle-3.0.0.tar.gz", hash = "sha256:996d9a482c6fb4f33c1a35335cf8afd065d2a56e973270364840712d9131a882"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -377,82 +451,74 @@ files = [ [[package]] name = "commit-linter" -version = "1.0.2" +version = "1.0.3" description = "simple git hooks scripts for a better git experience that enforces you to use the known commit messages conventions" optional = false python-versions = ">=3.6" files = [ - {file = "commit-linter-1.0.2.tar.gz", hash = "sha256:ea3cb1f157ec77d20031c6aff7b91e2de64c5864f65045925dd0ab1eccd95256"}, - {file = "commit_linter-1.0.2-py3-none-any.whl", hash = "sha256:b27fc0ff9a7deac6b1b33efff0a440348680e2af84d0bf54461937ae70cab2d8"}, + {file = "commit-linter-1.0.3.tar.gz", hash = "sha256:e380cac6d9010dab4559380e8d153b71743526bf042d3828dacd6efe544d8aba"}, + {file = "commit_linter-1.0.3-py3-none-any.whl", hash = "sha256:404fc2adb7ad49fddc27b1fa4424c17bf41556dbaed8dc4c274ca1e104825fbb"}, ] [[package]] name = "coverage" -version = "7.2.7" +version = "7.4.0" description = "Code coverage measurement for Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "coverage-7.2.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d39b5b4f2a66ccae8b7263ac3c8170994b65266797fb96cbbfd3fb5b23921db8"}, - {file = "coverage-7.2.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d040ef7c9859bb11dfeb056ff5b3872436e3b5e401817d87a31e1750b9ae2fb"}, - {file = "coverage-7.2.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba90a9563ba44a72fda2e85302c3abc71c5589cea608ca16c22b9804262aaeb6"}, - {file = 
"coverage-7.2.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7d9405291c6928619403db1d10bd07888888ec1abcbd9748fdaa971d7d661b2"}, - {file = "coverage-7.2.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31563e97dae5598556600466ad9beea39fb04e0229e61c12eaa206e0aa202063"}, - {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ebba1cd308ef115925421d3e6a586e655ca5a77b5bf41e02eb0e4562a111f2d1"}, - {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cb017fd1b2603ef59e374ba2063f593abe0fc45f2ad9abdde5b4d83bd922a353"}, - {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62a5c7dad11015c66fbb9d881bc4caa5b12f16292f857842d9d1871595f4495"}, - {file = "coverage-7.2.7-cp310-cp310-win32.whl", hash = "sha256:ee57190f24fba796e36bb6d3aa8a8783c643d8fa9760c89f7a98ab5455fbf818"}, - {file = "coverage-7.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:f75f7168ab25dd93110c8a8117a22450c19976afbc44234cbf71481094c1b850"}, - {file = "coverage-7.2.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:06a9a2be0b5b576c3f18f1a241f0473575c4a26021b52b2a85263a00f034d51f"}, - {file = "coverage-7.2.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5baa06420f837184130752b7c5ea0808762083bf3487b5038d68b012e5937dbe"}, - {file = "coverage-7.2.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdec9e8cbf13a5bf63290fc6013d216a4c7232efb51548594ca3631a7f13c3a3"}, - {file = "coverage-7.2.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52edc1a60c0d34afa421c9c37078817b2e67a392cab17d97283b64c5833f427f"}, - {file = "coverage-7.2.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb"}, - {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:afb17f84d56068a7c29f5fa37bfd38d5aba69e3304af08ee94da8ed5b0865833"}, - {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:48c19d2159d433ccc99e729ceae7d5293fbffa0bdb94952d3579983d1c8c9d97"}, - {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e1f928eaf5469c11e886fe0885ad2bf1ec606434e79842a879277895a50942a"}, - {file = "coverage-7.2.7-cp311-cp311-win32.whl", hash = "sha256:33d6d3ea29d5b3a1a632b3c4e4f4ecae24ef170b0b9ee493883f2df10039959a"}, - {file = "coverage-7.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:5b7540161790b2f28143191f5f8ec02fb132660ff175b7747b95dcb77ac26562"}, - {file = "coverage-7.2.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f2f67fe12b22cd130d34d0ef79206061bfb5eda52feb6ce0dba0644e20a03cf4"}, - {file = "coverage-7.2.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a342242fe22407f3c17f4b499276a02b01e80f861f1682ad1d95b04018e0c0d4"}, - {file = "coverage-7.2.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:171717c7cb6b453aebac9a2ef603699da237f341b38eebfee9be75d27dc38e01"}, - {file = "coverage-7.2.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49969a9f7ffa086d973d91cec8d2e31080436ef0fb4a359cae927e742abfaaa6"}, - {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:b46517c02ccd08092f4fa99f24c3b83d8f92f739b4657b0f146246a0ca6a831d"}, - {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a3d33a6b3eae87ceaefa91ffdc130b5e8536182cd6dfdbfc1aa56b46ff8c86de"}, - {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:976b9c42fb2a43ebf304fa7d4a310e5f16cc99992f33eced91ef6f908bd8f33d"}, - {file = "coverage-7.2.7-cp312-cp312-win32.whl", hash = "sha256:8de8bb0e5ad103888d65abef8bca41ab93721647590a3f740100cd65c3b00511"}, - {file = "coverage-7.2.7-cp312-cp312-win_amd64.whl", hash = "sha256:9e31cb64d7de6b6f09702bb27c02d1904b3aebfca610c12772452c4e6c21a0d3"}, - {file = "coverage-7.2.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:58c2ccc2f00ecb51253cbe5d8d7122a34590fac9646a960d1430d5b15321d95f"}, - {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d22656368f0e6189e24722214ed8d66b8022db19d182927b9a248a2a8a2f67eb"}, - {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a895fcc7b15c3fc72beb43cdcbdf0ddb7d2ebc959edac9cef390b0d14f39f8a9"}, - {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84606b74eb7de6ff581a7915e2dab7a28a0517fbe1c9239eb227e1354064dcd"}, - {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0a5f9e1dbd7fbe30196578ca36f3fba75376fb99888c395c5880b355e2875f8a"}, - {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:419bfd2caae268623dd469eff96d510a920c90928b60f2073d79f8fe2bbc5959"}, - {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2aee274c46590717f38ae5e4650988d1af340fe06167546cc32fe2f58ed05b02"}, - {file = "coverage-7.2.7-cp37-cp37m-win32.whl", hash = "sha256:61b9a528fb348373c433e8966535074b802c7a5d7f23c4f421e6c6e2f1697a6f"}, - {file = "coverage-7.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:b1c546aca0ca4d028901d825015dc8e4d56aac4b541877690eb76490f1dc8ed0"}, - {file = "coverage-7.2.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:54b896376ab563bd38453cecb813c295cf347cf5906e8b41d340b0321a5433e5"}, - {file = "coverage-7.2.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3d376df58cc111dc8e21e3b6e24606b5bb5dee6024f46a5abca99124b2229ef5"}, - {file = "coverage-7.2.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e330fc79bd7207e46c7d7fd2bb4af2963f5f635703925543a70b99574b0fea9"}, - {file = "coverage-7.2.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e9d683426464e4a252bf70c3498756055016f99ddaec3774bf368e76bbe02b6"}, - {file = "coverage-7.2.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d13c64ee2d33eccf7437961b6ea7ad8673e2be040b4f7fd4fd4d4d28d9ccb1e"}, - {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b7aa5f8a41217360e600da646004f878250a0d6738bcdc11a0a39928d7dc2050"}, - {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8fa03bce9bfbeeef9f3b160a8bed39a221d82308b4152b27d82d8daa7041fee5"}, - {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:245167dd26180ab4c91d5e1496a30be4cd721a5cf2abf52974f965f10f11419f"}, - {file = "coverage-7.2.7-cp38-cp38-win32.whl", hash = "sha256:d2c2db7fd82e9b72937969bceac4d6ca89660db0a0967614ce2481e81a0b771e"}, - {file = "coverage-7.2.7-cp38-cp38-win_amd64.whl", 
hash = "sha256:2e07b54284e381531c87f785f613b833569c14ecacdcb85d56b25c4622c16c3c"}, - {file = "coverage-7.2.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:537891ae8ce59ef63d0123f7ac9e2ae0fc8b72c7ccbe5296fec45fd68967b6c9"}, - {file = "coverage-7.2.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06fb182e69f33f6cd1d39a6c597294cff3143554b64b9825d1dc69d18cc2fff2"}, - {file = "coverage-7.2.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:201e7389591af40950a6480bd9edfa8ed04346ff80002cec1a66cac4549c1ad7"}, - {file = "coverage-7.2.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f6951407391b639504e3b3be51b7ba5f3528adbf1a8ac3302b687ecababf929e"}, - {file = "coverage-7.2.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f48351d66575f535669306aa7d6d6f71bc43372473b54a832222803eb956fd1"}, - {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b29019c76039dc3c0fd815c41392a044ce555d9bcdd38b0fb60fb4cd8e475ba9"}, - {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:81c13a1fc7468c40f13420732805a4c38a105d89848b7c10af65a90beff25250"}, - {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:975d70ab7e3c80a3fe86001d8751f6778905ec723f5b110aed1e450da9d4b7f2"}, - {file = "coverage-7.2.7-cp39-cp39-win32.whl", hash = "sha256:7ee7d9d4822c8acc74a5e26c50604dff824710bc8de424904c0982e25c39c6cb"}, - {file = "coverage-7.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:eb393e5ebc85245347950143969b241d08b52b88a3dc39479822e073a1a8eb27"}, - {file = "coverage-7.2.7-pp37.pp38.pp39-none-any.whl", hash = "sha256:b7b4c971f05e6ae490fef852c218b0e79d4e52f79ef0c8475566584a8fb3e01d"}, - {file = "coverage-7.2.7.tar.gz", hash = "sha256:924d94291ca674905fe9481f12294eb11f2d3d3fd1adb20314ba89e94f44ed59"}, + {file = "coverage-7.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36b0ea8ab20d6a7564e89cb6135920bc9188fb5f1f7152e94e8300b7b189441a"}, + {file = "coverage-7.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0676cd0ba581e514b7f726495ea75aba3eb20899d824636c6f59b0ed2f88c471"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ca5c71a5a1765a0f8f88022c52b6b8be740e512980362f7fdbb03725a0d6b9"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c97726520f784239f6c62506bc70e48d01ae71e9da128259d61ca5e9788516"}, + {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815ac2d0f3398a14286dc2cea223a6f338109f9ecf39a71160cd1628786bc6f5"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:80b5ee39b7f0131ebec7968baa9b2309eddb35b8403d1869e08f024efd883566"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5b2ccb7548a0b65974860a78c9ffe1173cfb5877460e5a229238d985565574ae"}, + {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:995ea5c48c4ebfd898eacb098164b3cc826ba273b3049e4a889658548e321b43"}, + {file = "coverage-7.4.0-cp310-cp310-win32.whl", hash = "sha256:79287fd95585ed36e83182794a57a46aeae0b64ca53929d1176db56aacc83451"}, + {file = "coverage-7.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b14b4f8760006bfdb6e08667af7bc2d8d9bfdb648351915315ea17645347137"}, + {file = "coverage-7.4.0-cp311-cp311-macosx_10_9_x86_64.whl", 
hash = "sha256:04387a4a6ecb330c1878907ce0dc04078ea72a869263e53c72a1ba5bbdf380ca"}, + {file = "coverage-7.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea81d8f9691bb53f4fb4db603203029643caffc82bf998ab5b59ca05560f4c06"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74775198b702868ec2d058cb92720a3c5a9177296f75bd97317c787daf711505"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f03940f9973bfaee8cfba70ac991825611b9aac047e5c80d499a44079ec0bc"}, + {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:485e9f897cf4856a65a57c7f6ea3dc0d4e6c076c87311d4bc003f82cfe199d25"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6ae8c9d301207e6856865867d762a4b6fd379c714fcc0607a84b92ee63feff70"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bf477c355274a72435ceb140dc42de0dc1e1e0bf6e97195be30487d8eaaf1a09"}, + {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:83c2dda2666fe32332f8e87481eed056c8b4d163fe18ecc690b02802d36a4d26"}, + {file = "coverage-7.4.0-cp311-cp311-win32.whl", hash = "sha256:697d1317e5290a313ef0d369650cfee1a114abb6021fa239ca12b4849ebbd614"}, + {file = "coverage-7.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:26776ff6c711d9d835557ee453082025d871e30b3fd6c27fcef14733f67f0590"}, + {file = "coverage-7.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:13eaf476ec3e883fe3e5fe3707caeb88268a06284484a3daf8250259ef1ba143"}, + {file = "coverage-7.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846f52f46e212affb5bcf131c952fb4075b55aae6b61adc9856222df89cbe3e2"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26f66da8695719ccf90e794ed567a1549bb2644a706b41e9f6eae6816b398c4a"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:164fdcc3246c69a6526a59b744b62e303039a81e42cfbbdc171c91a8cc2f9446"}, + {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:316543f71025a6565677d84bc4df2114e9b6a615aa39fb165d697dba06a54af9"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bb1de682da0b824411e00a0d4da5a784ec6496b6850fdf8c865c1d68c0e318dd"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0e8d06778e8fbffccfe96331a3946237f87b1e1d359d7fbe8b06b96c95a5407a"}, + {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a56de34db7b7ff77056a37aedded01b2b98b508227d2d0979d373a9b5d353daa"}, + {file = "coverage-7.4.0-cp312-cp312-win32.whl", hash = "sha256:51456e6fa099a8d9d91497202d9563a320513fcf59f33991b0661a4a6f2ad450"}, + {file = "coverage-7.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd3c1e4cb2ff0083758f09be0f77402e1bdf704adb7f89108007300a6da587d0"}, + {file = "coverage-7.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d1bf53c4c8de58d22e0e956a79a5b37f754ed1ffdbf1a260d9dcfa2d8a325e"}, + {file = "coverage-7.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:109f5985182b6b81fe33323ab4707011875198c41964f014579cf82cebf2bb85"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3cc9d4bc55de8003663ec94c2f215d12d42ceea128da8f0f4036235a119c88ac"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc6d65b21c219ec2072c1293c505cf36e4e913a3f936d80028993dd73c7906b1"}, + {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a10a4920def78bbfff4eff8a05c51be03e42f1c3735be42d851f199144897ba"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b8e99f06160602bc64da35158bb76c73522a4010f0649be44a4e167ff8555952"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7d360587e64d006402b7116623cebf9d48893329ef035278969fa3bbf75b697e"}, + {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29f3abe810930311c0b5d1a7140f6395369c3db1be68345638c33eec07535105"}, + {file = "coverage-7.4.0-cp38-cp38-win32.whl", hash = "sha256:5040148f4ec43644702e7b16ca864c5314ccb8ee0751ef617d49aa0e2d6bf4f2"}, + {file = "coverage-7.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9864463c1c2f9cb3b5db2cf1ff475eed2f0b4285c2aaf4d357b69959941aa555"}, + {file = "coverage-7.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:936d38794044b26c99d3dd004d8af0035ac535b92090f7f2bb5aa9c8e2f5cd42"}, + {file = "coverage-7.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799c8f873794a08cdf216aa5d0531c6a3747793b70c53f70e98259720a6fe2d7"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7defbb9737274023e2d7af02cac77043c86ce88a907c58f42b580a97d5bcca9"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1526d265743fb49363974b7aa8d5899ff64ee07df47dd8d3e37dcc0818f09ed"}, + {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf635a52fc1ea401baf88843ae8708591aa4adff875e5c23220de43b1ccf575c"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:756ded44f47f330666843b5781be126ab57bb57c22adbb07d83f6b519783b870"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0eb3c2f32dabe3a4aaf6441dde94f35687224dfd7eb2a7f47f3fd9428e421058"}, + {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bfd5db349d15c08311702611f3dccbef4b4e2ec148fcc636cf8739519b4a5c0f"}, + {file = "coverage-7.4.0-cp39-cp39-win32.whl", hash = "sha256:53d7d9158ee03956e0eadac38dfa1ec8068431ef8058fe6447043db1fb40d932"}, + {file = "coverage-7.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfd2a8b6b0d8e66e944d47cdec2f47c48fef2ba2f2dff5a9a75757f64172857e"}, + {file = "coverage-7.4.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:c530833afc4707fe48524a44844493f36d8727f04dcce91fb978c414a8556cc6"}, + {file = "coverage-7.4.0.tar.gz", hash = "sha256:707c0f58cb1712b8809ece32b68996ee1e609f71bd14615bd8f87a1293cb610e"}, ] [package.dependencies] @@ -462,55 +528,30 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 toml = ["tomli"] [[package]] -name = "cryptography" -version = "41.0.2" -description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-optional = false +name = "databricks-cli" +version = "0.18.0" +description = "A command line interface for Databricks" +optional = true python-versions = ">=3.7" files = [ - {file = "cryptography-41.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:01f1d9e537f9a15b037d5d9ee442b8c22e3ae11ce65ea1f3316a41c78756b711"}, - {file = "cryptography-41.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:079347de771f9282fbfe0e0236c716686950c19dee1b76240ab09ce1624d76d7"}, - {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:439c3cc4c0d42fa999b83ded80a9a1fb54d53c58d6e59234cfe97f241e6c781d"}, - {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f14ad275364c8b4e525d018f6716537ae7b6d369c094805cae45300847e0894f"}, - {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:84609ade00a6ec59a89729e87a503c6e36af98ddcd566d5f3be52e29ba993182"}, - {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:49c3222bb8f8e800aead2e376cbef687bc9e3cb9b58b29a261210456a7783d83"}, - {file = "cryptography-41.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d73f419a56d74fef257955f51b18d046f3506270a5fd2ac5febbfa259d6c0fa5"}, - {file = "cryptography-41.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:2a034bf7d9ca894720f2ec1d8b7b5832d7e363571828037f9e0c4f18c1b58a58"}, - {file = "cryptography-41.0.2-cp37-abi3-win32.whl", hash = "sha256:d124682c7a23c9764e54ca9ab5b308b14b18eba02722b8659fb238546de83a76"}, - {file = "cryptography-41.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:9c3fe6534d59d071ee82081ca3d71eed3210f76ebd0361798c74abc2bcf347d4"}, - {file = "cryptography-41.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a719399b99377b218dac6cf547b6ec54e6ef20207b6165126a280b0ce97e0d2a"}, - {file = "cryptography-41.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:182be4171f9332b6741ee818ec27daff9fb00349f706629f5cbf417bd50e66fd"}, - {file = "cryptography-41.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7a9a3bced53b7f09da251685224d6a260c3cb291768f54954e28f03ef14e3766"}, - {file = "cryptography-41.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f0dc40e6f7aa37af01aba07277d3d64d5a03dc66d682097541ec4da03cc140ee"}, - {file = "cryptography-41.0.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:674b669d5daa64206c38e507808aae49904c988fa0a71c935e7006a3e1e83831"}, - {file = "cryptography-41.0.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7af244b012711a26196450d34f483357e42aeddb04128885d95a69bd8b14b69b"}, - {file = "cryptography-41.0.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9b6d717393dbae53d4e52684ef4f022444fc1cce3c48c38cb74fca29e1f08eaa"}, - {file = "cryptography-41.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:192255f539d7a89f2102d07d7375b1e0a81f7478925b3bc2e0549ebf739dae0e"}, - {file = "cryptography-41.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f772610fe364372de33d76edcd313636a25684edb94cee53fd790195f5989d14"}, - {file = "cryptography-41.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b332cba64d99a70c1e0836902720887fb4529ea49ea7f5462cf6640e095e11d2"}, - {file = "cryptography-41.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9a6673c1828db6270b76b22cc696f40cde9043eb90373da5c2f8f2158957f42f"}, - {file = "cryptography-41.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:342f3767e25876751e14f8459ad85e77e660537ca0a066e10e75df9c9e9099f0"}, - {file = "cryptography-41.0.2.tar.gz", hash = "sha256:7d230bf856164de164ecb615ccc14c7fc6de6906ddd5b491f3af90d3514c925c"}, -] - -[package.dependencies] -cffi = ">=1.12" - -[package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] -nox = ["nox"] -pep8test = ["black", "check-sdist", "mypy", "ruff"] -sdist = ["build"] -ssh = ["bcrypt (>=3.1.5)"] -test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] -test-randomorder = ["pytest-randomly"] + {file = "databricks-cli-0.18.0.tar.gz", hash = "sha256:87569709eda9af3e9db8047b691e420b5e980c62ef01675575c0d2b9b4211eb7"}, + {file = "databricks_cli-0.18.0-py2.py3-none-any.whl", hash = "sha256:1176a5f42d3e8af4abfc915446fb23abc44513e325c436725f5898cbb9e3384b"}, +] + +[package.dependencies] +click = ">=7.0" +oauthlib = ">=3.1.0" +pyjwt = ">=1.7.0" +requests = ">=2.17.3" +six = ">=1.10.0" +tabulate = ">=0.7.7" +urllib3 = ">=1.26.7,<3" [[package]] name = "debuglater" version = "1.4.4" description = "Post-mortem debugging for Python programs" -optional = true +optional = false python-versions = "*" files = [ {file = "debuglater-1.4.4-py3-none-any.whl", hash = "sha256:8d1b0faa931ecf4201be5dd71d4e37226ab746c0ea2df14fa772da6d4bec444b"}, @@ -528,33 +569,44 @@ dev = ["dill", "flake8", "invoke", "numpy", "pandas", "pkgmt", "pytest", "twine" name = "decorator" version = "5.1.1" description = "Decorators for Humans" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +description = "XML bomb protection for Python stdlib modules" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + [[package]] name = "distlib" -version = "0.3.6" +version = "0.3.8" description = "Distribution utilities" optional = false python-versions = "*" files = [ - {file = "distlib-0.3.6-py2.py3-none-any.whl", hash = "sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e"}, - {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"}, + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, ] [[package]] name = "docker" -version = "6.1.3" +version = "7.0.0" description = "A Python library for the Docker Engine API." 
optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "docker-6.1.3-py3-none-any.whl", hash = "sha256:aecd2277b8bf8e506e484f6ab7aec39abe0038e29fa4a6d3ba86c3fe01844ed9"}, - {file = "docker-6.1.3.tar.gz", hash = "sha256:aa6d17830045ba5ef0168d5eaa34d37beeb113948c413affe1d5991fc11f9a20"}, + {file = "docker-7.0.0-py3-none-any.whl", hash = "sha256:12ba681f2777a0ad28ffbcc846a69c31b4dfd9752b47eb425a274ee269c5e14b"}, + {file = "docker-7.0.0.tar.gz", hash = "sha256:323736fb92cd9418fc5e7133bc953e11a9da04f4483f828b527db553f1e7e5a3"}, ] [package.dependencies] @@ -562,42 +614,31 @@ packaging = ">=14.0" pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} requests = ">=2.26.0" urllib3 = ">=1.26.0" -websocket-client = ">=0.32.0" [package.extras] ssh = ["paramiko (>=2.4.3)"] +websockets = ["websocket-client (>=1.3.0)"] [[package]] -name = "docutils" -version = "0.20.1" -description = "Docutils -- Python Documentation Utilities" -optional = false -python-versions = ">=3.7" -files = [ - {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, - {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, -] - -[[package]] -name = "dotty-dict" -version = "1.3.1" -description = "Dictionary wrapper for quick access to deeply nested keys." -optional = false -python-versions = ">=3.5,<4.0" +name = "entrypoints" +version = "0.4" +description = "Discover and load entry points from installed packages." +optional = true +python-versions = ">=3.6" files = [ - {file = "dotty_dict-1.3.1-py3-none-any.whl", hash = "sha256:5022d234d9922f13aa711b4950372a06a6d64cb6d6db9ba43d0ba133ebfce31f"}, - {file = "dotty_dict-1.3.1.tar.gz", hash = "sha256:4b016e03b8ae265539757a53eba24b9bfda506fb94fbce0bee843c6f05541a15"}, + {file = "entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f"}, + {file = "entrypoints-0.4.tar.gz", hash = "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4"}, ] [[package]] name = "exceptiongroup" -version = "1.1.2" +version = "1.2.0" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, - {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, ] [package.extras] @@ -605,27 +646,27 @@ test = ["pytest (>=6)"] [[package]] name = "executing" -version = "1.2.0" +version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" -optional = true -python-versions = "*" +optional = false +python-versions = ">=3.5" files = [ - {file = "executing-1.2.0-py2.py3-none-any.whl", hash = "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc"}, - {file = "executing-1.2.0.tar.gz", hash = "sha256:19da64c18d2d851112f09c287f8d3dbbdf725ab0e569077efb6cdcbd3497c107"}, + {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, + {file = 
"executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"}, ] [package.extras] -tests = ["asttokens", "littleutils", "pytest", "rich"] +tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] [[package]] name = "fastjsonschema" -version = "2.17.1" +version = "2.19.1" description = "Fastest Python implementation of JSON schema" -optional = true +optional = false python-versions = "*" files = [ - {file = "fastjsonschema-2.17.1-py3-none-any.whl", hash = "sha256:4b90b252628ca695280924d863fe37234eebadc29c5360d322571233dc9746e0"}, - {file = "fastjsonschema-2.17.1.tar.gz", hash = "sha256:f4eeb8a77cef54861dbf7424ac8ce71306f12cbb086c45131bcba2c6a4f726e3"}, + {file = "fastjsonschema-2.19.1-py3-none-any.whl", hash = "sha256:3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0"}, + {file = "fastjsonschema-2.19.1.tar.gz", hash = "sha256:e3126a94bdc4623d3de4485f8d468a12f02a67921315ddc87836d6e456dc789d"}, ] [package.extras] @@ -633,18 +674,19 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc [[package]] name = "filelock" -version = "3.12.2" +version = "3.13.1" description = "A platform independent file lock." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "filelock-3.12.2-py3-none-any.whl", hash = "sha256:cbb791cdea2a72f23da6ac5b5269ab0a0d161e9ef0100e653b69049a7706d1ec"}, - {file = "filelock-3.12.2.tar.gz", hash = "sha256:002740518d8aa59a26b0c76e10fb8c6e15eae825d34b6fdf670333fd7b938d81"}, + {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"}, + {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"}, ] [package.extras] -docs = ["furo (>=2023.5.20)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] +typing = ["typing-extensions (>=4.8)"] [[package]] name = "ghp-import" @@ -665,42 +707,171 @@ dev = ["flake8", "markdown", "twine", "wheel"] [[package]] name = "gitdb" -version = "4.0.10" +version = "4.0.11" description = "Git Object Database" -optional = false +optional = true python-versions = ">=3.7" files = [ - {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, - {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, + {file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, ] [package.dependencies] smmap = ">=3.0.1,<6" +[[package]] +name = "gitlint" +version = "0.19.1" +description = "Git commit message linter written in python, checks your commit messages for style." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "gitlint-0.19.1-py3-none-any.whl", hash = "sha256:26bb085959148d99fbbc178b4e56fda6c3edd7646b7c2a24d8ee1f8e036ed85d"}, + {file = "gitlint-0.19.1.tar.gz", hash = "sha256:b5b70fb894e80849b69abbb65ee7dbb3520fc3511f202a6e6b6ddf1a71ee8f61"}, +] + +[package.dependencies] +gitlint-core = {version = "0.19.1", extras = ["trusted-deps"]} + +[[package]] +name = "gitlint-core" +version = "0.19.1" +description = "Git commit message linter written in python, checks your commit messages for style." +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitlint_core-0.19.1-py3-none-any.whl", hash = "sha256:f41effd1dcbc06ffbfc56b6888cce72241796f517b46bd9fd4ab1b145056988c"}, + {file = "gitlint_core-0.19.1.tar.gz", hash = "sha256:7bf977b03ff581624a9e03f65ebb8502cc12dfaa3e92d23e8b2b54bbdaa29992"}, +] + +[package.dependencies] +arrow = [ + {version = ">=1"}, + {version = "1.2.3", optional = true, markers = "extra == \"trusted-deps\""}, +] +click = [ + {version = ">=8"}, + {version = "8.1.3", optional = true, markers = "extra == \"trusted-deps\""}, +] +sh = [ + {version = ">=1.13.0", markers = "sys_platform != \"win32\""}, + {version = "1.14.3", optional = true, markers = "sys_platform != \"win32\" and extra == \"trusted-deps\""}, +] + +[package.extras] +trusted-deps = ["arrow (==1.2.3)", "click (==8.1.3)", "sh (==1.14.3)"] + [[package]] name = "gitpython" -version = "3.1.32" +version = "3.1.41" description = "GitPython is a Python library used to interact with Git repositories" -optional = false +optional = true python-versions = ">=3.7" files = [ - {file = "GitPython-3.1.32-py3-none-any.whl", hash = "sha256:e3d59b1c2c6ebb9dfa7a184daf3b6dd4914237e7488a1730a6d8f6f5d0b4187f"}, - {file = "GitPython-3.1.32.tar.gz", hash = "sha256:8d9b8cb1e80b9735e8717c9362079d3ce4c6e5ddeebedd0361b228c3a67a62f6"}, + {file = "GitPython-3.1.41-py3-none-any.whl", hash = "sha256:c36b6634d069b3f719610175020a9aed919421c87552185b085e04fbbdb10b7c"}, + {file = "GitPython-3.1.41.tar.gz", hash = "sha256:ed66e624884f76df22c8e16066d567aaa5a37d5b5fa19db2c6df6f7156db9048"}, ] [package.dependencies] gitdb = ">=4.0.1,<5" -typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""} + +[package.extras] +test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "sumtypes"] + +[[package]] +name = "greenlet" +version = "3.0.3" +description = "Lightweight in-process concurrent programming" +optional = true +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = 
"greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = 
"sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, + {file = "greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, + {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, + {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, + {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, + {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, + {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, + {file = 
"greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, + {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, + {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + +[[package]] +name = "griffe" +version = "0.39.1" +description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." +optional = false +python-versions = ">=3.8" +files = [ + {file = "griffe-0.39.1-py3-none-any.whl", hash = "sha256:6ce4ecffcf0d2f96362c5974b3f7df812da8f8d4cfcc5ebc8202ef72656fc087"}, + {file = "griffe-0.39.1.tar.gz", hash = "sha256:ead8dfede6e6531cce6bf69090a4f3c6d36fdf923c43f8e85aa530552cef0c09"}, +] + +[package.dependencies] +colorama = ">=0.4" [[package]] name = "identify" -version = "2.5.24" +version = "2.5.33" description = "File identification library for Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "identify-2.5.24-py2.py3-none-any.whl", hash = "sha256:986dbfb38b1140e763e413e6feb44cd731faf72d1909543178aa79b0e258265d"}, - {file = "identify-2.5.24.tar.gz", hash = "sha256:0aac67d5b4812498056d28a9a512a483f5085cc28640b02b258a59dac34301d4"}, + {file = "identify-2.5.33-py2.py3-none-any.whl", hash = "sha256:d40ce5fcd762817627670da8a7d8d8e65f24342d14539c59488dc603bf662e34"}, + {file = "identify-2.5.33.tar.gz", hash = "sha256:161558f9fe4559e1557e1bff323e8631f6a0e4837f7497767c1782832f16b62d"}, ] [package.extras] @@ -708,52 +879,51 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.4" +version = "3.6" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" files = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, ] [[package]] name = "importlib-metadata" -version = "6.7.0" +version = "7.0.1" description = "Read metadata from Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = 
"sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, - {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, + {file = "importlib_metadata-7.0.1-py3-none-any.whl", hash = "sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e"}, + {file = "importlib_metadata-7.0.1.tar.gz", hash = "sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc"}, ] [package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} zipp = ">=0.5" [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] [[package]] name = "importlib-resources" -version = "5.12.0" +version = "6.1.1" description = "Read resources from Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, - {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, + {file = "importlib_resources-6.1.1-py3-none-any.whl", hash = "sha256:e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6"}, + {file = "importlib_resources-6.1.1.tar.gz", hash = "sha256:3893a00122eafde6894c59914446a512f728a0c1a45f9bb9b63721b6bacf0b4a"}, ] [package.dependencies] zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "zipp (>=3.17)"] [[package]] name = "iniconfig" @@ -766,26 +936,15 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "invoke" -version = "2.2.0" -description = "Pythonic task execution" -optional = false -python-versions = ">=3.6" -files = [ - {file = "invoke-2.2.0-py3-none-any.whl", hash = "sha256:6ea924cc53d4f78e3d98bc436b08069a03077e6f85ad1ddaa8a116d7dad15820"}, - {file = "invoke-2.2.0.tar.gz", hash = "sha256:ee6cbb101af1a859c7fe84f2a264c059020b0cb7fe3535f9424300ab568f6bd5"}, -] - [[package]] name = "ipython" -version = "8.12.2" +version = "8.12.0" 
description = "IPython: Productive Interactive Computing" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "ipython-8.12.2-py3-none-any.whl", hash = "sha256:ea8801f15dfe4ffb76dea1b09b847430ffd70d827b41735c64a0638a04103bfc"}, - {file = "ipython-8.12.2.tar.gz", hash = "sha256:c7b80eb7f5a855a88efc971fda506ff7a91c280b42cdae26643e0f601ea281ea"}, + {file = "ipython-8.12.0-py3-none-any.whl", hash = "sha256:1c183bf61b148b00bcebfa5d9b39312733ae97f6dad90d7e9b4d86c8647f498c"}, + {file = "ipython-8.12.0.tar.gz", hash = "sha256:a950236df04ad75b5bc7f816f9af3d74dc118fd42f2ff7e80e8e60ca1f182e2d"}, ] [package.dependencies] @@ -817,66 +976,70 @@ test = ["pytest (<7.1)", "pytest-asyncio", "testpath"] test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pandas", "pytest (<7.1)", "pytest-asyncio", "testpath", "trio"] [[package]] -name = "jaraco-classes" -version = "3.2.3" -description = "Utility functions for Python class constructs" +name = "ipython" +version = "8.18.1" +description = "IPython: Productive Interactive Computing" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "jaraco.classes-3.2.3-py3-none-any.whl", hash = "sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158"}, - {file = "jaraco.classes-3.2.3.tar.gz", hash = "sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a"}, + {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, + {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"}, ] [package.dependencies] -more-itertools = "*" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +decorator = "*" +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +jedi = ">=0.16" +matplotlib-inline = "*" +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} +prompt-toolkit = ">=3.0.41,<3.1.0" +pygments = ">=2.4.0" +stack-data = "*" +traitlets = ">=5" +typing-extensions = {version = "*", markers = "python_version < \"3.10\""} [package.extras] -docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"] -testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +all = ["black", "curio", "docrepr", "exceptiongroup", "ipykernel", "ipyparallel", "ipywidgets", "matplotlib", "matplotlib (!=3.2.0)", "nbconvert", "nbformat", "notebook", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "qtconsole", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "trio", "typing-extensions"] +black = ["black"] +doc = ["docrepr", "exceptiongroup", "ipykernel", "matplotlib", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "typing-extensions"] +kernel = ["ipykernel"] +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["ipywidgets", "notebook"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath"] +test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath", "trio"] [[package]] name = 
"jedi" -version = "0.18.2" +version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." -optional = true +optional = false python-versions = ">=3.6" files = [ - {file = "jedi-0.18.2-py2.py3-none-any.whl", hash = "sha256:203c1fd9d969ab8f2119ec0a3342e0b49910045abe6af0a3ae83a5764d54639e"}, - {file = "jedi-0.18.2.tar.gz", hash = "sha256:bae794c30d07f6d910d32a7048af09b5a39ed740918da923c6b780790ebac612"}, + {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, + {file = "jedi-0.19.1.tar.gz", hash = "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd"}, ] [package.dependencies] -parso = ">=0.8.0,<0.9.0" +parso = ">=0.8.3,<0.9.0" [package.extras] docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] -qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] -testing = ["Django (<3.1)", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] - -[[package]] -name = "jeepney" -version = "0.8.0" -description = "Low-level, pure Python DBus protocol wrapper." -optional = false -python-versions = ">=3.7" -files = [ - {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"}, - {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"}, -] - -[package.extras] -test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"] -trio = ["async_generator", "trio"] +qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] +testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] [[package]] name = "jinja2" -version = "3.1.2" +version = "3.1.3" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -887,13 +1050,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jsonschema" -version = "4.18.2" +version = "4.21.1" description = "An implementation of JSON Schema validation for Python" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "jsonschema-4.18.2-py3-none-any.whl", hash = "sha256:159fdff1443b4c5ed900d4eeac6b928a3485f4aff5fba6edd1e25cd66bb46b39"}, - {file = "jsonschema-4.18.2.tar.gz", hash = "sha256:af3855bfa30e83b2200a5fe12ab5eb92460e4d3b8e4efd34094aa637f7272a87"}, + {file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"}, + {file = "jsonschema-4.21.1.tar.gz", hash = "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5"}, ] [package.dependencies] @@ -910,28 +1073,28 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jsonschema-specifications" -version = "2023.6.1" +version = "2023.12.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "jsonschema_specifications-2023.6.1-py3-none-any.whl", hash = "sha256:3d2b82663aff01815f744bb5c7887e2121a63399b49b104a3c96145474d091d7"}, - {file = "jsonschema_specifications-2023.6.1.tar.gz", hash = "sha256:ca1c4dd059a9e7b34101cf5b3ab7ff1d18b139f35950d598d629837ef66e8f28"}, + {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, + {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, ] [package.dependencies] importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} -referencing = ">=0.28.0" +referencing = ">=0.31.0" [[package]] name = "jupyter-client" -version = "8.3.0" +version = "8.6.0" description = "Jupyter protocol implementation and client libraries" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "jupyter_client-8.3.0-py3-none-any.whl", hash = "sha256:7441af0c0672edc5d28035e92ba5e32fadcfa8a4e608a434c228836a89df6158"}, - {file = "jupyter_client-8.3.0.tar.gz", hash = "sha256:3af69921fe99617be1670399a0b857ad67275eefcfa291e2c81a160b7b650f5f"}, + {file = "jupyter_client-8.6.0-py3-none-any.whl", hash = "sha256:909c474dbe62582ae62b758bca86d6518c85234bdee2d908c778db6d72f39d99"}, + {file = "jupyter_client-8.6.0.tar.gz", hash = "sha256:0642244bb83b4764ae60d07e010e15f0e2d275ec4e918a8f7b80fbbef3ca60c7"}, ] [package.dependencies] @@ -948,13 +1111,13 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt [[package]] name = "jupyter-core" -version = "5.3.1" +version = "5.7.1" description = "Jupyter core package. A base package on which Jupyter projects rely." 
-optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "jupyter_core-5.3.1-py3-none-any.whl", hash = "sha256:ae9036db959a71ec1cac33081eeb040a79e681f08ab68b0883e9a676c7a90dce"}, - {file = "jupyter_core-5.3.1.tar.gz", hash = "sha256:5ba5c7938a7f97a6b0481463f7ff0dbac7c15ba48cf46fa4035ca6e838aa1aba"}, + {file = "jupyter_core-5.7.1-py3-none-any.whl", hash = "sha256:c65c82126453a723a2804aa52409930434598fd9d35091d63dfb919d2b765bb7"}, + {file = "jupyter_core-5.7.1.tar.gz", hash = "sha256:de61a9d7fc71240f688b2fb5ab659fbb56979458dc66a71decd098e03c79e218"}, ] [package.dependencies] @@ -963,114 +1126,150 @@ pywin32 = {version = ">=300", markers = "sys_platform == \"win32\" and platform_ traitlets = ">=5.3" [package.extras] -docs = ["myst-parser", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"] test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"] [[package]] -name = "keyring" -version = "24.1.1" -description = "Store and access your passwords safely." +name = "jupyterlab-pygments" +version = "0.3.0" +description = "Pygments theme using JupyterLab CSS variables" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "keyring-24.1.1-py3-none-any.whl", hash = "sha256:bc402c5e501053098bcbd149c4ddbf8e36c6809e572c2d098d4961e88d4c270d"}, - {file = "keyring-24.1.1.tar.gz", hash = "sha256:3d44a48fa9a254f6c72879d7c88604831ebdaac6ecb0b214308b02953502c510"}, + {file = "jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780"}, + {file = "jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d"}, ] -[package.dependencies] -importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""} -importlib-resources = {version = "*", markers = "python_version < \"3.9\""} -"jaraco.classes" = "*" -jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""} -pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""} -SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""} +[[package]] +name = "macholib" +version = "1.16.3" +description = "Mach-O header analysis and editing" +optional = false +python-versions = "*" +files = [ + {file = "macholib-1.16.3-py2.py3-none-any.whl", hash = "sha256:0e315d7583d38b8c77e815b1ecbdbf504a8258d8b3e17b61165c6feb60d18f2c"}, + {file = "macholib-1.16.3.tar.gz", hash = "sha256:07ae9e15e8e4cd9a788013d81f5908b3609aa76f9b1421bae9c4d7606ec86a30"}, +] -[package.extras] -completion = ["shtab"] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[package.dependencies] +altgraph = ">=0.17" [[package]] name = "markdown" -version = "3.3.7" -description = "Python implementation of Markdown." +version = "3.5.2" +description = "Python implementation of John Gruber's Markdown." 
optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "Markdown-3.3.7-py3-none-any.whl", hash = "sha256:f5da449a6e1c989a4cea2631aa8ee67caa5a2ef855d551c88f9e309f4634c621"}, - {file = "Markdown-3.3.7.tar.gz", hash = "sha256:cbb516f16218e643d8e0a95b309f77eb118cb138d39a4f27851e6a63581db874"}, + {file = "Markdown-3.5.2-py3-none-any.whl", hash = "sha256:d43323865d89fc0cb9b20c75fc8ad313af307cc087e84b657d9eec768eddeadd"}, + {file = "Markdown-3.5.2.tar.gz", hash = "sha256:e1ac7b3dc550ee80e602e71c1d168002f062e49f1b11e26a36264dafd4df2ef8"}, ] [package.dependencies] importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} [package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] testing = ["coverage", "pyyaml"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "markupsafe" -version = "2.1.3" +version = "2.1.4" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" files = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = 
"MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win32.whl", hash = "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win32.whl", hash = "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-win32.whl", hash = "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win32.whl", hash = "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win32.whl", hash = "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win_amd64.whl", hash = "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win32.whl", hash = "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959"}, + {file = "MarkupSafe-2.1.4.tar.gz", hash = "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f"}, ] [[package]] name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"}, @@ -1080,6 +1279,17 @@ files = [ [package.dependencies] traitlets = "*" +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "mergedeep" version = "1.3.4" @@ -1091,15 +1301,26 @@ files = [ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, ] +[[package]] +name = "mistune" +version = "3.0.2" +description = "A sane and fast Markdown parser with useful plugins and renderers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205"}, + {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, +] + [[package]] name = "mkdocs" -version = "1.4.3" +version = "1.5.3" description = "Project documentation with Markdown." optional = false python-versions = ">=3.7" files = [ - {file = "mkdocs-1.4.3-py3-none-any.whl", hash = "sha256:6ee46d309bda331aac915cd24aab882c179a933bd9e77b80ce7d2eaaa3f689dd"}, - {file = "mkdocs-1.4.3.tar.gz", hash = "sha256:5955093bbd4dd2e9403c5afaf57324ad8b04f16886512a3ee6ef828956481c57"}, + {file = "mkdocs-1.5.3-py3-none-any.whl", hash = "sha256:3b3a78e736b31158d64dbb2f8ba29bd46a379d0c6e324c2246c3bc3d2189cfc1"}, + {file = "mkdocs-1.5.3.tar.gz", hash = "sha256:eb7c99214dcb945313ba30426c2451b735992c73c2e10838f76d09e39ff4d0e2"}, ] [package.dependencies] @@ -1108,125 +1329,239 @@ colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} ghp-import = ">=1.0" importlib-metadata = {version = ">=4.3", markers = "python_version < \"3.10\""} jinja2 = ">=2.11.1" -markdown = ">=3.2.1,<3.4" +markdown = ">=3.2.1" +markupsafe = ">=2.0.1" mergedeep = ">=1.3.4" packaging = ">=20.5" +pathspec = ">=0.11.1" +platformdirs = ">=2.2.0" pyyaml = ">=5.1" pyyaml-env-tag = ">=0.1" -typing-extensions = {version = ">=3.10", markers = "python_version < \"3.8\""} watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.3)", "jinja2 (==2.11.1)", "markdown (==3.2.1)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "packaging (==20.5)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "typing-extensions (==3.10)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.3)", "jinja2 (==2.11.1)", "markdown (==3.2.1)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "packaging (==20.5)", "pathspec (==0.11.1)", "platformdirs (==2.2.0)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "typing-extensions (==3.10)", "watchdog (==2.0)"] + +[[package]] +name = "mkdocs-autorefs" +version = "0.5.0" +description = "Automatically link across pages in MkDocs." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_autorefs-0.5.0-py3-none-any.whl", hash = "sha256:7930fcb8ac1249f10e683967aeaddc0af49d90702af111a5e390e8b20b3d97ff"}, + {file = "mkdocs_autorefs-0.5.0.tar.gz", hash = "sha256:9a5054a94c08d28855cfab967ada10ed5be76e2bfad642302a610b252c3274c0"}, +] + +[package.dependencies] +Markdown = ">=3.3" +mkdocs = ">=1.1" + +[[package]] +name = "mkdocs-click" +version = "0.8.1" +description = "An MkDocs extension to generate documentation for Click command line applications" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs_click-0.8.1-py3-none-any.whl", hash = "sha256:a100ff938be63911f86465a1c21d29a669a7c51932b700fdb3daa90d13b61ee4"}, + {file = "mkdocs_click-0.8.1.tar.gz", hash = "sha256:0a88cce04870c5d70ff63138e2418219c3c4119cc928a59c66b76eb5214edba6"}, +] + +[package.dependencies] +click = ">=8.1" +markdown = ">=3.3" [[package]] name = "mkdocs-material" -version = "9.1.18" +version = "9.5.4" description = "Documentation that simply works" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.1.18-py3-none-any.whl", hash = "sha256:5bcf8fb79ac2f253c0ffe93fa181cba87718c6438f459dc4180ac7418cc9a450"}, - {file = "mkdocs_material-9.1.18.tar.gz", hash = "sha256:981dd39979723d4cda7cfc77bbbe5e54922d5761a7af23fb8ba9edb52f114b13"}, + {file = "mkdocs_material-9.5.4-py3-none-any.whl", hash = "sha256:efd7cc8ae03296d728da9bd38f4db8b07ab61f9738a0cbd0dfaf2a15a50e7343"}, + {file = "mkdocs_material-9.5.4.tar.gz", hash = "sha256:3d196ee67fad16b2df1a458d650a8ac1890294eaae368d26cee71bc24ad41c40"}, ] [package.dependencies] -colorama = ">=0.4" -jinja2 = ">=3.0" -markdown = ">=3.2" -mkdocs = ">=1.4.2" -mkdocs-material-extensions = ">=1.1" -pygments = ">=2.14" -pymdown-extensions = ">=9.9.1" -regex = ">=2022.4.24" -requests = ">=2.26" +babel = ">=2.10,<3.0" +colorama = ">=0.4,<1.0" +jinja2 = ">=3.0,<4.0" +markdown = ">=3.2,<4.0" +mkdocs = ">=1.5.3,<1.6.0" +mkdocs-material-extensions = ">=1.3,<2.0" +paginate = ">=0.5,<1.0" +pygments = ">=2.16,<3.0" +pymdown-extensions = ">=10.2,<11.0" +regex = ">=2022.4" +requests = ">=2.26,<3.0" + +[package.extras] +git = ["mkdocs-git-committers-plugin-2 (>=1.1,<2.0)", "mkdocs-git-revision-date-localized-plugin (>=1.2,<2.0)"] +imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=9.4,<10.0)"] +recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"] [[package]] name = "mkdocs-material-extensions" -version = "1.1.1" +version = "1.3.1" description = "Extension pack for Python Markdown and MkDocs Material." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "mkdocs_material_extensions-1.1.1-py3-none-any.whl", hash = "sha256:e41d9f38e4798b6617ad98ca8f7f1157b1e4385ac1459ca1e4ea219b556df945"}, - {file = "mkdocs_material_extensions-1.1.1.tar.gz", hash = "sha256:9c003da71e2cc2493d910237448c672e00cefc800d3d6ae93d2fc69979e3bd93"}, + {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"}, + {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, ] [[package]] name = "mkdocs-section-index" -version = "0.3.5" +version = "0.3.8" description = "MkDocs plugin to allow clickable sections that lead to an index page" optional = false python-versions = ">=3.7" files = [ - {file = "mkdocs_section_index-0.3.5-py3-none-any.whl", hash = "sha256:1f6359287b0a823d6297cf1cb6c0a49ed75851d0d1cea8b425b207a45ce10141"}, - {file = "mkdocs_section_index-0.3.5.tar.gz", hash = "sha256:fa8b1ce0649326b1873c6460c1df2bb0c4825fd21e3dd416f13ec212d31edf12"}, + {file = "mkdocs_section_index-0.3.8-py3-none-any.whl", hash = "sha256:823d298d78bc1e73e23678ff60889f3c369c2167b03dba73fea88bd0e268a60d"}, + {file = "mkdocs_section_index-0.3.8.tar.gz", hash = "sha256:bbd209f0da79441baf136ef3a9c40665bb9681d1fb62c73ca2f116fd1388a404"}, ] [package.dependencies] -mkdocs = ">=1.0.3" +mkdocs = ">=1.2" [[package]] -name = "monotonic" -version = "1.6" -description = "An implementation of time.monotonic() for Python 2 & < 3.3" +name = "mkdocstrings" +version = "0.24.0" +description = "Automatic documentation from sources, for MkDocs." +optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocstrings-0.24.0-py3-none-any.whl", hash = "sha256:f4908560c10f587326d8f5165d1908817b2e280bbf707607f601c996366a2264"}, + {file = "mkdocstrings-0.24.0.tar.gz", hash = "sha256:222b1165be41257b494a9d29b14135d2b7ca43f38161d5b10caae03b87bd4f7e"}, +] + +[package.dependencies] +click = ">=7.0" +importlib-metadata = {version = ">=4.6", markers = "python_version < \"3.10\""} +Jinja2 = ">=2.11.1" +Markdown = ">=3.3" +MarkupSafe = ">=1.1" +mkdocs = ">=1.4" +mkdocs-autorefs = ">=0.3.1" +mkdocstrings-python = {version = ">=0.5.2", optional = true, markers = "extra == \"python\""} +platformdirs = ">=2.2.0" +pymdown-extensions = ">=6.3" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.10\""} + +[package.extras] +crystal = ["mkdocstrings-crystal (>=0.3.4)"] +python = ["mkdocstrings-python (>=0.5.2)"] +python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] + +[[package]] +name = "mkdocstrings-python" +version = "1.8.0" +description = "A Python handler for mkdocstrings." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocstrings_python-1.8.0-py3-none-any.whl", hash = "sha256:4209970cc90bec194568682a535848a8d8489516c6ed4adbe58bbc67b699ca9d"}, + {file = "mkdocstrings_python-1.8.0.tar.gz", hash = "sha256:1488bddf50ee42c07d9a488dddc197f8e8999c2899687043ec5dd1643d057192"}, +] + +[package.dependencies] +griffe = ">=0.37" +mkdocstrings = ">=0.20" + +[[package]] +name = "mlflow-skinny" +version = "2.9.2" +description = "MLflow: A Platform for ML Development and Productionization" optional = true -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, - {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, + {file = "mlflow-skinny-2.9.2.tar.gz", hash = "sha256:61de0a70e22552132f7140e1ddfa7d31a88a3ed735df155dcd884d5a64c81f14"}, + {file = "mlflow_skinny-2.9.2-py3-none-any.whl", hash = "sha256:364d53b703a238594aa9d1b978c4377d4f931b1fcd43d021b76fb483009a31f9"}, ] +[package.dependencies] +click = ">=7.0,<9" +cloudpickle = "<4" +databricks-cli = ">=0.8.7,<1" +entrypoints = "<1" +gitpython = ">=2.1.0,<4" +importlib-metadata = ">=3.7.0,<4.7.0 || >4.7.0,<8" +packaging = "<24" +protobuf = ">=3.12.0,<5" +pytz = "<2024" +pyyaml = ">=5.1,<7" +requests = ">=2.17.3,<3" +sqlparse = ">=0.4.0,<1" + +[package.extras] +aliyun-oss = ["aliyunstoreplugin"] +databricks = ["azure-storage-file-datalake (>12)", "boto3 (>1)", "google-cloud-storage (>=1.30.0)"] +extras = ["azureml-core (>=1.2.0)", "boto3", "google-cloud-storage (>=1.30.0)", "kubernetes", "mlserver (>=1.2.0,!=1.3.1)", "mlserver-mlflow (>=1.2.0,!=1.3.1)", "prometheus-flask-exporter", "pyarrow", "pysftp", "requests-auth-aws-sigv4", "virtualenv"] +gateway = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +genai = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +sqlserver = ["mlflow-dbstore"] +xethub = ["mlflow-xethub"] + [[package]] -name = "more-itertools" -version = "9.1.0" -description = "More routines for operating on iterables, beyond itertools" +name = "monotonic" +version = "1.6" +description = "An implementation of time.monotonic() for Python 2 & < 3.3" optional = false -python-versions = ">=3.7" +python-versions = "*" files = [ - {file = "more-itertools-9.1.0.tar.gz", hash = "sha256:cabaa341ad0389ea83c17a94566a53ae4c9d07349861ecb14dc6d0345cf9ac5d"}, - {file = "more_itertools-9.1.0-py3-none-any.whl", hash = "sha256:d2bc7f02446e86a68911e58ded76d6561eea00cddfb2a91e7019bbb586c799f3"}, + {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, + {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, ] [[package]] name = "mypy" -version = "0.931" +version = "1.8.0" description = "Optional static typing for Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "mypy-0.931-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3c5b42d0815e15518b1f0990cff7a705805961613e701db60387e6fb663fe78a"}, - {file = "mypy-0.931-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c89702cac5b302f0c5d33b172d2b55b5df2bede3344a2fbed99ff96bddb2cf00"}, - {file = 
"mypy-0.931-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:300717a07ad09525401a508ef5d105e6b56646f7942eb92715a1c8d610149714"}, - {file = "mypy-0.931-cp310-cp310-win_amd64.whl", hash = "sha256:7b3f6f557ba4afc7f2ce6d3215d5db279bcf120b3cfd0add20a5d4f4abdae5bc"}, - {file = "mypy-0.931-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1bf752559797c897cdd2c65f7b60c2b6969ffe458417b8d947b8340cc9cec08d"}, - {file = "mypy-0.931-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4365c60266b95a3f216a3047f1d8e3f895da6c7402e9e1ddfab96393122cc58d"}, - {file = "mypy-0.931-cp36-cp36m-win_amd64.whl", hash = "sha256:1b65714dc296a7991000b6ee59a35b3f550e0073411ac9d3202f6516621ba66c"}, - {file = "mypy-0.931-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e839191b8da5b4e5d805f940537efcaa13ea5dd98418f06dc585d2891d228cf0"}, - {file = "mypy-0.931-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:50c7346a46dc76a4ed88f3277d4959de8a2bd0a0fa47fa87a4cde36fe247ac05"}, - {file = "mypy-0.931-cp37-cp37m-win_amd64.whl", hash = "sha256:d8f1ff62f7a879c9fe5917b3f9eb93a79b78aad47b533911b853a757223f72e7"}, - {file = "mypy-0.931-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f9fe20d0872b26c4bba1c1be02c5340de1019530302cf2dcc85c7f9fc3252ae0"}, - {file = "mypy-0.931-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1b06268df7eb53a8feea99cbfff77a6e2b205e70bf31743e786678ef87ee8069"}, - {file = "mypy-0.931-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8c11003aaeaf7cc2d0f1bc101c1cc9454ec4cc9cb825aef3cafff8a5fdf4c799"}, - {file = "mypy-0.931-cp38-cp38-win_amd64.whl", hash = "sha256:d9d2b84b2007cea426e327d2483238f040c49405a6bf4074f605f0156c91a47a"}, - {file = "mypy-0.931-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ff3bf387c14c805ab1388185dd22d6b210824e164d4bb324b195ff34e322d166"}, - {file = "mypy-0.931-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b56154f8c09427bae082b32275a21f500b24d93c88d69a5e82f3978018a0266"}, - {file = "mypy-0.931-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8ca7f8c4b1584d63c9a0f827c37ba7a47226c19a23a753d52e5b5eddb201afcd"}, - {file = "mypy-0.931-cp39-cp39-win_amd64.whl", hash = "sha256:74f7eccbfd436abe9c352ad9fb65872cc0f1f0a868e9d9c44db0893440f0c697"}, - {file = "mypy-0.931-py3-none-any.whl", hash = "sha256:1171f2e0859cfff2d366da2c7092b06130f232c636a3f7301e3feb8b41f6377d"}, - {file = "mypy-0.931.tar.gz", hash = "sha256:0038b21890867793581e4cb0d810829f5fd4441aa75796b53033af3aa30430ce"}, -] - -[package.dependencies] -mypy-extensions = ">=0.4.3" -tomli = ">=1.1.0" -typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} -typing-extensions = ">=3.10" + {file = "mypy-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485a8942f671120f76afffff70f259e1cd0f0cfe08f81c05d8816d958d4577d3"}, + {file = "mypy-1.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:df9824ac11deaf007443e7ed2a4a26bebff98d2bc43c6da21b2b64185da011c4"}, + {file = "mypy-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2afecd6354bbfb6e0160f4e4ad9ba6e4e003b767dd80d85516e71f2e955ab50d"}, + {file = "mypy-1.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8963b83d53ee733a6e4196954502b33567ad07dfd74851f32be18eb932fb1cb9"}, + {file = 
"mypy-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e46f44b54ebddbeedbd3d5b289a893219065ef805d95094d16a0af6630f5d410"}, + {file = "mypy-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:855fe27b80375e5c5878492f0729540db47b186509c98dae341254c8f45f42ae"}, + {file = "mypy-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c886c6cce2d070bd7df4ec4a05a13ee20c0aa60cb587e8d1265b6c03cf91da3"}, + {file = "mypy-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d19c413b3c07cbecf1f991e2221746b0d2a9410b59cb3f4fb9557f0365a1a817"}, + {file = "mypy-1.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9261ed810972061388918c83c3f5cd46079d875026ba97380f3e3978a72f503d"}, + {file = "mypy-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:51720c776d148bad2372ca21ca29256ed483aa9a4cdefefcef49006dff2a6835"}, + {file = "mypy-1.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:52825b01f5c4c1c4eb0db253ec09c7aa17e1a7304d247c48b6f3599ef40db8bd"}, + {file = "mypy-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f5ac9a4eeb1ec0f1ccdc6f326bcdb464de5f80eb07fb38b5ddd7b0de6bc61e55"}, + {file = "mypy-1.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afe3fe972c645b4632c563d3f3eff1cdca2fa058f730df2b93a35e3b0c538218"}, + {file = "mypy-1.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:42c6680d256ab35637ef88891c6bd02514ccb7e1122133ac96055ff458f93fc3"}, + {file = "mypy-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:720a5ca70e136b675af3af63db533c1c8c9181314d207568bbe79051f122669e"}, + {file = "mypy-1.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:028cf9f2cae89e202d7b6593cd98db6759379f17a319b5faf4f9978d7084cdc6"}, + {file = "mypy-1.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4e6d97288757e1ddba10dd9549ac27982e3e74a49d8d0179fc14d4365c7add66"}, + {file = "mypy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f1478736fcebb90f97e40aff11a5f253af890c845ee0c850fe80aa060a267c6"}, + {file = "mypy-1.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42419861b43e6962a649068a61f4a4839205a3ef525b858377a960b9e2de6e0d"}, + {file = "mypy-1.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:2b5b6c721bd4aabaadead3a5e6fa85c11c6c795e0c81a7215776ef8afc66de02"}, + {file = "mypy-1.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5c1538c38584029352878a0466f03a8ee7547d7bd9f641f57a0f3017a7c905b8"}, + {file = "mypy-1.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ef4be7baf08a203170f29e89d79064463b7fc7a0908b9d0d5114e8009c3a259"}, + {file = "mypy-1.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7178def594014aa6c35a8ff411cf37d682f428b3b5617ca79029d8ae72f5402b"}, + {file = "mypy-1.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ab3c84fa13c04aeeeabb2a7f67a25ef5d77ac9d6486ff33ded762ef353aa5592"}, + {file = "mypy-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:99b00bc72855812a60d253420d8a2eae839b0afa4938f09f4d2aa9bb4654263a"}, + {file = "mypy-1.8.0-py3-none-any.whl", hash = "sha256:538fd81bb5e430cc1381a443971c0475582ff9f434c16cd46d2c66763ce85d9d"}, + {file = "mypy-1.8.0.tar.gz", hash = "sha256:6ff8b244d7085a0b425b56d327b480c3b29cafbd2eff27316a004f9a7391ae07"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" [package.extras] dmypy = ["psutil (>=4.0)"] -python2 = ["typed-ast (>=1.4.0,<2)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = 
["lxml"] [[package]] name = "mypy-extensions" @@ -1241,13 +1576,13 @@ files = [ [[package]] name = "nbclient" -version = "0.8.0" +version = "0.9.0" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." -optional = true +optional = false python-versions = ">=3.8.0" files = [ - {file = "nbclient-0.8.0-py3-none-any.whl", hash = "sha256:25e861299e5303a0477568557c4045eccc7a34c17fc08e7959558707b9ebe548"}, - {file = "nbclient-0.8.0.tar.gz", hash = "sha256:f9b179cd4b2d7bca965f900a2ebf0db4a12ebff2f36a711cb66861e4ae158e55"}, + {file = "nbclient-0.9.0-py3-none-any.whl", hash = "sha256:a3a1ddfb34d4a9d17fc744d655962714a866639acd30130e9be84191cd97cd15"}, + {file = "nbclient-0.9.0.tar.gz", hash = "sha256:4b28c207877cf33ef3a9838cdc7a54c5ceff981194a82eac59d558f05487295e"}, ] [package.dependencies] @@ -1261,15 +1596,53 @@ dev = ["pre-commit"] docs = ["autodoc-traits", "mock", "moto", "myst-parser", "nbclient[test]", "sphinx (>=1.7)", "sphinx-book-theme", "sphinxcontrib-spelling"] test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "pytest (>=7.0)", "pytest-asyncio", "pytest-cov (>=4.0)", "testpath", "xmltodict"] +[[package]] +name = "nbconvert" +version = "7.14.2" +description = "Converting Jupyter Notebooks" +optional = false +python-versions = ">=3.8" +files = [ + {file = "nbconvert-7.14.2-py3-none-any.whl", hash = "sha256:db28590cef90f7faf2ebbc71acd402cbecf13d29176df728c0a9025a49345ea1"}, + {file = "nbconvert-7.14.2.tar.gz", hash = "sha256:a7f8808fd4e082431673ac538400218dd45efd076fbeb07cc6e5aa5a3a4e949e"}, +] + +[package.dependencies] +beautifulsoup4 = "*" +bleach = "!=5.0.0" +defusedxml = "*" +importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} +jinja2 = ">=3.0" +jupyter-core = ">=4.7" +jupyterlab-pygments = "*" +markupsafe = ">=2.0" +mistune = ">=2.0.3,<4" +nbclient = ">=0.5.0" +nbformat = ">=5.7" +packaging = "*" +pandocfilters = ">=1.4.1" +pygments = ">=2.4.1" +tinycss2 = "*" +traitlets = ">=5.1" + +[package.extras] +all = ["nbconvert[docs,qtpdf,serve,test,webpdf]"] +docs = ["ipykernel", "ipython", "myst-parser", "nbsphinx (>=0.2.12)", "pydata-sphinx-theme", "sphinx (==5.0.2)", "sphinxcontrib-spelling"] +qtpdf = ["nbconvert[qtpng]"] +qtpng = ["pyqtwebengine (>=5.15)"] +serve = ["tornado (>=6.1)"] +test = ["flaky", "ipykernel", "ipywidgets (>=7.5)", "pytest"] +webpdf = ["playwright"] + [[package]] name = "nbformat" -version = "5.9.1" +version = "5.9.2" description = "The Jupyter Notebook format" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "nbformat-5.9.1-py3-none-any.whl", hash = "sha256:b7968ebf4811178a4108ee837eae1442e3f054132100f0359219e9ed1ce3ca45"}, - {file = "nbformat-5.9.1.tar.gz", hash = "sha256:3a7f52d040639cbd8a3890218c8b0ffb93211588c57446c90095e32ba5881b5d"}, + {file = "nbformat-5.9.2-py3-none-any.whl", hash = "sha256:1c5172d786a41b82bcfd0c23f9e6b6f072e8fb49c39250219e4acfff1efe89e9"}, + {file = "nbformat-5.9.2.tar.gz", hash = "sha256:5f98b5ba1997dff175e77e0c17d5c10a96eaed2cbd1de3533d1fc35d5e111192"}, ] [package.dependencies] @@ -1296,22 +1669,59 @@ files = [ [package.dependencies] setuptools = "*" +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = true +python-versions = ">=3.6" +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = 
"oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + [[package]] name = "packaging" -version = "23.1" +version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, - {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "paginate" +version = "0.5.6" +description = "Divides large result sets into pages for easier browsing" +optional = false +python-versions = "*" +files = [ + {file = "paginate-0.5.6.tar.gz", hash = "sha256:5e6007b6a9398177a7e1648d04fdd9f8c9766a1a945bceac82f1929e8c78af2d"}, +] + +[[package]] +name = "pandocfilters" +version = "1.5.1" +description = "Utilities for writing pandoc filters in python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc"}, + {file = "pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e"}, ] [[package]] name = "parso" version = "0.8.3" description = "A Python Parser" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, @@ -1324,35 +1734,46 @@ testing = ["docopt", "pytest (<6.0.0)"] [[package]] name = "pathspec" -version = "0.11.1" +version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, - {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] [[package]] name = "pbr" -version = "5.11.1" +version = "6.0.0" description = "Python Build Reasonableness" optional = false python-versions = ">=2.6" files = [ - {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, - {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, + {file = "pbr-6.0.0-py2.py3-none-any.whl", hash = "sha256:4a7317d5e3b17a3dccb6a8cfe67dab65b20551404c52c8ed41279fa4f0cb4cda"}, + {file = "pbr-6.0.0.tar.gz", hash = "sha256:d1377122a5a00e2f940ee482999518efe16d745d423a670c27773dfbc3c9a7d9"}, +] + +[[package]] +name = "pefile" +version = "2023.2.7" +description = "Python PE parsing module" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "pefile-2023.2.7-py3-none-any.whl", hash = "sha256:da185cd2af68c08a6cd4481f7325ed600a88f6a813bad9dea07ab3ef73d8d8d6"}, + {file = "pefile-2023.2.7.tar.gz", hash = "sha256:82e6114004b3d6911c77c3953e3838654b04511b8b66e8583db70c65998017dc"}, ] [[package]] name = "pexpect" -version = "4.8.0" +version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." -optional = true +optional = false python-versions = "*" files = [ - {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, - {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, + {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, + {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, ] [package.dependencies] @@ -1362,32 +1783,18 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" -optional = true +optional = false python-versions = "*" files = [ {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, - {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, -] - -[[package]] -name = "pkginfo" -version = "1.9.6" -description = "Query metadata from sdists / bdists / installed packages." -optional = false -python-versions = ">=3.6" -files = [ - {file = "pkginfo-1.9.6-py3-none-any.whl", hash = "sha256:4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546"}, - {file = "pkginfo-1.9.6.tar.gz", hash = "sha256:8fd5896e8718a4372f0ea9cc9d96f6417c9b986e23a4d116dda26b62cc29d046"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, ] -[package.extras] -testing = ["pytest", "pytest-cov"] - [[package]] name = "pkgutil-resolve-name" version = "1.3.10" description = "Resolve a name to an object." 
-optional = true +optional = false python-versions = ">=3.6" files = [ {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, @@ -1396,35 +1803,31 @@ files = [ [[package]] name = "platformdirs" -version = "3.8.1" +version = "4.1.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "platformdirs-3.8.1-py3-none-any.whl", hash = "sha256:cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c"}, - {file = "platformdirs-3.8.1.tar.gz", hash = "sha256:f87ca4fcff7d2b0f81c6a748a77973d7af0f4d526f98f308477c3c436c74d528"}, + {file = "platformdirs-4.1.0-py3-none-any.whl", hash = "sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380"}, + {file = "platformdirs-4.1.0.tar.gz", hash = "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420"}, ] -[package.dependencies] -typing-extensions = {version = ">=4.6.3", markers = "python_version < \"3.8\""} - [package.extras] -docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] [[package]] name = "ploomber-core" -version = "0.2.13" +version = "0.2.22" description = "" -optional = true +optional = false python-versions = "*" files = [ - {file = "ploomber-core-0.2.13.tar.gz", hash = "sha256:85ec50db182ffde945cb6965e98580fc7f8b058b7da52920ef6bddf64d00061e"}, - {file = "ploomber_core-0.2.13-py3-none-any.whl", hash = "sha256:3ef944d240661d4d7364f01ae28e492bad1dd82ee63c44474e726eb6c1284559"}, + {file = "ploomber-core-0.2.22.tar.gz", hash = "sha256:6ba97a39f6dc20053ffa2f9d7f9885ced72b6632f4b7dae45d24b718b63f6292"}, + {file = "ploomber_core-0.2.22-py3-none-any.whl", hash = "sha256:83b118042a2ffe1fa920b7634e7070ae61d0901ad61eea4de34b5048449b01c9"}, ] [package.dependencies] -click = "*" posthog = "*" pyyaml = "*" @@ -1433,23 +1836,26 @@ dev = ["flake8", "invoke", "pkgmt", "pytest", "pywin32", "twine"] [[package]] name = "ploomber-engine" -version = "0.0.19" +version = "0.0.31" description = "" -optional = true +optional = false python-versions = "*" files = [ - {file = "ploomber-engine-0.0.19.tar.gz", hash = "sha256:346ad30f85749106766fc1a0c366fe6bb08afdbd0a5ed30612f5322a63eaf03f"}, - {file = "ploomber_engine-0.0.19-py3-none-any.whl", hash = "sha256:be324ca4de60d219cdb6dea16e4fa8d0c9901a41d7868b7fb2f6867e0cde673f"}, + {file = "ploomber-engine-0.0.31.tar.gz", hash = "sha256:d0d2f64dab57e3ec630de75067f800d545a014f26d27000ef9d01b920592bb77"}, + {file = "ploomber_engine-0.0.31-py3-none-any.whl", hash = "sha256:65d051ed459102c5c2dc086c575e8f8f08e1a89802bd34dab52174758a117d27"}, ] [package.dependencies] click = "*" debuglater = ">=1.4.4" -ipython = "*" +ipython = [ + {version = "<=8.12.0", markers = "python_version <= \"3.8\""}, + {version = "*", markers = "python_version > \"3.8\""}, +] nbclient = "*" nbformat = "*" parso = "*" -ploomber-core = ">=0.1" +ploomber-core = ">=0.2" tqdm = "*" [package.extras] @@ -1457,31 +1863,28 @@ dev = ["flake8", "invoke", "jupytext", "matplotlib", "numpy", 
"pandas", "pkgmt", [[package]] name = "pluggy" -version = "1.2.0" +version = "1.3.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, - {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, ] -[package.dependencies] -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} - [package.extras] dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] [[package]] name = "posthog" -version = "3.0.1" +version = "3.3.2" description = "Integrate PostHog into any python application." -optional = true +optional = false python-versions = "*" files = [ - {file = "posthog-3.0.1-py2.py3-none-any.whl", hash = "sha256:9c7f92fecc713257d4b2710d05b456569c9156fbdd3e85655ba7ba5ba6c7b3ae"}, - {file = "posthog-3.0.1.tar.gz", hash = "sha256:57d2791ff5752ce56ba0f9bb8876faf3ca9208f1c2c6ceaeb5a2504c34493767"}, + {file = "posthog-3.3.2-py2.py3-none-any.whl", hash = "sha256:14fb43ea95c40b353db59c49af2c09ff15188aa2963f48091fc7912fa9375263"}, + {file = "posthog-3.3.2.tar.gz", hash = "sha256:734bf89f3c372605a8bbf2b07f600885287209145d747b09ccd004c59834750e"}, ] [package.dependencies] @@ -1494,46 +1897,65 @@ six = ">=1.5" [package.extras] dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] sentry = ["django", "sentry-sdk"] -test = ["coverage", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest"] +test = ["coverage", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest", "pytest-timeout"] [[package]] name = "pre-commit" -version = "2.21.0" +version = "3.5.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pre_commit-2.21.0-py2.py3-none-any.whl", hash = "sha256:e2f91727039fc39a92f58a588a25b87f936de6567eed4f0e673e0507edc75bad"}, - {file = "pre_commit-2.21.0.tar.gz", hash = "sha256:31ef31af7e474a8d8995027fefdfcf509b5c913ff31f2015b4ec4beb26a6f658"}, + {file = "pre_commit-3.5.0-py2.py3-none-any.whl", hash = "sha256:841dc9aef25daba9a0238cd27984041fa0467b4199fc4852e27950664919f660"}, + {file = "pre_commit-3.5.0.tar.gz", hash = "sha256:5804465c675b659b0862f07907f96295d490822a450c4c40e747d0b1c6ebcb32"}, ] [package.dependencies] cfgv = ">=2.0.0" identify = ">=1.0.0" -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" [[package]] name = "prompt-toolkit" -version = "3.0.39" +version = "3.0.43" description = "Library for building powerful interactive command lines in Python" -optional = true +optional = false python-versions = ">=3.7.0" files = [ - {file = "prompt_toolkit-3.0.39-py3-none-any.whl", hash = "sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88"}, - {file = "prompt_toolkit-3.0.39.tar.gz", hash = "sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac"}, + {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"}, + {file = "prompt_toolkit-3.0.43.tar.gz", hash = "sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d"}, ] [package.dependencies] wcwidth = "*" +[[package]] +name = "protobuf" +version = "4.25.2" +description = "" +optional = true +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"}, + {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"}, + {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"}, + {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"}, + {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"}, + {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"}, + {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"}, + {file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"}, + {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"}, +] + [[package]] name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -optional = true +optional = false python-versions = "*" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -1544,7 +1966,7 @@ files = [ name = 
"pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" -optional = true +optional = false python-versions = "*" files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, @@ -1578,100 +2000,268 @@ files = [ [[package]] name = "pydantic" -version = "1.10.11" -description = "Data validation and settings management using python type hints" +version = "2.5.3" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, + {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.14.6" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.14.6" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, + {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, + {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, + {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, + {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, + {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, + {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, + {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = 
"sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, + {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, + {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, + {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", 
hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, + {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, + {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, + {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, + {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, + {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pyflame" +version = "0.3.1" +description = "A Flamegraph generator for Python" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ff44c5e89315b15ff1f7fdaf9853770b810936d6b01a7bcecaa227d2f8fe444f"}, - {file = "pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6c098d4ab5e2d5b3984d3cb2527e2d6099d3de85630c8934efcfdc348a9760e"}, - {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16928fdc9cb273c6af00d9d5045434c39afba5f42325fb990add2c241402d151"}, - {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0588788a9a85f3e5e9ebca14211a496409cb3deca5b6971ff37c556d581854e7"}, - {file = "pydantic-1.10.11-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e9baf78b31da2dc3d3f346ef18e58ec5f12f5aaa17ac517e2ffd026a92a87588"}, - {file = "pydantic-1.10.11-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:373c0840f5c2b5b1ccadd9286782852b901055998136287828731868027a724f"}, - {file = "pydantic-1.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:c3339a46bbe6013ef7bdd2844679bfe500347ac5742cd4019a88312aa58a9847"}, - {file = "pydantic-1.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:08a6c32e1c3809fbc49debb96bf833164f3438b3696abf0fbeceb417d123e6eb"}, - {file = "pydantic-1.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a451ccab49971af043ec4e0d207cbc8cbe53dbf148ef9f19599024076fe9c25b"}, - {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b02d24f7b2b365fed586ed73582c20f353a4c50e4be9ba2c57ab96f8091ddae"}, - {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f34739a89260dfa420aa3cbd069fbcc794b25bbe5c0a214f8fb29e363484b66"}, - {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e297897eb4bebde985f72a46a7552a7556a3dd11e7f76acda0c1093e3dbcf216"}, - {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d185819a7a059550ecb85d5134e7d40f2565f3dd94cfd870132c5f91a89cf58c"}, - {file = "pydantic-1.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:4400015f15c9b464c9db2d5d951b6a780102cfa5870f2c036d37c23b56f7fc1b"}, - {file = "pydantic-1.10.11-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2417de68290434461a266271fc57274a138510dca19982336639484c73a07af6"}, - {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:331c031ba1554b974c98679bd0780d89670d6fd6f53f5d70b10bdc9addee1713"}, - {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8268a735a14c308923e8958363e3a3404f6834bb98c11f5ab43251a4e410170c"}, - {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:44e51ba599c3ef227e168424e220cd3e544288c57829520dc90ea9cb190c3248"}, - {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d7781f1d13b19700b7949c5a639c764a077cbbdd4322ed505b449d3ca8edcb36"}, - {file = "pydantic-1.10.11-cp37-cp37m-win_amd64.whl", hash = "sha256:7522a7666157aa22b812ce14c827574ddccc94f361237ca6ea8bb0d5c38f1629"}, - {file = "pydantic-1.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc64eab9b19cd794a380179ac0e6752335e9555d214cfcb755820333c0784cb3"}, - {file = "pydantic-1.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8dc77064471780262b6a68fe67e013298d130414d5aaf9b562c33987dbd2cf4f"}, - {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe429898f2c9dd209bd0632a606bddc06f8bce081bbd03d1c775a45886e2c1cb"}, - {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:192c608ad002a748e4a0bed2ddbcd98f9b56df50a7c24d9a931a8c5dd053bd3d"}, - {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ef55392ec4bb5721f4ded1096241e4b7151ba6d50a50a80a2526c854f42e6a2f"}, - {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:41e0bb6efe86281623abbeeb0be64eab740c865388ee934cd3e6a358784aca6e"}, - {file = "pydantic-1.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:265a60da42f9f27e0b1014eab8acd3e53bd0bad5c5b4884e98a55f8f596b2c19"}, - {file = "pydantic-1.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:469adf96c8e2c2bbfa655fc7735a2a82f4c543d9fee97bd113a7fb509bf5e622"}, - {file = "pydantic-1.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e6cbfbd010b14c8a905a7b10f9fe090068d1744d46f9e0c021db28daeb8b6de1"}, - {file = "pydantic-1.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abade85268cc92dff86d6effcd917893130f0ff516f3d637f50dadc22ae93999"}, - {file = "pydantic-1.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9738b0f2e6c70f44ee0de53f2089d6002b10c33264abee07bdb5c7f03038303"}, - {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:787cf23e5a0cde753f2eabac1b2e73ae3844eb873fd1f5bdbff3048d8dbb7604"}, - {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:174899023337b9fc685ac8adaa7b047050616136ccd30e9070627c1aaab53a13"}, - {file = "pydantic-1.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:1954f8778489a04b245a1e7b8b22a9d3ea8ef49337285693cf6959e4b757535e"}, - {file = "pydantic-1.10.11-py3-none-any.whl", hash = "sha256:008c5e266c8aada206d0627a011504e14268a62091450210eda7c07fabe6963e"}, - {file = "pydantic-1.10.11.tar.gz", hash = "sha256:f66d479cf7eb331372c470614be6511eae96f1f120344c25f3f9bb59fb1b5528"}, -] - -[package.dependencies] -typing-extensions = ">=4.2.0" - -[package.extras] -dotenv = ["python-dotenv (>=0.10.4)"] -email = ["email-validator (>=1.0.3)"] + {file = "pyflame-0.3.1-py3-none-any.whl", hash = "sha256:241bbc9c31749225d4502662d9fb833500780c707177d0878cebfe394e758553"}, + {file = "pyflame-0.3.1.tar.gz", hash = "sha256:d4d710a917bf12754675e058fb6d07c376ca52831a11f8f8d38b3426c5f06348"}, +] + +[package.extras] +django = ["django (>=3.2)", "django-debug-toolbar"] +jupyter = ["ipython", "traitlets (>=5)"] [[package]] name = "pygments" -version = "2.15.1" +version = "2.17.2" description = "Pygments is a syntax highlighting package written in Python." 
optional = false python-versions = ">=3.7" files = [ - {file = "Pygments-2.15.1-py3-none-any.whl", hash = "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"}, - {file = "Pygments-2.15.1.tar.gz", hash = "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c"}, + {file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"}, + {file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"}, ] [package.extras] plugins = ["importlib-metadata"] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pyinstaller" +version = "5.13.2" +description = "PyInstaller bundles a Python application and all its dependencies into a single package." +optional = false +python-versions = "<3.13,>=3.7" +files = [ + {file = "pyinstaller-5.13.2-py3-none-macosx_10_13_universal2.whl", hash = "sha256:16cbd66b59a37f4ee59373a003608d15df180a0d9eb1a29ff3bfbfae64b23d0f"}, + {file = "pyinstaller-5.13.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8f6dd0e797ae7efdd79226f78f35eb6a4981db16c13325e962a83395c0ec7420"}, + {file = "pyinstaller-5.13.2-py3-none-manylinux2014_i686.whl", hash = "sha256:65133ed89467edb2862036b35d7c5ebd381670412e1e4361215e289c786dd4e6"}, + {file = "pyinstaller-5.13.2-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:7d51734423685ab2a4324ab2981d9781b203dcae42839161a9ee98bfeaabdade"}, + {file = "pyinstaller-5.13.2-py3-none-manylinux2014_s390x.whl", hash = "sha256:2c2fe9c52cb4577a3ac39626b84cf16cf30c2792f785502661286184f162ae0d"}, + {file = "pyinstaller-5.13.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c63ef6133eefe36c4b2f4daf4cfea3d6412ece2ca218f77aaf967e52a95ac9b8"}, + {file = "pyinstaller-5.13.2-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:aadafb6f213549a5906829bb252e586e2cf72a7fbdb5731810695e6516f0ab30"}, + {file = "pyinstaller-5.13.2-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:b2e1c7f5cceb5e9800927ddd51acf9cc78fbaa9e79e822c48b0ee52d9ce3c892"}, + {file = "pyinstaller-5.13.2-py3-none-win32.whl", hash = "sha256:421cd24f26144f19b66d3868b49ed673176765f92fa9f7914cd2158d25b6d17e"}, + {file = "pyinstaller-5.13.2-py3-none-win_amd64.whl", hash = "sha256:ddcc2b36052a70052479a9e5da1af067b4496f43686ca3cdda99f8367d0627e4"}, + {file = "pyinstaller-5.13.2-py3-none-win_arm64.whl", hash = "sha256:27cd64e7cc6b74c5b1066cbf47d75f940b71356166031deb9778a2579bb874c6"}, + {file = "pyinstaller-5.13.2.tar.gz", hash = "sha256:c8e5d3489c3a7cc5f8401c2d1f48a70e588f9967e391c3b06ddac1f685f8d5d2"}, +] + +[package.dependencies] +altgraph = "*" +macholib = {version = ">=1.8", markers = "sys_platform == \"darwin\""} +pefile = {version = ">=2022.5.30", markers = "sys_platform == \"win32\""} +pyinstaller-hooks-contrib = ">=2021.4" +pywin32-ctypes = {version = ">=0.2.1", markers = "sys_platform == \"win32\""} +setuptools = ">=42.0.0" + +[package.extras] +encryption = ["tinyaes (>=1.0.0)"] +hook-testing = ["execnet (>=1.5.0)", "psutil", "pytest (>=2.7.3)"] + +[[package]] +name = "pyinstaller-hooks-contrib" +version = "2024.0" +description = "Community maintained hooks for PyInstaller" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyinstaller-hooks-contrib-2024.0.tar.gz", hash = "sha256:a7118c1a5c9788595e5c43ad058a7a5b7b6d59e1eceb42362f6ec1f0b61986b0"}, + {file = "pyinstaller_hooks_contrib-2024.0-py2.py3-none-any.whl", hash = "sha256:469b5690df53223e2e8abffb2e44d6ee596e7d79d4b1eed9465123b67439875a"}, +] + 
+[package.dependencies] +importlib-metadata = {version = ">=4.6", markers = "python_version < \"3.10\""} +packaging = ">=22.0" +setuptools = ">=42.0.0" + +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] name = "pymdown-extensions" -version = "10.0.1" +version = "10.7" description = "Extension pack for Python Markdown." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.0.1-py3-none-any.whl", hash = "sha256:ae66d84013c5d027ce055693e09a4628b67e9dec5bce05727e45b0918e36f274"}, - {file = "pymdown_extensions-10.0.1.tar.gz", hash = "sha256:b44e1093a43b8a975eae17b03c3a77aad4681b3b56fce60ce746dbef1944c8cb"}, + {file = "pymdown_extensions-10.7-py3-none-any.whl", hash = "sha256:6ca215bc57bc12bf32b414887a68b810637d039124ed9b2e5bd3325cbb2c050c"}, + {file = "pymdown_extensions-10.7.tar.gz", hash = "sha256:c0d64d5cf62566f59e6b2b690a4095c931107c250a8c8e1351c1de5f6b036deb"}, ] [package.dependencies] -markdown = ">=3.2" +markdown = ">=3.5" pyyaml = "*" +[package.extras] +extra = ["pygments (>=2.12)"] + [[package]] name = "pytest" -version = "7.4.0" +version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, - {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" @@ -1700,13 +2290,13 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale [[package]] name = "pytest-mock" -version = "3.11.1" +version = "3.12.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-mock-3.11.1.tar.gz", hash = "sha256:7f6b125602ac6d743e523ae0bfa71e1a697a2f5534064528c6ff84c2f7c2fc7f"}, - {file = "pytest_mock-3.11.1-py3-none-any.whl", hash = "sha256:21c279fff83d70763b05f8874cc9cfb3fcacd6d354247a976f9529d19f9acf39"}, + {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, + {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = 
"sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, ] [package.dependencies] @@ -1730,61 +2320,21 @@ files = [ six = ">=1.5" [[package]] -name = "python-gitlab" -version = "3.15.0" -description = "Interact with GitLab API" -optional = false -python-versions = ">=3.7.0" -files = [ - {file = "python-gitlab-3.15.0.tar.gz", hash = "sha256:c9e65eb7612a9fbb8abf0339972eca7fd7a73d4da66c9b446ffe528930aff534"}, - {file = "python_gitlab-3.15.0-py3-none-any.whl", hash = "sha256:8f8d1c0d387f642eb1ac7bf5e8e0cd8b3dd49c6f34170cee3c7deb7d384611f3"}, -] - -[package.dependencies] -requests = ">=2.25.0" -requests-toolbelt = ">=0.10.1" -typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.8\""} - -[package.extras] -autocompletion = ["argcomplete (>=1.10.0,<3)"] -yaml = ["PyYaml (>=5.2)"] - -[[package]] -name = "python-semantic-release" -version = "7.34.6" -description = "Automatic Semantic Versioning for Python projects" +name = "pytz" +version = "2023.3.post1" +description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "python-semantic-release-7.34.6.tar.gz", hash = "sha256:e9b8fb788024ae9510a924136d573588415a16eeca31cc5240f2754a80a2e831"}, - {file = "python_semantic_release-7.34.6-py3-none-any.whl", hash = "sha256:7e3969ba4663d9b2087b02bf3ac140e202551377bf045c34e09bfe19753e19ab"}, + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] -[package.dependencies] -click = ">=7,<9" -click-log = ">=0.3,<1" -dotty-dict = ">=1.3.0,<2" -gitpython = ">=3.0.8,<4" -invoke = ">=1.4.1,<3" -packaging = "*" -python-gitlab = ">=2,<4" -requests = ">=2.25,<3" -semver = ">=2.10,<3" -tomlkit = ">=0.10,<1.0" -twine = ">=3,<4" -wheel = "*" - -[package.extras] -dev = ["black", "isort", "tox"] -docs = ["Jinja2 (==3.0.3)", "Sphinx (==1.8.6)"] -mypy = ["mypy", "types-requests"] -test = ["coverage (>=5,<6)", "mock (==1.3.0)", "pytest (>=7,<8)", "pytest-mock (>=2,<3)", "pytest-xdist (>=1,<2)", "responses (==0.13.3)"] - [[package]] name = "pywin32" version = "306" description = "Python for Window Extensions" -optional = true +optional = false python-versions = "*" files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, @@ -1816,51 +2366,51 @@ files = [ [[package]] name = "pyyaml" -version = "6.0" +version = "6.0.1" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.6" files = [ - {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, - {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, - {file = 
"PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, - {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, - {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, - {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, - {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, - {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, - {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, - {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, - {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, - {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, - {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash 
= "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, - {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, - {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, - {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, - {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, - {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = 
"PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] [[package]] @@ -1879,121 +2429,118 @@ pyyaml = "*" [[package]] name = "pyzmq" -version = "25.1.0" +version = "25.1.2" description = "Python bindings for 0MQ" -optional = true +optional = false python-versions = ">=3.6" files = [ - {file = "pyzmq-25.1.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:1a6169e69034eaa06823da6a93a7739ff38716142b3596c180363dee729d713d"}, - {file = "pyzmq-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:19d0383b1f18411d137d891cab567de9afa609b214de68b86e20173dc624c101"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1e931d9a92f628858a50f5bdffdfcf839aebe388b82f9d2ccd5d22a38a789dc"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97d984b1b2f574bc1bb58296d3c0b64b10e95e7026f8716ed6c0b86d4679843f"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:154bddda2a351161474b36dba03bf1463377ec226a13458725183e508840df89"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cb6d161ae94fb35bb518b74bb06b7293299c15ba3bc099dccd6a5b7ae589aee3"}, - {file = "pyzmq-25.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:90146ab578931e0e2826ee39d0c948d0ea72734378f1898939d18bc9c823fcf9"}, - {file = "pyzmq-25.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:831ba20b660b39e39e5ac8603e8193f8fce1ee03a42c84ade89c36a251449d80"}, - {file = "pyzmq-25.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3a522510e3434e12aff80187144c6df556bb06fe6b9d01b2ecfbd2b5bfa5c60c"}, - {file = "pyzmq-25.1.0-cp310-cp310-win32.whl", hash = "sha256:be24a5867b8e3b9dd5c241de359a9a5217698ff616ac2daa47713ba2ebe30ad1"}, - {file = "pyzmq-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:5693dcc4f163481cf79e98cf2d7995c60e43809e325b77a7748d8024b1b7bcba"}, - {file = "pyzmq-25.1.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:13bbe36da3f8aaf2b7ec12696253c0bf6ffe05f4507985a8844a1081db6ec22d"}, - {file = "pyzmq-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:69511d604368f3dc58d4be1b0bad99b61ee92b44afe1cd9b7bd8c5e34ea8248a"}, - {file = "pyzmq-25.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a983c8694667fd76d793ada77fd36c8317e76aa66eec75be2653cef2ea72883"}, - {file = "pyzmq-25.1.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:332616f95eb400492103ab9d542b69d5f0ff628b23129a4bc0a2fd48da6e4e0b"}, - {file = "pyzmq-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58416db767787aedbfd57116714aad6c9ce57215ffa1c3758a52403f7c68cff5"}, - {file = "pyzmq-25.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cad9545f5801a125f162d09ec9b724b7ad9b6440151b89645241d0120e119dcc"}, - {file = 
"pyzmq-25.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d6128d431b8dfa888bf51c22a04d48bcb3d64431caf02b3cb943269f17fd2994"}, - {file = "pyzmq-25.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b15247c49d8cbea695b321ae5478d47cffd496a2ec5ef47131a9e79ddd7e46c"}, - {file = "pyzmq-25.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:442d3efc77ca4d35bee3547a8e08e8d4bb88dadb54a8377014938ba98d2e074a"}, - {file = "pyzmq-25.1.0-cp311-cp311-win32.whl", hash = "sha256:65346f507a815a731092421d0d7d60ed551a80d9b75e8b684307d435a5597425"}, - {file = "pyzmq-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:8b45d722046fea5a5694cba5d86f21f78f0052b40a4bbbbf60128ac55bfcc7b6"}, - {file = "pyzmq-25.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f45808eda8b1d71308c5416ef3abe958f033fdbb356984fabbfc7887bed76b3f"}, - {file = "pyzmq-25.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b697774ea8273e3c0460cf0bba16cd85ca6c46dfe8b303211816d68c492e132"}, - {file = "pyzmq-25.1.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b324fa769577fc2c8f5efcd429cef5acbc17d63fe15ed16d6dcbac2c5eb00849"}, - {file = "pyzmq-25.1.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:5873d6a60b778848ce23b6c0ac26c39e48969823882f607516b91fb323ce80e5"}, - {file = "pyzmq-25.1.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:f0d9e7ba6a815a12c8575ba7887da4b72483e4cfc57179af10c9b937f3f9308f"}, - {file = "pyzmq-25.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:414b8beec76521358b49170db7b9967d6974bdfc3297f47f7d23edec37329b00"}, - {file = "pyzmq-25.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:01f06f33e12497dca86353c354461f75275a5ad9eaea181ac0dc1662da8074fa"}, - {file = "pyzmq-25.1.0-cp36-cp36m-win32.whl", hash = "sha256:b5a07c4f29bf7cb0164664ef87e4aa25435dcc1f818d29842118b0ac1eb8e2b5"}, - {file = "pyzmq-25.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:968b0c737797c1809ec602e082cb63e9824ff2329275336bb88bd71591e94a90"}, - {file = "pyzmq-25.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:47b915ba666c51391836d7ed9a745926b22c434efa76c119f77bcffa64d2c50c"}, - {file = "pyzmq-25.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5af31493663cf76dd36b00dafbc839e83bbca8a0662931e11816d75f36155897"}, - {file = "pyzmq-25.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5489738a692bc7ee9a0a7765979c8a572520d616d12d949eaffc6e061b82b4d1"}, - {file = "pyzmq-25.1.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1fc56a0221bdf67cfa94ef2d6ce5513a3d209c3dfd21fed4d4e87eca1822e3a3"}, - {file = "pyzmq-25.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:75217e83faea9edbc29516fc90c817bc40c6b21a5771ecb53e868e45594826b0"}, - {file = "pyzmq-25.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3830be8826639d801de9053cf86350ed6742c4321ba4236e4b5568528d7bfed7"}, - {file = "pyzmq-25.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3575699d7fd7c9b2108bc1c6128641a9a825a58577775ada26c02eb29e09c517"}, - {file = "pyzmq-25.1.0-cp37-cp37m-win32.whl", hash = "sha256:95bd3a998d8c68b76679f6b18f520904af5204f089beebb7b0301d97704634dd"}, - {file = "pyzmq-25.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:dbc466744a2db4b7ca05589f21ae1a35066afada2f803f92369f5877c100ef62"}, - {file = "pyzmq-25.1.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:3bed53f7218490c68f0e82a29c92335daa9606216e51c64f37b48eb78f1281f4"}, - {file = 
"pyzmq-25.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eb52e826d16c09ef87132c6e360e1879c984f19a4f62d8a935345deac43f3c12"}, - {file = "pyzmq-25.1.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ddbef8b53cd16467fdbfa92a712eae46dd066aa19780681a2ce266e88fbc7165"}, - {file = "pyzmq-25.1.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9301cf1d7fc1ddf668d0abbe3e227fc9ab15bc036a31c247276012abb921b5ff"}, - {file = "pyzmq-25.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e23a8c3b6c06de40bdb9e06288180d630b562db8ac199e8cc535af81f90e64b"}, - {file = "pyzmq-25.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4a82faae00d1eed4809c2f18b37f15ce39a10a1c58fe48b60ad02875d6e13d80"}, - {file = "pyzmq-25.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c8398a1b1951aaa330269c35335ae69744be166e67e0ebd9869bdc09426f3871"}, - {file = "pyzmq-25.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d40682ac60b2a613d36d8d3a0cd14fbdf8e7e0618fbb40aa9fa7b796c9081584"}, - {file = "pyzmq-25.1.0-cp38-cp38-win32.whl", hash = "sha256:33d5c8391a34d56224bccf74f458d82fc6e24b3213fc68165c98b708c7a69325"}, - {file = "pyzmq-25.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:c66b7ff2527e18554030319b1376d81560ca0742c6e0b17ff1ee96624a5f1afd"}, - {file = "pyzmq-25.1.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:af56229ea6527a849ac9fb154a059d7e32e77a8cba27e3e62a1e38d8808cb1a5"}, - {file = "pyzmq-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bdca18b94c404af6ae5533cd1bc310c4931f7ac97c148bbfd2cd4bdd62b96253"}, - {file = "pyzmq-25.1.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b6b42f7055bbc562f63f3df3b63e3dd1ebe9727ff0f124c3aa7bcea7b3a00f9"}, - {file = "pyzmq-25.1.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4c2fc7aad520a97d64ffc98190fce6b64152bde57a10c704b337082679e74f67"}, - {file = "pyzmq-25.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be86a26415a8b6af02cd8d782e3a9ae3872140a057f1cadf0133de685185c02b"}, - {file = "pyzmq-25.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:851fb2fe14036cfc1960d806628b80276af5424db09fe5c91c726890c8e6d943"}, - {file = "pyzmq-25.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2a21fec5c3cea45421a19ccbe6250c82f97af4175bc09de4d6dd78fb0cb4c200"}, - {file = "pyzmq-25.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bad172aba822444b32eae54c2d5ab18cd7dee9814fd5c7ed026603b8cae2d05f"}, - {file = "pyzmq-25.1.0-cp39-cp39-win32.whl", hash = "sha256:4d67609b37204acad3d566bb7391e0ecc25ef8bae22ff72ebe2ad7ffb7847158"}, - {file = "pyzmq-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:71c7b5896e40720d30cd77a81e62b433b981005bbff0cb2f739e0f8d059b5d99"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4cb27ef9d3bdc0c195b2dc54fcb8720e18b741624686a81942e14c8b67cc61a6"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0c4fc2741e0513b5d5a12fe200d6785bbcc621f6f2278893a9ca7bed7f2efb7d"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fc34fdd458ff77a2a00e3c86f899911f6f269d393ca5675842a6e92eea565bae"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8751f9c1442624da391bbd92bd4b072def6d7702a9390e4479f45c182392ff78"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = 
"sha256:6581e886aec3135964a302a0f5eb68f964869b9efd1dbafdebceaaf2934f8a68"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5482f08d2c3c42b920e8771ae8932fbaa0a67dff925fc476996ddd8155a170f3"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5e7fbcafa3ea16d1de1f213c226005fea21ee16ed56134b75b2dede5a2129e62"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:adecf6d02b1beab8d7c04bc36f22bb0e4c65a35eb0b4750b91693631d4081c70"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6d39e42a0aa888122d1beb8ec0d4ddfb6c6b45aecb5ba4013c27e2f28657765"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7018289b402ebf2b2c06992813523de61d4ce17bd514c4339d8f27a6f6809492"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9e68ae9864d260b18f311b68d29134d8776d82e7f5d75ce898b40a88df9db30f"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e21cc00e4debe8f54c3ed7b9fcca540f46eee12762a9fa56feb8512fd9057161"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f666ae327a6899ff560d741681fdcdf4506f990595201ed39b44278c471ad98"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f5efcc29056dfe95e9c9db0dfbb12b62db9c4ad302f812931b6d21dd04a9119"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:48e5e59e77c1a83162ab3c163fc01cd2eebc5b34560341a67421b09be0891287"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:108c96ebbd573d929740d66e4c3d1bdf31d5cde003b8dc7811a3c8c5b0fc173b"}, - {file = "pyzmq-25.1.0.tar.gz", hash = "sha256:80c41023465d36280e801564a69cbfce8ae85ff79b080e1913f6e90481fb8957"}, + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4"}, + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49151b0efece79f6a79d41a461d78535356136ee70084a1c22532fc6383f4ad0"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9a5f194cf730f2b24d6af1f833c14c10f41023da46a7f736f48b6d35061e76e"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:faf79a302f834d9e8304fafdc11d0d042266667ac45209afa57e5efc998e3872"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f51a7b4ead28d3fca8dda53216314a553b0f7a91ee8fc46a72b402a78c3e43d"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0ddd6d71d4ef17ba5a87becf7ddf01b371eaba553c603477679ae817a8d84d75"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:246747b88917e4867e2367b005fc8eefbb4a54b7db363d6c92f89d69abfff4b6"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:00c48ae2fd81e2a50c3485de1b9d5c7c57cd85dc8ec55683eac16846e57ac979"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a68d491fc20762b630e5db2191dd07ff89834086740f70e978bb2ef2668be08"}, + {file = "pyzmq-25.1.2-cp310-cp310-win32.whl", hash = "sha256:09dfe949e83087da88c4a76767df04b22304a682d6154de2c572625c62ad6886"}, + {file = "pyzmq-25.1.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:fa99973d2ed20417744fca0073390ad65ce225b546febb0580358e36aa90dba6"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:82544e0e2d0c1811482d37eef297020a040c32e0687c1f6fc23a75b75db8062c"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:01171fc48542348cd1a360a4b6c3e7d8f46cdcf53a8d40f84db6707a6768acc1"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc69c96735ab501419c432110016329bf0dea8898ce16fab97c6d9106dc0b348"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3e124e6b1dd3dfbeb695435dff0e383256655bb18082e094a8dd1f6293114642"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7598d2ba821caa37a0f9d54c25164a4fa351ce019d64d0b44b45540950458840"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d1299d7e964c13607efd148ca1f07dcbf27c3ab9e125d1d0ae1d580a1682399d"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4e6f689880d5ad87918430957297c975203a082d9a036cc426648fcbedae769b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cc69949484171cc961e6ecd4a8911b9ce7a0d1f738fcae717177c231bf77437b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9880078f683466b7f567b8624bfc16cad65077be046b6e8abb53bed4eeb82dd3"}, + {file = "pyzmq-25.1.2-cp311-cp311-win32.whl", hash = "sha256:4e5837af3e5aaa99a091302df5ee001149baff06ad22b722d34e30df5f0d9097"}, + {file = "pyzmq-25.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:25c2dbb97d38b5ac9fd15586e048ec5eb1e38f3d47fe7d92167b0c77bb3584e9"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:11e70516688190e9c2db14fcf93c04192b02d457b582a1f6190b154691b4c93a"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:313c3794d650d1fccaaab2df942af9f2c01d6217c846177cfcbc693c7410839e"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3cbba2f47062b85fe0ef9de5b987612140a9ba3a9c6d2543c6dec9f7c2ab27"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc31baa0c32a2ca660784d5af3b9487e13b61b3032cb01a115fce6588e1bed30"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c9087b109070c5ab0b383079fa1b5f797f8d43e9a66c07a4b8b8bdecfd88ee"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f8429b17cbb746c3e043cb986328da023657e79d5ed258b711c06a70c2ea7537"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5074adeacede5f810b7ef39607ee59d94e948b4fd954495bdb072f8c54558181"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7ae8f354b895cbd85212da245f1a5ad8159e7840e37d78b476bb4f4c3f32a9fe"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b264bf2cc96b5bc43ce0e852be995e400376bd87ceb363822e2cb1964fcdc737"}, + {file = "pyzmq-25.1.2-cp312-cp312-win32.whl", hash = "sha256:02bbc1a87b76e04fd780b45e7f695471ae6de747769e540da909173d50ff8e2d"}, + {file = "pyzmq-25.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:ced111c2e81506abd1dc142e6cd7b68dd53747b3b7ae5edbea4578c5eeff96b7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7b6d09a8962a91151f0976008eb7b29b433a560fde056ec7a3db9ec8f1075438"}, + {file = 
"pyzmq-25.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967668420f36878a3c9ecb5ab33c9d0ff8d054f9c0233d995a6d25b0e95e1b6b"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5edac3f57c7ddaacdb4d40f6ef2f9e299471fc38d112f4bc6d60ab9365445fb0"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0dabfb10ef897f3b7e101cacba1437bd3a5032ee667b7ead32bbcdd1a8422fe7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2c6441e0398c2baacfe5ba30c937d274cfc2dc5b55e82e3749e333aabffde561"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:16b726c1f6c2e7625706549f9dbe9b06004dfbec30dbed4bf50cbdfc73e5b32a"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:a86c2dd76ef71a773e70551a07318b8e52379f58dafa7ae1e0a4be78efd1ff16"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win32.whl", hash = "sha256:359f7f74b5d3c65dae137f33eb2bcfa7ad9ebefd1cab85c935f063f1dbb245cc"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:55875492f820d0eb3417b51d96fea549cde77893ae3790fd25491c5754ea2f68"}, + {file = "pyzmq-25.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8c8a419dfb02e91b453615c69568442e897aaf77561ee0064d789705ff37a92"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8807c87fa893527ae8a524c15fc505d9950d5e856f03dae5921b5e9aa3b8783b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5e319ed7d6b8f5fad9b76daa0a68497bc6f129858ad956331a5835785761e003"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3c53687dde4d9d473c587ae80cc328e5b102b517447456184b485587ebd18b62"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9add2e5b33d2cd765ad96d5eb734a5e795a0755f7fc49aa04f76d7ddda73fd70"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e690145a8c0c273c28d3b89d6fb32c45e0d9605b2293c10e650265bf5c11cfec"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win32.whl", hash = "sha256:0f97bc2f1f13cb16905a5f3e1fbdf100e712d841482b2237484360f8bc4cb3d7"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6cc0020b74b2e410287e5942e1e10886ff81ac77789eb20bec13f7ae681f0fdd"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:bef02cfcbded83473bdd86dd8d3729cd82b2e569b75844fb4ea08fee3c26ae41"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e10a4b5a4b1192d74853cc71a5e9fd022594573926c2a3a4802020360aa719d8"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8c5f80e578427d4695adac6fdf4370c14a2feafdc8cb35549c219b90652536ae"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5dde6751e857910c1339890f3524de74007958557593b9e7e8c5f01cd919f8a7"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea1608dd169da230a0ad602d5b1ebd39807ac96cae1845c3ceed39af08a5c6df"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0f513130c4c361201da9bc69df25a086487250e16b5571ead521b31ff6b02220"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:019744b99da30330798bb37df33549d59d380c78e516e3bab9c9b84f87a9592f"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2e2713ef44be5d52dd8b8e2023d706bf66cb22072e97fc71b168e01d25192755"}, + {file = "pyzmq-25.1.2-cp38-cp38-win32.whl", hash = "sha256:07cd61a20a535524906595e09344505a9bd46f1da7a07e504b315d41cd42eb07"}, + {file = "pyzmq-25.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb7e49a17fb8c77d3119d41a4523e432eb0c6932187c37deb6fbb00cc3028088"}, + {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:94504ff66f278ab4b7e03e4cba7e7e400cb73bfa9d3d71f58d8972a8dc67e7a6"}, + {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6dd0d50bbf9dca1d0bdea219ae6b40f713a3fb477c06ca3714f208fd69e16fd8"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c0b5ca88a8928147b7b1e2dfa09f3b6c256bc1135a1338536cbc9ea13d3b7add"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9a79f1d2495b167119d02be7448bfba57fad2a4207c4f68abc0bab4b92925b"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:518efd91c3d8ac9f9b4f7dd0e2b7b8bf1a4fe82a308009016b07eaa48681af82"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1ec23bd7b3a893ae676d0e54ad47d18064e6c5ae1fadc2f195143fb27373f7f6"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db36c27baed588a5a8346b971477b718fdc66cf5b80cbfbd914b4d6d355e44e2"}, + {file = "pyzmq-25.1.2-cp39-cp39-win32.whl", hash = "sha256:39b1067f13aba39d794a24761e385e2eddc26295826530a8c7b6c6c341584289"}, + {file = "pyzmq-25.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:8e9f3fabc445d0ce320ea2c59a75fe3ea591fdbdeebec5db6de530dd4b09412e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a8c1d566344aee826b74e472e16edae0a02e2a044f14f7c24e123002dcff1c05"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:759cfd391a0996345ba94b6a5110fca9c557ad4166d86a6e81ea526c376a01e8"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c61e346ac34b74028ede1c6b4bcecf649d69b707b3ff9dc0fab453821b04d1e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cb8fc1f8d69b411b8ec0b5f1ffbcaf14c1db95b6bccea21d83610987435f1a4"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3c00c9b7d1ca8165c610437ca0c92e7b5607b2f9076f4eb4b095c85d6e680a1d"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:df0c7a16ebb94452d2909b9a7b3337940e9a87a824c4fc1c7c36bb4404cb0cde"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:45999e7f7ed5c390f2e87ece7f6c56bf979fb213550229e711e45ecc7d42ccb8"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ac170e9e048b40c605358667aca3d94e98f604a18c44bdb4c102e67070f3ac9b"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b604734bec94f05f81b360a272fc824334267426ae9905ff32dc2be433ab96"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-win_amd64.whl", hash = 
"sha256:a793ac733e3d895d96f865f1806f160696422554e46d30105807fdc9841b9f7d"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0806175f2ae5ad4b835ecd87f5f85583316b69f17e97786f7443baaf54b9bb98"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ef12e259e7bc317c7597d4f6ef59b97b913e162d83b421dd0db3d6410f17a244"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea253b368eb41116011add00f8d5726762320b1bda892f744c91997b65754d73"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b9b1f2ad6498445a941d9a4fee096d387fee436e45cc660e72e768d3d8ee611"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8b14c75979ce932c53b79976a395cb2a8cd3aaf14aef75e8c2cb55a330b9b49d"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:889370d5174a741a62566c003ee8ddba4b04c3f09a97b8000092b7ca83ec9c49"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a18fff090441a40ffda8a7f4f18f03dc56ae73f148f1832e109f9bffa85df15"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99a6b36f95c98839ad98f8c553d8507644c880cf1e0a57fe5e3a3f3969040882"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4345c9a27f4310afbb9c01750e9461ff33d6fb74cd2456b107525bbeebcb5be3"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3516e0b6224cf6e43e341d56da15fd33bdc37fa0c06af4f029f7d7dfceceabbc"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:146b9b1f29ead41255387fb07be56dc29639262c0f7344f570eecdcd8d683314"}, + {file = "pyzmq-25.1.2.tar.gz", hash = "sha256:93f1aa311e8bb912e34f004cf186407a4e90eec4f0ecc0efd26056bf7eda0226"}, ] [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} -[[package]] -name = "readme-renderer" -version = "37.3" -description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse" -optional = false -python-versions = ">=3.7" -files = [ - {file = "readme_renderer-37.3-py3-none-any.whl", hash = "sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343"}, - {file = "readme_renderer-37.3.tar.gz", hash = "sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273"}, -] - -[package.dependencies] -bleach = ">=2.1.0" -docutils = ">=0.13.1" -Pygments = ">=2.5.1" - -[package.extras] -md = ["cmarkgfm (>=0.8.0)"] - [[package]] name = "referencing" -version = "0.29.1" +version = "0.32.1" description = "JSON Referencing + Python" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "referencing-0.29.1-py3-none-any.whl", hash = "sha256:d3c8f323ee1480095da44d55917cfb8278d73d6b4d5f677e3e40eb21314ac67f"}, - {file = "referencing-0.29.1.tar.gz", hash = "sha256:90cb53782d550ba28d2166ef3f55731f38397def8832baac5d45235f1995e35e"}, + {file = "referencing-0.32.1-py3-none-any.whl", hash = "sha256:7e4dc12271d8e15612bfe35792f5ea1c40970dadf8624602e33db2758f7ee554"}, + {file = "referencing-0.32.1.tar.gz", hash = "sha256:3c57da0513e9563eb7e203ebe9bb3a1b509b042016433bd1e45a2853466c3dd3"}, ] [package.dependencies] @@ -2002,99 +2549,104 @@ rpds-py = ">=0.7.0" [[package]] name = "regex" -version = "2023.6.3" +version = "2023.12.25" description = "Alternative regular expression module, 
to replace re." optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "regex-2023.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:824bf3ac11001849aec3fa1d69abcb67aac3e150a933963fb12bda5151fe1bfd"}, - {file = "regex-2023.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:05ed27acdf4465c95826962528f9e8d41dbf9b1aa8531a387dee6ed215a3e9ef"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b49c764f88a79160fa64f9a7b425620e87c9f46095ef9c9920542ab2495c8bc"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e3f1316c2293e5469f8f09dc2d76efb6c3982d3da91ba95061a7e69489a14ef"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43e1dd9d12df9004246bacb79a0e5886b3b6071b32e41f83b0acbf293f820ee8"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4959e8bcbfda5146477d21c3a8ad81b185cd252f3d0d6e4724a5ef11c012fb06"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:af4dd387354dc83a3bff67127a124c21116feb0d2ef536805c454721c5d7993d"}, - {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2239d95d8e243658b8dbb36b12bd10c33ad6e6933a54d36ff053713f129aa536"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:890e5a11c97cf0d0c550eb661b937a1e45431ffa79803b942a057c4fb12a2da2"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a8105e9af3b029f243ab11ad47c19b566482c150c754e4c717900a798806b222"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:25be746a8ec7bc7b082783216de8e9473803706723b3f6bef34b3d0ed03d57e2"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:3676f1dd082be28b1266c93f618ee07741b704ab7b68501a173ce7d8d0d0ca18"}, - {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:10cb847aeb1728412c666ab2e2000ba6f174f25b2bdc7292e7dd71b16db07568"}, - {file = "regex-2023.6.3-cp310-cp310-win32.whl", hash = "sha256:dbbbfce33cd98f97f6bffb17801b0576e653f4fdb1d399b2ea89638bc8d08ae1"}, - {file = "regex-2023.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:c5f8037000eb21e4823aa485149f2299eb589f8d1fe4b448036d230c3f4e68e0"}, - {file = "regex-2023.6.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c123f662be8ec5ab4ea72ea300359023a5d1df095b7ead76fedcd8babbedf969"}, - {file = "regex-2023.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9edcbad1f8a407e450fbac88d89e04e0b99a08473f666a3f3de0fd292badb6aa"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcba6dae7de533c876255317c11f3abe4907ba7d9aa15d13e3d9710d4315ec0e"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29cdd471ebf9e0f2fb3cac165efedc3c58db841d83a518b082077e612d3ee5df"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12b74fbbf6cbbf9dbce20eb9b5879469e97aeeaa874145517563cca4029db65c"}, - {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c29ca1bd61b16b67be247be87390ef1d1ef702800f91fbd1991f5c4421ebae8"}, - {file = 
"regex-2023.6.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d77f09bc4b55d4bf7cc5eba785d87001d6757b7c9eec237fe2af57aba1a071d9"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ea353ecb6ab5f7e7d2f4372b1e779796ebd7b37352d290096978fea83c4dba0c"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:10590510780b7541969287512d1b43f19f965c2ece6c9b1c00fc367b29d8dce7"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e2fbd6236aae3b7f9d514312cdb58e6494ee1c76a9948adde6eba33eb1c4264f"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:6b2675068c8b56f6bfd5a2bda55b8accbb96c02fd563704732fd1c95e2083461"}, - {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74419d2b50ecb98360cfaa2974da8689cb3b45b9deff0dcf489c0d333bcc1477"}, - {file = "regex-2023.6.3-cp311-cp311-win32.whl", hash = "sha256:fb5ec16523dc573a4b277663a2b5a364e2099902d3944c9419a40ebd56a118f9"}, - {file = "regex-2023.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:09e4a1a6acc39294a36b7338819b10baceb227f7f7dbbea0506d419b5a1dd8af"}, - {file = "regex-2023.6.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0654bca0cdf28a5956c83839162692725159f4cda8d63e0911a2c0dc76166525"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:463b6a3ceb5ca952e66550a4532cef94c9a0c80dc156c4cc343041951aec1697"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87b2a5bb5e78ee0ad1de71c664d6eb536dc3947a46a69182a90f4410f5e3f7dd"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6343c6928282c1f6a9db41f5fd551662310e8774c0e5ebccb767002fcf663ca9"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6192d5af2ccd2a38877bfef086d35e6659566a335b1492786ff254c168b1693"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74390d18c75054947e4194019077e243c06fbb62e541d8817a0fa822ea310c14"}, - {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:742e19a90d9bb2f4a6cf2862b8b06dea5e09b96c9f2df1779e53432d7275331f"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8abbc5d54ea0ee80e37fef009e3cec5dafd722ed3c829126253d3e22f3846f1e"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:c2b867c17a7a7ae44c43ebbeb1b5ff406b3e8d5b3e14662683e5e66e6cc868d3"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:d831c2f8ff278179705ca59f7e8524069c1a989e716a1874d6d1aab6119d91d1"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:ee2d1a9a253b1729bb2de27d41f696ae893507c7db224436abe83ee25356f5c1"}, - {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:61474f0b41fe1a80e8dfa70f70ea1e047387b7cd01c85ec88fa44f5d7561d787"}, - {file = "regex-2023.6.3-cp36-cp36m-win32.whl", hash = "sha256:0b71e63226e393b534105fcbdd8740410dc6b0854c2bfa39bbda6b0d40e59a54"}, - {file = "regex-2023.6.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bbb02fd4462f37060122e5acacec78e49c0fbb303c30dd49c7f493cf21fc5b27"}, - {file = "regex-2023.6.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:b862c2b9d5ae38a68b92e215b93f98d4c5e9454fa36aae4450f61dd33ff48487"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:976d7a304b59ede34ca2921305b57356694f9e6879db323fd90a80f865d355a3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:83320a09188e0e6c39088355d423aa9d056ad57a0b6c6381b300ec1a04ec3d16"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9427a399501818a7564f8c90eced1e9e20709ece36be701f394ada99890ea4b3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7178bbc1b2ec40eaca599d13c092079bf529679bf0371c602edaa555e10b41c3"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:837328d14cde912af625d5f303ec29f7e28cdab588674897baafaf505341f2fc"}, - {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d44dc13229905ae96dd2ae2dd7cebf824ee92bc52e8cf03dcead37d926da019"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d54af539295392611e7efbe94e827311eb8b29668e2b3f4cadcfe6f46df9c777"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7117d10690c38a622e54c432dfbbd3cbd92f09401d622902c32f6d377e2300ee"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bb60b503ec8a6e4e3e03a681072fa3a5adcbfa5479fa2d898ae2b4a8e24c4591"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:65ba8603753cec91c71de423a943ba506363b0e5c3fdb913ef8f9caa14b2c7e0"}, - {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:271f0bdba3c70b58e6f500b205d10a36fb4b58bd06ac61381b68de66442efddb"}, - {file = "regex-2023.6.3-cp37-cp37m-win32.whl", hash = "sha256:9beb322958aaca059f34975b0df135181f2e5d7a13b84d3e0e45434749cb20f7"}, - {file = "regex-2023.6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fea75c3710d4f31389eed3c02f62d0b66a9da282521075061ce875eb5300cf23"}, - {file = "regex-2023.6.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f56fcb7ff7bf7404becdfc60b1e81a6d0561807051fd2f1860b0d0348156a07"}, - {file = "regex-2023.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d2da3abc88711bce7557412310dfa50327d5769a31d1c894b58eb256459dc289"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a99b50300df5add73d307cf66abea093304a07eb017bce94f01e795090dea87c"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5708089ed5b40a7b2dc561e0c8baa9535b77771b64a8330b684823cfd5116036"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:687ea9d78a4b1cf82f8479cab23678aff723108df3edeac098e5b2498879f4a7"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3850beab9f527f06ccc94b446c864059c57651b3f911fddb8d9d3ec1d1b25d"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8915cc96abeb8983cea1df3c939e3c6e1ac778340c17732eb63bb96247b91d2"}, - {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:841d6e0e5663d4c7b4c8099c9997be748677d46cbf43f9f471150e560791f7ff"}, - {file = 
"regex-2023.6.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9edce5281f965cf135e19840f4d93d55b3835122aa76ccacfd389e880ba4cf82"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b956231ebdc45f5b7a2e1f90f66a12be9610ce775fe1b1d50414aac1e9206c06"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:36efeba71c6539d23c4643be88295ce8c82c88bbd7c65e8a24081d2ca123da3f"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:cf67ca618b4fd34aee78740bea954d7c69fdda419eb208c2c0c7060bb822d747"}, - {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b4598b1897837067a57b08147a68ac026c1e73b31ef6e36deeeb1fa60b2933c9"}, - {file = "regex-2023.6.3-cp38-cp38-win32.whl", hash = "sha256:f415f802fbcafed5dcc694c13b1292f07fe0befdb94aa8a52905bd115ff41e88"}, - {file = "regex-2023.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:d4f03bb71d482f979bda92e1427f3ec9b220e62a7dd337af0aa6b47bf4498f72"}, - {file = "regex-2023.6.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccf91346b7bd20c790310c4147eee6ed495a54ddb6737162a36ce9dbef3e4751"}, - {file = "regex-2023.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b28f5024a3a041009eb4c333863d7894d191215b39576535c6734cd88b0fcb68"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0bb18053dfcfed432cc3ac632b5e5e5c5b7e55fb3f8090e867bfd9b054dbcbf"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5bfb3004f2144a084a16ce19ca56b8ac46e6fd0651f54269fc9e230edb5e4a"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c6b48d0fa50d8f4df3daf451be7f9689c2bde1a52b1225c5926e3f54b6a9ed1"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:051da80e6eeb6e239e394ae60704d2b566aa6a7aed6f2890a7967307267a5dc6"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4c3b7fa4cdaa69268748665a1a6ff70c014d39bb69c50fda64b396c9116cf77"}, - {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:457b6cce21bee41ac292d6753d5e94dcbc5c9e3e3a834da285b0bde7aa4a11e9"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:aad51907d74fc183033ad796dd4c2e080d1adcc4fd3c0fd4fd499f30c03011cd"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0385e73da22363778ef2324950e08b689abdf0b108a7d8decb403ad7f5191938"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c6a57b742133830eec44d9b2290daf5cbe0a2f1d6acee1b3c7b1c7b2f3606df7"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3e5219bf9e75993d73ab3d25985c857c77e614525fac9ae02b1bebd92f7cecac"}, - {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e5087a3c59eef624a4591ef9eaa6e9a8d8a94c779dade95d27c0bc24650261cd"}, - {file = "regex-2023.6.3-cp39-cp39-win32.whl", hash = "sha256:20326216cc2afe69b6e98528160b225d72f85ab080cbdf0b11528cbbaba2248f"}, - {file = "regex-2023.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:bdff5eab10e59cf26bc479f565e25ed71a7d041d1ded04ccf9aee1d9f208487a"}, - {file = "regex-2023.6.3.tar.gz", hash = "sha256:72d1a25bf36d2050ceb35b517afe13864865268dfb45910e2e17a84be6cbfeb0"}, + {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"}, + {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"}, + {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"}, + {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"}, + {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"}, + {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"}, + {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"}, + {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"}, + {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"}, + {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"}, + {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8a0ccf52bb37d1a700375a6b395bff5dd15c50acb745f7db30415bae3c2b0715"}, + {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c3c4a78615b7762740531c27cf46e2f388d8d727d0c0c739e72048beb26c8a9d"}, + {file = "regex-2023.12.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad83e7545b4ab69216cef4cc47e344d19622e28aabec61574b20257c65466d6a"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7a635871143661feccce3979e1727c4e094f2bdfd3ec4b90dfd4f16f571a87a"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d498eea3f581fbe1b34b59c697512a8baef88212f92e4c7830fcc1499f5b45a5"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43f7cd5754d02a56ae4ebb91b33461dc67be8e3e0153f593c509e21d219c5060"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51f4b32f793812714fd5307222a7f77e739b9bc566dc94a18126aba3b92b98a3"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba99d8077424501b9616b43a2d208095746fb1284fc5ba490139651f971d39d9"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4bfc2b16e3ba8850e0e262467275dd4d62f0d045e0e9eda2bc65078c0110a11f"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8c2c19dae8a3eb0ea45a8448356ed561be843b13cbc34b840922ddf565498c1c"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:60080bb3d8617d96f0fb7e19796384cc2467447ef1c491694850ebd3670bc457"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b77e27b79448e34c2c51c09836033056a0547aa360c45eeeb67803da7b0eedaf"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:518440c991f514331f4850a63560321f833979d145d7d81186dbe2f19e27ae3d"}, + {file = "regex-2023.12.25-cp312-cp312-win32.whl", hash = "sha256:e2610e9406d3b0073636a3a2e80db05a02f0c3169b5632022b4e81c0364bcda5"}, + {file = 
"regex-2023.12.25-cp312-cp312-win_amd64.whl", hash = "sha256:cc37b9aeebab425f11f27e5e9e6cf580be7206c6582a64467a14dda211abc232"}, + {file = "regex-2023.12.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:da695d75ac97cb1cd725adac136d25ca687da4536154cdc2815f576e4da11c69"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d126361607b33c4eb7b36debc173bf25d7805847346dd4d99b5499e1fef52bc7"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4719bb05094d7d8563a450cf8738d2e1061420f79cfcc1fa7f0a44744c4d8f73"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dd58946bce44b53b06d94aa95560d0b243eb2fe64227cba50017a8d8b3cd3e2"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22a86d9fff2009302c440b9d799ef2fe322416d2d58fc124b926aa89365ec482"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2aae8101919e8aa05ecfe6322b278f41ce2994c4a430303c4cd163fef746e04f"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e692296c4cc2873967771345a876bcfc1c547e8dd695c6b89342488b0ea55cd8"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:263ef5cc10979837f243950637fffb06e8daed7f1ac1e39d5910fd29929e489a"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d6f7e255e5fa94642a0724e35406e6cb7001c09d476ab5fce002f652b36d0c39"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:88ad44e220e22b63b0f8f81f007e8abbb92874d8ced66f32571ef8beb0643b2b"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3a17d3ede18f9cedcbe23d2daa8a2cd6f59fe2bf082c567e43083bba3fb00347"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d15b274f9e15b1a0b7a45d2ac86d1f634d983ca40d6b886721626c47a400bf39"}, + {file = "regex-2023.12.25-cp37-cp37m-win32.whl", hash = "sha256:ed19b3a05ae0c97dd8f75a5d8f21f7723a8c33bbc555da6bbe1f96c470139d3c"}, + {file = "regex-2023.12.25-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d1047952c0b8104a1d371f88f4ab62e6275567d4458c1e26e9627ad489b445"}, + {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b43523d7bc2abd757119dbfb38af91b5735eea45537ec6ec3a5ec3f9562a1c53"}, + {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:efb2d82f33b2212898f1659fb1c2e9ac30493ac41e4d53123da374c3b5541e64"}, + {file = "regex-2023.12.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7fca9205b59c1a3d5031f7e64ed627a1074730a51c2a80e97653e3e9fa0d415"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086dd15e9435b393ae06f96ab69ab2d333f5d65cbe65ca5a3ef0ec9564dfe770"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e81469f7d01efed9b53740aedd26085f20d49da65f9c1f41e822a33992cb1590"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34e4af5b27232f68042aa40a91c3b9bb4da0eeb31b7632e0091afc4310afe6cb"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9852b76ab558e45b20bf1893b59af64a28bd3820b0c2efc80e0a70a4a3ea51c1"}, + {file = 
"regex-2023.12.25-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff100b203092af77d1a5a7abe085b3506b7eaaf9abf65b73b7d6905b6cb76988"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cc038b2d8b1470364b1888a98fd22d616fba2b6309c5b5f181ad4483e0017861"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:094ba386bb5c01e54e14434d4caabf6583334090865b23ef58e0424a6286d3dc"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5cd05d0f57846d8ba4b71d9c00f6f37d6b97d5e5ef8b3c3840426a475c8f70f4"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:9aa1a67bbf0f957bbe096375887b2505f5d8ae16bf04488e8b0f334c36e31360"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:98a2636994f943b871786c9e82bfe7883ecdaba2ef5df54e1450fa9869d1f756"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37f8e93a81fc5e5bd8db7e10e62dc64261bcd88f8d7e6640aaebe9bc180d9ce2"}, + {file = "regex-2023.12.25-cp38-cp38-win32.whl", hash = "sha256:d78bd484930c1da2b9679290a41cdb25cc127d783768a0369d6b449e72f88beb"}, + {file = "regex-2023.12.25-cp38-cp38-win_amd64.whl", hash = "sha256:b521dcecebc5b978b447f0f69b5b7f3840eac454862270406a39837ffae4e697"}, + {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"}, + {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"}, + {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"}, + {file = 
"regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"}, + {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"}, + {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"}, + {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"}, ] [[package]] @@ -2119,201 +2671,207 @@ socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] -name = "requests-toolbelt" -version = "1.0.0" -description = "A utility belt for advanced users of python-requests" +name = "rich" +version = "13.7.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.7.0" files = [ - {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, - {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, + {file = "rich-13.7.0-py3-none-any.whl", hash = "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235"}, + {file = "rich-13.7.0.tar.gz", hash = "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa"}, ] [package.dependencies] -requests = ">=2.0.1,<3.0.0" - -[[package]] -name = "rfc3986" -version = "2.0.0" -description = "Validating URI References per RFC 3986" -optional = false -python-versions = ">=3.7" -files = [ - {file = "rfc3986-2.0.0-py2.py3-none-any.whl", hash = "sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd"}, - {file = "rfc3986-2.0.0.tar.gz", hash = "sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c"}, -] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} [package.extras] -idna2008 = ["idna"] +jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "rpds-py" -version = "0.8.10" +version = "0.17.1" description = "Python bindings to Rust's persistent data structures (rpds)" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "rpds_py-0.8.10-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:93d06cccae15b3836247319eee7b6f1fdcd6c10dabb4e6d350d27bd0bdca2711"}, - {file = "rpds_py-0.8.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3816a890a6a9e9f1de250afa12ca71c9a7a62f2b715a29af6aaee3aea112c181"}, - {file = "rpds_py-0.8.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7c6304b894546b5a6bdc0fe15761fa53fe87d28527a7142dae8de3c663853e1"}, - {file = "rpds_py-0.8.10-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ad3bfb44c8840fb4be719dc58e229f435e227fbfbe133dc33f34981ff622a8f8"}, - {file = "rpds_py-0.8.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:14f1c356712f66653b777ecd8819804781b23dbbac4eade4366b94944c9e78ad"}, - {file = "rpds_py-0.8.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82bb361cae4d0a627006dadd69dc2f36b7ad5dc1367af9d02e296ec565248b5b"}, - {file = "rpds_py-0.8.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b2e3c4f2a8e3da47f850d7ea0d7d56720f0f091d66add889056098c4b2fd576c"}, - {file = "rpds_py-0.8.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15a90d0ac11b4499171067ae40a220d1ca3cb685ec0acc356d8f3800e07e4cb8"}, - {file = "rpds_py-0.8.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:70bb9c8004b97b4ef7ae56a2aa56dfaa74734a0987c78e7e85f00004ab9bf2d0"}, - {file = "rpds_py-0.8.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d64f9f88d5203274a002b54442cafc9c7a1abff2a238f3e767b70aadf919b451"}, - {file = "rpds_py-0.8.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ccbbd276642788c4376fbe8d4e6c50f0fb4972ce09ecb051509062915891cbf0"}, - {file = "rpds_py-0.8.10-cp310-none-win32.whl", hash = "sha256:fafc0049add8043ad07ab5382ee80d80ed7e3699847f26c9a5cf4d3714d96a84"}, - {file = "rpds_py-0.8.10-cp310-none-win_amd64.whl", hash = "sha256:915031002c86a5add7c6fd4beb601b2415e8a1c956590a5f91d825858e92fe6e"}, - {file = "rpds_py-0.8.10-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:84eb541a44f7a18f07a6bfc48b95240739e93defe1fdfb4f2a295f37837945d7"}, - {file = "rpds_py-0.8.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f59996d0550894affaad8743e97b9b9c98f638b221fac12909210ec3d9294786"}, - {file = "rpds_py-0.8.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9adb5664b78fcfcd830000416c8cc69853ef43cb084d645b3f1f0296edd9bae"}, - {file = "rpds_py-0.8.10-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f96f3f98fbff7af29e9edf9a6584f3c1382e7788783d07ba3721790625caa43e"}, - {file = "rpds_py-0.8.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:376b8de737401050bd12810003d207e824380be58810c031f10ec563ff6aef3d"}, - {file = "rpds_py-0.8.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d1c2bc319428d50b3e0fa6b673ab8cc7fa2755a92898db3a594cbc4eeb6d1f7"}, - {file = "rpds_py-0.8.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73a1e48430f418f0ac3dfd87860e4cc0d33ad6c0f589099a298cb53724db1169"}, - {file = "rpds_py-0.8.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:134ec8f14ca7dbc6d9ae34dac632cdd60939fe3734b5d287a69683c037c51acb"}, - {file = "rpds_py-0.8.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4b519bac7c09444dd85280fd60f28c6dde4389c88dddf4279ba9b630aca3bbbe"}, - {file = "rpds_py-0.8.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9cd57981d9fab04fc74438d82460f057a2419974d69a96b06a440822d693b3c0"}, - {file = "rpds_py-0.8.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:69d089c026f6a8b9d64a06ff67dc3be196707b699d7f6ca930c25f00cf5e30d8"}, - {file = "rpds_py-0.8.10-cp311-none-win32.whl", hash = "sha256:220bdcad2d2936f674650d304e20ac480a3ce88a40fe56cd084b5780f1d104d9"}, - {file = "rpds_py-0.8.10-cp311-none-win_amd64.whl", hash = "sha256:6c6a0225b8501d881b32ebf3f5807a08ad3685b5eb5f0a6bfffd3a6e039b2055"}, - {file = "rpds_py-0.8.10-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:e3d0cd3dff0e7638a7b5390f3a53057c4e347f4ef122ee84ed93fc2fb7ea4aa2"}, - {file = "rpds_py-0.8.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d77dff3a5aa5eedcc3da0ebd10ff8e4969bc9541aa3333a8d41715b429e99f47"}, - {file = "rpds_py-0.8.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41c89a366eae49ad9e65ed443a8f94aee762931a1e3723749d72aeac80f5ef2f"}, - {file = "rpds_py-0.8.10-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:3793c21494bad1373da517001d0849eea322e9a049a0e4789e50d8d1329df8e7"}, - {file = "rpds_py-0.8.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:805a5f3f05d186c5d50de2e26f765ba7896d0cc1ac5b14ffc36fae36df5d2f10"}, - {file = "rpds_py-0.8.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b01b39ad5411563031ea3977bbbc7324d82b088e802339e6296f082f78f6115c"}, - {file = "rpds_py-0.8.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3f1e860be21f3e83011116a65e7310486300e08d9a3028e73e8d13bb6c77292"}, - {file = "rpds_py-0.8.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a13c8e56c46474cd5958d525ce6a9996727a83d9335684e41f5192c83deb6c58"}, - {file = "rpds_py-0.8.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:93d99f957a300d7a4ced41615c45aeb0343bb8f067c42b770b505de67a132346"}, - {file = "rpds_py-0.8.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:148b0b38d719c0760e31ce9285a9872972bdd7774969a4154f40c980e5beaca7"}, - {file = "rpds_py-0.8.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3cc5e5b5514796f45f03a568981971b12a3570f3de2e76114f7dc18d4b60a3c4"}, - {file = "rpds_py-0.8.10-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:e8e24b210a4deb5a7744971f8f77393005bae7f873568e37dfd9effe808be7f7"}, - {file = "rpds_py-0.8.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b41941583adce4242af003d2a8337b066ba6148ca435f295f31ac6d9e4ea2722"}, - {file = "rpds_py-0.8.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c490204e16bca4f835dba8467869fe7295cdeaa096e4c5a7af97f3454a97991"}, - {file = "rpds_py-0.8.10-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ee45cd1d84beed6cbebc839fd85c2e70a3a1325c8cfd16b62c96e2ffb565eca"}, - {file = "rpds_py-0.8.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a8ca409f1252e1220bf09c57290b76cae2f14723746215a1e0506472ebd7bdf"}, - {file = "rpds_py-0.8.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96b293c0498c70162effb13100624c5863797d99df75f2f647438bd10cbf73e4"}, - {file = "rpds_py-0.8.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4627520a02fccbd324b33c7a83e5d7906ec746e1083a9ac93c41ac7d15548c7"}, - {file = "rpds_py-0.8.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e39d7ab0c18ac99955b36cd19f43926450baba21e3250f053e0704d6ffd76873"}, - {file = "rpds_py-0.8.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ba9f1d1ebe4b63801977cec7401f2d41e888128ae40b5441270d43140efcad52"}, - {file = "rpds_py-0.8.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:802f42200d8caf7f25bbb2a6464cbd83e69d600151b7e3b49f49a47fa56b0a38"}, - {file = "rpds_py-0.8.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d19db6ba816e7f59fc806c690918da80a7d186f00247048cd833acdab9b4847b"}, - {file = "rpds_py-0.8.10-cp38-none-win32.whl", hash = "sha256:7947e6e2c2ad68b1c12ee797d15e5f8d0db36331200b0346871492784083b0c6"}, - {file = "rpds_py-0.8.10-cp38-none-win_amd64.whl", hash = "sha256:fa326b3505d5784436d9433b7980171ab2375535d93dd63fbcd20af2b5ca1bb6"}, - {file = "rpds_py-0.8.10-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:7b38a9ac96eeb6613e7f312cd0014de64c3f07000e8bf0004ad6ec153bac46f8"}, - {file = "rpds_py-0.8.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c4d42e83ddbf3445e6514f0aff96dca511421ed0392d9977d3990d9f1ba6753c"}, - {file = 
"rpds_py-0.8.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b21575031478609db6dbd1f0465e739fe0e7f424a8e7e87610a6c7f68b4eb16"}, - {file = "rpds_py-0.8.10-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:574868858a7ff6011192c023a5289158ed20e3f3b94b54f97210a773f2f22921"}, - {file = "rpds_py-0.8.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae40f4a70a1f40939d66ecbaf8e7edc144fded190c4a45898a8cfe19d8fc85ea"}, - {file = "rpds_py-0.8.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37f7ee4dc86db7af3bac6d2a2cedbecb8e57ce4ed081f6464510e537589f8b1e"}, - {file = "rpds_py-0.8.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:695f642a3a5dbd4ad2ffbbacf784716ecd87f1b7a460843b9ddf965ccaeafff4"}, - {file = "rpds_py-0.8.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f43ab4cb04bde6109eb2555528a64dfd8a265cc6a9920a67dcbde13ef53a46c8"}, - {file = "rpds_py-0.8.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a11ab0d97be374efd04f640c04fe5c2d3dabc6dfb998954ea946ee3aec97056d"}, - {file = "rpds_py-0.8.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:92cf5b3ee60eef41f41e1a2cabca466846fb22f37fc580ffbcb934d1bcab225a"}, - {file = "rpds_py-0.8.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ceaac0c603bf5ac2f505a78b2dcab78d3e6b706be6596c8364b64cc613d208d2"}, - {file = "rpds_py-0.8.10-cp39-none-win32.whl", hash = "sha256:dd4f16e57c12c0ae17606c53d1b57d8d1c8792efe3f065a37cb3341340599d49"}, - {file = "rpds_py-0.8.10-cp39-none-win_amd64.whl", hash = "sha256:c03a435d26c3999c2a8642cecad5d1c4d10c961817536af52035f6f4ee2f5dd0"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:0da53292edafecba5e1d8c1218f99babf2ed0bf1c791d83c0ab5c29b57223068"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d20a8ed227683401cc508e7be58cba90cc97f784ea8b039c8cd01111e6043e0"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97cab733d303252f7c2f7052bf021a3469d764fc2b65e6dbef5af3cbf89d4892"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8c398fda6df361a30935ab4c4bccb7f7a3daef2964ca237f607c90e9f3fdf66f"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2eb4b08c45f8f8d8254cdbfacd3fc5d6b415d64487fb30d7380b0d0569837bf1"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7dfb1cbb895810fa2b892b68153c17716c6abaa22c7dc2b2f6dcf3364932a1c"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89c92b74e8bf6f53a6f4995fd52f4bd510c12f103ee62c99e22bc9e05d45583c"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e9c0683cb35a9b5881b41bc01d5568ffc667910d9dbc632a1fba4e7d59e98773"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:0eeb2731708207d0fe2619afe6c4dc8cb9798f7de052da891de5f19c0006c315"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:7495010b658ec5b52835f21d8c8b1a7e52e194c50f095d4223c0b96c3da704b1"}, - {file = "rpds_py-0.8.10-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c72ebc22e70e04126158c46ba56b85372bc4d54d00d296be060b0db1671638a4"}, - {file = 
"rpds_py-0.8.10-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:2cd3045e7f6375dda64ed7db1c5136826facb0159ea982f77d9cf6125025bd34"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:2418cf17d653d24ffb8b75e81f9f60b7ba1b009a23298a433a4720b2a0a17017"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a2edf8173ac0c7a19da21bc68818be1321998528b5e3f748d6ee90c0ba2a1fd"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7f29b8c55fd3a2bc48e485e37c4e2df3317f43b5cc6c4b6631c33726f52ffbb3"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a7d20c1cf8d7b3960c5072c265ec47b3f72a0c608a9a6ee0103189b4f28d531"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:521fc8861a86ae54359edf53a15a05fabc10593cea7b3357574132f8427a5e5a"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5c191713e98e7c28800233f039a32a42c1a4f9a001a8a0f2448b07391881036"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:083df0fafe199371206111583c686c985dddaf95ab3ee8e7b24f1fda54515d09"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:ed41f3f49507936a6fe7003985ea2574daccfef999775525d79eb67344e23767"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:2614c2732bf45de5c7f9e9e54e18bc78693fa2f635ae58d2895b7965e470378c"}, - {file = "rpds_py-0.8.10-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c60528671d9d467009a6ec284582179f6b88651e83367d0ab54cb739021cd7de"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ee744fca8d1ea822480a2a4e7c5f2e1950745477143668f0b523769426060f29"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a38b9f526d0d6cbdaa37808c400e3d9f9473ac4ff64d33d9163fd05d243dbd9b"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60e0e86e870350e03b3e25f9b1dd2c6cc72d2b5f24e070249418320a6f9097b7"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f53f55a8852f0e49b0fc76f2412045d6ad9d5772251dea8f55ea45021616e7d5"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c493365d3fad241d52f096e4995475a60a80f4eba4d3ff89b713bc65c2ca9615"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:300eb606e6b94a7a26f11c8cc8ee59e295c6649bd927f91e1dbd37a4c89430b6"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a665f6f1a87614d1c3039baf44109094926dedf785e346d8b0a728e9cabd27a"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:927d784648211447201d4c6f1babddb7971abad922b32257ab74de2f2750fad0"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c200b30dd573afa83847bed7e3041aa36a8145221bf0cfdfaa62d974d720805c"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:08166467258fd0240a1256fce272f689f2360227ee41c72aeea103e9e4f63d2b"}, - {file = "rpds_py-0.8.10-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = 
"sha256:996cc95830de9bc22b183661d95559ec6b3cd900ad7bc9154c4cbf5be0c9b734"}, - {file = "rpds_py-0.8.10.tar.gz", hash = "sha256:13e643ce8ad502a0263397362fb887594b49cf84bf518d6038c16f235f2bcea4"}, + {file = "rpds_py-0.17.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:4128980a14ed805e1b91a7ed551250282a8ddf8201a4e9f8f5b7e6225f54170d"}, + {file = "rpds_py-0.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ff1dcb8e8bc2261a088821b2595ef031c91d499a0c1b031c152d43fe0a6ecec8"}, + {file = "rpds_py-0.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d65e6b4f1443048eb7e833c2accb4fa7ee67cc7d54f31b4f0555b474758bee55"}, + {file = "rpds_py-0.17.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a71169d505af63bb4d20d23a8fbd4c6ce272e7bce6cc31f617152aa784436f29"}, + {file = "rpds_py-0.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:436474f17733c7dca0fbf096d36ae65277e8645039df12a0fa52445ca494729d"}, + {file = "rpds_py-0.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10162fe3f5f47c37ebf6d8ff5a2368508fe22007e3077bf25b9c7d803454d921"}, + {file = "rpds_py-0.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:720215373a280f78a1814becb1312d4e4d1077b1202a56d2b0815e95ccb99ce9"}, + {file = "rpds_py-0.17.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:70fcc6c2906cfa5c6a552ba7ae2ce64b6c32f437d8f3f8eea49925b278a61453"}, + {file = "rpds_py-0.17.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:91e5a8200e65aaac342a791272c564dffcf1281abd635d304d6c4e6b495f29dc"}, + {file = "rpds_py-0.17.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:99f567dae93e10be2daaa896e07513dd4bf9c2ecf0576e0533ac36ba3b1d5394"}, + {file = "rpds_py-0.17.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:24e4900a6643f87058a27320f81336d527ccfe503984528edde4bb660c8c8d59"}, + {file = "rpds_py-0.17.1-cp310-none-win32.whl", hash = "sha256:0bfb09bf41fe7c51413f563373e5f537eaa653d7adc4830399d4e9bdc199959d"}, + {file = "rpds_py-0.17.1-cp310-none-win_amd64.whl", hash = "sha256:20de7b7179e2031a04042e85dc463a93a82bc177eeba5ddd13ff746325558aa6"}, + {file = "rpds_py-0.17.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:65dcf105c1943cba45d19207ef51b8bc46d232a381e94dd38719d52d3980015b"}, + {file = "rpds_py-0.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:01f58a7306b64e0a4fe042047dd2b7d411ee82e54240284bab63e325762c1147"}, + {file = "rpds_py-0.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:071bc28c589b86bc6351a339114fb7a029f5cddbaca34103aa573eba7b482382"}, + {file = "rpds_py-0.17.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae35e8e6801c5ab071b992cb2da958eee76340e6926ec693b5ff7d6381441745"}, + {file = "rpds_py-0.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149c5cd24f729e3567b56e1795f74577aa3126c14c11e457bec1b1c90d212e38"}, + {file = "rpds_py-0.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e796051f2070f47230c745d0a77a91088fbee2cc0502e9b796b9c6471983718c"}, + {file = "rpds_py-0.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e820ee1004327609b28db8307acc27f5f2e9a0b185b2064c5f23e815f248f8"}, + {file = "rpds_py-0.17.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1957a2ab607f9added64478a6982742eb29f109d89d065fa44e01691a20fc20a"}, + {file = 
"rpds_py-0.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8587fd64c2a91c33cdc39d0cebdaf30e79491cc029a37fcd458ba863f8815383"}, + {file = "rpds_py-0.17.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4dc889a9d8a34758d0fcc9ac86adb97bab3fb7f0c4d29794357eb147536483fd"}, + {file = "rpds_py-0.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2953937f83820376b5979318840f3ee47477d94c17b940fe31d9458d79ae7eea"}, + {file = "rpds_py-0.17.1-cp311-none-win32.whl", hash = "sha256:1bfcad3109c1e5ba3cbe2f421614e70439f72897515a96c462ea657261b96518"}, + {file = "rpds_py-0.17.1-cp311-none-win_amd64.whl", hash = "sha256:99da0a4686ada4ed0f778120a0ea8d066de1a0a92ab0d13ae68492a437db78bf"}, + {file = "rpds_py-0.17.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1dc29db3900cb1bb40353772417800f29c3d078dbc8024fd64655a04ee3c4bdf"}, + {file = "rpds_py-0.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82ada4a8ed9e82e443fcef87e22a3eed3654dd3adf6e3b3a0deb70f03e86142a"}, + {file = "rpds_py-0.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d36b2b59e8cc6e576f8f7b671e32f2ff43153f0ad6d0201250a7c07f25d570e"}, + {file = "rpds_py-0.17.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3677fcca7fb728c86a78660c7fb1b07b69b281964673f486ae72860e13f512ad"}, + {file = "rpds_py-0.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:516fb8c77805159e97a689e2f1c80655c7658f5af601c34ffdb916605598cda2"}, + {file = "rpds_py-0.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df3b6f45ba4515632c5064e35ca7f31d51d13d1479673185ba8f9fefbbed58b9"}, + {file = "rpds_py-0.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a967dd6afda7715d911c25a6ba1517975acd8d1092b2f326718725461a3d33f9"}, + {file = "rpds_py-0.17.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dbbb95e6fc91ea3102505d111b327004d1c4ce98d56a4a02e82cd451f9f57140"}, + {file = "rpds_py-0.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02866e060219514940342a1f84303a1ef7a1dad0ac311792fbbe19b521b489d2"}, + {file = "rpds_py-0.17.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:2528ff96d09f12e638695f3a2e0c609c7b84c6df7c5ae9bfeb9252b6fa686253"}, + {file = "rpds_py-0.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd345a13ce06e94c753dab52f8e71e5252aec1e4f8022d24d56decd31e1b9b23"}, + {file = "rpds_py-0.17.1-cp312-none-win32.whl", hash = "sha256:2a792b2e1d3038daa83fa474d559acfd6dc1e3650ee93b2662ddc17dbff20ad1"}, + {file = "rpds_py-0.17.1-cp312-none-win_amd64.whl", hash = "sha256:292f7344a3301802e7c25c53792fae7d1593cb0e50964e7bcdcc5cf533d634e3"}, + {file = "rpds_py-0.17.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:8ffe53e1d8ef2520ebcf0c9fec15bb721da59e8ef283b6ff3079613b1e30513d"}, + {file = "rpds_py-0.17.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4341bd7579611cf50e7b20bb8c2e23512a3dc79de987a1f411cb458ab670eb90"}, + {file = "rpds_py-0.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f4eb548daf4836e3b2c662033bfbfc551db58d30fd8fe660314f86bf8510b93"}, + {file = "rpds_py-0.17.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b686f25377f9c006acbac63f61614416a6317133ab7fafe5de5f7dc8a06d42eb"}, + {file = "rpds_py-0.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4e21b76075c01d65d0f0f34302b5a7457d95721d5e0667aea65e5bb3ab415c25"}, + {file = 
"rpds_py-0.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b86b21b348f7e5485fae740d845c65a880f5d1eda1e063bc59bef92d1f7d0c55"}, + {file = "rpds_py-0.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f175e95a197f6a4059b50757a3dca33b32b61691bdbd22c29e8a8d21d3914cae"}, + {file = "rpds_py-0.17.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1701fc54460ae2e5efc1dd6350eafd7a760f516df8dbe51d4a1c79d69472fbd4"}, + {file = "rpds_py-0.17.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9051e3d2af8f55b42061603e29e744724cb5f65b128a491446cc029b3e2ea896"}, + {file = "rpds_py-0.17.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:7450dbd659fed6dd41d1a7d47ed767e893ba402af8ae664c157c255ec6067fde"}, + {file = "rpds_py-0.17.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5a024fa96d541fd7edaa0e9d904601c6445e95a729a2900c5aec6555fe921ed6"}, + {file = "rpds_py-0.17.1-cp38-none-win32.whl", hash = "sha256:da1ead63368c04a9bded7904757dfcae01eba0e0f9bc41d3d7f57ebf1c04015a"}, + {file = "rpds_py-0.17.1-cp38-none-win_amd64.whl", hash = "sha256:841320e1841bb53fada91c9725e766bb25009cfd4144e92298db296fb6c894fb"}, + {file = "rpds_py-0.17.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:f6c43b6f97209e370124baf2bf40bb1e8edc25311a158867eb1c3a5d449ebc7a"}, + {file = "rpds_py-0.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7d63ec01fe7c76c2dbb7e972fece45acbb8836e72682bde138e7e039906e2c"}, + {file = "rpds_py-0.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81038ff87a4e04c22e1d81f947c6ac46f122e0c80460b9006e6517c4d842a6ec"}, + {file = "rpds_py-0.17.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:810685321f4a304b2b55577c915bece4c4a06dfe38f6e62d9cc1d6ca8ee86b99"}, + {file = "rpds_py-0.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25f071737dae674ca8937a73d0f43f5a52e92c2d178330b4c0bb6ab05586ffa6"}, + {file = "rpds_py-0.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa5bfb13f1e89151ade0eb812f7b0d7a4d643406caaad65ce1cbabe0a66d695f"}, + {file = "rpds_py-0.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfe07308b311a8293a0d5ef4e61411c5c20f682db6b5e73de6c7c8824272c256"}, + {file = "rpds_py-0.17.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a000133a90eea274a6f28adc3084643263b1e7c1a5a66eb0a0a7a36aa757ed74"}, + {file = "rpds_py-0.17.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d0e8a6434a3fbf77d11448c9c25b2f25244226cfbec1a5159947cac5b8c5fa4"}, + {file = "rpds_py-0.17.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:efa767c220d94aa4ac3a6dd3aeb986e9f229eaf5bce92d8b1b3018d06bed3772"}, + {file = "rpds_py-0.17.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:dbc56680ecf585a384fbd93cd42bc82668b77cb525343170a2d86dafaed2a84b"}, + {file = "rpds_py-0.17.1-cp39-none-win32.whl", hash = "sha256:270987bc22e7e5a962b1094953ae901395e8c1e1e83ad016c5cfcfff75a15a3f"}, + {file = "rpds_py-0.17.1-cp39-none-win_amd64.whl", hash = "sha256:2a7b2f2f56a16a6d62e55354dd329d929560442bd92e87397b7a9586a32e3e76"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a3264e3e858de4fc601741498215835ff324ff2482fd4e4af61b46512dd7fc83"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:f2f3b28b40fddcb6c1f1f6c88c6f3769cd933fa493ceb79da45968a21dccc920"}, + {file = 
"rpds_py-0.17.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9584f8f52010295a4a417221861df9bea4c72d9632562b6e59b3c7b87a1522b7"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c64602e8be701c6cfe42064b71c84ce62ce66ddc6422c15463fd8127db3d8066"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:060f412230d5f19fc8c8b75f315931b408d8ebf56aec33ef4168d1b9e54200b1"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9412abdf0ba70faa6e2ee6c0cc62a8defb772e78860cef419865917d86c7342"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9737bdaa0ad33d34c0efc718741abaafce62fadae72c8b251df9b0c823c63b22"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9f0e4dc0f17dcea4ab9d13ac5c666b6b5337042b4d8f27e01b70fae41dd65c57"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1db228102ab9d1ff4c64148c96320d0be7044fa28bd865a9ce628ce98da5973d"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:d8bbd8e56f3ba25a7d0cf980fc42b34028848a53a0e36c9918550e0280b9d0b6"}, + {file = "rpds_py-0.17.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:be22ae34d68544df293152b7e50895ba70d2a833ad9566932d750d3625918b82"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bf046179d011e6114daf12a534d874958b039342b347348a78b7cdf0dd9d6041"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:1a746a6d49665058a5896000e8d9d2f1a6acba8a03b389c1e4c06e11e0b7f40d"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0b8bf5b8db49d8fd40f54772a1dcf262e8be0ad2ab0206b5a2ec109c176c0a4"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f7f4cb1f173385e8a39c29510dd11a78bf44e360fb75610594973f5ea141028b"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7fbd70cb8b54fe745301921b0816c08b6d917593429dfc437fd024b5ba713c58"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bdf1303df671179eaf2cb41e8515a07fc78d9d00f111eadbe3e14262f59c3d0"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fad059a4bd14c45776600d223ec194e77db6c20255578bb5bcdd7c18fd169361"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3664d126d3388a887db44c2e293f87d500c4184ec43d5d14d2d2babdb4c64cad"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:698ea95a60c8b16b58be9d854c9f993c639f5c214cf9ba782eca53a8789d6b19"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:c3d2010656999b63e628a3c694f23020322b4178c450dc478558a2b6ef3cb9bb"}, + {file = "rpds_py-0.17.1-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:938eab7323a736533f015e6069a7d53ef2dcc841e4e533b782c2bfb9fb12d84b"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1e626b365293a2142a62b9a614e1f8e331b28f3ca57b9f05ebbf4cf2a0f0bdc5"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:380e0df2e9d5d5d339803cfc6d183a5442ad7ab3c63c2a0982e8c824566c5ccc"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b760a56e080a826c2e5af09002c1a037382ed21d03134eb6294812dda268c811"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5576ee2f3a309d2bb403ec292d5958ce03953b0e57a11d224c1f134feaf8c40f"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3c3461ebb4c4f1bbc70b15d20b565759f97a5aaf13af811fcefc892e9197ba"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:637b802f3f069a64436d432117a7e58fab414b4e27a7e81049817ae94de45d8d"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffee088ea9b593cc6160518ba9bd319b5475e5f3e578e4552d63818773c6f56a"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3ac732390d529d8469b831949c78085b034bff67f584559340008d0f6041a049"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:93432e747fb07fa567ad9cc7aaadd6e29710e515aabf939dfbed8046041346c6"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:7b7d9ca34542099b4e185b3c2a2b2eda2e318a7dbde0b0d83357a6d4421b5296"}, + {file = "rpds_py-0.17.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:0387ce69ba06e43df54e43968090f3626e231e4bc9150e4c3246947567695f68"}, + {file = "rpds_py-0.17.1.tar.gz", hash = "sha256:0210b2668f24c078307260bf88bdac9d6f1093635df5123789bfee4d8d7fc8e7"}, ] [[package]] name = "ruamel-yaml" -version = "0.17.32" +version = "0.18.5" description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" optional = false -python-versions = ">=3" +python-versions = ">=3.7" files = [ - {file = "ruamel.yaml-0.17.32-py3-none-any.whl", hash = "sha256:23cd2ed620231677564646b0c6a89d138b6822a0d78656df7abda5879ec4f447"}, - {file = "ruamel.yaml-0.17.32.tar.gz", hash = "sha256:ec939063761914e14542972a5cba6d33c23b0859ab6342f61cf070cfc600efc2"}, + {file = "ruamel.yaml-0.18.5-py3-none-any.whl", hash = "sha256:a013ac02f99a69cdd6277d9664689eb1acba07069f912823177c5eced21a6ada"}, + {file = "ruamel.yaml-0.18.5.tar.gz", hash = "sha256:61917e3a35a569c1133a8f772e1226961bf5a1198bea7e23f06a0841dea1ab0e"}, ] [package.dependencies] -"ruamel.yaml.clib" = {version = ">=0.2.7", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.12\""} +"ruamel.yaml.clib" = {version = ">=0.2.7", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.13\""} [package.extras] -docs = ["ryd"] +docs = ["mercurial (>5.7)", "ryd"] jinja2 = ["ruamel.yaml.jinja2 (>=0.2)"] [[package]] name = "ruamel-yaml-clib" -version = "0.2.7" +version = "0.2.8" description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" files = [ - {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71"}, - {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7"}, - {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-manylinux2014_aarch64.whl", hash = 
"sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80"}, - {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab"}, - {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win32.whl", hash = "sha256:763d65baa3b952479c4e972669f679fe490eee058d5aa85da483ebae2009d231"}, - {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:1a6391a7cabb7641c32517539ca42cf84b87b667bad38b78d4d42dd23e957c81"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9c7617df90c1365638916b98cdd9be833d31d337dbcd722485597b43c4a215bf"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-win32.whl", hash = "sha256:f6d3d39611ac2e4f62c3128a9eed45f19a6608670c5a2f4f07f24e8de3441d38"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:da538167284de58a52109a9b89b8f6a53ff8437dd6dc26d33b57bf6699153122"}, - {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072"}, - {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_12_0_arm64.whl", hash = "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8"}, - {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3"}, - {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:370445fd795706fd291ab00c9df38a0caed0f17a6fb46b0f607668ecb16ce763"}, - {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-win32.whl", hash = "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e"}, - {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-win_amd64.whl", hash = "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646"}, - {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2aa261c29a5545adfef9296b7e33941f46aa5bbd21164228e833412af4c9c75f"}, - {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0"}, - {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:40d030e2329ce5286d6b231b8726959ebbe0404c92f0a578c0e2482182e38282"}, - {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c3ca1fbba4ae962521e5eb66d72998b51f0f4d0f608d3c0347a48e1af262efa7"}, - {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-win32.whl", hash = "sha256:7bdb4c06b063f6fd55e472e201317a3bb6cdeeee5d5a38512ea5c01e1acbdd93"}, - {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:be2a7ad8fd8f7442b24323d24ba0b56c51219513cfa45b9ada3b87b76c374d4b"}, - {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:91a789b4aa0097b78c93e3dc4b40040ba55bef518f84a40d4442f713b4094acb"}, - {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:99e77daab5d13a48a4054803d052ff40780278240a902b880dd37a51ba01a307"}, - {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:3243f48ecd450eddadc2d11b5feb08aca941b5cd98c9b1db14b2fd128be8c697"}, - {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8831a2cedcd0f0927f788c5bdf6567d9dc9cc235646a434986a852af1cb54b4b"}, - {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-win32.whl", hash = "sha256:3110a99e0f94a4a3470ff67fc20d3f96c25b13d24c6980ff841e82bafe827cac"}, - {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:92460ce908546ab69770b2e576e4f99fbb4ce6ab4b245345a3869a0a0410488f"}, - {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5bc0667c1eb8f83a3752b71b9c4ba55ef7c7058ae57022dd9b29065186a113d9"}, - {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:4a4d8d417868d68b979076a9be6a38c676eca060785abaa6709c7b31593c35d1"}, - {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bf9a6bc4a0221538b1a7de3ed7bca4c93c02346853f44e1cd764be0023cd3640"}, - {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a7b301ff08055d73223058b5c46c55638917f04d21577c95e00e0c4d79201a6b"}, - {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-win32.whl", hash = "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8"}, - {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:184faeaec61dbaa3cace407cffc5819f7b977e75360e8d5ca19461cd851a5fc5"}, - {file = "ruamel.yaml.clib-0.2.7.tar.gz", hash = "sha256:1f08fd5a2bea9c4180db71678e850b995d2a5f4537be0e94557668cf0f5f9497"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fff3573c2db359f091e1589c3d7c5fc2f86f5bdb6f24252c2d8e539d4e45f412"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:aa2267c6a303eb483de8d02db2871afb5c5fc15618d894300b88958f729ad74f"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:840f0c7f194986a63d2c2465ca63af8ccbbc90ab1c6001b1978f05119b5e7334"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:024cfe1fc7c7f4e1aff4a81e718109e13409767e4f871443cbff3dba3578203d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win32.whl", hash = "sha256:c69212f63169ec1cfc9bb44723bf2917cbbd8f6191a00ef3410f5a7fe300722d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win_amd64.whl", hash = "sha256:cabddb8d8ead485e255fe80429f833172b4cadf99274db39abc080e068cbcc31"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bef08cd86169d9eafb3ccb0a39edb11d8e25f3dae2b28f5c52fd997521133069"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b16420e621d26fdfa949a8b4b47ade8810c56002f5389970db4ddda51dbff248"}, + {file = 
"ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:25c515e350e5b739842fc3228d662413ef28f295791af5e5110b543cf0b57d9b"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:1707814f0d9791df063f8c19bb51b0d1278b8e9a2353abbb676c2f685dee6afe"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:46d378daaac94f454b3a0e3d8d78cafd78a026b1d71443f4966c696b48a6d899"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:09b055c05697b38ecacb7ac50bdab2240bfca1a0c4872b0fd309bb07dc9aa3a9"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win32.whl", hash = "sha256:53a300ed9cea38cf5a2a9b069058137c2ca1ce658a874b79baceb8f892f915a7"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win_amd64.whl", hash = "sha256:c2a72e9109ea74e511e29032f3b670835f8a59bbdc9ce692c5b4ed91ccf1eedb"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ebc06178e8821efc9692ea7544aa5644217358490145629914d8020042c24aa1"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:edaef1c1200c4b4cb914583150dcaa3bc30e592e907c01117c08b13a07255ec2"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d176b57452ab5b7028ac47e7b3cf644bcfdc8cacfecf7e71759f7f51a59e5c92"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:1dc67314e7e1086c9fdf2680b7b6c2be1c0d8e3a8279f2e993ca2a7545fecf62"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3213ece08ea033eb159ac52ae052a4899b56ecc124bb80020d9bbceeb50258e9"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aab7fd643f71d7946f2ee58cc88c9b7bfc97debd71dcc93e03e2d174628e7e2d"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win32.whl", hash = "sha256:5c365d91c88390c8d0a8545df0b5857172824b1c604e867161e6b3d59a827eaa"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win_amd64.whl", hash = "sha256:1758ce7d8e1a29d23de54a16ae867abd370f01b5a69e1a3ba75223eaa3ca1a1b"}, + {file = "ruamel.yaml.clib-0.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a5aa27bad2bb83670b71683aae140a1f52b0857a2deff56ad3f6c13a017a26ed"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c58ecd827313af6864893e7af0a3bb85fd529f862b6adbefe14643947cfe2942"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f481f16baec5290e45aebdc2a5168ebc6d35189ae6fea7a58787613a25f6e875"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:77159f5d5b5c14f7c34073862a6b7d34944075d9f93e681638f6d753606c6ce6"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7f67a1ee819dc4562d444bbafb135832b0b909f81cc90f7aa00260968c9ca1b3"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4ecbf9c3e19f9562c7fdd462e8d18dd902a47ca046a2e64dba80699f0b6c09b7"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:87ea5ff66d8064301a154b3933ae406b0863402a799b16e4a1d24d9fbbcbe0d3"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win32.whl", hash = "sha256:75e1ed13e1f9de23c5607fe6bd1aeaae21e523b32d83bb33918245361e9cc51b"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win_amd64.whl", hash = 
"sha256:3f215c5daf6a9d7bbed4a0a4f760f3113b10e82ff4c5c44bec20a68c8014f675"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b617618914cb00bf5c34d4357c37aa15183fa229b24767259657746c9077615"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a6a9ffd280b71ad062eae53ac1659ad86a17f59a0fdc7699fd9be40525153337"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:305889baa4043a09e5b76f8e2a51d4ffba44259f6b4c72dec8ca56207d9c6fe1"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:700e4ebb569e59e16a976857c8798aee258dceac7c7d6b50cab63e080058df91"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e2b4c44b60eadec492926a7270abb100ef9f72798e18743939bdbf037aab8c28"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e79e5db08739731b0ce4850bed599235d601701d5694c36570a99a0c5ca41a9d"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win32.whl", hash = "sha256:955eae71ac26c1ab35924203fda6220f84dce57d6d7884f189743e2abe3a9fbe"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win_amd64.whl", hash = "sha256:56f4252222c067b4ce51ae12cbac231bce32aee1d33fbfc9d17e5b8d6966c312"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:03d1162b6d1df1caa3a4bd27aa51ce17c9afc2046c31b0ad60a0a96ec22f8001"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba64af9fa9cebe325a62fa398760f5c7206b215201b0ec825005f1b18b9bccf"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:a1a45e0bb052edf6a1d3a93baef85319733a888363938e1fc9924cb00c8df24c"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:da09ad1c359a728e112d60116f626cc9f29730ff3e0e7db72b9a2dbc2e4beed5"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:184565012b60405d93838167f425713180b949e9d8dd0bbc7b49f074407c5a8b"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a75879bacf2c987c003368cf14bed0ffe99e8e85acfa6c0bfffc21a090f16880"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-win32.whl", hash = "sha256:84b554931e932c46f94ab306913ad7e11bba988104c5cff26d90d03f68258cd5"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-win_amd64.whl", hash = "sha256:25ac8c08322002b06fa1d49d1646181f0b2c72f5cbc15a85e80b4c30a544bb15"}, + {file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"}, ] [[package]] @@ -2343,47 +2901,31 @@ files = [ ] [[package]] -name = "secretstorage" -version = "3.3.3" -description = "Python bindings to FreeDesktop.org Secret Service API" +name = "setuptools" +version = "69.0.3" +description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"}, - {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"}, + {file = "setuptools-69.0.3-py3-none-any.whl", hash = "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05"}, + {file = "setuptools-69.0.3.tar.gz", hash = "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78"}, ] 
-[package.dependencies] -cryptography = ">=2.0" -jeepney = ">=0.6" - -[[package]] -name = "semver" -version = "2.13.0" -description = "Python helper for Semantic Versioning (http://semver.org/)" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"}, - {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"}, -] +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] -name = "setuptools" -version = "68.0.0" -description = "Easily download, build, install, upgrade, and uninstall Python packages" +name = "sh" +version = "1.14.3" +description = "Python subprocess replacement" optional = false -python-versions = ">=3.7" +python-versions = "*" files = [ - {file = "setuptools-68.0.0-py3-none-any.whl", hash = "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f"}, - {file = "setuptools-68.0.0.tar.gz", hash = "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235"}, + {file = "sh-1.14.3.tar.gz", hash = "sha256:e4045b6c732d9ce75d571c79f5ac2234edd9ae4f5fa9d59b09705082bdca18c7"}, ] -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] - [[package]] name = "six" version = "1.16.0" @@ -2397,24 +2939,138 @@ files = [ [[package]] name = "smmap" -version = "5.0.0" +version = "5.0.1" description = "A pure Python implementation of a sliding window memory map manager" +optional = true +python-versions = ">=3.7" +files = [ + {file = "smmap-5.0.1-py3-none-any.whl", hash = 
"sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.25" +description = "Database Abstraction Library" +optional = true +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4344d059265cc8b1b1be351bfb88749294b87a8b2bbe21dfbe066c4199541ebd"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f9e2e59cbcc6ba1488404aad43de005d05ca56e069477b33ff74e91b6319735"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84daa0a2055df9ca0f148a64fdde12ac635e30edbca80e87df9b3aaf419e144a"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc8b7dabe8e67c4832891a5d322cec6d44ef02f432b4588390017f5cec186a84"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f5693145220517b5f42393e07a6898acdfe820e136c98663b971906120549da5"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db854730a25db7c956423bb9fb4bdd1216c839a689bf9cc15fada0a7fb2f4570"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-win32.whl", hash = "sha256:14a6f68e8fc96e5e8f5647ef6cda6250c780612a573d99e4d881581432ef1669"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-win_amd64.whl", hash = "sha256:87f6e732bccd7dcf1741c00f1ecf33797383128bd1c90144ac8adc02cbb98643"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:342d365988ba88ada8af320d43df4e0b13a694dbd75951f537b2d5e4cb5cd002"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f37c0caf14b9e9b9e8f6dbc81bc56db06acb4363eba5a633167781a48ef036ed"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa9373708763ef46782d10e950b49d0235bfe58facebd76917d3f5cbf5971aed"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d24f571990c05f6b36a396218f251f3e0dda916e0c687ef6fdca5072743208f5"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75432b5b14dc2fff43c50435e248b45c7cdadef73388e5610852b95280ffd0e9"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:884272dcd3ad97f47702965a0e902b540541890f468d24bd1d98bcfe41c3f018"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-win32.whl", hash = "sha256:e607cdd99cbf9bb80391f54446b86e16eea6ad309361942bf88318bcd452363c"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d505815ac340568fd03f719446a589162d55c52f08abd77ba8964fbb7eb5b5f"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0dacf67aee53b16f365c589ce72e766efaabd2b145f9de7c917777b575e3659d"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b801154027107461ee992ff4b5c09aa7cc6ec91ddfe50d02bca344918c3265c6"}, + 
{file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59a21853f5daeb50412d459cfb13cb82c089ad4c04ec208cd14dddd99fc23b39"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29049e2c299b5ace92cbed0c1610a7a236f3baf4c6b66eb9547c01179f638ec5"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b64b183d610b424a160b0d4d880995e935208fc043d0302dd29fee32d1ee3f95"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f7a7d7fcc675d3d85fbf3b3828ecd5990b8d61bd6de3f1b260080b3beccf215"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-win32.whl", hash = "sha256:cf18ff7fc9941b8fc23437cc3e68ed4ebeff3599eec6ef5eebf305f3d2e9a7c2"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-win_amd64.whl", hash = "sha256:91f7d9d1c4dd1f4f6e092874c128c11165eafcf7c963128f79e28f8445de82d5"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bb209a73b8307f8fe4fe46f6ad5979649be01607f11af1eb94aa9e8a3aaf77f0"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:798f717ae7c806d67145f6ae94dc7c342d3222d3b9a311a784f371a4333212c7"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fdd402169aa00df3142149940b3bf9ce7dde075928c1886d9a1df63d4b8de62"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0d3cab3076af2e4aa5693f89622bef7fa770c6fec967143e4da7508b3dceb9b9"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:74b080c897563f81062b74e44f5a72fa44c2b373741a9ade701d5f789a10ba23"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-win32.whl", hash = "sha256:87d91043ea0dc65ee583026cb18e1b458d8ec5fc0a93637126b5fc0bc3ea68c4"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-win_amd64.whl", hash = "sha256:75f99202324383d613ddd1f7455ac908dca9c2dd729ec8584c9541dd41822a2c"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:420362338681eec03f53467804541a854617faed7272fe71a1bfdb07336a381e"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c88f0c7dcc5f99bdb34b4fd9b69b93c89f893f454f40219fe923a3a2fd11625"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3be4987e3ee9d9a380b66393b77a4cd6d742480c951a1c56a23c335caca4ce3"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a159111a0f58fb034c93eeba211b4141137ec4b0a6e75789ab7a3ef3c7e7e3"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8b8cb63d3ea63b29074dcd29da4dc6a97ad1349151f2d2949495418fd6e48db9"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:736ea78cd06de6c21ecba7416499e7236a22374561493b456a1f7ffbe3f6cdb4"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-win32.whl", hash = "sha256:10331f129982a19df4284ceac6fe87353ca3ca6b4ca77ff7d697209ae0a5915e"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-win_amd64.whl", hash = "sha256:c55731c116806836a5d678a70c84cb13f2cedba920212ba7dcad53260997666d"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:605b6b059f4b57b277f75ace81cc5bc6335efcbcc4ccb9066695e515dbdb3900"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:665f0a3954635b5b777a55111ababf44b4fc12b1f3ba0a435b602b6387ffd7cf"}, + {file = 
"SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecf6d4cda1f9f6cb0b45803a01ea7f034e2f1aed9475e883410812d9f9e3cfcf"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c51db269513917394faec5e5c00d6f83829742ba62e2ac4fa5c98d58be91662f"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:790f533fa5c8901a62b6fef5811d48980adeb2f51f1290ade8b5e7ba990ba3de"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1b1180cda6df7af84fe72e4530f192231b1f29a7496951db4ff38dac1687202d"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-win32.whl", hash = "sha256:555651adbb503ac7f4cb35834c5e4ae0819aab2cd24857a123370764dc7d7e24"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-win_amd64.whl", hash = "sha256:dc55990143cbd853a5d038c05e79284baedf3e299661389654551bd02a6a68d7"}, + {file = "SQLAlchemy-2.0.25-py3-none-any.whl", hash = "sha256:a86b4240e67d4753dc3092d9511886795b3c2852abe599cffe108952f7af7ac3"}, + {file = "SQLAlchemy-2.0.25.tar.gz", hash = "sha256:a2c69a7664fb2d54b8682dd774c3b54f67f84fa123cf84dda2a5f40dcaa04e08"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + +[[package]] +name = "sqlparse" +version = "0.4.4" +description = "A non-validating SQL parser." 
+optional = true +python-versions = ">=3.5" files = [ - {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, - {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, + {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"}, + {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"}, ] +[package.extras] +dev = ["build", "flake8"] +doc = ["sphinx"] +test = ["pytest", "pytest-cov"] + [[package]] name = "stack-data" -version = "0.6.2" +version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -optional = true +optional = false python-versions = "*" files = [ - {file = "stack_data-0.6.2-py3-none-any.whl", hash = "sha256:cbb2a53eb64e5785878201a97ed7c7b94883f48b87bfb0bbe8b623c74679e4a8"}, - {file = "stack_data-0.6.2.tar.gz", hash = "sha256:32d2dd0376772d01b6cb9fc996f3c8b57a357089dec328ed4b6553d037eaf815"}, + {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, + {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"}, ] [package.dependencies] @@ -2437,49 +3093,69 @@ files = [ ] [package.dependencies] -importlib-metadata = {version = ">=1.7.0", markers = "python_version < \"3.8\""} pbr = ">=2.0.0,<2.1.0 || >2.1.0" [[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tinycss2" +version = "1.2.1" +description = "A tiny CSS parser" optional = false python-versions = ">=3.7" files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, + {file = "tinycss2-1.2.1-py3-none-any.whl", hash = "sha256:2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847"}, + {file = "tinycss2-1.2.1.tar.gz", hash = "sha256:8cff3a8f066c2ec677c06dbc7b45619804a6938478d9d73c284b29d14ecb0627"}, ] +[package.dependencies] +webencodings = ">=0.4" + +[package.extras] +doc = ["sphinx", "sphinx_rtd_theme"] +test = ["flake8", "isort", "pytest"] + [[package]] -name = "tomlkit" -version = "0.11.8" -description = "Style preserving TOML library" +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" optional = false python-versions = ">=3.7" files = [ - {file = "tomlkit-0.11.8-py3-none-any.whl", hash = "sha256:8c726c4c202bdb148667835f68d68780b9a003a9ec34167b6c673b38eff2a171"}, - {file = "tomlkit-0.11.8.tar.gz", hash = "sha256:9330fc7faa1db67b541b28e62018c17d20be733177d290a13b24c62d1614e0c3"}, + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = 
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] [[package]] name = "tornado" -version = "6.3.2" +version = "6.4" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -optional = true +optional = false python-versions = ">= 3.8" files = [ - {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829"}, - {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c"}, - {file = "tornado-6.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f"}, - {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4"}, - {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe"}, - {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d"}, - {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0"}, - {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411"}, - {file = "tornado-6.3.2-cp38-abi3-win32.whl", hash = "sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2"}, - {file = "tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf"}, - {file = "tornado-6.3.2.tar.gz", hash = "sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba"}, + {file = "tornado-6.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0"}, + {file = "tornado-6.4-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:27787de946a9cffd63ce5814c33f734c627a87072ec7eed71f7fc4417bb16263"}, + {file = "tornado-6.4-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7894c581ecdcf91666a0912f18ce5e757213999e183ebfc2c3fdbf4d5bd764e"}, + {file = "tornado-6.4-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e43bc2e5370a6a8e413e1e1cd0c91bedc5bd62a74a532371042a18ef19e10579"}, + {file = "tornado-6.4-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0251554cdd50b4b44362f73ad5ba7126fc5b2c2895cc62b14a1c2d7ea32f212"}, + {file = "tornado-6.4-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fd03192e287fbd0899dd8f81c6fb9cbbc69194d2074b38f384cb6fa72b80e9c2"}, + {file = "tornado-6.4-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:88b84956273fbd73420e6d4b8d5ccbe913c65d31351b4c004ae362eba06e1f78"}, + {file = "tornado-6.4-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:71ddfc23a0e03ef2df1c1397d859868d158c8276a0603b96cf86892bff58149f"}, + {file = "tornado-6.4-cp38-abi3-win32.whl", hash = "sha256:6f8a6c77900f5ae93d8b4ae1196472d0ccc2775cc1dfdc9e7727889145c45052"}, + {file = "tornado-6.4-cp38-abi3-win_amd64.whl", hash = 
"sha256:10aeaa8006333433da48dec9fe417877f8bcc21f48dda8d661ae79da357b2a63"}, + {file = "tornado-6.4.tar.gz", hash = "sha256:72291fa6e6bc84e626589f1c29d90a5a6d593ef5ae68052ee2ef000dfd273dee"}, ] [[package]] @@ -2496,7 +3172,6 @@ files = [ [package.dependencies] colorama = {version = ">=0.4.1", markers = "platform_system == \"Windows\""} filelock = ">=3.0.0" -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} packaging = ">=14" pluggy = ">=0.12.0" py = ">=1.4.17" @@ -2510,160 +3185,85 @@ testing = ["flaky (>=3.4.0)", "freezegun (>=0.3.11)", "pathlib2 (>=2.3.3)", "psu [[package]] name = "tqdm" -version = "4.65.0" +version = "4.66.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, - {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] [[package]] name = "traitlets" -version = "5.9.0" +version = "5.14.1" description = "Traitlets Python configuration system" -optional = true -python-versions = ">=3.7" +optional = false +python-versions = ">=3.8" files = [ - {file = "traitlets-5.9.0-py3-none-any.whl", hash = "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8"}, - {file = "traitlets-5.9.0.tar.gz", hash = "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9"}, + {file = "traitlets-5.14.1-py3-none-any.whl", hash = "sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74"}, + {file = "traitlets-5.14.1.tar.gz", hash = "sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e"}, ] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"] - -[[package]] -name = "twine" -version = "3.8.0" -description = "Collection of utilities for publishing packages on PyPI" -optional = false -python-versions = ">=3.6" -files = [ - {file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"}, - {file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"}, -] - -[package.dependencies] -colorama = ">=0.4.3" -importlib-metadata = ">=3.6" -keyring = ">=15.1" -pkginfo = ">=1.8.1" -readme-renderer = ">=21.0" -requests = ">=2.20" -requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0" -rfc3986 = ">=1.4.0" -tqdm = ">=4.14" -urllib3 = ">=1.26.0" - -[[package]] -name = "typed-ast" -version = "1.5.5" -description = "a fork of Python 2 and 3 ast modules with type comment support" -optional = false -python-versions = ">=3.6" -files = [ - {file = "typed_ast-1.5.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4bc1efe0ce3ffb74784e06460f01a223ac1f6ab31c6bc0376a21184bf5aabe3b"}, - {file = "typed_ast-1.5.5-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:5f7a8c46a8b333f71abd61d7ab9255440d4a588f34a21f126bbfc95f6049e686"}, - {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:597fc66b4162f959ee6a96b978c0435bd63791e31e4f410622d19f1686d5e769"}, - {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d41b7a686ce653e06c2609075d397ebd5b969d821b9797d029fccd71fdec8e04"}, - {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5fe83a9a44c4ce67c796a1b466c270c1272e176603d5e06f6afbc101a572859d"}, - {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d5c0c112a74c0e5db2c75882a0adf3133adedcdbfd8cf7c9d6ed77365ab90a1d"}, - {file = "typed_ast-1.5.5-cp310-cp310-win_amd64.whl", hash = "sha256:e1a976ed4cc2d71bb073e1b2a250892a6e968ff02aa14c1f40eba4f365ffec02"}, - {file = "typed_ast-1.5.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c631da9710271cb67b08bd3f3813b7af7f4c69c319b75475436fcab8c3d21bee"}, - {file = "typed_ast-1.5.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b445c2abfecab89a932b20bd8261488d574591173d07827c1eda32c457358b18"}, - {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc95ffaaab2be3b25eb938779e43f513e0e538a84dd14a5d844b8f2932593d88"}, - {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61443214d9b4c660dcf4b5307f15c12cb30bdfe9588ce6158f4a005baeb167b2"}, - {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6eb936d107e4d474940469e8ec5b380c9b329b5f08b78282d46baeebd3692dc9"}, - {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e48bf27022897577d8479eaed64701ecaf0467182448bd95759883300ca818c8"}, - {file = "typed_ast-1.5.5-cp311-cp311-win_amd64.whl", hash = "sha256:83509f9324011c9a39faaef0922c6f720f9623afe3fe220b6d0b15638247206b"}, - {file = "typed_ast-1.5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:44f214394fc1af23ca6d4e9e744804d890045d1643dd7e8229951e0ef39429b5"}, - {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:118c1ce46ce58fda78503eae14b7664163aa735b620b64b5b725453696f2a35c"}, - {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be4919b808efa61101456e87f2d4c75b228f4e52618621c77f1ddcaae15904fa"}, - {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:fc2b8c4e1bc5cd96c1a823a885e6b158f8451cf6f5530e1829390b4d27d0807f"}, - {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:16f7313e0a08c7de57f2998c85e2a69a642e97cb32f87eb65fbfe88381a5e44d"}, - {file = "typed_ast-1.5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:2b946ef8c04f77230489f75b4b5a4a6f24c078be4aed241cfabe9cbf4156e7e5"}, - {file = "typed_ast-1.5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2188bc33d85951ea4ddad55d2b35598b2709d122c11c75cffd529fbc9965508e"}, - {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0635900d16ae133cab3b26c607586131269f88266954eb04ec31535c9a12ef1e"}, - {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57bfc3cf35a0f2fdf0a88a3044aafaec1d2f24d8ae8cd87c4f58d615fb5b6311"}, - {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:fe58ef6a764de7b4b36edfc8592641f56e69b7163bba9f9c8089838ee596bfb2"}, - {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_x86_64.whl", 
hash = "sha256:d09d930c2d1d621f717bb217bf1fe2584616febb5138d9b3e8cdd26506c3f6d4"}, - {file = "typed_ast-1.5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:d40c10326893ecab8a80a53039164a224984339b2c32a6baf55ecbd5b1df6431"}, - {file = "typed_ast-1.5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fd946abf3c31fb50eee07451a6aedbfff912fcd13cf357363f5b4e834cc5e71a"}, - {file = "typed_ast-1.5.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ed4a1a42df8a3dfb6b40c3d2de109e935949f2f66b19703eafade03173f8f437"}, - {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:045f9930a1550d9352464e5149710d56a2aed23a2ffe78946478f7b5416f1ede"}, - {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381eed9c95484ceef5ced626355fdc0765ab51d8553fec08661dce654a935db4"}, - {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bfd39a41c0ef6f31684daff53befddae608f9daf6957140228a08e51f312d7e6"}, - {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8c524eb3024edcc04e288db9541fe1f438f82d281e591c548903d5b77ad1ddd4"}, - {file = "typed_ast-1.5.5-cp38-cp38-win_amd64.whl", hash = "sha256:7f58fabdde8dcbe764cef5e1a7fcb440f2463c1bbbec1cf2a86ca7bc1f95184b"}, - {file = "typed_ast-1.5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:042eb665ff6bf020dd2243307d11ed626306b82812aba21836096d229fdc6a10"}, - {file = "typed_ast-1.5.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:622e4a006472b05cf6ef7f9f2636edc51bda670b7bbffa18d26b255269d3d814"}, - {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1efebbbf4604ad1283e963e8915daa240cb4bf5067053cf2f0baadc4d4fb51b8"}, - {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0aefdd66f1784c58f65b502b6cf8b121544680456d1cebbd300c2c813899274"}, - {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:48074261a842acf825af1968cd912f6f21357316080ebaca5f19abbb11690c8a"}, - {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:429ae404f69dc94b9361bb62291885894b7c6fb4640d561179548c849f8492ba"}, - {file = "typed_ast-1.5.5-cp39-cp39-win_amd64.whl", hash = "sha256:335f22ccb244da2b5c296e6f96b06ee9bed46526db0de38d2f0e5a6597b81155"}, - {file = "typed_ast-1.5.5.tar.gz", hash = "sha256:94282f7a354f36ef5dbce0ef3467ebf6a258e370ab33d5b40c249fa996e590dd"}, -] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "typing-extensions" -version = "4.7.1" -description = "Backported and Experimental Type Hints for Python 3.7+" +version = "4.9.0" +description = "Backported and Experimental Type Hints for Python 3.8+" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, - {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, + {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, + {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, ] [[package]] name = "urllib3" -version = "2.0.3" +version = "2.1.0" description = "HTTP library with thread-safe connection pooling, 
file post, and more." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "urllib3-2.0.3-py3-none-any.whl", hash = "sha256:48e7fafa40319d358848e1bc6809b208340fafe2096f1725d05d67443d0483d1"}, - {file = "urllib3-2.0.3.tar.gz", hash = "sha256:bee28b5e56addb8226c96f7f13ac28cb4c301dd5ea8a6ca179c0b9835e032825"}, + {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"}, + {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"}, ] [package.extras] brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.23.1" +version = "20.25.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.23.1-py3-none-any.whl", hash = "sha256:34da10f14fea9be20e0fd7f04aba9732f84e593dac291b757ce42e3368a39419"}, - {file = "virtualenv-20.23.1.tar.gz", hash = "sha256:8ff19a38c1021c742148edc4f81cb43d7f8c6816d2ede2ab72af5b84c749ade1"}, + {file = "virtualenv-20.25.0-py3-none-any.whl", hash = "sha256:4238949c5ffe6876362d9c0180fc6c3a824a7b12b80604eeb8085f2ed7460de3"}, + {file = "virtualenv-20.25.0.tar.gz", hash = "sha256:bf51c0d9c7dd63ea8e44086fa1e4fb1093a31e963b86959257378aef020e1f1b"}, ] [package.dependencies] -distlib = ">=0.3.6,<1" -filelock = ">=3.12,<4" -importlib-metadata = {version = ">=6.6", markers = "python_version < \"3.8\""} -platformdirs = ">=3.5.1,<4" +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" [package.extras] -docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.3.1)", "pytest-env (>=0.8.1)", "pytest-freezer (>=0.4.6)", "pytest-mock (>=3.10)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=67.8)", "time-machine (>=2.9)"] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] [[package]] name = "watchdog" @@ -2706,13 +3306,13 @@ watchmedo = ["PyYAML (>=3.10)"] [[package]] name = "wcwidth" -version = "0.2.6" +version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = true +optional = false python-versions = "*" files = [ - {file = "wcwidth-0.2.6-py2.py3-none-any.whl", hash = "sha256:795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e"}, - {file = "wcwidth-0.2.6.tar.gz", hash = "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0"}, + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = 
"sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, ] [[package]] @@ -2726,71 +3326,28 @@ files = [ {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, ] -[[package]] -name = "websocket-client" -version = "1.6.1" -description = "WebSocket client for Python with low level API options" -optional = true -python-versions = ">=3.7" -files = [ - {file = "websocket-client-1.6.1.tar.gz", hash = "sha256:c951af98631d24f8df89ab1019fc365f2227c0892f12fd150e935607c79dd0dd"}, - {file = "websocket_client-1.6.1-py3-none-any.whl", hash = "sha256:f1f9f2ad5291f0225a49efad77abf9e700b6fef553900623060dad6e26503b9d"}, -] - -[package.extras] -docs = ["Sphinx (>=3.4)", "sphinx-rtd-theme (>=0.5)"] -optional = ["python-socks", "wsaccel"] -test = ["websockets"] - -[[package]] -name = "wheel" -version = "0.40.0" -description = "A built-package format for Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "wheel-0.40.0-py3-none-any.whl", hash = "sha256:d236b20e7cb522daf2390fa84c55eea81c5c30190f90f29ae2ca1ad8355bf247"}, - {file = "wheel-0.40.0.tar.gz", hash = "sha256:cd1196f3faee2b31968d626e1731c94f99cbdb67cf5a46e4f5656cbee7738873"}, -] - -[package.extras] -test = ["pytest (>=6.0.0)"] - -[[package]] -name = "yachalk" -version = "0.1.5" -description = "🖍️ Terminal string styling done right" -optional = false -python-versions = "*" -files = [ - {file = "yachalk-0.1.5-py3-none-any.whl", hash = "sha256:fe389be1d574a3be00e0b849f1c0767f81da5bdfed9b122da07e6861e27c1c62"}, - {file = "yachalk-0.1.5.tar.gz", hash = "sha256:cac3b66a198a280f06adfde82f1604aecd9dd5ffc372a02b4f36d79ec499798a"}, -] - -[package.dependencies] -importlib-resources = "*" -setuptools = "*" - [[package]] name = "zipp" -version = "3.15.0" +version = "3.17.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, - {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, + {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"}, + {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [extras] +database = ["sqlalchemy"] docker = ["docker"] +mlflow = ["mlflow-skinny"] notebook = ["ploomber-engine"] [metadata] lock-version = "2.0" -python-versions = "^3.7" -content-hash = "9059dbb4fa611855b1068705119c7c04a34e06639b27adb15fa022092dbd582c" +python-versions = ">=3.8,<3.13" 
+content-hash = "d8a8675e69ca12306c86ea22a2f10716c7c70a3db2f9708080f060c1fb73be90" diff --git a/pyproject.toml b/pyproject.toml index 8975f750..3113261c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,39 +10,52 @@ repository = "https://github.com/AstraZeneca/magnus-core" documentation = "https://astrazeneca.github.io/magnus-core/" [tool.poetry.dependencies] -python = "^3.7" +python = ">=3.8,<3.13" "ruamel.yaml" = "*" "ruamel.yaml.clib" = "*" -yachalk = "*" -pydantic = "^1.9.0" +pydantic = "^2.5" stevedore = "^3.5.0" "click" = "*" click-plugins = "^1.1.1" -ploomber-engine ={ version= "^0.0.19", optional = true, python = ">=3.8,<4.0" } +typing-extensions ={ version= "*", python = "<3.8" } docker ={ version = "*", optional = true } +sqlalchemy ={ version = "*", optional = true } +rich = "^13.5.2" +mlflow-skinny ={ version = "*", optional = true } +ploomber-engine = "^0.0.31" [tool.poetry.group.docs.dependencies] mkdocs = "*" mkdocs-material = "*" mkdocs-section-index = "^0.3.5" +mkdocstrings = {extras = ["python"], version = "^0.24.0"} +nbconvert = "^7.13.1" +mkdocs-click = "^0.8.1" +[tool.poetry.group.binary.dependencies] +pyinstaller = "^5.13.2" + +[tool.poetry.group.perf.dependencies] +# Run the performace tests poetry run python -m pyflame -p ./flamegraph.pl magnus/entrypoints.py +pyflame = "^0.3.1" [tool.poetry.extras] docker = ['docker'] notebook = ['ploomber-engine'] - +database = ["sqlalchemy"] +mlflow = ["mlflow-skinny"] [tool.poetry.group.dev.dependencies] pytest = "*" pytest-cov = "*" pytest-mock = "*" -mypy = "^0.931" +mypy = "^1.5.1" tox = "^3.24.5" pre-commit = "*" ruff = "^0.0.259" commit-linter = "^1.0.2" -python-semantic-release = "^7.33.1" black = "^23.3.0" +gitlint = "^0.19.1" [tool.poetry.scripts] @@ -51,89 +64,58 @@ magnus= 'magnus.cli:cli' # Plugins for Executors [tool.poetry.plugins."executor"] -"local" = "magnus.executor:LocalExecutor" -"local-container" = "magnus.executor:LocalContainerExecutor" -"demo-renderer" = "magnus.executor:DemoRenderer" +"local" = "magnus.extensions.executor.local.implementation:LocalExecutor" +"local-container" = "magnus.extensions.executor.local_container.implementation:LocalContainerExecutor" +"argo" = "magnus.extensions.executor.argo.implementation:ArgoExecutor" +"mocked" = "magnus.extensions.executor.mocked.implementation:MockedExecutor" # Plugins for Catalog [tool.poetry.plugins."catalog"] "do-nothing" = "magnus.catalog:DoNothingCatalog" -"file-system" = "magnus.catalog:FileSystemCatalog" +"file-system" = "magnus.extensions.catalog.file_system.implementation:FileSystemCatalog" # Plugins for Secrets [tool.poetry.plugins."secrets"] "do-nothing" = "magnus.secrets:DoNothingSecretManager" -"dotenv" = "magnus.secrets:DotEnvSecrets" -"env-secrets-manager" = "magnus.secrets:EnvSecretsManager" +"dotenv" = "magnus.extensions.secrets.dotenv.implementation:DotEnvSecrets" +"env-secrets-manager" = "magnus.extensions.secrets.env_secrets.implementation:EnvSecretsManager" # Plugins for Run Log store [tool.poetry.plugins."run_log_store"] "buffered" = "magnus.datastore:BufferRunLogstore" -"file-system" = "magnus.datastore:FileSystemRunLogstore" -"chunked-fs" = "magnus.datastore:ChunkedFileSystemRunLogStore" +"file-system" = "magnus.extensions.run_log_store.file_system.implementation:FileSystemRunLogstore" +"chunked-fs" = "magnus.extensions.run_log_store.chunked_file_system.implementation:ChunkedFileSystemRunLogStore" # Plugins for Experiment tracker [tool.poetry.plugins."experiment_tracker"] "do-nothing" = 
"magnus.experiment_tracker:DoNothingTracker" +"mlflow" = "magnus.extensions.experiment_tracker.mlflow.implementation:MLFlowExperimentTracker" # Plugins for Pickler -# TODO: Currently not being extended, need to get more use cases [tool.poetry.plugins."pickler"] "pickle" = "magnus.pickler:NativePickler" # Plugins for Integration [tool.poetry.plugins."integration"] -# The name does not matter, the core would be labelled c -"c1" = "magnus.integration:LocalComputeBufferedRunLogStore" -"c2" = "magnus.integration:LocalComputeFileSystemRunLogStore" -"c3" = "magnus.integration:LocalContainerComputeBufferedRunLogStore" -"c4" = "magnus.integration:LocalContainerComputeFileSystemRunLogstore" -"c5" = "magnus.integration:LocalContainerComputeDotEnvSecrets" -"c6" = "magnus.integration:LocalContainerComputeEnvSecretsManager" -"c7" = "magnus.integration:LocalContainerDoNothingCatalog" -"c8" = "magnus.integration:LocalDoNothingCatalog" -"c9" = "magnus.integration:LocalContainerComputeFileSystemCatalog" -"c10" = "magnus.integration:DemoRenderBufferedRunLogStore" +# Left empty for 3rd party integrations # Plugins for Tasks [tool.poetry.plugins."tasks"] "python" = "magnus.tasks:PythonTaskType" -"python-lambda" = "magnus.tasks:PythonLambdaTaskType" "shell" = "magnus.tasks:ShellTaskType" "notebook" = "magnus.tasks:NotebookTaskType" -"container" = "magnus.tasks:ContainerTaskType" # Plugins for Nodes [tool.poetry.plugins."nodes"] -"task" = "magnus.nodes:TaskNode" -"fail" = "magnus.nodes:FailNode" -"success" = "magnus.nodes:SuccessNode" -"parallel" = "magnus.nodes:ParallelNode" -"map" = "magnus.nodes:MapNode" -"dag" = "magnus.nodes:DagNode" -"as-is" = "magnus.nodes:AsISNode" - -[tool.interrogate] -ignore-init-method = true -ignore-init-module = true -ignore-magic = false -ignore-semiprivate = false -ignore-private = false -ignore-property-decorators = false -ignore-module = true -ignore-nested-functions = false -ignore-nested-classes = true -ignore-setters = false -fail-under = 95 -exclude = ["setup.py", "docs", "build", "magnus/integration.py"] -ignore-regex = ["^get$", "^mock_.*", ".*BaseClass.*"] -# possible values: 0 (minimal output), 1 (-v), 2 (-vv) -verbose = 2 -quiet = false -whitelist-regex = [] -color = true -omit-covered-files = false +"task" = "magnus.extensions.nodes:TaskNode" +"fail" = "magnus.extensions.nodes:FailNode" +"success" = "magnus.extensions.nodes:SuccessNode" +"parallel" = "magnus.extensions.nodes:ParallelNode" +"map" = "magnus.extensions.nodes:MapNode" +"dag" = "magnus.extensions.nodes:DagNode" +"stub" = "magnus.extensions.nodes:StubNode" + [tool.black] line-length = 120 @@ -198,11 +180,48 @@ requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.semantic_release] -branch = "main" tag_commit = false -version_source = "tag_only" major_on_zero = true commit_version_number = false upload_to_pypi = false upload_to_repository = false upload_to_release = false +tag_format = "{version}" + +[tool.semantic_release.branches.main] +match = "main" + +[tool.semantic_release.branches."rc"] +match = "rc" +prerelease = true +prerelease_token = "rc" + +[tool.semantic_release.remote] +ignore_token_for_push = true + +[tool.coverage.run] +branch = true + +[tool.coverage.report] +# Regexes for lines to exclude from consideration +exclude_lines = [ + "pragma: no cover" +] + +include_namespace_packages = true +show_missing = true + +exclude_also = [ + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + + # Don't 
complain about abstract methods, they aren't run: + "@(abc\\.)?abstractmethod", + ] + +omit =[ + "magnus/cli.py", + "magnus/extensions/executor/demo_renderer/*", + "*FF.py" + ] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..3b9fd1cd --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,11 @@ +from pytest import fixture + + +@fixture(scope="session", autouse=True) +def magnus_log(): + import logging + + logger = logging.getLogger("magnus") + logger.setLevel(logging.WARNING) + logger.propagate = True + yield logger diff --git a/tests/magnus/extensions/__init__.py b/tests/magnus/extensions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/magnus/extensions/catalog/test_catalog_extension.py b/tests/magnus/extensions/catalog/test_catalog_extension.py new file mode 100644 index 00000000..d6389f15 --- /dev/null +++ b/tests/magnus/extensions/catalog/test_catalog_extension.py @@ -0,0 +1,52 @@ +from magnus.extensions.catalog import is_catalog_out_of_sync + + +def test_is_catalog_out_of_sync_returns_true_for_empty_synced_catalogs(): + assert is_catalog_out_of_sync(1, []) is True + + +def test_is_catalog_out_of_sync_returns_false_for_same_objects(): + class MockCatalog: + catalog_relative_path = None + data_hash = None + + catalog_item = MockCatalog() + catalog_item.catalog_relative_path = "path" + catalog_item.data_hash = "hash" + + synced_catalog = [catalog_item] + assert is_catalog_out_of_sync(catalog_item, synced_catalog) is False + + +def test_is_catalog_out_of_sync_returns_true_for_different_hash(): + class MockCatalog: + catalog_relative_path = None + data_hash = None + + catalog_item1 = MockCatalog() + catalog_item1.catalog_relative_path = "path" + catalog_item1.data_hash = "hash" + + catalog_item2 = MockCatalog() + catalog_item2.catalog_relative_path = "path" + catalog_item2.data_hash = "not-hash" + + synced_catalog = [catalog_item1] + assert is_catalog_out_of_sync(catalog_item2, synced_catalog) is True + + +def test_is_catalog_out_of_sync_returns_true_for_different_paths(): + class MockCatalog: + catalog_relative_path = None + data_hash = None + + catalog_item1 = MockCatalog() + catalog_item1.catalog_relative_path = "path" + catalog_item1.data_hash = "hash" + + catalog_item2 = MockCatalog() + catalog_item2.catalog_relative_path = "path1" + catalog_item2.data_hash = "hash" + + synced_catalog = [catalog_item1] + assert is_catalog_out_of_sync(catalog_item2, synced_catalog) is True diff --git a/tests/magnus/extensions/catalog/test_file_system.py b/tests/magnus/extensions/catalog/test_file_system.py new file mode 100644 index 00000000..b12f5261 --- /dev/null +++ b/tests/magnus/extensions/catalog/test_file_system.py @@ -0,0 +1,258 @@ +import pytest +import tempfile +import os + +from magnus import defaults +from magnus.extensions.catalog.file_system.implementation import FileSystemCatalog +import magnus.extensions.catalog.file_system.implementation as implementation + + +def test_file_system_catalog_inits_default_values_if_none_config(): + catalog_handler = FileSystemCatalog() + assert catalog_handler.compute_data_folder == defaults.COMPUTE_DATA_FOLDER + assert catalog_handler.catalog_location == defaults.CATALOG_LOCATION_FOLDER + + +def test_file_system_catalog_get_catalog_location_defaults_if_location_not_provided(monkeypatch, mocker): + catalog_handler = FileSystemCatalog() + + assert catalog_handler.catalog_location == defaults.CATALOG_LOCATION_FOLDER + + +def 
test_file_system_catalog_catalog_location_returns_config_catalog_location_if_provided(monkeypatch, mocker): + catalog_handler = FileSystemCatalog(catalog_location="this") + + assert catalog_handler.catalog_location == "this" + + +def test_file_system_catalog_get_raises_exception_if_catalog_does_not_exist(monkeypatch, mocker): + def mock_does_dir_exist(dir_name): + if dir_name == "this_compute_folder": + return True + return False + + monkeypatch.setattr(implementation.utils, "does_dir_exist", mock_does_dir_exist) + + catalog_handler = FileSystemCatalog(catalog_location="this_location") + with pytest.raises(Exception, match="Expected Catalog to be present at"): + catalog_handler.get("testing", run_id="dummy_run_id", compute_data_folder="this_compute_folder") + + +def test_file_system_catalog_get_copies_files_from_catalog_to_compute_folder_with_all(mocker, monkeypatch): + mock_run_store = mocker.MagicMock() + mock_context = mocker.MagicMock() + mock_context.run_log_store = mock_run_store + + mocker.patch( + "magnus.catalog.BaseCatalog._context", + new_callable=mocker.PropertyMock, + return_value=mock_context, + ) + + with tempfile.TemporaryDirectory() as catalog_location: + with tempfile.TemporaryDirectory(dir=".") as compute_folder: + catalog_location_path = implementation.Path(catalog_location) + run_id = "testing" + implementation.Path(catalog_location_path / run_id / compute_folder).mkdir(parents=True) + with open(implementation.Path(catalog_location) / run_id / compute_folder / "catalog_file", "w") as fw: + fw.write("hello") + + catalog_handler = FileSystemCatalog() + catalog_handler.catalog_location = catalog_location + + catalog_handler.get(name="**/*", run_id=run_id) + + _, _, files = next(os.walk(compute_folder)) + + assert len(list(files)) == 1 + + +def test_file_system_catalog_get_copies_files_from_catalog_to_compute_folder_with_pattern(mocker, monkeypatch): + mock_run_store = mocker.MagicMock() + mock_context = mocker.MagicMock() + mock_context.run_log_store = mock_run_store + + mocker.patch( + "magnus.catalog.BaseCatalog._context", + new_callable=mocker.PropertyMock, + return_value=mock_context, + ) + + with tempfile.TemporaryDirectory() as catalog_location: + with tempfile.TemporaryDirectory(dir=".") as compute_folder: + catalog_location_path = implementation.Path(catalog_location) + run_id = "testing" + implementation.Path(catalog_location_path / run_id / compute_folder).mkdir(parents=True) + with open(implementation.Path(catalog_location) / run_id / compute_folder / "catalog_file", "w") as fw: + fw.write("hello") + + with open(implementation.Path(catalog_location) / run_id / compute_folder / "not_catalog", "w") as fw: + fw.write("hello") + + catalog_handler = FileSystemCatalog(catalog_location=catalog_location) + catalog_handler.get(name="**/catalog*", run_id=run_id) + + _, _, files = next(os.walk(compute_folder)) + + assert len(list(files)) == 1 + + +def test_file_system_catalog_put_copies_files_from_compute_folder_to_catalog_if_synced_changed_all(mocker, monkeypatch): + monkeypatch.setattr(implementation, "is_catalog_out_of_sync", mocker.MagicMock(return_value=True)) + mock_run_store = mocker.MagicMock() + mock_context = mocker.MagicMock() + mock_context.run_log_store = mock_run_store + + mocker.patch( + "magnus.catalog.BaseCatalog._context", + new_callable=mocker.PropertyMock, + return_value=mock_context, + ) + + with tempfile.TemporaryDirectory() as catalog_location: + with tempfile.TemporaryDirectory(dir=".") as compute_folder: + catalog_location_path = 
implementation.Path(catalog_location) + run_id = "testing" + implementation.Path(catalog_location_path / run_id).mkdir(parents=True) + + with open(implementation.Path(compute_folder) / "catalog_file", "w") as fw: + fw.write("hello") + + catalog_handler = FileSystemCatalog(catalog_location=catalog_location) + catalog_handler.put(name=str(compute_folder) + "/*", run_id=run_id) + + _, _, files = next(os.walk(catalog_location_path / run_id / compute_folder)) + + assert len(list(files)) == 1 + + +def test_file_system_catalog_put_copies_files_from_compute_folder_to_catalog_if_synced_changed_pattern( + mocker, monkeypatch +): + monkeypatch.setattr(implementation, "is_catalog_out_of_sync", mocker.MagicMock(return_value=True)) + mock_run_store = mocker.MagicMock() + mock_context = mocker.MagicMock() + mock_context.run_log_store = mock_run_store + + mocker.patch( + "magnus.catalog.BaseCatalog._context", + new_callable=mocker.PropertyMock, + return_value=mock_context, + ) + with tempfile.TemporaryDirectory() as catalog_location: + with tempfile.TemporaryDirectory(dir=".") as compute_folder: + catalog_location_path = implementation.Path(catalog_location) + run_id = "testing" + implementation.Path(catalog_location_path / run_id).mkdir(parents=True) + with open(implementation.Path(compute_folder) / "catalog_file", "w") as fw: + fw.write("hello") + + with open(implementation.Path(compute_folder) / "not_catalog_file", "w") as fw: + fw.write("hello") + + catalog_handler = FileSystemCatalog(catalog_location=catalog_location) + + catalog_handler.put(name=str(compute_folder) + "/catalog*", run_id=run_id) + + _, _, files = next(os.walk(catalog_location_path / run_id / compute_folder)) + + assert len(list(files)) == 1 + + +def test_file_system_catalog_put_copies_files_from_compute_folder_to_catalog_if_synced_true(mocker, monkeypatch): + monkeypatch.setattr(implementation, "is_catalog_out_of_sync", mocker.MagicMock(return_value=False)) + mock_run_store = mocker.MagicMock() + mock_context = mocker.MagicMock() + mock_context.run_log_store = mock_run_store + + mocker.patch( + "magnus.catalog.BaseCatalog._context", + new_callable=mocker.PropertyMock, + return_value=mock_context, + ) + + with tempfile.TemporaryDirectory() as catalog_location: + with tempfile.TemporaryDirectory(dir=".") as compute_folder: + catalog_location_path = implementation.Path(catalog_location) + run_id = "testing" + implementation.Path(catalog_location_path / run_id).mkdir(parents=True) + with open(implementation.Path(compute_folder) / "catalog_file", "w") as fw: + fw.write("hello") + + with open(implementation.Path(compute_folder) / "not_catalog_file", "w") as fw: + fw.write("hello") + + catalog_handler = FileSystemCatalog(catalog_location=catalog_location) + + catalog_handler.put(name=str(compute_folder) + "/*", run_id=run_id) + + with pytest.raises(FileNotFoundError): + _ = os.listdir(catalog_location_path / run_id / compute_folder) + assert True + + +def test_file_system_catalog_put_uses_compute_folder_by_default(monkeypatch, mocker): + mock_safe_make_dir = mocker.MagicMock() + monkeypatch.setattr(implementation.utils, "safe_make_dir", mock_safe_make_dir) + + mock_does_dir_exist = mocker.MagicMock(side_effect=Exception()) + monkeypatch.setattr(implementation.utils, "does_dir_exist", mock_does_dir_exist) + + catalog_handler = FileSystemCatalog(catalog_location="this_location") + with pytest.raises(Exception): + catalog_handler.put("testing", run_id="dummy_run_id") + + mock_does_dir_exist.assert_called_once_with(implementation.Path(".")) + + 
+def test_file_system_catalog_put_uses_compute_folder_provided(monkeypatch, mocker): + mock_safe_make_dir = mocker.MagicMock() + monkeypatch.setattr(implementation.utils, "safe_make_dir", mock_safe_make_dir) + + mock_does_dir_exist = mocker.MagicMock(side_effect=Exception()) + monkeypatch.setattr(implementation.utils, "does_dir_exist", mock_does_dir_exist) + + catalog_handler = FileSystemCatalog(catalog_location="this_location") + with pytest.raises(Exception): + catalog_handler.put("testing", run_id="dummy_run_id", compute_data_folder="not_data") + + mock_does_dir_exist.assert_called_once_with(implementation.Path("not_data")) + + +def test_file_system_catalog_put_raises_exception_if_compute_data_folder_does_not_exist(monkeypatch, mocker): + mock_safe_make_dir = mocker.MagicMock() + monkeypatch.setattr(implementation.utils, "safe_make_dir", mock_safe_make_dir) + + mock_does_dir_exist = mocker.MagicMock(return_value=False) + monkeypatch.setattr(implementation.utils, "does_dir_exist", mock_does_dir_exist) + + catalog_handler = FileSystemCatalog(catalog_location="this_location") + with pytest.raises(Exception): + catalog_handler.put("testing", run_id="dummy_run_id", compute_data_folder="this_compute_folder") + + +def test_file_system_catalog_put_creates_catalog_location_using_run_id(monkeypatch, mocker): + mock_safe_make_dir = mocker.MagicMock() + monkeypatch.setattr(implementation.utils, "safe_make_dir", mock_safe_make_dir) + + mock_does_dir_exist = mocker.MagicMock(side_effect=Exception()) + monkeypatch.setattr(implementation.utils, "does_dir_exist", mock_does_dir_exist) + + catalog_handler = FileSystemCatalog(catalog_location="this_location") + + with pytest.raises(Exception): + catalog_handler.put("testing", run_id="dummy_run_id") + + mock_safe_make_dir.assert_called_once_with(implementation.Path("this_location") / "dummy_run_id") + + +def test_file_system_sync_between_runs_raises_exception_if_previous_catalog_does_not_exist(monkeypatch, mocker): + mock_safe_make_dir = mocker.MagicMock() + monkeypatch.setattr(implementation.utils, "safe_make_dir", mock_safe_make_dir) + + mock_does_dir_exist = mocker.MagicMock(return_value=False) + monkeypatch.setattr(implementation.utils, "does_dir_exist", mock_does_dir_exist) + + catalog_handler = FileSystemCatalog(catalog_location="this_location") + with pytest.raises(Exception): + catalog_handler.sync_between_runs("previous", "current") diff --git a/tests/magnus/extensions/catalog/test_k8s_pvc.py b/tests/magnus/extensions/catalog/test_k8s_pvc.py new file mode 100644 index 00000000..e951f357 --- /dev/null +++ b/tests/magnus/extensions/catalog/test_k8s_pvc.py @@ -0,0 +1,7 @@ +from magnus.extensions.catalog.k8s_pvc.implementation import K8sPVCatalog + + +def test_get_catalog_location_returns_location_relative_to_mount_path(): + test_catalog = K8sPVCatalog(catalog_location="test_location", mount_path="/here", persistent_volume_name="test") + + assert test_catalog.get_catalog_location() == "/here/test_location" diff --git a/tests/magnus/extensions/catalog/test_k8s_pvc_integration.py b/tests/magnus/extensions/catalog/test_k8s_pvc_integration.py new file mode 100644 index 00000000..243fc7b0 --- /dev/null +++ b/tests/magnus/extensions/catalog/test_k8s_pvc_integration.py @@ -0,0 +1,17 @@ +import pytest + +from magnus.extensions.catalog.k8s_pvc import integration + + +def test_k8s_pvc_errors_for_local(): + test_integration = integration.LocalCompute(executor="executor", integration_service="catalog") + + with pytest.raises(Exception, match="We can't use the 
local compute"): + test_integration.validate() + + +def test_k8s_pvc_errors_for_local_container(): + test_integration = integration.LocalContainerCompute(executor="executor", integration_service="catalog") + + with pytest.raises(Exception, match="We can't use the local-container compute"): + test_integration.validate() diff --git a/tests/magnus/extensions/executor/test_argo_executor.py b/tests/magnus/extensions/executor/test_argo_executor.py new file mode 100644 index 00000000..3298e46a --- /dev/null +++ b/tests/magnus/extensions/executor/test_argo_executor.py @@ -0,0 +1,106 @@ +import pytest + +from magnus.extensions.executor.argo import implementation + + +def test_secret_env_var_has_value_from_field(): + secret_env = implementation.SecretEnvVar( + environment_variable="test_env", secret_name="secret_name", secret_key="secret_key" + ) + + assert secret_env.environment_variable == "test_env" + assert secret_env.valueFrom == {"secretKeyRef": {"name": "secret_name", "key": "secret_key"}} + + +def test_secret_env_renders_properly(): + secret_env = implementation.SecretEnvVar( + environment_variable="test_env", secret_name="secret_name", secret_key="secret_key" + ) + assert secret_env.model_dump(by_alias=True) == { + "name": "test_env", + "valueFrom": {"secretKeyRef": {"name": "secret_name", "key": "secret_key"}}, + } + + +def test_retry_serialize_makes_limit_str(): + retry = implementation.Retry(limit=10) + assert retry.model_dump(by_alias=True)["limit"] == "10" + + +def test_limit_renders_gpu_when_available(): + limit = implementation.Limit(gpu=1) + + request = implementation.Request() + + assert limit.model_dump(by_alias=True, exclude_none=True) == {**request.model_dump(), "nvidia.com/gpu": "1"} + + +def test_limit_ignores_gpu_when_none(): + limit = implementation.Limit() + + request = implementation.Request() + + assert limit.model_dump(by_alias=True, exclude_none=True) == {**request.model_dump()} + + +def test_out_put_parameter_renders_properly(): + output_parameter = implementation.OutputParameter(name="test_name", value="test_value") + + assert output_parameter.model_dump(by_alias=True) == { + "name": "test_name", + "value": "test_value", + "valueFrom": {"path": "/tmp/output.txt"}, + } + + +def test_volume_renders_properly(): + volume = implementation.Volume(name="test_name", claim="test_claim", mount_path="mount here") + + assert volume.model_dump(by_alias=True, exclude_none=True) == { + "name": "test_name", + "persistentVolumeClaim": {"claimName": "test_claim"}, + } + + +def test_spec_reshapes_arguments(): + test_env1 = implementation.EnvVar(name="test_env1", value="test_value1") + test_env2 = implementation.EnvVar(name="test_env2", value="test_value2") + + spec = implementation.Spec(arguments=[test_env1, test_env2], active_deadline_seconds=10) + + assert spec.model_dump(by_alias=True, exclude_none=True)["arguments"] == { + "parameters": [{"name": "test_env1", "value": "test_value1"}, {"name": "test_env2", "value": "test_value2"}] + } + + +def test_spec_populates_container_volumes_and_persistent_volumes(): + volume1 = implementation.UserVolumeMounts(name="test_name1", mount_path="test_mount_path1") + volume2 = implementation.UserVolumeMounts(name="test_name2", mount_path="test_mount_path2") + + spec = implementation.Spec(persistent_volumes=[volume1, volume2], active_deadline_seconds=10) + + model_dump = spec.model_dump(by_alias=True, exclude_none=True) + + assert model_dump["volumes"] == [ + {"name": "executor-0", "persistentVolumeClaim": {"claimName": "test_name1"}}, + {"name": 
"executor-1", "persistentVolumeClaim": {"claimName": "test_name2"}}, + ] + + +def test_output_parameter_valuefrom_includes_path(): + test_out_put_parameter = implementation.OutputParameter(name="test_name", path="test_path") + + assert test_out_put_parameter.model_dump(by_alias=True, exclude_none=True) == { + "name": "test_name", + "valueFrom": {"path": "test_path"}, + } + + +def test_container_command_gets_split(): + test_container = implementation.Container(image="test_image", command="am I splitting?") + + assert test_container.model_dump(by_alias=True, exclude_none=True, exclude_unset=True)["command"] == [ + "am", + "I", + "splitting?", + ] diff --git a/tests/magnus/extensions/executor/test_generic_executor.py b/tests/magnus/extensions/executor/test_generic_executor.py new file mode 100644 index 00000000..f138f85b --- /dev/null +++ b/tests/magnus/extensions/executor/test_generic_executor.py @@ -0,0 +1,671 @@ +import pytest +import logging + +from magnus import defaults, exceptions +from magnus.extensions.executor import GenericExecutor +from magnus.extensions import executor +import magnus.extensions.executor as executor + + +@pytest.fixture(autouse=True) +def instantiable_base_class(monkeypatch, mocker): + monkeypatch.setattr(GenericExecutor, "__abstractmethods__", set()) + yield + + +@pytest.fixture +def mock_run_context(mocker, monkeypatch): + mock_run_context = mocker.Mock() + monkeypatch.setattr(executor.context, "run_context", mock_run_context) + return mock_run_context + + +def test_get_parameters_gets_parameters_from_parameters_file(mocker, monkeypatch, mock_run_context): + mock_run_context.parameters_file = "parameters_file" + mock_load_yaml = mocker.MagicMock(return_value={"executor": "test"}) + monkeypatch.setattr(executor.utils, "load_yaml", mock_load_yaml) + + test_executor = GenericExecutor() + assert test_executor._get_parameters() == {"executor": "test"} + mock_load_yaml.assert_called_once_with("parameters_file") + + +def test_get_parameters_gets_parameters_from_user_parameters(mocker, monkeypatch, mock_run_context): + mock_run_context.parameters_file = "" + monkeypatch.setattr( + executor.parameters, "get_user_set_parameters", mocker.MagicMock(return_value={"executor": "test"}) + ) + + test_executor = GenericExecutor() + assert test_executor._get_parameters() == {"executor": "test"} + + +def test_get_parameters_user_parameters_overwrites_parameters_from_parameters_file( + mocker, monkeypatch, mock_run_context +): + mock_run_context.parameters_file = "parameters_file" + + mock_load_yaml = mocker.MagicMock(return_value={"executor": "this"}) + monkeypatch.setattr(executor.utils, "load_yaml", mock_load_yaml) + monkeypatch.setattr( + executor.parameters, "get_user_set_parameters", mocker.MagicMock(return_value={"executor": "that"}) + ) + + test_executor = GenericExecutor() + assert test_executor._get_parameters() == {"executor": "that"} + + +def test_set_up_for_rerun_throws_exception_if_run_log_not_exists(mocker, monkeypatch, mock_run_context): + mock_run_log_store = mocker.MagicMock() + + mock_run_context.run_log_store = mock_run_log_store + mock_run_context.original_run_id = "original_run_id" + mock_run_log_store.get_run_log_by_id = mocker.MagicMock(side_effect=exceptions.RunLogNotFoundError("test")) + + with pytest.raises(Exception, match="Expected a run log with id: original_run_id"): + GenericExecutor()._set_up_for_re_run(parameters={}) + + +def test_set_up_for_re_run_syncs_catalog_and_parameters(mocker, monkeypatch, mock_run_context): + 
mock_catalog_handler_sync_between_runs = mocker.MagicMock() + mock_catalog_handler = mocker.MagicMock() + mock_catalog_handler.sync_between_runs = mock_catalog_handler_sync_between_runs + + mock_run_context.catalog_handler = mock_catalog_handler + mock_run_context.run_id = "run_id" + mock_run_context.original_run_id = "original_run_id" + + mock_attempt_run_log = mocker.MagicMock() + mock_attempt_run_log.parameters = {"ghost": "from past"} + + mock_run_log_store = mocker.MagicMock() + mock_run_log_store.get_run_log_by_id.return_value = mock_attempt_run_log + mock_run_context.run_log_store = mock_run_log_store + + parameters = {} + GenericExecutor()._set_up_for_re_run(parameters=parameters) + + mock_catalog_handler_sync_between_runs.assert_called_once_with(previous_run_id="original_run_id", run_id="run_id") + assert parameters == {"ghost": "from past"} + + +def test_set_up_for_re_run_syncs_catalog_and_updates_parameters(mocker, monkeypatch, mock_run_context): + mock_catalog_handler_sync_between_runs = mocker.MagicMock() + mock_catalog_handler = mocker.MagicMock() + mock_catalog_handler.sync_between_runs = mock_catalog_handler_sync_between_runs + + mock_run_context.catalog_handler = mock_catalog_handler + mock_run_context.run_id = "run_id" + mock_run_context.original_run_id = "original_run_id" + + mock_attempt_run_log = mocker.MagicMock() + mock_attempt_run_log.parameters = {"ghost": "from past"} + + mock_run_log_store = mocker.MagicMock() + mock_run_log_store.get_run_log_by_id.return_value = mock_attempt_run_log + mock_run_context.run_log_store = mock_run_log_store + + parameters = {"present": "now"} + GenericExecutor()._set_up_for_re_run(parameters=parameters) + + mock_catalog_handler_sync_between_runs.assert_called_once_with(previous_run_id="original_run_id", run_id="run_id") + assert parameters == {"present": "now", "ghost": "from past"} + + +def test_set_up_run_log_throws_exception_if_run_log_already_exists(mocker, monkeypatch, mock_run_context): + mock_run_log_store = mocker.MagicMock() + + mock_run_log_store.get_run_log_by_id = mocker.MagicMock(side_effect=exceptions.RunLogExistsError) + + with pytest.raises(exceptions.RunLogExistsError): + GenericExecutor()._set_up_run_log() + + +def test_set_up_run_log_exists_ok_returns_without_exception(mocker, monkeypatch, mock_run_context): + GenericExecutor()._set_up_run_log(exists_ok=True) + + +def test_set_up_run_log_calls_get_parameters(mocker, monkeypatch, mock_run_context): + mock_get_parameters = mocker.MagicMock() + monkeypatch.setattr(GenericExecutor, "_get_parameters", mock_get_parameters) + + mock_run_context.run_log_store.get_run_log_by_id = mocker.MagicMock( + side_effect=exceptions.RunLogNotFoundError("test") + ) + mock_run_context.use_cached = False + + GenericExecutor()._set_up_run_log() + + assert mock_get_parameters.call_count == 1 + + +def test_set_up_run_log_calls_set_up_for_re_run(mocker, monkeypatch, mock_run_context): + mock_set_up_for_re_run = mocker.MagicMock() + monkeypatch.setattr(GenericExecutor, "_set_up_for_re_run", mock_set_up_for_re_run) + + mock_get_parameters = mocker.MagicMock() + monkeypatch.setattr(GenericExecutor, "_get_parameters", mock_get_parameters) + + mock_run_context.run_log_store.get_run_log_by_id = mocker.MagicMock( + side_effect=exceptions.RunLogNotFoundError("test") + ) + + GenericExecutor()._set_up_run_log() + + assert mock_set_up_for_re_run.call_count == 1 + + +def test_set_up_run_log_calls_create_run_log(mocker, monkeypatch, mock_run_context): + mock_get_parameters = mocker.MagicMock() + 
monkeypatch.setattr(GenericExecutor, "_get_parameters", mock_get_parameters) + + mock_run_context.run_log_store.get_run_log_by_id = mocker.MagicMock( + side_effect=exceptions.RunLogNotFoundError("test") + ) + + mock_create_run_log = mocker.MagicMock() + mock_run_context.run_log_store.create_run_log = mock_create_run_log + + mock_run_context.run_id = "test" + mock_run_context.tag = "tag" + mock_run_context.dag_hash = "dag_hash" + mock_run_context.use_cached = False + mock_run_context.original_run_id = "original_run_id" + + GenericExecutor()._set_up_run_log() + + mock_create_run_log.assert_called_once_with( + run_id="test", + tag="tag", + status=defaults.PROCESSING, + dag_hash="dag_hash", + use_cached=False, + original_run_id="original_run_id", + ) + + +def test_set_up_run_log_store_calls_set_parameters(mocker, monkeypatch, mock_run_context): + mock_get_parameters = mocker.MagicMock() + monkeypatch.setattr(GenericExecutor, "_get_parameters", mock_get_parameters) + + mock_run_context.run_log_store.get_run_log_by_id = mocker.MagicMock( + side_effect=exceptions.RunLogNotFoundError("test") + ) + + mock_run_context.use_cached = False + mock_set_parameters = mocker.MagicMock() + mock_run_context.run_log_store.set_parameters = mock_set_parameters + + GenericExecutor()._set_up_run_log() + + assert mock_set_parameters.call_count == 1 + + +def test_set_up_run_log_store_calls_set_run_config(mocker, monkeypatch, mock_run_context): + mock_get_parameters = mocker.MagicMock() + monkeypatch.setattr(GenericExecutor, "_get_parameters", mock_get_parameters) + + mock_run_context.run_log_store.get_run_log_by_id = mocker.MagicMock( + side_effect=exceptions.RunLogNotFoundError("test") + ) + + mock_run_context.use_cached = False + mock_set_run_config = mocker.MagicMock() + mock_run_context.run_log_store.set_parameters = mock_set_run_config + + GenericExecutor()._set_up_run_log() + + assert mock_set_run_config.call_count == 1 + + +def test_base_executor_prepare_for_graph_execution_calls(mocker, monkeypatch, mock_run_context): + mock_integration = mocker.MagicMock() + mock_validate = mocker.MagicMock() + mock_configure_for_traversal = mocker.MagicMock() + + mock_integration.validate = mock_validate + mock_integration.configure_for_traversal = mock_configure_for_traversal + + mock_set_up_run_log = mocker.MagicMock() + monkeypatch.setattr(GenericExecutor, "_set_up_run_log", mock_set_up_run_log) + + monkeypatch.setattr(executor, "integration", mock_integration) + monkeypatch.setattr(executor.BaseExecutor, "_set_up_run_log", mocker.MagicMock()) + + base_executor = GenericExecutor() + + base_executor.prepare_for_graph_execution() + + assert mock_configure_for_traversal.call_count == 4 + assert mock_validate.call_count == 4 + + +def test_base_execution_prepare_for_node_calls(mocker, monkeypatch, mock_run_context): + mock_integration = mocker.MagicMock() + mock_validate = mocker.MagicMock() + mock_configure_for_execution = mocker.MagicMock() + + mock_integration.validate = mock_validate + mock_integration.configure_for_execution = mock_configure_for_execution + + mock_set_up_run_log = mocker.MagicMock() + monkeypatch.setattr(GenericExecutor, "_set_up_run_log", mock_set_up_run_log) + + monkeypatch.setattr(executor, "integration", mock_integration) + + base_executor = GenericExecutor() + + base_executor.prepare_for_node_execution() + + assert mock_configure_for_execution.call_count == 4 + assert mock_validate.call_count == 4 + + +def test_base_executor__sync_catalog_raises_exception_if_stage_not_in_get_or_put(mocker, 
monkeypatch): + test_executor = GenericExecutor() + with pytest.raises(Exception): + test_executor._sync_catalog(step_log="test", stage="puts") + + +def test_sync_catalog_does_nothing_for_terminal_node(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings = mocker.MagicMock(side_effect=exceptions.TerminalNodeError) + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + test_executor._sync_catalog("test", stage="get") + + +def test_sync_catalog_does_nothing_for_no_catalog_settings(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings = mocker.MagicMock(return_value={}) + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + test_executor._sync_catalog("test", stage="get") + + +def test_sync_catalog_does_nothing_for_catalog_settings_stage_not_in(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings = mocker.MagicMock(return_value={"get": "something"}) + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + test_executor._sync_catalog("test", stage="put") + + +def test_sync_catalog_returns_nothing_if_no_syncing_for_node(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + + mock_node._get_catalog_settings.return_value = None + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + assert test_executor._sync_catalog("test", stage="get") is None + + +def test_sync_catalog_returns_empty_list_if_asked_nothing_in_stage(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {"get": [], "put": []} + + mock_get_effective_compute_folder = mocker.MagicMock(return_value="compute_folder") + monkeypatch.setattr(GenericExecutor, "get_effective_compute_data_folder", mock_get_effective_compute_folder) + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + assert test_executor._sync_catalog("test", stage="get") == [] + assert test_executor._sync_catalog("test", stage="put") == [] + + +def test_sync_catalog_calls_get_from_catalog_handler(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {"get": ["me"], "put": []} + mock_step_log = mocker.MagicMock() + + mock_get_effective_compute_folder = mocker.MagicMock(return_value="compute_folder") + monkeypatch.setattr(GenericExecutor, "get_effective_compute_data_folder", mock_get_effective_compute_folder) + + mock_catalog_handler_get = mocker.MagicMock() + mock_catalog_handler_get.return_value = ["data_catalog"] + mock_run_context.catalog_handler.get = mock_catalog_handler_get + mock_run_context.run_id = "run_id" + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + data_catalogs = test_executor._sync_catalog(step_log=mock_step_log, stage="get") + + assert data_catalogs == ["data_catalog"] + mock_catalog_handler_get.assert_called_once_with(name="me", run_id="run_id", compute_data_folder="compute_folder") + + +def test_sync_catalog_calls_get_from_catalog_handler_as_per_input(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {"get": ["me", "you"], "put": []} + mock_step_log = mocker.MagicMock() + + mock_get_effective_compute_folder = mocker.MagicMock(return_value="compute_folder") + monkeypatch.setattr(GenericExecutor, 
"get_effective_compute_data_folder", mock_get_effective_compute_folder) + + mock_catalog_handler_get = mocker.MagicMock() + mock_catalog_handler_get.return_value = ["data_catalog"] + mock_run_context.catalog_handler.get = mock_catalog_handler_get + mock_run_context.run_id = "run_id" + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + data_catalogs = test_executor._sync_catalog(step_log=mock_step_log, stage="get") + + assert data_catalogs == ["data_catalog", "data_catalog"] + assert mock_catalog_handler_get.call_count == 2 + + +def test_sync_catalog_calls_put_from_catalog_handler(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {"get": [], "put": ["me"]} + mock_step_log = mocker.MagicMock() + + mock_get_effective_compute_folder = mocker.MagicMock(return_value="compute_folder") + monkeypatch.setattr(GenericExecutor, "get_effective_compute_data_folder", mock_get_effective_compute_folder) + + mock_catalog_handler_put = mocker.MagicMock() + mock_catalog_handler_put.return_value = ["data_catalog"] + mock_run_context.catalog_handler.put = mock_catalog_handler_put + mock_run_context.run_id = "run_id" + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + data_catalogs = test_executor._sync_catalog(step_log=mock_step_log, stage="put") + + assert data_catalogs == ["data_catalog"] + mock_catalog_handler_put.assert_called_once_with( + name="me", run_id="run_id", compute_data_folder="compute_folder", synced_catalogs=None + ) + + +def test_sync_catalog_calls_put_from_catalog_handler_as_per_input(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {"get": [], "put": ["me", "you"]} + mock_step_log = mocker.MagicMock() + + mock_get_effective_compute_folder = mocker.MagicMock(return_value="compute_folder") + monkeypatch.setattr(GenericExecutor, "get_effective_compute_data_folder", mock_get_effective_compute_folder) + + mock_catalog_handler_put = mocker.MagicMock() + mock_catalog_handler_put.return_value = ["data_catalog"] + mock_run_context.catalog_handler.put = mock_catalog_handler_put + mock_run_context.run_id = "run_id" + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + data_catalogs = test_executor._sync_catalog(step_log=mock_step_log, stage="put") + + assert data_catalogs == ["data_catalog", "data_catalog"] + assert mock_catalog_handler_put.call_count == 2 + + +def test_sync_catalog_calls_put_sends_synced_catalogs_to_catalog_handler(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {"get": [], "put": ["me"]} + mock_step_log = mocker.MagicMock() + + mock_get_effective_compute_folder = mocker.MagicMock(return_value="compute_folder") + monkeypatch.setattr(GenericExecutor, "get_effective_compute_data_folder", mock_get_effective_compute_folder) + + mock_catalog_handler_put = mocker.MagicMock() + mock_catalog_handler_put.return_value = ["data_catalog"] + mock_run_context.catalog_handler.put = mock_catalog_handler_put + mock_run_context.run_id = "run_id" + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + data_catalogs = test_executor._sync_catalog(step_log=mock_step_log, stage="put", synced_catalogs="in_sync") + + assert data_catalogs == ["data_catalog"] + mock_catalog_handler_put.assert_called_once_with( + name="me", run_id="run_id", compute_data_folder="compute_folder", 
synced_catalogs="in_sync" + ) + + +def test_get_effective_compute_data_folder_returns_default(mocker, mock_run_context): + mock_run_context.catalog_handler.compute_data_folder = "default" + + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {} + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + assert test_executor.get_effective_compute_data_folder() == "default" + + +def test_get_effective_compute_data_folder_returns_from_node_settings(mocker, mock_run_context): + mock_run_context.catalog_handler.compute_data_folder = "default" + + mock_node = mocker.MagicMock() + mock_node._get_catalog_settings.return_value = {"compute_data_folder": "not_default"} + + test_executor = GenericExecutor() + test_executor._context_node = mock_node + + assert test_executor.get_effective_compute_data_folder() == "not_default" + + +def test_step_attempt_returns_one_by_default(): + test_executor = GenericExecutor() + + assert test_executor.step_attempt_number == 1 + + +def test_step_attempt_returns_from_env(monkeypatch): + test_executor = GenericExecutor() + + monkeypatch.setenv("MAGNUS_STEP_ATTEMPT", "2") + + assert test_executor.step_attempt_number == 2 + + +def test_base_executor__is_step_eligible_for_rerun_returns_true_if_not_use_cached(mock_run_context): + test_executor = GenericExecutor() + + mock_run_context.use_cached = False + + assert test_executor._is_step_eligible_for_rerun(node=None) + + +def test_base_executor__is_step_eligible_for_rerun_returns_true_if_step_log_not_found(mocker, mock_run_context): + mock_run_context.use_cached = True + + mock_node = mocker.MagicMock() + mock_node._get_step_log_name.return_value = "IdontExist" + + mock_run_context.run_log_store.get_step_log.side_effect = exceptions.StepLogNotFoundError( + run_id="test", name="test" + ) + + test_executor = GenericExecutor() + + assert test_executor._is_step_eligible_for_rerun(node=mock_node) + + +def test_base_executor__is_step_eligible_for_rerun_returns_true_if_step_failed(mocker, mock_run_context): + mock_run_context.use_cached = True + + mock_node = mocker.MagicMock() + mock_node._get_step_log_name.return_value = "IExist" + + mock_run_context.run_log_store.get_step_log.return_value.status = defaults.FAIL + + test_executor = GenericExecutor() + + assert test_executor._is_step_eligible_for_rerun(node=mock_node) is True + + +def test_base_executor__is_step_eligible_for_rerun_returns_false_if_step_succeeded(mocker, mock_run_context): + mock_run_context.use_cached = True + + mock_node = mocker.MagicMock() + mock_node._get_step_log_name.return_value = "IExist" + + mock_run_context.run_log_store.get_step_log.return_value.status = defaults.SUCCESS + + test_executor = GenericExecutor() + + assert test_executor._is_step_eligible_for_rerun(node=mock_node) is False + + +def test_base_executor_resolve_executor_config_gives_global_config_if_node_does_not_override(mocker, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_executor_config.return_value = {} + + mock_run_context.variables = {} + + test_executor = GenericExecutor() + + assert test_executor._resolve_executor_config(mock_node) == {**test_executor.model_dump()} + + +def test_get_status_and_next_node_name_returns_empty_for_terminal_node(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_next_node = mocker.MagicMock(side_effect=exceptions.TerminalNodeError) + + mock_step_log = mocker.MagicMock() + mock_step_log.status = defaults.SUCCESS + 
mock_run_context.run_log_store.get_step_log.return_value = mock_step_log + + test_executor = GenericExecutor() + + assert test_executor._get_status_and_next_node_name(mock_node, "dag") == (defaults.SUCCESS, "") + + +def test_get_status_and_next_node_name_returns_next_node_if_success(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_next_node.return_value = "next_node" + + mock_step_log = mocker.MagicMock() + mock_step_log.status = defaults.SUCCESS + mock_run_context.run_log_store.get_step_log.return_value = mock_step_log + + test_executor = GenericExecutor() + + assert test_executor._get_status_and_next_node_name(mock_node, "dag") == (defaults.SUCCESS, "next_node") + + +def test_get_status_and_next_node_name_returns_terminal_node_in_case_of_failure(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_next_node.return_value = "next_node" + mock_node._get_on_failure_node.return_value = "" + + mock_run_context.run_log_store.get_step_log.return_value.status = defaults.FAIL + + mock_dag = mocker.MagicMock() + mock_dag.get_fail_node.return_value.name = "fail_node" + + test_executor = GenericExecutor() + + assert test_executor._get_status_and_next_node_name(mock_node, mock_dag) == (defaults.FAIL, "fail_node") + + +def test_get_status_and_next_node_name_returns_on_failure_node_if_failed(mocker, monkeypatch, mock_run_context): + mock_node = mocker.MagicMock() + mock_node._get_next_node.return_value = "next_node" + mock_node._get_on_failure_node.return_value = "me_please" + + mock_run_context.run_log_store.get_step_log.return_value.status = defaults.FAIL + + mock_dag = mocker.MagicMock() + mock_dag.get_fail_node.return_value.name = "fail_node" + + test_executor = GenericExecutor() + + assert test_executor._get_status_and_next_node_name(mock_node, mock_dag) == (defaults.FAIL, "me_please") + + +def test_execute_node_calls_store_parameter_with_update_false(mocker, monkeypatch, mock_run_context): + mock_parameters = mocker.MagicMock() + monkeypatch.setattr(executor, "parameters", mock_parameters) + + mock_run_context.run_log_store.get_parameters.return_value = {"a": 1} + + test_executor = GenericExecutor() + test_executor._sync_catalog = mocker.MagicMock() + + mock_node = mocker.MagicMock() + test_executor._execute_node(mock_node) + + args, kwargs = mock_parameters.set_user_defined_params_as_environment_variables.call_args + assert args[0] == {"a": 1} + + +def test_execute_node_raises_exception_if_node_execute_raises_one(mocker, monkeypatch, mock_run_context, caplog): + mock_run_context.run_log_store.get_parameters.return_value = {"a": 1} + test_executor = GenericExecutor() + test_executor._sync_catalog = mocker.MagicMock() + + mock_node = mocker.MagicMock() + mock_node.execute.side_effect = Exception() + + with caplog.at_level(logging.ERROR, logger="magnus"), pytest.raises(Exception): + test_executor._execute_node(mock_node) + + assert "This is clearly magnus fault, " in caplog.text + + +def test_execute_node_sets_step_log_status_to_fail_if_node_fails(mocker, monkeypatch, mock_run_context): + mock_step_log = mocker.MagicMock() + mock_run_context.run_log_store.get_step_log.return_value = mock_step_log + mock_run_context.run_log_store.get_parameters.return_value = {"a": 1} + + mock_attempt_log = mocker.MagicMock() + mock_attempt_log.status = defaults.FAIL + + mock_node = mocker.MagicMock() + mock_node.execute.return_value = mock_attempt_log + + test_executor = GenericExecutor() + test_executor._sync_catalog = 
mocker.MagicMock() + + test_executor._execute_node(mock_node) + + assert mock_step_log.status == defaults.FAIL + + +def test_execute_node_sets_step_log_status_to_success_if_node_succeeds(mocker, monkeypatch, mock_run_context): + mock_step_log = mocker.MagicMock() + mock_run_context.run_log_store.get_step_log.return_value = mock_step_log + mock_run_context.run_log_store.get_parameters.return_value = {"a": 1} + + mock_node = mocker.MagicMock() + mock_node.execute.return_value.status = defaults.SUCCESS + + test_executor = GenericExecutor() + test_executor._sync_catalog = mocker.MagicMock() + + test_executor._execute_node(mock_node) + + assert mock_step_log.status == defaults.SUCCESS + + +def test_send_return_code_raises_exception_if_pipeline_execution_failed(mocker, mock_run_context): + mock_run_context.run_log_store.get_run_log_by_id.return_value.status = defaults.FAIL + + test_executor = GenericExecutor() + + with pytest.raises(exceptions.ExecutionFailedError): + test_executor.send_return_code() + + +def test_send_return_code_does_not_raise_exception_if_pipeline_execution_succeeded(mocker, mock_run_context): + mock_run_context.run_log_store.get_run_log_by_id.return_value.status = defaults.SUCCESS + + test_executor = GenericExecutor() + test_executor.send_return_code() diff --git a/tests/magnus/extensions/executor/test_local_container_integration.py b/tests/magnus/extensions/executor/test_local_container_integration.py new file mode 100644 index 00000000..6b87e7b9 --- /dev/null +++ b/tests/magnus/extensions/executor/test_local_container_integration.py @@ -0,0 +1,35 @@ +from pathlib import Path + +from magnus.extensions.executor.local_container import implementation + + +def test_configure_for_traversal_populates_volumes(mocker, monkeypatch): + mock_local_container = mocker.MagicMock() + monkeypatch.setattr(implementation, "LocalContainerComputeFileSystemRunLogstore", mock_local_container) + + mock_executor = mocker.MagicMock() + mock_executor._volumes = {} + mock_executor._container_catalog_location = "this_location" + + mock_fs_catalog = mocker.MagicMock() + mock_fs_catalog.catalog_location = "catalog_location" + + test_integration = implementation.LocalContainerComputeFileSystemCatalog(mock_executor, mock_fs_catalog) + test_integration.configure_for_traversal() + + assert mock_executor._volumes == {str(Path("catalog_location").resolve()): {"bind": "this_location", "mode": "rw"}} + + +def test_configure_for_execution_assigns_catalog_location_within_container(mocker, monkeypatch): + mock_local_container = mocker.MagicMock() + monkeypatch.setattr(implementation, "LocalContainerComputeFileSystemRunLogstore", mock_local_container) + + mock_executor = mocker.MagicMock() + mock_executor._container_catalog_location = "this_location" + + mock_fs_catalog = mocker.MagicMock() + + test_integration = implementation.LocalContainerComputeFileSystemCatalog(mock_executor, mock_fs_catalog) + test_integration.configure_for_execution() + + assert mock_fs_catalog.catalog_location == "this_location" diff --git a/tests/magnus/extensions/executor/test_local_executor.py b/tests/magnus/extensions/executor/test_local_executor.py new file mode 100644 index 00000000..d451dc30 --- /dev/null +++ b/tests/magnus/extensions/executor/test_local_executor.py @@ -0,0 +1,14 @@ +from magnus.extensions.executor.local.implementation import LocalExecutor + + +def test_local_executor_execute_node_just_calls___execute_node(mocker, monkeypatch): + mock__execute_node = mocker.MagicMock() + + monkeypatch.setattr(LocalExecutor, 
"_execute_node", mock__execute_node) + executor = LocalExecutor() + + mock_node = mocker.MagicMock() + + executor.execute_node(mock_node) + + assert mock__execute_node.call_count == 1 diff --git a/tests/magnus/extensions/experiment_tracker/test_mlflow.py b/tests/magnus/extensions/experiment_tracker/test_mlflow.py new file mode 100644 index 00000000..f972af4e --- /dev/null +++ b/tests/magnus/extensions/experiment_tracker/test_mlflow.py @@ -0,0 +1 @@ +from magnus.extensions.experiment_tracker.mlflow import implementation diff --git a/tests/magnus/extensions/run_log_store/__init__.py b/tests/magnus/extensions/run_log_store/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/magnus/extensions/run_log_store/test_chunked_k8s_pvc_integration.py b/tests/magnus/extensions/run_log_store/test_chunked_k8s_pvc_integration.py new file mode 100644 index 00000000..8b197c9f --- /dev/null +++ b/tests/magnus/extensions/run_log_store/test_chunked_k8s_pvc_integration.py @@ -0,0 +1,17 @@ +import pytest + +from magnus.extensions.run_log_store.chunked_k8s_pvc import integration + + +def test_k8s_pvc_errors_for_local(): + test_integration = integration.LocalCompute(executor="executor", integration_service="catalog") + + with pytest.raises(Exception, match="We can't use the local compute"): + test_integration.validate() + + +def test_k8s_pvc_errors_for_local_container(): + test_integration = integration.LocalContainerCompute(executor="executor", integration_service="catalog") + + with pytest.raises(Exception, match="We can't use the local-container compute"): + test_integration.validate() diff --git a/tests/magnus/extensions/run_log_store/test_file_system.py b/tests/magnus/extensions/run_log_store/test_file_system.py new file mode 100644 index 00000000..18c476e1 --- /dev/null +++ b/tests/magnus/extensions/run_log_store/test_file_system.py @@ -0,0 +1,130 @@ +import pytest + +from magnus.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore +import magnus.extensions.run_log_store.file_system.implementation as implementation +from magnus import defaults +from magnus import exceptions + + +def test_file_system_run_log_store_log_folder_name_defaults_if_not_provided(): + run_log_store = FileSystemRunLogstore() + + assert run_log_store.log_folder_name == defaults.LOG_LOCATION_FOLDER + + +def test_file_system_run_log_store_log_folder_name_if__provided(): + run_log_store = FileSystemRunLogstore(log_folder="test") + + assert run_log_store.log_folder_name == "test" + + +def test_file_system_run_log_store_write_to_folder_makes_dir_if_not_present(mocker, monkeypatch): + mock_safe_make_dir = mocker.MagicMock() + monkeypatch.setattr(implementation.utils, "safe_make_dir", mock_safe_make_dir) + + mock_json = mocker.MagicMock() + mock_path = mocker.MagicMock() + monkeypatch.setattr(implementation, "json", mock_json) + monkeypatch.setattr(implementation, "Path", mock_path) + + mock_run_log = mocker.MagicMock() + mock_dict = mocker.MagicMock() + mock_run_log.model_dump = mock_dict + + run_log_store = FileSystemRunLogstore() + run_log_store.write_to_folder(run_log=mock_run_log) + + mock_safe_make_dir.assert_called_once_with(run_log_store.log_folder_name) + assert mock_dict.call_count == 1 + + +def test_file_system_run_log_store_get_from_folder_raises_exception_if_folder_not_present(mocker, monkeypatch): + mock_path = mocker.MagicMock() + monkeypatch.setattr(implementation, "Path", mocker.MagicMock(return_value=mock_path)) + + mock_path.__truediv__.return_value = mock_path + + 
mock_path.exists.return_value = False + + run_log_store = implementation.FileSystemRunLogstore() + + with pytest.raises(FileNotFoundError): + run_log_store.get_from_folder(run_id="test") + + +def test_file_system_run_log_store_get_from_folder_returns_run_log_from_file_contents(mocker, monkeypatch): + mock_path = mocker.MagicMock() + monkeypatch.setattr(implementation, "Path", mocker.MagicMock(return_value=mock_path)) + + mock_path.__truediv__.return_value = mock_path + mock_path.exists.return_value = True + + mock_json = mocker.MagicMock() + monkeypatch.setattr(implementation, "json", mock_json) + mock_json.load.return_value = {"run_id": "test"} + + run_log_store = implementation.FileSystemRunLogstore() + run_log = run_log_store.get_from_folder(run_id="does not matter") + + assert run_log.run_id == "test" + + +def test_file_system_run_log_store_create_run_log_writes_to_folder(mocker, monkeypatch): + mock_write_to_folder = mocker.MagicMock() + + monkeypatch.setattr(implementation.FileSystemRunLogstore, "write_to_folder", mock_write_to_folder) + + run_log_store = implementation.FileSystemRunLogstore() + run_log = run_log_store.create_run_log(run_id="test random") + + mock_write_to_folder.assert_called_once_with(run_log) + + assert run_log.run_id == "test random" + + +def test_file_system_run_log_store_create_run_log_raises_exception_if_present(mocker, monkeypatch): + mock_write_to_folder = mocker.MagicMock() + mock_get_run_log_by_id = mocker.MagicMock(return_value="existing") + + monkeypatch.setattr(implementation.FileSystemRunLogstore, "write_to_folder", mock_write_to_folder) + monkeypatch.setattr(implementation.FileSystemRunLogstore, "get_run_log_by_id", mock_get_run_log_by_id) + + run_log_store = implementation.FileSystemRunLogstore() + with pytest.raises(exceptions.RunLogExistsError): + run_log_store.create_run_log(run_id="test random") + + +def test_file_system_run_log_store_get_run_log_by_id_raises_exception_if_get_from_folder_fails(mocker, monkeypatch): + mock_get_from_folder = mocker.MagicMock() + mock_get_from_folder.side_effect = FileNotFoundError() + + monkeypatch.setattr(implementation.FileSystemRunLogstore, "get_from_folder", mock_get_from_folder) + + run_log_store = implementation.FileSystemRunLogstore() + with pytest.raises(exceptions.RunLogNotFoundError): + run_log_store.get_run_log_by_id(run_id="should fail") + + +def test_file_system_run_log_store_get_run_log_by_id_returns_run_log_from_get_from_folder(mocker, monkeypatch): + mock_get_from_folder = mocker.MagicMock() + mock_get_from_folder.return_value = "I am a run log" + + monkeypatch.setattr(implementation.FileSystemRunLogstore, "get_from_folder", mock_get_from_folder) + + run_log_store = implementation.FileSystemRunLogstore() + + run_log = run_log_store.get_run_log_by_id(run_id="test") + + assert run_log == "I am a run log" + + +def test_file_system_run_log_store_put_run_log_writes_to_folder(mocker, monkeypatch): + mock_write_to_folder = mocker.MagicMock() + + monkeypatch.setattr(implementation.FileSystemRunLogstore, "write_to_folder", mock_write_to_folder) + + run_log_store = implementation.FileSystemRunLogstore() + mock_run_log = mocker.MagicMock() + run_log_store.put_run_log(run_log=mock_run_log) + + mock_write_to_folder.assert_called_once_with(mock_run_log) diff --git a/tests/magnus/extensions/run_log_store/test_generic_chunked.py b/tests/magnus/extensions/run_log_store/test_generic_chunked.py new file mode 100644 index 00000000..3f4d4335 --- /dev/null +++ 
b/tests/magnus/extensions/run_log_store/test_generic_chunked.py @@ -0,0 +1 @@ +from magnus.extensions.run_log_store import generic_chunked diff --git a/tests/magnus/extensions/run_log_store/test_k8s_pvc_integration.py b/tests/magnus/extensions/run_log_store/test_k8s_pvc_integration.py new file mode 100644 index 00000000..a1c11cb7 --- /dev/null +++ b/tests/magnus/extensions/run_log_store/test_k8s_pvc_integration.py @@ -0,0 +1,17 @@ +import pytest + +from magnus.extensions.run_log_store.k8s_pvc import integration + + +def test_k8s_pvc_errors_for_local(): + test_integration = integration.LocalCompute(executor="executor", integration_service="catalog") + + with pytest.raises(Exception, match="We can't use the local compute"): + test_integration.validate() + + +def test_k8s_pvc_errors_for_local_container(): + test_integration = integration.LocalContainerCompute(executor="executor", integration_service="catalog") + + with pytest.raises(Exception, match="We can't use the local-container compute"): + test_integration.validate() diff --git a/tests/magnus/extensions/secrets/test_dotenv.py b/tests/magnus/extensions/secrets/test_dotenv.py new file mode 100644 index 00000000..5525c704 --- /dev/null +++ b/tests/magnus/extensions/secrets/test_dotenv.py @@ -0,0 +1,72 @@ +import pytest + +from magnus import defaults, exceptions + +from magnus.extensions.secrets.dotenv.implementation import DotEnvSecrets +import magnus.extensions.secrets.dotenv.implementation as implementation + + +def test_dot_env_secrets_defaults_to_default_location_if_none_provided(mocker, monkeypatch): + dot_env_secret = DotEnvSecrets() + assert dot_env_secret.secrets_location == defaults.DOTENV_FILE_LOCATION + + +def test_dot_env_secrets_usees_location_if_provided(mocker, monkeypatch): + dot_env_secret = DotEnvSecrets(location="here") + assert dot_env_secret.location == "here" + + +def test_dot_env_secrets_get_returns_secret_if_one_provided(mocker, monkeypatch): + dot_env_secret = DotEnvSecrets(location="here") + dot_env_secret.secrets["give"] = "this" + + assert dot_env_secret.get("give") == "this" + + +def test_dot_env_secrets_raises_exception_if_secret_not_found(mocker, monkeypatch): + monkeypatch.setattr(DotEnvSecrets, "_load_secrets", mocker.MagicMock()) + + dot_env_secret = DotEnvSecrets(location="here") + dot_env_secret.secrets["give"] = "this" + + with pytest.raises(expected_exception=exceptions.SecretNotFoundError): + dot_env_secret.get("give1") + + +def test_dot_env_load_secrets_raises_exception_if_file_does_not_exist(mocker, monkeypatch): + monkeypatch.setattr(implementation.utils, "does_file_exist", mocker.MagicMock(return_value=False)) + + dot_env_secret = DotEnvSecrets(location="here") + + with pytest.raises(Exception, match="Did not find the secrets file"): + dot_env_secret._load_secrets() + + +def test_dot_env_load_secrets_raises_exception_if_secret_formatting_is_invalid(mocker, monkeypatch): + monkeypatch.setattr(implementation.utils, "does_file_exist", mocker.MagicMock(return_value=True)) + + dot_env_secret = DotEnvSecrets(location="here") + + with pytest.raises(Exception, match="A secret should be of format, secret_name=secret_value"): + mocker.patch("builtins.open", mocker.mock_open(read_data="data")) + dot_env_secret._load_secrets() + + +def test_dot_env_load_secrets_raises_exception_if_secret_formatting_is_invalid_ge_2(mocker, monkeypatch): + monkeypatch.setattr(implementation.utils, "does_file_exist", mocker.MagicMock(return_value=True)) + + dot_env_secret = DotEnvSecrets(location="here") + + with 
pytest.raises(Exception, match="A secret should be of format, secret_name=secret_value"): + mocker.patch("builtins.open", mocker.mock_open(read_data="data=data1=")) + dot_env_secret._load_secrets() + + +def test_dot_env_load_secrets_populates_correct_secrets_if_valid(mocker, monkeypatch): + monkeypatch.setattr(implementation.utils, "does_file_exist", mocker.MagicMock(return_value=True)) + + dot_env_secret = DotEnvSecrets(location="here") + + mocker.patch("builtins.open", mocker.mock_open(read_data="data=data1\n")) + dot_env_secret._load_secrets() + assert dot_env_secret.secrets == {"data": "data1"} diff --git a/tests/magnus/extensions/secrets/test_env_secrets_manager.py b/tests/magnus/extensions/secrets/test_env_secrets_manager.py new file mode 100644 index 00000000..ffb40ba7 --- /dev/null +++ b/tests/magnus/extensions/secrets/test_env_secrets_manager.py @@ -0,0 +1,48 @@ +import pytest +import os + +from magnus.extensions.secrets.env_secrets.implementation import EnvSecretsManager +from magnus import exceptions + + +def test_env_secrets_manager_raises_error_if_name_provided_and_not_present(): + manager = EnvSecretsManager() + + with pytest.raises(exceptions.SecretNotFoundError): + manager.get("environment") + + +def test_env_secrets_returns_secret_if_present_in_environment(monkeypatch): + monkeypatch.setenv("TEST_SECRET", "test_secret") + + manager = EnvSecretsManager() + assert manager.get("TEST_SECRET") == "test_secret" + + +def test_env_secrets_returns_secret_if_present_in_environment_with_prefix(monkeypatch): + monkeypatch.setenv("PREFIX_TEST_SECRET", "test_secret") + + manager = EnvSecretsManager(prefix="PREFIX_") + assert manager.get("TEST_SECRET") == "test_secret" + + +def test_env_secrets_returns_secret_if_present_in_environment_with_suffix(monkeypatch): + monkeypatch.setenv("TEST_SECRET_SUFFIX", "test_secret") + + manager = EnvSecretsManager(suffix="_SUFFIX") + assert manager.get("TEST_SECRET") == "test_secret" + + +def test_env_secrets_returns_secret_if_present_in_environment_with_suffix_and_prefix(monkeypatch): + monkeypatch.setenv("PREFIX_TEST_SECRET_SUFFIX", "test_secret") + + manager = EnvSecretsManager(suffix="_SUFFIX", prefix="PREFIX_") + assert manager.get("TEST_SECRET") == "test_secret" + + +def test_env_secrets_returns_matched_secrets_with_suffix(monkeypatch): + monkeypatch.setenv("TEST_SECRET_SUFFIX", "test_secret") + + manager = EnvSecretsManager(suffix="_SUFFIX") + + assert manager.get("TEST_SECRET") == "test_secret" diff --git a/tests/magnus/extensions/test_node_extensions.py b/tests/magnus/extensions/test_node_extensions.py new file mode 100644 index 00000000..67d3ebee --- /dev/null +++ b/tests/magnus/extensions/test_node_extensions.py @@ -0,0 +1,344 @@ +import pytest + +from magnus import defaults +from magnus.extensions import nodes as nodes + +from magnus.tasks import BaseTaskType + + +@pytest.fixture(autouse=True) +def instantiable_base_class(monkeypatch): + monkeypatch.setattr(BaseTaskType, "__abstractmethods__", set()) + yield + + +def test_task_node_parse_from_config_seperates_task_from_node_confifg(mocker, monkeypatch): + base_task = BaseTaskType(node_name="test", task_type="dummy") + mock_create_task = mocker.MagicMock(return_value=base_task) + + command_config = {"to_be_sent_to_task": "yes"} + node_config = { + "name": "test", + "node_type": "task", + "internal_name": "test", + "next_node": "next_node", + } + monkeypatch.setattr(nodes, "create_task", mock_create_task) + task_node = nodes.TaskNode.parse_from_config({**node_config, **command_config}) + + 
command_config["node_name"] = "test" + + mock_create_task.assert_called_once_with(command_config) + assert task_node.executable == base_task + + +def test_task_node_mocks_if_mock_is_true(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + mock_context = mocker.MagicMock() + + monkeypatch.setattr(nodes.TaskNode, "_context", mock_context) + mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + + base_task = BaseTaskType(node_name="test", task_type="dummy") + task_node = nodes.TaskNode(name="test", internal_name="test", next_node="next_node", executable=base_task) + + attempt_log = task_node.execute(mock=True) + + assert attempt_log.status == defaults.SUCCESS + + +def test_task_node_sets_attempt_log_fail_in_exception_of_execution(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + mock_context = mocker.MagicMock() + + monkeypatch.setattr(nodes.TaskNode, "_context", mock_context) + mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + + monkeypatch.setattr(BaseTaskType, "execute_command", mocker.MagicMock(side_effect=Exception())) + base_task = BaseTaskType(node_name="test", task_type="dummy") + + task_node = nodes.TaskNode(name="test", internal_name="test", next_node="next_node", executable=base_task) + + task_node.execute() + + assert mock_attempt_log.status == defaults.FAIL + + +def test_task_node_sets_attempt_log_success_in_no_exception_of_execution(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + mock_context = mocker.MagicMock() + + monkeypatch.setattr(nodes.TaskNode, "_context", mock_context) + mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + + monkeypatch.setattr(BaseTaskType, "execute_command", mocker.MagicMock()) + base_task = BaseTaskType(node_name="test", task_type="dummy") + task_node = nodes.TaskNode(name="test", internal_name="test", next_node="next_node", executable=base_task) + + task_node.execute() + + assert mock_attempt_log.status == defaults.SUCCESS + + +def test_fail_node_sets_branch_log_fail(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + mock_branch_log = mocker.MagicMock() + mock_context = mocker.MagicMock() + + monkeypatch.setattr(nodes.FailNode, "_context", mock_context) + + mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + mock_context.run_log_store.get_branch_log = mocker.MagicMock(return_value=mock_branch_log) + + node = nodes.FailNode(name="test", internal_name="test") + + node.execute() + + assert mock_attempt_log.status == defaults.SUCCESS + assert mock_branch_log.status == defaults.FAIL + + +def test_fail_node_sets_attempt_log_success_even_in_exception(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + mock_context = mocker.MagicMock() + + monkeypatch.setattr(nodes.FailNode, "_context", mock_context) + + mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + mock_context.run_log_store.get_branch_log = mocker.MagicMock(side_effect=Exception()) + + node = nodes.FailNode(name="test", internal_name="test") + + node.execute() + + assert mock_attempt_log.status == defaults.SUCCESS + + +def test_success_node_sets_branch_log_success(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + mock_branch_log = mocker.MagicMock() + mock_context = mocker.MagicMock() + + monkeypatch.setattr(nodes.SuccessNode, "_context", mock_context) + + 
mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + mock_context.run_log_store.get_branch_log = mocker.MagicMock(return_value=mock_branch_log) + + node = nodes.SuccessNode(name="test", internal_name="test") + + node.execute() + + assert mock_attempt_log.status == defaults.SUCCESS + assert mock_branch_log.status == defaults.SUCCESS + + +def test_success_node_sets_attempt_log_success_even_in_exception(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + mock_branch_log = mocker.MagicMock() + mock_context = mocker.MagicMock() + + monkeypatch.setattr(nodes.SuccessNode, "_context", mock_context) + + mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + mock_context.run_log_store.get_branch_log = mocker.MagicMock(side_effect=Exception()) + + node = nodes.SuccessNode(name="test", internal_name="test") + + node.execute() + + assert mock_attempt_log.status == defaults.SUCCESS + + +def test_parallel_node_parse_from_config_creates_sub_graph(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + + mock_create_graph = mocker.MagicMock(return_value=graph) + config = { + "branches": {"first": {"name": "first"}, "second": {"name": "second"}}, + "next_node": "next_node", + "name": "test", + "internal_name": "parent", + } + monkeypatch.setattr(nodes, "create_graph", mock_create_graph) + + parallel_node = nodes.ParallelNode.parse_from_config(config=config) + assert mock_create_graph.call_count == 2 + assert len(parallel_node.branches.items()) == 2 + + for name, branch in parallel_node.branches.items(): + assert name == "parent.first" or name == "parent.second" + assert branch == graph + + +def test_parallel_node_parse_from_config_raises_exception_if_no_branches(mocker, monkeypatch): + config = { + "branches": {}, + "next_node": "next_node", + "name": "test", + "internal_name": "parent", + } + with pytest.raises(Exception, match="A parallel node should have branches"): + _ = nodes.ParallelNode.parse_from_config(config=config) + + +def test_map_node_parse_from_config_raises_exception_if_no_branch(mocker, monkeypatch): + config = {} + with pytest.raises(Exception, match="A map node should have a branch"): + _ = nodes.MapNode.parse_from_config(config=config) + + +def test_map_node_parse_from_config_calls_create_graph(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + + mock_create_graph = mocker.MagicMock(return_value=graph) + config = { + "branch": {"name": "test"}, + "next_node": "next_node", + "name": "test", + "internal_name": "parent", + "iterate_on": "me", + "iterate_as": "you", + } + monkeypatch.setattr(nodes, "create_graph", mock_create_graph) + map_node = nodes.MapNode.parse_from_config(config=config) + + assert mock_create_graph.call_count == 1 + mock_create_graph.assert_called_once_with( + {"name": "test"}, internal_branch_name=f"parent.{defaults.MAP_PLACEHOLDER}" + ) + assert map_node.iterate_as == "you" + assert map_node.iterate_on == "me" + + +def test_map_node_get_branch_by_name_returns_branch(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + + mock_create_graph = mocker.MagicMock(return_value=graph) + config = { + "branch": {"name": "test"}, + "next_node": "next_node", + "name": "test", + "internal_name": "parent", + "iterate_on": "me", + "iterate_as": "you", + } + monkeypatch.setattr(nodes, "create_graph", mock_create_graph) + map_node = nodes.MapNode.parse_from_config(config=config) + + assert 
map_node._get_branch_by_name("test") == graph + + +def test_dag_node_fails_non_yaml_dag_definition(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + + with pytest.raises(ValueError, match="dag_definition must be a YAML"): + _ = nodes.DagNode(name="test", internal_name="test", next_node="next", dag_definition="notanyaml", branch=graph) + + +def test_dag_node_sets_internal_branch_name(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + + node = nodes.DagNode(name="test", internal_name="test", next_node="next", dag_definition="a.yaml", branch=graph) + + assert node.internal_branch_name == f"test.{defaults.DAG_BRANCH_NAME}" + + +def test_dag_node_parse_config_raises_exception_if_dag_definition_is_not_part_of_config(mocker, monkeypatch): + config = {} + + with pytest.raises(Exception, match="No dag definition found in"): + _ = nodes.DagNode.parse_from_config(config=config) + + +def test_dag_node_parse_config_raises_exception_if_dag_definition_is_not_yaml(mocker, monkeypatch): + monkeypatch.setattr(nodes.utils, "load_yaml", mocker.MagicMock(return_value={})) + + config = {"dag_definition": "notanyaml"} + + with pytest.raises(Exception, match="please provide it in dag block"): + _ = nodes.DagNode.parse_from_config(config=config) + + +def test_parse_config_calls_create_graph(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + mock_create_graph = mocker.MagicMock(return_value=graph) + + monkeypatch.setattr(nodes, "create_graph", mock_create_graph) + monkeypatch.setattr(nodes.utils, "load_yaml", mocker.MagicMock(return_value={"dag": {"name": "test"}})) + + config = { + "next_node": "next_node", + "name": "test", + "internal_name": "parent", + "dag_definition": "a.yaml", + } + + dag_node = nodes.DagNode.parse_from_config(config=config) + + assert mock_create_graph.call_count == 1 + assert dag_node.branch == graph + + +def test_dag_node_get_branch_by_name_returns_branch(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + mock_create_graph = mocker.MagicMock(return_value=graph) + + monkeypatch.setattr(nodes, "create_graph", mock_create_graph) + monkeypatch.setattr(nodes.utils, "load_yaml", mocker.MagicMock(return_value={"dag": {"name": "test"}})) + + config = { + "next_node": "next_node", + "name": "test", + "internal_name": "parent", + "dag_definition": "a.yaml", + } + + dag_node = nodes.DagNode.parse_from_config(config=config) + + assert dag_node._get_branch_by_name(f"parent.{defaults.DAG_BRANCH_NAME}") == graph + + +def test_dag_node_get_branch_by_name_raises_exception_if_incorrect_name(mocker, monkeypatch): + graph = nodes.Graph(start_at="first", name="first_branch") + mock_create_graph = mocker.MagicMock(return_value=graph) + + monkeypatch.setattr(nodes, "create_graph", mock_create_graph) + monkeypatch.setattr(nodes.utils, "load_yaml", mocker.MagicMock(return_value={"dag": {"name": "test"}})) + + config = { + "next_node": "next_node", + "name": "test", + "internal_name": "parent", + "dag_definition": "a.yaml", + } + + dag_node = nodes.DagNode.parse_from_config(config=config) + + with pytest.raises(Exception, match="only allows a branch of name"): + assert dag_node._get_branch_by_name(f"parent") == graph + + +def test__as_is_node_takes_anything_as_input(mocker, monkeypatch): + config = { + "name": "test", + "internal_name": "test", + "next_node": "next_node", + "whose": "me", + "wheres": "them", + } + + _ = nodes.StubNode.parse_from_config(config=config) + + +def 
test_as_is_node_execute_returns_success(mocker, monkeypatch): + mock_attempt_log = mocker.MagicMock() + + mock_context = mocker.MagicMock() + monkeypatch.setattr(nodes.StubNode, "_context", mock_context) + mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + + node = nodes.StubNode(name="test", internal_name="test", next_node="next_node") + + node.execute() + + assert mock_attempt_log.status == defaults.SUCCESS diff --git a/tests/magnus/test_catalog.py b/tests/magnus/test_catalog.py index 9250be37..29cb93b5 100644 --- a/tests/magnus/test_catalog.py +++ b/tests/magnus/test_catalog.py @@ -1,364 +1,57 @@ -import os -import tempfile - import pytest from magnus import ( catalog, # pylint: disable=import-error - context, # pylint: disable=import-error defaults, # pylint: disable=import-error ) -def test_get_run_log_store_returns_context_executor_run_log_store(mocker, monkeypatch): - mock_context_executor = mocker.MagicMock() - mock_context_executor.run_log_store = "RunLogStore" - - monkeypatch.setattr(context, "executor", mock_context_executor) - run_log_store = catalog.get_run_log_store() - - assert run_log_store == "RunLogStore" - - -def test_is_catalog_out_of_sync_returns_true_for_empty_synced_catalogs(): - assert catalog.is_catalog_out_of_sync(1, []) is True - - -def test_is_catalog_out_of_sync_returns_false_for_same_objects(): - class MockCatalog: - catalog_relative_path = None - data_hash = None - - catalog_item = MockCatalog() - catalog_item.catalog_relative_path = "path" - catalog_item.data_hash = "hash" - - synced_catalog = [catalog_item] - assert catalog.is_catalog_out_of_sync(catalog_item, synced_catalog) is False - - -def test_is_catalog_out_of_sync_returns_true_for_different_hash(): - class MockCatalog: - catalog_relative_path = None - data_hash = None - - catalog_item1 = MockCatalog() - catalog_item1.catalog_relative_path = "path" - catalog_item1.data_hash = "hash" - - catalog_item2 = MockCatalog() - catalog_item2.catalog_relative_path = "path" - catalog_item2.data_hash = "not-hash" +@pytest.fixture +def instantiable_base_class(monkeypatch): + monkeypatch.setattr(catalog.BaseCatalog, "__abstractmethods__", set()) + yield - synced_catalog = [catalog_item1] - assert catalog.is_catalog_out_of_sync(catalog_item2, synced_catalog) is True +def test_base_run_log_store_context_property(mocker, monkeypatch, instantiable_base_class): + mock_run_context = mocker.Mock() -def test_is_catalog_out_of_sync_returns_true_for_different_paths(): - class MockCatalog: - catalog_relative_path = None - data_hash = None + monkeypatch.setattr(catalog.context, "run_context", mock_run_context) - catalog_item1 = MockCatalog() - catalog_item1.catalog_relative_path = "path" - catalog_item1.data_hash = "hash" + assert catalog.BaseCatalog()._context == mock_run_context - catalog_item2 = MockCatalog() - catalog_item2.catalog_relative_path = "path1" - catalog_item2.data_hash = "hash" - synced_catalog = [catalog_item1] - assert catalog.is_catalog_out_of_sync(catalog_item2, synced_catalog) is True - - -def test_base_catalog_inits_empty_config_if_none_config(): - base_catalog = catalog.BaseCatalog(config=None) - assert base_catalog.config == base_catalog.Config() - - -def test_base_catalog_get_raises_exception(): - base_catalog = catalog.BaseCatalog(config=None) +def test_base_catalog_get_raises_exception(instantiable_base_class): + base_catalog = catalog.BaseCatalog() with pytest.raises(NotImplementedError): base_catalog.get(name="test", run_id="test") -def 
test_base_catalog_put_raises_exception(): - base_catalog = catalog.BaseCatalog(config=None) +def test_base_catalog_put_raises_exception(instantiable_base_class): + base_catalog = catalog.BaseCatalog() with pytest.raises(NotImplementedError): base_catalog.put(name="test", run_id="test") -def test_base_catalog_sync_between_runs_raises_exception(): - base_catalog = catalog.BaseCatalog(config=None) +def test_base_catalog_sync_between_runs_raises_exception(instantiable_base_class): + base_catalog = catalog.BaseCatalog() with pytest.raises(NotImplementedError): base_catalog.sync_between_runs(previous_run_id=1, run_id=2) -def test_base_catalog_inits_default_compute_folder_if_none_config(): - base_catalog = catalog.BaseCatalog(config=None) - assert base_catalog.compute_data_folder == defaults.COMPUTE_DATA_FOLDER - assert base_catalog.config.compute_data_folder == defaults.COMPUTE_DATA_FOLDER +def test_base_catalog_config_default_compute_folder_if_none_config(instantiable_base_class): + assert catalog.BaseCatalog().compute_data_folder == defaults.COMPUTE_DATA_FOLDER def test_do_nothing_catalog_get_returns_empty_list(monkeypatch, mocker): - mock_base_catalog = mocker.MagicMock() - - monkeypatch.setattr(catalog, "BaseCatalog", mock_base_catalog) - - catalog_handler = catalog.DoNothingCatalog(config=None) + catalog_handler = catalog.DoNothingCatalog() assert catalog_handler.get(name="does not matter", run_id="none") == [] def test_do_nothing_catalog_put_returns_empty_list(monkeypatch, mocker): - mock_base_catalog = mocker.MagicMock() - - monkeypatch.setattr(catalog, "BaseCatalog", mock_base_catalog) - - catalog_handler = catalog.DoNothingCatalog(config=None) + catalog_handler = catalog.DoNothingCatalog() assert catalog_handler.put(name="does not matter", run_id="none") == [] -def test_file_system_catalog_inits_default_values_if_none_config(): - catalog_handler = catalog.FileSystemCatalog(config=None) - assert catalog_handler.config.compute_data_folder == defaults.COMPUTE_DATA_FOLDER - assert catalog_handler.config.catalog_location == defaults.CATALOG_LOCATION_FOLDER - - -def test_file_system_catalog_get_catalog_location_defaults_if_location_not_provided(monkeypatch, mocker): - mock_base_catalog = mocker.MagicMock() - - monkeypatch.setattr(catalog, "BaseCatalog", mock_base_catalog) - - catalog_handler = catalog.FileSystemCatalog(config=None) - - assert catalog_handler.catalog_location == defaults.CATALOG_LOCATION_FOLDER - assert catalog_handler.config.catalog_location == defaults.CATALOG_LOCATION_FOLDER - - -def test_file_system_catalog_catalog_location_returns_config_catalog_location_if_provided(monkeypatch, mocker): - mock_base_catalog = mocker.MagicMock() - - monkeypatch.setattr(catalog, "BaseCatalog", mock_base_catalog) - - catalog_handler = catalog.FileSystemCatalog(config={"type": "file-system", "catalog_location": "this"}) - - assert catalog_handler.catalog_location == "this" - assert catalog_handler.config.catalog_location == "this" - - -def test_file_system_catalog_get_raises_exception_if_catalog_does_not_exist(monkeypatch, mocker): - def mock_does_dir_exist(dir_name): - if dir_name == "this_compute_folder": - return True - return False - - monkeypatch.setattr(catalog.utils, "does_dir_exist", mock_does_dir_exist) - monkeypatch.setattr(catalog.FileSystemCatalog, "catalog_location", mocker.MagicMock(return_value="this_location")) - - monkeypatch.setattr(catalog, "BaseCatalog", mocker.MagicMock()) - - catalog_handler = catalog.FileSystemCatalog(config={"type": "file-system"}) - with 
pytest.raises(Exception): - catalog_handler.get("testing", run_id="dummy_run_id", compute_data_folder="this_compute_folder") - - -def test_file_system_catalog_get_copies_files_from_catalog_to_compute_folder_with_all(mocker, monkeypatch): - monkeypatch.setattr(catalog, "get_run_log_store", mocker.MagicMock()) - - with tempfile.TemporaryDirectory() as catalog_location: - with tempfile.TemporaryDirectory(dir=".") as compute_folder: - catalog_location_path = catalog.Path(catalog_location) - run_id = "testing" - catalog.Path(catalog_location_path / run_id / compute_folder).mkdir(parents=True) - with open(catalog.Path(catalog_location) / run_id / compute_folder / "catalog_file", "w") as fw: - fw.write("hello") - - catalog_handler = catalog.FileSystemCatalog(config=None) - catalog_handler.config.catalog_location = catalog_location - catalog_handler.config.compute_data_folder = compute_folder - - catalog_handler.get(name="*", run_id=run_id) - - _, _, files = next(os.walk(compute_folder)) - - assert len(list(files)) == 1 - - -def test_file_system_catalog_get_copies_files_from_catalog_to_compute_folder_with_pattern(mocker, monkeypatch): - monkeypatch.setattr(catalog, "get_run_log_store", mocker.MagicMock()) - - with tempfile.TemporaryDirectory() as catalog_location: - with tempfile.TemporaryDirectory(dir=".") as compute_folder: - catalog_location_path = catalog.Path(catalog_location) - run_id = "testing" - catalog.Path(catalog_location_path / run_id / compute_folder).mkdir(parents=True) - with open(catalog.Path(catalog_location) / run_id / compute_folder / "catalog_file", "w") as fw: - fw.write("hello") - - with open(catalog.Path(catalog_location) / run_id / compute_folder / "not_catalog", "w") as fw: - fw.write("hello") - - catalog_handler = catalog.FileSystemCatalog(config=None) - catalog_handler.config.catalog_location = catalog_location - catalog_handler.config.compute_data_folder = compute_folder - - catalog_handler.get(name="catalog*", run_id=run_id) - - _, _, files = next(os.walk(compute_folder)) - - assert len(list(files)) == 1 - - -def test_file_system_catalog_put_copies_files_from_compute_folder_to_catalog_if_synced_changed_all(mocker, monkeypatch): - monkeypatch.setattr(catalog, "is_catalog_out_of_sync", mocker.MagicMock(return_value=True)) - monkeypatch.setattr(catalog, "get_run_log_store", mocker.MagicMock()) - - with tempfile.TemporaryDirectory() as catalog_location: - with tempfile.TemporaryDirectory(dir=".") as compute_folder: - catalog_location_path = catalog.Path(catalog_location) - run_id = "testing" - catalog.Path(catalog_location_path / run_id).mkdir(parents=True) - with open(catalog.Path(compute_folder) / "catalog_file", "w") as fw: - fw.write("hello") - - catalog_handler = catalog.FileSystemCatalog(config=None) - catalog_handler.config.catalog_location = catalog_location - catalog_handler.config.compute_data_folder = compute_folder - - catalog_handler.put(name="*", run_id=run_id) - - _, _, files = next(os.walk(catalog_location_path / run_id / compute_folder)) - - assert len(list(files)) == 1 - - -def test_file_system_catalog_put_copies_files_from_compute_folder_to_catalog_if_synced_changed_pattern( - mocker, monkeypatch -): - monkeypatch.setattr(catalog, "is_catalog_out_of_sync", mocker.MagicMock(return_value=True)) - monkeypatch.setattr(catalog, "get_run_log_store", mocker.MagicMock()) - - with tempfile.TemporaryDirectory() as catalog_location: - with tempfile.TemporaryDirectory(dir=".") as compute_folder: - catalog_location_path = catalog.Path(catalog_location) - run_id = 
"testing" - catalog.Path(catalog_location_path / run_id).mkdir(parents=True) - with open(catalog.Path(compute_folder) / "catalog_file", "w") as fw: - fw.write("hello") - - with open(catalog.Path(compute_folder) / "not_catalog_file", "w") as fw: - fw.write("hello") - - catalog_handler = catalog.FileSystemCatalog(config=None) - catalog_handler.config.catalog_location = catalog_location - catalog_handler.config.compute_data_folder = compute_folder - - catalog_handler.put(name="catalog*", run_id=run_id) - - _, _, files = next(os.walk(catalog_location_path / run_id / compute_folder)) - - assert len(list(files)) == 1 - - -def test_file_system_catalog_put_copies_files_from_compute_folder_to_catalog_if_synced_true(mocker, monkeypatch): - monkeypatch.setattr(catalog, "is_catalog_out_of_sync", mocker.MagicMock(return_value=False)) - monkeypatch.setattr(catalog, "get_run_log_store", mocker.MagicMock()) - - with tempfile.TemporaryDirectory() as catalog_location: - with tempfile.TemporaryDirectory(dir=".") as compute_folder: - catalog_location_path = catalog.Path(catalog_location) - run_id = "testing" - catalog.Path(catalog_location_path / run_id).mkdir(parents=True) - with open(catalog.Path(compute_folder) / "catalog_file", "w") as fw: - fw.write("hello") - - with open(catalog.Path(compute_folder) / "not_catalog_file", "w") as fw: - fw.write("hello") - - catalog_handler = catalog.FileSystemCatalog(config=None) - catalog_handler.config.catalog_location = catalog_location - catalog_handler.config.compute_data_folder = compute_folder - - catalog_handler.put(name="*", run_id=run_id) - - with pytest.raises(FileNotFoundError): - _ = os.listdir(catalog_location_path / run_id / compute_folder) - assert True - - -def test_file_system_catalog_put_uses_compute_folder_by_default(monkeypatch, mocker): - mock_safe_make_dir = mocker.MagicMock() - monkeypatch.setattr(catalog.utils, "safe_make_dir", mock_safe_make_dir) - - mock_does_dir_exist = mocker.MagicMock(side_effect=Exception()) - monkeypatch.setattr(catalog.utils, "does_dir_exist", mock_does_dir_exist) - monkeypatch.setattr(catalog.FileSystemCatalog, "catalog_location", "this_location") - monkeypatch.setattr(catalog, "BaseCatalog", mocker.MagicMock()) - - catalog_handler = catalog.FileSystemCatalog(config={"type": "file-system"}) - with pytest.raises(Exception): - catalog_handler.put("testing", run_id="dummy_run_id") - - mock_does_dir_exist.assert_called_once_with(catalog.Path("data")) - - -def test_file_system_catalog_put_uses_compute_folder_provided(monkeypatch, mocker): - mock_safe_make_dir = mocker.MagicMock() - monkeypatch.setattr(catalog.utils, "safe_make_dir", mock_safe_make_dir) - - mock_does_dir_exist = mocker.MagicMock(side_effect=Exception()) - monkeypatch.setattr(catalog.utils, "does_dir_exist", mock_does_dir_exist) - monkeypatch.setattr(catalog.FileSystemCatalog, "catalog_location", "this_location") - monkeypatch.setattr(catalog, "BaseCatalog", mocker.MagicMock()) - - catalog_handler = catalog.FileSystemCatalog(config={"type": "file-system"}) - with pytest.raises(Exception): - catalog_handler.put("testing", run_id="dummy_run_id", compute_data_folder="not_data") - - mock_does_dir_exist.assert_called_once_with(catalog.Path("not_data")) - - -def test_file_system_catalog_put_raises_exception_if_compute_data_folder_does_not_exist(monkeypatch, mocker): - mock_safe_make_dir = mocker.MagicMock() - monkeypatch.setattr(catalog.utils, "safe_make_dir", mock_safe_make_dir) - - mock_does_dir_exist = mocker.MagicMock(return_value=False) - 
monkeypatch.setattr(catalog.utils, "does_dir_exist", mock_does_dir_exist) - monkeypatch.setattr(catalog.FileSystemCatalog, "catalog_location", mocker.MagicMock(return_value="this_location")) - monkeypatch.setattr(catalog, "BaseCatalog", mocker.MagicMock()) - - catalog_handler = catalog.FileSystemCatalog(config={"type": "file-system"}) - with pytest.raises(Exception): - catalog_handler.put("testing", run_id="dummy_run_id", compute_data_folder="this_compute_folder") - - -def test_file_system_catalog_put_creates_catalog_location_using_run_id(monkeypatch, mocker): - mock_safe_make_dir = mocker.MagicMock() - monkeypatch.setattr(catalog.utils, "safe_make_dir", mock_safe_make_dir) - - mock_does_dir_exist = mocker.MagicMock(side_effect=Exception()) - monkeypatch.setattr(catalog.utils, "does_dir_exist", mock_does_dir_exist) - - monkeypatch.setattr(catalog, "BaseCatalog", mocker.MagicMock()) - - catalog_handler = catalog.FileSystemCatalog(config={"type": "file-system"}) - catalog_handler.config.catalog_location = "this_location" - - with pytest.raises(Exception): - catalog_handler.put("testing", run_id="dummy_run_id") - - mock_safe_make_dir.assert_called_once_with(catalog.Path("this_location") / "dummy_run_id") - - -def test_file_system_sync_between_runs_raises_exception_if_previous_catalog_does_not_exist(monkeypatch, mocker): - mock_safe_make_dir = mocker.MagicMock() - monkeypatch.setattr(catalog.utils, "safe_make_dir", mock_safe_make_dir) - - mock_does_dir_exist = mocker.MagicMock(return_value=False) - monkeypatch.setattr(catalog.utils, "does_dir_exist", mock_does_dir_exist) - monkeypatch.setattr(catalog.FileSystemCatalog, "catalog_location", mocker.MagicMock(return_value="this_location")) - monkeypatch.setattr(catalog, "BaseCatalog", mocker.MagicMock()) - - catalog_handler = catalog.FileSystemCatalog(config={"type": "file-system"}) - with pytest.raises(Exception): - catalog_handler.sync_between_runs("previous", "current") +def test_do_nothing_catalog_sync_between_runs_does_nothing(monkeypatch, mocker): + catalog_handler = catalog.DoNothingCatalog() + catalog_handler.sync_between_runs(previous_run_id="1", run_id="2") diff --git a/tests/magnus/test_datastore.py b/tests/magnus/test_datastore.py index 877fc2cc..502a6536 100644 --- a/tests/magnus/test_datastore.py +++ b/tests/magnus/test_datastore.py @@ -3,6 +3,12 @@ from magnus import datastore, defaults, exceptions +@pytest.fixture(autouse=True) +def instantiable_base_class(monkeypatch): + monkeypatch.setattr(datastore.BaseRunLogStore, "__abstractmethods__", set()) + yield + + def test_data_catalog_eq_is_equal_if_name_is_same(): this = datastore.DataCatalog(name="test") that = datastore.DataCatalog(name="test") @@ -142,38 +148,49 @@ def test_run_log_get_data_catalogs_by_stage_gets_catalogs_from_steps(mocker, mon assert data_catalogs == ["data catalog"] -def test_base_run_log_store_assigns_empty_config_if_none(): - config = None - - run_log_store = datastore.BaseRunLogStore(config=config) - - assert run_log_store.config == {} - - def test_base_run_log_store_create_run_log_not_implemented(): - config = {"key": "value"} - - run_log_store = datastore.BaseRunLogStore(config=config) + run_log_store = datastore.BaseRunLogStore() with pytest.raises(NotImplementedError): run_log_store.create_run_log(run_id="will fail") def test_base_run_log_store_get_run_log_by_id_not_implemented(): - config = {"key": "value"} - - run_log_store = datastore.BaseRunLogStore(config=config) + run_log_store = datastore.BaseRunLogStore() with pytest.raises(NotImplementedError): 
run_log_store.get_run_log_by_id(run_id="will fail") def test_base_run_log_store_put_run_log_not_implemented(): - config = {"key": "value"} - - run_log_store = datastore.BaseRunLogStore(config=config) + run_log_store = datastore.BaseRunLogStore() with pytest.raises(NotImplementedError): run_log_store.put_run_log(run_log="will fail") +def test_base_run_log_store_context_returns_global_context(mocker, monkeypatch): + mock_context = mocker.MagicMock() + mock_run_context = mocker.MagicMock() + + mock_context.run_context = mock_run_context + + monkeypatch.setattr(datastore, "context", mock_context) + run_log_store = datastore.BaseRunLogStore() + assert run_log_store._context == mock_run_context + + +def test_base_run_log_store_update_run_log_status(mocker, monkeypatch): + run_log = datastore.RunLog(run_id="testing") + + mock_get_run_log_by_id = mocker.MagicMock(return_value=run_log) + mock_put_run_log = mocker.MagicMock() + + monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mock_get_run_log_by_id) + monkeypatch.setattr(datastore.BaseRunLogStore, "put_run_log", mock_put_run_log) + + run_log_store = datastore.BaseRunLogStore() + run_log_store.update_run_log_status(run_id="test", status="running") + assert run_log.status == "running" + + def test_base_run_log_set_parameters_creates_parameters_if_not_present_previously(mocker, monkeypatch): run_log = datastore.RunLog(run_id="testing") @@ -184,14 +201,14 @@ def test_base_run_log_set_parameters_creates_parameters_if_not_present_previousl monkeypatch.setattr(datastore.BaseRunLogStore, "put_run_log", mock_put_run_log) parameters = {"a": 1} - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.set_parameters(run_id="testing", parameters=parameters) assert run_log.parameters == parameters mock_put_run_log.assert_called_once_with(run_log=run_log) -def test_base_run_log_set_parameters_updatesparameters_if_present_previously(mocker, monkeypatch): +def test_base_run_log_set_parameters_updates_parameters_if_present_previously(mocker, monkeypatch): run_log = datastore.RunLog(run_id="testing") run_log.parameters = {"b": 2} mock_get_run_log_by_id = mocker.MagicMock(return_value=run_log) @@ -201,7 +218,7 @@ def test_base_run_log_set_parameters_updatesparameters_if_present_previously(moc monkeypatch.setattr(datastore.BaseRunLogStore, "put_run_log", mock_put_run_log) parameters = {"a": 1} - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.set_parameters(run_id="testing", parameters=parameters) assert run_log.parameters == {"a": 1, "b": 2} @@ -216,7 +233,7 @@ def test_base_run_log_store_get_parameters_gets_from_run_log(mocker, monkeypatch monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mock_get_run_log_by_id) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() assert run_log_store.get_parameters(run_id="testing") == {"b": 2} @@ -228,7 +245,7 @@ def test_base_run_log_store_get_run_config_returns_config_from_run_log(mocker, m mock_get_run_log_by_id = mocker.MagicMock(return_value=run_log) monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mock_get_run_log_by_id) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() assert run_config == run_log_store.get_run_config(run_id="testing") @@ -243,7 +260,7 @@ def test_base_run_log_store_set_run_config_creates_run_log_if_not_present(mocker 
monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mock_get_run_log_by_id) monkeypatch.setattr(datastore.BaseRunLogStore, "put_run_log", mock_put_run_log) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.set_run_config(run_id="testing", run_config=run_config) assert run_log.run_config == run_config @@ -260,14 +277,14 @@ def test_base_run_log_store_set_run_config_updates_run_log_if_present(mocker, mo monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mock_get_run_log_by_id) monkeypatch.setattr(datastore.BaseRunLogStore, "put_run_log", mock_put_run_log) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.set_run_config(run_id="testing", run_config=run_config) assert run_log.run_config == {"datastore": "for testing", "executor": "for testing"} def test_base_run_log_store_create_step_log_returns_a_step_log_object(): - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() step_log = run_log_store.create_step_log(name="test", internal_name="test") @@ -278,8 +295,8 @@ def test_base_run_log_store_get_step_log_raises_step_log_not_found_error_if_sear mock_run_log = mocker.MagicMock() mock_run_log.search_step_by_internal_name.side_effect = exceptions.StepLogNotFoundError("test", "test") - run_log_store = datastore.BaseRunLogStore(config=None) - run_log_store.get_run_log_by_id = mocker.MagicMock(return_value=mock_run_log) + monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mocker.MagicMock(return_value=mock_run_log)) + run_log_store = datastore.BaseRunLogStore() with pytest.raises(exceptions.StepLogNotFoundError): run_log_store.get_step_log(internal_name="test", run_id="test") @@ -290,14 +307,14 @@ def test_base_run_log_store_get_step_log_returns_from_log_search(monkeypatch, mo mock_step_log = mocker.MagicMock() mock_run_log.search_step_by_internal_name.return_value = mock_step_log, None - run_log_store = datastore.BaseRunLogStore(config=None) - run_log_store.get_run_log_by_id = mocker.MagicMock(return_value=mock_run_log) + monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mocker.MagicMock(return_value=mock_run_log)) + run_log_store = datastore.BaseRunLogStore() assert mock_step_log == run_log_store.get_step_log(internal_name="test", run_id="test") def test_base_run_log_store_create_branch_log_returns_a_branch_log_object(): - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() branch_log = run_log_store.create_branch_log(internal_branch_name="test") @@ -305,7 +322,7 @@ def test_base_run_log_store_create_branch_log_returns_a_branch_log_object(): def test_base_run_log_store_create_attempt_log_returns_a_attempt_log_object(): - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() attempt_log = run_log_store.create_attempt_log() @@ -313,7 +330,7 @@ def test_base_run_log_store_create_attempt_log_returns_a_attempt_log_object(): def test_base_run_log_store_create_code_identity_object(): - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() code_identity = run_log_store.create_code_identity() @@ -321,7 +338,7 @@ def test_base_run_log_store_create_code_identity_object(): def test_base_run_log_store_create_data_catalog_object(): - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = 
datastore.BaseRunLogStore() data_catalog = run_log_store.create_data_catalog(name="data") @@ -338,7 +355,7 @@ def test_base_run_log_store_add_step_log_adds_log_to_run_log_if_branch_is_none(m monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mocker.MagicMock(return_value=mock_run_log)) monkeypatch.setattr(datastore.BaseRunLogStore, "put_run_log", mocker.MagicMock()) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.add_step_log(step_log=step_log, run_id="test") assert mock_run_log.steps["test"] == step_log @@ -355,7 +372,7 @@ def test_base_run_log_store_add_step_log_adds_log_to_branch_log_if_branch_is_fou monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mocker.MagicMock(return_value=mock_run_log)) monkeypatch.setattr(datastore.BaseRunLogStore, "put_run_log", mocker.MagicMock()) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.add_step_log(step_log=step_log, run_id="test") assert mock_branch_log.steps["test.branch.step"] == step_log @@ -366,7 +383,7 @@ def test_base_run_log_store_get_branch_log_returns_run_log_if_internal_branch_na monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mocker.MagicMock(return_value=mock_run_log)) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() assert mock_run_log == run_log_store.get_branch_log(internal_branch_name=None, run_id="test") @@ -378,7 +395,7 @@ def test_base_run_log_store_get_branch_log_returns_branch_log_if_internal_branch monkeypatch.setattr(datastore.BaseRunLogStore, "get_run_log_by_id", mocker.MagicMock(return_value=mock_run_log)) - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() assert mock_branch_log == run_log_store.get_branch_log(internal_branch_name="branch", run_id="test") @@ -388,7 +405,7 @@ def test_base_run_log_store_add_branch_log_adds_run_log_if_sent(monkeypatch, moc run_log = datastore.RunLog(run_id="test") - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.add_branch_log(branch_log=run_log, run_id="test") @@ -408,7 +425,7 @@ def test_base_run_log_add_branch_log_adds_branch_to_the_right_step(mocker, monke mock_run_log.search_step_by_internal_name.return_value = mock_step, None - run_log_store = datastore.BaseRunLogStore(config=None) + run_log_store = datastore.BaseRunLogStore() run_log_store.add_branch_log(branch_log=branch_log, run_id="test") @@ -416,13 +433,13 @@ def test_base_run_log_add_branch_log_adds_branch_to_the_right_step(mocker, monke def test_buffered_run_log_store_inits_run_log_as_none(): - run_log_store = datastore.BufferRunLogstore(config=None) + run_log_store = datastore.BufferRunLogstore() assert run_log_store.run_log is None def test_buffered_run_log_store_create_run_log_creates_a_run_log_object(): - run_log_store = datastore.BufferRunLogstore(config=None) + run_log_store = datastore.BufferRunLogstore() run_log = run_log_store.create_run_log(run_id="test") @@ -431,7 +448,7 @@ def test_buffered_run_log_store_create_run_log_creates_a_run_log_object(): def test_buffered_run_log_store_get_run_log_returns_the_run_log(): - run_log_store = datastore.BufferRunLogstore(config=None) + run_log_store = datastore.BufferRunLogstore() run_log = datastore.RunLog(run_id="test") run_log.status = defaults.PROCESSING @@ -442,7 +459,7 @@ def 
test_buffered_run_log_store_get_run_log_returns_the_run_log(): def test_buffered_run_log_store_put_run_log_updates_the_run_log(): - run_log_store = datastore.BufferRunLogstore(config=None) + run_log_store = datastore.BufferRunLogstore() run_log = datastore.RunLog(run_id="test") run_log_store.put_run_log(run_log=run_log) @@ -453,125 +470,8 @@ def test_buffered_run_log_store_put_run_log_updates_the_run_log(): assert r_run_log == run_log -def test_file_system_run_log_store_log_folder_name_defaults_if_not_provided(): - run_log_store = datastore.FileSystemRunLogstore(config=None) - - assert run_log_store.log_folder_name == defaults.LOG_LOCATION_FOLDER - - -def test_file_system_run_log_store_log_folder_name_if__provided(): - run_log_store = datastore.FileSystemRunLogstore(config={"log_folder": "test"}) - - assert run_log_store.log_folder_name == "test" - - -def test_file_system_run_log_store_write_to_folder_makes_dir_if_not_present(mocker, monkeypatch): - mock_safe_make_dir = mocker.MagicMock() - monkeypatch.setattr(datastore.utils, "safe_make_dir", mock_safe_make_dir) - - mock_json = mocker.MagicMock() - mock_path = mocker.MagicMock() - monkeypatch.setattr(datastore, "json", mock_json) - monkeypatch.setattr(datastore, "Path", mock_path) - - mock_run_log = mocker.MagicMock() - mock_dict = mocker.MagicMock() - mock_run_log.dict = mock_dict - - run_log_store = datastore.FileSystemRunLogstore(config=None) - run_log_store.write_to_folder(run_log=mock_run_log) - - mock_safe_make_dir.assert_called_once_with(run_log_store.log_folder_name) - assert mock_dict.call_count == 1 - +def test_buffered_get_run_log_by_id_raises_exception_if_not_found(): + run_log_store = datastore.BufferRunLogstore() -def test_file_system_run_log_store_get_from_folder_raises_exception_if_folder_not_present(mocker, monkeypatch): - mock_path = mocker.MagicMock() - monkeypatch.setattr(datastore, "Path", mocker.MagicMock(return_value=mock_path)) - - mock_path.__truediv__.return_value = mock_path - - mock_path.exists.return_value = False - - run_log_store = datastore.FileSystemRunLogstore(config=None) - - with pytest.raises(FileNotFoundError): - run_log_store.get_from_folder(run_id="test") - - -def test_file_system_run_log_store_get_from_folder_returns_run_log_from_file_contents(mocker, monkeypatch): - mock_path = mocker.MagicMock() - monkeypatch.setattr(datastore, "Path", mocker.MagicMock(return_value=mock_path)) - - mock_path.__truediv__.return_value = mock_path - mock_path.exists.return_value = True - - mock_json = mocker.MagicMock() - monkeypatch.setattr(datastore, "json", mock_json) - mock_json.load.return_value = {"run_id": "test"} - - run_log_store = datastore.FileSystemRunLogstore(config=None) - run_log = run_log_store.get_from_folder(run_id="does not matter") - - assert run_log.run_id == "test" - - -def test_file_system_run_log_store_create_run_log_writes_to_folder(mocker, monkeypatch): - mock_write_to_folder = mocker.MagicMock() - - monkeypatch.setattr(datastore.FileSystemRunLogstore, "write_to_folder", mock_write_to_folder) - - run_log_store = datastore.FileSystemRunLogstore(config=None) - run_log = run_log_store.create_run_log(run_id="test random") - - mock_write_to_folder.assert_called_once_with(run_log) - - assert run_log.run_id == "test random" - - -def test_file_system_run_log_store_create_run_log_raises_exception_if_present(mocker, monkeypatch): - mock_write_to_folder = mocker.MagicMock() - mock_get_run_log_by_id = mocker.MagicMock(return_value="existing") - - monkeypatch.setattr(datastore.FileSystemRunLogstore, 
"write_to_folder", mock_write_to_folder) - monkeypatch.setattr(datastore.FileSystemRunLogstore, "get_run_log_by_id", mock_get_run_log_by_id) - - run_log_store = datastore.FileSystemRunLogstore(config=None) - with pytest.raises(exceptions.RunLogExistsError): - run_log_store.create_run_log(run_id="test random") - - -def test_file_system_run_log_store_get_run_log_by_id_raises_exception_if_get_from_folder_fails(mocker, monkeypatch): - mock_get_from_folder = mocker.MagicMock() - mock_get_from_folder.side_effect = FileNotFoundError() - - monkeypatch.setattr(datastore.FileSystemRunLogstore, "get_from_folder", mock_get_from_folder) - - run_log_store = datastore.FileSystemRunLogstore(config=None) with pytest.raises(exceptions.RunLogNotFoundError): - run_log_store.get_run_log_by_id(run_id="should fail") - - -def test_file_system_run_log_store_get_run_log_by_id_returns_run_log_from_get_from_folder(mocker, monkeypatch): - mock_get_from_folder = mocker.MagicMock() - mock_get_from_folder.return_value = "I am a run log" - - monkeypatch.setattr(datastore.FileSystemRunLogstore, "get_from_folder", mock_get_from_folder) - - run_log_store = datastore.FileSystemRunLogstore(config=None) - - run_log = run_log_store.get_run_log_by_id(run_id="test") - - assert run_log == "I am a run log" - - -def test_file_system_run_log_store_put_run_log_writes_to_folder(mocker, monkeypatch): - mock_write_to_folder = mocker.MagicMock() - - monkeypatch.setattr(datastore.FileSystemRunLogstore, "write_to_folder", mock_write_to_folder) - - run_log_store = datastore.FileSystemRunLogstore(config=None) - mock_run_log = mocker.MagicMock() - run_log_store.put_run_log(run_log=mock_run_log) - - mock_write_to_folder.assert_called_once_with(mock_run_log) + run_log_store.get_run_log_by_id("test") diff --git a/tests/magnus/test_executor.py b/tests/magnus/test_executor.py index 1b1d7e34..ac7e78ba 100644 --- a/tests/magnus/test_executor.py +++ b/tests/magnus/test_executor.py @@ -1,766 +1,43 @@ import pytest -from pydantic import BaseModel, Extra -from magnus import defaults, exceptions, executor +from magnus import executor, defaults -def test_base_executor__is_parallel_execution_uses_default(): - base_executor = executor.BaseExecutor(config=None) +@pytest.fixture(autouse=True) +def instantiable_base_class(monkeypatch): + monkeypatch.setattr(executor.BaseExecutor, "__abstractmethods__", set()) + yield - assert base_executor._is_parallel_execution() == defaults.ENABLE_PARALLEL +def test_base_executor_context_refers_to_global_run_context(mocker, monkeypatch): + mock_run_context = mocker.MagicMock() + monkeypatch.setattr(executor.context, "run_context", mock_run_context) -def test_base_executor__set_up_run_log_with_no_previous_run_log(mocker, monkeypatch): - base_executor = executor.BaseExecutor(config=None) + base_executor = executor.BaseExecutor() + assert base_executor._context is mock_run_context - mock_run_log_store = mocker.MagicMock() - mock_create_run_log = mocker.MagicMock() - mock_run_log_store.create_run_log = mock_create_run_log - mock_run_log_store.get_run_log_by_id = mocker.MagicMock( - side_effect=exceptions.RunLogNotFoundError(run_id="nothing") - ) - base_executor.run_log_store = mock_run_log_store - base_executor.run_id = "run_id" +def test_is_parallel_refers_to_config(): + base_executor = executor.BaseExecutor() - monkeypatch.setattr(executor.utils, "get_run_config", mocker.MagicMock(return_value={"executor": "test"})) + assert base_executor._is_parallel_execution() == False - base_executor._set_up_run_log() - 
mock_create_run_log.assert_called_once_with( - run_id="run_id", tag="", use_cached=False, status=defaults.PROCESSING, dag_hash="" - ) +def test_is_parallel_refers_to_config_true(): + base_executor = executor.BaseExecutor() + base_executor.enable_parallel = True + assert base_executor._is_parallel_execution() == True -def test_base_executor__set_up_run_log_with_previous_run_log(mocker, monkeypatch): - base_executor = executor.BaseExecutor(config=None) - mock_run_log_store = mocker.MagicMock() - mock_create_run_log = mocker.MagicMock() - mock_run_log_store.create_run_log = mock_create_run_log - mock_run_log_store.create_run_log = mock_create_run_log - mock_run_log_store.get_run_log_by_id = mocker.MagicMock( - side_effect=exceptions.RunLogNotFoundError(run_id="nothing") - ) +def test_step_attempt_number_defaults_to_1(): + base_executor = executor.BaseExecutor() - mock_previous_run_log = mocker.MagicMock() - mock_previous_run_log.run_id = "old run id" - mock_previous_run_log.parameters = {"b": 1} + assert base_executor.step_attempt_number == 1 - base_executor.run_log_store = mock_run_log_store - base_executor.run_id = "run_id" - base_executor.previous_run_log = mock_previous_run_log - base_executor.catalog_handler = mocker.MagicMock() +def test_step_attempt_number_looks_up_environment(monkeypatch): + monkeypatch.setenv(defaults.ATTEMPT_NUMBER, "12345") + base_executor = executor.BaseExecutor() - monkeypatch.setattr(executor.utils, "get_run_config", mocker.MagicMock(return_value={"executor": "test"})) - - base_executor._set_up_run_log() - - mock_create_run_log.assert_called_once_with( - run_id="run_id", tag="", use_cached=True, status=defaults.PROCESSING, dag_hash="", original_run_id="old run id" - ) - - -def test_base_executor_prepare_for_graph_execution_calls(mocker, monkeypatch): - mock_integration = mocker.MagicMock() - mock_validate = mocker.MagicMock() - mock_configure_for_traversal = mocker.MagicMock() - - mock_integration.validate = mock_validate - mock_integration.configure_for_traversal = mock_configure_for_traversal - - monkeypatch.setattr(executor, "integration", mock_integration) - monkeypatch.setattr(executor.BaseExecutor, "_set_up_run_log", mocker.MagicMock()) - - base_executor = executor.BaseExecutor(config=None) - - base_executor.prepare_for_graph_execution() - - assert mock_configure_for_traversal.call_count == 4 - assert mock_validate.call_count == 4 - - -def test_base_execution_prepare_for_node_calls(mocker, monkeypatch): - mock_integration = mocker.MagicMock() - mock_validate = mocker.MagicMock() - mock_configure_for_execution = mocker.MagicMock() - - mock_integration.validate = mock_validate - mock_integration.configure_for_execution = mock_configure_for_execution - - monkeypatch.setattr(executor, "integration", mock_integration) - - base_executor = executor.BaseExecutor(config=None) - - base_executor.prepare_for_node_execution() - - assert mock_configure_for_execution.call_count == 4 - assert mock_validate.call_count == 4 - - -def test_base_executor__sync_catalog_returns_nothing_if_no_syncing_for_node(mocker, monkeypatch): - mock_node = mocker.MagicMock() - - mock_node._get_catalog_settings.return_value = None - - base_executor = executor.BaseExecutor(config=None) - base_executor.context_node = mock_node - - assert base_executor._sync_catalog(mock_node, None, stage="get") is None - - -def test_base_executor__sync_catalog_raises_exception_if_stage_not_in_get_or_put(mocker, monkeypatch): - mock_node = mocker.MagicMock() - - base_executor = executor.BaseExecutor(config=None) 
- base_executor.context_node = mock_node - with pytest.raises(Exception): - base_executor._sync_catalog(node=None, step_log=None, stage="puts") - - -def test_base_executor__sync_catalog_uses_catalog_handler_compute_folder_by_default(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_catalog_settings.return_value = {"get": ["all"]} - - mock_catalog = mocker.MagicMock() - mock_catalog.compute_data_folder = "data/" - - mock_catalog_get = mocker.MagicMock() - mock_catalog.get = mock_catalog_get - - mock_step_log = mocker.MagicMock() - - base_executor = executor.BaseExecutor(config=None) - base_executor.run_id = "run_id" - base_executor.catalog_handler = mock_catalog - base_executor.context_node = mock_node - - base_executor._sync_catalog(mock_node, mock_step_log, stage="get") - - mock_catalog_get.assert_called_once_with( - name="all", run_id="run_id", compute_data_folder="data/", synced_catalogs=None - ) - - -def test_base_executor__sync_catalog_uses_compute_folder_if_provided_by_node(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_catalog_settings.return_value = {"get": ["all"], "compute_data_folder": "data_from_node"} - - mock_catalog = mocker.MagicMock() - mock_catalog.compute_data_folder = "data/" - - mock_catalog_get = mocker.MagicMock() - mock_catalog.get = mock_catalog_get - - mock_step_log = mocker.MagicMock() - - base_executor = executor.BaseExecutor(config=None) - base_executor.context_node = mock_node - base_executor.run_id = "run_id" - base_executor.catalog_handler = mock_catalog - - base_executor._sync_catalog(mock_node, mock_step_log, stage="get") - - mock_catalog_get.assert_called_once_with( - name="all", run_id="run_id", compute_data_folder="data_from_node", synced_catalogs=None - ) - - -def test_base_executor_add_code_identities_adds_git_identity(mocker, monkeypatch): - mock_step_log = mocker.MagicMock() - - mock_step_log.code_identities = [] - - mock_utils_get_git_code_id = mocker.MagicMock(return_value="code id") - monkeypatch.setattr(executor.utils, "get_git_code_identity", mock_utils_get_git_code_id) - - base_executor = executor.BaseExecutor(config=None) - - base_executor.add_code_identities(node=None, step_log=mock_step_log) - - assert mock_step_log.code_identities == ["code id"] - - -def test_base_executor_execute_from_graph_executes_node_for_success_or_fail(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock__execute_node = mocker.MagicMock() - - monkeypatch.setattr(executor.BaseExecutor, "add_code_identities", mocker.MagicMock()) - monkeypatch.setattr(executor.BaseExecutor, "_execute_node", mock__execute_node) - - mock_node.node_type = "success" - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock__execute_node.call_count == 1 - - mock_node.reset_mock() - mock__execute_node.reset_mock() - - mock_node.node_type = "fail" - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock__execute_node.call_count == 1 - - -def test_base_executor_execute_from_graph_makes_step_log_processing(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock__execute_node = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - - mock_run_log_store.create_step_log.return_value = mock_step_log - - monkeypatch.setattr(executor.BaseExecutor, "add_code_identities", mocker.MagicMock()) - 
monkeypatch.setattr(executor.BaseExecutor, "_execute_node", mock__execute_node) - - mock_node.node_type = "success" - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock_step_log.status == defaults.PROCESSING - - -def test_base_executor_execute_from_graph_makes_step_log_success_if_previous_run_log_success(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - - mock_run_log_store.create_step_log.return_value = mock_step_log - - monkeypatch.setattr(executor.BaseExecutor, "add_code_identities", mocker.MagicMock()) - monkeypatch.setattr(executor.BaseExecutor, "_is_eligible_for_rerun", mocker.MagicMock(return_value=False)) - - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock_step_log.status == defaults.SUCCESS - - -def test_base_executor_execute_from_graph_delegates_to_execute_as_graph_for_composite_nodes(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - mock_node_execute_as_graph = mocker.MagicMock() - - mock_run_log_store.create_step_log.return_value = mock_step_log - - mock_node.node_type = "parallel" - mock_node.execute_as_graph = mock_node_execute_as_graph - monkeypatch.setattr(executor.BaseExecutor, "add_code_identities", mocker.MagicMock()) - - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock_node_execute_as_graph.call_count == 1 - assert mock_step_log.status == defaults.PROCESSING - - mock_node_execute_as_graph.reset_mock() - mock_step_log.reset_mock() - - mock_node.node_type = "dag" - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock_node_execute_as_graph.call_count == 1 - assert mock_step_log.status == defaults.PROCESSING - - mock_node_execute_as_graph.reset_mock() - mock_step_log.reset_mock() - - mock_node.node_type = "map" - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock_node_execute_as_graph.call_count == 1 - assert mock_step_log.status == defaults.PROCESSING - - -def test_base_executor_execute_from_graph_triggers_job_for_simple_nodes(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - mock_trigger_job = mocker.MagicMock() - - mock_run_log_store.create_step_log.return_value = mock_step_log - - mock_node.node_type = "task" - mock_node.is_composite = False - monkeypatch.setattr(executor.BaseExecutor, "add_code_identities", mocker.MagicMock()) - monkeypatch.setattr(executor.BaseExecutor, "trigger_job", mock_trigger_job) - - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor.execute_from_graph(node=mock_node, map_variable=None) - - assert mock_trigger_job.call_count == 1 - assert mock_step_log.status == defaults.PROCESSING - - -def test_base_executor__execute_node_calls_catalog(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_max_attempts.return_value = 1 - - mock_run_log_store = mocker.MagicMock() - mock_step_catalog = mocker.MagicMock() - mock__sync_catalog = 
mocker.MagicMock() - mock__sync_catalog.return_value = "data_catalogs_get" - - monkeypatch.setattr(executor, "interaction", mocker.MagicMock()) - monkeypatch.setattr(executor, "utils", mocker.MagicMock()) - monkeypatch.setattr(executor.BaseExecutor, "_sync_catalog", mock__sync_catalog) - - mock_run_log_store.get_step_log.return_value = mock_step_catalog - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor._execute_node(node=mock_node) - - mock__sync_catalog.assert_any_call(mock_node, mock_step_catalog, stage="get") - mock__sync_catalog.assert_any_call(mock_node, mock_step_catalog, stage="put", synced_catalogs="data_catalogs_get") - - -def test_base_executor_sets_step_log_to_success_if_node_succeeds(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_max_attempts.return_value = 1 - - mock_run_log_store = mocker.MagicMock() - mock_step_catalog = mocker.MagicMock() - mock__sync_catalog = mocker.MagicMock() - mock__sync_catalog.return_value = "data_catalogs_get" - - monkeypatch.setattr(executor, "interaction", mocker.MagicMock()) - monkeypatch.setattr(executor, "utils", mocker.MagicMock()) - monkeypatch.setattr(executor.BaseExecutor, "_sync_catalog", mock__sync_catalog) - - mock_run_log_store.get_step_log.return_value = mock_step_catalog - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor._execute_node(node=mock_node) - - assert mock_step_catalog.status == defaults.SUCCESS - - -def test_base_executor_sets_status_to_fail_if_attempt_log_is_fail(monkeypatch, mocker): - mock_node = mocker.MagicMock() - mock_node._get_max_attempts.return_value = 1 - mock_attempt_log = mocker.MagicMock() - mock_node.execute.return_value = mock_attempt_log - mock_attempt_log.status = defaults.FAIL - - mock_run_log_store = mocker.MagicMock() - mock_step_catalog = mocker.MagicMock() - mock__sync_catalog = mocker.MagicMock() - - monkeypatch.setattr(executor, "interaction", mocker.MagicMock()) - monkeypatch.setattr(executor, "utils", mocker.MagicMock()) - monkeypatch.setattr(executor.BaseExecutor, "_sync_catalog", mock__sync_catalog) - - mock_run_log_store.get_step_log.return_value = mock_step_catalog - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor._execute_node(node=mock_node) - - assert mock_step_catalog.status == defaults.FAIL - - -def test_base_executor__get_status_and_next_node_name_gets_next_if_success(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_dag = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - - mock_run_log_store.get_step_log.return_value = mock_step_log - mock_step_log.status = defaults.SUCCESS - mock_node._get_next_node.return_value = "next node" - - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - status, next_node = base_executor._get_status_and_next_node_name(current_node=mock_node, dag=mock_dag) - assert status == defaults.SUCCESS - assert next_node == "next node" - - -def test_base_executor_get_status_and_next_node_gets_global_failure_node_by_default_if_step_fails(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_dag = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - mock_fail_node = mocker.MagicMock() - - mock_run_log_store.get_step_log.return_value = mock_step_log - 
mock_step_log.status = defaults.FAIL - mock_node._get_on_failure_node.return_value = None - mock_dag.get_fail_node.return_value = mock_fail_node - mock_fail_node.name = "global fail node" - - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - status, next_node = base_executor._get_status_and_next_node_name(current_node=mock_node, dag=mock_dag) - assert status == defaults.FAIL - assert next_node == "global fail node" - - -def test_base_executor_get_status_and_next_node_gets_node_failure_node_if_provided_if_step_fails(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_dag = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - - mock_run_log_store.get_step_log.return_value = mock_step_log - mock_step_log.status = defaults.FAIL - mock_node._get_on_failure_node.return_value = "node fail node" - - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - status, next_node = base_executor._get_status_and_next_node_name(current_node=mock_node, dag=mock_dag) - assert status == defaults.FAIL - assert next_node == "node fail node" - - -def test_base_executor__is_eligible_for_rerun_returns_true_if_no_previous_run_log(): - base_executor = executor.BaseExecutor(config=None) - - base_executor.previous_run_log = None - - assert base_executor._is_eligible_for_rerun(node=None) - - -def test_base_executor__is_eligible_for_rerun_returns_true_if_step_log_not_found(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_previous_run_log = mocker.MagicMock() - mock_search_step_by_internal_name = mocker.MagicMock( - side_effect=exceptions.StepLogNotFoundError(run_id="id", name="hi") - ) - - mock_previous_run_log.search_step_by_internal_name = mock_search_step_by_internal_name - mock_node._get_step_log_name.return_value = "step_log" - - base_executor = executor.BaseExecutor(config=None) - base_executor.previous_run_log = mock_previous_run_log - - assert base_executor._is_eligible_for_rerun(node=mock_node) - mock_search_step_by_internal_name.assert_called_once_with("step_log") - - -def test_base_executor__is_eligible_for_rerun_returns_false_if_previous_was_success(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - mock_previous_node_log = mocker.MagicMock() - mock_previous_run_log = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - - mock_search_step_by_internal_name = mocker.MagicMock(return_value=(mock_previous_node_log, None)) - mock_run_log_store.get_step_log.return_value = mock_step_log - - mock_previous_node_log.status = defaults.SUCCESS - - mock_previous_run_log.search_step_by_internal_name = mock_search_step_by_internal_name - mock_node._get_step_log_name.return_value = "step_log" - - base_executor = executor.BaseExecutor(config=None) - base_executor.previous_run_log = mock_previous_run_log - base_executor.run_log_store = mock_run_log_store - - assert base_executor._is_eligible_for_rerun(node=mock_node) is False - assert mock_step_log.status == defaults.SUCCESS - - -def test_base_executor__is_eligible_for_rerun_returns_true_if_previous_was_not_success(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - mock_previous_node_log = mocker.MagicMock() - mock_previous_run_log = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - - mock_search_step_by_internal_name = mocker.MagicMock(return_value=(mock_previous_node_log, None)) - 
mock_run_log_store.get_step_log.return_value = mock_step_log - - mock_previous_node_log.status = defaults.FAIL - - mock_previous_run_log.search_step_by_internal_name = mock_search_step_by_internal_name - mock_node._get_step_log_name.return_value = "step_log" - - base_executor = executor.BaseExecutor(config=None) - base_executor.previous_run_log = mock_previous_run_log - base_executor.run_log_store = mock_run_log_store - - assert base_executor._is_eligible_for_rerun(node=mock_node) - assert base_executor.previous_run_log is None - - -def test_base_executor_execute_graph_breaks_if_node_status_is_triggered(mocker, monkeypatch): - mock_dag = mocker.MagicMock() - mock_execute_from_graph = mocker.MagicMock() - mock__get_status_and_next_node_name = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - - mock__get_status_and_next_node_name.return_value = defaults.TRIGGERED, None - - monkeypatch.setattr(executor.BaseExecutor, "execute_from_graph", mock_execute_from_graph) - monkeypatch.setattr(executor.BaseExecutor, "_get_status_and_next_node_name", mock__get_status_and_next_node_name) - monkeypatch.setattr(executor, "json", mocker.MagicMock()) - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor.execute_graph(dag=mock_dag) - - assert mock_execute_from_graph.call_count == 1 - - -def test_base_executor_execute_graph_breaks_if_node_status_is_terminal(mocker, monkeypatch): - mock_dag = mocker.MagicMock() - mock_execute_from_graph = mocker.MagicMock() - mock__get_status_and_next_node_name = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_node = mocker.MagicMock() - - mock_dag.get_node_by_name.return_value = mock_node - mock_node.node_type = "success" - - mock__get_status_and_next_node_name.return_value = defaults.SUCCESS, None - - monkeypatch.setattr(executor.BaseExecutor, "execute_from_graph", mock_execute_from_graph) - monkeypatch.setattr(executor.BaseExecutor, "_get_status_and_next_node_name", mock__get_status_and_next_node_name) - monkeypatch.setattr(executor, "json", mocker.MagicMock()) - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - - base_executor.execute_graph(dag=mock_dag) - - assert mock_execute_from_graph.call_count == 1 - - -def test_base_executor__resolve_node_config_gives_global_config_if_node_does_not_override(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_mode_config.return_value = {} - - class MockConfig(BaseModel, extra=Extra.allow): - placeholders: dict = {} - - monkeypatch.setattr(executor.BaseExecutor, "Config", MockConfig) - - base_executor = executor.BaseExecutor(config={"a": 1}) - - assert base_executor._resolve_executor_config(mock_node) == {"a": 1} - - -def test_base_executor__resolve_node_config_updates_global_config_if_node_overrides(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_executor_config.return_value = {"a": 2} - - class MockConfig(BaseModel, extra=Extra.allow): - placeholders: dict = {} - - monkeypatch.setattr(executor.BaseExecutor, "Config", MockConfig) - - base_executor = executor.BaseExecutor(config={"a": 1}) - - assert base_executor._resolve_executor_config(mock_node) == {"a": 2} - - -def test_base_executor__resolve_node_config_updates_global_config_if_node_adds(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_executor_config.return_value = {"b": 2} - - class MockConfig(BaseModel, extra=Extra.allow): - placeholders: dict = {} - - 
monkeypatch.setattr(executor.BaseExecutor, "Config", MockConfig) - - base_executor = executor.BaseExecutor(config={"a": 1}) - - assert base_executor._resolve_executor_config(mock_node) == {"a": 1, "b": 2} - - -def test_base_executor__resolve_node_config_updates_global_config_from_placeholders(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_executor_config.return_value = {"b": 2, "replace": None} - - config = {"a": 1, "placeholders": {"replace": {"c": 3}}} - - class MockConfig(BaseModel, extra=Extra.allow): - placeholders: dict = {} - - monkeypatch.setattr(executor.BaseExecutor, "Config", MockConfig) - - base_executor = executor.BaseExecutor(config=config) - - assert base_executor._resolve_executor_config(mock_node) == {"a": 1, "c": 3, "b": 2} - - -def test_base_executor_resolve_node_supresess_global_config_from_placeholders_if_its_not_mapping(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_node._get_executor_config.return_value = {"b": 2, "replace": None} - - config = {"a": 1, "placeholders": {"replace": [1, 2, 3]}} - - class MockConfig(BaseModel, extra=Extra.allow): - placeholders: dict = {} - - monkeypatch.setattr(executor.BaseExecutor, "Config", MockConfig) - - base_executor = executor.BaseExecutor(config=config) - - assert base_executor._resolve_executor_config(mock_node) == {"a": 1, "b": 2} - - -def test_base_executor_execute_graph_raises_exception_if_loop(mocker, monkeypatch): - mock_dag = mocker.MagicMock() - mock_execute_from_graph = mocker.MagicMock() - mock__get_status_and_next_node_name = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_node = mocker.MagicMock() - - mock_dag.get_node_by_name.return_value = mock_node - - mock__get_status_and_next_node_name.return_value = defaults.SUCCESS, None - - monkeypatch.setattr(executor.BaseExecutor, "execute_from_graph", mock_execute_from_graph) - monkeypatch.setattr(executor.BaseExecutor, "_get_status_and_next_node_name", mock__get_status_and_next_node_name) - monkeypatch.setattr(executor, "json", mocker.MagicMock()) - base_executor = executor.BaseExecutor(config=None) - base_executor.run_log_store = mock_run_log_store - with pytest.raises(Exception): - base_executor.execute_graph(dag=mock_dag) - - -def test_local_executor__is_parallel_execution_sends_defaults_if_not_config(): - local_executor = executor.LocalExecutor(config=None) - - assert defaults.ENABLE_PARALLEL == local_executor._is_parallel_execution() - - -def test_local_executor__is_parallel_execution_sends_from_config_if_present(): - config = {"enable_parallel": "true"} - - local_executor = executor.LocalExecutor(config=config) - - assert local_executor._is_parallel_execution() - - -def test_local_executor_trigger_job_calls(mocker, monkeypatch): - mock_node = mocker.MagicMock() - mock_prepare_for_node_execution = mocker.MagicMock() - mock__execute_node = mocker.MagicMock() - - monkeypatch.setattr(executor.LocalExecutor, "prepare_for_node_execution", mock_prepare_for_node_execution) - monkeypatch.setattr(executor.LocalExecutor, "_execute_node", mock__execute_node) - - local_executor = executor.LocalExecutor(config=None) - - local_executor.trigger_job(mock_node) - assert mock_prepare_for_node_execution.call_count == 1 - assert mock__execute_node.call_count == 1 - - -def test_local_container_executor__is_parallel_execution_sends_defaults_if_not_config(): - local_container_executor = executor.LocalContainerExecutor(config={"docker_image": "test"}) - - assert defaults.ENABLE_PARALLEL == 
local_container_executor._is_parallel_execution() - - -def test_local_container_executor__is_parallel_execution_sends_from_config_if_present(): - local_container_executor = executor.LocalContainerExecutor(config={"enable_parallel": True, "docker_image": "test"}) - - assert local_container_executor._is_parallel_execution() - - -def test_local_container_executor_docker_image_is_retrieved_from_config(): - config = {"enable_parallel": "true", "docker_image": "docker"} - - local_container_executor = executor.LocalContainerExecutor(config=config) - - assert local_container_executor.docker_image == "docker" - - -def test_local_container_executor_add_code_ids_uses_global_docker_image(mocker, monkeypatch): - mock_super_add_code_ids = mocker.MagicMock() - monkeypatch.setattr(executor.BaseExecutor, "add_code_identities", mock_super_add_code_ids) - - mock_node = mocker.MagicMock() - mock_node._get_mode_config.return_value = {} - - mock_get_local_docker_image_id = mocker.MagicMock() - monkeypatch.setattr(executor.utils, "get_local_docker_image_id", mock_get_local_docker_image_id) - - mock_run_log_store = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - local_container_executor = executor.LocalContainerExecutor(config={"docker_image": "global"}) - local_container_executor.run_log_store = mock_run_log_store - - local_container_executor.add_code_identities(node=mock_node, step_log=mock_step_log) - - mock_get_local_docker_image_id.assert_called_once_with("global") - - -def test_local_container_executor_add_code_ids_uses_local_docker_image_if_provided(mocker, monkeypatch): - mock_super_add_code_ids = mocker.MagicMock() - monkeypatch.setattr(executor.BaseExecutor, "add_code_identities", mock_super_add_code_ids) - - mock_node = mocker.MagicMock() - mock_node._get_executor_config.return_value = {"docker_image": "local"} - - mock_get_local_docker_image_id = mocker.MagicMock() - monkeypatch.setattr(executor.utils, "get_local_docker_image_id", mock_get_local_docker_image_id) - - mock_run_log_store = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - local_container_executor = executor.LocalContainerExecutor(config={"docker_image": "global"}) - local_container_executor.run_log_store = mock_run_log_store - - local_container_executor.add_code_identities(node=mock_node, step_log=mock_step_log) - - mock_get_local_docker_image_id.assert_called_once_with("local") - - -def test_local_container_executor_calls_spin_container_during_trigger_job(mocker, monkeypatch): - mock_spin_container = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_node = mocker.MagicMock() - - mock_run_log_store.get_step_log.return_value = mock_step_log - mock_step_log.status = defaults.SUCCESS - - monkeypatch.setattr(executor.LocalContainerExecutor, "_spin_container", mock_spin_container) - - local_container_executor = executor.LocalContainerExecutor(config={"docker_image": "test"}) - local_container_executor.run_log_store = mock_run_log_store - - local_container_executor.trigger_job(node=mock_node) - - assert mock_spin_container.call_count == 1 - - -def test_local_container_executor_marks_step_fail_if_status_is_not_success(mocker, monkeypatch): - mock_spin_container = mocker.MagicMock() - mock_step_log = mocker.MagicMock() - mock_run_log_store = mocker.MagicMock() - mock_node = mocker.MagicMock() - - mock_run_log_store.get_step_log.return_value = mock_step_log - mock_step_log.status = defaults.PROCESSING - - monkeypatch.setattr(executor.LocalContainerExecutor, 
"_spin_container", mock_spin_container) - - local_container_executor = executor.LocalContainerExecutor(config={"docker_image": "test"}) - local_container_executor.run_log_store = mock_run_log_store - - local_container_executor.trigger_job(node=mock_node) - - assert mock_step_log.status == defaults.FAIL + assert base_executor.step_attempt_number == 12345 diff --git a/tests/magnus/test_experiment_tracker.py b/tests/magnus/test_experiment_tracker.py new file mode 100644 index 00000000..acdfdd2e --- /dev/null +++ b/tests/magnus/test_experiment_tracker.py @@ -0,0 +1,36 @@ +import pytest +import contextlib + +from magnus import experiment_tracker + + +@pytest.fixture(autouse=True) +def instantiable_base_class(monkeypatch): + monkeypatch.setattr(experiment_tracker.BaseExperimentTracker, "__abstractmethods__", set()) + yield + + +def test_base_run_log_store_context_property(mocker, monkeypatch, instantiable_base_class): + mock_run_context = mocker.Mock() + + monkeypatch.setattr(experiment_tracker.context, "run_context", mock_run_context) + + assert experiment_tracker.BaseExperimentTracker()._context == mock_run_context + + +def test_client_connection_is_null_context(): + ep = experiment_tracker.BaseExperimentTracker() + + assert isinstance(ep.client_context, contextlib.nullcontext) + + +def test_do_nothing_experiment_tracker_log_metric_does_nothing(): + ep = experiment_tracker.DoNothingTracker() + + ep.log_metric(key="foo", value=3.41) + + +def test_do_nothing_experiment_tracker_log_parmeter_does_nothing(): + ep = experiment_tracker.DoNothingTracker() + + ep.log_parameter(key="foo", value="bar") diff --git a/tests/magnus/test_graph.py b/tests/magnus/test_graph.py index b85c6d1b..9bfd8ba6 100644 --- a/tests/magnus/test_graph.py +++ b/tests/magnus/test_graph.py @@ -5,7 +5,7 @@ exceptions, # pylint: disable=import-error graph, # pylint: disable=import-error ) -from magnus.nodes import AsISNode, FailNode, SuccessNode +from magnus.extensions.nodes import StubNode, FailNode, SuccessNode def get_new_graph(start_at="this", internal_branch_name="i_name"): @@ -48,7 +48,7 @@ def test_get_node_by_name_raises_exception_if_no_match(new_graph): def test_get_node_by_name_returns_node_if_match(new_graph, dummy_node): - new_graph.nodes.append(dummy_node) + new_graph.nodes["a"] = dummy_node assert dummy_node == new_graph.get_node_by_name("a") @@ -58,14 +58,14 @@ def test_get_node_by_internal_name_raises_exception_if_no_match(new_graph): def test_get_node_by_internal_name_returns_node_if_match(new_graph, dummy_node): - new_graph.nodes.append(dummy_node) + new_graph.nodes["a.b"] = dummy_node assert dummy_node == new_graph.get_node_by_internal_name("a.b") def test_add_node_adds_to_nodes(new_graph, dummy_node): new_graph.add_node(dummy_node) assert len(new_graph.nodes) == 1 - assert new_graph.nodes[0] == dummy_node + assert new_graph.nodes["a"] == dummy_node def test_get_success_node_fails_if_none_present(new_graph): @@ -74,8 +74,8 @@ def test_get_success_node_fails_if_none_present(new_graph): def test_get_success_node_returns_success_node_if_present(new_graph): - new_node = Node(node_type="success") - new_graph.nodes.append(new_node) + new_node = Node(node_type="success", name="a") + new_graph.nodes["a"] = new_node assert new_graph.get_success_node() == new_node @@ -86,8 +86,8 @@ def test_get_fail_node_fails_if_none_present(new_graph): def test_get_fail_node_returns_success_node_if_present(new_graph): - new_node = Node(node_type="fail") - new_graph.nodes.append(new_node) + new_node = Node(node_type="fail", name="a") + 
new_graph.nodes["a"] = new_node assert new_graph.get_fail_node() == new_node @@ -111,15 +111,15 @@ def test_success_node_validation_returns_false_if_neq_1(new_graph): def test_success_node_validation_returns_false_if_gr_1(new_graph): - node = Node(node_type="success") - new_graph.nodes.append(node) - new_graph.nodes.append(node) + node = Node(node_type="success", name="a") + new_graph.nodes["a"] = node + new_graph.nodes["b"] = node assert new_graph.success_node_validation() is False def test_success_node_validation_returns_true_if_eq_1(new_graph): - node = Node(node_type="success") - new_graph.nodes.append(node) + node = Node(node_type="success", name="a") + new_graph.nodes["a"] = node assert new_graph.success_node_validation() is True @@ -128,19 +128,19 @@ def test_fail_node_validation_returns_false_if_neq_1(new_graph): def test_fail_node_validation_returns_false_if_gr_1(new_graph): - node = Node(node_type="fail") - new_graph.nodes.append(node) - new_graph.nodes.append(node) + node = Node(node_type="fail", name="a") + new_graph.nodes["a"] = node + new_graph.nodes["b"] = node assert new_graph.fail_node_validation() is False def test_fail_node_validation_returns_true_if_eq_1(new_graph): - node = Node(node_type="fail") - new_graph.nodes.append(node) + node = Node(node_type="fail", name="a") + new_graph.nodes["a"] = node assert new_graph.fail_node_validation() is True -def test_validate_does_not_raise_exception_if_all_pass(monkeypatch, mocker): +def test_check_graph_does_not_raise_exception_if_all_pass(monkeypatch, mocker): try: monkeypatch.setattr(graph.Graph, "missing_neighbors", mocker.MagicMock(return_value=[])) monkeypatch.setattr(graph.Graph, "is_dag", mocker.MagicMock(return_value=True)) @@ -148,7 +148,7 @@ def test_validate_does_not_raise_exception_if_all_pass(monkeypatch, mocker): monkeypatch.setattr(graph.Graph, "success_node_validation", mocker.MagicMock(return_value=True)) monkeypatch.setattr(graph.Graph, "fail_node_validation", mocker.MagicMock(return_value=True)) new_graph = get_new_graph() - new_graph.validate() + new_graph.check_graph() except BaseException: assert False @@ -161,7 +161,7 @@ def test_validate_raises_exception_if_is_dag_fails(mocker, monkeypatch): monkeypatch.setattr(graph.Graph, "success_node_validation", mocker.MagicMock(return_value=True)) monkeypatch.setattr(graph.Graph, "fail_node_validation", mocker.MagicMock(return_value=True)) new_graph = get_new_graph() - new_graph.validate() + new_graph.check_graph() def test_validate_raises_exception_if_is_start_node_present_fails(mocker, monkeypatch): @@ -172,7 +172,7 @@ def test_validate_raises_exception_if_is_start_node_present_fails(mocker, monkey monkeypatch.setattr(graph.Graph, "success_node_validation", mocker.MagicMock(return_value=True)) monkeypatch.setattr(graph.Graph, "fail_node_validation", mocker.MagicMock(return_value=True)) new_graph = get_new_graph() - new_graph.validate() + new_graph.check_graph() def test_validate_raises_exception_if_success_node_validation_fails(mocker, monkeypatch): @@ -183,7 +183,7 @@ def test_validate_raises_exception_if_success_node_validation_fails(mocker, monk monkeypatch.setattr(graph.Graph, "success_node_validation", mocker.MagicMock(return_value=False)) monkeypatch.setattr(graph.Graph, "fail_node_validation", mocker.MagicMock(return_value=True)) new_graph = get_new_graph() - new_graph.validate() + new_graph.check_graph() def test_validate_raises_exception_if_fail_node_validation_fails(mocker, monkeypatch): @@ -194,7 +194,7 @@ def 
test_validate_raises_exception_if_fail_node_validation_fails(mocker, monkeyp monkeypatch.setattr(graph.Graph, "success_node_validation", mocker.MagicMock(return_value=True)) monkeypatch.setattr(graph.Graph, "fail_node_validation", mocker.MagicMock(return_value=False)) new_graph = get_new_graph() - new_graph.validate() + new_graph.check_graph() def test_validate_raises_exception_if_missing_neighbors(mocker, monkeypatch): @@ -205,96 +205,26 @@ def test_validate_raises_exception_if_missing_neighbors(mocker, monkeypatch): monkeypatch.setattr(graph.Graph, "success_node_validation", mocker.MagicMock(return_value=True)) monkeypatch.setattr(graph.Graph, "fail_node_validation", mocker.MagicMock(return_value=True)) new_graph = get_new_graph() - new_graph.validate() - - -def test_create_graph_inits_graph_with_defaults(mocker, monkeypatch): - dag_config = {"start_at": "step1"} - graph_init = mocker.MagicMock(return_value=None) - monkeypatch.setattr(graph.Graph, "__init__", graph_init) - monkeypatch.setattr(graph.Graph, "validate", mocker.MagicMock()) - - graph.create_graph(dag_config, internal_branch_name="i_name") - graph_init.assert_called_once_with( - start_at="step1", description=None, max_time=defaults.MAX_TIME, internal_branch_name="i_name" - ) - - -def test_create_graph_inits_graph_with_given_config(mocker, monkeypatch): - dag_config = {"start_at": "step1", "description": "test", "max_time": 1} - graph_init = mocker.MagicMock(return_value=None) - monkeypatch.setattr(graph.Graph, "__init__", graph_init) - monkeypatch.setattr(graph.Graph, "validate", mocker.MagicMock()) - - graph.create_graph(dag_config, internal_branch_name="i_name") - graph_init.assert_called_once_with(start_at="step1", description="test", max_time=1, internal_branch_name="i_name") - - -def test_create_graph_inits_graph_populates_nodes(mocker, monkeypatch): - dag_config = {"start_at": "step1", "steps": {"step1": {"type": "test"}}} - graph_init = mocker.MagicMock(return_value=None) - monkeypatch.setattr(graph.Graph, "__init__", graph_init) - monkeypatch.setattr(graph.Graph, "validate", mocker.MagicMock()) - monkeypatch.setattr(graph.Graph, "add_node", mocker.MagicMock()) - - mock_driver_manager = mocker.MagicMock() - - monkeypatch.setattr(graph.driver, "DriverManager", mock_driver_manager) - graph.create_graph(dag_config, internal_branch_name=None) - - _, kwargs = mock_driver_manager.call_args - assert kwargs["invoke_kwds"]["name"] == "step1" - assert kwargs["invoke_kwds"]["internal_name"] == "step1" - - -def test_create_graph_inits_graph_populates_nodes_with_internal_branch(mocker, monkeypatch): - dag_config = {"start_at": "step1", "steps": {"step1": {"type": "test"}}} - graph_init = mocker.MagicMock(return_value=None) - monkeypatch.setattr(graph.Graph, "__init__", graph_init) - monkeypatch.setattr(graph.Graph, "validate", mocker.MagicMock()) - monkeypatch.setattr(graph.Graph, "add_node", mocker.MagicMock()) - - mock_driver_manager = mocker.MagicMock() - - monkeypatch.setattr(graph.driver, "DriverManager", mock_driver_manager) - graph.create_graph(dag_config, internal_branch_name="i_name") - - _, kwargs = mock_driver_manager.call_args - assert kwargs["invoke_kwds"]["name"] == "step1" - assert kwargs["invoke_kwds"]["internal_name"] == "i_name.step1" - - -def test_create_graph_raises_exception_if_node_fails(mocker, monkeypatch): - dag_config = {"start_at": "step1", "steps": {"step1": {"type": "test"}}} - graph_init = mocker.MagicMock(return_value=None) - monkeypatch.setattr(graph.Graph, "__init__", graph_init) - 
monkeypatch.setattr(graph.Graph, "validate", mocker.MagicMock()) - monkeypatch.setattr(graph.Graph, "add_node", mocker.MagicMock()) - - with pytest.raises(Exception): - graph.create_graph(dag_config, internal_branch_name=None) + new_graph.check_graph() @pytest.fixture(name="mocked_graph") def create_mocked_graph(mocker): - mocked_graph_init = mocker.MagicMock(return_value=None) - mocker.patch.object(graph.Graph, "__init__", mocked_graph_init) - return graph.Graph() + return graph.Graph(start_at="start") def test_is_dag_returns_true_when_acyclic(mocked_graph): - test_graph = mocked_graph - start_node_config = {"next_node": "middle", "on_failure": ""} - start_node = AsISNode(name="start", internal_name="start", config=start_node_config) + start_node = StubNode(name="start", internal_name="start", next_node="middle") - middle_node_config = {"next_node": "success", "on_failure": ""} - middle_node = AsISNode(name="middle", internal_name="middle", config=middle_node_config) + middle_node = StubNode(name="middle", internal_name="middle", next_node="success") - success_node = SuccessNode(name="success", internal_name="success", config={}) + success_node = SuccessNode(name="success", internal_name="success") - fail_node = FailNode(name="fail", internal_name="fail", config={}) + fail_node = FailNode(name="fail", internal_name="fail") - test_graph.nodes = [start_node, middle_node, success_node, fail_node] + test_graph = mocked_graph + for node in [start_node, middle_node, success_node, fail_node]: + test_graph.add_node(node) assert test_graph.is_dag() @@ -303,16 +233,19 @@ def test_is_dag_returns_true_when_on_failure_points_to_non_terminal_node_and_lat test_graph = mocked_graph start_node_config = {"next_node": "middle", "on_failure": ""} - start_node = AsISNode(name="start", internal_name="start", config=start_node_config) + start_node = StubNode(name="start", internal_name="start", next_node="middle", on_failure="") middle_node_config = {"next_node": "success", "on_failure": "fail"} - middle_node = AsISNode(name="middle", internal_name="middle", config=middle_node_config) + middle_node = StubNode(name="middle", internal_name="middle", next_node="success", on_failure="fail") + + success_node = SuccessNode(name="success", internal_name="success") - success_node = SuccessNode(name="success", internal_name="success", config={}) + fail_node = FailNode(name="fail", internal_name="fail") - fail_node = FailNode(name="fail", internal_name="fail", config={}) + test_graph = mocked_graph + for node in [start_node, middle_node, success_node, fail_node]: + test_graph.add_node(node) - test_graph.nodes = [start_node, middle_node, success_node, fail_node] assert test_graph.is_dag() @@ -320,20 +253,20 @@ def test_is_dag_returns_false_when_cyclic_in_next_nodes(mocked_graph): test_graph = mocked_graph start_node_config = {"next_node": "b", "on_failure": "fail"} - start_node = AsISNode(name="start", internal_name="start", config=start_node_config) + start_node = StubNode(name="start", internal_name="start", next_node="b", on_failure="fail") - bnode_config = {"next_node": "c", "on_failure": "fail"} - bnode = AsISNode(name="b", internal_name="b", config=bnode_config) + bnode = StubNode(name="b", internal_name="b", next_node="c", on_failure="fail") - cnode_config = {"next_node": "d", "on_failure": "fail"} - cnode = AsISNode(name="c", internal_name="c", config=cnode_config) + cnode = StubNode(name="c", internal_name="c", next_node="d", on_failure="fail") - dnode_config = {"next_node": "b", "on_failure": "fail"} - dnode = 
AsISNode(name="d", internal_name="d", config=dnode_config) + dnode = StubNode(name="d", internal_name="d", next_node="b", on_failure="fail") - fail_node = FailNode(name="fail", internal_name="fail", config={}) + fail_node = FailNode(name="fail", internal_name="fail") - test_graph.nodes = [start_node, bnode, cnode, dnode, fail_node] + nodes = [start_node, bnode, cnode, dnode, fail_node] + test_graph = mocked_graph + for node in nodes: + test_graph.add_node(node) assert not test_graph.is_dag() @@ -341,17 +274,16 @@ def test_is_dag_returns_false_when_cyclic_in_next_nodes(mocked_graph): def test_is_dag_returns_false_when_fail_points_to_previous_node(mocked_graph): test_graph = mocked_graph - start_config = {"next_node": "b", "on_failure": "fail"} - start_node = AsISNode(name="start", internal_name="start", config=start_config) - - b_config = {"next_node": "c", "on_failure": "fail"} - bnode = AsISNode(name="b", internal_name="b", config=b_config) + start_node = StubNode(name="start", internal_name="start", next_node="b", on_failure="fail") + bnode = StubNode(name="b", internal_name="b", next_node="c", on_failure="fail") - c_config = {"next_node": "c", "on_failure": "b"} - cnode = AsISNode(name="c", internal_name="c", config=c_config) + cnode = StubNode(name="c", internal_name="c", next_node="c", on_failure="fail") - fail_node = FailNode(name="fail", internal_name="fail", config={}) - test_graph.nodes = [start_node, bnode, cnode, fail_node] + fail_node = FailNode(name="fail", internal_name="fail") + nodes = [start_node, bnode, cnode, fail_node] + test_graph = mocked_graph + for node in nodes: + test_graph.add_node(node) assert not test_graph.is_dag() @@ -359,17 +291,18 @@ def test_is_dag_returns_false_when_fail_points_to_previous_node(mocked_graph): def test_missing_neighbors_empty_list_no_neigbors_missing(mocked_graph): test_graph = mocked_graph - start_config = {"next_node": "middle", "on_failure": "fail"} - start_node = AsISNode(name="start", internal_name="start", config=start_config) + start_node = StubNode(name="start", internal_name="start", next_node="middle", on_failure="fail") - middle_config = {"next_node": "success", "on_failure": "fail"} - middle_node = AsISNode(name="middle", internal_name="middle", config=middle_config) + middle_node = StubNode(name="middle", internal_name="middle", next_node="success", on_failure="fail") - success_node = SuccessNode(name="success", internal_name="success", config={}) + success_node = SuccessNode(name="success", internal_name="success") - fail_node = FailNode(name="fail", internal_name="fail", config={}) + fail_node = FailNode(name="fail", internal_name="fail") - test_graph.nodes = [start_node, middle_node, success_node, fail_node] + nodes = [start_node, middle_node, success_node, fail_node] + test_graph = mocked_graph + for node in nodes: + test_graph.add_node(node) missing_nodes = test_graph.missing_neighbors() assert len(missing_nodes) == 0 @@ -379,14 +312,17 @@ def test_missing_neighbors_list_of_missing_neighbor_one_missing_next(mocked_grap test_graph = mocked_graph start_config = {"next_node": "middle", "on_failure": "fail"} - start_node = AsISNode(name="start", internal_name="start", config=start_config) + start_node = StubNode(name="start", internal_name="start", next_node="middle", on_failure="fail") middle_config = {"next_node": "success", "on_failure": "fail"} - middle_node = AsISNode(name="middle", internal_name="middle", config=middle_config) + middle_node = StubNode(name="middle", internal_name="middle", next_node="success", 
on_failure="fail") - fail_node = FailNode(name="fail", internal_name="fail", config={}) + fail_node = FailNode(name="fail", internal_name="fail") - test_graph.nodes = [start_node, middle_node, fail_node] + nodes = [start_node, middle_node, fail_node] + test_graph = mocked_graph + for node in nodes: + test_graph.add_node(node) missing_nodes = test_graph.missing_neighbors() assert len(missing_nodes) == 1 @@ -396,17 +332,18 @@ def test_missing_neighbors_list_of_missing_neighbor_one_missing_next(mocked_grap def test_missing_list_of_missing_neighbor_one_missing_on_failure(mocked_graph): test_graph = mocked_graph - start_config = {"next_node": "middle", "on_failure": "fail"} - start_node = AsISNode(name="start", internal_name="start", config=start_config) + start_node = StubNode(name="start", internal_name="start", next_node="middle", on_failure="fail") - middle_config = {"next_node": "success", "on_failure": "fail"} - middle_node = AsISNode(name="middle", internal_name="middle", config=middle_config) + middle_node = StubNode(name="middle", internal_name="middle", next_node="success", on_failure="fail") - success_node = SuccessNode(name="success", internal_name="success", config={}) + success_node = SuccessNode(name="success", internal_name="success") - FailNode(name="fail", internal_name="fail", config={}) + FailNode(name="fail", internal_name="fail") - test_graph.nodes = [start_node, middle_node, success_node] + nodes = [start_node, middle_node, success_node] + test_graph = mocked_graph + for node in nodes: + test_graph.add_node(node) missing_nodes = test_graph.missing_neighbors() assert len(missing_nodes) == 1 @@ -415,22 +352,22 @@ def test_missing_list_of_missing_neighbor_one_missing_on_failure(mocked_graph): def test_missing_list_of_missing_neighbor_two_missing(mocked_graph): test_graph = mocked_graph + start_node = StubNode(name="start", internal_name="start", next_node="middle", on_failure="fail") - start_config = {"next_node": "middle", "on_failure": "fail"} - start_node = AsISNode(name="start", internal_name="start", config=start_config) + StubNode(name="middle", internal_name="middle", next_node="success", on_failure="fail") - middle_config = {"next_node": "success", "on_failure": "fail"} - AsISNode(name="middle", internal_name="middle", config=middle_config) + success_node = SuccessNode(name="success", internal_name="success") - success_node = SuccessNode(name="success", internal_name="success", config={}) + FailNode(name="fail", internal_name="fail") - FailNode(name="fail", internal_name="fail", config={}) - - test_graph.nodes = [ + nodes = [ start_node, success_node, ] + test_graph = mocked_graph + for node in nodes: + test_graph.add_node(node) missing_nodes = test_graph.missing_neighbors() assert len(missing_nodes) == 2 assert "middle" in missing_nodes diff --git a/tests/magnus/test_integration.py b/tests/magnus/test_integration.py index 4ead7f68..78751eff 100644 --- a/tests/magnus/test_integration.py +++ b/tests/magnus/test_integration.py @@ -1,20 +1,25 @@ +import logging + +import pytest + + from magnus import ( integration, # pylint: disable=import-error; pylint: disable=import-error ) def test_base_integration_validate_does_nothing(): - base_integration = integration.BaseIntegration(None, None) + base_integration = integration.BaseIntegration("Executor", "service") base_integration.validate() def test_base_integration_configure_for_traversal_does_nothing(): - base_integration = integration.BaseIntegration(None, None) + base_integration = integration.BaseIntegration("Executor", 
"service") base_integration.validate() def test_base_integration_configure_for_execution_does_nothing(): - base_integration = integration.BaseIntegration(None, None) + base_integration = integration.BaseIntegration("Executor", "service") base_integration.validate() @@ -57,12 +62,74 @@ def test_configure_for_execution_calls_validate_of_integration_handler(monkeypat assert mock_configure_for_execution.call_count == 1 -def test_get_integration_handler_returns_base_integration_if_subclass_not_found(monkeypatch, mocker): +def test_get_integration_handler_gives_default_integration_if_no_match(monkeypatch, mocker): mock_service = mocker.MagicMock() - monkeypatch.setattr(integration, "get_service_type", lambda x: "DummyService") + mock_service.service_type = "I do not exist" + mock_service.service_name = "DummyService" mock_executor = mocker.MagicMock() mock_executor.executor_type = "DummyExecutor" + mock_executor.service_type = "executor" obj = integration.get_integration_handler(mock_executor, mock_service) assert isinstance(obj, integration.BaseIntegration) + + +# def test_get_integration_handler_considers_extension_from_stevedore(monkeypatch, mocker): +# mock_extension = mocker.MagicMock() +# mock_extension_manager = mocker.MagicMock(return_value={"extension": mock_extension}) + +# monkeypatch.setattr(integration.extension, "ExtensionManager", mock_extension_manager) +# m = mocker.Mock.create_autospec(return_value=integration.BaseIntegration) + +# m.__class__.__subclasses__ = [] # way to remove subclasses + + +def test_do_nothing_catalog_validate_emits_warning(caplog): + extension = integration.DoNothingCatalog("none", "service") + + with caplog.at_level(logging.WARNING, logger="magnus"): + extension.validate() + + assert "A do-nothing catalog does not hold any data and therefore cannot pass data between nodes." in caplog.text + + +def test_do_nothing_secrets_validate_emits_warning(caplog): + extension = integration.DoNothingSecrets("none", "service") + + with caplog.at_level(logging.WARNING, logger="magnus"): + extension.validate() + + assert "A do-nothing secrets does not hold any secrets and therefore cannot return you any secrets." in caplog.text + + +def test_do_nothing_experiment_tracker_validate_emits_warning(caplog): + extension = integration.DoNothingExperimentTracker("none", "service") + + with caplog.at_level(logging.WARNING, logger="magnus"): + extension.validate() + + assert "A do-nothing experiment tracker does nothing and therefore cannot track anything." in caplog.text + + +def test_buffered_run_log_store_raises_exception_for_anything_else_than_local(mocker, monkeypatch): + mock_executor = mocker.MagicMock() + + mock_executor.service_name = "not_local" + + extension = integration.BufferedRunLogStore(mock_executor, "service") + # expect an exception + with pytest.raises(Exception, match="Buffered run log store is only supported for local executor"): + extension.validate() + + +def test_buffered_run_log_store_accepts_local(mocker, caplog): + mock_executor = mocker.MagicMock() + + mock_executor.service_name = "local" + + extension = integration.BufferedRunLogStore(mock_executor, "service") + with caplog.at_level(logging.WARNING, logger="magnus"): + extension.validate() + + assert "Run log generated by buffered run log store are not persisted." 
in caplog.text diff --git a/tests/magnus/test_interaction.py b/tests/magnus/test_interaction.py index 68353744..6b54f6d7 100644 --- a/tests/magnus/test_interaction.py +++ b/tests/magnus/test_interaction.py @@ -1,8 +1,9 @@ import os +import json +import logging import pytest -import magnus from magnus import ( defaults, # pylint: disable=import-error exceptions, # pylint: disable=import-error @@ -10,65 +11,93 @@ ) -def test_track_this_adds_values_to_environ(monkeypatch, mocker): - mock_executor = mocker.MagicMock() - monkeypatch.setattr(magnus.context, "executor", mock_executor) +@pytest.fixture(autouse=True) +def mock_context(monkeypatch, mocker, request): + if "noautofixt" in request.keywords: + yield None + return + mc = mocker.MagicMock() + monkeypatch.setattr(interaction, "context", mc) + yield + + +def test_track_this_adds_values_to_environ(): interaction.track_this(a="b") - assert defaults.TRACK_PREFIX + "a" in os.environ - del os.environ[defaults.TRACK_PREFIX + "a"] + assert defaults.TRACK_PREFIX + "a" + f"{defaults.STEP_INDICATOR}0" in os.environ + del os.environ[defaults.TRACK_PREFIX + "a" + f"{defaults.STEP_INDICATOR}0"] -def test_track_this_adds_multiple_values_to_environ(mocker, monkeypatch): - mock_executor = mocker.MagicMock() - monkeypatch.setattr(magnus.context, "executor", mock_executor) +def test_track_this_adds_multiple_values_to_environ(): interaction.track_this(a="b", b="a") - assert defaults.TRACK_PREFIX + "a" in os.environ - assert defaults.TRACK_PREFIX + "b" in os.environ - del os.environ[defaults.TRACK_PREFIX + "a"] - del os.environ[defaults.TRACK_PREFIX + "b"] + assert defaults.TRACK_PREFIX + "a" + f"{defaults.STEP_INDICATOR}0" in os.environ + assert defaults.TRACK_PREFIX + "b" + f"{defaults.STEP_INDICATOR}0" in os.environ + del os.environ[defaults.TRACK_PREFIX + "a" + f"{defaults.STEP_INDICATOR}0"] + del os.environ[defaults.TRACK_PREFIX + "b" + f"{defaults.STEP_INDICATOR}0"] + + +def test_track_this_adds_step_if_non_zero(): + interaction.track_this(a="b", b="a", step=1) + assert defaults.TRACK_PREFIX + "a" f"{defaults.STEP_INDICATOR}1" in os.environ + assert defaults.TRACK_PREFIX + "b" + f"{defaults.STEP_INDICATOR}1" in os.environ + del os.environ[defaults.TRACK_PREFIX + "a" + f"{defaults.STEP_INDICATOR}1"] + del os.environ[defaults.TRACK_PREFIX + "b" + f"{defaults.STEP_INDICATOR}1"] def test_store_paramenter_adds_values_to_environ(): - interaction.store_parameter(a="b") + interaction.set_parameter(a="b") assert defaults.PARAMETER_PREFIX + "a" in os.environ del os.environ[defaults.PARAMETER_PREFIX + "a"] def test_store_parameter_adds_multiple_values_to_environ(): - interaction.store_parameter(a="b", b="a") + interaction.set_parameter(a="b", b="a") assert defaults.PARAMETER_PREFIX + "a" in os.environ assert defaults.PARAMETER_PREFIX + "b" in os.environ del os.environ[defaults.PARAMETER_PREFIX + "a"] del os.environ[defaults.PARAMETER_PREFIX + "b"] -def test_get_parameter_returns_all_parameters_if_no_key_provided(mocker, monkeypatch): - monkeypatch.setattr(interaction.utils, "get_user_set_parameters", mocker.MagicMock(return_value="this")) +def test_store_parameter_updates_if_present_and_asked(): + os.environ[defaults.PARAMETER_PREFIX + "a"] = "b" + os.environ[defaults.PARAMETER_PREFIX + "b"] = "a" + interaction.set_parameter(a="c", b="d") + assert json.loads(os.environ[defaults.PARAMETER_PREFIX + "a"]) == "c" + assert json.loads(os.environ[defaults.PARAMETER_PREFIX + "b"]) == "d" + + del os.environ[defaults.PARAMETER_PREFIX + "a"] + del 
os.environ[defaults.PARAMETER_PREFIX + "b"] + + +def test_get_parameter_returns_all_parameters_if_no_key_provided(monkeypatch, mocker): + monkeypatch.setattr(interaction.parameters, "get_user_set_parameters", mocker.MagicMock(return_value="this")) assert interaction.get_parameter() == "this" def test_get_parameter_returns_parameters_if_provided(mocker, monkeypatch): - monkeypatch.setattr(interaction.utils, "get_user_set_parameters", mocker.MagicMock(return_value={"this": "that"})) + monkeypatch.setattr( + interaction.parameters, "get_user_set_parameters", mocker.MagicMock(return_value={"this": "that"}) + ) assert interaction.get_parameter("this") == "that" def test_get_parameter_returns_parameters_raises_exception_if_key_not_found(mocker, monkeypatch): - monkeypatch.setattr(interaction.utils, "get_user_set_parameters", mocker.MagicMock(return_value={"this": "that"})) + monkeypatch.setattr( + interaction.parameters, "get_user_set_parameters", mocker.MagicMock(return_value={"this": "that"}) + ) with pytest.raises(Exception): interaction.get_parameter("this1") def test_get_secret_delegates_to_secrets_handler_get(mocker, monkeypatch): - mock_global_exec = mocker.MagicMock() - from magnus import context + mock_context = mocker.MagicMock() + mock_secrets_handler = mocker.MagicMock() - context.executor = mock_global_exec + mock_context.run_context.secrets_handler = mock_secrets_handler - mock_secrets_handler = mocker.MagicMock() - mock_global_exec.secrets_handler = mock_secrets_handler + monkeypatch.setattr(interaction, "context", mock_context) mock_secrets_handler.get.return_value = "test" @@ -76,13 +105,12 @@ def test_get_secret_delegates_to_secrets_handler_get(mocker, monkeypatch): def test_get_secret_raises_exception_if_secrets_handler_raises(mocker, monkeypatch): - mock_global_exec = mocker.MagicMock() - from magnus import context + mock_context = mocker.MagicMock() + mock_secrets_handler = mocker.MagicMock() - context.executor = mock_global_exec + mock_context.run_context.secrets_handler = mock_secrets_handler - mock_secrets_handler = mocker.MagicMock() - mock_global_exec.secrets_handler = mock_secrets_handler + monkeypatch.setattr(interaction, "context", mock_context) mock_secrets_handler.get.side_effect = exceptions.SecretNotFoundError("test", "test") with pytest.raises(exceptions.SecretNotFoundError): @@ -90,55 +118,155 @@ def test_get_secret_raises_exception_if_secrets_handler_raises(mocker, monkeypat def test_get_from_catalog_delegates_to_catalog_handler(mocker, monkeypatch): - from magnus import context + mock_context = mocker.MagicMock() + mock_catalog_handler = mocker.MagicMock() - mock_global_exec = mocker.MagicMock() - context.executor = mock_global_exec + mock_context.run_context.catalog_handler = mock_catalog_handler mock_catalog_handler_get = mocker.MagicMock() - mock_global_exec.catalog_handler.get = mock_catalog_handler_get - mock_global_exec.run_id = "RUN_ID" + mock_catalog_handler.get = mock_catalog_handler_get + mock_context.run_context.run_id = "RUN_ID" - mock_global_exec.catalog_handler.compute_data_folder = "compute_folder" + mock_catalog_handler.compute_data_folder = "compute_folder" + monkeypatch.setattr(interaction, "context", mock_context) interaction.get_from_catalog("this") - mock_catalog_handler_get.assert_called_once_with("this", compute_data_folder="compute_folder", run_id="RUN_ID") + mock_catalog_handler_get.assert_called_once_with("this", run_id="RUN_ID") def test_get_from_catalog_uses_destination_folder(mocker, monkeypatch): - from magnus import context + 
mock_context = mocker.MagicMock() + mock_catalog_handler = mocker.MagicMock() - mock_global_exec = mocker.MagicMock() - context.executor = mock_global_exec + mock_context.run_context.catalog_handler = mock_catalog_handler mock_catalog_handler_get = mocker.MagicMock() - mock_global_exec.catalog_handler.get = mock_catalog_handler_get - mock_global_exec.run_id = "RUN_ID" + mock_catalog_handler.get = mock_catalog_handler_get + mock_context.run_context.run_id = "RUN_ID" - mock_global_exec.catalog_handler.compute_data_folder = "compute_folder_not_used" + mock_catalog_handler.compute_data_folder = "compute_folder" + monkeypatch.setattr(interaction, "context", mock_context) interaction.get_from_catalog("this", destination_folder="use_this_folder") - mock_catalog_handler_get.assert_called_once_with("this", compute_data_folder="use_this_folder", run_id="RUN_ID") + mock_catalog_handler_get.assert_called_once_with("this", run_id="RUN_ID") + + +def test_get_from_catalog_raises_warning_if_no_context_step_log(mocker, monkeypatch, caplog): + mock_context = mocker.MagicMock() + mock_catalog_handler = mocker.MagicMock() + + mock_context.run_context.catalog_handler = mock_catalog_handler + mock_context.run_context.executor._context_step_log = None + + mock_catalog_handler_get = mocker.MagicMock() + mock_catalog_handler.get = mock_catalog_handler_get + mock_context.run_context.run_id = "RUN_ID" + + mock_catalog_handler.compute_data_folder = "compute_folder" + monkeypatch.setattr(interaction, "context", mock_context) + + with caplog.at_level(logging.WARNING, logger="magnus"): + interaction.get_from_catalog("this") + + assert "Step log context was not found during interaction" in caplog.text + + mock_catalog_handler_get.assert_called_once_with("this", run_id="RUN_ID") + + +@pytest.mark.noautofixt +def test_get_run_id_returns_from_context(monkeypatch, mocker): + mock_context = mocker.MagicMock() + mock_context.run_context.run_id = "1234" + monkeypatch.setattr(interaction, "context", mock_context) + + assert interaction.get_run_id() == "1234" + + +@pytest.mark.noautofixt +def test_get_tag_raises_exception_if_no_executor(monkeypatch, mocker): + mock_context = mocker.MagicMock() + mock_context.run_context.executor = None + monkeypatch.setattr(interaction, "context", mock_context) + + with pytest.raises(Exception, match="Please raise a bug report"): + assert interaction.get_tag() == "1234" + + +@pytest.mark.noautofixt +def test_get_tag_gets_tag_from_context(monkeypatch, mocker): + mock_context = mocker.MagicMock() + mock_context.run_context.tag = "1234" + monkeypatch.setattr(interaction, "context", mock_context) + + assert interaction.get_tag() == "1234" + + +@pytest.mark.noautofixt +def test_get_experiment_context_raises_exception_if_no_executor(monkeypatch, mocker): + mock_context = mocker.MagicMock() + mock_context.run_context.executor = None + monkeypatch.setattr(interaction, "context", mock_context) + + with pytest.raises(Exception, match="Please raise a bug report"): + interaction.get_experiment_tracker_context() + + +@pytest.mark.noautofixt +def test_get_experiment_context_returns_client_context(monkeypatch, mocker): + mock_context = mocker.MagicMock() + mock_experiment_tracker = mocker.MagicMock() + mock_client_context = mocker.MagicMock() + + mock_experiment_tracker.client_context = mock_client_context + + mock_context.run_context.experiment_tracker = mock_experiment_tracker + monkeypatch.setattr(interaction, "context", mock_context) + + assert interaction.get_experiment_tracker_context() == 
mock_client_context + + +def test_put_object_calls_put_in_catalog(monkeypatch, mocker): + mock_dump = mocker.MagicMock() + mock_put_in_catalog = mocker.MagicMock() + mock_os_remove = mocker.MagicMock() + + monkeypatch.setattr(interaction, "put_in_catalog", mock_put_in_catalog) + monkeypatch.setattr(interaction.pickler.NativePickler, "dump", mock_dump) + monkeypatch.setattr(interaction.os, "remove", mock_os_remove) + + interaction.put_object("imdata", "iamsam") + + mock_dump.assert_called_once_with(data="imdata", path="iamsam") + mock_put_in_catalog.assert_called_once_with(f"iamsam.pickle") + mock_os_remove.assert_called_once_with(f"iamsam.pickle") + + +def test_get_object_calls_get_from_catalog(monkeypatch, mocker): + mock_load = mocker.MagicMock() + mock_get_from_catalog = mocker.MagicMock() + mock_os_remove = mocker.MagicMock() + monkeypatch.setattr(interaction, "get_from_catalog", mock_get_from_catalog) + monkeypatch.setattr(interaction.pickler.NativePickler, "load", mock_load) + monkeypatch.setattr(interaction.os, "remove", mock_os_remove) -def test_put_in_catalog_delegates_to_catalog_handler(mocker, monkeypatch): - from magnus import context + interaction.get_object("iamsam") - mock_global_exec = mocker.MagicMock() - context.executor = mock_global_exec + mock_load.assert_called_once_with("iamsam") + mock_get_from_catalog.assert_called_once_with(name="iamsam.pickle", destination_folder=".") + mock_os_remove.assert_called_once_with("iamsam.pickle") - mock_catalog_handler_put = mocker.MagicMock() - mock_global_exec.catalog_handler.put = mock_catalog_handler_put - mock_global_exec.run_id = "RUN_ID" - mock_file_path = mocker.MagicMock() - mock_path = mocker.MagicMock(return_value=mock_file_path) - mock_file_path.name = "file_name" - mock_file_path.parent = "in_this_folder" - monkeypatch.setattr(magnus.interaction, "Path", mock_path) +def test_get_object_raises_exception_if_file_not_found(monkeypatch, mocker): + mock_load = mocker.MagicMock(side_effect=FileNotFoundError()) + mock_get_from_catalog = mocker.MagicMock() + mock_os_remove = mocker.MagicMock() - interaction.put_in_catalog("this_file") + monkeypatch.setattr(interaction, "get_from_catalog", mock_get_from_catalog) + monkeypatch.setattr(interaction.pickler.NativePickler, "load", mock_load) + monkeypatch.setattr(interaction.os, "remove", mock_os_remove) - mock_catalog_handler_put.assert_called_once_with("file_name", compute_data_folder="in_this_folder", run_id="RUN_ID") + with pytest.raises(FileNotFoundError): + interaction.get_object("iamsam") diff --git a/tests/magnus/test_nodes.py b/tests/magnus/test_nodes.py index f70a659c..2399ba93 100644 --- a/tests/magnus/test_nodes.py +++ b/tests/magnus/test_nodes.py @@ -1,13 +1,58 @@ import pytest -from magnus import ( - defaults, # pylint: disable=import-error - nodes, # pylint: disable=import-error -) +from magnus import defaults, nodes, exceptions # pylint: disable=import-error # pylint: disable=import-error + + +@pytest.fixture(autouse=True) +def instantiable_base_class(monkeypatch): + monkeypatch.setattr(nodes.BaseNode, "__abstractmethods__", set()) + yield + + +@pytest.fixture() +def instantiable_traversal_node(monkeypatch): + monkeypatch.setattr(nodes.TraversalNode, "__abstractmethods__", set()) + yield + + +@pytest.fixture() +def instantiable_executable_node(monkeypatch): + monkeypatch.setattr(nodes.ExecutableNode, "__abstractmethods__", set()) + yield + + +@pytest.fixture() +def instantiable_composite_node(monkeypatch): + monkeypatch.setattr(nodes.CompositeNode, 
"__abstractmethods__", set()) + yield + + +@pytest.fixture() +def instantiable_terminal_node(monkeypatch): + monkeypatch.setattr(nodes.TerminalNode, "__abstractmethods__", set()) + yield + + +def test_base_run_log_store_context_property(mocker, monkeypatch, instantiable_base_class): + mock_run_context = mocker.Mock() + + monkeypatch.setattr(nodes.context, "run_context", mock_run_context) + + assert nodes.BaseNode(node_type="dummy", name="test", internal_name="")._context == mock_run_context + + +def test_validate_name_for_dot(instantiable_base_class): + with pytest.raises(ValueError): + nodes.BaseNode(name="test.", internal_name="test", node_type="dummy") + + +def test_validate_name_for_percent(instantiable_base_class): + with pytest.raises(ValueError): + nodes.BaseNode(name="test%", internal_name="test", node_type="dummy") def test_base_node__command_friendly_name_replaces_whitespace_with_character(): - node = nodes.BaseNode(name="test", internal_name="test", config={}) + node = nodes.BaseNode(name="test", internal_name="test", node_type="dummy") assert node._command_friendly_name() == "test" @@ -22,40 +67,42 @@ def test_base_node__get_internal_name_from_command_name_replaces_character_with_ def test_base_node__get_step_log_name_returns_internal_name_if_no_map_variable(): - node = nodes.BaseNode(name="test", internal_name="test", config={}) + node = nodes.BaseNode(name="test", internal_name="test", node_type="dummy") assert node._get_step_log_name() == "test" def test_base_node__get_step_log_name_returns_map_modified_internal_name_if_map_variable(): - node = nodes.BaseNode(name="test", internal_name="test." + defaults.MAP_PLACEHOLDER, config={}) + node = nodes.BaseNode(name="test", internal_name="test." + defaults.MAP_PLACEHOLDER, node_type="dummy") assert node._get_step_log_name(map_variable={"map_key": "a"}) == "test.a" def test_base_node__get_step_log_name_returns_map_modified_internal_name_if_map_variable_multiple(): node = nodes.BaseNode( - name="test", internal_name="test." + defaults.MAP_PLACEHOLDER + ".step." + defaults.MAP_PLACEHOLDER, config={} + name="test", + internal_name="test." + defaults.MAP_PLACEHOLDER + ".step." + defaults.MAP_PLACEHOLDER, + node_type="dummy", ) assert node._get_step_log_name(map_variable={"map_key": "a", "map_key1": "b"}) == "test.a.step.b" def test_base_node__get_branch_log_name_returns_null_if_not_set(): - node = nodes.BaseNode(name="test", internal_name="test", config={}) + node = nodes.BaseNode(name="test", internal_name="test", node_type="dummy") - assert node._get_branch_log_name() is None + assert node._get_branch_log_name() is "" def test_base_node__get_branch_log_name_returns_internal_name_if_set(): - node = nodes.BaseNode(name="test", internal_name="test", config={}, internal_branch_name="test_internal") + node = nodes.BaseNode(name="test", internal_name="test", internal_branch_name="test_internal", node_type="dummy") assert node._get_branch_log_name() == "test_internal" def test_base_node__get_branch_log_name_returns_map_modified_internal_name_if_map_variable(): node = nodes.BaseNode( - name="test", internal_name="test_", config={}, internal_branch_name="test." + defaults.MAP_PLACEHOLDER + name="test", internal_name="test_", internal_branch_name="test." 
+ defaults.MAP_PLACEHOLDER, node_type="dummy" ) assert node._get_branch_log_name(map_variable={"map_key": "a"}) == "test.a" @@ -65,350 +112,255 @@ def test_base_node__get_branch_log_name_returns_map_modified_internal_name_if_ma node = nodes.BaseNode( name="test", internal_name="test_", - config={}, internal_branch_name="test." + defaults.MAP_PLACEHOLDER + ".step." + defaults.MAP_PLACEHOLDER, + node_type="dummy", ) assert node._get_branch_log_name(map_variable={"map_key": "a", "map_key1": "b"}) == "test.a.step.b" -def test_base_node__get_branch_by_name_raises_exception(): - node = nodes.BaseNode(name="test", internal_name="test", config={}) - - with pytest.raises(Exception): - node._get_branch_by_name("fail") - - -def test_base_node_execute_raises_not_implemented_error(): - node = nodes.BaseNode(name="test", internal_name="test", config={}) - - with pytest.raises(NotImplementedError): - node.execute(executor="test") - - -def test_base_node_execute_as_graph_raises_not_implemented_error(): - node = nodes.BaseNode(name="test", internal_name="test", config={}) - - with pytest.raises(NotImplementedError): - node.execute_as_graph(executor="test") - - -def test_task_node_mocks_if_mock_is_true(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() - - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - - configuration = {"command": "test", "next": "next_node"} - task_node = nodes.TaskNode(name="test", internal_name="test", config=configuration) - - task_node.execute(executor=mock_executor, mock=True) - - assert mock_attempt_log.status == defaults.SUCCESS - - -def test_task_node_sets_attempt_log_fail_in_exception_of_execution(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() - - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - - configuration = {"command": "test", "next": "next_node"} - task_node = nodes.TaskNode(name="test", internal_name="test", config=configuration) - - mock_execution_type = mocker.MagicMock() - task_node.execution_type = mocker.MagicMock(return_value=mock_execution_type) - mock_execution_type.execute_command = mocker.MagicMock(side_effect=Exception()) - task_node.execute(executor=mock_executor) - - assert mock_attempt_log.status == defaults.FAIL - - -def test_task_node_sets_attempt_log_success_in_no_exception_of_execution(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() - - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - - configuration = {"command": "test", "next": "next_node"} - task_node = nodes.TaskNode(name="test", internal_name="test", config=configuration) - - task_node.executable = mocker.MagicMock() - - task_node.execute(executor=mock_executor) - - assert mock_attempt_log.status == defaults.SUCCESS - - -def test_task_node_execute_as_graph_raises_exception(): - configuration = {"command": "test", "next": "next_node"} - task_node = nodes.TaskNode(name="test", internal_name="test", config=configuration) - - with pytest.raises(Exception): - task_node.execute_as_graph(None) - - -def test_fail_node_sets_branch_log_fail(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() - mock_branch_log = mocker.MagicMock() - - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - 
mock_executor.run_log_store.get_branch_log = mocker.MagicMock(return_value=mock_branch_log) - - node = nodes.FailNode(name="test", internal_name="test", config={}) - - node.execute(executor=mock_executor) - - assert mock_attempt_log.status == defaults.SUCCESS - assert mock_branch_log.status == defaults.FAIL - - -def test_fail_node_sets_attempt_log_success_even_in_exception(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() - - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - mock_executor.run_log_store.get_branch_log = mocker.MagicMock(side_effect=Exception()) - - node = nodes.FailNode(name="test", internal_name="test", config={}) - - node.execute(executor=mock_executor) - - assert mock_attempt_log.status == defaults.SUCCESS - - -def test_fail_node_execute_as_graph_raises_exception(): - fail_node = nodes.FailNode(name="test", internal_name="test", config={}) - - with pytest.raises(Exception): - fail_node.execute_as_graph(None) - - -def test_success_node_sets_branch_log_success(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() - mock_branch_log = mocker.MagicMock() - - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - mock_executor.run_log_store.get_branch_log = mocker.MagicMock(return_value=mock_branch_log) - - node = nodes.SuccessNode(name="test", internal_name="test", config={}) - - node.execute(executor=mock_executor) - - assert mock_attempt_log.status == defaults.SUCCESS - assert mock_branch_log.status == defaults.SUCCESS - - -def test_success_node_sets_attempt_log_success_even_in_exception(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() - - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - mock_executor.run_log_store.get_branch_log = mocker.MagicMock(side_effect=Exception()) - - node = nodes.SuccessNode(name="test", internal_name="test", config={}) - - node.execute(executor=mock_executor) - - assert mock_attempt_log.status == defaults.SUCCESS - - -def test_success_node_execute_as_graph_raises_exception(): - success_node = nodes.SuccessNode(name="test", internal_name="test", config={}) - - with pytest.raises(Exception): - success_node.execute_as_graph(None) - - -def test_parallel_node_raises_exception_for_empty_branches(): - with pytest.raises(Exception): - nodes.ParallelNode(name="test", internal_name="test", config={"branches": {}}, execution_type="python") - - -def test_parallel_node_get_sub_graphs_creates_graphs(mocker, monkeypatch): - mock_create_graph = mocker.MagicMock(return_value="agraphobject") - - monkeypatch.setattr(nodes, "create_graph", mock_create_graph) - - parallel_config = {"branches": {"a": {}, "b": {}}, "next": "next_node"} - node = nodes.ParallelNode(name="test", internal_name="test", config=parallel_config) - assert mock_create_graph.call_count == 2 - assert len(node.branches.items()) == 2 +def test_traversal_node_get_on_failure_node_returns_from_config(instantiable_traversal_node): + traversal_class = nodes.TraversalNode( + name="test", internal_name="test", node_type="test", next_node="next", on_failure="on_failure" + ) + assert traversal_class._get_on_failure_node() == "on_failure" -def test_parallel_node__get_branch_by_name_raises_exception_if_branch_not_found(mocker, monkeypatch): - monkeypatch.setattr(nodes.ParallelNode, "get_sub_graphs", mocker.MagicMock()) - 
parallel_config = {"branches": {"a": {}, "b": {}}, "next": "next_node"} +def test_traversal_node_get_next_node_returns_from_config(instantiable_traversal_node): + traversal_class = nodes.TraversalNode( + name="test", internal_name="test", node_type="test", next_node="next", on_failure="on_failure" + ) - node = nodes.ParallelNode(name="test", internal_name="test", config=parallel_config) + assert traversal_class._get_next_node() == "next" - with pytest.raises(Exception): - node._get_branch_by_name("a1") +def test_traversal_node_is_terminal_node_is_false(instantiable_traversal_node): + traversal_class = nodes.TraversalNode( + name="test", internal_name="test", node_type="test", next_node="next", on_failure="on_failure" + ) -def test_parallel_node__get_branch_by_name_returns_branch_if_found(mocker, monkeypatch): - monkeypatch.setattr(nodes.ParallelNode, "get_sub_graphs", mocker.MagicMock()) + assert traversal_class._is_terminal_node() is False - parallel_config = {"branches": {"a": {}, "b": {}}, "next": "next_node"} - node = nodes.ParallelNode(name="test", internal_name="test", config=parallel_config) - node.branches = {"a": "somegraph"} +def test_traversal_node_get_executor_config_defaults_to_empty_dict(instantiable_traversal_node): + traversal_class = nodes.TraversalNode( + name="test", internal_name="test", node_type="test", next_node="next", on_failure="on_failure" + ) - assert node._get_branch_by_name("a") == "somegraph" + assert traversal_class._get_executor_config("I do not exist") == "" -def test_parallel_node_execute_raises_exception(mocker, monkeypatch): - monkeypatch.setattr(nodes.ParallelNode, "get_sub_graphs", mocker.MagicMock()) +def test_traversal_node_get_executor_returns_configured_config(instantiable_traversal_node): + traversal_class = nodes.TraversalNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + on_failure="on_failure", + overrides={"test": "key"}, + ) - parallel_config = {"branches": {"a": {}, "b": {}}, "next": "next_node"} + assert traversal_class._get_executor_config("test") == "key" - node = nodes.ParallelNode(name="test", internal_name="test", config=parallel_config) - with pytest.raises(Exception): - node.execute(executor="test") +def test_executable_node_get_catalog_detaults_to_empty(instantiable_executable_node): + traversal_class = nodes.ExecutableNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + on_failure="on_failure", + ) + assert traversal_class._get_catalog_settings() == {} -def test_nodes_map_node_raises_exception_if_config_not_have_iterate_on(): - map_config = {"branch": {}, "next": "next_node", "iterate_as": "test"} - with pytest.raises(Exception): - nodes.MapNode(name="test", internal_name="test", config=map_config) +def test_executable_node_get_max_attempts_from_config(instantiable_executable_node): + traversal_class = nodes.ExecutableNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + on_failure="on_failure", + max_attempts=10, + ) -def test_nodes_map_node_raises_exception_if_config_not_have_iterate_as(): - map_config = {"branch": {}, "next": "next_node", "iterate_on": "test"} - with pytest.raises(Exception): - nodes.MapNode(name="test", internal_name="test", config=map_config) + assert traversal_class._get_max_attempts() == 10 -def test_nodes_map_node_names_the_branch_as_defaults_place_holder(monkeypatch, mocker): - monkeypatch.setattr(nodes.MapNode, "get_sub_graph", mocker.MagicMock()) +def 
test_executable_node_get_catalog_detaults_to_1(instantiable_executable_node): + traversal_class = nodes.ExecutableNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + on_failure="on_failure", + ) - map_config = {"branch": {}, "next": "next_node", "iterate_on": "test", "iterate_as": "test"} + assert traversal_class._get_max_attempts() == 1 - node = nodes.MapNode(name="test", internal_name="test", config=map_config) - assert node.branch_placeholder_name == defaults.MAP_PLACEHOLDER +def test_executable_node_get_branch_by_name_raises_exception(instantiable_executable_node): + traversal_class = nodes.ExecutableNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + ) + with pytest.raises(Exception, match="This is an executable node and"): + traversal_class._get_branch_by_name("test") -def test_nodes_map_get_sub_graph_calls_create_graph_with_correct_naming(mocker, monkeypatch): - mock_create_graph = mocker.MagicMock() - monkeypatch.setattr(nodes, "create_graph", mock_create_graph) - map_config = {"branch": {}, "next": "next_node", "iterate_on": "test", "iterate_as": "test"} +def test_executable_node_execute_as_graph_raises_exception(instantiable_executable_node): + traversal_class = nodes.ExecutableNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + ) - _ = nodes.MapNode(name="test", internal_name="test", config=map_config) + with pytest.raises(Exception, match="This is an executable node and"): + traversal_class.execute_as_graph() - mock_create_graph.assert_called_once_with({}, internal_branch_name="test." + defaults.MAP_PLACEHOLDER) +def test_executable_node_fan_in_raises_exception(instantiable_executable_node): + traversal_class = nodes.ExecutableNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + ) -def test_nodes_map__get_branch_by_name_returns_a_sub_graph(mocker, monkeypatch): - mock_create_graph = mocker.MagicMock(return_value="a") - monkeypatch.setattr(nodes, "create_graph", mock_create_graph) + with pytest.raises(Exception, match="This is an executable node and"): + traversal_class.fan_in() - map_config = {"branch": {}, "next": "next_node", "iterate_on": "test", "iterate_as": "test"} - node = nodes.MapNode(name="test", internal_name="test", config=map_config) - assert node._get_branch_by_name("anyname") == "a" +def test_executable_node_fan_out_raises_exception(instantiable_executable_node): + traversal_class = nodes.ExecutableNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + ) + with pytest.raises(Exception, match="This is an executable node and"): + traversal_class.fan_out() -def test_nodes_map_node_execute_raises_exception(mocker, monkeypatch): - monkeypatch.setattr(nodes.MapNode, "get_sub_graph", mocker.MagicMock()) - map_config = {"branch": {}, "next": "next_node", "iterate_on": "test", "iterate_as": "test"} +def test_composite_node_get_catalog_settings_raises_exception(instantiable_composite_node): + traversal_class = nodes.CompositeNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + ) - node = nodes.MapNode(name="test", internal_name="test", config=map_config) + with pytest.raises(Exception, match="This is a composite node and"): + traversal_class._get_catalog_settings() - with pytest.raises(Exception): - node.execute("dummy") +def test_composite_node_get_max_attempts_raises_exception(instantiable_composite_node): + traversal_class = nodes.CompositeNode( + name="test", + 
internal_name="test", + node_type="test", + next_node="next", + ) -def test_nodes_dag_node_raises_exception_if_dag_definition_is_not_present(): - dag_config = {"next": "test"} - with pytest.raises(Exception): - nodes.DagNode(name="test", internal_name="test", config=dag_config) + with pytest.raises(Exception, match="This is a composite node and"): + traversal_class._get_max_attempts() -def test_node_dag_node_get_sub_graph_raises_exception_if_dag_block_not_present(mocker, monkeypatch): - mock_load_yaml = mocker.MagicMock(return_value={}) - monkeypatch.setattr(nodes.utils, "load_yaml", mock_load_yaml) +def test_composite_node_execute_raises_exception(instantiable_composite_node): + traversal_class = nodes.CompositeNode( + name="test", + internal_name="test", + node_type="test", + next_node="next", + ) - dag_config = {"next": "test", "dag_definition": "test"} + with pytest.raises(Exception, match="This is a composite node and"): + traversal_class.execute() - with pytest.raises(Exception): - nodes.DagNode(name="test", internal_name="test", config=dag_config) +def test_terminal_node_get_on_failure_node_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") -def test_nodes_dag_node_get_sub_graph_calls_create_graph_with_correct_parameters(mocker, monkeypatch): - mock_load_yaml = mocker.MagicMock(return_value={"dag": "a"}) - mock_create_graph = mocker.MagicMock(return_value="branch") + with pytest.raises(exceptions.TerminalNodeError): + node._get_on_failure_node() - monkeypatch.setattr(nodes.utils, "load_yaml", mock_load_yaml) - monkeypatch.setattr(nodes, "create_graph", mock_create_graph) - dag_config = {"next": "test", "dag_definition": "test"} +def test_terminal_node__get_next_node_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") - _ = nodes.DagNode(name="test", internal_name="test", config=dag_config) + with pytest.raises(exceptions.TerminalNodeError): + node._get_next_node() - mock_create_graph.assert_called_once_with("a", internal_branch_name="test." 
+ defaults.DAG_BRANCH_NAME) +def test_terminal_node__get_catalog_settings_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") -def test_nodes_dag_node__get_branch_by_name_raises_exception_if_branch_name_is_invalid(mocker, monkeypatch): - monkeypatch.setattr(nodes.DagNode, "get_sub_graph", mocker.MagicMock(return_value="branch")) + with pytest.raises(exceptions.TerminalNodeError): + node._get_catalog_settings() - dag_config = {"next": "test", "dag_definition": "test"} - node = nodes.DagNode(name="test", internal_name="test", config=dag_config) - with pytest.raises(Exception): - node._get_branch_by_name("test") +def test_terminal_node__get_branch_by_name_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") + with pytest.raises(exceptions.TerminalNodeError): + node._get_branch_by_name("does not matter") -def test_nodes_dag_node_get_branch_by_name_returns_if_branch_name_is_valid(mocker, monkeypatch): - monkeypatch.setattr(nodes.DagNode, "get_sub_graph", mocker.MagicMock(return_value="branch")) - dag_config = {"next": "test", "dag_definition": "test"} +def test_terminal_node__get_executor_config_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") - node = nodes.DagNode(name="test", internal_name="test", config=dag_config) + with pytest.raises(exceptions.TerminalNodeError): + node._get_executor_config("does not matter") - assert node._get_branch_by_name("test." + defaults.DAG_BRANCH_NAME) == "branch" +def test_terminal_node_execute_as_graph_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") -def test_nodes_dag_node_execute_raises_exception(mocker, monkeypatch): - monkeypatch.setattr(nodes.DagNode, "get_sub_graph", mocker.MagicMock(return_value="branch")) + with pytest.raises(exceptions.TerminalNodeError): + node.execute_as_graph() - dag_config = {"next": "test", "dag_definition": "test"} - node = nodes.DagNode(name="test", internal_name="test", config=dag_config) +def test_terminal_node_fan_out_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") - with pytest.raises(Exception): - node.execute("dummy") + with pytest.raises(exceptions.TerminalNodeError): + node.fan_out() -def test_nodes_as_is_node_accepts_what_is_given(): - node = nodes.AsISNode( - name="test", internal_name="test", config={"command_config": {"render_string": "test"}, "next": "test"} - ) +def test_terminal_node_fan_in_raises_exception(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") - assert node.config.command_config == {"render_string": "test"} + with pytest.raises(exceptions.TerminalNodeError): + node.fan_in() -def test_as_is_node_execute_as_graph_raises_exception(): - as_is_node = nodes.AsISNode(name="test", internal_name="test", config={"command": "nocommand", "next": "test"}) +def test_terminal_node_max_attempts_returns_1(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") - with pytest.raises(Exception): - as_is_node.execute_as_graph(None) + assert node._get_max_attempts() == 1 -def test_as_is_node_sets_attempt_log_success(mocker, monkeypatch): - mock_attempt_log = mocker.MagicMock() +def 
test_terminal_node_is_terminal_node_returns_true(instantiable_terminal_node): + node = nodes.TerminalNode(name="test", internal_name="test", node_type="dummy") - mock_executor = mocker.MagicMock() - mock_executor.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) + assert node._is_terminal_node() - node = nodes.AsISNode(name="test", internal_name="test", config={"next": "test"}) - node.execute(executor=mock_executor) +def test_terminal_node_parse_from_config_sends_the_config_for_instantiation(instantiable_terminal_node): + config = { + "node_type": "dummy", + "name": "test", + "internal_name": "test", + } - assert mock_attempt_log.status == defaults.SUCCESS + node = nodes.TerminalNode.parse_from_config(config) + assert node.node_type == "dummy" + assert node.name == "test" + assert node.internal_name == "test" diff --git a/tests/magnus/test_parmeters.py b/tests/magnus/test_parmeters.py new file mode 100644 index 00000000..718443b4 --- /dev/null +++ b/tests/magnus/test_parmeters.py @@ -0,0 +1,196 @@ +import os +import logging + +import pytest + +from pydantic import BaseModel, ValidationError + +from magnus import defaults +from magnus.parameters import ( + get_user_set_parameters, + cast_parameters_as_type, + bind_args_for_pydantic_model, + filter_arguments_for_func, +) + + +def test_get_user_set_parameters_does_nothing_if_prefix_does_not_match(monkeypatch): + monkeypatch.setenv("random", "value") + + assert get_user_set_parameters() == {} + + +def test_get_user_set_parameters_returns_the_parameter_if_prefix_match_int(monkeypatch): + monkeypatch.setenv(defaults.PARAMETER_PREFIX + "key", "1") + + assert get_user_set_parameters() == {"key": 1} + + +def test_get_user_set_parameters_returns_the_parameter_if_prefix_match_string(monkeypatch): + monkeypatch.setenv(defaults.PARAMETER_PREFIX + "key", '"value"') + + assert get_user_set_parameters() == {"key": "value"} + + +def test_get_user_set_parameters_removes_the_parameter_if_prefix_match_remove(monkeypatch): + monkeypatch.setenv(defaults.PARAMETER_PREFIX + "key", "1") + + assert defaults.PARAMETER_PREFIX + "key" in os.environ + + get_user_set_parameters(remove=True) + + assert defaults.PARAMETER_PREFIX + "key" not in os.environ + + +def test_cast_parameters_as_type_with_pydantic_model(): + class MyModel(BaseModel): + a: int + b: str + + value = {"a": 1, "b": "test"} + cast_value = cast_parameters_as_type(value, MyModel) + + assert isinstance(cast_value, MyModel) + assert cast_value.a == 1 + assert cast_value.b == "test" + + +def test_cast_parameters_as_type_with_dict(): + value = {"a": 1, "b": "test"} + cast_value = cast_parameters_as_type(value, dict) + + assert isinstance(cast_value, dict) + assert cast_value == value + + +def test_cast_parameters_as_type_with_non_special_type(): + value = "1" + cast_value = cast_parameters_as_type(value, int) + + assert isinstance(cast_value, int) + assert cast_value == 1 + + +def test_cast_parameters_as_type_with_none(): + value = None + cast_value = cast_parameters_as_type(value, None) + + assert cast_value is None + + +def test_cast_parameters_as_type_with_invalid_value(): + class MyModel(BaseModel): + a: int + + value = {"a": "test"} + with pytest.raises(ValidationError): + cast_parameters_as_type(value, MyModel) + + +def test_cast_parameters_as_type_with_invalid_type(caplog): + value = "test" + with caplog.at_level(logging.WARNING, logger="magnus"): + cast_parameters_as_type(value, list) + + assert f"Casting {value} of {type(value)} to {list} seems wrong!!" 
in caplog.text + + +def test_bind_args_for_pydantic_model_with_correct_params(): + class MyModel(BaseModel): + a: int + b: str + + params = {"a": 1, "b": "test"} + bound_model = bind_args_for_pydantic_model(params, MyModel) + + assert isinstance(bound_model, MyModel) + assert bound_model.a == 1 + assert bound_model.b == "test" + + +def test_bind_args_for_pydantic_model_with_extra_params(): + class MyModel(BaseModel): + a: int + b: str + + params = {"a": 1, "b": "test", "c": 2} + bound_model = bind_args_for_pydantic_model(params, MyModel) + + assert isinstance(bound_model, MyModel) + assert bound_model.a == 1 + assert bound_model.b == "test" + + +def test_bind_args_for_pydantic_model_with_missing_params(): + class MyModel(BaseModel): + a: int + b: str + + params = {"a": 1} + with pytest.raises(ValidationError): + bind_args_for_pydantic_model(params, MyModel) + + +def test_filter_arguments_for_func_with_simple_arguments(): + def func(a: int, b: str): + pass + + params = {"a": 1, "b": "test"} + bound_args = filter_arguments_for_func(func, params) + + assert bound_args == {"a": 1, "b": "test"} + + +def test_filter_arguments_for_func_with_pydantic_model_arguments(): + class MyModel(BaseModel): + a: int + b: str + + def func(inner: MyModel, c: str): + pass + + params = {"inner": {"a": 1, "b": "test"}, "c": "test"} + bound_args = filter_arguments_for_func(func, params) + + assert bound_args == {"inner": MyModel(a=1, b="test"), "c": "test"} + + +def test_filter_arguments_for_func_with_missing_arguments_but_defaults_present(): + class MyModel(BaseModel): + a: int + b: str + + def func(inner: MyModel, c: str = "test"): + pass + + params = {"inner": {"a": 1, "b": "test"}} + bound_args = filter_arguments_for_func(func, params) + + assert bound_args == {"inner": MyModel(a=1, b="test")} + + +def test_filter_arguments_for_func_with_missing_arguments_and_no_defaults(): + class MyModel(BaseModel): + a: int + b: str + + def func(inner: MyModel, c: str): + pass + + params = {"inner": {"a": 1, "b": "test"}} + with pytest.raises(ValueError, match=r"Parameter c is required for func but not provided"): + _ = filter_arguments_for_func(func, params) + + +def test_filter_arguments_for_func_with_map_variable_sent_in(): + class MyModel(BaseModel): + a: int + b: str + + params = {"inner": {"a": 1, "b": "test"}} + + def func(inner: MyModel, first: int, second: str): + pass + + bound_args = filter_arguments_for_func(func, params, map_variable={"first": 1, "second": "test"}) + assert bound_args == {"inner": MyModel(a=1, b="test"), "first": 1, "second": "test"} diff --git a/tests/magnus/test_sdk.py b/tests/magnus/test_sdk.py new file mode 100644 index 00000000..ff97586d --- /dev/null +++ b/tests/magnus/test_sdk.py @@ -0,0 +1,31 @@ +import pytest + +from magnus.extensions import nodes +from magnus import sdk + + +def test_success_init(): + test_success = sdk.Success() + + assert test_success.name == "success" + assert test_success.create_node() == nodes.SuccessNode(name="success", internal_name="success") + + +def test_fail_init(): + test_fail = sdk.Fail() + + assert test_fail.name == "fail" + assert test_fail.create_node() == nodes.FailNode(name="fail", internal_name="fail") + + +def test_stub_node_makes_next_success_if_terminate_with_success(): + test_stub = sdk.Stub(name="stub", terminate_with_success=True) + + assert test_stub.name == "stub" + assert test_stub.create_node() == nodes.StubNode(name="stub", next_node="success", internal_name="stub") + + +def test_stub_node_takes_given_next_node(): + test_stub = 
sdk.Stub(name="stub", next="test") + + assert test_stub.create_node() == nodes.StubNode(name="stub", next_node="test", internal_name="stub") diff --git a/tests/magnus/test_secrets.py b/tests/magnus/test_secrets.py index 7086453d..594ab1db 100644 --- a/tests/magnus/test_secrets.py +++ b/tests/magnus/test_secrets.py @@ -1,134 +1,31 @@ import pytest from magnus import ( - defaults, # pylint: disable=import-error secrets, # pylint: disable=import-error ) -def test_base_secrets_init_config_empty_dict(): - base_secret = secrets.BaseSecrets(config=None) +@pytest.fixture(autouse=True) +def instantiable_base_class(monkeypatch): + monkeypatch.setattr(secrets.BaseSecrets, "__abstractmethods__", set()) + yield - assert base_secret.config == {} +def test_base_secrets_context_refers_to_run_context(mocker, monkeypatch, instantiable_base_class): + mock_run_context = mocker.Mock() -def test_base_secrets_get_raises_not_implemented_error(): - base_secret = secrets.BaseSecrets(config=None) + monkeypatch.setattr(secrets.context, "run_context", mock_run_context) - with pytest.raises(NotImplementedError): - base_secret.get() - - -def test_do_nothing_secrets_handler_returns_none_if_name_provided(mocker, monkeypatch): - mock_base_secret = mocker.MagicMock() - - monkeypatch.setattr(secrets, "BaseSecrets", mock_base_secret) - - dummy_secret = secrets.DoNothingSecretManager(config=None) - assert dummy_secret.get("I dont exist") == "" - - -def test_do_nothing__handler_returns_empty_dict_if_name_not_provided(mocker, monkeypatch): - mock_base_secret = mocker.MagicMock() - - monkeypatch.setattr(secrets, "BaseSecrets", mock_base_secret) - - dummy_secret = secrets.DoNothingSecretManager(config=None) - assert dummy_secret.get() == {} - - -def test_dot_env_secrets_defaults_to_default_location_if_none_provided(mocker, monkeypatch): - mock_base_secret = mocker.MagicMock() - - monkeypatch.setattr(secrets, "BaseSecrets", mock_base_secret) - - dot_env_secret = secrets.DotEnvSecrets(config=None) - assert dot_env_secret.secrets_location == defaults.DOTENV_FILE_LOCATION - - -def test_dot_env_secrets_usees_location_if_provided(mocker, monkeypatch): - mock_base_secret = mocker.MagicMock() - - monkeypatch.setattr(secrets, "BaseSecrets", mock_base_secret) - - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - assert dot_env_secret.secrets_location == "here" - assert dot_env_secret.config.location == "here" - - -def test_dot_env_secrets_get_returns_all_secrets_if_no_name_provided(mocker, monkeypatch): - mock_base_secret = mocker.MagicMock() - - monkeypatch.setattr(secrets, "BaseSecrets", mock_base_secret) - monkeypatch.setattr(secrets.DotEnvSecrets, "_load_secrets", mocker.MagicMock()) - - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - dot_env_secret.secrets = "TopSecret" - - assert dot_env_secret.get() == "TopSecret" + assert secrets.BaseSecrets()._context == mock_run_context -def test_dot_env_secrets_get_returns_secret_if_one_provided(mocker, monkeypatch): - monkeypatch.setattr(secrets, "BaseSecrets", mocker.MagicMock()) - monkeypatch.setattr(secrets.DotEnvSecrets, "_load_secrets", mocker.MagicMock()) +def test_base_secrets_get_raises_not_implemented_error(instantiable_base_class): + base_secret = secrets.BaseSecrets() - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - dot_env_secret.secrets["give"] = "this" - - assert dot_env_secret.get("give") == "this" - - -def test_dot_env_secrets_raises_exception_if_secret_not_found(mocker, monkeypatch): - mock_base_secret = 
mocker.MagicMock() - - monkeypatch.setattr(secrets, "BaseSecrets", mock_base_secret) - monkeypatch.setattr(secrets.DotEnvSecrets, "_load_secrets", mocker.MagicMock()) - - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - dot_env_secret.secrets["give"] = "this" - - with pytest.raises(Exception): - dot_env_secret.get("give1") - - -def test_dot_env_load_secrets_raises_exception_if_file_does_not_exist(mocker, monkeypatch): - monkeypatch.setattr(secrets.utils, "does_file_exist", mocker.MagicMock(return_value=False)) - - monkeypatch.setattr(secrets, "BaseSecrets", mocker.MagicMock()) - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - - with pytest.raises(Exception): - dot_env_secret.load_secrets() - - -def test_dot_env_load_secrets_raises_exception_if_secret_formatting_is_invalid(mocker, monkeypatch): - monkeypatch.setattr(secrets.utils, "does_file_exist", mocker.MagicMock(return_value=True)) - - monkeypatch.setattr(secrets, "BaseSecrets", mocker.MagicMock()) - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - - with pytest.raises(Exception): - mocker.patch("builtins.open", mocker.mock_open(read_data="data")) - dot_env_secret.load_secrets() - - -def test_dot_env_load_secrets_raises_exception_if_secret_formatting_is_invalid_ge_2(mocker, monkeypatch): - monkeypatch.setattr(secrets.utils, "does_file_exist", mocker.MagicMock(return_value=True)) - - monkeypatch.setattr(secrets, "BaseSecrets", mocker.MagicMock()) - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - - with pytest.raises(Exception): - mocker.patch("builtins.open", mocker.mock_open(read_data=["data=data1="])) - dot_env_secret.load_secrets() - - -def test_dot_env_load_secrets_populates_correct_secrets_if_valid(mocker, monkeypatch): - monkeypatch.setattr(secrets.utils, "does_file_exist", mocker.MagicMock(return_value=True)) + with pytest.raises(NotImplementedError): + base_secret.get(name="secret") - monkeypatch.setattr(secrets, "BaseSecrets", mocker.MagicMock()) - dot_env_secret = secrets.DotEnvSecrets(config={"location": "here"}) - mocker.patch("builtins.open", mocker.mock_open(read_data="data=data1\n")) - dot_env_secret._load_secrets() - assert dot_env_secret.secrets == {"data": "data1"} +def test_do_nothing_secrets_handler_returns_none_if_name_provided(mocker, monkeypatch): + dummy_secret = secrets.DoNothingSecretManager() + assert dummy_secret.get("I dont exist") == "" diff --git a/tests/magnus/test_tasks.py b/tests/magnus/test_tasks.py index 9567afae..fb33c9fc 100644 --- a/tests/magnus/test_tasks.py +++ b/tests/magnus/test_tasks.py @@ -2,13 +2,14 @@ import os import pytest +from pydantic import BaseModel from magnus import defaults, tasks @pytest.fixture def configuration(): - return {"node_name": "dummy"} + return {"node_name": "dummy", "task_type": "dummy"} def test_base_task_execute_command_raises_not_implemented_error(configuration): @@ -21,7 +22,7 @@ def test_base_task_execute_command_raises_not_implemented_error(configuration): def test_base_task_get_parameters_gets_from_utils(mocker, monkeypatch, configuration): mock_get_user_set_parameters = mocker.MagicMock(configuration) - monkeypatch.setattr(tasks.utils, "get_user_set_parameters", mock_get_user_set_parameters) + monkeypatch.setattr(tasks.parameters, "get_user_set_parameters", mock_get_user_set_parameters) base_execution_type = tasks.BaseTaskType(**configuration) @@ -31,7 +32,7 @@ def test_base_task_get_parameters_gets_from_utils(mocker, monkeypatch, configura def 
test_base_task_set_parameters_does_nothing_if_no_parameters_sent(configuration): base_execution_type = tasks.BaseTaskType(**configuration) - base_execution_type._set_parameters() + base_execution_type._set_parameters(params={}) def test_base_task_set_parameters_sets_environ_vars_if_sent( @@ -44,7 +45,11 @@ def test_base_task_set_parameters_sets_environ_vars_if_sent( monkeypatch.setattr(tasks.os, "environ", mock_os_environ) base_execution_type = tasks.BaseTaskType(**configuration) - base_execution_type._set_parameters(parameters={"x": 10}) + + class Parameter(BaseModel): + x: int = 10 + + base_execution_type._set_parameters(Parameter()) assert mock_os_environ[defaults.PARAMETER_PREFIX + "x"] == "10" @@ -56,12 +61,12 @@ class DummyModule: def __init__(self): self.func = dummy_func - monkeypatch.setattr(tasks.utils, "get_module_and_func_names", mocker.MagicMock(return_value=("idk", "func"))) + monkeypatch.setattr(tasks.utils, "get_module_and_attr_names", mocker.MagicMock(return_value=("idk", "func"))) monkeypatch.setattr(tasks.importlib, "import_module", mocker.MagicMock(return_value=DummyModule())) monkeypatch.setattr(tasks.BaseTaskType, "output_to_file", mocker.MagicMock(return_value=contextlib.nullcontext())) - monkeypatch.setattr(tasks.utils, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) + monkeypatch.setattr(tasks.parameters, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) configuration["command"] = "dummy" py_exec = tasks.PythonTaskType(**configuration) @@ -76,10 +81,10 @@ class DummyModule: def __init__(self): self.func = dummy_func - monkeypatch.setattr(tasks.utils, "get_module_and_func_names", mocker.MagicMock(return_value=("idk", "func"))) + monkeypatch.setattr(tasks.utils, "get_module_and_attr_names", mocker.MagicMock(return_value=("idk", "func"))) monkeypatch.setattr(tasks.importlib, "import_module", mocker.MagicMock(return_value=DummyModule())) monkeypatch.setattr(tasks.BaseTaskType, "output_to_file", mocker.MagicMock(return_value=contextlib.nullcontext())) - monkeypatch.setattr(tasks.utils, "filter_arguments_for_func", mocker.MagicMock(return_value={})) + monkeypatch.setattr(tasks.parameters, "filter_arguments_for_func", mocker.MagicMock(return_value={})) configuration["command"] = "dummy" py_exec = tasks.PythonTaskType(**configuration) @@ -95,11 +100,11 @@ class DummyModule: def __init__(self): self.func = dummy_func - monkeypatch.setattr(tasks.utils, "get_module_and_func_names", mocker.MagicMock(return_value=("idk", "func"))) + monkeypatch.setattr(tasks.utils, "get_module_and_attr_names", mocker.MagicMock(return_value=("idk", "func"))) monkeypatch.setattr(tasks.importlib, "import_module", mocker.MagicMock(return_value=DummyModule())) monkeypatch.setattr(tasks.BaseTaskType, "output_to_file", mocker.MagicMock(return_value=contextlib.nullcontext())) - monkeypatch.setattr(tasks.utils, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) + monkeypatch.setattr(tasks.parameters, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) configuration["command"] = "dummy" py_exec = tasks.PythonTaskType(**configuration) @@ -114,10 +119,10 @@ class DummyModule: def __init__(self): self.func = dummy_func - monkeypatch.setattr(tasks.utils, "get_module_and_func_names", mocker.MagicMock(return_value=("idk", "func"))) + monkeypatch.setattr(tasks.utils, "get_module_and_attr_names", mocker.MagicMock(return_value=("idk", "func"))) monkeypatch.setattr(tasks.importlib, "import_module", 
mocker.MagicMock(return_value=DummyModule())) monkeypatch.setattr(tasks.BaseTaskType, "output_to_file", mocker.MagicMock(return_value=contextlib.nullcontext())) - monkeypatch.setattr(tasks.utils, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) + monkeypatch.setattr(tasks.parameters, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) configuration["command"] = "dummy" py_exec = tasks.PythonTaskType(**configuration) @@ -132,11 +137,11 @@ class DummyModule: def __init__(self): self.func = dummy_func - monkeypatch.setattr(tasks.utils, "get_module_and_func_names", mocker.MagicMock(return_value=("idk", "func"))) + monkeypatch.setattr(tasks.utils, "get_module_and_attr_names", mocker.MagicMock(return_value=("idk", "func"))) monkeypatch.setattr(tasks.importlib, "import_module", mocker.MagicMock(return_value=DummyModule())) monkeypatch.setattr(tasks.BaseTaskType, "output_to_file", mocker.MagicMock(return_value=contextlib.nullcontext())) monkeypatch.setattr( - tasks.utils, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1, "map_name": "map_value"}) + tasks.parameters, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1, "map_name": "map_value"}) ) configuration["command"] = "dummy" @@ -146,16 +151,19 @@ def __init__(self): def test_python_task_command_sets_env_variable_of_return_values(mocker, monkeypatch, configuration): - dummy_func = mocker.MagicMock(return_value={"a": 10}) + class Parameter(BaseModel): + a: int = 10 + + dummy_func = mocker.MagicMock(return_value=Parameter()) class DummyModule: def __init__(self): self.func = dummy_func - monkeypatch.setattr(tasks.utils, "get_module_and_func_names", mocker.MagicMock(return_value=("idk", "func"))) + monkeypatch.setattr(tasks.utils, "get_module_and_attr_names", mocker.MagicMock(return_value=("idk", "func"))) monkeypatch.setattr(tasks.importlib, "import_module", mocker.MagicMock(return_value=DummyModule())) monkeypatch.setattr(tasks.BaseTaskType, "output_to_file", mocker.MagicMock(return_value=contextlib.nullcontext())) - monkeypatch.setattr(tasks.utils, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) + monkeypatch.setattr(tasks.parameters, "filter_arguments_for_func", mocker.MagicMock(return_value={"a": 1})) configuration["command"] = "dummy" py_exec = tasks.PythonTaskType(**configuration) @@ -167,13 +175,6 @@ def __init__(self): del os.environ[defaults.PARAMETER_PREFIX + "a"] -def test_python_lambda_task_type_execute_command_raises_for_under_and_dunder(): - lambda_exec = tasks.PythonLambdaTaskType(command="_ and __", node_name="dummy") - - with pytest.raises(Exception): - lambda_exec.execute_command() - - def test_notebook_raises_exception_if_command_is_not_a_notebook(): with pytest.raises(Exception): tasks.NotebookTaskType(command="path to notebook") @@ -184,3 +185,42 @@ def test_notebook_raises_exception_if_ploomber_is_not_installed(mocker, monkeypa with pytest.raises(Exception): task_exec.execute_command() + + +def test_shell_task_type_can_gather_env_vars_on_return(mocker, monkeypatch): + mock_set_params = mocker.MagicMock() + mock_output_to_file = mocker.MagicMock() + monkeypatch.setattr(tasks.ShellTaskType, "_set_parameters", mock_set_params) + monkeypatch.setattr(tasks.ShellTaskType, "output_to_file", mock_output_to_file) + + shell_task = tasks.ShellTaskType(command="export MAGNUS_PRM_x=1", node_name="dummy") + + shell_task.execute_command() + + assert mock_set_params.call_count == 1 + + _, kwargs = mock_set_params.call_args + assert 
kwargs["params"] == tasks.EasyModel(x="1") + + +class ParamModel(BaseModel): + x: int + + +def test_shell_task_type_can_gather_env_vars_on_return(mocker, monkeypatch): + mock_set_params = mocker.MagicMock() + mock_output_to_file = mocker.MagicMock() + monkeypatch.setattr(tasks.ShellTaskType, "_set_parameters", mock_set_params) + monkeypatch.setattr(tasks.ShellTaskType, "output_to_file", mock_output_to_file) + + shell_task = tasks.ShellTaskType( + command="export MAGNUS_PRM_x=1", + node_name="dummy", + ) + + shell_task.execute_command() + + assert mock_set_params.call_count == 1 + + _, kwargs = mock_set_params.call_args + assert kwargs["params"].x == 1 diff --git a/tests/magnus/test_utils.py b/tests/magnus/test_utils.py index 938a095e..2ffb0a98 100644 --- a/tests/magnus/test_utils.py +++ b/tests/magnus/test_utils.py @@ -65,21 +65,21 @@ def test_apply_variables_applies_variables(): def test_apply_variables_applies_known_variables(): apply_to = "${var}_${var1}" - transformed = utils.apply_variables(apply_to, variables={"var": "hello"}) - assert transformed == "hello_${var1}" + with pytest.raises(KeyError): + transformed = utils.apply_variables(apply_to, variables={"var": "hello"}) def test_get_module_and_func_names_raises_exception_for_incorrect_command(): command = "hello" with pytest.raises(Exception): - utils.get_module_and_func_names(command) + utils.get_module_and_attr_names(command) def test_get_module_and_func_names_returns_module_and_func_names(): command = "module.func" - m, f = utils.get_module_and_func_names(command) + m, f = utils.get_module_and_attr_names(command) assert m == "module" assert f == "func" @@ -88,7 +88,7 @@ def test_get_module_and_func_names_returns_module_and_func_names(): def test_get_module_and_func_names_returns_module_and_func_names_inner(): command = "module1.module2.func" - m, f = utils.get_module_and_func_names(command) + m, f = utils.get_module_and_attr_names(command) assert m == "module1.module2" assert f == "func" @@ -258,13 +258,13 @@ def test_get_git_code_identity_returns_default_in_case_of_exception(mocker, monk monkeypatch.setattr(utils, "get_current_code_commit", mock_get_current_code_commit) - class MockCodeIdentity: - pass + mock_code_identity = mocker.MagicMock() + mock_run_context = mocker.MagicMock() + mock_run_context.run_log_store.create_code_identity.return_value = mock_code_identity - run_log_store = mocker.MagicMock() - run_log_store.create_code_identity.return_value = MockCodeIdentity() + monkeypatch.setattr(utils.context, "run_context", mock_run_context) - assert isinstance(utils.get_git_code_identity(run_log_store), MockCodeIdentity) + assert utils.get_git_code_identity() == mock_code_identity def test_get_git_code_identity_returns_entities_from_other_functions(monkeypatch, mocker): @@ -276,16 +276,17 @@ def test_get_git_code_identity_returns_entities_from_other_functions(monkeypatch monkeypatch.setattr(utils, "is_git_clean", mock_is_git_clean) monkeypatch.setattr(utils, "get_git_remote", mock_get_git_remote) - mock_code_id = mocker.MagicMock() + mock_code_identity = mocker.MagicMock() + mock_run_context = mocker.MagicMock() + mock_run_context.run_log_store.create_code_identity.return_value = mock_code_identity - run_log_store = mocker.MagicMock() - run_log_store.create_code_identity.return_value = mock_code_id + monkeypatch.setattr(utils.context, "run_context", mock_run_context) - utils.get_git_code_identity(run_log_store) + utils.get_git_code_identity() - assert mock_code_id.code_identifier == "code commit" - assert 
mock_code_id.code_identifier_dependable is False - assert mock_code_id.code_identifier_url == "git remote" + assert mock_code_identity.code_identifier == "code commit" + assert mock_code_identity.code_identifier_dependable is False + assert mock_code_identity.code_identifier_url == "git remote" def test_remove_prefix_returns_text_as_found_if_prefix_not_found(): @@ -306,34 +307,6 @@ def test_remove_prefix_returns_text_removes_prefix_if_found_full(): assert utils.remove_prefix(text, "hi") == "" -def test_get_user_set_parameters_does_nothing_if_prefix_does_not_match(monkeypatch): - monkeypatch.setenv("random", "value") - - assert utils.get_user_set_parameters() == {} - - -def test_get_user_set_parameters_returns_the_parameter_if_prefix_match_int(monkeypatch): - monkeypatch.setenv(defaults.PARAMETER_PREFIX + "key", "1") - - assert utils.get_user_set_parameters() == {"key": 1} - - -def test_get_user_set_parameters_returns_the_parameter_if_prefix_match_string(monkeypatch): - monkeypatch.setenv(defaults.PARAMETER_PREFIX + "key", '"value"') - - assert utils.get_user_set_parameters() == {"key": "value"} - - -def test_get_user_set_parameters_removes_the_parameter_if_prefix_match_remove(monkeypatch): - monkeypatch.setenv(defaults.PARAMETER_PREFIX + "key", "1") - - assert defaults.PARAMETER_PREFIX + "key" in os.environ - - utils.get_user_set_parameters(remove=True) - - assert defaults.PARAMETER_PREFIX + "key" not in os.environ - - def test_get_tracked_data_does_nothing_if_prefix_does_not_match(monkeypatch): monkeypatch.setenv("random", "value") @@ -392,43 +365,21 @@ def test_get_local_docker_image_id_returns_none_in_exception(mocker, monkeypatch assert utils.get_local_docker_image_id("test") == "" -def test_filter_arguments_for_func_works_only_named_arguments_in_func_spec(): - def my_func(a, b): - pass - - parameters = {"a": 1, "b": 1} - - assert parameters == utils.filter_arguments_for_func(my_func, parameters, map_variable=None) - - -def test_filter_arguments_for_func_returns_empty_if_no_parameters(): - def my_func(a=2, b=1): - pass +def test_get_node_execution_command_returns_magnus_execute(mocker, monkeypatch): + import logging - parameters = {} + mock_context = mocker.MagicMock() + mock_context.run_context.run_id = "test_run_id" + mock_context.run_context.pipeline_file = "test_pipeline_file" + mock_context.run_context.configuration_file = "test_configuration_file" + mock_context.run_context.parameters_file = "test_parameters_file" + mock_context.run_context.tag = "test_tag" - assert parameters == utils.filter_arguments_for_func(my_func, parameters, map_variable=None) + monkeypatch.setattr(utils, "context", mock_context) - -def test_filter_arguments_for_func_identifies_args_from_map_variables(): - def my_func(y_i, a=2, b=1): - pass - - parameters = {"a": 1, "b": 1} - - assert {"a": 1, "b": 1, "y_i": "y"} == utils.filter_arguments_for_func( - my_func, parameters, map_variable={"y_i": "y"} - ) - - -def test_get_node_execution_command_returns_magnus_execute(): - class MockExecutor: - run_id = "test_run_id" - pipeline_file = "test_pipeline_file" - variables_file = None - configuration_file = None - parameters_file = None - tag = None + logger = logging.getLogger(name="magnus") + old_level = logger.level + logger.setLevel(defaults.LOG_LEVEL) class MockNode: internal_name = "test_node_id" @@ -436,83 +387,28 @@ class MockNode: def _command_friendly_name(self): return "test_node_id" - assert ( - utils.get_node_execution_command(MockExecutor(), MockNode()) - == "magnus execute_single_node test_run_id 
test_node_id --log-level WARNING --file test_pipeline_file" - ) - + test_map_variable = {"a": "b"} + try: + assert utils.get_node_execution_command(MockNode(), map_variable=test_map_variable) == ( + "magnus execute_single_node test_run_id test_node_id " + f"--log-level WARNING --file test_pipeline_file --map-variable '{json.dumps(test_map_variable)}' --config-file test_configuration_file " + "--parameters-file test_parameters_file --tag test_tag" + ) + finally: + logger.setLevel(old_level) -def test_get_node_execution_command_overwrites_run_id_if_asked(): - class MockExecutor: - run_id = "test_run_id" - pipeline_file = "test_pipeline_file" - variables_file = None - configuration_file = None - parameters_file = None - tag = None - - class MockNode: - internal_name = "test_node_id" - - def _command_friendly_name(self): - return "test_node_id" - - assert ( - utils.get_node_execution_command(MockExecutor(), MockNode(), over_write_run_id="override") - == "magnus execute_single_node override test_node_id --log-level WARNING --file test_pipeline_file" - ) - - -def test_get_node_execution_command_returns_magnus_execute_appends_variables_file(): - class MockExecutor: - run_id = "test_run_id" - pipeline_file = "test_pipeline_file" - configuration_file = None - parameters_file = None - tag = None - - class MockNode: - internal_name = "test_node_id" - - def _command_friendly_name(self): - return "test_node_id" - - assert ( - utils.get_node_execution_command(MockExecutor(), MockNode()) - == "magnus execute_single_node test_run_id test_node_id --log-level WARNING --file test_pipeline_file" - ) - - -def test_get_node_execution_command_returns_magnus_execute_appends_parameters_file(): - class MockExecutor: - run_id = "test_run_id" - pipeline_file = "test_pipeline_file" - variables_file = None - configuration_file = None - parameters_file = "test_parameters_file" - tag = None - - class MockNode: - internal_name = "test_node_id" - - def _command_friendly_name(self): - return "test_node_id" - assert ( - utils.get_node_execution_command(MockExecutor(), MockNode()) - == "magnus execute_single_node test_run_id test_node_id --log-level WARNING --file test_pipeline_file" - " --parameters-file test_parameters_file" - ) +def test_get_node_execution_command_overwrites_run_id_if_asked(mocker, monkeypatch): + import logging + mock_context = mocker.MagicMock() + mock_context.run_context.run_id = "test_run_id" + mock_context.run_context.pipeline_file = "test_pipeline_file" + mock_context.run_context.configuration_file = "test_configuration_file" + mock_context.run_context.parameters_file = "test_parameters_file" + mock_context.run_context.tag = "test_tag" -def test_get_node_execution_command_returns_magnus_execute_appends_map_variable(): - class MockExecutor: - run_id = "test_run_id" - pipeline_file = "test_pipeline_file" - variables_file = None - configuration_file = None - parameters_file = None - tag = None + monkeypatch.setattr(utils, "context", mock_context) class MockNode: internal_name = "test_node_id" @@ -520,12 +416,21 @@ class MockNode: def _command_friendly_name(self): return "test_node_id" - map_variable = {"test_map": "map_value"} - json_dump = json.dumps(map_variable) - assert ( - utils.get_node_execution_command(MockExecutor(), MockNode(), map_variable=map_variable) - == f"magnus execute_single_node test_run_id test_node_id --log-level WARNING --file test_pipeline_file --map-variable '{json_dump}'" # noqa ) - ) + logger = logging.getLogger(name="magnus") + old_level = logger.level + 
logger.setLevel(defaults.LOG_LEVEL) + + test_map_variable = {"a": "b"} + try: + assert utils.get_node_execution_command( + MockNode(), map_variable=test_map_variable, over_write_run_id="this" + ) == ( + "magnus execute_single_node this test_node_id " + f"--log-level WARNING --file test_pipeline_file --map-variable '{json.dumps(test_map_variable)}' --config-file test_configuration_file " + "--parameters-file test_parameters_file --tag test_tag" + ) + finally: + logger.setLevel(old_level) def test_get_service_base_class_throws_exception_for_unknown_service(): diff --git a/tests/scenarios/archive/input.ipynb b/tests/scenarios/archive/input.ipynb deleted file mode 100644 index 44747bdb..00000000 --- a/tests/scenarios/archive/input.ipynb +++ /dev/null @@ -1,151 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "aggregate-issue", - "metadata": { - "execution": { - "iopub.execute_input": "2022-02-04T14:28:34.741059Z", - "iopub.status.busy": "2022-02-04T14:28:34.739648Z", - "iopub.status.idle": "2022-02-04T14:28:34.742651Z", - "shell.execute_reply": "2022-02-04T14:28:34.743442Z" - }, - "papermill": { - "duration": 0.029046, - "end_time": "2022-02-04T14:28:34.744121", - "exception": false, - "start_time": "2022-02-04T14:28:34.715075", - "status": "completed" - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "msg = None" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "28f19654", - "metadata": { - "execution": { - "iopub.execute_input": "2022-02-04T14:28:34.770267Z", - "iopub.status.busy": "2022-02-04T14:28:34.769350Z", - "iopub.status.idle": "2022-02-04T14:28:34.771241Z", - "shell.execute_reply": "2022-02-04T14:28:34.771681Z" - }, - "papermill": { - "duration": 0.015649, - "end_time": "2022-02-04T14:28:34.771892", - "exception": false, - "start_time": "2022-02-04T14:28:34.756243", - "status": "completed" - }, - "tags": [ - "injected-parameters" - ] - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "clean-belfast", - "metadata": { - "execution": { - "iopub.execute_input": "2022-02-04T14:28:34.796281Z", - "iopub.status.busy": "2022-02-04T14:28:34.795522Z", - "iopub.status.idle": "2022-02-04T14:28:34.799091Z", - "shell.execute_reply": "2022-02-04T14:28:34.799654Z" - }, - "papermill": { - "duration": 0.018377, - "end_time": "2022-02-04T14:28:34.799857", - "exception": false, - "start_time": "2022-02-04T14:28:34.781480", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "hello\n" - ] - } - ], - "source": [ - "print(msg)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "convenient-adjustment", - "metadata": { - "papermill": { - "duration": 0.011496, - "end_time": "2022-02-04T14:28:34.823424", - "exception": false, - "start_time": "2022-02-04T14:28:34.811928", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from magnus import magnus_session\n", - "\n", - "magnus_session()" - ] - } - ], - "metadata": { - "celltoolbar": "Tags", - "kernelspec": { - "display_name": "magnus-DErr9fJO-py3.8", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13 (default, Jan 9 2023, 10:00:25) \n[Clang 14.0.0 (clang-1400.0.29.202)]" 
- }, - "papermill": { - "default_parameters": {}, - "duration": 1.94479, - "end_time": "2022-02-04T14:28:35.167342", - "environment_variables": {}, - "exception": null, - "input_path": "input.ipynb", - "output_path": "input.ipynb", - "parameters": { - "msg": "hello" - }, - "start_time": "2022-02-04T14:28:33.222552", - "version": "2.3.4" - }, - "vscode": { - "interpreter": { - "hash": "dfd8d094d5db4af2e9fdaf00242a57c27f89d29bba160bde85b773fe4c5c37e5" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/scenarios/archive/input_out.ipynb b/tests/scenarios/archive/input_out.ipynb deleted file mode 100644 index 2935b501..00000000 --- a/tests/scenarios/archive/input_out.ipynb +++ /dev/null @@ -1,145 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "aggregate-issue", - "metadata": { - "execution": { - "iopub.execute_input": "2022-02-06T21:34:52.997833Z", - "iopub.status.busy": "2022-02-06T21:34:52.996709Z", - "iopub.status.idle": "2022-02-06T21:34:52.999011Z", - "shell.execute_reply": "2022-02-06T21:34:52.999512Z" - }, - "papermill": { - "duration": 0.029469, - "end_time": "2022-02-06T21:34:52.999744", - "exception": false, - "start_time": "2022-02-06T21:34:52.970275", - "status": "completed" - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "msg = None" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b537ca1f", - "metadata": { - "execution": { - "iopub.execute_input": "2022-02-06T21:34:53.028385Z", - "iopub.status.busy": "2022-02-06T21:34:53.026455Z", - "iopub.status.idle": "2022-02-06T21:34:53.030260Z", - "shell.execute_reply": "2022-02-06T21:34:53.030890Z" - }, - "papermill": { - "duration": 0.019758, - "end_time": "2022-02-06T21:34:53.031177", - "exception": false, - "start_time": "2022-02-06T21:34:53.011419", - "status": "completed" - }, - "tags": [ - "injected-parameters" - ] - }, - "outputs": [], - "source": [ - "# Parameters\n", - "msg = \"hello\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "clean-belfast", - "metadata": { - "execution": { - "iopub.execute_input": "2022-02-06T21:34:53.055557Z", - "iopub.status.busy": "2022-02-06T21:34:53.054609Z", - "iopub.status.idle": "2022-02-06T21:34:53.058079Z", - "shell.execute_reply": "2022-02-06T21:34:53.058549Z" - }, - "papermill": { - "duration": 0.017318, - "end_time": "2022-02-06T21:34:53.058723", - "exception": false, - "start_time": "2022-02-06T21:34:53.041405", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "hello\n" - ] - } - ], - "source": [ - "print(msg)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "convenient-adjustment", - "metadata": { - "papermill": { - "duration": 0.012439, - "end_time": "2022-02-06T21:34:53.082020", - "exception": false, - "start_time": "2022-02-06T21:34:53.069581", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "celltoolbar": "Tags", - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "papermill": { - "default_parameters": {}, - "duration": 3.12809, - "end_time": "2022-02-06T21:34:53.324219", - "environment_variables": {}, - 
"exception": null, - "input_path": "tests/scenarios/input.ipynb", - "output_path": "tests/scenarios/input_out.ipynb", - "parameters": { - "msg": "hello" - }, - "start_time": "2022-02-06T21:34:50.196129", - "version": "2.3.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/scenarios/archive/local.yaml b/tests/scenarios/archive/local.yaml deleted file mode 100644 index 37cf7e27..00000000 --- a/tests/scenarios/archive/local.yaml +++ /dev/null @@ -1,7 +0,0 @@ -executor: - type: local - config: - enable_parallel: "False" - -run_log_store: - type: file-system diff --git a/tests/scenarios/archive/local_container.yaml b/tests/scenarios/archive/local_container.yaml deleted file mode 100644 index 225d37ff..00000000 --- a/tests/scenarios/archive/local_container.yaml +++ /dev/null @@ -1,8 +0,0 @@ -executor: - type: local-container - config: - enable_parallel: "False" - docker_image: magnus_app - -run_log_store: - type: file-system diff --git a/tests/scenarios/archive/local_demo_render.yaml b/tests/scenarios/archive/local_demo_render.yaml deleted file mode 100644 index 7181b52c..00000000 --- a/tests/scenarios/archive/local_demo_render.yaml +++ /dev/null @@ -1,7 +0,0 @@ -executor: - type: demo-renderer - config: - enable_parallel: "False" - -run_log_store: - type: file-system diff --git a/tests/scenarios/archive/only_for_testing.py b/tests/scenarios/archive/only_for_testing.py deleted file mode 100644 index e39948e6..00000000 --- a/tests/scenarios/archive/only_for_testing.py +++ /dev/null @@ -1,5 +0,0 @@ -# TODO: Write a programmatic way of testing all the scenarios - - -def get_map_test_variables(): - return {"map_variable_test": ["a", "b", "c"]} diff --git a/tests/scenarios/archive/parameters.yaml b/tests/scenarios/archive/parameters.yaml deleted file mode 100644 index e5cfddc4..00000000 --- a/tests/scenarios/archive/parameters.yaml +++ /dev/null @@ -1 +0,0 @@ -x: 3 diff --git a/tests/scenarios/archive/test_dag_as_is.yaml b/tests/scenarios/archive/test_dag_as_is.yaml deleted file mode 100644 index a91cb3e0..00000000 --- a/tests/scenarios/archive/test_dag_as_is.yaml +++ /dev/null @@ -1,25 +0,0 @@ -dag: - description: DAG for testing with as-is - start_at: step1 - steps: - step1: - type: as-is - next: step2 - command_type: shell - catalog: - compute_data_folder: "tests/scenarios/test_data" - put: - - "*" - step2: - type: as-is - next: success - catalog: - compute_data_folder: "tests/scenarios/test_data" - get: - - "*" - put: - - "*" - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_as_is_wrong_catalog.yaml b/tests/scenarios/archive/test_dag_as_is_wrong_catalog.yaml deleted file mode 100644 index bcda267f..00000000 --- a/tests/scenarios/archive/test_dag_as_is_wrong_catalog.yaml +++ /dev/null @@ -1,26 +0,0 @@ -dag: - description: DAG for testing with as-is but fails as start step has get from catalog - start_at: step1 - steps: - step1: - type: as-is - next: step2 - catalog: - compute_data_folder: "tests/scenarios/test_data" - get: - - "*" # Wrong! 
- put: - - "*" - step2: - type: as-is - next: success - catalog: - compute_data_folder: "tests/scenarios/test_data" - get: - - "*" - put: - - "*" - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_map.yaml b/tests/scenarios/archive/test_dag_map.yaml deleted file mode 100644 index a0c527b6..00000000 --- a/tests/scenarios/archive/test_dag_map.yaml +++ /dev/null @@ -1,27 +0,0 @@ -dag: - description: DAG for testing with as-is - start_at: step1 - steps: - step1: - type: task - command: tests.scenarios.only_for_testing.get_map_test_variables - next: step2 - step2: - type: map - iterate_on: map_variable_test - iterate_as: "dummy_iter" - next: success - branch: - start_at: step_1 - steps: - step_1: - type: as-is - next: success - success: - type: success - fail: - type: fail - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_nested_dag.yaml b/tests/scenarios/archive/test_dag_nested_dag.yaml deleted file mode 100644 index febed3e0..00000000 --- a/tests/scenarios/archive/test_dag_nested_dag.yaml +++ /dev/null @@ -1,47 +0,0 @@ -dag: - description: DAG for nested graph with as-is - start_at: step1 - steps: - step1: - type: task - command: tests.scenarios.only_for_testing.get_map_test_variables - next: step_dag_within_parallel - step_dag_within_parallel: - type: parallel - next: step_dag_within_map - branches: - branch_1: - start_at: step_1 - steps: - step_1: - type: dag - dag_definition: tests/scenarios/test_dag_as_is.yaml - next: success - success: - type: success - fail: - type: fail - step_dag_within_map: - type: map - iterate_on: map_variable_test - iterate_as: "dummy_iter" - next: step_dag_within_dag - branch: - start_at: step_1 - steps: - step_1: - type: dag - dag_definition: tests/scenarios/test_dag_as_is.yaml - next: success - success: - type: success - fail: - type: fail - step_dag_within_dag: - type: dag - dag_definition: tests/scenarios/test_dag_within_dag_as_is.yaml - next: success - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_nested_parallel.yaml b/tests/scenarios/archive/test_dag_nested_parallel.yaml deleted file mode 100644 index e65feb51..00000000 --- a/tests/scenarios/archive/test_dag_nested_parallel.yaml +++ /dev/null @@ -1,67 +0,0 @@ -dag: - description: DAG for nested graph with as-is - start_at: step1 - steps: - step1: - type: task - command: tests.scenarios.only_for_testing.get_map_test_variables - next: step_parallel_within_dag - step_parallel_within_dag: - type: dag - dag_definition: tests/scenarios/test_dag_parallel_as_is.yaml - next: step_parallel_within_parallel - step_parallel_within_parallel: - type: parallel - next: step_parallel_within_map - branches: - branch_1: - start_at: step_1 - steps: - step_1: - type: parallel - next: success - branches: - branch_1: - start_at: step_1 - steps: - step_1: - type: as-is - next: success - success: - type: success - fail: - type: fail - success: - type: success - fail: - type: fail - step_parallel_within_map: - type: map - iterate_on: map_variable_test - iterate_as: "dummy_iter" - next: success - branch: - start_at: step_1 - steps: - step_1: - type: parallel - next: success - branches: - branch_1: - start_at: step_1 - steps: - step_1: - type: as-is - next: success - success: - type: success - fail: - type: fail - success: - type: success - fail: - type: fail - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_notebook.yaml 
b/tests/scenarios/archive/test_dag_notebook.yaml deleted file mode 100644 index 79d9c52b..00000000 --- a/tests/scenarios/archive/test_dag_notebook.yaml +++ /dev/null @@ -1,17 +0,0 @@ -dag: - description: DAG for testing with as-is - start_at: step1 - steps: - step1: - type: task - next: step2 - command_type: notebook - command: "tests/scenarios/input.ipynb" - command_config: - step2: - type: as-is - next: success - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_parallel_as_is.yaml b/tests/scenarios/archive/test_dag_parallel_as_is.yaml deleted file mode 100644 index af2367ab..00000000 --- a/tests/scenarios/archive/test_dag_parallel_as_is.yaml +++ /dev/null @@ -1,35 +0,0 @@ -dag: - description: DAG for testing with as-is and parallel - start_at: step1 - steps: - step1: - type: as-is - next: step2 - step2: - type: parallel - next: success - branches: - branch_1: - start_at: step_1 - steps: - step_1: - type: as-is - next: success - success: - type: success - fail: - type: fail - branch_2: - start_at: step_1 - steps: - step_1: - type: as-is - next: success - success: - type: success - fail: - type: fail - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_python_lambda.yaml b/tests/scenarios/archive/test_dag_python_lambda.yaml deleted file mode 100644 index 4e33c533..00000000 --- a/tests/scenarios/archive/test_dag_python_lambda.yaml +++ /dev/null @@ -1,26 +0,0 @@ -dag: - description: DAG for testing with as-is - start_at: step1 - steps: - step1: - type: task - command_type: python-lambda - command: "lambda x: {'x': int(x) + 1}" - next: step2 - catalog: - compute_data_folder: "tests/scenarios/test_data" - put: - - "*" - step2: - type: as-is - next: success - catalog: - compute_data_folder: "tests/scenarios/test_data" - get: - - "*" - put: - - "*" - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/archive/test_dag_within_dag_as_is.yaml b/tests/scenarios/archive/test_dag_within_dag_as_is.yaml deleted file mode 100644 index 65d25b84..00000000 --- a/tests/scenarios/archive/test_dag_within_dag_as_is.yaml +++ /dev/null @@ -1,15 +0,0 @@ -dag: - description: DAG for testing with as-is - start_at: step1 - steps: - step1: - type: as-is - next: step2 - step2: - type: dag - dag_definition: tests/scenarios/test_dag_as_is.yaml - next: success - success: - type: success - fail: - type: fail diff --git a/tests/scenarios/conftest.py b/tests/scenarios/conftest.py deleted file mode 100644 index 7e4a93ac..00000000 --- a/tests/scenarios/conftest.py +++ /dev/null @@ -1,137 +0,0 @@ -import pytest - -from magnus import graph - - -@pytest.fixture -def as_is_node(): - def _closure(name, next_node, on_failure=""): - step_config = { - "command": "does not matter", - "command_type": "python", - "type": "as-is", - "next": next_node, - "on_failure": on_failure, - } - - return graph.create_node(name=name, step_config=step_config) - - return _closure - - -@pytest.fixture -def as_is_container_node(): - def _closure(name, next_node, on_failure=""): - step_config = { - "command": "does not matter", - "command_type": "python", - "type": "as-is", - "next": next_node, - "on_failure": on_failure, - "executor_config": {"local-container": {"run_in_local": True}}, - } - return graph.create_node(name=name, step_config=step_config) - - return _closure - - -@pytest.fixture -def exception_node(): - def _closure(name, next_node, on_failure=""): - step_config = { - "command": "exit 1", - "command_type": "shell", - "type": "task", - 
"next": next_node, - "on_failure": on_failure, - } - - return graph.create_node(name=name, step_config=step_config) - - return _closure - - -@pytest.fixture -def parallel_node(): - def _closure(name, branch, next_node): - step_config = { - "type": "parallel", - "next": next_node, - "branches": {"a": branch()._to_dict(), "b": branch()._to_dict()}, - } - return graph.create_node(name=name, step_config=step_config) - - return _closure - - -@pytest.fixture -def success_graph(as_is_node): - def _closure(): - dag = graph.Graph(start_at="first") - dag.add_node(as_is_node("first", "second")) - dag.add_node(as_is_node("second", "success")) - dag.add_terminal_nodes() - return dag - - return _closure - - -@pytest.fixture -def success_container_graph(as_is_container_node): - def _closure(): - dag = graph.Graph(start_at="first") - dag.add_node(as_is_container_node("first", "second")) - dag.add_node(as_is_container_node("second", "success")) - dag.add_terminal_nodes() - return dag - - return _closure - - -@pytest.fixture -def fail_graph(exception_node): - def _closure(): - dag = graph.Graph(start_at="first") - dag.add_node(exception_node("first", "success")) - dag.add_terminal_nodes() - return dag - - return _closure - - -@pytest.fixture -def on_fail_graph(as_is_node, exception_node): - def _closure(): - dag = graph.Graph(start_at="first") - first_node = exception_node("first", "second", "third") - dag.add_node(first_node) - dag.add_node(as_is_node("second", "third")) - dag.add_node(as_is_node("third", "success")) - dag.add_terminal_nodes() - return dag - - return _closure - - -@pytest.fixture -def parallel_success_graph(as_is_node, parallel_node, success_graph): - def _closure(): - dag = graph.Graph(start_at="first") - dag.add_node(as_is_node("first", "second")) - dag.add_node(parallel_node(name="second", branch=success_graph, next_node="success")) - dag.add_terminal_nodes() - return dag - - return _closure - - -@pytest.fixture -def parallel_fail_graph(as_is_node, parallel_node, fail_graph): - def _closure(): - dag = graph.Graph(start_at="first") - dag.add_node(as_is_node("first", "second")) - dag.add_node(parallel_node(name="second", branch=fail_graph, next_node="success")) - dag.add_terminal_nodes() - return dag - - return _closure diff --git a/tests/scenarios/test_scenarios.txt b/tests/scenarios/test_scenarios.txt deleted file mode 100644 index 72526568..00000000 --- a/tests/scenarios/test_scenarios.txt +++ /dev/null @@ -1,32 +0,0 @@ -test traversal's with as-is nodes: - - two node dag with terminal nodes - - two node dag with first node failing - - two node dag with first node failing and on_failure to second node - - two node with one being parallel - -Test the following scenarios: - -For catalog settings: -1). Using get from the catalog at the start of the pipeline should error out. -2). Having no compute data folder should error out. - - -For nodes: -1). Simple dag with as-is nodes. -2). Dag with parallel branches - a). Without parallel - b). with parallel -3). Dag with map node. - a). Without parallel - b). with parallel -4). Dag with embedded dag. - a). Without parallel - b). with parallel -5). Nested/complex dag. - a). Without parallel - b). With parallel - - -For re-runs. 
- -Do all the scenarios in local, local-container diff --git a/tests/scenarios/test_sdk_traversals.py b/tests/scenarios/test_sdk_traversals.py new file mode 100644 index 00000000..6582543b --- /dev/null +++ b/tests/scenarios/test_sdk_traversals.py @@ -0,0 +1,76 @@ +import pytest +from rich import print + +from magnus import Stub, Pipeline, Parallel + + +@pytest.mark.no_cover +def test_sequence_next_node(): + first = Stub(name="first", next="second") + second = Stub(name="second", terminate_with_success=True) + + pipeline = Pipeline(steps=[first, second], start_at=first, add_terminal_nodes=True) + + run_log = pipeline.execute() + + assert len(run_log.steps) == 3 + + +@pytest.mark.no_cover +def test_sequence_depends_on(): + first = Stub(name="first") + second = Stub(name="second", terminate_with_success=True).depends_on(first) + + pipeline = Pipeline(steps=[first, second], start_at=first, add_terminal_nodes=True) + + run_log = pipeline.execute() + + assert len(run_log.steps) == 3 + + +@pytest.mark.no_cover +def test_sequence_rshift(): + first = Stub(name="first") + second = Stub(name="second", terminate_with_success=True) + + first >> second + + pipeline = Pipeline(steps=[first, second], start_at=first, add_terminal_nodes=True) + + run_log = pipeline.execute() + + assert len(run_log.steps) == 3 + + +@pytest.mark.no_cover +def test_sequence_lshift(): + first = Stub(name="first") + second = Stub(name="second", terminate_with_success=True) + + second << first + + pipeline = Pipeline(steps=[first, second], start_at=first, add_terminal_nodes=True) + + run_log = pipeline.execute() + + assert len(run_log.steps) == 3 + + +@pytest.mark.no_cover +def test_parallel(): + first = Stub(name="first") + second = Stub(name="second").depends_on(first) + + branch_first = Stub(name="branch_first", next="branch_second") + branch_second = Stub(name="branch_second", terminate_with_success=True) + + branch_a = Pipeline(steps=[branch_first, branch_second], start_at=branch_first, add_terminal_nodes=True) + branch_b = Pipeline(steps=[branch_first, branch_second], start_at=branch_first, add_terminal_nodes=True) + + parallel_node = Parallel(name="parallel_step", branches={"a": branch_a, "b": branch_b}, terminate_with_success=True) + parallel_node << second + + parent_pipeline = Pipeline(steps=[first, second, parallel_node], start_at=first) + run_log = parent_pipeline.execute() + + assert len(run_log.steps) == 4 diff --git a/tests/scenarios/test_traversals.py b/tests/scenarios/test_traversals.py index 1e7f434b..be45e4a6 100644 --- a/tests/scenarios/test_traversals.py +++ b/tests/scenarios/test_traversals.py @@ -1,29 +1,17 @@ # ruff: noqa -import random -import string import tempfile from pathlib import Path +from rich import print import pytest import ruamel.yaml -from magnus import AsIs, Pipeline, Task, defaults, pipeline, utils +from magnus import defaults, entrypoints, utils yaml = ruamel.yaml.YAML() - -def random_run_id(): - alphabet = string.ascii_lowercase + string.digits - return "".join(random.choices(alphabet, k=8)) - - -def success_function(): - pass - - -def error_function(): - raise Exception +PIPELINES_DEFINITION = Path("examples/") def get_config(): @@ -55,14 +43,10 @@ def get_chunked_config(): def get_configs(): - return [get_chunked_config(), get_config()] - + return [get_config(), get_chunked_config()] -def write_dag_and_config(work_dir: str, dag: dict, config: dict): - if dag: - with open(work_dir / "dag.yaml", "wb") as f: - yaml.dump(dag, f) +def write_config(work_dir: Path, config: dict): 
config["run_log_store"]["config"]["log_folder"] = str(work_dir) with open(work_dir / "config.yaml", "wb") as f: yaml.dump(config, f) @@ -72,154 +56,53 @@ def get_run_log(work_dir, run_id): config_file = work_dir / "config.yaml" if utils.does_file_exist(config_file): - mode_executor = pipeline.prepare_configurations(configuration_file=config_file, run_id=run_id) - return mode_executor.run_log_store.get_run_log_by_id(run_id=run_id, full=True).dict() + mode_executor = entrypoints.prepare_configurations(configuration_file=str(config_file), run_id=run_id) + return mode_executor.run_log_store.get_run_log_by_id(run_id=run_id, full=True).model_dump() raise Exception @pytest.mark.no_cover -def test_success_sdk(): - configs = get_configs() - for config in configs: - first = Task(name="first", command="tests.scenarios.test_traversals.success_function", next_node="second") - second = Task(name="second", command="tests.scenarios.test_traversals.success_function") - pipeline = Pipeline(start_at=first, name="testing") - pipeline.construct([first, second]) - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - write_dag_and_config(context_dir_path, dag=None, config=config) - - run_id = random_run_id() - pipeline.execute(configuration_file=str(context_dir_path / "config.yaml"), run_id=run_id) - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["first", "second", "success"] - except: - assert False - - -@pytest.mark.no_cover -def test_success_sdk_asis(): - configs = get_configs() - for config in configs: - first = AsIs(name="first", command="tests.scenarios.test_traversals.success_function", next_node="second") - second = AsIs(name="second", command="tests.scenarios.test_traversals.success_function") - pipeline = Pipeline(start_at=first, name="testing") - pipeline.construct([first, second]) - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - write_dag_and_config(context_dir_path, dag=None, config=config) - - run_id = "testing_success" - pipeline.execute(configuration_file=str(context_dir_path / "config.yaml"), run_id=run_id) - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["first", "second", "success"] - except: - assert False - - -@pytest.mark.no_cover -def test_success(success_graph): +def test_success(): configs = get_configs() for config in configs: with tempfile.TemporaryDirectory() as context_dir: context_dir_path = Path(context_dir) - dag = {"dag": success_graph()._to_dict()} - write_dag_and_config(context_dir_path, dag, config) + write_config(context_dir_path, config) run_id = "testing_success" - pipeline.execute( + entrypoints.execute( configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(context_dir_path / "dag.yaml"), + pipeline_file=str(PIPELINES_DEFINITION / "mocking.yaml"), run_id=run_id, ) try: run_log = get_run_log(context_dir_path, run_id) assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["first", "second", "success"] + assert list(run_log["steps"].keys()) == ["step 1", "step 2", "step 3", "success"] except: assert False @pytest.mark.no_cover -def test_success_executor_config(success_container_graph): - configs = [get_container_config()] - - for config in configs: - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - 
dag = {"dag": success_container_graph()._to_dict()} - - write_dag_and_config(context_dir_path, dag, config) - - run_id = "testing_success" - - pipeline.execute( - configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(context_dir_path / "dag.yaml"), - run_id=run_id, - ) - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["first", "second", "success"] - except: - assert False - - -@pytest.mark.no_cover -def test_fail_sdk(): - configs = get_configs() - for config in configs: - first = Task(name="first", command="tests.scenarios.test_traversals.error_function", next_node="second") - second = Task(name="second", command="tests.scenarios.test_traversals.success_function") - pipeline = Pipeline(start_at=first, name="testing") - pipeline.construct([first, second]) - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - write_dag_and_config(context_dir_path, dag=None, config=config) - - run_id = "testing_failure" - try: - pipeline.execute(configuration_file=str(context_dir_path / "config.yaml"), run_id=run_id) - except: - pass - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.FAIL - assert list(run_log["steps"].keys()) == ["first", "fail"] - except: - assert False - - -@pytest.mark.no_cover -def test_failure(fail_graph): +def test_failure(): configs = get_configs() for config in configs: with tempfile.TemporaryDirectory() as context_dir: context_dir_path = Path(context_dir) - dag = {"dag": fail_graph()._to_dict()} - write_dag_and_config(context_dir_path, dag, config) + write_config(context_dir_path, config) run_id = "testing_failure" try: - pipeline.execute( + entrypoints.execute( configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(context_dir_path / "dag.yaml"), + pipeline_file=str(PIPELINES_DEFINITION / "default-fail.yaml"), run_id=run_id, ) except Exception as ex: @@ -228,93 +111,26 @@ def test_failure(fail_graph): try: run_log = get_run_log(context_dir_path, run_id) assert run_log["status"] == defaults.FAIL - assert list(run_log["steps"].keys()) == ["first", "fail"] + assert list(run_log["steps"].keys()) == ["step 1", "step 2", "fail"] except: assert False @pytest.mark.no_cover -def test_on_fail_sdk(): +def test_on_failure(): configs = get_configs() - for config in configs: - first = Task( - name="first", - command="tests.scenarios.test_traversals.error_function", - on_failure="third", - next_node="second", - ) - second = Task(name="second", command="tests.scenarios.test_traversals.success_function", next_node="third") - third = Task(name="third", command="tests.scenarios.test_traversals.success_function") - pipeline = Pipeline(start_at=first, name="testing") - pipeline.construct([first, second, third]) with tempfile.TemporaryDirectory() as context_dir: context_dir_path = Path(context_dir) - write_dag_and_config(context_dir_path, dag=None, config=config) - run_id = "testing_on_failure" - try: - pipeline.execute(configuration_file=str(context_dir_path / "config.yaml"), run_id=run_id) - except: - pass - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["first", "third", "success"] - except: - assert False - - -@pytest.mark.no_cover -def test_on_fail_sdk_unchained(): - configs = get_configs() - - for config in configs: - first = Task( - name="first", - 
command="tests.scenarios.test_traversals.error_function", - on_failure="third", - next_node="second", - ) - second = Task(name="second", command="tests.scenarios.test_traversals.success_function") - third = Task(name="third", command="tests.scenarios.test_traversals.success_function", next_node="fail") - pipeline = Pipeline(start_at=first, name="testing") - pipeline.construct([first, second, third]) - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - write_dag_and_config(context_dir_path, dag=None, config=config) + write_config(context_dir_path, config) run_id = "testing_on_failure" - try: - pipeline.execute(configuration_file=str(context_dir_path / "config.yaml"), run_id=run_id) - except: - pass - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.FAIL - assert list(run_log["steps"].keys()) == ["first", "third", "fail"] - except: - assert False - - -@pytest.mark.no_cover -def test_on_failure(on_fail_graph): - configs = get_configs() - for config in configs: - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - dag = {"dag": on_fail_graph()._to_dict()} - - write_dag_and_config(context_dir_path, dag, config) - - run_id = "testing_failure" try: - pipeline.execute( + entrypoints.execute( configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(context_dir_path / "dag.yaml"), + pipeline_file=str(PIPELINES_DEFINITION / "on-failure.yaml"), run_id=run_id, ) except: @@ -323,77 +139,76 @@ def test_on_failure(on_fail_graph): try: run_log = get_run_log(context_dir_path, run_id) assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["first", "third", "success"] + assert list(run_log["steps"].keys()) == ["step 1", "step 3", "success"] except: assert False -@pytest.mark.no_cover -def test_parallel(parallel_success_graph): - configs = get_configs() - for config in configs: - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - dag = {"dag": parallel_success_graph()._to_dict()} - - write_dag_and_config(context_dir_path, dag, config) - run_id = "testing_parallel" - - pipeline.execute( - configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(context_dir_path / "dag.yaml"), - run_id=run_id, - ) - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["first", "second", "success"] - assert list(run_log["steps"]["second"]["branches"]["second.a"]["steps"].keys()) == [ - "second.a.first", - "second.a.second", - "second.a.success", - ] - assert list(run_log["steps"]["second"]["branches"]["second.b"]["steps"].keys()) == [ - "second.b.first", - "second.b.second", - "second.b.success", - ] - except: - assert False - - -@pytest.mark.no_cover -def test_parallel_fail(parallel_fail_graph): - configs = get_configs() - for config in configs: - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - dag = {"dag": parallel_fail_graph()._to_dict()} - - write_dag_and_config(context_dir_path, dag, config) - run_id = "testing_parallel" - - try: - pipeline.execute( - configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(context_dir_path / "dag.yaml"), - run_id=run_id, - ) - except: - pass - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.FAIL - assert list(run_log["steps"].keys()) == ["first", 
"second", "fail"] - assert list(run_log["steps"]["second"]["branches"]["second.a"]["steps"].keys()) == [ - "second.a.first", - "second.a.fail", - ] - assert list(run_log["steps"]["second"]["branches"]["second.b"]["steps"].keys()) == [ - "second.b.first", - "second.b.fail", - ] - except: - assert False +# @pytest.mark.no_cover +# def test_parallel(): +# configs = get_configs() +# for config in configs: +# with tempfile.TemporaryDirectory() as context_dir: +# context_dir_path = Path(context_dir) + +# write_config(context_dir_path, config) +# run_id = "testing_parallel" + +# entrypoints.execute( +# configuration_file=str(context_dir_path / "config.yaml"), +# pipeline_file=str(PIPELINES_DEFINITION / "concepts/parallel.yaml"), +# run_id=run_id, +# ) + +# try: +# run_log = get_run_log(context_dir_path, run_id) +# assert run_log["status"] == defaults.SUCCESS +# assert list(run_log["steps"].keys()) == ["step 1", "step 2", "step 3", "success"] +# assert list(run_log["steps"]["step 2"]["branches"]["step 2.branch_a"]["steps"].keys()) == [ +# "step 2.branch_a.step 1", +# "step 2.branch_a.step 2", +# "step 2.branch_a.success", +# ] +# assert list(run_log["steps"]["step 2"]["branches"]["step 2.branch_b"]["steps"].keys()) == [ +# "step 2.branch_b.step 1", +# "step 2.branch_b.step 2", +# "step 2.branch_b.success", +# ] +# except: +# assert False + + +# @pytest.mark.no_cover +# def test_parallel_fail(parallel_fail_graph): +# configs = get_configs() +# for config in configs: +# with tempfile.TemporaryDirectory() as context_dir: +# context_dir_path = Path(context_dir) +# dag = {"dag": parallel_fail_graph().dict()} + +# write_dag_and_config(context_dir_path, dag, config) +# run_id = "testing_parallel" + +# try: +# entrypoints.execute( +# configuration_file=str(context_dir_path / "config.yaml"), +# pipeline_file=str(context_dir_path / "dag.yaml"), +# run_id=run_id, +# ) +# except: +# pass + +# try: +# run_log = get_run_log(context_dir_path, run_id) +# assert run_log["status"] == defaults.FAIL +# assert list(run_log["steps"].keys()) == ["first", "second", "fail"] +# assert list(run_log["steps"]["second"]["branches"]["second.a"]["steps"].keys()) == [ +# "second.a.first", +# "second.a.fail", +# ] +# assert list(run_log["steps"]["second"]["branches"]["second.b"]["steps"].keys()) == [ +# "second.b.first", +# "second.b.fail", +# ] +# except: +# assert False diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 00000000..ff16f576 --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,165 @@ +from contextlib import nullcontext, contextmanager +import pytest +from pathlib import Path +import os +import importlib +import subprocess + +from magnus.entrypoints import execute +from magnus import exceptions + +# (file, is_fail?, kwargs) +examples = [ + ("concepts/catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), + ("concepts/experiment_tracking_env.yaml", False, {}), + ("concepts/experiment_tracking_env_step.yaml", False, {}), + ("concepts/map.yaml", False, {}), + ("concepts/map_shell.yaml", False, {}), + ("concepts/nesting.yaml", False, {}), + ("concepts/notebook_api_parameters.yaml", False, {"parameters_file": "examples/concepts/parameters.yaml"}), + ("concepts/notebook_env_parameters.yaml", False, {"parameters_file": "examples/concepts/parameters.yaml"}), + ("concepts/notebook_native_parameters.yaml", False, {"parameters_file": "examples/concepts/parameters.yaml"}), + ("concepts/parallel.yaml", False, {}), + ("concepts/simple_notebook.yaml", False, {}), + 
("concepts/simple.yaml", False, {}), + ("concepts/task_shell_parameters.yaml", False, {"parameters_file": "examples/parameters_initial.yaml"}), + ("concepts/task_shell_simple.yaml", False, {}), + ("concepts/traversal.yaml", False, {}), + ("catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), + ("contrived.yaml", False, {}), + ("default-fail.yaml", True, {}), + ("experiment_tracking_env.yaml", True, {}), + ("logging.yaml", False, {}), + ("mocking.yaml", False, {}), + ("on-failure.yaml", False, {}), + ("parallel-fail.yaml", True, {}), + ("parameters_env.yaml", False, {"parameters_file": "examples/parameters_initial.yaml"}), + ("parameters_flow.yaml", False, {"parameters_file": "examples/parameters_initial.yaml"}), + ("python-tasks.yaml", False, {"parameters_file": "examples/parameters_initial.yaml"}), + ("retry-fail.yaml", True, {"configuration_file": "examples/configs/fs-catalog-run_log.yaml"}), + ("retry-fixed.yaml", False, {"configuration_file": "examples/configs/fs-catalog-run_log.yaml"}), +] + + +def list_examples(): + for example in examples: + yield example + + +@pytest.mark.parametrize("example", list_examples()) +@pytest.mark.no_cover +@pytest.mark.e2e +def test_yaml_examples(example): + print(f"Testing {example}...") + examples_path = Path("examples") + file_path, status, kwargs = example + try: + full_file_path = examples_path / file_path + configuration_file = kwargs.pop("configuration_file", "") + execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) + except exceptions.ExecutionFailedError: + if not status: + raise + + +@pytest.mark.parametrize("example", list_examples()) +@pytest.mark.no_cover +@pytest.mark.e2e +def test_yaml_examples_argo(example): + print(f"Testing {example}...") + examples_path = Path("examples") + file_path, status, kwargs = example + try: + full_file_path = examples_path / file_path + kwargs.pop("configuration_file", "") + configuration_file = "examples/configs/argo-config.yaml" + execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) + subprocess.run(["argo", "lint", "--offline", "argo-pipeline.yaml"], check=True) + except exceptions.ExecutionFailedError: + if not status: + raise + + +@pytest.mark.parametrize("example", list_examples()) +@pytest.mark.no_cover +@pytest.mark.e2e_container +def test_yaml_examples_container(example): + print(f"Testing {example}...") + examples_path = Path("examples") + file_path, status, kwargs = example + try: + full_file_path = examples_path / file_path + kwargs.pop("configuration_file", "") + configuration_file = "examples/configs/local-container.yaml" + os.environ["MAGNUS_VAR_default_docker_image"] = "magnus:3.8" + execute(configuration_file=configuration_file, pipeline_file=str(full_file_path), **kwargs) + except exceptions.ExecutionFailedError: + if not status: + raise + + +@contextmanager +def secrets_env_context(): + os.environ["secret"] = "secret_value" + os.environ["MAGNUS_CONFIGURATION_FILE"] = "examples/configs/secrets-env-default.yaml" + yield + del os.environ["secret"] + del os.environ["MAGNUS_CONFIGURATION_FILE"] + + +# function, success, context +python_examples = [ + ("catalog", False, None), + ("catalog_api", False, None), + ("catalog_simple", False, None), + ("contrived", False, None), + ("mocking", False, None), + ("on_failure", False, None), + ("parameters_api", False, None), + ("parameters", False, None), + ("python-tasks", False, None), + ("secrets", False, None), + 
("secrets_env", False, secrets_env_context), + ("concepts.catalog", False, None), + ("concepts.catalog_api", False, None), + ("concepts.catalog_object", False, None), + ("concepts.experiment_tracking_api", False, None), + ("concepts.experiment_tracking_env", False, None), + ("concepts.experiment_tracking_step", False, None), + ("concepts.map", False, None), + ("concepts.nesting", False, None), + ("concepts.parallel", False, None), + ("concepts.simple", False, None), + ("concepts.task_api_parameters", False, None), + ("concepts.task_env_parameters", False, None), + ("concepts.task_native_parameters", False, None), + ("concepts.traversal", False, None), +] + + +def list_python_examples(): + for example in python_examples: + yield example + + +@pytest.mark.parametrize("example", list_python_examples()) +# @pytest.mark.no_cover +@pytest.mark.e2e +def test_python_examples(example): + print(f"Testing {example}...") + + mod, status, context = example + + if not context: + context = nullcontext() + else: + context = context() + + imported_module = importlib.import_module(f"examples.{mod}") + f = getattr(imported_module, "main") + try: + with context: + f() + except exceptions.ExecutionFailedError: + if not status: + raise diff --git a/tox.ini b/tox.ini index 362b6b71..1104c2bd 100644 --- a/tox.ini +++ b/tox.ini @@ -1,18 +1,17 @@ [tox] skipsdist = True isolated_build = True -envlist = python3.7, python3.8, mypy +envlist = python3.8, mypy [testenv] whitelist_externals = poetry commands = - poetry install -E docker - poetry run python -m pytest --cov-report term-missing --cov-report xml:cov.xml --cov=magnus/ tests/ + poetry install -E docker -E notebook + poetry run python -m pytest -m "not e2e_container" --cov=magnus/ tests/ [testenv:mypy] -deps = - mypy - +whitelist_externals = poetry commands = - mypy magnus + poetry install -E docker -E notebook + poetry run mypy magnus