From 6f44baa02c2897cb8de6a5f06e9478d8a8e5fc8d Mon Sep 17 00:00:00 2001
From: Vijay Vammi
Date: Fri, 12 Apr 2024 04:21:36 +0100
Subject: [PATCH 01/17] fix: minor bugs with on failure behaviours

---
 examples/01-tasks/notebook.py                 | 45 ++++++++++
 examples/01-tasks/notebook.yaml               | 36 ++++++++
 examples/01-tasks/python_tasks.py             | 42 ++++++++++
 examples/01-tasks/python_tasks.yaml           | 27 ++++++
 examples/01-tasks/scripts.py                  | 37 +++++++++
 examples/01-tasks/scripts.yaml                | 27 ++++++
 examples/{mocking.py => 01-tasks/stub.py}     | 13 ++-
 examples/{mocking.yaml => 01-tasks/stub.yaml} |  8 +-
 examples/02-sequential/default_fail.py        | 33 ++++++++
 .../default_fail.yaml}                        | 10 +--
 examples/02-sequential/on_failure_fail.py     | 41 ++++++++++
 examples/02-sequential/on_failure_fail.yaml   | 35 ++++++++
 examples/02-sequential/on_failure_succeed.py  | 41 ++++++++++
 .../02-sequential/on_failure_succeed.yaml     | 38 +++++++++
 examples/02-sequential/traversal.py           | 59 +++++++++++++
 examples/02-sequential/traversal.yaml         | 51 ++++++++++++
 examples/README.md                            | 38 +++++++++
 examples/common/functions.py                  |  8 ++
 examples/common/simple_notebook.ipynb         | 46 +++++++++++
 examples/common/simple_notebook_out.ipynb     | 82 +++++++++++++++++++
 ...tebook_native_parameters_consume_out.ipynb | 22 ++---
 .../notebook_native_parameters_out.ipynb      | 22 ++---
 examples/concepts/simple_notebook.ipynb       | 39 +--------
 examples/concepts/simple_notebook_out.ipynb   | 22 ++---
 examples/on-failure.yaml                      | 31 -------
 examples/on_failure.py                        | 38 ---------
 examples/tutorials/mnist/modular_source.py    |  1 +
 runnable/entrypoints.py                       |  5 ++
 runnable/extensions/executor/__init__.py      |  2 +
 runnable/sdk.py                               | 52 ++++++++----
 runnable/tasks.py                             | 24 ++++--
 31 files changed, 801 insertions(+), 174 deletions(-)
 create mode 100644 examples/01-tasks/notebook.py
 create mode 100644 examples/01-tasks/notebook.yaml
 create mode 100644 examples/01-tasks/python_tasks.py
 create mode 100644 examples/01-tasks/python_tasks.yaml
 create mode 100644 examples/01-tasks/scripts.py
 create mode 100644 examples/01-tasks/scripts.yaml
 rename examples/{mocking.py => 01-tasks/stub.py} (65%)
 rename examples/{mocking.yaml => 01-tasks/stub.yaml} (79%)
 create mode 100644 examples/02-sequential/default_fail.py
 rename examples/{default-fail.yaml => 02-sequential/default_fail.yaml} (63%)
 create mode 100644 examples/02-sequential/on_failure_fail.py
 create mode 100644 examples/02-sequential/on_failure_fail.yaml
 create mode 100644 examples/02-sequential/on_failure_succeed.py
 create mode 100644 examples/02-sequential/on_failure_succeed.yaml
 create mode 100644 examples/02-sequential/traversal.py
 create mode 100644 examples/02-sequential/traversal.yaml
 create mode 100644 examples/README.md
 create mode 100644 examples/common/functions.py
 create mode 100644 examples/common/simple_notebook.ipynb
 create mode 100644 examples/common/simple_notebook_out.ipynb
 delete mode 100644 examples/on-failure.yaml
 delete mode 100644 examples/on_failure.py

diff --git a/examples/01-tasks/notebook.py b/examples/01-tasks/notebook.py
new file mode 100644
index 00000000..db86bb66
--- /dev/null
+++ b/examples/01-tasks/notebook.py
@@ -0,0 +1,45 @@
+"""
+You can execute this pipeline by:
+
+    python examples/01-tasks/notebook.py
+
+The notebook is executed in the same environment so any installed packages are available for the 
+notebook.
+
+Upon successful execution, the output notebook with cell outputs is stored in the catalog. 
+For example, the catalog structure for this execution would be:
+
+.catalog
+└── meek-rosalind-0853
+    ├── examples
+    │   └── common
+    │   └── simple_notebook_out.ipynb
+    └── notebook.execution.log
+
+The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!".
+"""
+
+from runnable import NotebookTask, Pipeline
+
+
+def main():
+
+    # Execute the notebook present in examples/common/simple_notebook.ipynb.
+    # The path is relative to the project root.
+    # If this step executes successfully, the pipeline will terminate with success
+    hello_task = NotebookTask(
+        name="hello",
+        notebook="examples/common/simple_notebook.ipynb",
+        terminate_with_success=True,
+    )
+
+    # The pipeline has only one step.
+    pipeline = Pipeline(steps=[hello_task])
+
+    pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/01-tasks/notebook.yaml b/examples/01-tasks/notebook.yaml
new file mode 100644
index 00000000..bf6aeee3
--- /dev/null
+++ b/examples/01-tasks/notebook.yaml
@@ -0,0 +1,36 @@
+dag:
+  description: |
+    This is a sample pipeline with one step that executes a notebook.
+
+    The notebook is executed in the same environment so any installed 
+    packages are available for the notebook.
+
+    Upon successful execution, the output notebook with cell outputs 
+    is stored in the catalog.
+
+    For example, the catalog structure for this execution would be:
+
+    .catalog
+    └── meek-rosalind-0853
+        ├── examples
+        │   └── common
+        │   └── simple_notebook_out.ipynb
+        └── notebook.execution.log
+
+    The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!".
+
+    You can run this pipeline as:
+      runnable execute -f examples/01-tasks/notebook.yaml
+
+  start_at: notebook
+  steps:
+    notebook:
+      type: task
+      command_type: notebook
+      command: examples/common/simple_notebook.ipynb # The path is relative to the root of the project.
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
+
diff --git a/examples/01-tasks/python_tasks.py b/examples/01-tasks/python_tasks.py
new file mode 100644
index 00000000..7ad97d58
--- /dev/null
+++ b/examples/01-tasks/python_tasks.py
@@ -0,0 +1,42 @@
+"""
+You can execute this pipeline by:
+
+    python examples/01-tasks/python_tasks.py
+
+The stdout of "Hello World!" would be captured as execution log and stored in the catalog.
+An example of the catalog structure:
+
+.catalog
+└── baked-heyrovsky-0602
+    └── hello.execution.log
+
+2 directories, 1 file
+
+
+The hello.execution.log has the captured stdout of "Hello World!".
+"""
+
+from examples.common.functions import hello
+from runnable import Pipeline, PythonTask
+
+
+def main():
+
+    # Create a task which calls the function "hello"
+    # If this step executes successfully, the pipeline will terminate with success
+    hello_task = PythonTask(
+        name="hello",
+        function=hello,
+        terminate_with_success=True,
+    )
+
+    # The pipeline has only one step.
+    pipeline = Pipeline(steps=[hello_task])
+
+    pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/01-tasks/python_tasks.yaml b/examples/01-tasks/python_tasks.yaml
new file mode 100644
index 00000000..535584f7
--- /dev/null
+++ b/examples/01-tasks/python_tasks.yaml
@@ -0,0 +1,27 @@
+dag:
+  description: |
+    You can run this pipeline by:
+      runnable execute -f examples/01-tasks/python_tasks.yaml
+
+    The stdout of "Hello World!" would be captured as 
+    execution log and stored in the catalog. 
+
+    An example of the catalog structure:
+
+    .catalog
+    └── baked-heyrovsky-0602
+        └── hello.execution.log
+
+    2 directories, 1 file
+
+    The hello.execution.log has the captured stdout of "Hello World!".
+  start_at: hello_task
+  steps:
+    hello_task:
+      type: task # The functional unit of the pipeline which does the work.
+      command: examples.common.functions.hello # dotted path to the function.
+      next: success # If this function succeeds, mark the pipeline as success
+    success:
+      type: success
+    fail:
+      type: fail
diff --git a/examples/01-tasks/scripts.py b/examples/01-tasks/scripts.py
new file mode 100644
index 00000000..528a0fc4
--- /dev/null
+++ b/examples/01-tasks/scripts.py
@@ -0,0 +1,37 @@
+"""
+You can execute this pipeline by:
+
+    python examples/01-tasks/scripts.py
+
+The command can be anything that can be executed in a shell.
+The stdout/stderr of the execution is captured as execution log and stored in the catalog.
+
+For example:
+
+.catalog
+└── seasoned-perlman-1355
+    └── hello.execution.log
+
+"""
+
+from runnable import Pipeline, ShellTask
+
+
+def main():
+    # If this step executes successfully, the pipeline will terminate with success
+    hello_task = ShellTask(
+        name="hello",
+        command="echo 'Hello World!'",
+        terminate_with_success=True,
+    )
+
+    # The pipeline has only one step.
+    pipeline = Pipeline(steps=[hello_task])
+
+    pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/01-tasks/scripts.yaml b/examples/01-tasks/scripts.yaml
new file mode 100644
index 00000000..e4c60b55
--- /dev/null
+++ b/examples/01-tasks/scripts.yaml
@@ -0,0 +1,27 @@
+dag:
+  description: |
+    This is a sample pipeline with one step that executes a shell command.
+
+    You can run this pipeline by:
+      runnable execute -f examples/01-tasks/scripts.yaml
+
+    The command can be anything that can be executed in a shell.
+    The stdout/stderr of the execution is captured as execution log and 
+    stored in the catalog.
+
+    For example:
+    .catalog
+    └── seasoned-perlman-1355
+        └── hello.execution.log
+
+  start_at: shell
+  steps:
+    shell:
+      type: task
+      command_type: shell
+      command: echo "hello world!!" # The command to run in the shell.
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
\ No newline at end of file
diff --git a/examples/mocking.py b/examples/01-tasks/stub.py
similarity index 65%
rename from examples/mocking.py
rename to examples/01-tasks/stub.py
index a7ae7d11..ea3ac68c 100644
--- a/examples/mocking.py
+++ b/examples/01-tasks/stub.py
@@ -10,22 +10,27 @@
 to mock steps within mature pipelines. 
You can run this pipeline by: - python examples/mocking.py + python examples/01-tasks/stub.py """ from runnable import Pipeline, Stub def main(): - step1 = Stub(name="step1") # (1) + # this will always succeed + step1 = Stub(name="step1") + + # It takes arbitrary arguments + # Useful for temporarily silencing steps within mature pipelines step2 = Stub(name="step2", what="is this thing") - step3 = Stub(name="step3", terminate_with_success=True) # (3) + step3 = Stub(name="step3", terminate_with_success=True) - pipeline = Pipeline(steps=[step1, step2, step3], add_terminal_nodes=True) # (4) + pipeline = Pipeline(steps=[step1, step2, step3], add_terminal_nodes=True) pipeline.execute() + # A function that creates pipeline should always return a Pipeline object return pipeline diff --git a/examples/mocking.yaml b/examples/01-tasks/stub.yaml similarity index 79% rename from examples/mocking.yaml rename to examples/01-tasks/stub.yaml index d4921f2d..7a072d78 100644 --- a/examples/mocking.yaml +++ b/examples/01-tasks/stub.yaml @@ -11,15 +11,15 @@ dag: to mock steps within mature pipelines. You can run this pipeline by: - runnable execute -f examples/mocking.yaml + runnable execute -f examples/01-tasks/stub.yaml start_at: step 1 steps: step 1: - type: stub + type: stub # This will always succeed next: step 2 step 2: - type: stub - what: is this thing? + type: stub + what: is this thing? # It takes arbitrary keys It: does not matter!! next: step 3 step 3: diff --git a/examples/02-sequential/default_fail.py b/examples/02-sequential/default_fail.py new file mode 100644 index 00000000..8f473893 --- /dev/null +++ b/examples/02-sequential/default_fail.py @@ -0,0 +1,33 @@ +""" +When defining a Pipeline(), it automatically adds a success node and failure node. + +By default any failure in a step is considered to be a failure in the pipeline. + +In the below example, the progression would be as follows: + + step 1 >> step 2 >> fail + + +You can run this example by: python examples/02-sequential/default_fail.py +""" + +from examples.common.functions import raise_ex +from runnable import Pipeline, PythonTask, Stub + + +def main(): + step1 = Stub(name="step 1") + + step2 = PythonTask(name="step 2", function=raise_ex) # This will fail + + step3 = Stub(name="step 3", terminate_with_success=True) # This step will not be executed + + pipeline = Pipeline(steps=[step1, step2, step3]) + + pipeline.execute() + + return pipeline + + +if __name__ == "__main__": + main() diff --git a/examples/default-fail.yaml b/examples/02-sequential/default_fail.yaml similarity index 63% rename from examples/default-fail.yaml rename to examples/02-sequential/default_fail.yaml index 7dde5464..f8e423f9 100644 --- a/examples/default-fail.yaml +++ b/examples/02-sequential/default_fail.yaml @@ -6,9 +6,7 @@ dag: The default behavior is to traverse to step type fail and mark the run as failed. 
- You can control the flow by using on_failure, please check example/on-failure.yaml - - You can run this pipeline by runnable execute -f examples/default-fail.yaml + You can run this pipeline by: runnable execute -f examples/02-sequential/default_fail.yaml start_at: step 1 steps: step 1: @@ -16,11 +14,11 @@ dag: next: step 2 step 2: type: task - command_type: shell - command: exit 1 # This will fail + command_type: python + command: examples.common.functions.raise_ex # This will fail next: step 3 step 3: - type: stub + type: stub # This will never execute next: success success: type: success diff --git a/examples/02-sequential/on_failure_fail.py b/examples/02-sequential/on_failure_fail.py new file mode 100644 index 00000000..cb4fa6cd --- /dev/null +++ b/examples/02-sequential/on_failure_fail.py @@ -0,0 +1,41 @@ +""" +This pipeline showcases handling failures in a pipeline. + +The path taken if none of the steps failed: +step_1 -> step_2 -> step_3 -> success + +step_1 is a python function that raises an exception. +And we can instruct the pipeline to execute step_4 if step_1 fails +and then eventually fail. +step_1 -> step_4 -> fail + +This pattern is handy when you need to do something before eventually +failing (eg: sending a notification, updating status, etc...) + +Run this pipeline as: python examples/02-sequential/on_failure_fail.py +""" + +from examples.common.functions import raise_ex +from runnable import Pipeline, PythonTask, Stub + + +def main(): + step_1 = PythonTask(name="step 1", function=raise_ex) # This will fail + + step_2 = Stub(name="step 2") + + step_3 = Stub(name="step 3", terminate_with_success=True) + step_4 = Stub(name="step 4", terminate_with_failure=True) + + step_1.on_failure = step_4.name + + pipeline = Pipeline( + steps=[step_1, step_2, step_3, [step_4]], + ) + pipeline.execute() + + return pipeline + + +if __name__ == "__main__": + main() diff --git a/examples/02-sequential/on_failure_fail.yaml b/examples/02-sequential/on_failure_fail.yaml new file mode 100644 index 00000000..6e94242a --- /dev/null +++ b/examples/02-sequential/on_failure_fail.yaml @@ -0,0 +1,35 @@ +dag: + description: | + This pipeline showcases handling failures in a pipeline. + + The path taken if none of the steps failed: + step_1 -> step_2 -> step_3 -> success + + step_1 is a python function that raises an exception. + And we can instruct the pipeline to execute step_4 if step_1 fails + and then eventually fail. + step_1 -> step_4 -> fail + + This pattern is handy when you need to do something before eventually + failing (eg: sending a notification, updating status, etc...) + start_at: step_1 + steps: + step_1: + type: task + command_type: shell + command: exit 1 # This will fail! + next: step_2 + on_failure: step_4 + step_2: + type: stub + next: step_3 + step_3: + type: stub + next: success + step_4: + type: stub + next: fail + success: + type: success + fail: + type: fail diff --git a/examples/02-sequential/on_failure_succeed.py b/examples/02-sequential/on_failure_succeed.py new file mode 100644 index 00000000..05dd231a --- /dev/null +++ b/examples/02-sequential/on_failure_succeed.py @@ -0,0 +1,41 @@ +""" +This pipeline showcases handling failures in a pipeline. + +The path taken if none of the steps failed: +step_1 -> step_2 -> step_3 -> success + +step_1 is a python function that raises an exception. +And we can instruct the pipeline to execute step_4 if step_1 fails +and then eventually succeed too. 
+step_1 -> step_4 -> success
+
+This pattern is handy when you are expecting a failure of a step 
+and have ways to handle it.
+
+Run this pipeline: python examples/02-sequential/on_failure_succeed.py
+"""
+
+from examples.common.functions import raise_ex
+from runnable import Pipeline, PythonTask, Stub
+
+
+def main():
+    step_1 = PythonTask(name="step 1", function=raise_ex)  # This will fail
+
+    step_2 = Stub(name="step 2")
+
+    step_3 = Stub(name="step 3", terminate_with_success=True)
+    step_4 = Stub(name="step 4", terminate_with_success=True)
+
+    step_1.on_failure = step_4.name
+
+    pipeline = Pipeline(
+        steps=[step_1, step_2, step_3, [step_4]],
+    )
+    pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/02-sequential/on_failure_succeed.yaml b/examples/02-sequential/on_failure_succeed.yaml
new file mode 100644
index 00000000..557f51d8
--- /dev/null
+++ b/examples/02-sequential/on_failure_succeed.yaml
@@ -0,0 +1,38 @@
+dag:
+  description: |
+    This pipeline showcases handling failures in a pipeline.
+
+    The path taken if none of the steps failed:
+    step_1 -> step_2 -> step_3 -> success
+
+    step_1 is a python function that raises an exception.
+    And we can instruct the pipeline to execute step_4 if step_1 fails
+    and then eventually succeed.
+    step_1 -> step_4 -> success
+
+    This pattern is handy when you are expecting a failure of a step 
+    and have ways to handle it.
+
+    Run this pipeline as: 
+      runnable execute -f examples/02-sequential/on_failure_succeed.yaml
+  start_at: step_1
+  steps:
+    step_1:
+      type: task
+      command_type: shell
+      command: exit 1 # This will fail!
+      next: step_2
+      on_failure: step_4
+    step_2:
+      type: stub
+      next: step_3
+    step_3:
+      type: stub
+      next: success
+    step_4:
+      type: stub
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
diff --git a/examples/02-sequential/traversal.py b/examples/02-sequential/traversal.py
new file mode 100644
index 00000000..b2021f13
--- /dev/null
+++ b/examples/02-sequential/traversal.py
@@ -0,0 +1,59 @@
+"""
+You can execute this pipeline by:
+
+    python examples/02-sequential/traversal.py
+
+    A pipeline can have any "tasks" as part of it. In the 
+    below example, we have a mix of stub, python, shell and notebook tasks.
+
+    As with simpler tasks, the stdout and stderr of each task are captured 
+    and stored in the catalog.
+
+    .catalog
+    └── cold-jennings-1534
+        ├── examples
+        │   └── common
+        │   └── simple_notebook_out.ipynb
+        ├── hello_notebook.execution.log
+        ├── hello_python.execution.log
+        └── hello_shell.execution.log
+
+    4 directories, 4 files
+
+"""
+
+from examples.common.functions import hello
+from runnable import NotebookTask, Pipeline, PythonTask, ShellTask, Stub
+
+
+def main():
+
+    stub_task = Stub(name="hello stub")
+
+    python_task = PythonTask(
+        name="hello python",
+        function=hello,
+    )
+
+    shell_task = ShellTask(
+        name="hello shell",
+        command="echo 'Hello World!'",
+    )
+
+    notebook_task = NotebookTask(
+        name="hello notebook",
+        notebook="examples/common/simple_notebook.ipynb",
+        terminate_with_success=True,
+    )
+
+    # The pipeline has a mix of tasks.
+    # The order of execution follows the order of the tasks in the list. 
+    pipeline = Pipeline(steps=[stub_task, python_task, shell_task, notebook_task])
+
+    pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/02-sequential/traversal.yaml b/examples/02-sequential/traversal.yaml
new file mode 100644
index 00000000..7363c7d0
--- /dev/null
+++ b/examples/02-sequential/traversal.yaml
@@ -0,0 +1,51 @@
+dag:
+  description: |
+    A pipeline can have any "tasks" as part of it. In the 
+    below example, we have a mix of stub, python, shell and notebook tasks.
+
+    As with simpler tasks, the stdout and stderr of each task are captured 
+    and stored in the catalog.
+
+    For example, the catalog structure for this execution would be:
+
+    .catalog
+    └── cold-jennings-1534
+        ├── examples
+        │   └── common
+        │   └── simple_notebook_out.ipynb
+        ├── hello_notebook.execution.log
+        ├── hello_python.execution.log
+        └── hello_shell.execution.log
+
+    4 directories, 4 files
+
+    The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!".
+
+    You can run this pipeline as:
+      runnable execute -f examples/02-sequential/traversal.yaml
+
+  start_at: hello stub
+  steps:
+    hello stub:
+      type: stub
+      next: hello python
+    hello python:
+      type: task
+      command_type: python
+      command: examples.common.functions.hello # dotted path to the function.
+      next: hello shell
+    hello shell:
+      type: task
+      command_type: shell
+      command: echo "Hello World!" # Command to run
+      next: hello notebook
+    hello notebook:
+      type: task
+      command_type: notebook
+      command: examples/common/simple_notebook.ipynb # The path is relative to the root of the project.
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
+
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 00000000..0215bd80
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,38 @@
+Examples in this section are ordered from simple to advanced.
+All examples have both python SDK and yaml representations.
+
+Please use this as an index to find a specific example.
+
+
+- common: Has python functions/notebooks/scripts that are used across the examples
+
+- 01-tasks: Examples of the tasks that can be part of the pipeline.
+
+  - [stub.py](./01-tasks/stub.py), [stub.yaml](./01-tasks/stub.yaml): demonstrates the concept of a stub.
+  
+  - [python_tasks.py](./01-tasks/python_tasks.py), [python_tasks.yaml](./01-tasks/python_tasks.yaml): uses python functions as tasks.
+    The stdout/stderr of all the tasks are captured and stored in the catalog.
+  - [notebook.py](./01-tasks/notebook.py), [notebook.yaml](./01-tasks/notebook.yaml): uses notebooks as tasks
+    The executed notebook is captured in the catalog.
+  - [scripts.py](./01-tasks/scripts.py), [scripts.yaml](./01-tasks/scripts.yaml): uses shell scripts as tasks
+    The stdout/stderr of all scripts are captured and stored in the catalog.
+
+
+The above examples showcase executable units of the pipeline.
+The next section has examples on stitching these tasks together for complex operations.
+
+- 02-sequential: Examples of stitching tasks together including behavior in case of failures.
+
+  - traversal: A pipeline which is a mixed bag of notebooks, python functions and 
+    shell scripts.
+  - default_failure: The default failure behavior.
+  - on_failure_fail: On failure of a step, do some action and fail
+  - on_failure_success: On failure of a step, take a different route
+
+
+The above examples show stitching complex operations of the pipeline. 
+The next section has examples on + +- 03: Examples of passing parameters between tasks + + diff --git a/examples/common/functions.py b/examples/common/functions.py new file mode 100644 index 00000000..3eb1aa7e --- /dev/null +++ b/examples/common/functions.py @@ -0,0 +1,8 @@ +def hello(): + "The most basic function" + print("Hello World!") + + +def raise_ex(): + "A function that raises an exception" + raise Exception("This is an exception") diff --git a/examples/common/simple_notebook.ipynb b/examples/common/simple_notebook.ipynb new file mode 100644 index 00000000..ced7632d --- /dev/null +++ b/examples/common/simple_notebook.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "def function():\n", + " print(\"Hello World!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8eac7a3f", + "metadata": {}, + "outputs": [], + "source": [ + "function()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/simple_notebook_out.ipynb b/examples/common/simple_notebook_out.ipynb new file mode 100644 index 00000000..064bc4e2 --- /dev/null +++ b/examples/common/simple_notebook_out.ipynb @@ -0,0 +1,82 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "2d2fa303", + "metadata": { + "ploomber": { + "timestamp_end": 1712760519.616379, + "timestamp_start": 1712760519.616218 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1712760519.616649, + "timestamp_start": 1712760519.616443 + } + }, + "outputs": [], + "source": [ + "def function():\n", + " print(\"Hello World!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8eac7a3f", + "metadata": { + "ploomber": { + "timestamp_end": 1712760519.616772, + "timestamp_start": 1712760519.616663 + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello World!\n" + ] + } + ], + "source": [ + "function()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concepts/notebook_native_parameters_consume_out.ipynb b/examples/concepts/notebook_native_parameters_consume_out.ipynb index 16c8e6ad..ec796d3f 100644 --- a/examples/concepts/notebook_native_parameters_consume_out.ipynb +++ b/examples/concepts/notebook_native_parameters_consume_out.ipynb @@ -6,8 +6,8 @@ "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", "metadata": { "ploomber": { - "timestamp_end": 1712673802.476133, - "timestamp_start": 
1712673802.474048 + "timestamp_end": 1712723378.894586, + "timestamp_start": 1712723378.892554 } }, "outputs": [], @@ -30,8 +30,8 @@ "id": "e7f0aab2", "metadata": { "ploomber": { - "timestamp_end": 1712673802.476318, - "timestamp_start": 1712673802.476155 + "timestamp_end": 1712723378.894782, + "timestamp_start": 1712723378.894609 }, "tags": [ "parameters" @@ -48,11 +48,11 @@ { "cell_type": "code", "execution_count": 3, - "id": "143149bb", + "id": "59862ac0", "metadata": { "ploomber": { - "timestamp_end": 1712673802.476461, - "timestamp_start": 1712673802.476332 + "timestamp_end": 1712723378.894933, + "timestamp_start": 1712723378.894796 }, "tags": [ "injected-parameters" @@ -71,8 +71,8 @@ "id": "0e04f11a", "metadata": { "ploomber": { - "timestamp_end": 1712673802.476606, - "timestamp_start": 1712673802.476473 + "timestamp_end": 1712723378.895087, + "timestamp_start": 1712723378.894946 } }, "outputs": [], @@ -86,8 +86,8 @@ "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", "metadata": { "ploomber": { - "timestamp_end": 1712673802.476742, - "timestamp_start": 1712673802.476618 + "timestamp_end": 1712723378.895227, + "timestamp_start": 1712723378.895099 } }, "outputs": [], diff --git a/examples/concepts/notebook_native_parameters_out.ipynb b/examples/concepts/notebook_native_parameters_out.ipynb index 0550af9c..4546caaf 100644 --- a/examples/concepts/notebook_native_parameters_out.ipynb +++ b/examples/concepts/notebook_native_parameters_out.ipynb @@ -6,8 +6,8 @@ "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", "metadata": { "ploomber": { - "timestamp_end": 1712673802.301212, - "timestamp_start": 1712673802.299645 + "timestamp_end": 1712723378.486399, + "timestamp_start": 1712723378.482792 } }, "outputs": [], @@ -36,8 +36,8 @@ "id": "e7f0aab2", "metadata": { "ploomber": { - "timestamp_end": 1712673802.30143, - "timestamp_start": 1712673802.301298 + "timestamp_end": 1712723378.486896, + "timestamp_start": 1712723378.486585 }, "tags": [ "parameters" @@ -53,11 +53,11 @@ { "cell_type": "code", "execution_count": 3, - "id": "2aa79db0", + "id": "bb75b0e8", "metadata": { "ploomber": { - "timestamp_end": 1712673802.30158, - "timestamp_start": 1712673802.301444 + "timestamp_end": 1712723378.487227, + "timestamp_start": 1712723378.486928 }, "tags": [ "injected-parameters" @@ -76,8 +76,8 @@ "id": "0e04f11a", "metadata": { "ploomber": { - "timestamp_end": 1712673802.301748, - "timestamp_start": 1712673802.301593 + "timestamp_end": 1712723378.487586, + "timestamp_start": 1712723378.487256 } }, "outputs": [], @@ -91,8 +91,8 @@ "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", "metadata": { "ploomber": { - "timestamp_end": 1712673802.301934, - "timestamp_start": 1712673802.301761 + "timestamp_end": 1712723378.488026, + "timestamp_start": 1712723378.487615 } }, "outputs": [], diff --git a/examples/concepts/simple_notebook.ipynb b/examples/concepts/simple_notebook.ipynb index 1f2547b7..167f26de 100644 --- a/examples/concepts/simple_notebook.ipynb +++ b/examples/concepts/simple_notebook.ipynb @@ -7,8 +7,8 @@ "metadata": {}, "outputs": [], "source": [ - "def add(x, y):\n", - " return x + y" + "def function():\n", + " print(\"hello world\")" ] }, { @@ -18,40 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "def multiply(x, y):\n", - " return x * y\n", - "\n", - "from pydantic import BaseModel\n", - "\n", - "class EggsModel(BaseModel):\n", - " ham: str\n", - "\n", - "\n", - "class ObjectType:\n", - " def __init__(self):\n", - " self.salute = \"hello\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"id": "9dcadc93-aa77-4a0a-9465-2e33eef4da44", - "metadata": {}, - "outputs": [], - "source": [ - "a = add(40, 2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7b872cdf-820b-47b5-8f22-15c4b69c8637", - "metadata": {}, - "outputs": [], - "source": [ - "b = multiply(2, 100)\n", - "\n", - "c = EggsModel(ham=\"hello\")" + "function()" ] } ], diff --git a/examples/concepts/simple_notebook_out.ipynb b/examples/concepts/simple_notebook_out.ipynb index 2abe5bb4..410609b9 100644 --- a/examples/concepts/simple_notebook_out.ipynb +++ b/examples/concepts/simple_notebook_out.ipynb @@ -3,11 +3,11 @@ { "cell_type": "code", "execution_count": 1, - "id": "eda8faba", + "id": "0ee2a616", "metadata": { "ploomber": { - "timestamp_end": 1712673803.931629, - "timestamp_start": 1712673803.931437 + "timestamp_end": 1712723380.314318, + "timestamp_start": 1712723380.314131 }, "tags": [ "injected-parameters" @@ -24,8 +24,8 @@ "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", "metadata": { "ploomber": { - "timestamp_end": 1712673803.931918, - "timestamp_start": 1712673803.931656 + "timestamp_end": 1712723380.314528, + "timestamp_start": 1712723380.314339 } }, "outputs": [], @@ -40,8 +40,8 @@ "id": "9f1cbac6-cada-42b0-8fb1-ddb25a88836c", "metadata": { "ploomber": { - "timestamp_end": 1712673803.932905, - "timestamp_start": 1712673803.931932 + "timestamp_end": 1712723380.31542, + "timestamp_start": 1712723380.314542 } }, "outputs": [], @@ -66,8 +66,8 @@ "id": "9dcadc93-aa77-4a0a-9465-2e33eef4da44", "metadata": { "ploomber": { - "timestamp_end": 1712673803.933058, - "timestamp_start": 1712673803.932921 + "timestamp_end": 1712723380.315571, + "timestamp_start": 1712723380.315436 } }, "outputs": [], @@ -81,8 +81,8 @@ "id": "7b872cdf-820b-47b5-8f22-15c4b69c8637", "metadata": { "ploomber": { - "timestamp_end": 1712673803.933233, - "timestamp_start": 1712673803.933071 + "timestamp_end": 1712723380.315741, + "timestamp_start": 1712723380.315585 } }, "outputs": [], diff --git a/examples/on-failure.yaml b/examples/on-failure.yaml deleted file mode 100644 index 02ae42d8..00000000 --- a/examples/on-failure.yaml +++ /dev/null @@ -1,31 +0,0 @@ -dag: - description: | - This is a simple pipeline to demonstrate failure in a step. - - The default behavior is to traverse to step type fail and mark the run as failed. - But you can control it by providing on_failure. - - In this example: step 1 fails and moves to step 3 skipping step 2. The pipeline status - is considered to be success. - - step 1 (FAIL) >> step 3 >> success - - You can run this pipeline by runnable execute -f examples/on-failure.yaml - start_at: step 1 - steps: - step 1: - type: task - command_type: shell - command: exit 1 # This will fail! - next: step 2 - on_failure: step 3 - step 2: - type: stub # This step will never reach - next: step 3 - step 3: - type: stub - next: success - success: - type: success - fail: - type: fail diff --git a/examples/on_failure.py b/examples/on_failure.py deleted file mode 100644 index b8464e04..00000000 --- a/examples/on_failure.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -This is a simple pipeline to demonstrate failure in a step. - - The default behavior is to traverse to step type fail and mark the run as failed. - But you can control it by providing on_failure. - - In this example: step 1 fails and moves to step 3 skipping step 2. The pipeline status - is considered to be success. 
- - step 1 (FAIL) >> step 3 >> success - - You can run this example by: - python examples/on_failure.py -""" - -from runnable import Pipeline, ShellTask, Stub - - -def main(): - step_1 = ShellTask(name="step 1", command="exit 1") # This will fail - - step_2 = Stub(name="step 2") - - step_3 = Stub(name="step 3", terminate_with_success=True) - - step_1.on_failure = step_3.name - - pipeline = Pipeline( - steps=[step_1, step_2, step_3], - add_terminal_nodes=True, - ) - pipeline.execute() - - return pipeline - - -if __name__ == "__main__": - main() diff --git a/examples/tutorials/mnist/modular_source.py b/examples/tutorials/mnist/modular_source.py index 67ad6ea5..8411de67 100644 --- a/examples/tutorials/mnist/modular_source.py +++ b/examples/tutorials/mnist/modular_source.py @@ -1,3 +1,4 @@ +import time from typing import List import numpy as np diff --git a/runnable/entrypoints.py b/runnable/entrypoints.py index 2aa49c7c..bdddeb7e 100644 --- a/runnable/entrypoints.py +++ b/runnable/entrypoints.py @@ -172,6 +172,7 @@ def execute( ) console.print("Working with context:") console.print(run_context) + console.rule(style="[dark orange]") executor = run_context.executor @@ -243,6 +244,7 @@ def execute_single_node( ) console.print("Working with context:") console.print(run_context) + console.rule(style="[dark orange]") executor = run_context.executor run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value @@ -296,6 +298,7 @@ def execute_notebook( console.print("Working with context:") console.print(run_context) + console.rule(style="[dark orange]") step_config = { "command": notebook_file, @@ -358,6 +361,7 @@ def execute_function( console.print("Working with context:") console.print(run_context) + console.rule(style="[dark orange]") # Prepare the graph with a single node step_config = { @@ -427,6 +431,7 @@ def fan( ) console.print("Working with context:") console.print(run_context) + console.rule(style="[dark orange]") executor = run_context.executor run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value diff --git a/runnable/extensions/executor/__init__.py b/runnable/extensions/executor/__init__.py index 9ab9011e..791c483f 100644 --- a/runnable/extensions/executor/__init__.py +++ b/runnable/extensions/executor/__init__.py @@ -476,6 +476,8 @@ def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwar logger.exception(e) raise + console.rule(style="[dark orange]") + if working_on.node_type in ["success", "fail"]: break diff --git a/runnable/sdk.py b/runnable/sdk.py index 3a707a0e..9564c6b4 100644 --- a/runnable/sdk.py +++ b/runnable/sdk.py @@ -11,12 +11,19 @@ ConfigDict, Field, PrivateAttr, + ValidationInfo, computed_field, field_validator, model_validator, ) from rich import print -from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, +) from rich.table import Column from typing_extensions import Self @@ -71,7 +78,7 @@ class Catalog(BaseModel): class BaseTraversal(ABC, BaseModel): name: str - next_node: str = Field(default="", alias="next") + next_node: str = Field(default="", serialization_alias="next_node") terminate_with_success: bool = Field(default=False, exclude=True) terminate_with_failure: bool = Field(default=False, exclude=True) on_failure: str = Field(default="", alias="on_failure") @@ -83,6 +90,12 @@ class BaseTraversal(ABC, BaseModel): def internal_name(self) -> str: return self.name + def __hash__(self): + """ + 
Needed to uniquify the steps of a pipeline; steps are hashed by name.
+        """
+        return hash(self.name)
+
     def __rshift__(self, other: StepType) -> StepType:
         if self.next_node:
             raise Exception(f"The node {self} already has a next node: {self.next_node}")
@@ -125,8 +138,7 @@ def validate_terminations(self) -> Self:
         return self
 
     @abstractmethod
-    def create_node(self) -> TraversalNode:
-        ...
+    def create_node(self) -> TraversalNode: ...
 
 
 class BaseTask(BaseTraversal):
@@ -201,7 +213,7 @@ def create_node(self) -> TaskNode:
             if not (self.terminate_with_failure or self.terminate_with_success):
                 raise AssertionError("A node not being terminated must have a user defined next node")
 
-        return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+        return TaskNode.parse_from_config(self.model_dump(exclude_none=True, by_alias=True))
 
 
 class PythonTask(BaseTask):
@@ -297,9 +309,9 @@ class NotebookTask(BaseTask):
 
     """
 
-    notebook: str = Field(alias="command")
+    notebook: str = Field(serialization_alias="command")
 
-    notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
+    notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path", validate_default=True)
     optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
 
     @computed_field
@@ -526,7 +538,7 @@ class Pipeline(BaseModel):
     _dag: graph.Graph = PrivateAttr()
     model_config = ConfigDict(extra="forbid")
 
-    def _validate_path(self, path: List[StepType]) -> None:
+    def _validate_path(self, path: List[StepType], failure_path: bool = False) -> None:
         # Check if one and only one step terminates with success
         # Check no more than one step terminates with failure
 
@@ -544,7 +556,7 @@ def _validate_path(self, path: List[StepType]) -> None:
                 raise Exception("A pipeline cannot have more than one step that terminates with failure")
             reached_failure = True
 
-        if not reached_success:
+        if not reached_success and not reached_failure:
             raise Exception("A pipeline must have at least one step that terminates with success")
 
     def _construct_path(self, path: List[StepType]) -> None:
@@ -594,11 +606,21 @@ def model_post_init(self, __context: Any) -> None:
 
         # Check all paths are valid and construct the path
         paths = [success_path] + on_failure_paths
+        failure_path = False
         for path in paths:
-            self._validate_path(path)
+            self._validate_path(path, failure_path)
             self._construct_path(path)
 
-        all_steps: List[StepType] = [step for step in success_path + on_failure_paths]  # type: ignore
+            failure_path = True
+
+        all_steps: List[StepType] = []
+
+        for path in paths:
+            for step in path:
+                all_steps.append(step)
+
+        seen = set()
+        unique = [x for x in all_steps if not (x in seen or seen.add(x))]
 
         self._dag = graph.Graph(
             start_at=all_steps[0].name,
@@ -606,7 +628,7 @@ def model_post_init(self, __context: Any) -> None:
             internal_branch_name=self.internal_branch_name,
         )
 
-        for step in all_steps:
+        for step in unique:
             self._dag.add_node(step.create_node())
 
         if self.add_terminal_nodes:
@@ -675,8 +697,9 @@ def execute(
 
         run_context.dag = graph.create_graph(dag_definition)
 
-        print("Working with context:")
-        print(run_context)
+        console.print("Working with context:")
+        console.print(run_context)
+        console.rule(style="[dark orange]")
 
         if not run_context.executor._local:
             # We are not working with non local executor
@@ -693,6 +716,7 @@ def execute(
         run_context.executor.prepare_for_graph_execution()
 
         with Progress(
+            SpinnerColumn(spinner_name="runner"),
             TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)), 
BarColumn(table_column=Column(ratio=1), style="dark_orange"), TimeElapsedColumn(table_column=Column(ratio=1)), diff --git a/runnable/tasks.py b/runnable/tasks.py index 1c46c8e2..1774b63d 100644 --- a/runnable/tasks.py +++ b/runnable/tasks.py @@ -9,9 +9,10 @@ from datetime import datetime from pickle import PicklingError from string import Template -from typing import Any, Dict, List, Literal, Tuple +from typing import Any, Dict, List, Literal, Optional, Tuple from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator +from rich.console import Console from stevedore import driver import runnable.context as context @@ -32,6 +33,9 @@ # TODO: Can we add memory peak, cpu usage, etc. to the metrics? +console = Console(file=io.StringIO()) + + class TaskReturns(BaseModel): name: str kind: Literal["json", "object", "metric"] = Field(default="json") @@ -135,17 +139,21 @@ def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: if not allow_complex: params = {key: value for key, value in params.items() if isinstance(value, JsonParameter)} - log_file_name = self.node_name.replace(" ", "_") + ".execution.log" + log_file_name = self.node_name # + ".execution.log" if map_variable: for _, value in map_variable.items(): log_file_name += "_" + str(value) + log_file_name = "".join(x for x in log_file_name if x.isalnum()) + ".execution.log" + log_file = open(log_file_name, "w") f = io.StringIO() try: with contextlib.redirect_stdout(f): + # with contextlib.nullcontext(): yield params + print(console.file.getvalue()) # type: ignore except Exception as e: # pylint: disable=broad-except logger.exception(e) finally: @@ -156,10 +164,11 @@ def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: log_file.close() # Put the log file in the catalog - # self._context.catalog_handler.put(name=log_file.name, run_id=context.run_context.run_id) + self._context.catalog_handler.put(name=log_file.name, run_id=context.run_context.run_id) os.remove(log_file.name) # Update parameters + # This should only update the parameters that are changed at the root level. 
self._context.run_log_store.set_parameters(parameters=params, run_id=self._context.run_id)
 
 
@@ -219,8 +228,7 @@ def execute_command(
             logger.info(f"Calling {func} from {module} with {filtered_parameters}")
             user_set_parameters = f(**filtered_parameters)  # This is a tuple or single value
         except Exception as e:
-            logger.exception(e)
-            console.print(e, style=defaults.error_style)
+            console.log(e, style=defaults.error_style, markup=False)
             raise exceptions.CommandCallError(f"Function call: {self.command} did not succeed.\n") from e
 
         attempt_log.input_parameters = params.copy()
@@ -263,9 +271,9 @@ def execute_command(
                 attempt_log.status = defaults.SUCCESS
             except Exception as _e:
                 msg = f"Call to the function {self.command} did not succeed.\n"
-                logger.exception(_e)
                 attempt_log.message = msg
-                console.print(_e, style=defaults.error_style)
+                console.print_exception(show_locals=False)
+                console.log(_e, style=defaults.error_style)
 
         attempt_log.end_time = str(datetime.now())
 
@@ -277,7 +285,7 @@ class NotebookTaskType(BaseTaskType):
     task_type: str = Field(default="notebook", serialization_alias="command_type")
     command: str
-    notebook_output_path: str = Field(default="", validate_default=True)
+    notebook_output_path: Optional[str] = Field(default=None, validate_default=True)
     optional_ploomber_args: dict = {}
 
     @field_validator("command")

From 4d7e29d53a1f70cb829f2cf2a51d1727fb50ba1b Mon Sep 17 00:00:00 2001
From: Vijay Vammi
Date: Fri, 12 Apr 2024 04:22:51 +0100
Subject: [PATCH 02/17] fix: minor bugs with on failure behaviours

---
 examples/01-tasks/notebook.py                  |  5 ++---
 examples/01-tasks/notebook.yaml                | 11 +++++------
 examples/01-tasks/python_tasks.py              |  1 -
 examples/01-tasks/python_tasks.yaml            |  6 +++---
 examples/01-tasks/scripts.yaml                 |  4 ++--
 examples/01-tasks/stub.yaml                    |  2 +-
 examples/02-sequential/default_fail.py         |  2 +-
 examples/02-sequential/on_failure_succeed.py   |  2 +-
 examples/02-sequential/on_failure_succeed.yaml |  4 ++--
 examples/02-sequential/traversal.py            |  5 ++---
 examples/02-sequential/traversal.yaml          | 11 +++++------
 examples/README.md                             |  8 +++-----
 runnable/sdk.py                                |  5 ++---
 13 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/examples/01-tasks/notebook.py b/examples/01-tasks/notebook.py
index db86bb66..68447032 100644
--- a/examples/01-tasks/notebook.py
+++ b/examples/01-tasks/notebook.py
@@ -3,7 +3,7 @@
 
     python examples/01-tasks/notebook.py
 
-The notebook is executed in the same environment so any installed packages are available for the 
+The notebook is executed in the same environment so any installed packages are available for the
 notebook.
 
 Upon successful execution, the output notebook with cell outputs is stored in the catalog.
@@ -15,7 +15,7 @@
     │   └── common
     │   └── simple_notebook_out.ipynb
     └── notebook.execution.log
-    
+
 The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!".
 """
 
@@ -23,7 +23,6 @@
 
 
 def main():
-
     # Execute the notebook present in examples/common/simple_notebook.ipynb.
     # The path is relative to the project root.
     # If this step executes successfully, the pipeline will terminate with success
diff --git a/examples/01-tasks/notebook.yaml b/examples/01-tasks/notebook.yaml
index bf6aeee3..db79591a 100644
--- a/examples/01-tasks/notebook.yaml
+++ b/examples/01-tasks/notebook.yaml
@@ -2,12 +2,12 @@ dag:
   description: |
     This is a sample pipeline with one step that executes a notebook.
 
-    The notebook is executed in the same environment so any installed 
+    The notebook is executed in the same environment so any installed
     packages are available for the notebook.
 
-    Upon successful execution, the output notebook with cell outputs 
+    Upon successful execution, the output notebook with cell outputs
     is stored in the catalog.
-    
+
     For example, the catalog structure for this execution would be:
 
     .catalog
@@ -16,9 +16,9 @@ dag:
         │   └── common
         │   └── simple_notebook_out.ipynb
         └── notebook.execution.log
-    
+
     The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!".
-    
+
     You can run this pipeline as:
       runnable execute -f examples/01-tasks/notebook.yaml
 
@@ -33,4 +33,3 @@ dag:
       type: success
     fail:
      type: fail
-
diff --git a/examples/01-tasks/python_tasks.py b/examples/01-tasks/python_tasks.py
index 7ad97d58..5a493c5a 100644
--- a/examples/01-tasks/python_tasks.py
+++ b/examples/01-tasks/python_tasks.py
@@ -21,7 +21,6 @@
 
 
 def main():
-
     # Create a task which calls the function "hello"
     # If this step executes successfully, the pipeline will terminate with success
     hello_task = PythonTask(
diff --git a/examples/01-tasks/python_tasks.yaml b/examples/01-tasks/python_tasks.yaml
index 535584f7..1a86719c 100644
--- a/examples/01-tasks/python_tasks.yaml
+++ b/examples/01-tasks/python_tasks.yaml
@@ -3,9 +3,9 @@ dag:
     You can run this pipeline by:
       runnable execute -f examples/01-tasks/python_tasks.yaml
 
-    The stdout of "Hello World!" would be captured as 
+    The stdout of "Hello World!" would be captured as
     execution log and stored in the catalog.
-    
+
     An example of the catalog structure:
 
     .catalog
@@ -13,7 +13,7 @@ dag:
         └── hello.execution.log
 
     2 directories, 1 file
-    
+
     The hello.execution.log has the captured stdout of "Hello World!".
   start_at: hello_task
   steps:
diff --git a/examples/01-tasks/scripts.yaml b/examples/01-tasks/scripts.yaml
index e4c60b55..ee1687e3 100644
--- a/examples/01-tasks/scripts.yaml
+++ b/examples/01-tasks/scripts.yaml
@@ -6,7 +6,7 @@ dag:
       runnable execute -f examples/01-tasks/scripts.yaml
 
     The command can be anything that can be executed in a shell.
-    The stdout/stderr of the execution is captured as execution log and 
+    The stdout/stderr of the execution is captured as execution log and
     stored in the catalog.
 
     For example:
@@ -24,4 +24,4 @@ dag:
   success:
     type: success
   fail:
-    type: fail
\ No newline at end of file
+    type: fail
diff --git a/examples/01-tasks/stub.yaml b/examples/01-tasks/stub.yaml
index 7a072d78..788828fe 100644
--- a/examples/01-tasks/stub.yaml
+++ b/examples/01-tasks/stub.yaml
@@ -18,7 +18,7 @@ dag:
       type: stub # This will always succeed
       next: step 2
     step 2:
-      type: stub 
+      type: stub
       what: is this thing? # It takes arbitrary keys
       It: does not matter!!
       next: step 3
diff --git a/examples/02-sequential/default_fail.py b/examples/02-sequential/default_fail.py
index 8f473893..92a4e578 100644
--- a/examples/02-sequential/default_fail.py
+++ b/examples/02-sequential/default_fail.py
@@ -7,7 +7,7 @@
 
     step 1 >> step 2 >> fail
 
-    
+
 You can run this example by: python examples/02-sequential/default_fail.py
 """
 
diff --git a/examples/02-sequential/on_failure_succeed.py b/examples/02-sequential/on_failure_succeed.py
index 05dd231a..6015bd01 100644
--- a/examples/02-sequential/on_failure_succeed.py
+++ b/examples/02-sequential/on_failure_succeed.py
@@ -9,7 +9,7 @@
 and then eventually succeed too. 
step_1 -> step_4 -> success
 
-This pattern is handy when you are expecting a failure of a step 
+This pattern is handy when you are expecting a failure of a step
 and have ways to handle it.
 
 Run this pipeline: python examples/02-sequential/on_failure_succeed.py
diff --git a/examples/02-sequential/on_failure_succeed.yaml b/examples/02-sequential/on_failure_succeed.yaml
index 557f51d8..3977e175 100644
--- a/examples/02-sequential/on_failure_succeed.yaml
+++ b/examples/02-sequential/on_failure_succeed.yaml
@@ -10,10 +10,10 @@ dag:
     and then eventually succeed.
     step_1 -> step_4 -> success
 
-    This pattern is handy when you are expecting a failure of a step 
+    This pattern is handy when you are expecting a failure of a step
     and have ways to handle it.
 
-    Run this pipeline as: 
+    Run this pipeline as:
      runnable execute -f examples/02-sequential/on_failure_succeed.yaml
   start_at: step_1
   steps:
diff --git a/examples/02-sequential/traversal.py b/examples/02-sequential/traversal.py
index b2021f13..bcb35254 100644
--- a/examples/02-sequential/traversal.py
+++ b/examples/02-sequential/traversal.py
@@ -3,10 +3,10 @@
 
     python examples/02-sequential/traversal.py
 
-    A pipeline can have any "tasks" as part of it. In the 
+    A pipeline can have any "tasks" as part of it. In the
     below example, we have a mix of stub, python, shell and notebook tasks.
 
-    As with simpler tasks, the stdout and stderr of each task are captured 
+    As with simpler tasks, the stdout and stderr of each task are captured
     and stored in the catalog.
 
     .catalog
@@ -27,7 +27,6 @@
 
 
 def main():
-
     stub_task = Stub(name="hello stub")
 
     python_task = PythonTask(
         name="hello python",
         function=hello,
     )
diff --git a/examples/02-sequential/traversal.yaml b/examples/02-sequential/traversal.yaml
index 7363c7d0..88ed3632 100644
--- a/examples/02-sequential/traversal.yaml
+++ b/examples/02-sequential/traversal.yaml
@@ -1,11 +1,11 @@
 dag:
   description: |
-    A pipeline can have any "tasks" as part of it. In the 
+    A pipeline can have any "tasks" as part of it. In the
     below example, we have a mix of stub, python, shell and notebook tasks.
 
-    As with simpler tasks, the stdout and stderr of each task are captured 
+    As with simpler tasks, the stdout and stderr of each task are captured
     and stored in the catalog.
-    
+
     For example, the catalog structure for this execution would be:
 
     .catalog
@@ -18,9 +18,9 @@ dag:
         └── hello_shell.execution.log
 
     4 directories, 4 files
-    
+
     The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!".
-    
+
     You can run this pipeline as:
       runnable execute -f examples/02-sequential/traversal.yaml
 
@@ -48,4 +48,3 @@ dag:
     type: success
   fail:
     type: fail
-
diff --git a/examples/README.md b/examples/README.md
index 0215bd80..2bcea172 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -9,7 +9,7 @@ Please use this as an index to find a specific example.
 
 - 01-tasks: Examples of the tasks that can be part of the pipeline.
 
   - [stub.py](./01-tasks/stub.py), [stub.yaml](./01-tasks/stub.yaml): demonstrates the concept of a stub.
-  
+
   - [python_tasks.py](./01-tasks/python_tasks.py), [python_tasks.yaml](./01-tasks/python_tasks.yaml): uses python functions as tasks.
     The stdout/stderr of all the tasks are captured and stored in the catalog.
@@ -23,7 +23,7 @@ The next section has examples on stitching these tasks together for complex oper
 
 - 02-sequential: Examples of stitching tasks together including behavior in case of failures. 
- - traversal: A pipeline which is a mixed bag of notebooks, python functions and + - traversal: A pipeline which is a mixed bag of notebooks, python functions and shell scripts. - default_failure: The default failure behavior. - on_failure_fail: On failure of a step, do some action and fail @@ -31,8 +31,6 @@ The next section has examples on stitching these tasks together for complex oper The above examples show stitching complex operations of the pipeline. -The next section has examples on +The next section has examples on - 03: Examples of passing parameters between tasks - - diff --git a/runnable/sdk.py b/runnable/sdk.py index 9564c6b4..0993c017 100644 --- a/runnable/sdk.py +++ b/runnable/sdk.py @@ -11,12 +11,10 @@ ConfigDict, Field, PrivateAttr, - ValidationInfo, computed_field, field_validator, model_validator, ) -from rich import print from rich.progress import ( BarColumn, Progress, @@ -138,7 +136,8 @@ def validate_terminations(self) -> Self: return self @abstractmethod - def create_node(self) -> TraversalNode: ... + def create_node(self) -> TraversalNode: + ... class BaseTask(BaseTraversal): From fe5feb4cb662b9583873dc67154a85f76a646339 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Wed, 17 Apr 2024 20:10:36 +0100 Subject: [PATCH 03/17] fix: adding secrets to sdk --- .../passing_parameters_notebook.py | 40 +++ .../passing_parameters_notebook.yaml | 0 .../passing_parameters_python.py | 52 ++++ .../passing_parameters_python.yaml | 0 .../03-parameters/passing_parameters_shell.py | 41 +++ .../passing_parameters_shell.yaml | 0 .../static_parameters_non_python.py | 55 ++++ .../static_parameters_non_python.yaml | 41 +++ .../03-parameters/static_parameters_python.py | 49 ++++ .../static_parameters_python.yaml | 30 +++ examples/README.md | 28 +- examples/common/functions.py | 81 ++++++ examples/common/initial_parameters.yaml | 6 + examples/common/read_parameters.ipynb | 69 +++++ examples/common/read_parameters_out.ipynb | 105 ++++++++ examples/common/simple_notebook_out.ipynb | 14 +- examples/common/write_parameters.ipynb | 68 +++++ examples/common/write_parameters_out.ipynb | 100 ++++++++ examples/parameters_initial.yaml | 4 - runnable/__init__.py | 4 + runnable/sdk.py | 3 +- runnable/tasks.py | 114 ++++++--- tests/runnable/test_sdk.py | 4 +- tests/scenarios/test_traversals.py | 214 ---------------- tests/test_examples.py | 239 +++++++++++------- 25 files changed, 995 insertions(+), 366 deletions(-) create mode 100644 examples/03-parameters/passing_parameters_notebook.py create mode 100644 examples/03-parameters/passing_parameters_notebook.yaml create mode 100644 examples/03-parameters/passing_parameters_python.py create mode 100644 examples/03-parameters/passing_parameters_python.yaml create mode 100644 examples/03-parameters/passing_parameters_shell.py create mode 100644 examples/03-parameters/passing_parameters_shell.yaml create mode 100644 examples/03-parameters/static_parameters_non_python.py create mode 100644 examples/03-parameters/static_parameters_non_python.yaml create mode 100644 examples/03-parameters/static_parameters_python.py create mode 100644 examples/03-parameters/static_parameters_python.yaml create mode 100644 examples/common/initial_parameters.yaml create mode 100644 examples/common/read_parameters.ipynb create mode 100644 examples/common/read_parameters_out.ipynb create mode 100644 examples/common/write_parameters.ipynb create mode 100644 examples/common/write_parameters_out.ipynb delete mode 100644 examples/parameters_initial.yaml delete mode 100644 
tests/scenarios/test_traversals.py
diff --git a/examples/03-parameters/passing_parameters_notebook.py b/examples/03-parameters/passing_parameters_notebook.py
new file mode 100644
index 00000000..ecb00783
--- /dev/null
+++ b/examples/03-parameters/passing_parameters_notebook.py
@@ -0,0 +1,40 @@
+from examples.common.functions import read_parameter
+from runnable import NotebookTask, Pipeline, PythonTask, metric, pickled
+
+
+def main():
+    write_parameters_from_notebook = NotebookTask(
+        notebook="examples/common/write_parameters.ipynb",
+        returns=[
+            pickled("df"),
+            "integer",
+            "floater",
+            "stringer",
+            "pydantic_param",
+            metric("score"),
+        ],
+        name="set_parameter",
+    )
+
+    read_parameters = PythonTask(
+        function=read_parameter,
+        name="get_parameters",
+    )
+
+    read_parameters_in_notebook = NotebookTask(
+        notebook="examples/common/read_parameters.ipynb",
+        terminate_with_success=True,
+        name="read_parameters_in_notebook",
+    )
+
+    pipeline = Pipeline(
+        steps=[write_parameters_from_notebook, read_parameters, read_parameters_in_notebook],
+    )
+
+    _ = pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/03-parameters/passing_parameters_notebook.yaml b/examples/03-parameters/passing_parameters_notebook.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/03-parameters/passing_parameters_python.py b/examples/03-parameters/passing_parameters_python.py
new file mode 100644
index 00000000..baf9c1e5
--- /dev/null
+++ b/examples/03-parameters/passing_parameters_python.py
@@ -0,0 +1,52 @@
+"""
+The below example shows how to set/get parameters in python
+tasks of the pipeline.
+
+The function, write_parameter, returns
+    - simple python data types (int, float, str)
+    - pydantic models
+    - pandas dataframe, any "object" type
+
+pydantic models are implicitly handled by runnable
+but "object" types should be marked as "pickled".
+
+Using pickled even for python data types is advised for
+reasonably large collections. 
+
+"""
+
+from examples.common.functions import read_parameter, write_parameter
+from runnable import Pipeline, PythonTask, metric, pickled
+
+
+def main():
+    write_parameters = PythonTask(
+        function=write_parameter,
+        returns=[
+            pickled("df"),
+            "integer",
+            "floater",
+            "stringer",
+            "pydantic_param",
+            metric("score"),
+        ],
+        name="set_parameter",
+    )
+
+    read_parameters = PythonTask(
+        function=read_parameter,
+        terminate_with_success=True,
+        name="get_parameters",
+    )
+
+    pipeline = Pipeline(
+        steps=[write_parameters, read_parameters],
+    )
+
+    _ = pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/03-parameters/passing_parameters_python.yaml b/examples/03-parameters/passing_parameters_python.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/03-parameters/passing_parameters_shell.py b/examples/03-parameters/passing_parameters_shell.py
new file mode 100644
index 00000000..3ef14bc2
--- /dev/null
+++ b/examples/03-parameters/passing_parameters_shell.py
@@ -0,0 +1,41 @@
+from examples.common.functions import read_unpickled_parameter
+from runnable import Pipeline, PythonTask, ShellTask, metric
+
+
+def main():
+    export_env_command = """
+    export integer=1
+    export floater=3.14
+    export stringer="hello"
+    export pydantic_param='{"x": 10, "foo": "bar"}'
+    export score=0.9
+    """
+    write_parameters_in_shell = ShellTask(
+        command=export_env_command,
+        returns=[
+            "integer",
+            "floater",
+            "stringer",
+            "pydantic_param",
+            metric("score"),
+        ],
+        name="write_parameter",
+    )
+
+    read_parameters = PythonTask(
+        function=read_unpickled_parameter,
+        name="read_parameters",
+        terminate_with_success=True,
+    )
+
+    pipeline = Pipeline(
+        steps=[write_parameters_in_shell, read_parameters],
+    )
+
+    _ = pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/03-parameters/passing_parameters_shell.yaml b/examples/03-parameters/passing_parameters_shell.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/03-parameters/static_parameters_non_python.py b/examples/03-parameters/static_parameters_non_python.py
new file mode 100644
index 00000000..41eae659
--- /dev/null
+++ b/examples/03-parameters/static_parameters_non_python.py
@@ -0,0 +1,55 @@
+"""
+The below example showcases setting up known initial parameters for a pipeline
+of notebook and shell based commands.
+
+The initial parameters as defined in the yaml file are:
+    integer: 1
+    floater : 3.14
+    stringer : hello
+    pydantic_param:
+        x: 10
+        foo: bar
+
+runnable exposes the nested parameters as a dictionary for notebook based tasks
+and as a json string for the shell based tasks.
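(Editorial aside, not part of the patch: for a shell based task, a nested parameter such as `pydantic_param` therefore arrives as a JSON string in an environment variable of the same name. A minimal sketch of recovering it, with the illustrative value taken from the shell example above:)

```python
import json
import os

# Inside a shell-backed step the nested parameter is a JSON string, e.g.
# pydantic_param='{"x": 10, "foo": "bar"}'; the default value here is only
# so the snippet runs outside a pipeline.
raw = os.environ.get("pydantic_param", '{"x": 10, "foo": "bar"}')
nested = json.loads(raw)
assert nested["x"] == 10
assert nested["foo"] == "bar"
```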
+
+"""
+
+from runnable import NotebookTask, Pipeline, ShellTask
+
+
+def main():
+    read_params_in_notebook = NotebookTask(
+        name="read_params_in_notebook",
+        notebook="examples/common/read_parameters.ipynb",
+    )
+
+    shell_command = """
+    if [ "$integer" = 1 ] \
+        && [ "$floater" = 3.14 ] \
+        && [ "$stringer" = "hello" ] \
+        && [ "$pydantic_param" = '{"x": 10, "foo": "bar"}' ]; then
+            echo "yaay"
+            exit 0;
+        else
+            echo "naay"
+            exit 1;
+    fi
+    """
+    read_params_in_shell = ShellTask(
+        name="read_params_in_shell",
+        command=shell_command,
+        terminate_with_success=True,
+    )
+
+    pipeline = Pipeline(
+        steps=[read_params_in_notebook, read_params_in_shell],
+    )
+
+    _ = pipeline.execute(parameters_file="examples/common/initial_parameters.yaml")
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/03-parameters/static_parameters_non_python.yaml b/examples/03-parameters/static_parameters_non_python.yaml
new file mode 100644
index 00000000..ba581fa8
--- /dev/null
+++ b/examples/03-parameters/static_parameters_non_python.yaml
@@ -0,0 +1,41 @@
+dag:
+  description: |
+    The below example showcases setting up known initial parameters for a pipeline
+    of notebook and shell based commands.
+
+    The initial parameters as defined in the yaml file are:
+      integer: 1
+      floater : 3.14
+      stringer : hello
+      pydantic_param:
+        x: 10
+        foo: bar
+
+    runnable exposes the nested parameters as a dictionary for notebook based tasks
+    and as a json string for the shell based tasks.
+  start_at: read_params_in_notebook
+  steps:
+    read_params_in_notebook:
+      type: task
+      command_type: notebook
+      command: examples/common/read_parameters.ipynb
+      next: read_params_in_shell
+    read_params_in_shell:
+      type: task
+      command_type: shell
+      command: |
+        if [ "$integer" = 1 ] \
+          && [ "$floater" = 3.14 ] \
+          && [ "$stringer" = "hello" ] \
+          && [ "$pydantic_param" = '{"x": 10, "foo": "bar"}' ]; then
+            echo "yaay"
+            exit 0;
+        else
+            echo "naay"
+            exit 1;
+        fi
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
diff --git a/examples/03-parameters/static_parameters_python.py b/examples/03-parameters/static_parameters_python.py
new file mode 100644
index 00000000..7c82ea3a
--- /dev/null
+++ b/examples/03-parameters/static_parameters_python.py
@@ -0,0 +1,49 @@
+"""
+The below example showcases setting up known initial parameters for a pipeline
+of only python tasks.
+
+The initial parameters as defined in the yaml file are:
+    integer: 1
+    floater: 3.14
+    stringer: hello
+    pydantic_param:
+        x: 10
+        foo: bar
+
+runnable allows using pydantic models for deeply nested parameters and
+casts appropriately based on annotation. eg: read_initial_params_as_pydantic
+
+If no annotation is provided, the parameter is assumed to be a dictionary.
+eg: read_initial_params_as_json
+
+"""
+
+from examples.common.functions import (
+    read_initial_params_as_json,
+    read_initial_params_as_pydantic,
+)
+from runnable import Pipeline, PythonTask
+
+
+def main():
+    read_params_as_pydantic = PythonTask(
+        function=read_initial_params_as_pydantic,
+        name="read_params_as_pydantic",
+    )
+
+    read_params_as_json = PythonTask(
+        function=read_initial_params_as_json,
+        terminate_with_success=True,
+        name="read_params_json",
+    )
+
+    pipeline = Pipeline(
+        steps=[read_params_as_pydantic, read_params_as_json],
+        add_terminal_nodes=True,
+    )
+
+    _ = pipeline.execute(parameters_file="examples/common/initial_parameters.yaml")
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/03-parameters/static_parameters_python.yaml b/examples/03-parameters/static_parameters_python.yaml
new file mode 100644
index 00000000..ea0b8b7a
--- /dev/null
+++ b/examples/03-parameters/static_parameters_python.yaml
@@ -0,0 +1,30 @@
+dag:
+  description: |
+    The below example showcases setting up known initial parameters for a pipeline
+    of only python tasks.
+
+    The initial parameters as defined in the yaml file are:
+      integer: 1
+      floater: 3.14
+      stringer: hello
+      pydantic_param:
+        x: 10
+        foo: bar
+
+    runnable allows using pydantic models for deeply nested parameters and
+    casts appropriately based on annotation. eg: read_initial_params_as_pydantic
+
+    If no annotation is provided, the parameter is assumed to be a dictionary.
+    eg: read_initial_params_as_json
+  start_at: read_params_as_pydantic
+  steps:
+    read_params_as_pydantic:
+      type: task
+      command: examples.common.functions.read_initial_params_as_pydantic
+      next: read_params_json
+    read_params_json:
+      type: task
+      command: examples.common.functions.read_initial_params_as_json
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
diff --git a/examples/README.md b/examples/README.md
index 2bcea172..a0eb415b 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -23,14 +23,30 @@ The next section has examples on stitching these tasks together for complex oper
 - 02-sequential: Examples of stitching tasks together including behavior in case of failures.
 
-  - traversal: A pipeline which is a mixed bag of notebooks, python functions and
+  - [traversal.py](./02-sequential/traversal.py), [traversal.yaml](./02-sequential/traversal.yaml): A pipeline which is a mixed bag of notebooks, python functions and
     shell scripts.
-  - default_failure: The default failure behavior.
-  - on_failure_fail: On failure of a step, do some action and fail
-  - on_failure_success: On failure of a step, take a different route
+  - [default_fail.py](./02-sequential/default_fail.py), [default_fail.yaml](./02-sequential/default_fail.yaml): The default failure behavior.
+  - [on_failure_fail.py](./02-sequential/on_failure_fail.py), [on_failure_fail.yaml](./02-sequential/on_failure_fail.yaml): On failure of a step, do some action and fail
+  - [on_failure_succeed.py](./02-sequential/on_failure_succeed.py), [on_failure_succeed.yaml](./02-sequential/on_failure_succeed.yaml): On failure of a step, take a different route
 
 The above examples show stitching complex operations of the pipeline.
 
-The next section has examples on
+The next section has examples on communicating between tasks during execution.
 
-- 03: Examples of passing parameters between tasks
+- 03: Examples of passing parameters between tasks of a pipeline.
+
+  Guidelines:
+
+  - python functions can get/set simple python data types, pydantic models, objects marked as pickled. Some of the
+    simple data types can also be marked as a metric.
+  -
+
+
+  - [static_parameters_python.py](./03-parameters/static_parameters_python.py), [static_parameters_python.yaml](./03-parameters/static_parameters_python.yaml): A pipeline to show the access of static or known parameters by python tasks.
+
+  - [static_parameters_non_python.py](./03-parameters/static_parameters_non_python.py), [static_parameters_non_python.yaml](./03-parameters/static_parameters_non_python.yaml): A pipeline to show the access of static or known parameters by notebook and shell tasks.
+
+  - [passing_parameters_python.py](./03-parameters/passing_parameters_python.py), [passing_parameters_python.yaml](./03-parameters/passing_parameters_python.yaml): shows the mechanism of passing parameters (simple python datatypes, "dillable" objects, pydantic models) and registering metrics between python tasks.
+
+  - [passing_parameters_notebook.py](./03-parameters/passing_parameters_notebook.py), [passing_parameters_notebook.yaml](./03-parameters/passing_parameters_notebook.yaml): shows the mechanism of passing parameters (simple python datatypes, "dillable" objects, pydantic models) and registering metrics between tasks. runnable can "get" object
+  parameters from notebooks but cannot inject them into notebooks.
diff --git a/examples/common/functions.py b/examples/common/functions.py
index 3eb1aa7e..36eb3d9a 100644
--- a/examples/common/functions.py
+++ b/examples/common/functions.py
@@ -1,3 +1,9 @@
+from typing import Dict, Union
+
+import pandas as pd
+from pydantic import BaseModel
+
+
 def hello():
     "The most basic function"
     print("Hello World!")
@@ -6,3 +12,78 @@ def hello():
 def raise_ex():
     "A function that raises an exception"
     raise Exception("This is an exception")
+
+
+class ComplexParams(BaseModel):
+    x: int
+    foo: str
+
+
+def read_initial_params_as_pydantic(
+    integer: int,
+    floater: float,
+    stringer: str,
+    pydantic_param: ComplexParams,
+):
+    assert integer == 1
+    assert floater == 3.14
+    assert stringer == "hello"
+    assert pydantic_param.x == 10
+    assert pydantic_param.foo == "bar"
+
+
+def read_initial_params_as_json(
+    integer: int,
+    floater: float,
+    stringer: str,
+    pydantic_param: Dict[str, Union[int, str]],
+):
+    assert integer == 1
+    assert floater == 3.14
+    assert stringer == "hello"
+    assert pydantic_param["x"] == 10
+    assert pydantic_param["foo"] == "bar"
+
+
+def write_parameter():
+    integer = 1
+    floater = 3.14
+    c = ComplexParams(x=10, foo="bar")
+    data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}
+
+    df = pd.DataFrame(data)
+    score = 0.9
+
+    return df, integer, floater, "hello", c, score
+
+
+def read_parameter(
+    df: pd.DataFrame,
+    integer: int,
+    floater: float,
+    stringer: str,
+    pydantic_param: ComplexParams,
+    score: float,
+):
+    assert integer == 1
+    assert floater == 3.14
+    assert stringer == "hello"
+    assert pydantic_param.x == 10
+    assert pydantic_param.foo == "bar"
+    assert df.shape == (3, 2)
+    assert score == 0.9
+
+
+def read_unpickled_parameter(
+    integer: int,
+    floater: float,
+    stringer: str,
+    pydantic_param: ComplexParams,
+    score: float,
+):
+    assert integer == 1
+    assert floater == 3.14
+    assert stringer == "hello"
+    assert pydantic_param.x == 10
+    assert pydantic_param.foo == "bar"
+    assert score == 0.9
diff --git a/examples/common/initial_parameters.yaml b/examples/common/initial_parameters.yaml
new file mode 100644
index 00000000..eb987ed4
--- /dev/null
+++ b/examples/common/initial_parameters.yaml
@@ -0,0 +1,6 @@
+integer: 1
+floater : 3.14
+stringer : hello
+pydantic_param: + x: 10 + foo: bar diff --git a/examples/common/read_parameters.ipynb b/examples/common/read_parameters.ipynb new file mode 100644 index 00000000..b8935014 --- /dev/null +++ b/examples/common/read_parameters.ipynb @@ -0,0 +1,69 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "41a71aa7", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "integer = None\n", + "stringer = None\n", + "floater = None\n", + "pydantic_param = None\n", + "score = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "assert integer == 1\n", + "assert stringer == \"hello\"\n", + "assert floater == 3.14" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "faf6769e", + "metadata": {}, + "outputs": [], + "source": [ + "from examples.common.functions import ComplexParams\n", + "\n", + "pydantic_param = ComplexParams(**pydantic_param)\n", + "assert pydantic_param.x == 10\n", + "assert pydantic_param.foo == \"bar\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/read_parameters_out.ipynb b/examples/common/read_parameters_out.ipynb new file mode 100644 index 00000000..ea6525f8 --- /dev/null +++ b/examples/common/read_parameters_out.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "41a71aa7", + "metadata": { + "ploomber": { + "timestamp_end": 1713380823.765499, + "timestamp_start": 1713380823.765069 + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "integer = None\n", + "stringer = None\n", + "floater = None\n", + "pydantic_param = None\n", + "score = None" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d88d58c6", + "metadata": { + "ploomber": { + "timestamp_end": 1713380823.765846, + "timestamp_start": 1713380823.765527 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n", + "integer = 1\n", + "floater = 3.14\n", + "stringer = \"hello\"\n", + "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1713380823.766088, + "timestamp_start": 1713380823.765864 + } + }, + "outputs": [], + "source": [ + "assert integer == 1\n", + "assert stringer == \"hello\"\n", + "assert floater == 3.14" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "faf6769e", + "metadata": { + "ploomber": { + "timestamp_end": 1713380823.766474, + "timestamp_start": 1713380823.766105 + } + }, + "outputs": [], + "source": [ + "from examples.common.functions import ComplexParams\n", + "\n", + "pydantic_param = ComplexParams(**pydantic_param)\n", + "assert pydantic_param.x == 10\n", + "assert pydantic_param.foo == \"bar\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/simple_notebook_out.ipynb b/examples/common/simple_notebook_out.ipynb index 064bc4e2..91e156fe 100644 --- a/examples/common/simple_notebook_out.ipynb +++ b/examples/common/simple_notebook_out.ipynb @@ -3,11 +3,11 @@ { "cell_type": "code", "execution_count": 1, - "id": "2d2fa303", + "id": "c8a68d0d", "metadata": { "ploomber": { - "timestamp_end": 1712760519.616379, - "timestamp_start": 1712760519.616218 + "timestamp_end": 1713380822.228675, + "timestamp_start": 1713380822.228447 }, "tags": [ "injected-parameters" @@ -24,8 +24,8 @@ "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", "metadata": { "ploomber": { - "timestamp_end": 1712760519.616649, - "timestamp_start": 1712760519.616443 + "timestamp_end": 1713380822.22899, + "timestamp_start": 1713380822.228748 } }, "outputs": [], @@ -40,8 +40,8 @@ "id": "8eac7a3f", "metadata": { "ploomber": { - "timestamp_end": 1712760519.616772, - "timestamp_start": 1712760519.616663 + "timestamp_end": 1713380822.229158, + "timestamp_start": 1713380822.229008 } }, "outputs": [ diff --git a/examples/common/write_parameters.ipynb b/examples/common/write_parameters.ipynb new file mode 100644 index 00000000..da016317 --- /dev/null +++ b/examples/common/write_parameters.ipynb @@ -0,0 +1,68 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "41a71aa7", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "from examples.common.functions import ComplexParams\n", + "\n", + "pydantic_param = ComplexParams(x=10, foo=\"bar\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "764f661d", + "metadata": {}, + "outputs": [], + "source": [ + "data = {\"calories\": [420, 380, 390], \"duration\": [50, 40, 45]}\n", + "\n", + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "integer = 1\n", + "floater = 3.14\n", + "stringer = \"hello\"\n", + "score = 0.9" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/write_parameters_out.ipynb b/examples/common/write_parameters_out.ipynb new file mode 100644 index 00000000..e6c06d4a --- /dev/null +++ b/examples/common/write_parameters_out.ipynb @@ -0,0 +1,100 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "41a71aa7", + "metadata": { + "ploomber": { + "timestamp_end": 1713380822.509565, + "timestamp_start": 1713380822.508958 + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "from examples.common.functions import ComplexParams\n", + "\n", + "pydantic_param = ComplexParams(x=10, foo=\"bar\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d53507d8", + "metadata": { + "ploomber": { + "timestamp_end": 
1713380822.509736, + "timestamp_start": 1713380822.509595 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "764f661d", + "metadata": { + "ploomber": { + "timestamp_end": 1713380822.511416, + "timestamp_start": 1713380822.509754 + } + }, + "outputs": [], + "source": [ + "data = {\"calories\": [420, 380, 390], \"duration\": [50, 40, 45]}\n", + "\n", + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1713380822.511728, + "timestamp_start": 1713380822.51144 + } + }, + "outputs": [], + "source": [ + "integer = 1\n", + "floater = 3.14\n", + "stringer = \"hello\"\n", + "score = 0.9" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/parameters_initial.yaml b/examples/parameters_initial.yaml deleted file mode 100644 index e88e14c0..00000000 --- a/examples/parameters_initial.yaml +++ /dev/null @@ -1,4 +0,0 @@ -simple: 1 -inner: - x: 10 - y: "hello world!!" diff --git a/runnable/__init__.py b/runnable/__init__.py index 247a6311..89b9b6a1 100644 --- a/runnable/__init__.py +++ b/runnable/__init__.py @@ -29,6 +29,10 @@ pickled, ) +## TODO: Summary should be a bit better for catalog. +## If the execution fails, hint them about the retry executor. +# Make the retry executor loose! + # TODO: Think of model registry as a central place to store models. # TODO: Implement Sagemaker pipelines as a executor. 
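(Editorial aside, not part of the patch: the sdk.py/tasks.py hunks below change `secrets` from a mapping to a plain list of keys; each key is resolved through the configured secrets handler and exported as an environment variable of the same name for the duration of the task. A minimal sketch of the intended usage — the secret key and function are hypothetical:)

```python
import os

from runnable import Pipeline, PythonTask


def call_service():
    # Per set_secrets_as_env_variables below, each declared secret is
    # expected to be exported as an environment variable named after the key.
    token = os.environ["API_TOKEN"]
    print(f"token present: {bool(token)}")


task = PythonTask(
    name="call_service",
    function=call_service,
    secrets=["API_TOKEN"],  # hypothetical key, resolved by the secrets handler
    terminate_with_success=True,
)

pipeline = Pipeline(steps=[task])
```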
diff --git a/runnable/sdk.py b/runnable/sdk.py
index 0993c017..d14d8ce6 100644
--- a/runnable/sdk.py
+++ b/runnable/sdk.py
@@ -191,6 +191,7 @@ class BaseTask(BaseTraversal):
     catalog: Optional[Catalog] = Field(default=None, alias="catalog")
     overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
     returns: List[Union[str, TaskReturns]] = Field(default_factory=list, alias="returns")
+    secrets: List[str] = Field(default_factory=list)
 
     @field_validator("returns", mode="before")
     @classmethod
@@ -619,7 +620,7 @@ def model_post_init(self, __context: Any) -> None:
             all_steps.append(step)
 
         seen = set()
-        unique = [x for x in all_steps if not (x in seen or seen.add(x))]
+        unique = [x for x in all_steps if not (x in seen or seen.add(x))]  # type: ignore
 
         self._dag = graph.Graph(
             start_at=all_steps[0].name,
diff --git a/runnable/tasks.py b/runnable/tasks.py
index 1774b63d..e80bb468 100644
--- a/runnable/tasks.py
+++ b/runnable/tasks.py
@@ -16,7 +16,7 @@
 from stevedore import driver
 
 import runnable.context as context
 from runnable import console, defaults, exceptions, parameters, utils
 from runnable.datastore import (
     JsonParameter,
     MetricParameter,
@@ -46,7 +46,7 @@ class BaseTaskType(BaseModel):
     task_type: str = Field(serialization_alias="command_type")
     node_name: str = Field(exclude=True)
-    secrets: Dict[str, str] = Field(default_factory=dict)
+    secrets: List[str] = Field(default_factory=list)
     returns: List[TaskReturns] = Field(default_factory=list, alias="returns")
 
     model_config = ConfigDict(extra="forbid")
@@ -73,15 +73,14 @@ def get_cli_options(self) -> Tuple[str, dict]:
         raise NotImplementedError()
 
     def set_secrets_as_env_variables(self):
-        for key, value in self.secrets.items():
+        for key in self.secrets:
             secret_value = context.run_context.secrets_handler.get(key)
-            self.secrets[value] = secret_value
-            os.environ[value] = secret_value
+            os.environ[key] = secret_value
 
     def delete_secrets_from_env_variables(self):
-        for _, value in self.secrets.items():
-            if value in os.environ:
-                del os.environ[value]
+        for key in self.secrets:
+            if key in os.environ:
+                del os.environ[key]
 
     def execute_command(
         self,
@@ -327,7 +326,7 @@ def execute_command(
         import ploomber_engine as pm
         from ploomber_engine.ipython import PloomberClient
 
-        notebook_output_path = self.notebook_output_path
+        notebook_output_path = self.notebook_output_path or ""
 
         with self.execution_context(
             map_variable=map_variable, allow_complex=False
@@ -432,15 +431,17 @@ def execute_command(
 
         # Expose secrets as environment variables
         if self.secrets:
-            for key, value in self.secrets.items():
+            for key in self.secrets:
                 secret_value = context.run_context.secrets_handler.get(key)
-                subprocess_env[value] = secret_value
+                subprocess_env[key] = secret_value
 
         with self.execution_context(map_variable=map_variable, allow_complex=False) as params:
             subprocess_env.update({k: v.get_value() for k, v in params.items()})
 
             # Json dumps all runnable environment variables
             for key, value in subprocess_env.items():
+                if isinstance(value, str):
+                    continue
                 subprocess_env[key] = json.dumps(value)
 
             collect_delimiter = "=== COLLECT ==="
@@ -449,37 +450,80 @@ def execute_command(
             logger.info(f"Executing shell command: {command}")
 
             capture = False
-            return_keys = [x.name for x in self.returns]
+            return_keys = {x.name: x for x in self.returns}
 
-            with subprocess.Popen(
+            proc = subprocess.Popen(
                 command,
                 shell=True,
                 env=subprocess_env,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 text=True,
-            ) as proc:
-            
for line in proc.stdout: # type: ignore - logger.info(line) - print(line) - - if line.strip() == collect_delimiter: - # The lines from now on should be captured - capture = True - continue - - if capture: - key, value = line.strip().split("=", 1) - if key in (return_keys or []): - param_name = Template(key).safe_substitute(map_variable) # type: ignore - try: - params[param_name] = JsonParameter(kind="json", value=json.loads(value)) - except json.JSONDecodeError: - params[param_name] = JsonParameter(kind="json", value=value) - - proc.wait() - if proc.returncode == 0: - attempt_log.status = defaults.SUCCESS + ) + result = proc.communicate() + logger.debug(result) + logger.info(proc.returncode) + + if proc.returncode != 0: + msg = ",".join(result[1].split("\n")) + attempt_log.status = defaults.FAIL + attempt_log.end_time = str(datetime.now()) + attempt_log.message = msg + console.print(msg, style=defaults.error_style) + return attempt_log + + # for stderr + for line in result[1].split("\n"): + if line.strip() == "": + continue + console.print(line, style=defaults.warning_style) + + output_parameters: Dict[str, Parameter] = {} + metrics: Dict[str, Parameter] = {} + + # only from stdout + for line in result[0].split("\n"): + if line.strip() == "": + continue + + logger.info(line) + console.print(line) + + if line.strip() == collect_delimiter: + # The lines from now on should be captured + capture = True + continue + + if capture: + key, value = line.strip().split("=", 1) + if key in return_keys: + task_return = return_keys[key] + + try: + value = json.loads(value) + except json.JSONDecodeError: + value = value + + output_parameter = task_return_to_parameter( + task_return=task_return, + value=value, + ) + + if task_return.kind == "metric": + metrics[task_return.name] = output_parameter + + param_name = task_return.name + if map_variable: + for _, v in map_variable.items(): + param_name = f"{param_name}_{v}" + + output_parameters[param_name] = output_parameter + + attempt_log.output_parameters = output_parameters + attempt_log.user_defined_metrics = metrics + params.update(output_parameters) + + attempt_log.status = defaults.SUCCESS attempt_log.end_time = str(datetime.now()) return attempt_log diff --git a/tests/runnable/test_sdk.py b/tests/runnable/test_sdk.py index 3958c73d..b174ba92 100644 --- a/tests/runnable/test_sdk.py +++ b/tests/runnable/test_sdk.py @@ -1,7 +1,7 @@ import pytest -from runnable.extensions import nodes from runnable import sdk +from runnable.extensions import nodes def test_success_init(): @@ -26,6 +26,6 @@ def test_stub_node_makes_next_success_if_terminate_with_success(): def test_stub_node_takes_given_next_node(): - test_stub = sdk.Stub(name="stub", next="test") + test_stub = sdk.Stub(name="stub", next_node="test") assert test_stub.create_node() == nodes.StubNode(name="stub", next_node="test", internal_name="stub") diff --git a/tests/scenarios/test_traversals.py b/tests/scenarios/test_traversals.py deleted file mode 100644 index 4bbc6993..00000000 --- a/tests/scenarios/test_traversals.py +++ /dev/null @@ -1,214 +0,0 @@ -# ruff: noqa - -import tempfile -from pathlib import Path -from rich import print - -import pytest -import ruamel.yaml - -from runnable import defaults, entrypoints, utils - -yaml = ruamel.yaml.YAML() - -PIPELINES_DEFINITION = Path("examples/") - - -def get_config(): - config = { - "executor": { - "type": "local", - }, - "run_log_store": {"type": "file-system", "config": {"log_folder": ""}}, - } - return config - - -def get_container_config(): - config = 
{ - "executor": {"type": "local-container", "config": {"docker_image": "does-not-matter"}}, - "run_log_store": {"type": "file-system", "config": {"log_folder": ""}}, - } - return config - - -def get_chunked_config(): - config = { - "executor": { - "type": "local", - }, - "run_log_store": {"type": "chunked-fs", "config": {"log_folder": ""}}, - } - return config - - -def get_configs(): - return [get_config(), get_chunked_config()] - - -def write_config(work_dir: Path, config: dict): - config["run_log_store"]["config"]["log_folder"] = str(work_dir) - with open(work_dir / "config.yaml", "wb") as f: - yaml.dump(config, f) - - -def get_run_log(work_dir, run_id): - config_file = work_dir / "config.yaml" - - if utils.does_file_exist(config_file): - mode_executor = entrypoints.prepare_configurations(configuration_file=str(config_file), run_id=run_id) - return mode_executor.run_log_store.get_run_log_by_id(run_id=run_id, full=True).model_dump() - raise Exception - - -@pytest.mark.no_cover -def test_success(): - configs = get_configs() - - for config in configs: - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - - write_config(context_dir_path, config) - - run_id = "testing_success" - - entrypoints.execute( - configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(PIPELINES_DEFINITION / "mocking.yaml"), - run_id=run_id, - ) - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["step 1", "step 2", "step 3", "success"] - except: - assert False - - -@pytest.mark.no_cover -def test_failure(): - configs = get_configs() - - for config in configs: - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - - write_config(context_dir_path, config) - - run_id = "testing_failure" - - try: - entrypoints.execute( - configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(PIPELINES_DEFINITION / "default-fail.yaml"), - run_id=run_id, - ) - except Exception as ex: - print(ex) - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.FAIL - assert list(run_log["steps"].keys()) == ["step 1", "step 2", "fail"] - except: - assert False - - -@pytest.mark.no_cover -def test_on_failure(): - configs = get_configs() - for config in configs: - with tempfile.TemporaryDirectory() as context_dir: - context_dir_path = Path(context_dir) - - write_config(context_dir_path, config) - - run_id = "testing_on_failure" - - try: - entrypoints.execute( - configuration_file=str(context_dir_path / "config.yaml"), - pipeline_file=str(PIPELINES_DEFINITION / "on-failure.yaml"), - run_id=run_id, - ) - except: - pass - - try: - run_log = get_run_log(context_dir_path, run_id) - assert run_log["status"] == defaults.SUCCESS - assert list(run_log["steps"].keys()) == ["step 1", "step 3", "success"] - except: - assert False - - -# @pytest.mark.no_cover -# def test_parallel(): -# configs = get_configs() -# for config in configs: -# with tempfile.TemporaryDirectory() as context_dir: -# context_dir_path = Path(context_dir) - -# write_config(context_dir_path, config) -# run_id = "testing_parallel" - -# entrypoints.execute( -# configuration_file=str(context_dir_path / "config.yaml"), -# pipeline_file=str(PIPELINES_DEFINITION / "concepts/parallel.yaml"), -# run_id=run_id, -# ) - -# try: -# run_log = get_run_log(context_dir_path, run_id) -# assert run_log["status"] == defaults.SUCCESS -# assert 
list(run_log["steps"].keys()) == ["step 1", "step 2", "step 3", "success"] -# assert list(run_log["steps"]["step 2"]["branches"]["step 2.branch_a"]["steps"].keys()) == [ -# "step 2.branch_a.step 1", -# "step 2.branch_a.step 2", -# "step 2.branch_a.success", -# ] -# assert list(run_log["steps"]["step 2"]["branches"]["step 2.branch_b"]["steps"].keys()) == [ -# "step 2.branch_b.step 1", -# "step 2.branch_b.step 2", -# "step 2.branch_b.success", -# ] -# except: -# assert False - - -# @pytest.mark.no_cover -# def test_parallel_fail(parallel_fail_graph): -# configs = get_configs() -# for config in configs: -# with tempfile.TemporaryDirectory() as context_dir: -# context_dir_path = Path(context_dir) -# dag = {"dag": parallel_fail_graph().dict()} - -# write_dag_and_config(context_dir_path, dag, config) -# run_id = "testing_parallel" - -# try: -# entrypoints.execute( -# configuration_file=str(context_dir_path / "config.yaml"), -# pipeline_file=str(context_dir_path / "dag.yaml"), -# run_id=run_id, -# ) -# except: -# pass - -# try: -# run_log = get_run_log(context_dir_path, run_id) -# assert run_log["status"] == defaults.FAIL -# assert list(run_log["steps"].keys()) == ["first", "second", "fail"] -# assert list(run_log["steps"]["second"]["branches"]["second.a"]["steps"].keys()) == [ -# "second.a.first", -# "second.a.fail", -# ] -# assert list(run_log["steps"]["second"]["branches"]["second.b"]["steps"].keys()) == [ -# "second.b.first", -# "second.b.fail", -# ] -# except: -# assert False diff --git a/tests/test_examples.py b/tests/test_examples.py index d71971b2..9eabf0da 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -9,102 +9,21 @@ from runnable import exceptions from runnable.entrypoints import execute -# (file, is_fail?, kwargs) -examples = [ - ("concepts/catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), - ("concepts/map.yaml", False, {}), - ("concepts/map_shell.yaml", False, {}), - ("concepts/nesting.yaml", False, {}), - ("concepts/notebook_native_parameters.yaml", False, {"parameters_file": "examples/concepts/parameters.yaml"}), - ("concepts/parallel.yaml", False, {}), - ("concepts/simple_notebook.yaml", False, {}), - ("concepts/simple.yaml", False, {}), - ("catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), - ("default-fail.yaml", True, {}), - ("on-failure.yaml", False, {}), - ("parallel-fail.yaml", True, {}), -] - - -def list_examples(): - for example in examples: - yield example - - -@pytest.mark.parametrize("example", list_examples()) -@pytest.mark.no_cover -@pytest.mark.e2e -def test_yaml_examples(example): - print(f"Testing {example}...") - examples_path = Path("examples") - file_path, status, kwargs = example - try: - full_file_path = examples_path / file_path - configuration_file = kwargs.pop("configuration_file", "") - execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) - except exceptions.ExecutionFailedError: - if not status: - raise - - -@pytest.mark.parametrize("example", list_examples()) -@pytest.mark.no_cover -@pytest.mark.e2e -def test_yaml_examples_argo(example): - print(f"Testing {example}...") - examples_path = Path("examples") - file_path, status, kwargs = example - try: - full_file_path = examples_path / file_path - kwargs.pop("configuration_file", "") - configuration_file = "examples/configs/argo-config.yaml" - execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) - subprocess.run(["argo", 
"lint", "--offline", "argo-pipeline.yaml"], check=True) - except exceptions.ExecutionFailedError: - if not status: - raise - - -@pytest.mark.parametrize("example", list_examples()) -@pytest.mark.no_cover -@pytest.mark.e2e_container -def test_yaml_examples_container(example): - print(f"Testing {example}...") - examples_path = Path("examples") - file_path, status, kwargs = example - try: - full_file_path = examples_path / file_path - kwargs.pop("configuration_file", "") - configuration_file = "examples/configs/local-container.yaml" - os.environ["runnable_VAR_default_docker_image"] = "runnable:3.8" - execute(configuration_file=configuration_file, pipeline_file=str(full_file_path), **kwargs) - except exceptions.ExecutionFailedError: - if not status: - raise - - -@contextmanager -def secrets_env_context(): - os.environ["secret"] = "secret_value" - os.environ["runnable_CONFIGURATION_FILE"] = "examples/configs/secrets-env-default.yaml" - yield - del os.environ["secret"] - del os.environ["runnable_CONFIGURATION_FILE"] - - -# function, success, context +# # (file, is_fail?, kwargs) python_examples = [ - ("catalog", False, None), - ("catalog_simple", False, None), - ("mocking", False, None), - ("on_failure", False, None), - ("parameters", False, None), - ("parameters_simple", False, None), - ("concepts.catalog", False, None), - ("concepts.map", False, None), - ("concepts.nesting", False, None), - ("concepts.parallel", False, None), - ("concepts.simple", False, None), + ("01-tasks/notebook", False, None), + ("01-tasks/python_tasks", False, None), + ("01-tasks/scripts", False, None), + ("01-tasks/stub", False, None), + ("02-sequential/default_fail", False, None), + ("02-sequential/on_failure_fail", False, None), + ("02-sequential/on_failure_succeed", False, None), + ("02-sequential/traversal", False, None), + ("03-parameters/passing_parameters_notebook", False, None), + ("03-parameters/passing_parameters_python", False, None), + ("03-parameters/passing_parameters_shell", False, None), + ("03-parameters/static_parameters_non_python", False, None), + ("03-parameters/static_parameters_python", False, None), ] @@ -114,7 +33,7 @@ def list_python_examples(): @pytest.mark.parametrize("example", list_python_examples()) -@pytest.mark.no_cover +# @pytest.mark.no_cover @pytest.mark.e2e def test_python_examples(example): print(f"Testing {example}...") @@ -126,7 +45,7 @@ def test_python_examples(example): else: context = context() - imported_module = importlib.import_module(f"examples.{mod}") + imported_module = importlib.import_module(f"examples.{mod.replace('/', '.')}") f = getattr(imported_module, "main") try: with context: @@ -134,3 +53,129 @@ def test_python_examples(example): except exceptions.ExecutionFailedError: if not status: raise + + +# examples = [ +# ("concepts/catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), +# ("concepts/map.yaml", False, {}), +# ("concepts/map_shell.yaml", False, {}), +# ("concepts/nesting.yaml", False, {}), +# ("concepts/notebook_native_parameters.yaml", False, {"parameters_file": "examples/concepts/parameters.yaml"}), +# ("concepts/parallel.yaml", False, {}), +# ("concepts/simple_notebook.yaml", False, {}), +# ("concepts/simple.yaml", False, {}), +# ("catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), +# ("default-fail.yaml", True, {}), +# ("on-failure.yaml", False, {}), +# ("parallel-fail.yaml", True, {}), +# ] + + +# def list_examples(): +# for example in examples: +# yield example + + +# 
@pytest.mark.parametrize("example", list_examples()) +# @pytest.mark.no_cover +# @pytest.mark.e2e +# def test_yaml_examples(example): +# print(f"Testing {example}...") +# examples_path = Path("examples") +# file_path, status, kwargs = example +# try: +# full_file_path = examples_path / file_path +# configuration_file = kwargs.pop("configuration_file", "") +# execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) +# except exceptions.ExecutionFailedError: +# if not status: +# raise + + +# @pytest.mark.parametrize("example", list_examples()) +# @pytest.mark.no_cover +# @pytest.mark.e2e +# def test_yaml_examples_argo(example): +# print(f"Testing {example}...") +# examples_path = Path("examples") +# file_path, status, kwargs = example +# try: +# full_file_path = examples_path / file_path +# kwargs.pop("configuration_file", "") +# configuration_file = "examples/configs/argo-config.yaml" +# execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) +# subprocess.run(["argo", "lint", "--offline", "argo-pipeline.yaml"], check=True) +# except exceptions.ExecutionFailedError: +# if not status: +# raise + + +# @pytest.mark.parametrize("example", list_examples()) +# @pytest.mark.no_cover +# @pytest.mark.e2e_container +# def test_yaml_examples_container(example): +# print(f"Testing {example}...") +# examples_path = Path("examples") +# file_path, status, kwargs = example +# try: +# full_file_path = examples_path / file_path +# kwargs.pop("configuration_file", "") +# configuration_file = "examples/configs/local-container.yaml" +# os.environ["runnable_VAR_default_docker_image"] = "runnable:3.8" +# execute(configuration_file=configuration_file, pipeline_file=str(full_file_path), **kwargs) +# except exceptions.ExecutionFailedError: +# if not status: +# raise + + +# @contextmanager +# def secrets_env_context(): +# os.environ["secret"] = "secret_value" +# os.environ["runnable_CONFIGURATION_FILE"] = "examples/configs/secrets-env-default.yaml" +# yield +# del os.environ["secret"] +# del os.environ["runnable_CONFIGURATION_FILE"] + + +# # function, success, context +# python_examples = [ +# ("catalog", False, None), +# ("catalog_simple", False, None), +# ("mocking", False, None), +# ("on_failure", False, None), +# ("parameters", False, None), +# ("parameters_simple", False, None), +# ("concepts.catalog", False, None), +# ("concepts.map", False, None), +# ("concepts.nesting", False, None), +# ("concepts.parallel", False, None), +# ("concepts.simple", False, None), +# ] + + +# def list_python_examples(): +# for example in python_examples: +# yield example + + +# @pytest.mark.parametrize("example", list_python_examples()) +# @pytest.mark.no_cover +# @pytest.mark.e2e +# def test_python_examples(example): +# print(f"Testing {example}...") + +# mod, status, context = example + +# if not context: +# context = nullcontext() +# else: +# context = context() + +# imported_module = importlib.import_module(f"examples.{mod}") +# f = getattr(imported_module, "main") +# try: +# with context: +# f() +# except exceptions.ExecutionFailedError: +# if not status: +# raise From 8d0a7aca8c475148117e45669d40ce383f37572a Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Thu, 18 Apr 2024 21:05:48 +0100 Subject: [PATCH 04/17] docs: adding more examples --- examples/README.md | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/README.md b/examples/README.md index a0eb415b..476478ae 100644 --- 
a/examples/README.md
+++ b/examples/README.md
@@ -17,6 +17,8 @@ Please use this as an index to find specific example.
   - [scripts.py](./01-tasks/scripts.py), [scripts.yaml](./01-tasks/scripts.yaml): uses shell scripts as tasks
     The stdout/stderr of all scripts are captured and stored in the catalog.
 
+---
+
 The above examples showcase executable units of the pipeline.
 The next section has examples on stitching these tasks together for complex operations.
 
@@ -30,23 +32,31 @@ The next section has examples on stitching these tasks together for complex oper
   - [traversal.py](./02-sequential/traversal.py), [traversal.yaml](./02-sequential/traversal.yaml): A pipeline which is a mixed bag of notebooks, python functions and
     shell scripts.
 
   - [default_fail.py](./02-sequential/default_fail.py), [default_fail.yaml](./02-sequential/default_fail.yaml): The default failure behavior.
   - [on_failure_fail.py](./02-sequential/on_failure_fail.py), [on_failure_fail.yaml](./02-sequential/on_failure_fail.yaml): On failure of a step, do some action and fail
   - [on_failure_succeed.py](./02-sequential/on_failure_succeed.py), [on_failure_succeed.yaml](./02-sequential/on_failure_succeed.yaml): On failure of a step, take a different route
 
+---
+
 The above examples show stitching complex operations of the pipeline.
 
 The next section has examples on communicating between tasks during execution.
 
 - 03: Examples of passing parameters between tasks of a pipeline.
 
-  Guidelines:
+  The table below summarizes the input/output types of the different task types. For example, notebooks can only take JSON serializable
+  parameters as input but can return json/pydantic/objects. Any python object that could be serialized using "dill" can be used.
 
-  - python functions can get/set simple python data types, pydantic models, objects marked as pickled. Some of the
-    simple data types can also be marked as a metric.
-  -
+  |          |          Input          |          Output          |
+  | -------- | :---------------------: | :----------------------: |
+  | python   | json, pydantic, object  |  json, pydantic, object  |
+  | notebook |          json           |  json, pydantic, object  |
+  | shell    |          json           |           json           |
 
   - [static_parameters_python.py](./03-parameters/static_parameters_python.py), [static_parameters_python.yaml](./03-parameters/static_parameters_python.yaml): A pipeline to show the access of static or known parameters by python tasks.
 
   - [static_parameters_non_python.py](./03-parameters/static_parameters_non_python.py), [static_parameters_non_python.yaml](./03-parameters/static_parameters_non_python.yaml): A pipeline to show the access of static or known parameters by notebook and shell tasks.
 
-  - [passing_parameters_python.py](./03-parameters/passing_parameters_python.py), [passing_parameters_python.yaml](./03-parameters/passing_parameters_python.yaml): shows the mechanism of passing parameters (simple python datatypes, "dillable" objects, pydantic models) and registering metrics between python tasks.
+  - [passing_parameters_python.py](./03-parameters/passing_parameters_python.py), [passing_parameters_python.yaml](./03-parameters/passing_parameters_python.yaml): shows the mechanism of passing parameters (simple python datatypes, objects, pydantic models) and registering metrics between python tasks.
+
+  - [passing_parameters_notebook.py](./03-parameters/passing_parameters_notebook.py), [passing_parameters_notebook.yaml](./03-parameters/passing_parameters_notebook.yaml): shows the mechanism of passing parameters between notebook tasks. Please note that
+  we cannot inject pydantic models or objects into the notebook.
 
-  - [passing_parameters_notebook.py](./03-parameters/passing_parameters_notebook.py), [passing_parameters_notebook.yaml](./03-parameters/passing_parameters_notebook.yaml): shows the mechanism of passing parameters (simple python datatypes, "dillable" objects, pydantic models) and registering metrics between tasks. runnable can "get" object
-  parameters from notebooks but cannot inject them into notebooks.
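(Editorial aside, not part of the patch: per the table above, shell tasks are limited to JSON-encodable values on both sides; they surface values by exporting environment variables named in `returns`, with nested structures travelling as JSON strings. A minimal sketch mirroring the shell example of this series:)

```python
from runnable import ShellTask, metric

# Values exported by the script and named in `returns` are captured as
# parameters; `nested` stays a JSON string until a python task parses it.
emit = ShellTask(
    name="emit_values",
    command="""
    export answer=42
    export nested='{"x": 10, "foo": "bar"}'
    export score=0.9
    """,
    returns=["answer", "nested", metric("score")],
    terminate_with_success=True,
)
```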
+ - [passing_parameters_shell.py](./03-parameters/passing_parameters_shell.py), [passing_parameters_shell.yaml](./03-parameters/passing_parameters_shell.yaml): shows the mechanism of passing parameters between shell tasks. Please note that + we cannot inject pydantic models or objects into shells. From 9e1956bc30f7cffbcb71e6267b866eab35588a23 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Mon, 22 Apr 2024 14:53:07 +0100 Subject: [PATCH 05/17] docs: still working through --- examples/Dockerfile.39 | 2 +- examples/configs/argo-config-full.yaml | 4 ++-- examples/configs/argo-config.yaml | 12 +++++++----- examples/tutorials/mnist/hyper_parameter_tuning.py | 13 ++++++++----- examples/tutorials/mnist/parameters.yaml | 5 +++++ 5 files changed, 23 insertions(+), 13 deletions(-) diff --git a/examples/Dockerfile.39 b/examples/Dockerfile.39 index 8d772f2f..6e409edd 100755 --- a/examples/Dockerfile.39 +++ b/examples/Dockerfile.39 @@ -28,4 +28,4 @@ WORKDIR /app RUN poetry config repositories.FPHO https://files.pythonhosted.org \ && poetry config certificates.FPHO.cert false -RUN poetry install --all-extras --without dev --without tutorial +RUN poetry install --all-extras --without dev,docs,binary,tutorial,perf,release && poetry cache clear --all . diff --git a/examples/configs/argo-config-full.yaml b/examples/configs/argo-config-full.yaml index 12499d57..0600cc5b 100644 --- a/examples/configs/argo-config-full.yaml +++ b/examples/configs/argo-config-full.yaml @@ -4,7 +4,7 @@ executor: image: $argo_docker_image max_workflow_duration_in_seconds: 86400 # Apply to spec node_selector: - parallelism: 0 #apply to spec + parallelism: 1 #apply to spec service_account_name: pipeline-runner resources: limits: @@ -45,4 +45,4 @@ run_log_store: # (5) catalog: # (5) type: file-system config: - log_folder: /mnt/catalog # (6) + catalog_location: /mnt/catalog # (6) diff --git a/examples/configs/argo-config.yaml b/examples/configs/argo-config.yaml index 4b508f61..33a27986 100644 --- a/examples/configs/argo-config.yaml +++ b/examples/configs/argo-config.yaml @@ -1,10 +1,10 @@ executor: type: "argo" # (1) config: - image: runnable:demo # (2) + image: harbor.csis.astrazeneca.net/mlops/runnable:latest # (2) service_account_name: default-editor persistent_volumes: # (3) - - name: runnable-volume + - name: magnus-volume mount_path: /mnt run_log_store: # (4) @@ -13,7 +13,9 @@ run_log_store: # (4) log_folder: /mnt/run_log_store catalog: - type: do-nothing + type: file-system + config: + catalog_location: /mnt/catalog -secrets: - type: do-nothing +# secrets: +# type: do-nothing diff --git a/examples/tutorials/mnist/hyper_parameter_tuning.py b/examples/tutorials/mnist/hyper_parameter_tuning.py index 6a150dbf..8e149453 100644 --- a/examples/tutorials/mnist/hyper_parameter_tuning.py +++ b/examples/tutorials/mnist/hyper_parameter_tuning.py @@ -47,7 +47,8 @@ def convert_to_categorically(y_train: np.ndarray, y_test: np.ndarray, num_classe return y_train, y_test -def build_model(train_params: TrainParams, hp: List[int], num_classes: int): +def build_model(train_params: TrainParams, hpt_id: int, hpt: List[List[int]], num_classes: int): + hp = hpt[hpt_id] hp_id = "_".join(map(str, hp)) print(hp_id) @@ -81,7 +82,8 @@ def build_model(train_params: TrainParams, hp: List[int], num_classes: int): model.save(f"model{hp_id}.keras") -def train_model(x_train: np.ndarray, y_train: np.ndarray, train_params: TrainParams, hp: List[int]): +def train_model(x_train: np.ndarray, y_train: np.ndarray, hpt_id: int, train_params: TrainParams, hpt: 
List[List[int]]):
+    hp = hpt[hpt_id]
     hp_id = "_".join(map(str, hp))
     model = keras.models.load_model(f"model{hp_id}.keras")
     model.compile(loss=train_params.loss, optimizer=train_params.optimizer, metrics=train_params.metrics)
@@ -97,7 +99,8 @@ def train_model(x_train: np.ndarray, y_train: np.ndarray, train_params: TrainPar
     model.save(f"trained_model{hp_id}.keras")
 
 
-def evaluate_model(x_test: np.ndarray, y_test: np.ndarray, hp: List[int]):
+def evaluate_model(x_test: np.ndarray, y_test: np.ndarray, hpt: List[List[int]], hpt_id: int):
+    hp = hpt[hpt_id]
     hp_id = "_".join(map(str, hp))
 
     trained_model = keras.models.load_model(f"trained_model{hp_id}.keras")
@@ -161,8 +164,8 @@ def main():
     hpt_step = Map(
         name="hpt",
         branch=train_pipeline,
-        iterate_on="hpt",
-        iterate_as="hp",
+        iterate_on="hpt_ids",
+        iterate_as="hpt_id",
         reducer="lambda *x: max(x, key=lambda x: x[1])",
         terminate_with_success=True,
     )
diff --git a/examples/tutorials/mnist/parameters.yaml b/examples/tutorials/mnist/parameters.yaml
index 41974f68..394a08d5 100644
--- a/examples/tutorials/mnist/parameters.yaml
+++ b/examples/tutorials/mnist/parameters.yaml
@@ -23,6 +23,11 @@ baseline_params:
   metrics: ['accuracy']
 
 
+hpt_ids:
+  - 0
+  - 1
+
+
 hpt:
   - [16, 32]
   - [16, 64]

From 8e634566ac011387d44d1796858744c1747163bf Mon Sep 17 00:00:00 2001
From: Vijay Vammi
Date: Sat, 27 Apr 2024 09:26:44 +0100
Subject: [PATCH 06/17] docs: adding more examples

---
 .../02-sequential/on_failure_succeed.yaml     |   2 +-
 .../passing_parameters_notebook.yaml          |  44 ++++
 .../passing_parameters_python.yaml            |  39 ++++
 .../03-parameters/passing_parameters_shell.py |   2 +
 .../passing_parameters_shell.yaml             |  42 ++++
 examples/common/read_parameters_out.ipynb     |  22 +-
 examples/common/simple_notebook_out.ipynb     |  20 +-
 examples/common/write_parameters_out.ipynb    |  24 ++-
 examples/configs/argo-config-full.yaml        |   4 +-
 examples/configs/fs-catalog-run_log.yaml      |   5 -
 examples/configs/fs-catalog.yaml              |   2 -
 examples/configs/fs-run_log.yaml              |   2 -
 examples/configs/secrets-env-default.yaml     |   2 -
 examples/configs/secrets-env-ps.yaml          |   4 -
 runnable/sdk.py                               |   5 +-
 runnable/tasks.py                             |  17 +-
 tests/test_examples.py                        | 194 +++++------------
 17 files changed, 230 insertions(+), 200 deletions(-)
 delete mode 100644 examples/configs/fs-catalog-run_log.yaml
 delete mode 100644 examples/configs/fs-catalog.yaml
 delete mode 100644 examples/configs/fs-run_log.yaml
 delete mode 100644 examples/configs/secrets-env-default.yaml
 delete mode 100644 examples/configs/secrets-env-ps.yaml

diff --git a/examples/02-sequential/on_failure_succeed.yaml b/examples/02-sequential/on_failure_succeed.yaml
index 3977e175..50c7c4b6 100644
--- a/examples/02-sequential/on_failure_succeed.yaml
+++ b/examples/02-sequential/on_failure_succeed.yaml
@@ -31,7 +31,7 @@ dag:
       next: success
     step_4:
       type: stub
-      next: fail
+      next: success
   success:
     type: success
   fail:
diff --git a/examples/03-parameters/passing_parameters_notebook.yaml b/examples/03-parameters/passing_parameters_notebook.yaml
index e69de29b..95a1af99 100644
--- a/examples/03-parameters/passing_parameters_notebook.yaml
+++ b/examples/03-parameters/passing_parameters_notebook.yaml
@@ -0,0 +1,44 @@
+dag:
+  description: |
+    The below example shows how to set/get parameters between notebook
+    and python tasks of the pipeline.
+
+    The notebook, write_parameters.ipynb, returns
+      - simple python data types (int, float, str)
+      - a pydantic model
+      - a pandas dataframe marked as an "object"
+
+    runnable can "get" object and pydantic parameters from notebooks
+    but only json serializable parameters can be injected into them.
+  start_at: write_parameters_from_notebook
+  steps:
+    write_parameters_from_notebook:
+      type: task
+      command_type: notebook
+      command: examples/common/write_parameters.ipynb
+      returns:
+        - name: df
+          kind: object
+        - name: integer
+        - name: floater
+        - name: stringer
+        - name: pydantic_param
+        - name: score
+      next: read_parameters
+    read_parameters:
+      type: task
+      command: examples.common.functions.read_parameter
+      next: read_parameters_in_notebook
+    read_parameters_in_notebook:
+      type: task
+      command_type: notebook
+      command: examples/common/read_parameters.ipynb
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
diff --git a/examples/03-parameters/passing_parameters_python.yaml b/examples/03-parameters/passing_parameters_python.yaml
index e69de29b..9ec23dba 100644
--- a/examples/03-parameters/passing_parameters_python.yaml
+++ b/examples/03-parameters/passing_parameters_python.yaml
@@ -0,0 +1,39 @@
+dag:
+  description: |
+    The below example shows how to set/get parameters in python
+    tasks of the pipeline.
+
+    The function, write_parameter, returns
+      - simple python data types (int, float, str)
+      - pydantic models
+      - pandas dataframe, any "object" type
+
+    pydantic models are implicitly handled by runnable
+    but "object" types should be marked as "pickled".
+  start_at: write_parameters
+  steps:
+    write_parameters:
+      type: task
+      command: examples.common.functions.write_parameter
+      returns:
+        - name: df
+          kind: object
+        - name: integer
+        - name: floater
+        - name: stringer
+        - name: pydantic_param
+        - name: score
+
+      next: read_parameters
+    read_parameters:
+      type: task
+      command: examples.common.functions.read_parameter
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
diff --git a/examples/03-parameters/passing_parameters_shell.py b/examples/03-parameters/passing_parameters_shell.py
index 3ef14bc2..0d769725 100644
--- a/examples/03-parameters/passing_parameters_shell.py
+++ b/examples/03-parameters/passing_parameters_shell.py
@@ -28,6 +28,8 @@ def main():
         terminate_with_success=True,
     )
 
+    # TODO: add a step that reads the parameters back using a shell task
+
     pipeline = Pipeline(
         steps=[write_parameters_in_shell, read_parameters],
     )
diff --git a/examples/03-parameters/passing_parameters_shell.yaml b/examples/03-parameters/passing_parameters_shell.yaml
index e69de29b..6fa7b226 100644
--- a/examples/03-parameters/passing_parameters_shell.yaml
+++ b/examples/03-parameters/passing_parameters_shell.yaml
@@ -0,0 +1,42 @@
+dag:
+  description: |
+    The below example shows how to pass parameters between shell and
+    python tasks of the pipeline.
+
+    The shell task exports the parameters as environment variables;
+    simple python data types are passed as is and nested structures,
+    such as pydantic models, travel as json strings.
+
+    shell tasks can only get/set json serializable parameters.
+  start_at: write_parameters_in_shell
+  steps:
+    write_parameters_in_shell:
+      type: task
+      command_type: shell
+      command: |
+        export integer=1
+        export floater=3.14
+        export stringer="hello"
+        export pydantic_param='{"x": 10, "foo": "bar"}'
+        export score=0.9
+      returns:
+        - name: integer
+        - name: floater
+        - name: stringer
+        - name: pydantic_param
+        - name: score
+      next: read_parameters
+    read_parameters:
+      type: task
+      command: examples.common.functions.read_unpickled_parameter
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
diff --git a/examples/common/read_parameters_out.ipynb b/examples/common/read_parameters_out.ipynb
index ea6525f8..5e2d4231 100644
--- a/examples/common/read_parameters_out.ipynb
+++ b/examples/common/read_parameters_out.ipynb
@@ -6,8 +6,8 @@
    "id": "41a71aa7",
    "metadata": {
     "ploomber": {
-     "timestamp_end": 1713380823.765499,
-     "timestamp_start": 1713380823.765069
+     "timestamp_end": 1714206086.493554,
+     "timestamp_start": 1714206086.493198
     },
     "tags": [
      "parameters"
@@ -25,11 +25,11 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "d88d58c6",
+   "id": "361db089",
    "metadata": {
     "ploomber": {
-     "timestamp_end": 1713380823.765846,
-     "timestamp_start": 1713380823.765527
+     "timestamp_end": 1714206086.493804,
+     "timestamp_start": 1714206086.493582
     },
     "tags": [
      "injected-parameters"
@@ -40,8 +40,8 @@
    "# Injected parameters\n",
    "integer = 1\n",
    "floater = 3.14\n",
-   "stringer = \"hello\"\n",
-   "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n"
+   "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n",
+   "stringer = \"hello\"\n"
   ]
  },
 {
@@ -50,8 +50,8 @@
    "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d",
    "metadata": {
     "ploomber": {
-     "timestamp_end": 1713380823.766088,
-     "timestamp_start": 1713380823.765864
+     "timestamp_end": 1714206086.493983,
+     "timestamp_start": 1714206086.493819
     }
    },
    "outputs": [],
@@ -67,8 +67,8 @@
    "id": "faf6769e",
    "metadata": {
     "ploomber": {
-     "timestamp_end": 1713380823.766474,
-     "timestamp_start": 1713380823.766105
+     "timestamp_end": 1714206086.494241,
+     "timestamp_start": 1714206086.493998
     }
    },
    "outputs": [],
diff --git a/examples/common/simple_notebook_out.ipynb b/examples/common/simple_notebook_out.ipynb
index 91e156fe..ab595e92 100644
--- a/examples/common/simple_notebook_out.ipynb
+++ b/examples/common/simple_notebook_out.ipynb
@@ -3,11 +3,11 @@
  {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "c8a68d0d",
+   "id": "630299e0",
    "metadata": {
     "ploomber": {
-     "timestamp_end": 1713380822.228675,
-     "timestamp_start": 1713380822.228447
+     "timestamp_end": 1714206084.990392,
+     "timestamp_start": 1714206084.988649
     },
     "tags": [
      "injected-parameters"
@@ -15,7 +15,11 @@
    },
    "outputs": [],
    "source": [
-    "# Injected parameters\n"
+    "# Injected parameters\n",
+    "integer = 1\n",
+    "floater = 3.14\n",
+    "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n",
+    "stringer = \"hello\"\n"
    ]
   },
  {
@@ -24,8 +28,8 @@
    "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d",
    "metadata": {
     "ploomber": {
-     "timestamp_end": 1713380822.22899,
-     "timestamp_start": 1713380822.228748
+     "timestamp_end": 1714206084.990557,
+     "timestamp_start": 1714206084.990414
     }
    },
    "outputs": [],
@@ -40,8 +44,8 @@
    "id": "8eac7a3f",
    "metadata": {
     "ploomber": {
-     "timestamp_end": 1713380822.229158,
-     "timestamp_start": 1713380822.229008
+     "timestamp_end": 1714206084.990685,
+     "timestamp_start": 1714206084.990571
     }
    },
    "outputs": [
diff --git a/examples/common/write_parameters_out.ipynb b/examples/common/write_parameters_out.ipynb
index e6c06d4a..3e70245a 100644
--- 
a/examples/common/write_parameters_out.ipynb +++ b/examples/common/write_parameters_out.ipynb @@ -6,8 +6,8 @@ "id": "41a71aa7", "metadata": { "ploomber": { - "timestamp_end": 1713380822.509565, - "timestamp_start": 1713380822.508958 + "timestamp_end": 1714206085.312398, + "timestamp_start": 1714206085.311976 }, "tags": [ "parameters" @@ -25,11 +25,11 @@ { "cell_type": "code", "execution_count": 2, - "id": "d53507d8", + "id": "1fbaaa0f", "metadata": { "ploomber": { - "timestamp_end": 1713380822.509736, - "timestamp_start": 1713380822.509595 + "timestamp_end": 1714206085.31265, + "timestamp_start": 1714206085.312423 }, "tags": [ "injected-parameters" @@ -37,7 +37,11 @@ }, "outputs": [], "source": [ - "# Injected parameters\n" + "# Injected parameters\n", + "integer = 1\n", + "floater = 3.14\n", + "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", + "stringer = \"hello\"\n" ] }, { @@ -46,8 +50,8 @@ "id": "764f661d", "metadata": { "ploomber": { - "timestamp_end": 1713380822.511416, - "timestamp_start": 1713380822.509754 + "timestamp_end": 1714206085.313101, + "timestamp_start": 1714206085.312666 } }, "outputs": [], @@ -63,8 +67,8 @@ "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", "metadata": { "ploomber": { - "timestamp_end": 1713380822.511728, - "timestamp_start": 1713380822.51144 + "timestamp_end": 1714206085.313268, + "timestamp_start": 1714206085.313116 } }, "outputs": [], diff --git a/examples/configs/argo-config-full.yaml b/examples/configs/argo-config-full.yaml index 0600cc5b..ced0ab90 100644 --- a/examples/configs/argo-config-full.yaml +++ b/examples/configs/argo-config-full.yaml @@ -5,12 +5,14 @@ executor: max_workflow_duration_in_seconds: 86400 # Apply to spec node_selector: parallelism: 1 #apply to spec - service_account_name: pipeline-runner + service_account_name1: pipeline-runner resources: limits: memory: 140Mi cpu: 100m requests: + memory: 100Mi + cpu: 10m retry_strategy: limit: 0 #int retryPolicy: "always" diff --git a/examples/configs/fs-catalog-run_log.yaml b/examples/configs/fs-catalog-run_log.yaml deleted file mode 100644 index 03487500..00000000 --- a/examples/configs/fs-catalog-run_log.yaml +++ /dev/null @@ -1,5 +0,0 @@ -catalog: - type: file-system # (1) - -run_log_store: - type: file-system # (1) diff --git a/examples/configs/fs-catalog.yaml b/examples/configs/fs-catalog.yaml deleted file mode 100644 index 48597735..00000000 --- a/examples/configs/fs-catalog.yaml +++ /dev/null @@ -1,2 +0,0 @@ -catalog: - type: file-system # (1) diff --git a/examples/configs/fs-run_log.yaml b/examples/configs/fs-run_log.yaml deleted file mode 100644 index 896955d3..00000000 --- a/examples/configs/fs-run_log.yaml +++ /dev/null @@ -1,2 +0,0 @@ -run_log_store: - type: file-system diff --git a/examples/configs/secrets-env-default.yaml b/examples/configs/secrets-env-default.yaml deleted file mode 100644 index 33975b9b..00000000 --- a/examples/configs/secrets-env-default.yaml +++ /dev/null @@ -1,2 +0,0 @@ -secrets: - type: env-secrets-manager diff --git a/examples/configs/secrets-env-ps.yaml b/examples/configs/secrets-env-ps.yaml deleted file mode 100644 index 3aaad3bd..00000000 --- a/examples/configs/secrets-env-ps.yaml +++ /dev/null @@ -1,4 +0,0 @@ -secrets: - type: env-secrets-manager - config: - prefix: "runnable_" diff --git a/runnable/sdk.py b/runnable/sdk.py index d14d8ce6..a506acc7 100644 --- a/runnable/sdk.py +++ b/runnable/sdk.py @@ -25,7 +25,7 @@ from rich.table import Column from typing_extensions import Self -from runnable import console, defaults, entrypoints, graph, utils +from 
runnable import console, defaults, entrypoints, exceptions, graph, utils from runnable.extensions.nodes import ( FailNode, MapNode, @@ -690,6 +690,7 @@ def execute( parameters_file=parameters_file, ) + print("sdk", configuration_file) run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) @@ -734,9 +735,11 @@ def execute( progress.update(pipeline_execution_task, description="[green] Success", completed=True) else: progress.update(pipeline_execution_task, description="[red] Failed", completed=True) + raise exceptions.ExecutionFailedError(run_context.run_id) except Exception as e: # noqa: E722 console.print(e, style=defaults.error_style) progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True) + raise if run_context.executor._local: return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id) diff --git a/runnable/tasks.py b/runnable/tasks.py index 3b2403c8..bcdb6d4a 100644 --- a/runnable/tasks.py +++ b/runnable/tasks.py @@ -34,7 +34,7 @@ # TODO: Can we add memory peak, cpu usage, etc. to the metrics? -console = Console(file=io.StringIO()) +task_console = Console(file=io.StringIO()) class TaskReturns(BaseModel): @@ -169,10 +169,11 @@ def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: with contextlib.redirect_stdout(f): # with contextlib.nullcontext(): yield params - print(console.file.getvalue()) # type: ignore + print(task_console.file.getvalue()) # type: ignore except Exception as e: # pylint: disable=broad-except logger.exception(e) finally: + task_console.clear() print(f.getvalue()) # print to console log_file.write(f.getvalue()) # Print to file @@ -245,7 +246,7 @@ def execute_command( logger.info(f"Calling {func} from {module} with {filtered_parameters}") user_set_parameters = f(**filtered_parameters) # This is a tuple or single value except Exception as e: - console.log(e, style=defaults.error_style, markup=False) + task_console.log(e, style=defaults.error_style, markup=False) raise exceptions.CommandCallError(f"Function call: {self.command} did not succeed.\n") from e attempt_log.input_parameters = params.copy() @@ -289,8 +290,8 @@ def execute_command( except Exception as _e: msg = f"Call to the function {self.command} did not succeed.\n" attempt_log.message = msg - console.print_exception(show_locals=False) - console.log(_e, style=defaults.error_style) + task_console.print_exception(show_locals=False) + task_console.log(_e, style=defaults.error_style) attempt_log.end_time = str(datetime.now()) @@ -487,14 +488,14 @@ def execute_command( attempt_log.status = defaults.FAIL attempt_log.end_time = str(datetime.now()) attempt_log.message = msg - console.print(msg, style=defaults.error_style) + task_console.print(msg, style=defaults.error_style) return attempt_log # for stderr for line in result[1].split("\n"): if line.strip() == "": continue - console.print(line, style=defaults.warning_style) + task_console.print(line, style=defaults.warning_style) output_parameters: Dict[str, Parameter] = {} metrics: Dict[str, Parameter] = {} @@ -505,7 +506,7 @@ def execute_command( continue logger.info(line) - console.print(line) + task_console.print(line) if line.strip() == collect_delimiter: # The lines from now on should be captured diff --git a/tests/test_examples.py b/tests/test_examples.py index 9eabf0da..6c38cbe2 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,29 +1,27 @@ 
import importlib import os -import subprocess from contextlib import contextmanager, nullcontext -from pathlib import Path import pytest from runnable import exceptions from runnable.entrypoints import execute -# # (file, is_fail?, kwargs) +# # (file, is_fail?) python_examples = [ - ("01-tasks/notebook", False, None), - ("01-tasks/python_tasks", False, None), - ("01-tasks/scripts", False, None), - ("01-tasks/stub", False, None), - ("02-sequential/default_fail", False, None), - ("02-sequential/on_failure_fail", False, None), - ("02-sequential/on_failure_succeed", False, None), - ("02-sequential/traversal", False, None), - ("03-parameters/passing_parameters_notebook", False, None), - ("03-parameters/passing_parameters_python", False, None), - ("03-parameters/passing_parameters_shell", False, None), - ("03-parameters/static_parameters_non_python", False, None), - ("03-parameters/static_parameters_python", False, None), + ("01-tasks/notebook", False), + ("01-tasks/python_tasks", False), + ("01-tasks/scripts", False), + ("01-tasks/stub", False), + ("02-sequential/default_fail", True), + ("02-sequential/on_failure_fail", True), + ("02-sequential/on_failure_succeed", False), + ("02-sequential/traversal", False), + ("03-parameters/passing_parameters_notebook", False), + ("03-parameters/passing_parameters_python", False), + ("03-parameters/passing_parameters_shell", False), + ("03-parameters/static_parameters_non_python", False), + ("03-parameters/static_parameters_python", False), ] @@ -32,13 +30,25 @@ def list_python_examples(): yield example +@contextmanager +def chunked_fs_context(): + os.environ["RUNNABLE_CONFIGURATION_FILE"] = "examples/configs/chunked-fs-run_log.yaml" + yield + del os.environ["RUNNABLE_CONFIGURATION_FILE"] + + +contexts = [None, chunked_fs_context] +configurations = [None, "examples/configs/chunked-fs-run_log.yaml"] + + @pytest.mark.parametrize("example", list_python_examples()) -# @pytest.mark.no_cover +@pytest.mark.parametrize("context", contexts) +@pytest.mark.no_cover @pytest.mark.e2e -def test_python_examples(example): +def test_python_examples(example, context): print(f"Testing {example}...") - mod, status, context = example + mod, status = example if not context: context = nullcontext() @@ -47,135 +57,29 @@ def test_python_examples(example): imported_module = importlib.import_module(f"examples.{mod.replace('/', '.')}") f = getattr(imported_module, "main") - try: - with context: + with context: + try: f() + except exceptions.ExecutionFailedError: + print("Example failed") + if not status: + raise + + +@pytest.mark.parametrize("example", list_python_examples()) +@pytest.mark.parametrize("configuration", configurations) +@pytest.mark.no_cover +@pytest.mark.e2e +def test_yaml_examples(example, configuration): + print(f"Testing {example}...") + file, status = example + example_file = f"examples/{file}.yaml" + parameters_file = "examples/common/initial_parameters.yaml" + try: + execute(configuration_file=configuration, pipeline_file=example_file, parameters_file=parameters_file) except exceptions.ExecutionFailedError: if not status: raise -# examples = [ -# ("concepts/catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), -# ("concepts/map.yaml", False, {}), -# ("concepts/map_shell.yaml", False, {}), -# ("concepts/nesting.yaml", False, {}), -# ("concepts/notebook_native_parameters.yaml", False, {"parameters_file": "examples/concepts/parameters.yaml"}), -# ("concepts/parallel.yaml", False, {}), -# ("concepts/simple_notebook.yaml", False, {}), -# 
("concepts/simple.yaml", False, {}), -# ("catalog.yaml", False, {"configuration_file": "examples/configs/fs-catalog.yaml"}), -# ("default-fail.yaml", True, {}), -# ("on-failure.yaml", False, {}), -# ("parallel-fail.yaml", True, {}), -# ] - - -# def list_examples(): -# for example in examples: -# yield example - - -# @pytest.mark.parametrize("example", list_examples()) -# @pytest.mark.no_cover -# @pytest.mark.e2e -# def test_yaml_examples(example): -# print(f"Testing {example}...") -# examples_path = Path("examples") -# file_path, status, kwargs = example -# try: -# full_file_path = examples_path / file_path -# configuration_file = kwargs.pop("configuration_file", "") -# execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) -# except exceptions.ExecutionFailedError: -# if not status: -# raise - - -# @pytest.mark.parametrize("example", list_examples()) -# @pytest.mark.no_cover -# @pytest.mark.e2e -# def test_yaml_examples_argo(example): -# print(f"Testing {example}...") -# examples_path = Path("examples") -# file_path, status, kwargs = example -# try: -# full_file_path = examples_path / file_path -# kwargs.pop("configuration_file", "") -# configuration_file = "examples/configs/argo-config.yaml" -# execute(configuration_file=configuration_file, pipeline_file=str(full_file_path.resolve()), **kwargs) -# subprocess.run(["argo", "lint", "--offline", "argo-pipeline.yaml"], check=True) -# except exceptions.ExecutionFailedError: -# if not status: -# raise - - -# @pytest.mark.parametrize("example", list_examples()) -# @pytest.mark.no_cover -# @pytest.mark.e2e_container -# def test_yaml_examples_container(example): -# print(f"Testing {example}...") -# examples_path = Path("examples") -# file_path, status, kwargs = example -# try: -# full_file_path = examples_path / file_path -# kwargs.pop("configuration_file", "") -# configuration_file = "examples/configs/local-container.yaml" -# os.environ["runnable_VAR_default_docker_image"] = "runnable:3.8" -# execute(configuration_file=configuration_file, pipeline_file=str(full_file_path), **kwargs) -# except exceptions.ExecutionFailedError: -# if not status: -# raise - - -# @contextmanager -# def secrets_env_context(): -# os.environ["secret"] = "secret_value" -# os.environ["runnable_CONFIGURATION_FILE"] = "examples/configs/secrets-env-default.yaml" -# yield -# del os.environ["secret"] -# del os.environ["runnable_CONFIGURATION_FILE"] - - -# # function, success, context -# python_examples = [ -# ("catalog", False, None), -# ("catalog_simple", False, None), -# ("mocking", False, None), -# ("on_failure", False, None), -# ("parameters", False, None), -# ("parameters_simple", False, None), -# ("concepts.catalog", False, None), -# ("concepts.map", False, None), -# ("concepts.nesting", False, None), -# ("concepts.parallel", False, None), -# ("concepts.simple", False, None), -# ] - - -# def list_python_examples(): -# for example in python_examples: -# yield example - - -# @pytest.mark.parametrize("example", list_python_examples()) -# @pytest.mark.no_cover -# @pytest.mark.e2e -# def test_python_examples(example): -# print(f"Testing {example}...") - -# mod, status, context = example - -# if not context: -# context = nullcontext() -# else: -# context = context() - -# imported_module = importlib.import_module(f"examples.{mod}") -# f = getattr(imported_module, "main") -# try: -# with context: -# f() -# except exceptions.ExecutionFailedError: -# if not status: -# raise +# TODO: Need to test argo and local container From 
311511a84af5028b3abffecdc1c4be199a18475a Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Tue, 30 Apr 2024 09:03:51 +0100 Subject: [PATCH 07/17] fix: minor changes to API --- .../passing_parameters_notebook.py | 12 ++ .../passing_parameters_notebook.yaml | 18 +- .../passing_parameters_python.yaml | 21 +- .../03-parameters/passing_parameters_shell.py | 33 ++- .../passing_parameters_shell.yaml | 32 ++- .../03-parameters/static_parameters_python.py | 7 + examples/04-catalog/catalog.py | 128 +++++++++++ examples/04-catalog/catalog.yaml | 116 ++++++++++ examples/06-parallel/parallel.py | 66 ++++++ examples/06-parallel/parallel.yaml | 46 ++++ examples/06-parallel/traversal.py | 41 ++++ examples/README.md | 42 +++- examples/common/functions.py | 24 +++ examples/common/read_files.ipynb | 43 ++++ examples/common/read_parameters_out.ipynb | 105 --------- examples/common/simple_notebook_out.ipynb | 20 +- examples/common/write_parameters_out.ipynb | 104 --------- pyproject.toml | 1 - runnable/entrypoints.py | 8 +- runnable/extensions/nodes.py | 2 - .../secrets/env_secrets/__init__.py | 0 .../secrets/env_secrets/implementation.py | 42 ---- runnable/sdk.py | 4 +- runnable/tasks.py | 203 +++++++++--------- .../secrets/test_env_secrets_manager.py | 48 ----- tests/test_examples.py | 38 ++-- 26 files changed, 728 insertions(+), 476 deletions(-) create mode 100644 examples/04-catalog/catalog.py create mode 100644 examples/04-catalog/catalog.yaml create mode 100644 examples/06-parallel/parallel.py create mode 100644 examples/06-parallel/parallel.yaml create mode 100644 examples/06-parallel/traversal.py create mode 100644 examples/common/read_files.ipynb delete mode 100644 examples/common/read_parameters_out.ipynb delete mode 100644 examples/common/write_parameters_out.ipynb delete mode 100644 runnable/extensions/secrets/env_secrets/__init__.py delete mode 100644 runnable/extensions/secrets/env_secrets/implementation.py delete mode 100644 tests/runnable/extensions/secrets/test_env_secrets_manager.py diff --git a/examples/03-parameters/passing_parameters_notebook.py b/examples/03-parameters/passing_parameters_notebook.py index ecb00783..ac8220f5 100644 --- a/examples/03-parameters/passing_parameters_notebook.py +++ b/examples/03-parameters/passing_parameters_notebook.py @@ -1,3 +1,15 @@ +""" +Demonstrates passing parameters to and from a notebook. + +We can extract json, pydantic, objects from notebook. +eg: write_parameters_from_notebook + +But can only inject json type parameters to a notebook. +eg: read_parameters_in_notebook +pydantic parameters are injected as dict. + +""" + from examples.common.functions import read_parameter from runnable import NotebookTask, Pipeline, PythonTask, metric, pickled diff --git a/examples/03-parameters/passing_parameters_notebook.yaml b/examples/03-parameters/passing_parameters_notebook.yaml index 95a1af99..d62aad95 100644 --- a/examples/03-parameters/passing_parameters_notebook.yaml +++ b/examples/03-parameters/passing_parameters_notebook.yaml @@ -1,19 +1,13 @@ dag: description: | - The below example showcases setting up known initial parameters for a pipeline - of only python tasks + Demonstrates passing parameters to and from a notebook. - The initial parameters as defined in the yaml file are: - simple: 1 - complex_param: - x: 10 - y: "hello world!!" + We can extract json, pydantic, objects from notebook. + eg: write_parameters_from_notebook - runnable allows using pydantic models for deeply nested parameters and - casts appropriately based on annotation. 
eg: read_initial_params_as_pydantic
-
-    If no annotation is provided, the parameter is assumed to be a dictionary.
-    eg: read_initial_params_as_json
+    But we can only inject json type parameters into a notebook.
+    eg: read_parameters_in_notebook
+    pydantic parameters are injected as dict.
   start_at: write_parameters_from_notebook
   steps:
     write_parameters_from_notebook:
diff --git a/examples/03-parameters/passing_parameters_python.yaml b/examples/03-parameters/passing_parameters_python.yaml
index 9ec23dba..7c66763f 100644
--- a/examples/03-parameters/passing_parameters_python.yaml
+++ b/examples/03-parameters/passing_parameters_python.yaml
@@ -1,19 +1,18 @@
 dag:
   description: |
-    The below example showcases setting up known initial parameters for a pipeline
-    of only python tasks
+    The below example shows how to set/get parameters in python
+    tasks of the pipeline.
 
-    The initial parameters as defined in the yaml file are:
-      simple: 1
-      complex_param:
-        x: 10
-        y: "hello world!!"
+    The function, write_parameter, returns
+        - simple python data types (int, float, str)
+        - pydantic models
+        - pandas dataframe, any "object" type
 
-    runnable allows using pydantic models for deeply nested parameters and
-    casts appropriately based on annotation. eg: read_initial_params_as_pydantic
+    pydantic models are implicitly handled by runnable
+    but "object" types should be marked as "pickled".
 
-    If no annotation is provided, the parameter is assumed to be a dictionary.
-    eg: read_initial_params_as_json
+    Using "pickled" even for python data types is advised for
+    reasonably large collections.
   start_at: write_parameters
   steps:
     write_parameters:
diff --git a/examples/03-parameters/passing_parameters_shell.py b/examples/03-parameters/passing_parameters_shell.py
index 0d769725..5e0ed3a5 100644
--- a/examples/03-parameters/passing_parameters_shell.py
+++ b/examples/03-parameters/passing_parameters_shell.py
@@ -1,3 +1,15 @@
+"""
+Demonstrates passing parameters to and from shell scripts.
+
+We can extract only json style parameters from shell scripts.
+eg: write_parameters_in_shell
+
+We can only read json style parameters from shell scripts.
+eg: read_parameters_in_shell
+pydantic parameters are injected as json.
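+
+For example, the pydantic parameter of this pipeline is visible inside the
+shell task as a json string in an environment variable:
+
+    pydantic_param='{"x": 10, "foo": "bar"}'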
+ +""" + from examples.common.functions import read_unpickled_parameter from runnable import Pipeline, PythonTask, ShellTask, metric @@ -25,13 +37,28 @@ def main(): read_parameters = PythonTask( function=read_unpickled_parameter, name="read_parameters", - terminate_with_success=True, ) - # There should be read parameters using shell + read_parameters_command = """ + if [ "$integer" = 1 ] \ + && [ "$floater" = 3.14 ] \ + && [ "$stringer" = "hello" ] \ + && [ "$pydantic_param" = '{"x": 10, "foo": "bar"}' ]; then + echo "yaay" + exit 0; + else + echo "naay" + exit 1; + fi + """ + read_parameters_in_shell = ShellTask( + name="read_parameters_in_shell", + command=read_parameters_command, + terminate_with_success=True, + ) pipeline = Pipeline( - steps=[write_parameters_in_shell, read_parameters], + steps=[write_parameters_in_shell, read_parameters, read_parameters_in_shell], ) _ = pipeline.execute() diff --git a/examples/03-parameters/passing_parameters_shell.yaml b/examples/03-parameters/passing_parameters_shell.yaml index 6fa7b226..63623f32 100644 --- a/examples/03-parameters/passing_parameters_shell.yaml +++ b/examples/03-parameters/passing_parameters_shell.yaml @@ -1,19 +1,14 @@ dag: description: | - The below example showcases setting up known initial parameters for a pipeline - of only python tasks + Demonstrates passing parameters to and from shell scripts. - The initial parameters as defined in the yaml file are: - simple: 1 - complex_param: - x: 10 - y: "hello world!!" + We can extract only json style parameters from shell scripts. + eg: write_parameters_in_shell - runnable allows using pydantic models for deeply nested parameters and - casts appropriately based on annotation. eg: read_initial_params_as_pydantic + We can only read json style parameters from shell scripts. + eg: read_parameters_in_shell + pydantic parameters are injected as json. - If no annotation is provided, the parameter is assumed to be a dictionary. - eg: read_initial_params_as_json start_at: write_parameters_in_shell steps: write_parameters_in_shell: @@ -35,6 +30,21 @@ dag: read_parameters: type: task command: examples.common.functions.read_unpickled_parameter + next: read_parameters_in_shell + read_parameters_in_shell: + type: task + command_type: shell + command: | + if [ "$integer" = 1 ] \ + && [ "$floater" = 3.14 ] \ + && [ "$stringer" = "hello" ] \ + && [ "$pydantic_param" = '{"x": 10, "foo": "bar"}' ]; then + echo "yaay" + exit 0; + else + echo "naay" + exit 1; + fi next: success success: type: success diff --git a/examples/03-parameters/static_parameters_python.py b/examples/03-parameters/static_parameters_python.py index 7c82ea3a..edf2028e 100644 --- a/examples/03-parameters/static_parameters_python.py +++ b/examples/03-parameters/static_parameters_python.py @@ -14,8 +14,13 @@ If no annotation is provided, the parameter is assumed to be a dictionary. eg: read_initial_params_as_json +You can set the initial parameters from environment variables as well. 
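+These environment variables take precedence over the values defined in the parameters file.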
+eg: Any environment variable prefixed by "RUNNABLE_PRM_" will be picked up by runnable + """ +import os + from examples.common.functions import ( read_initial_params_as_json, read_initial_params_as_pydantic, @@ -46,4 +51,6 @@ def main(): if __name__ == "__main__": + # Any parameter prefixed by "RUNNABLE_PRM_" will be picked up by runnable + os.environ["RUNNABLE_PRM_envvar"] = "from env" main() diff --git a/examples/04-catalog/catalog.py b/examples/04-catalog/catalog.py new file mode 100644 index 00000000..68578dad --- /dev/null +++ b/examples/04-catalog/catalog.py @@ -0,0 +1,128 @@ +""" +Demonstrates moving files within tasks. + +- generate_data: creates df.csv and data_folder/data.txt + +- delete_local_after_generate: deletes df.csv and data_folder/data.txt + This step ensures that the local files are deleted after the step + +- read_data_py: reads df.csv and data_folder/data.txt + +- delete_local_after_python_get: deletes df.csv and data_folder/data.txt + This step ensures that the local files are deleted after the step + +- read_data_shell: reads df.csv and data_folder/data.txt + +- delete_local_after_shell_get: deletes df.csv and data_folder/data.txt + This step ensures that the local files are deleted after the step + +- read_data_notebook: reads df.csv and data_folder/data.txt + +- delete_local_after_notebook_get: deletes df.csv and data_folder/data.txt + +Use this pattern to move files that are not dill friendly. + +All the files are stored in catalog. + +.catalog +└── silly-joliot-0610 + ├── data_folder + │   └── data.txt + ├── deleteaftergenerate.execution.log + ├── deleteaftergeneratenotebook.execution.log + ├── deleteaftergeneratepython.execution.log + ├── deleteaftergenerateshell.execution.log + ├── df.csv + ├── examples + │   └── common + │   └── read_files_out.ipynb + ├── generatedata.execution.log + ├── readdatanotebook.execution.log + ├── readdatapy.execution.log + └── readdatashell.execution.log + +5 directories, 11 files + +""" + +from examples.common.functions import read_files, write_files +from runnable import Catalog, NotebookTask, Pipeline, PythonTask, ShellTask + + +def main(): + write_catalog = Catalog(put=["df.csv", "data_folder/data.txt"]) + generate_data = PythonTask( + name="generate_data", + function=write_files, + catalog=write_catalog, + ) + + delete_files_command = """ + rm df.csv && \ + rm data_folder/data.txt + """ + # delete from local files after generate + # since its local catalog, we delete to show "get from catalog" + delete_local_after_generate = ShellTask( + name="delete_after_generate", + command=delete_files_command, + ) + + read_catalog = Catalog(get=["df.csv", "data_folder/data.txt"]) + read_data_python = PythonTask( + name="read_data_py", + function=read_files, + catalog=read_catalog, + ) + + delete_local_after_python_get = ShellTask( + name="delete_after_generate_python", + command=delete_files_command, + ) + + read_data_shell_command = """ + (ls df.csv >> /dev/null 2>&1 && echo yes) || exit 1 && \ + (ls data_folder/data.txt >> /dev/null 2>&1 && echo yes) || exit 1 + """ + read_data_shell = ShellTask( + name="read_data_shell", + command=read_data_shell_command, + catalog=read_catalog, + ) + + delete_local_after_shell_get = ShellTask( + name="delete_after_generate_shell", + command=delete_files_command, + ) + + read_data_notebook = NotebookTask( + notebook="examples/common/read_files.ipynb", + name="read_data_notebook", + catalog=read_catalog, + ) + + delete_local_after_notebook_get = ShellTask( + name="delete_after_generate_notebook", + 
command=delete_files_command, + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[ + generate_data, + delete_local_after_generate, + read_data_python, + delete_local_after_python_get, + read_data_shell, + delete_local_after_shell_get, + read_data_notebook, + delete_local_after_notebook_get, + ] + ) + _ = pipeline.execute() + + return pipeline + + +if __name__ == "__main__": + main() diff --git a/examples/04-catalog/catalog.yaml b/examples/04-catalog/catalog.yaml new file mode 100644 index 00000000..8fe69580 --- /dev/null +++ b/examples/04-catalog/catalog.yaml @@ -0,0 +1,116 @@ +dag: + description: | + Demonstrates moving files within tasks. + + - generate_data: creates df.csv and data_folder/data.txt + + - delete_local_after_generate: deletes df.csv and data_folder/data.txt + This step ensures that the local files are deleted after the step + + - read_data_py: reads df.csv and data_folder/data.txt + + - delete_local_after_python_get: deletes df.csv and data_folder/data.txt + This step ensures that the local files are deleted after the step + + - read_data_shell: reads df.csv and data_folder/data.txt + + - delete_local_after_shell_get: deletes df.csv and data_folder/data.txt + This step ensures that the local files are deleted after the step + + - read_data_notebook: reads df.csv and data_folder/data.txt + + - delete_local_after_notebook_get: deletes df.csv and data_folder/data.txt + + Use this pattern to move files that are not dill friendly. + + All the files are stored in catalog. + + .catalog + └── silly-joliot-0610 + ├── data_folder + │   └── data.txt + ├── deleteaftergenerate.execution.log + ├── deleteaftergeneratenotebook.execution.log + ├── deleteaftergeneratepython.execution.log + ├── deleteaftergenerateshell.execution.log + ├── df.csv + ├── examples + │   └── common + │   └── read_files_out.ipynb + ├── generatedata.execution.log + ├── readdatanotebook.execution.log + ├── readdatapy.execution.log + └── readdatashell.execution.log + + 5 directories, 11 files + start_at: generate_data + steps: + generate_data: + type: task + command: examples.common.functions.write_files + catalog: + put: + - df.csv + - data_folder/data.txt + next: delete_files_after_generate + delete_files_after_generate: + type: task + command_type: shell + command: | + rm df.csv && \ + rm data_folder/data.txt + next: read_data_python + read_data_python: + type: task + command_type: python + command: examples.common.functions.read_files + catalog: + get: + - df.csv + - data_folder/data.txt + next: delete_local_after_python_get + delete_local_after_python_get: + type: task + command_type: shell + command: | + rm df.csv && \ + rm data_folder/data.txt + next: read_data_shell + read_data_shell: + type: task + command_type: shell + command: | + (ls df.csv >> /dev/null 2>&1 && echo yes) || exit 1 && \ + (ls data_folder/data.txt >> /dev/null 2>&1 && echo yes) || exit 1 + catalog: + get: + - df.csv + - data_folder/data.txt + next: delete_local_after_shell_get + delete_local_after_shell_get: + type: task + command_type: shell + command: | + rm df.csv && \ + rm data_folder/data.txt + next: read_data_notebook + read_data_notebook: + type: task + command_type: notebook + command: "examples/common/read_files.ipynb" + catalog: + get: + - df.csv + - data_folder/data.txt + next: delete_local_after_notebook_get + delete_local_after_notebook_get: + type: task + command_type: shell + command: | + rm df.csv && \ + rm data_folder/data.txt + next: success + success: + type: success + fail: + type: fail diff --git 
a/examples/06-parallel/parallel.py b/examples/06-parallel/parallel.py
new file mode 100644
index 00000000..3d26365e
--- /dev/null
+++ b/examples/06-parallel/parallel.py
@@ -0,0 +1,66 @@
+"""
+This example demonstrates the use of the Parallel step.
+
+The branches of the parallel step are themselves pipelines and can be defined
+as shown in 02-sequential/traversal.py.
+
+WARNING: the function returning the pipeline should not be executed
+during the definition of the branch in parallel steps.
+"""
+
+from examples.common.functions import hello
+from runnable import NotebookTask, Parallel, Pipeline, PythonTask, ShellTask, Stub
+
+
+def traversal(execute: bool = True):
+    """
+    Use the "execute" argument to control the execution of the pipeline.
+
+    The same pipeline can be run independently from the command line.
+
+    WARNING: If the execution is not controlled by "execute", the pipeline will be executed
+    even during the definition of the branch in parallel steps.
+    """
+    stub_task = Stub(name="hello stub")
+
+    python_task = PythonTask(
+        name="hello python",
+        function=hello,
+    )
+
+    shell_task = ShellTask(
+        name="hello shell",
+        command="echo 'Hello World!'",
+    )
+
+    notebook_task = NotebookTask(
+        name="hello notebook",
+        notebook="examples/common/simple_notebook.ipynb",
+        terminate_with_success=True,
+    )
+
+    # The pipeline has a mix of tasks.
+    # The order of execution follows the order of the tasks in the list.
+    pipeline = Pipeline(steps=[stub_task, python_task, shell_task, notebook_task])
+
+    if execute:  # Do not execute the pipeline if we are using it as a branch
+        pipeline.execute()
+
+    return pipeline
+
+
+def main():
+    parallel_step = Parallel(
+        name="parallel step",
+        terminate_with_success=True,
+        branches={"branch1": traversal(execute=False), "branch2": traversal(execute=False)},
+    )
+
+    pipeline = Pipeline(steps=[parallel_step])
+
+    pipeline.execute()
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/06-parallel/parallel.yaml b/examples/06-parallel/parallel.yaml
new file mode 100644
index 00000000..f91b2b5b
--- /dev/null
+++ b/examples/06-parallel/parallel.yaml
@@ -0,0 +1,46 @@
+# This example demonstrates the use of the Parallel step.
+
+# The branches of the parallel step are themselves pipelines and can be defined
+# as shown in 02-sequential/traversal.yaml
+branch: &branch
+  description: |
+    Use this pattern to define a repeatable branch
+  start_at: hello stub
+  steps:
+    hello stub:
+      type: stub
+      next: hello python
+    hello python:
+      type: task
+      command_type: python
+      command: examples.common.functions.hello # dotted path to the function.
+      next: hello shell
+    hello shell:
+      type: task
+      command_type: shell
+      command: echo "Hello World!" # Command to run
+      next: hello notebook
+    hello notebook:
+      type: task
+      command_type: notebook
+      command: examples/common/simple_notebook.ipynb # The path is relative to the root of the project.
+      next: success
+    success:
+      type: success
+    fail:
+      type: fail
+
+
+dag:
+  start_at: parallel_step
+  steps:
+    parallel_step:
+      type: parallel
+      next: success
+      branches:
+        branch1: *branch
+        branch2: *branch
+    success:
+      type: success
+    failure:
+      type: fail
diff --git a/examples/06-parallel/traversal.py b/examples/06-parallel/traversal.py
new file mode 100644
index 00000000..916babcd
--- /dev/null
+++ b/examples/06-parallel/traversal.py
@@ -0,0 +1,41 @@
+"""
+This pipeline is the same as the one seen in 02-sequential/traversal.py.
+
+Given the naming convention used, we cannot import it directly.
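+(A folder name like "06-parallel" starts with a digit and contains a hyphen,
+so it is not a valid python module path for a dotted import.)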
+
+"""
+
+from examples.common.functions import hello
+from runnable import NotebookTask, Pipeline, PythonTask, ShellTask, Stub
+
+
+def main():
+    stub_task = Stub(name="hello stub")
+
+    python_task = PythonTask(
+        name="hello python",
+        function=hello,
+    )
+
+    shell_task = ShellTask(
+        name="hello shell",
+        command="echo 'Hello World!'",
+    )
+
+    notebook_task = NotebookTask(
+        name="hello notebook",
+        notebook="examples/common/simple_notebook.ipynb",
+        terminate_with_success=True,
+    )
+
+    # The pipeline has a mix of tasks.
+    # The order of execution follows the order of the tasks in the list.
+    pipeline = Pipeline(steps=[stub_task, python_task, shell_task, notebook_task])
+
+    pipeline.execute()
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/README.md b/examples/README.md
index 476478ae..67c4719d 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -4,7 +4,7 @@
 All examples have both python SDK and yaml representations.
 Please use this as an index to find specific example.
 
-- common: Has python functions/notebooks/scripts that are used across the examples
+- [common](./common/): Has python functions/notebooks/scripts that are used across the examples
 
 - 01-tasks: Examples of the tasks that can be part of the pipeline.
 
@@ -20,8 +20,9 @@
 
 ---
 
-The above examples showcase executable units of the pipeline.
-The next section has examples on stitching these tasks together for complex operations.
+This section has examples on stitching these tasks together for complex operations.
+We only show sequential pipelines, while parallel and dynamic pipelines are
+shown in later sections.
 
 - 02-sequential: Examples of stitching tasks together including behavior in case of failures.
 
@@ -34,8 +35,8 @@
 
 ---
 
-The above examples show stitching complex operations of the pipeline.
-The next section has examples on communicating between tasks during execution.
+This section has examples on communicating between tasks during execution.
+It focusses on "parameters", while the next section focusses on "files".
 
 - 03: Examples of passing parameters between tasks of a pipeline.
 
@@ -51,12 +52,39 @@
 
   - [static_parameters_python.py](./03-parameters/static_parameters_python.py), [static_parameters_python.yaml](./03-parameters/static_parameters_python.yaml): A pipeline to show the access of static or known parameters by python tasks.
 
+  Any environment variables prefixed by RUNNABLE_PRM_ are recognized as parameters and
+  can override parameters defined by the file.
+
   - [static_parameters_non_python.py](./03-parameters/static_parameters_non_python.py), [static_parameters_non_python.yaml](./03-parameters/static_parameters_non_python.yaml): A pipeline to show the access of static or known parameters by python tasks.
 
+  Any environment variables prefixed by RUNNABLE_PRM_ are recognized as parameters and
+  can override parameters defined by the file.
+
   - [passing_parameters_python.py](./03-parameters/passing_parameters_python.py), [passing_parameters_python.yaml](./03-parameters/passing_parameters_python.yaml): shows the mechanism of passing parameters (simple python datatypes, objects, pydantic models) and registering metrics between python tasks.
- [passing_parameters_notebook.py](./03-parameters/passing_parameters_notebook.py), [passing_parameters_notebook.yaml](./03-parameters/passing_parameters_notebook.yaml): shows the mechanism of passing parameters between notebook tasks. Please note that - we cannot inject pydantic models or objects into the notebook. + we cannot inject pydantic models or objects into the notebook but can capture them + as return values. - [passing_parameters_shell.py](./03-parameters/passing_parameters_shell.py), [passing_parameters_shell.yaml](./03-parameters/passing_parameters_shell.yaml): shows the mechanism of passing parameters between shell tasks. Please note that - we cannot inject pydantic models or objects into shells. + we cannot inject/capture pydantic models or objects in shells. + +--- + +This section focusses on moving files between tasks. + +- 04: Examples of moving files between tasks of the pipeline. + + - [catalog.py](./04-catalog/catalog.py), [catalog.yaml](./04-catalog/catalog.yaml): demonstrate moving files between python, shell and notebook tasks. + +--- + +This section focusses on exposing secrets to tasks. All secrets are exposed as environment +variables. The secrets are destroyed after the completion of the task. + + +--- + +Below are the examples of constructing parallel graphs and nested graphs. + +Creating parallel graphs is simple as the branches are themselves pipelines. diff --git a/examples/common/functions.py b/examples/common/functions.py index 36eb3d9a..4ca9803e 100644 --- a/examples/common/functions.py +++ b/examples/common/functions.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Dict, Union import pandas as pd @@ -24,12 +25,14 @@ def read_initial_params_as_pydantic( floater: float, stringer: str, pydantic_param: ComplexParams, + envvar: str, ): assert integer == 1 assert floater == 3.14 assert stringer == "hello" assert pydantic_param.x == 10 assert pydantic_param.foo == "bar" + assert envvar == "from env" def read_initial_params_as_json( @@ -87,3 +90,24 @@ def read_unpickled_parameter( assert pydantic_param.x == 10 assert pydantic_param.foo == "bar" assert score == 0.9 + + +def write_files(): + data = {"calories": [420, 380, 390], "duration": [50, 40, 45]} + df = pd.DataFrame(data) + + df.to_csv("df.csv", index=False) + + Path("data_folder").mkdir(parents=True, exist_ok=True) + with open("data_folder/data.txt", "w", encoding="utf-8") as f: + f.write("hello world") + + +def read_files(): + df = pd.read_csv("df.csv") + assert df.shape == (3, 2) + + with open("data_folder/data.txt", "r", encoding="utf-8") as f: + data = f.read() + + assert data.strip() == "hello world" diff --git a/examples/common/read_files.ipynb b/examples/common/read_files.ipynb new file mode 100644 index 00000000..a9093c14 --- /dev/null +++ b/examples/common/read_files.ipynb @@ -0,0 +1,43 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "41a71aa7", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"df.csv\")\n", + "assert df.shape == (3, 2)\n", + "\n", + "with open(\"data_folder/data.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " data = f.read()\n", + "\n", + "assert data.strip() == \"hello world\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/read_parameters_out.ipynb b/examples/common/read_parameters_out.ipynb deleted file mode 100644 index 5e2d4231..00000000 --- a/examples/common/read_parameters_out.ipynb +++ /dev/null @@ -1,105 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "41a71aa7", - "metadata": { - "ploomber": { - "timestamp_end": 1714206086.493554, - "timestamp_start": 1714206086.493198 - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "integer = None\n", - "stringer = None\n", - "floater = None\n", - "pydantic_param = None\n", - "score = None" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "361db089", - "metadata": { - "ploomber": { - "timestamp_end": 1714206086.493804, - "timestamp_start": 1714206086.493582 - }, - "tags": [ - "injected-parameters" - ] - }, - "outputs": [], - "source": [ - "# Injected parameters\n", - "integer = 1\n", - "floater = 3.14\n", - "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", - "stringer = \"hello\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", - "metadata": { - "ploomber": { - "timestamp_end": 1714206086.493983, - "timestamp_start": 1714206086.493819 - } - }, - "outputs": [], - "source": [ - "assert integer == 1\n", - "assert stringer == \"hello\"\n", - "assert floater == 3.14" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "faf6769e", - "metadata": { - "ploomber": { - "timestamp_end": 1714206086.494241, - "timestamp_start": 1714206086.493998 - } - }, - "outputs": [], - "source": [ - "from examples.common.functions import ComplexParams\n", - "\n", - "pydantic_param = ComplexParams(**pydantic_param)\n", - "assert pydantic_param.x == 10\n", - "assert pydantic_param.foo == \"bar\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/common/simple_notebook_out.ipynb b/examples/common/simple_notebook_out.ipynb index ab595e92..e3562adc 100644 --- a/examples/common/simple_notebook_out.ipynb +++ b/examples/common/simple_notebook_out.ipynb @@ -3,11 +3,11 @@ { "cell_type": "code", "execution_count": 1, - "id": "630299e0", + "id": "bd34d156", "metadata": { "ploomber": { - "timestamp_end": 1714206084.990392, - "timestamp_start": 1714206084.988649 + "timestamp_end": 1714453073.951735, + "timestamp_start": 1714453073.951505 }, "tags": [ "injected-parameters" @@ -15,11 +15,7 @@ }, "outputs": [], "source": [ - "# Injected parameters\n", - "integer = 1\n", - "floater = 3.14\n", - "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", - "stringer = \"hello\"\n" + "# Injected parameters\n" ] }, { @@ -28,8 +24,8 @@ "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", "metadata": { "ploomber": { - "timestamp_end": 1714206084.990557, - "timestamp_start": 1714206084.990414 + "timestamp_end": 1714453073.951955, + "timestamp_start": 1714453073.95176 } }, "outputs": [], @@ -44,8 +40,8 @@ "id": "8eac7a3f", "metadata": { "ploomber": { - "timestamp_end": 1714206084.990685, - "timestamp_start": 
1714206084.990571 + "timestamp_end": 1714453073.952089, + "timestamp_start": 1714453073.951969 } }, "outputs": [ diff --git a/examples/common/write_parameters_out.ipynb b/examples/common/write_parameters_out.ipynb deleted file mode 100644 index 3e70245a..00000000 --- a/examples/common/write_parameters_out.ipynb +++ /dev/null @@ -1,104 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "41a71aa7", - "metadata": { - "ploomber": { - "timestamp_end": 1714206085.312398, - "timestamp_start": 1714206085.311976 - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "from examples.common.functions import ComplexParams\n", - "\n", - "pydantic_param = ComplexParams(x=10, foo=\"bar\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1fbaaa0f", - "metadata": { - "ploomber": { - "timestamp_end": 1714206085.31265, - "timestamp_start": 1714206085.312423 - }, - "tags": [ - "injected-parameters" - ] - }, - "outputs": [], - "source": [ - "# Injected parameters\n", - "integer = 1\n", - "floater = 3.14\n", - "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", - "stringer = \"hello\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "764f661d", - "metadata": { - "ploomber": { - "timestamp_end": 1714206085.313101, - "timestamp_start": 1714206085.312666 - } - }, - "outputs": [], - "source": [ - "data = {\"calories\": [420, 380, 390], \"duration\": [50, 40, 45]}\n", - "\n", - "df = pd.DataFrame(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", - "metadata": { - "ploomber": { - "timestamp_end": 1714206085.313268, - "timestamp_start": 1714206085.313116 - } - }, - "outputs": [], - "source": [ - "integer = 1\n", - "floater = 3.14\n", - "stringer = \"hello\"\n", - "score = 0.9" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pyproject.toml b/pyproject.toml index 6781373a..86726c1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,7 +97,6 @@ runnable = 'runnable.cli:cli' [tool.poetry.plugins."secrets"] "do-nothing" = "runnable.secrets:DoNothingSecretManager" "dotenv" = "runnable.extensions.secrets.dotenv.implementation:DotEnvSecrets" -"env-secrets-manager" = "runnable.extensions.secrets.env_secrets.implementation:EnvSecretsManager" # Plugins for Run Log store [tool.poetry.plugins."run_log_store"] diff --git a/runnable/entrypoints.py b/runnable/entrypoints.py index bdddeb7e..a21a6a6d 100644 --- a/runnable/entrypoints.py +++ b/runnable/entrypoints.py @@ -60,6 +60,8 @@ def prepare_configurations( variables = utils.gather_variables() templated_configuration = {} + configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file) + if configuration_file: templated_configuration = utils.load_yaml(configuration_file) or {} @@ -144,8 +146,8 @@ def prepare_configurations( def execute( - configuration_file: str, pipeline_file: str, + configuration_file: str = "", tag: str = "", run_id: str = "", parameters_file: str = "", @@ -235,6 +237,8 @@ def execute_single_node( """ from runnable import nodes + 
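+    # the RUNNABLE_CONFIGURATION_FILE environment variable, when set, takes precedence over the argument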
configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file) + run_context = prepare_configurations( configuration_file=configuration_file, pipeline_file=pipeline_file, @@ -422,6 +426,8 @@ def fan( """ from runnable import nodes + configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file) + run_context = prepare_configurations( configuration_file=configuration_file, pipeline_file=pipeline_file, diff --git a/runnable/extensions/nodes.py b/runnable/extensions/nodes.py index 21477585..28104e05 100644 --- a/runnable/extensions/nodes.py +++ b/runnable/extensions/nodes.py @@ -46,8 +46,6 @@ def parse_from_config(cls, config: Dict[str, Any]) -> "TaskNode": task_config = {k: v for k, v in config.items() if k not in TaskNode.model_fields.keys()} node_config = {k: v for k, v in config.items() if k in TaskNode.model_fields.keys()} - task_config["node_name"] = config.get("name") - executable = create_task(task_config) return cls(executable=executable, **node_config, **task_config) diff --git a/runnable/extensions/secrets/env_secrets/__init__.py b/runnable/extensions/secrets/env_secrets/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/runnable/extensions/secrets/env_secrets/implementation.py b/runnable/extensions/secrets/env_secrets/implementation.py deleted file mode 100644 index 5b88d1bd..00000000 --- a/runnable/extensions/secrets/env_secrets/implementation.py +++ /dev/null @@ -1,42 +0,0 @@ -import logging -import os - -from runnable import defaults, exceptions -from runnable.secrets import BaseSecrets - -logger = logging.getLogger(defaults.LOGGER_NAME) - - -class EnvSecretsManager(BaseSecrets): - """ - A secret manager via environment variables. - - This secret manager returns nothing if the key does not match - """ - - service_name: str = "env-secrets-manager" - prefix: str = "" - suffix: str = "" - - def get(self, name: str = "", **kwargs) -> str: - """ - If a name is provided, we look for that in the environment. - If a environment variable by that name is not found, we raise an Exception. - - If a name is not provided, we return an empty dictionary. - - Args: - name (str): The name of the secret to retrieve - - Raises: - Exception: If the secret by the name is not found. - - Returns: - [type]: [description] - """ - - try: - return os.environ[f"{self.prefix}{name}{self.suffix}"] - except KeyError as _e: - logger.exception(f"Secret {self.prefix}{name}{self.suffix} not found in environment") - raise exceptions.SecretNotFoundError(secret_name=name, secret_setting="environment") from _e diff --git a/runnable/sdk.py b/runnable/sdk.py index a506acc7..741c7fcf 100644 --- a/runnable/sdk.py +++ b/runnable/sdk.py @@ -310,8 +310,6 @@ class NotebookTask(BaseTask): """ notebook: str = Field(serialization_alias="command") - - notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path", validate_default=True) optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args") @computed_field @@ -591,6 +589,7 @@ def model_post_init(self, __context: Any) -> None: Any definition of pipeline should have one node that terminates with success. 
""" + # TODO: Bug with repeat names success_path: List[StepType] = [] on_failure_paths: List[List[StepType]] = [] @@ -690,7 +689,6 @@ def execute( parameters_file=parameters_file, ) - print("sdk", configuration_file) run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag) diff --git a/runnable/tasks.py b/runnable/tasks.py index bcdb6d4a..e61853b2 100644 --- a/runnable/tasks.py +++ b/runnable/tasks.py @@ -8,11 +8,12 @@ import subprocess import sys from datetime import datetime +from pathlib import Path from pickle import PicklingError from string import Template -from typing import Any, Dict, List, Literal, Optional, Tuple +from typing import Any, Dict, List, Literal, Tuple -from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator +from pydantic import BaseModel, ConfigDict, Field, field_validator from rich.console import Console from stevedore import driver @@ -34,9 +35,6 @@ # TODO: Can we add memory peak, cpu usage, etc. to the metrics? -task_console = Console(file=io.StringIO()) - - class TaskReturns(BaseModel): name: str kind: Literal["json", "object", "metric"] = Field(default="json") @@ -46,7 +44,6 @@ class BaseTaskType(BaseModel): """A base task class which does the execution of command defined by the user.""" task_type: str = Field(serialization_alias="command_type") - node_name: str = Field(exclude=True) secrets: List[str] = Field(default_factory=list) returns: List[TaskReturns] = Field(default_factory=list, alias="returns") @@ -153,7 +150,7 @@ def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: if not allow_complex: params = {key: value for key, value in params.items() if isinstance(value, JsonParameter)} - log_file_name = self.node_name # + ".execution.log" + log_file_name = self._context.executor._context_node.internal_name if map_variable: for _, value in map_variable.items(): log_file_name += "_" + str(value) @@ -165,15 +162,16 @@ def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: parameters_in = copy.deepcopy(params) f = io.StringIO() + task_console = Console(file=io.StringIO()) try: with contextlib.redirect_stdout(f): # with contextlib.nullcontext(): - yield params + yield params, task_console print(task_console.file.getvalue()) # type: ignore except Exception as e: # pylint: disable=broad-except logger.exception(e) finally: - task_console.clear() + task_console = None print(f.getvalue()) # print to console log_file.write(f.getvalue()) # Print to file @@ -234,7 +232,7 @@ def execute_command( """Execute the notebook as defined by the command.""" attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now())) - with self.execution_context(map_variable=map_variable) as params, self.expose_secrets() as _: + with self.execution_context(map_variable=map_variable) as (params, task_console), self.expose_secrets() as _: module, func = utils.get_module_and_attr_names(self.command) sys.path.insert(0, os.getcwd()) # Need to add the current directory to path imported_module = importlib.import_module(module) @@ -303,25 +301,22 @@ class NotebookTaskType(BaseTaskType): task_type: str = Field(default="notebook", serialization_alias="command_type") command: str - notebook_output_path: Optional[str] = Field(default=None, validate_default=True) optional_ploomber_args: dict = {} @field_validator("command") @classmethod - def notebook_should_end_with_ipynb(cls, command: str): + def 
notebook_should_end_with_ipynb(cls, command: str) -> str:
         if not command.endswith(".ipynb"):
             raise Exception("Notebook task should point to a ipynb file")
 
         return command
 
-    @field_validator("notebook_output_path")
-    @classmethod
-    def correct_notebook_output_path(cls, notebook_output_path: str, info: ValidationInfo):
-        if notebook_output_path:
-            return notebook_output_path
+    @property
+    def notebook_output_path(self) -> str:
+        output_path = Path(self.command)
+        # the output notebook is placed next to the source notebook
+        file_name = output_path.parent / (output_path.stem + "_out.ipynb")
 
-        command = info.data["command"]
-        return "".join(command.split(".")[:-1]) + "_out.ipynb"
+        return str(file_name)
 
     def get_cli_options(self) -> Tuple[str, dict]:
         return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path}
@@ -347,14 +342,21 @@ def execute_command(
 
         notebook_output_path = self.notebook_output_path or ""
 
-        with self.execution_context(
-            map_variable=map_variable, allow_complex=False
-        ) as params, self.expose_secrets() as _:
+        with self.execution_context(map_variable=map_variable, allow_complex=False) as (
+            params,
+            _,
+        ), self.expose_secrets() as _:
             if map_variable:
                 for key, value in map_variable.items():
                     notebook_output_path += "_" + str(value)
                     params[key] = value
 
+            logger.debug(f"Output notebook path: {notebook_output_path}")
+
             notebook_params = {k: v.get_value() for k, v in params.items()}
 
             ploomber_optional_args = self.optional_ploomber_args
@@ -454,95 +456,98 @@ def execute_command(
             secret_value = context.run_context.secrets_handler.get(key)
             subprocess_env[key] = secret_value
 
-        with self.execution_context(map_variable=map_variable, allow_complex=False) as params:
-            subprocess_env.update({k: v.get_value() for k, v in params.items()})
-
-            # Json dumps all runnable environment variables
-            for key, value in subprocess_env.items():
-                if isinstance(value, str):
-                    continue
-                subprocess_env[key] = json.dumps(value)
-
-            collect_delimiter = "=== COLLECT ==="
-
-            command = self.command.strip() + f" && echo '{collect_delimiter}' && env"
-            logger.info(f"Executing shell command: {command}")
+        try:
+            with self.execution_context(map_variable=map_variable, allow_complex=False) as (params, task_console):
+                subprocess_env.update({k: v.get_value() for k, v in params.items()})
+
+                # Json dumps all runnable environment variables
+                for key, value in subprocess_env.items():
+                    if isinstance(value, str):
+                        continue
+                    subprocess_env[key] = json.dumps(value)
+
+                collect_delimiter = "=== COLLECT ==="
+
+                command = self.command.strip() + f" && echo '{collect_delimiter}' && env"
+                logger.info(f"Executing shell command: {command}")
+
+                capture = False
+                return_keys = {x.name: x for x in self.returns}
+
+                proc = subprocess.Popen(
+                    command,
+                    shell=True,
+                    env=subprocess_env,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                    text=True,
+                )
+                result = proc.communicate()
+                logger.debug(result)
+                logger.info(proc.returncode)
+
+                if proc.returncode != 0:
+                    msg = ",".join(result[1].split("\n"))
+                    task_console.print(msg, style=defaults.error_style)
+                    raise exceptions.CommandCallError(msg)
+
+                # for stderr
+                for line in result[1].split("\n"):
+                    if line.strip() == "":
+                        continue
+                    task_console.print(line, style=defaults.warning_style)
 
-        capture = False
-        return_keys = {x.name: x for x in self.returns}
+                output_parameters: Dict[str, Parameter] = {}
+                metrics: Dict[str,
Parameter] = {} - proc = subprocess.Popen( - command, - shell=True, - env=subprocess_env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - result = proc.communicate() - logger.debug(result) - logger.info(proc.returncode) - - if proc.returncode != 0: - msg = ",".join(result[1].split("\n")) - attempt_log.status = defaults.FAIL - attempt_log.end_time = str(datetime.now()) - attempt_log.message = msg - task_console.print(msg, style=defaults.error_style) - return attempt_log + # only from stdout + for line in result[0].split("\n"): + if line.strip() == "": + continue - # for stderr - for line in result[1].split("\n"): - if line.strip() == "": - continue - task_console.print(line, style=defaults.warning_style) + logger.info(line) + task_console.print(line) - output_parameters: Dict[str, Parameter] = {} - metrics: Dict[str, Parameter] = {} + if line.strip() == collect_delimiter: + # The lines from now on should be captured + capture = True + continue - # only from stdout - for line in result[0].split("\n"): - if line.strip() == "": - continue + if capture: + key, value = line.strip().split("=", 1) + if key in return_keys: + task_return = return_keys[key] - logger.info(line) - task_console.print(line) + try: + value = json.loads(value) + except json.JSONDecodeError: + value = value - if line.strip() == collect_delimiter: - # The lines from now on should be captured - capture = True - continue + output_parameter = task_return_to_parameter( + task_return=task_return, + value=value, + ) - if capture: - key, value = line.strip().split("=", 1) - if key in return_keys: - task_return = return_keys[key] + if task_return.kind == "metric": + metrics[task_return.name] = output_parameter - try: - value = json.loads(value) - except json.JSONDecodeError: - value = value + param_name = task_return.name + if map_variable: + for _, v in map_variable.items(): + param_name = f"{param_name}_{v}" - output_parameter = task_return_to_parameter( - task_return=task_return, - value=value, - ) + output_parameters[param_name] = output_parameter - if task_return.kind == "metric": - metrics[task_return.name] = output_parameter - - param_name = task_return.name - if map_variable: - for _, v in map_variable.items(): - param_name = f"{param_name}_{v}" - - output_parameters[param_name] = output_parameter - - attempt_log.output_parameters = output_parameters - attempt_log.user_defined_metrics = metrics - params.update(output_parameters) + attempt_log.output_parameters = output_parameters + attempt_log.user_defined_metrics = metrics + params.update(output_parameters) - attempt_log.status = defaults.SUCCESS + attempt_log.status = defaults.SUCCESS + except exceptions.CommandCallError as e: + msg = f"Call to the command {self.command} did not succeed" + logger.exception(msg) + logger.exception(e) + attempt_log.status = defaults.FAIL attempt_log.end_time = str(datetime.now()) return attempt_log diff --git a/tests/runnable/extensions/secrets/test_env_secrets_manager.py b/tests/runnable/extensions/secrets/test_env_secrets_manager.py deleted file mode 100644 index 60294d45..00000000 --- a/tests/runnable/extensions/secrets/test_env_secrets_manager.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest -import os - -from runnable.extensions.secrets.env_secrets.implementation import EnvSecretsManager -from runnable import exceptions - - -def test_env_secrets_manager_raises_error_if_name_provided_and_not_present(): - manager = EnvSecretsManager() - - with pytest.raises(exceptions.SecretNotFoundError): - 
manager.get("environment") - - -def test_env_secrets_returns_secret_if_present_in_environment(monkeypatch): - monkeypatch.setenv("TEST_SECRET", "test_secret") - - manager = EnvSecretsManager() - assert manager.get("TEST_SECRET") == "test_secret" - - -def test_env_secrets_returns_secret_if_present_in_environment_with_prefix(monkeypatch): - monkeypatch.setenv("PREFIX_TEST_SECRET", "test_secret") - - manager = EnvSecretsManager(prefix="PREFIX_") - assert manager.get("TEST_SECRET") == "test_secret" - - -def test_env_secrets_returns_secret_if_present_in_environment_with_suffix(monkeypatch): - monkeypatch.setenv("TEST_SECRET_SUFFIX", "test_secret") - - manager = EnvSecretsManager(suffix="_SUFFIX") - assert manager.get("TEST_SECRET") == "test_secret" - - -def test_env_secrets_returns_secret_if_present_in_environment_with_suffix_and_prefix(monkeypatch): - monkeypatch.setenv("PREFIX_TEST_SECRET_SUFFIX", "test_secret") - - manager = EnvSecretsManager(suffix="_SUFFIX", prefix="PREFIX_") - assert manager.get("TEST_SECRET") == "test_secret" - - -def test_env_secrets_returns_matched_secrets_with_suffix(monkeypatch): - monkeypatch.setenv("TEST_SECRET_SUFFIX", "test_secret") - - manager = EnvSecretsManager(suffix="_SUFFIX") - - assert manager.get("TEST_SECRET") == "test_secret" diff --git a/tests/test_examples.py b/tests/test_examples.py index 6c38cbe2..f25a5842 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,6 +1,6 @@ import importlib import os -from contextlib import contextmanager, nullcontext +from contextlib import contextmanager import pytest @@ -22,6 +22,7 @@ ("03-parameters/passing_parameters_shell", False), ("03-parameters/static_parameters_non_python", False), ("03-parameters/static_parameters_python", False), + ("04-catalog/catalog", False), ] @@ -33,12 +34,20 @@ def list_python_examples(): @contextmanager def chunked_fs_context(): os.environ["RUNNABLE_CONFIGURATION_FILE"] = "examples/configs/chunked-fs-run_log.yaml" + os.environ["RUNNABLE_PRM_envvar"] = "from env" yield del os.environ["RUNNABLE_CONFIGURATION_FILE"] + del os.environ["RUNNABLE_PRM_envvar"] -contexts = [None, chunked_fs_context] -configurations = [None, "examples/configs/chunked-fs-run_log.yaml"] +@contextmanager +def default_context(): + os.environ["RUNNABLE_PRM_envvar"] = "from env" + yield + del os.environ["RUNNABLE_PRM_envvar"] + + +contexts = [default_context, chunked_fs_context] @pytest.mark.parametrize("example", list_python_examples()) @@ -49,11 +58,7 @@ def test_python_examples(example, context): print(f"Testing {example}...") mod, status = example - - if not context: - context = nullcontext() - else: - context = context() + context = context() imported_module = importlib.import_module(f"examples.{mod.replace('/', '.')}") f = getattr(imported_module, "main") @@ -67,19 +72,22 @@ def test_python_examples(example, context): @pytest.mark.parametrize("example", list_python_examples()) -@pytest.mark.parametrize("configuration", configurations) +@pytest.mark.parametrize("context", contexts) @pytest.mark.no_cover @pytest.mark.e2e -def test_yaml_examples(example, configuration): +def test_yaml_examples(example, context): print(f"Testing {example}...") file, status = example + context = context() example_file = f"examples/{file}.yaml" parameters_file = "examples/common/initial_parameters.yaml" - try: - execute(configuration_file=configuration, pipeline_file=example_file, parameters_file=parameters_file) - except exceptions.ExecutionFailedError: - if not status: - raise + + with context: + try: + 
execute(pipeline_file=example_file, parameters_file=parameters_file) + except exceptions.ExecutionFailedError: + if not status: + raise # TODO: Need to test argo and local container From 5edefc2bdb308df20abd0eb3de1f2635d9fe9863 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Thu, 2 May 2024 16:25:54 +0100 Subject: [PATCH 08/17] fix: notebook working with map parameters --- examples/01-tasks/notebook.py | 2 +- examples/01-tasks/notebook.yaml | 2 +- examples/02-sequential/default_fail.py | 3 +- examples/02-sequential/default_fail.yaml | 3 +- examples/02-sequential/on_failure_fail.py | 3 +- examples/02-sequential/on_failure_fail.yaml | 3 + examples/02-sequential/on_failure_succeed.py | 3 +- .../passing_parameters_notebook.py | 9 +- .../passing_parameters_notebook.yaml | 9 +- .../passing_parameters_python.py | 5 +- .../passing_parameters_python.yaml | 5 +- .../03-parameters/passing_parameters_shell.py | 5 +- .../passing_parameters_shell.yaml | 3 + .../static_parameters_non_python.py | 8 +- .../static_parameters_non_python.yaml | 11 +- .../03-parameters/static_parameters_python.py | 3 + .../static_parameters_python.yaml | 7 + examples/04-catalog/catalog.py | 3 + examples/04-catalog/catalog.yaml | 3 + examples/06-parallel/nesting.py | 81 +++++++++++ examples/06-parallel/nesting.yaml | 62 +++++++++ examples/06-parallel/parallel.py | 3 + examples/06-parallel/parallel.yaml | 15 ++- examples/06-parallel/traversal.py | 41 ------ examples/07-map/map.py | 126 ++++++++++++++++++ examples/07-map/reduce_python.py | 96 +++++++++++++ examples/common/functions.py | 38 +++++- examples/common/initial_parameters.yaml | 2 + examples/common/process_chunk.ipynb | 60 +++++++++ ...ableplaceholderexecutenotebook_out.ipynb_1 | 101 ++++++++++++++ ...ableplaceholderexecutenotebook_out.ipynb_2 | 101 ++++++++++++++ ...ableplaceholderexecutenotebook_out.ipynb_3 | 101 ++++++++++++++ runnable/tasks.py | 35 +++-- tests/test_examples.py | 3 + 34 files changed, 880 insertions(+), 75 deletions(-) create mode 100644 examples/06-parallel/nesting.py create mode 100644 examples/06-parallel/nesting.yaml delete mode 100644 examples/06-parallel/traversal.py create mode 100644 examples/07-map/map.py create mode 100644 examples/07-map/reduce_python.py create mode 100644 examples/common/process_chunk.ipynb create mode 100644 examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 create mode 100644 examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 create mode 100644 examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 diff --git a/examples/01-tasks/notebook.py b/examples/01-tasks/notebook.py index 68447032..afc99e40 100644 --- a/examples/01-tasks/notebook.py +++ b/examples/01-tasks/notebook.py @@ -16,7 +16,7 @@ │   └── simple_notebook_out.ipynb └── notebook.execution.log -The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!". +The notebook simple_notebook__out.ipynb has the captured stdout of "Hello World!". """ from runnable import NotebookTask, Pipeline diff --git a/examples/01-tasks/notebook.yaml b/examples/01-tasks/notebook.yaml index db79591a..f1826210 100644 --- a/examples/01-tasks/notebook.yaml +++ b/examples/01-tasks/notebook.yaml @@ -17,7 +17,7 @@ dag: │   └── simple_notebook_out.ipynb └── notebook.execution.log - The notebook simple_notebook_out.ipynb has the captured stdout of "Hello World!". + The notebook simple_notebook__out.ipynb has the captured stdout of "Hello World!". 
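(A rough sketch of the naming scheme this commit introduces in runnable/tasks.py: the output notebook name is derived from the notebook path plus the sanitised name of the executing node. The helper below is illustrative and not part of the patch itself.)

    from pathlib import Path

    def notebook_output_path(command: str, node_name: str) -> str:
        # Keep only alphanumeric characters of the node name, mirroring the
        # sanitisation done in the notebook task's notebook_output_path property.
        sane_name = "".join(x for x in node_name if x.isalnum())
        output_path = Path(".", command)
        return str(output_path.parent / (output_path.stem + f"{sane_name}_out.ipynb"))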
You can run this pipeline as: runnable execute -f examples/01-tasks/notebook.yaml diff --git a/examples/02-sequential/default_fail.py b/examples/02-sequential/default_fail.py index 92a4e578..c504e36d 100644 --- a/examples/02-sequential/default_fail.py +++ b/examples/02-sequential/default_fail.py @@ -8,7 +8,8 @@ step 1 >> step 2 >> fail -You can run this example by: python examples/02-sequential/default_fail.py +You can run this example by: + python examples/02-sequential/default_fail.py """ from examples.common.functions import raise_ex diff --git a/examples/02-sequential/default_fail.yaml b/examples/02-sequential/default_fail.yaml index f8e423f9..4802ccb3 100644 --- a/examples/02-sequential/default_fail.yaml +++ b/examples/02-sequential/default_fail.yaml @@ -6,7 +6,8 @@ dag: The default behavior is to traverse to step type fail and mark the run as failed. - You can run this pipeline by: runnable execute -f examples/02-sequential/default_fail.yaml + You can run this pipeline by: + runnable execute -f examples/02-sequential/default_fail.yaml start_at: step 1 steps: step 1: diff --git a/examples/02-sequential/on_failure_fail.py b/examples/02-sequential/on_failure_fail.py index cb4fa6cd..ce7892c9 100644 --- a/examples/02-sequential/on_failure_fail.py +++ b/examples/02-sequential/on_failure_fail.py @@ -12,7 +12,8 @@ This pattern is handy when you need to do something before eventually failing (eg: sending a notification, updating status, etc...) -Run this pipeline as: python examples/02-sequential/on_failure_fail.py +Run this pipeline as: + python examples/02-sequential/on_failure_fail.py """ from examples.common.functions import raise_ex diff --git a/examples/02-sequential/on_failure_fail.yaml b/examples/02-sequential/on_failure_fail.yaml index 6e94242a..0521038e 100644 --- a/examples/02-sequential/on_failure_fail.yaml +++ b/examples/02-sequential/on_failure_fail.yaml @@ -12,6 +12,9 @@ dag: This pattern is handy when you need to do something before eventually failing (eg: sending a notification, updating status, etc...) + + Run this pipeline as: + runnable execute -f examples/02-sequential/default_fail.yaml start_at: step_1 steps: step_1: diff --git a/examples/02-sequential/on_failure_succeed.py b/examples/02-sequential/on_failure_succeed.py index 6015bd01..b21c2cf1 100644 --- a/examples/02-sequential/on_failure_succeed.py +++ b/examples/02-sequential/on_failure_succeed.py @@ -12,7 +12,8 @@ This pattern is handy when you are expecting a failure of a step and have ways to handle it. -Run this pipeline: python examples/02-sequential/on_failure_succeed.py +Run this pipeline: + python examples/02-sequential/on_failure_succeed.py """ from examples.common.functions import raise_ex diff --git a/examples/03-parameters/passing_parameters_notebook.py b/examples/03-parameters/passing_parameters_notebook.py index ac8220f5..4ada02c1 100644 --- a/examples/03-parameters/passing_parameters_notebook.py +++ b/examples/03-parameters/passing_parameters_notebook.py @@ -1,12 +1,15 @@ """ Demonstrates passing parameters to and from a notebook. -We can extract json, pydantic, objects from notebook. +runnable can extract JSON serializable types, pydantic models, objects from notebook. eg: write_parameters_from_notebook -But can only inject json type parameters to a notebook. +But can only inject JSON type parameters to a notebook. eg: read_parameters_in_notebook -pydantic parameters are injected as dict. +pydantic parameters are injected as dictionary. 
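(A minimal sketch of the pattern described above; the notebook paths and return names here are illustrative, not taken from this patch:)

    from runnable import NotebookTask, Pipeline

    # The notebook assigns `integer` and `pydantic_param` in its cells;
    # runnable extracts them as parameters for downstream steps.
    write_task = NotebookTask(
        name="write_parameters_from_notebook",
        notebook="examples/common/write_parameters.ipynb",  # illustrative path
        returns=["integer", "pydantic_param"],
    )

    # Only JSON-type parameters are injected back into a notebook;
    # pydantic models arrive as plain dictionaries.
    read_task = NotebookTask(
        name="read_parameters_in_notebook",
        notebook="examples/common/read_parameters.ipynb",  # illustrative path
        terminate_with_success=True,
    )

    pipeline = Pipeline(steps=[write_task, read_task])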
+ +Run the below example as: + python examples/03-parameters/passing_parameters_notebook.py """ diff --git a/examples/03-parameters/passing_parameters_notebook.yaml b/examples/03-parameters/passing_parameters_notebook.yaml index d62aad95..4eb25c0c 100644 --- a/examples/03-parameters/passing_parameters_notebook.yaml +++ b/examples/03-parameters/passing_parameters_notebook.yaml @@ -2,12 +2,15 @@ dag: description: | Demonstrates passing parameters to and from a notebook. - We can extract json, pydantic, objects from notebook. + runnable can extract JSON serializable types, pydantic models, objects from notebook. eg: write_parameters_from_notebook - But can only inject json type parameters to a notebook. + But can only inject JSON type parameters to a notebook. eg: read_parameters_in_notebook - pydantic parameters are injected as dict. + pydantic parameters are injected as dictionary. + + Run the below example as: + runnable execute examples/03-parameters/passing_parameters_notebook.yaml start_at: write_parameters_from_notebook steps: write_parameters_from_notebook: diff --git a/examples/03-parameters/passing_parameters_python.py b/examples/03-parameters/passing_parameters_python.py index baf9c1e5..ee9a72d6 100644 --- a/examples/03-parameters/passing_parameters_python.py +++ b/examples/03-parameters/passing_parameters_python.py @@ -3,7 +3,7 @@ tasks of the pipeline. The function, set_parameter, returns - - simple python data types (int, float, str) + - JSON serializable types - pydantic models - pandas dataframe, any "object" type @@ -13,6 +13,9 @@ Use pickled even for python data types is advised for reasonably large collections. +Run the below example as: + python examples/03-parameters/passing_parameters_python.py + """ from examples.common.functions import read_parameter, write_parameter diff --git a/examples/03-parameters/passing_parameters_python.yaml b/examples/03-parameters/passing_parameters_python.yaml index 7c66763f..b2d73b30 100644 --- a/examples/03-parameters/passing_parameters_python.yaml +++ b/examples/03-parameters/passing_parameters_python.yaml @@ -4,7 +4,7 @@ dag: tasks of the pipeline. The function, set_parameter, returns - - simple python data types (int, float, str) + - JSON serializable - pydantic models - pandas dataframe, any "object" type @@ -13,6 +13,9 @@ dag: Use pickled even for python data types is advised for reasonably large collections. + + Run the pipeline as: + runnable execute -f examples/03-parameters/passing_parameters_python.yaml start_at: write_parameters steps: write_parameters: diff --git a/examples/03-parameters/passing_parameters_shell.py b/examples/03-parameters/passing_parameters_shell.py index 5e0ed3a5..20bbaf90 100644 --- a/examples/03-parameters/passing_parameters_shell.py +++ b/examples/03-parameters/passing_parameters_shell.py @@ -1,13 +1,16 @@ """ Demonstrates passing parameters to and from shell scripts. -We can extract only json style parameters from shell scripts. +We can extract only JSON serializable parameters from shell scripts. eg: write_parameters_in_shell We can only read json style parameters from shell scripts. eg: read_parameters_in_shell pydantic parameters are injected as json. 
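(A minimal sketch of the shell round trip; the script bodies are illustrative. Under the hood, this patch appends `&& echo '=== COLLECT ===' && env` to the command and parses the `key=value` lines after the delimiter to pick up exported returns.)

    from runnable import ShellTask

    # Exported variables named in `returns` are captured as JSON-style
    # parameters for downstream steps.
    write_in_shell = ShellTask(
        name="write_parameters_in_shell",
        command="export integer=1 && export stringer=hello",
        returns=["integer", "stringer"],
    )

    # Incoming parameters are exposed to the script as environment variables.
    read_in_shell = ShellTask(
        name="read_parameters_in_shell",
        command='test "$integer" = "1"',
        terminate_with_success=True,
    )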
+Run the below example as: + python examples/03-parameters/passing_parameters_shell.py + """ from examples.common.functions import read_unpickled_parameter diff --git a/examples/03-parameters/passing_parameters_shell.yaml b/examples/03-parameters/passing_parameters_shell.yaml index 63623f32..b12e48d0 100644 --- a/examples/03-parameters/passing_parameters_shell.yaml +++ b/examples/03-parameters/passing_parameters_shell.yaml @@ -9,6 +9,9 @@ dag: eg: read_parameters_in_shell pydantic parameters are injected as json. + Run the pipeline as: + runnable execute -f examples/03-parameters/passing_parameters_shell.yaml + start_at: write_parameters_in_shell steps: write_parameters_in_shell: diff --git a/examples/03-parameters/static_parameters_non_python.py b/examples/03-parameters/static_parameters_non_python.py index 41eae659..0a095879 100644 --- a/examples/03-parameters/static_parameters_non_python.py +++ b/examples/03-parameters/static_parameters_non_python.py @@ -11,8 +11,14 @@ foo: bar runnable exposes the nested parameters as dictionary for notebook based tasks -as a json string for the shell based tasks. +and as a json string for the shell based tasks. +You can set the initial parameters from environment variables as well. +eg: Any environment variable prefixed by "RUNNABLE_PRM_" will be picked up by runnable + + +Run this pipeline as: + python examples/03-parameters/static_parameters_non_python.py """ from runnable import NotebookTask, Pipeline, ShellTask diff --git a/examples/03-parameters/static_parameters_non_python.yaml b/examples/03-parameters/static_parameters_non_python.yaml index ba581fa8..cf7809d5 100644 --- a/examples/03-parameters/static_parameters_non_python.yaml +++ b/examples/03-parameters/static_parameters_non_python.yaml @@ -12,7 +12,16 @@ dag: foo: bar runnable exposes the nested parameters as dictionary for notebook based tasks - as a json string for the shell based tasks. + and as a json string for the shell based tasks. + + You can set the initial parameters from environment variables as well. + eg: Any environment variable prefixed by "RUNNABLE_PRM_" will be picked up by runnable + + + Run this pipeline as: + runnable execute -f 03-parameters/static_parameters_non_python.yaml \ + -p common/initial_parameters.yaml + start_at: read_params_in_notebook steps: read_params_in_notebook: diff --git a/examples/03-parameters/static_parameters_python.py b/examples/03-parameters/static_parameters_python.py index edf2028e..abe8aeff 100644 --- a/examples/03-parameters/static_parameters_python.py +++ b/examples/03-parameters/static_parameters_python.py @@ -17,6 +17,9 @@ You can set the initial parameters from environment variables as well. eg: Any environment variable prefixed by "RUNNABLE_PRM_" will be picked up by runnable +Run this pipeline as: + python examples/03-parameters/static_parameters_python.py + """ import os diff --git a/examples/03-parameters/static_parameters_python.yaml b/examples/03-parameters/static_parameters_python.yaml index ea0b8b7a..f86302d7 100644 --- a/examples/03-parameters/static_parameters_python.yaml +++ b/examples/03-parameters/static_parameters_python.yaml @@ -14,6 +14,13 @@ dag: If no annotation is provided, the parameter is assumed to be a dictionary. eg: read_initial_params_as_json + + You can set the initial parameters from environment variables as well. 
+ eg: Any environment variable prefixed by "RUNNABLE_PRM_" will be picked up by runnable + + Run this pipeline by: + runnable execute -f 03-parameters/static_parameters_python.yaml \ + -p examples/common/initial_parameters.yaml start_at: read_params_as_pydantic steps: read_params_as_pydantic: diff --git a/examples/04-catalog/catalog.py b/examples/04-catalog/catalog.py index 68578dad..63280ae6 100644 --- a/examples/04-catalog/catalog.py +++ b/examples/04-catalog/catalog.py @@ -43,6 +43,9 @@ 5 directories, 11 files +Run this pipeline as: + python examples/04-catalog/catalog.py + """ from examples.common.functions import read_files, write_files diff --git a/examples/04-catalog/catalog.yaml b/examples/04-catalog/catalog.yaml index 8fe69580..16a8c06d 100644 --- a/examples/04-catalog/catalog.yaml +++ b/examples/04-catalog/catalog.yaml @@ -43,6 +43,9 @@ dag: └── readdatashell.execution.log 5 directories, 11 files + + Run this pipeline as: + runnable execute -f examples/04-catalog/catalog.yaml start_at: generate_data steps: generate_data: diff --git a/examples/06-parallel/nesting.py b/examples/06-parallel/nesting.py new file mode 100644 index 00000000..27bf543c --- /dev/null +++ b/examples/06-parallel/nesting.py @@ -0,0 +1,81 @@ +""" +Example to show case nesting of parallel steps. + +runnable does not put a limit on the nesting of parallel steps. +Deeply nested pipelines can be hard to read and not all +executors support it. + +Run this pipeline as: + python examples/06-parallel/nesting.py +""" + +from examples.common.functions import hello +from runnable import NotebookTask, Parallel, Pipeline, PythonTask, ShellTask, Stub + + +def traversal(execute: bool = True): + """ + Use the pattern of using "execute" to control the execution of the pipeline. + + The same pipeline can be run independently from the command line. + + WARNING: If the execution is not controlled by "execute", the pipeline will be executed + even during the definition of the branch in parallel steps. + """ + stub_task = Stub(name="hello stub") + + python_task = PythonTask( + name="hello python", + function=hello, + ) + + shell_task = ShellTask( + name="hello shell", + command="echo 'Hello World!'", + ) + + notebook_task = NotebookTask( + name="hello notebook", + notebook="examples/common/simple_notebook.ipynb", + terminate_with_success=True, + ) + + # The pipeline has a mix of tasks. + # The order of execution follows the order of the tasks in the list. + pipeline = Pipeline(steps=[stub_task, python_task, shell_task, notebook_task]) + + if execute: # Do not execute the pipeline if we are using it as a branch + pipeline.execute() + + return pipeline + + +def parallel_pipeline(execute: bool = True): + parallel_step = Parallel( + name="parallel step", + terminate_with_success=True, + branches={"branch1": traversal(execute=False), "branch2": traversal(execute=False)}, + ) + + pipeline = Pipeline(steps=[parallel_step]) + + if execute: + pipeline.execute() + return pipeline + + +def main(): + # Create a parallel step with parallel steps as branches. 
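+    # Each branch below is itself a pipeline whose only step is a Parallel
+    # node (see parallel_pipeline above), so this definition nests two levels
+    # deep and fans out to four leaf branches in total.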
+ parallel_step = Parallel( + name="nested_parallel", + terminate_with_success=True, + branches={"branch1": parallel_pipeline(execute=False), "branch2": parallel_pipeline(execute=False)}, + ) + + pipeline = Pipeline(steps=[parallel_step]) + pipeline.execute() + return pipeline + + +if __name__ == "__main__": + main() diff --git a/examples/06-parallel/nesting.yaml b/examples/06-parallel/nesting.yaml new file mode 100644 index 00000000..32b189f3 --- /dev/null +++ b/examples/06-parallel/nesting.yaml @@ -0,0 +1,62 @@ +branch: &simple_branch + description: | + Use this pattern to define repeatable branch + + This pipeline is similar to one defined in: + examples/02-sequential/traversal.yaml + start_at: hello stub + steps: + hello stub: + type: stub + next: hello python + hello python: + type: task + command_type: python + command: examples.common.functions.hello # dotted path to the function. + next: hello shell + hello shell: + type: task + command_type: shell + command: echo "Hello World!" # Command to run + next: hello notebook + hello notebook: + type: task + command_type: notebook + command: examples/common/simple_notebook.ipynb # The path is relative to the root of the project. + next: success + success: + type: success + fail: + type: fail + + +# This branch is similar to a branch parallel.yaml +nested_branch: &nested_branch + start_at: parallel_step + steps: + parallel_step: + type: parallel + next: success + branches: + branch1: *simple_branch + branch2: *simple_branch + success: + type: success + failure: + type: fail + + +# The pipeline of nested parallel branches +dag: + start_at: parallel_step + steps: + parallel_step: + type: parallel + next: success + branches: + branch1: *nested_branch + branch2: *nested_branch + success: + type: success + failure: + type: fail diff --git a/examples/06-parallel/parallel.py b/examples/06-parallel/parallel.py index 3d26365e..05995117 100644 --- a/examples/06-parallel/parallel.py +++ b/examples/06-parallel/parallel.py @@ -6,6 +6,9 @@ WARNING, the function returning the pipeline should not executed during the definition of the branch in parallel steps. + +Run this pipeline as: + python examples/06-parallel/parallel.py """ from examples.common.functions import hello diff --git a/examples/06-parallel/parallel.yaml b/examples/06-parallel/parallel.yaml index f91b2b5b..c76d7502 100644 --- a/examples/06-parallel/parallel.yaml +++ b/examples/06-parallel/parallel.yaml @@ -1,10 +1,8 @@ -# This example demonstrates the use of the Parallel step. - -# The branches of the parallel step are themselves pipelines and can be defined -# as shown in 02-sequential/traversal.yaml branch: &branch description: | Use this pattern to define repeatable branch + + This pipeline is the same as the one defined in examples/02-sequential/traversal.yaml start_at: hello stub steps: hello stub: @@ -32,6 +30,15 @@ branch: &branch dag: + description: | + This example demonstrates the use of the Parallel step. + + parallel step takes a mapping of branches which are pipelines themselves. + + Run this pipeline as: + runnable execute -f examples/06-parallel/parallel.yaml + + start_at: parallel_step steps: parallel_step: diff --git a/examples/06-parallel/traversal.py b/examples/06-parallel/traversal.py deleted file mode 100644 index 916babcd..00000000 --- a/examples/06-parallel/traversal.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -This pipeline is same as the one seen in 02-sequential/traversal.py. - -Given the naming convention used, we cannot import it directly. 
- -""" - -from examples.common.functions import hello -from runnable import NotebookTask, Pipeline, PythonTask, ShellTask, Stub - - -def main(): - stub_task = Stub(name="hello stub") - - python_task = PythonTask( - name="hello python", - function=hello, - ) - - shell_task = ShellTask( - name="hello shell", - command="echo 'Hello World!'", - ) - - notebook_task = NotebookTask( - name="hello notebook", - notebook="examples/common/simple_notebook.ipynb", - terminate_with_success=True, - ) - - # The pipeline has a mix of tasks. - # The order of execution follows the order of the tasks in the list. - pipeline = Pipeline(steps=[stub_task, python_task, shell_task, notebook_task]) - - pipeline.execute() - - return pipeline - - -if __name__ == "__main__": - main() diff --git a/examples/07-map/map.py b/examples/07-map/map.py new file mode 100644 index 00000000..b63896e7 --- /dev/null +++ b/examples/07-map/map.py @@ -0,0 +1,126 @@ +""" +map states allows to repeat a branch for each value of an iterable. + +The below example can written, in python, as: + +chunks = [1, 2, 3] + +for chunk in chunks: + # Any task within the pipeline can access the value of chunk as an argument. + processed = process_chunk(chunk) + + # The value of processed for every iteration is the value returned by the steps + # of the current execution. For example, the value of processed + # for chunk=1, is chunk*10 = 10 for downstream steps. + read_processed_chunk(chunk, processed) + +# Outside of loop, processed is a list of all the processed chunks. +# This is also called as the reduce pattern. +assert processed == [chunk * 10 for chunk in chunks] +""" + +from examples.common.functions import ( + assert_default_reducer, + process_chunk, + read_processed_chunk, +) +from runnable import Map, NotebookTask, Pipeline, PythonTask, ShellTask + + +def iterable_branch(execute: bool = True): + """ + Use the pattern of using "execute" to control the execution of the pipeline. + + The same pipeline can be run independently from the command line. + + WARNING: If the execution is not controlled by "execute", the pipeline will be executed + even during the definition of the branch in parallel steps. + """ + # The python function to process a single chunk of data. + # In the example, we are multiplying the chunk by 10. + process_chunk_task_python = PythonTask( + name="execute_python", + function=process_chunk, + returns=["processed_python"], + ) + + # return parameters within a map branch have to be unique + # The notebook takes in the value of processed_python as an argument. + # and returns a new parameter "processed_notebook" which is 10*processed_python + process_chunk_task_notebook = NotebookTask( + name="execute_notebook", + notebook="examples/common/process_chunk.ipynb", + returns=["processed_notebook"], + ) + + # following the pattern, the shell takes in the value of processed_notebook as an argument. + # and returns a new parameter "processed_shell" which is 10*processed_notebook. + shell_command = """ + if [ "$processed_python" = $( expr 10 '*' "$chunk" ) ] \ + && [ "$processed_notebook" = $( expr 10 '*' "$processed_python" ) ] ; then + echo "yaay" + else + echo "naay" + exit 1; + fi + export processed_shell=$( expr 10 '*' "$processed_notebook") + """ + + process_chunk_task_shell = ShellTask( + name="execute_shell", + command=shell_command, + returns=["processed_shell"], + ) + + # A downstream step of process_ which reads the parameter "processed". + # The value of processed is within the context of the branch. 
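+    # (Internally, per-iteration returns are namespaced by prefixing the
+    # parameter name with the map variable's value; see the f"{v}_{param_name}"
+    # change to runnable/tasks.py later in this commit.)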
+ # For example, for chunk=1, the value of processed_python is chunk*10 = 10 + # the value of processed_notebook is processed_python*10 = 100 + # the value of processed_shell is processed_notebook*10 = 1000 + read_chunk = PythonTask( + name="read processed chunk", + function=read_processed_chunk, + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[process_chunk_task_python, process_chunk_task_notebook, process_chunk_task_shell, read_chunk], + add_terminal_nodes=True, + ) + + if execute: + pipeline.execute() + + return pipeline + + +def main(): + # Create a map state which iterates over a list of chunks. + # chunk is the value of the iterable. + map_state = Map( + name="map state", + iterate_on="chunks", + iterate_as="chunk", + branch=iterable_branch(execute=False), + ) + + # Outside of the loop, processed is a list of all the processed chunks. + # This is also called as the reduce pattern. + # the value of processed_python is [10, 20, 30] + # the value of processed_notebook is [100, 200, 300] + # the value of processed_shell is [1000, 2000, 3000] + collect = PythonTask( + name="collect", + function=assert_default_reducer, + terminate_with_success=True, + ) + + pipeline = Pipeline(steps=[map_state, collect]) + + pipeline.execute(parameters_file="examples/common/initial_parameters.yaml") + + return pipeline + + +if __name__ == "__main__": + main() diff --git a/examples/07-map/reduce_python.py b/examples/07-map/reduce_python.py new file mode 100644 index 00000000..f41c0fb0 --- /dev/null +++ b/examples/07-map/reduce_python.py @@ -0,0 +1,96 @@ +""" +map states allows to repeat a branch for each value of an iterable. + +The below example can written, in python, as: + +chunks = [1, 2, 3] + +for chunk in chunks: + # Any task within the pipeline can access the value of chunk as an argument. + processed = process_chunk(chunk) + + # The value of processed for every iteration is the value returned by the steps + # of the current execution. For example, the value of processed + # for chunk=1, is chunk*10 = 10 for downstream steps. + read_processed_chunk(chunk, processed) + +It is possible to use a custom reducer, for example, this reducer is a max of the collection. +# Once the reducer is applied, processed is reduced to a single value. +assert processed == max(chunk * 10 for chunk in chunks) +""" + +from examples.common.functions import ( + assert_custom_reducer, + process_chunk, + read_processed_chunk, +) +from runnable import Map, Pipeline, PythonTask + + +def iterable_branch(execute: bool = True): + """ + Use the pattern of using "execute" to control the execution of the pipeline. + + The same pipeline can be run independently from the command line. + + WARNING: If the execution is not controlled by "execute", the pipeline will be executed + even during the definition of the branch in parallel steps. + """ + # The python function to process a single chunk of data. + # In the example, we are multiplying the chunk by 10. + process_chunk_task = PythonTask( + name="execute", + function=process_chunk, + returns=["processed"], + ) + + # A downstream step of process_chunk which reads the parameter "processed". + # The value of processed is within the context of the branch. + # For example, for the value of chunk = 1, processed will be 10. + # read_processed_chunk will receive the value of 10. 
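+    # After the map completes, the custom reducer declared on the Map node
+    # below ("lambda *x: max(x)") collapses the per-iteration values, so the
+    # collect step receives 30 rather than the list [10, 20, 30].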
+ read_chunk = PythonTask( + name="read processed chunk", + function=read_processed_chunk, + terminate_with_success=True, + ) + + pipeline = Pipeline( + steps=[process_chunk_task, read_chunk], + add_terminal_nodes=True, + ) + + if execute: + pipeline.execute() + + return pipeline + + +def main(): + # Create a map state which iterates over a list of chunks. + # chunk is the value of the iterable. + # Upon completion of the map state, all the parameters of the tasks + # within the pipeline will be processed by the reducer. + # In this case, the reducer is the max of all the processed chunks. + map_state = Map( + name="map state", + iterate_on="chunks", + iterate_as="chunk", + reducer="lambda *x: max(x)", + branch=iterable_branch(execute=False), + ) + + collect = PythonTask( + name="collect", + function=assert_custom_reducer, + terminate_with_success=True, + ) + + pipeline = Pipeline(steps=[map_state, collect]) + + pipeline.execute(parameters_file="examples/common/initial_parameters.yaml") + + return pipeline + + +if __name__ == "__main__": + main() diff --git a/examples/common/functions.py b/examples/common/functions.py index 4ca9803e..2c0fa0cb 100644 --- a/examples/common/functions.py +++ b/examples/common/functions.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, Union +from typing import Dict, List, Union import pandas as pd from pydantic import BaseModel @@ -111,3 +111,39 @@ def read_files(): data = f.read() assert data.strip() == "hello world" + + +def process_chunk(chunk: int): + """ + An example function that processes a chunk of data. + We are multiplying the chunk by 10. + """ + return chunk * 10 + + +def read_processed_chunk(chunk: int, processed_python: int, processed_notebook: int, processed_shell: int): + """ + A downstream step of process_chunk of map state which reads the processed chunk. + Since the process_chunk returns the chunk multiplied by 10, we assert that. + """ + assert chunk * 10 == processed_python + assert processed_python * 10 == processed_notebook + assert processed_notebook * 10 == processed_shell + + +def assert_default_reducer( + processed_python: List[int], processed_notebook: List[int], processed_shell: List[int], chunks: List[int] +) -> int: + """ + Demonstrates the default reducer which just returns the list of processed chunks. + """ + assert processed_python == [chunk * 10 for chunk in chunks] + assert processed_notebook == [chunk * 100 for chunk in chunks] + assert processed_shell == [chunk * 1000 for chunk in chunks] + + +def assert_custom_reducer(processed: int, chunks: List[int]) -> int: + """ + Asserts the custom reducer returns the max of all the processed chunks. 
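+    (The reducer itself, "lambda *x: max(x)", is declared on the Map node;
+    this function only checks the already-reduced value.)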
+ """ + assert processed == max(chunk * 10 for chunk in chunks) diff --git a/examples/common/initial_parameters.yaml b/examples/common/initial_parameters.yaml index eb987ed4..a60e9a47 100644 --- a/examples/common/initial_parameters.yaml +++ b/examples/common/initial_parameters.yaml @@ -4,3 +4,5 @@ stringer : hello pydantic_param: x: 10 foo: bar + +chunks: [1, 2, 3] diff --git a/examples/common/process_chunk.ipynb b/examples/common/process_chunk.ipynb new file mode 100644 index 00000000..2ac01390 --- /dev/null +++ b/examples/common/process_chunk.ipynb @@ -0,0 +1,60 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "41a71aa7", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "chunk = None\n", + "processed_python = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "764f661d", + "metadata": {}, + "outputs": [], + "source": [ + "assert chunk*10 == processed_python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "processed_notebook = processed_python*10" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 new file mode 100644 index 00000000..cab0215a --- /dev/null +++ b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 @@ -0,0 +1,101 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "41a71aa7", + "metadata": { + "ploomber": { + "timestamp_end": 1714663503.68139, + "timestamp_start": 1714663503.681186 + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "chunk = None\n", + "processed_python = None" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ca6e39f1", + "metadata": { + "ploomber": { + "timestamp_end": 1714663503.681826, + "timestamp_start": 1714663503.681457 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n", + "integer = 1\n", + "floater = 3.14\n", + "stringer = \"hello\"\n", + "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", + "chunks = [1, 2, 3]\n", + "processed_python = 10\n", + "processed_notebook = \"\"\n", + "processed_shell = \"\"\n", + "chunk = 1\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "764f661d", + "metadata": { + "ploomber": { + "timestamp_end": 1714663503.681977, + "timestamp_start": 1714663503.68184 + } + }, + "outputs": [], + "source": [ + "assert chunk*10 == processed_python" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1714663503.682091, + "timestamp_start": 1714663503.68199 + } + }, + "outputs": [], + "source": [ + "processed_notebook = processed_python*10" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 new file mode 100644 index 00000000..8472743f --- /dev/null +++ b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 @@ -0,0 +1,101 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "41a71aa7", + "metadata": { + "ploomber": { + "timestamp_end": 1714663504.338108, + "timestamp_start": 1714663504.337819 + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "chunk = None\n", + "processed_python = None" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c76509a6", + "metadata": { + "ploomber": { + "timestamp_end": 1714663504.33849, + "timestamp_start": 1714663504.338129 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n", + "integer = 1\n", + "floater = 3.14\n", + "stringer = \"hello\"\n", + "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", + "chunks = [1, 2, 3]\n", + "processed_python = 20\n", + "processed_notebook = \"\"\n", + "processed_shell = \"\"\n", + "chunk = 2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "764f661d", + "metadata": { + "ploomber": { + "timestamp_end": 1714663504.338656, + "timestamp_start": 1714663504.338506 + } + }, + "outputs": [], + "source": [ + "assert chunk*10 == processed_python" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1714663504.338773, + "timestamp_start": 1714663504.33867 + } + }, + "outputs": [], + "source": [ + "processed_notebook = processed_python*10" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 new file mode 100644 index 00000000..ec15410c --- /dev/null +++ b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 @@ -0,0 +1,101 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "41a71aa7", + "metadata": { + "ploomber": { + "timestamp_end": 1714663505.186755, + "timestamp_start": 1714663505.186461 + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "chunk = None\n", + "processed_python = None" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7fb77ef2", + "metadata": { + "ploomber": { + "timestamp_end": 1714663505.187151, + "timestamp_start": 1714663505.186784 + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Injected parameters\n", + "integer = 1\n", + "floater = 
3.14\n", + "stringer = \"hello\"\n", + "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", + "chunks = [1, 2, 3]\n", + "processed_python = 30\n", + "processed_notebook = \"\"\n", + "processed_shell = \"\"\n", + "chunk = 3\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "764f661d", + "metadata": { + "ploomber": { + "timestamp_end": 1714663505.187308, + "timestamp_start": 1714663505.187165 + } + }, + "outputs": [], + "source": [ + "assert chunk*10 == processed_python" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": { + "ploomber": { + "timestamp_end": 1714663505.187424, + "timestamp_start": 1714663505.187321 + } + }, + "outputs": [], + "source": [ + "processed_notebook = processed_python*10" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/runnable/tasks.py b/runnable/tasks.py index e61853b2..da1bb207 100644 --- a/runnable/tasks.py +++ b/runnable/tasks.py @@ -14,6 +14,8 @@ from typing import Any, Dict, List, Literal, Tuple from pydantic import BaseModel, ConfigDict, Field, field_validator + +# from rich import print from rich.console import Console from stevedore import driver @@ -160,7 +162,6 @@ def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: log_file = open(log_file_name, "w") parameters_in = copy.deepcopy(params) - f = io.StringIO() task_console = Console(file=io.StringIO()) try: @@ -313,8 +314,11 @@ def notebook_should_end_with_ipynb(cls, command: str) -> str: @property def notebook_output_path(self) -> str: - output_path = Path(self.command) - file_name = output_path.resolve() / (output_path.stem + "_out.ipynb") + node_name = self._context.executor._context_node.internal_name + sane_name = "".join(x for x in node_name if x.isalnum()) + + output_path = Path(".", self.command) + file_name = output_path.parent / (output_path.stem + f"{sane_name}_out.ipynb") return str(file_name) @@ -340,7 +344,7 @@ def execute_command( import ploomber_engine as pm from ploomber_engine.ipython import PloomberClient - notebook_output_path = self.notebook_output_path or "" + notebook_output_path = self.notebook_output_path with self.execution_context(map_variable=map_variable, allow_complex=False) as ( params, @@ -349,15 +353,17 @@ def execute_command( if map_variable: for key, value in map_variable.items(): notebook_output_path += "_" + str(value) - params[key] = value + params[key] = JsonParameter(kind="json", value=value) - node_name = self._context.executor._context_node.internal_name - "".join(x for x in node_name if x.isalnum()) + ".execution.log" - new_notebook_output_path = notebook_output_path - print(notebook_output_path) - print(new_notebook_output_path) + # Remove any {v}_unreduced parameters from the parameters + copy_params = copy.deepcopy(params) + unprocessed_params = [k for k, v in copy_params.items() if not v.reduced] - notebook_params = {k: v.get_value() for k, v in params.items()} + for key in list(copy_params.keys()): + if any(key.endswith(f"_{k}") for k in unprocessed_params): + del copy_params[key] + + notebook_params = {k: v.get_value() for k, v in 
copy_params.items()} ploomber_optional_args = self.optional_ploomber_args @@ -380,6 +386,11 @@ def execute_command( try: for task_return in self.returns: param_name = Template(task_return.name).safe_substitute(map_variable) # type: ignore + + if map_variable: + for _, v in map_variable.items(): + param_name = f"{v}_{param_name}" + output_parameters[param_name] = task_return_to_parameter( task_return=task_return, value=namespace[task_return.name], @@ -534,7 +545,7 @@ def execute_command( param_name = task_return.name if map_variable: for _, v in map_variable.items(): - param_name = f"{param_name}_{v}" + param_name = f"{v}_{param_name}" output_parameters[param_name] = output_parameter diff --git a/tests/test_examples.py b/tests/test_examples.py index f25a5842..a3f917c4 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -23,6 +23,9 @@ ("03-parameters/static_parameters_non_python", False), ("03-parameters/static_parameters_python", False), ("04-catalog/catalog", False), + ("06-parallel/parallel", False), + ("06-parallel/nesting", False), + ("07-map/map_python", False), ] From 78d817c33449c0b18bb14d56a1dd1dc889fc0648 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Thu, 2 May 2024 19:44:08 +0100 Subject: [PATCH 09/17] docs: more examples --- .../{reduce_python.py => custom_reducer.py} | 45 ++++++-- examples/07-map/custom_reducer.yaml | 81 ++++++++++++++ examples/07-map/map.py | 3 + examples/07-map/map.yaml | 82 ++++++++++++++ examples/README.md | 25 +++-- examples/common/functions.py | 8 +- ...ableplaceholderexecutenotebook_out.ipynb_1 | 101 ------------------ ...ableplaceholderexecutenotebook_out.ipynb_2 | 101 ------------------ ...ableplaceholderexecutenotebook_out.ipynb_3 | 101 ------------------ tests/test_examples.py | 3 +- 10 files changed, 225 insertions(+), 325 deletions(-) rename examples/07-map/{reduce_python.py => custom_reducer.py} (60%) create mode 100644 examples/07-map/custom_reducer.yaml create mode 100644 examples/07-map/map.yaml delete mode 100644 examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 delete mode 100644 examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 delete mode 100644 examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 diff --git a/examples/07-map/reduce_python.py b/examples/07-map/custom_reducer.py similarity index 60% rename from examples/07-map/reduce_python.py rename to examples/07-map/custom_reducer.py index f41c0fb0..9d178cfa 100644 --- a/examples/07-map/reduce_python.py +++ b/examples/07-map/custom_reducer.py @@ -24,7 +24,7 @@ process_chunk, read_processed_chunk, ) -from runnable import Map, Pipeline, PythonTask +from runnable import Map, NotebookTask, Pipeline, PythonTask, ShellTask def iterable_branch(execute: bool = True): @@ -38,16 +38,45 @@ def iterable_branch(execute: bool = True): """ # The python function to process a single chunk of data. # In the example, we are multiplying the chunk by 10. - process_chunk_task = PythonTask( - name="execute", + process_chunk_task_python = PythonTask( + name="execute_python", function=process_chunk, - returns=["processed"], + returns=["processed_python"], ) - # A downstream step of process_chunk which reads the parameter "processed". + # return parameters within a map branch have to be unique + # The notebook takes in the value of processed_python as an argument. 
+ # and returns a new parameter "processed_notebook" which is 10*processed_python + process_chunk_task_notebook = NotebookTask( + name="execute_notebook", + notebook="examples/common/process_chunk.ipynb", + returns=["processed_notebook"], + ) + + # following the pattern, the shell takes in the value of processed_notebook as an argument. + # and returns a new parameter "processed_shell" which is 10*processed_notebook. + shell_command = """ + if [ "$processed_python" = $( expr 10 '*' "$chunk" ) ] \ + && [ "$processed_notebook" = $( expr 10 '*' "$processed_python" ) ] ; then + echo "yaay" + else + echo "naay" + exit 1; + fi + export processed_shell=$( expr 10 '*' "$processed_notebook") + """ + + process_chunk_task_shell = ShellTask( + name="execute_shell", + command=shell_command, + returns=["processed_shell"], + ) + + # A downstream step of process_ which reads the parameter "processed". # The value of processed is within the context of the branch. - # For example, for the value of chunk = 1, processed will be 10. - # read_processed_chunk will receive the value of 10. + # For example, for chunk=1, the value of processed_python is chunk*10 = 10 + # the value of processed_notebook is processed_python*10 = 100 + # the value of processed_shell is processed_notebook*10 = 1000 read_chunk = PythonTask( name="read processed chunk", function=read_processed_chunk, @@ -55,7 +84,7 @@ def iterable_branch(execute: bool = True): ) pipeline = Pipeline( - steps=[process_chunk_task, read_chunk], + steps=[process_chunk_task_python, process_chunk_task_notebook, process_chunk_task_shell, read_chunk], add_terminal_nodes=True, ) diff --git a/examples/07-map/custom_reducer.yaml b/examples/07-map/custom_reducer.yaml new file mode 100644 index 00000000..3189924e --- /dev/null +++ b/examples/07-map/custom_reducer.yaml @@ -0,0 +1,81 @@ +branch: &branch + start_at: execute_python + steps: + execute_python: + type: task + command: examples.common.functions.process_chunk + returns: + - name: processed_python + next: execute_notebook + execute_notebook: + type: task + command_type: notebook + command: examples/common/process_chunk.ipynb + returns: + - name: processed_notebook + next: execute_shell + execute_shell: + type: task + command_type: shell + command: | + if [ "$processed_python" = $( expr 10 '*' "$chunk" ) ] \ + && [ "$processed_notebook" = $( expr 10 '*' "$processed_python" ) ] ; then + echo "yaay" + else + echo "naay" + exit 1; + fi + export processed_shell=$( expr 10 '*' "$processed_notebook") + returns: + - name: processed_shell + next: read_chunk + read_chunk: + type: task + command: examples.common.functions.read_processed_chunk + next: success + success: + type: success + fail: + type: fail + +dag: + description: | + map states allows to repeat a branch for each value of an iterable. + + The below example can written, in python, as: + + chunks = [1, 2, 3] + + for chunk in chunks: + # Any task within the pipeline can access the value of chunk as an argument. + processed = process_chunk(chunk) + + # The value of processed for every iteration is the value returned by the steps + # of the current execution. For example, the value of processed + # for chunk=1, is chunk*10 = 10 for downstream steps. + read_processed_chunk(chunk, processed) + + It is possible to use a custom reducer, for example, this reducer is a max of the collection. + # Once the reducer is applied, processed is reduced to a single value. 
+ assert processed == max(chunk * 10 for chunk in chunks) + + Run this pipeline as: + runnable execute -f examples/07-map/custom_reducer.yaml \ + -p examples/common/initial_parameters.yaml + start_at: map_state + steps: + map_state: + type: map + branch: *branch + iterate_on: chunks + iterate_as: chunk + reducer: "lambda *x: max(x)" + next: collect + collect: + type: task + command: examples.common.functions.assert_custom_reducer + next: success + success: + type: success + fail: + type: fail diff --git a/examples/07-map/map.py b/examples/07-map/map.py index b63896e7..e8afb0e8 100644 --- a/examples/07-map/map.py +++ b/examples/07-map/map.py @@ -17,6 +17,9 @@ # Outside of loop, processed is a list of all the processed chunks. # This is also called as the reduce pattern. assert processed == [chunk * 10 for chunk in chunks] + +Run this pipeline as: + python examples/07-map/map.py """ from examples.common.functions import ( diff --git a/examples/07-map/map.yaml b/examples/07-map/map.yaml new file mode 100644 index 00000000..d61828cd --- /dev/null +++ b/examples/07-map/map.yaml @@ -0,0 +1,82 @@ +branch: &branch + start_at: execute_python + steps: + execute_python: + type: task + command: examples.common.functions.process_chunk + returns: + - name: processed_python + next: execute_notebook + execute_notebook: + type: task + command_type: notebook + command: examples/common/process_chunk.ipynb + returns: + - name: processed_notebook + next: execute_shell + execute_shell: + type: task + command_type: shell + command: | + if [ "$processed_python" = $( expr 10 '*' "$chunk" ) ] \ + && [ "$processed_notebook" = $( expr 10 '*' "$processed_python" ) ] ; then + echo "yaay" + else + echo "naay" + exit 1; + fi + export processed_shell=$( expr 10 '*' "$processed_notebook") + returns: + - name: processed_shell + next: read_chunk + read_chunk: + type: task + command: examples.common.functions.read_processed_chunk + next: success + success: + type: success + fail: + type: fail + + +dag: + description: | + map states allows to repeat a branch for each value of an iterable. + + The below example can written, in python, as: + + chunks = [1, 2, 3] + + for chunk in chunks: + # Any task within the pipeline can access the value of chunk as an argument. + processed = process_chunk(chunk) + + # The value of processed for every iteration is the value returned by the steps + # of the current execution. For example, the value of processed + # for chunk=1, is chunk*10 = 10 for downstream steps. + read_processed_chunk(chunk, processed) + + # Outside of loop, processed is a list of all the processed chunks. + # This is also called as the reduce pattern. + assert processed == [chunk * 10 for chunk in chunks] + + Run this pipeline as: + runnable execute -f examples/07-map/map.yaml \ + -p examples/common/initial_parameters.yaml + + start_at: map_state + steps: + map_state: + type: map + branch: *branch + iterate_on: chunks + iterate_as: chunk + next: collect + collect: + type: task + command: examples.common.functions.assert_default_reducer + next: success + success: + type: success + fail: + type: fail diff --git a/examples/README.md b/examples/README.md index 67c4719d..5e743f21 100644 --- a/examples/README.md +++ b/examples/README.md @@ -12,8 +12,10 @@ Please use this as an index to find specific example. - [python_tasks.py](./01-tasks/python_tasks.py), [python_tasks.yaml](./01-tasks/python_tasks.yaml): uses python functions as tasks. The stdout/stderr of all the tasks are captured and stored in the catalog. 
+  - [notebook.py](./01-tasks/notebook.py), [notebook.yaml](./01-tasks/notebook.yaml): uses notebooks as tasks.
  The executed notebook is captured in the catalog.
+
  - [scripts.py](./01-tasks/scripts.py), [scripts.yaml](./01-tasks/scripts.yaml): uses shell scripts as tasks.
  The stdout/stderr of all scripts are captured and stored in the catalog.
@@ -26,11 +28,13 @@ shown in later sections.

 - 02-sequential: Examples of stitching tasks together including behavior in case of failures.

-  - [traversal.py](./02-sequential/traversal.py), [traversal.yaml](./02-sequential/traversal.yaml): A pipeline which is a mixed bag of notebooks, python functions and
-  shell scripts.
+  - [traversal.py](./02-sequential/traversal.py), [traversal.yaml](./02-sequential/traversal.yaml): A pipeline which is a mixed bag of notebooks, python functions and shell scripts.
+
  - [default_fail.py](./02-sequential/default_fail.py), [default_fail.yaml](./02-sequential/default_fail.yaml): The default failure behavior.
+
  - [on_failure_fail.py](./02-sequential/on_failure_fail.py), [on_failure_fail.yaml](./02-sequential/on_failure_fail.yaml): On failure of a step, do some action and fail.

-  - [on_failure_success.py](./02-sequential/on_failure_succeed.py), [on_failure_success.yaml](./02-sequential/on_failure_succeed.yaml): On failure of a step, take a different route
+
+  - [on_failure_succeed.py](./02-sequential/on_failure_succeed.py), [on_failure_succeed.yaml](./02-sequential/on_failure_succeed.yaml): On failure of a step, take a different route and succeed.

 ---

@@ -43,11 +47,11 @@ We only focusses on "parameters" while the next section focusses on "files".

  Below table summarizes the input/output types of different task types. For ex: notebooks can only take JSON serializable
  parameters as input but can return json/pydantic/objects. Any python object that could be serialized using "dill" can be used.

-  |          |  Input                  |  Output                  |
-  | -------- | :---------------------: | :----------------------: |
-  | python   |  json,pydantic, object  |  json, pydantic, object  |
-  | notebook |  json                   |  json, pydantic, object  |
-  | shell    |  json                   |  json                    |
+  |          |  Input                  |  Output                  |
+  | -------- | :---------------------: | :----------------------: |
+  | python   |  json, pydantic, object |  json, pydantic, object  |
+  | notebook |  json                   |  json, pydantic, object  |
+  | shell    |  json                   |  json                    |

  - [static_parameters_python.py](./03-parameters/static_parameters_python.py), [static_parameters_python.yaml](./03-parameters/static_parameters_python.yaml): A pipeline to show the access of static or known parameters by python tasks.

@@ -60,11 +64,10 @@ We only focusses on "parameters" while the next section focusses on "files".
    Any environment variables prefixed by RUNNABLE_PRM_ are recognized as parameters and
    can override parameters defined by the file.

-  - [passing_parameters_python.py](./03-parameters/passing_parameters_python.py), [passing_parameters_python.yaml](./03-parameters/passing_parameters_python.yaml): shows the mechanism of passing parameters (simple python datatypes, objects, pydantic models) and registering metrics between python tasks.
+  - [passing_parameters_python.py](./03-parameters/passing_parameters_python.py), [passing_parameters_python.yaml](./03-parameters/passing_parameters_python.yaml): shows the mechanism of passing parameters (JSON serializable, objects, pydantic models) and registering metrics between python tasks.
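+
+    A minimal sketch of the pattern (illustrative only; the function names here are made up,
+    while returns, injection by name and casting per annotation are as described above):
+
+        def write_parameters():
+            # captured via returns=["integer", "pydantic_param"]
+            return 1, ComplexParams(x=10, foo="bar")
+
+        def read_parameters(integer: int, pydantic_param: ComplexParams):
+            assert integer == 1  # injected by name and cast per the annotation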
- [passing_parameters_notebook.py](./03-parameters/passing_parameters_notebook.py), [passing_parameters_notebook.yaml](./03-parameters/passing_parameters_notebook.yaml): shows the mechanism of passing parameters between notebook tasks. Please note that - we cannot inject pydantic models or objects into the notebook but can capture them - as return values. + we cannot inject pydantic models or objects into the notebook but can capture them as return values. - [passing_parameters_shell.py](./03-parameters/passing_parameters_shell.py), [passing_parameters_shell.yaml](./03-parameters/passing_parameters_shell.yaml): shows the mechanism of passing parameters between shell tasks. Please note that we cannot inject/capture pydantic models or objects in shells. diff --git a/examples/common/functions.py b/examples/common/functions.py index 2c0fa0cb..fadfa1d2 100644 --- a/examples/common/functions.py +++ b/examples/common/functions.py @@ -142,8 +142,12 @@ def assert_default_reducer( assert processed_shell == [chunk * 1000 for chunk in chunks] -def assert_custom_reducer(processed: int, chunks: List[int]) -> int: +def assert_custom_reducer( + processed_python: int, processed_notebook: int, processed_shell: int, chunks: List[int] +) -> int: """ Asserts the custom reducer returns the max of all the processed chunks. """ - assert processed == max(chunk * 10 for chunk in chunks) + assert processed_python == max(chunk * 10 for chunk in chunks) + assert processed_notebook == max(chunk * 100 for chunk in chunks) + assert processed_shell == max(chunk * 1000 for chunk in chunks) diff --git a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 deleted file mode 100644 index cab0215a..00000000 --- a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_1 +++ /dev/null @@ -1,101 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "41a71aa7", - "metadata": { - "ploomber": { - "timestamp_end": 1714663503.68139, - "timestamp_start": 1714663503.681186 - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "chunk = None\n", - "processed_python = None" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ca6e39f1", - "metadata": { - "ploomber": { - "timestamp_end": 1714663503.681826, - "timestamp_start": 1714663503.681457 - }, - "tags": [ - "injected-parameters" - ] - }, - "outputs": [], - "source": [ - "# Injected parameters\n", - "integer = 1\n", - "floater = 3.14\n", - "stringer = \"hello\"\n", - "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", - "chunks = [1, 2, 3]\n", - "processed_python = 10\n", - "processed_notebook = \"\"\n", - "processed_shell = \"\"\n", - "chunk = 1\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "764f661d", - "metadata": { - "ploomber": { - "timestamp_end": 1714663503.681977, - "timestamp_start": 1714663503.68184 - } - }, - "outputs": [], - "source": [ - "assert chunk*10 == processed_python" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", - "metadata": { - "ploomber": { - "timestamp_end": 1714663503.682091, - "timestamp_start": 1714663503.68199 - } - }, - "outputs": [], - "source": [ - "processed_notebook = processed_python*10" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 deleted file mode 100644 index 8472743f..00000000 --- a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_2 +++ /dev/null @@ -1,101 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "41a71aa7", - "metadata": { - "ploomber": { - "timestamp_end": 1714663504.338108, - "timestamp_start": 1714663504.337819 - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "chunk = None\n", - "processed_python = None" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c76509a6", - "metadata": { - "ploomber": { - "timestamp_end": 1714663504.33849, - "timestamp_start": 1714663504.338129 - }, - "tags": [ - "injected-parameters" - ] - }, - "outputs": [], - "source": [ - "# Injected parameters\n", - "integer = 1\n", - "floater = 3.14\n", - "stringer = \"hello\"\n", - "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", - "chunks = [1, 2, 3]\n", - "processed_python = 20\n", - "processed_notebook = \"\"\n", - "processed_shell = \"\"\n", - "chunk = 2\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "764f661d", - "metadata": { - "ploomber": { - "timestamp_end": 1714663504.338656, - "timestamp_start": 1714663504.338506 - } - }, - "outputs": [], - "source": [ - "assert chunk*10 == processed_python" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", - "metadata": { - "ploomber": { - "timestamp_end": 1714663504.338773, - "timestamp_start": 1714663504.33867 - } - }, - "outputs": [], - "source": [ - "processed_notebook = processed_python*10" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 b/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 deleted file mode 100644 index ec15410c..00000000 --- a/examples/common/process_chunkmapstatemapvariableplaceholderexecutenotebook_out.ipynb_3 +++ /dev/null @@ -1,101 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "41a71aa7", - "metadata": { - "ploomber": { - "timestamp_end": 1714663505.186755, - "timestamp_start": 1714663505.186461 - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "chunk = None\n", - "processed_python = None" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "7fb77ef2", - "metadata": { - "ploomber": { - "timestamp_end": 1714663505.187151, - "timestamp_start": 1714663505.186784 - }, - "tags": [ - "injected-parameters" - ] - }, - "outputs": [], - "source": [ - "# Injected parameters\n", - "integer = 1\n", - "floater = 3.14\n", - 
"stringer = \"hello\"\n", - "pydantic_param = {\"x\": 10, \"foo\": \"bar\"}\n", - "chunks = [1, 2, 3]\n", - "processed_python = 30\n", - "processed_notebook = \"\"\n", - "processed_shell = \"\"\n", - "chunk = 3\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "764f661d", - "metadata": { - "ploomber": { - "timestamp_end": 1714663505.187308, - "timestamp_start": 1714663505.187165 - } - }, - "outputs": [], - "source": [ - "assert chunk*10 == processed_python" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", - "metadata": { - "ploomber": { - "timestamp_end": 1714663505.187424, - "timestamp_start": 1714663505.187321 - } - }, - "outputs": [], - "source": [ - "processed_notebook = processed_python*10" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/test_examples.py b/tests/test_examples.py index a3f917c4..708ee43c 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -25,7 +25,8 @@ ("04-catalog/catalog", False), ("06-parallel/parallel", False), ("06-parallel/nesting", False), - ("07-map/map_python", False), + ("07-map/map", False), + ("07-map/custom_reducer", False), ] From 8a07461d17458c416a913c31235f252db87a93c7 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Thu, 2 May 2024 21:56:13 +0100 Subject: [PATCH 10/17] docs: more examples --- .gitignore | 2 ++ runnable/tasks.py | 2 +- .../extensions/test_node_extensions.py | 7 ++----- tests/runnable/test_tasks.py | 21 ------------------- tests/test_examples.py | 4 ++-- 5 files changed, 7 insertions(+), 29 deletions(-) delete mode 100644 tests/runnable/test_tasks.py diff --git a/.gitignore b/.gitignore index ee5beae4..1082f8ac 100644 --- a/.gitignore +++ b/.gitignore @@ -152,3 +152,5 @@ cov.xml .DS_Store data/ + + examples/common/*_out* diff --git a/runnable/tasks.py b/runnable/tasks.py index da1bb207..98d90997 100644 --- a/runnable/tasks.py +++ b/runnable/tasks.py @@ -172,7 +172,7 @@ def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: except Exception as e: # pylint: disable=broad-except logger.exception(e) finally: - task_console = None + task_console = None # type: ignore print(f.getvalue()) # print to console log_file.write(f.getvalue()) # Print to file diff --git a/tests/runnable/extensions/test_node_extensions.py b/tests/runnable/extensions/test_node_extensions.py index 06c43844..3218b933 100644 --- a/tests/runnable/extensions/test_node_extensions.py +++ b/tests/runnable/extensions/test_node_extensions.py @@ -2,7 +2,6 @@ from runnable import defaults from runnable.extensions import nodes as nodes - from runnable.tasks import BaseTaskType @@ -13,7 +12,7 @@ def instantiable_base_class(monkeypatch): def test_task_node_parse_from_config_seperates_task_from_node_confifg(mocker, monkeypatch): - base_task = BaseTaskType(node_name="test", task_type="dummy") + base_task = BaseTaskType(task_type="dummy") mock_create_task = mocker.MagicMock(return_value=base_task) command_config = {"to_be_sent_to_task": "yes"} @@ -26,8 +25,6 @@ def test_task_node_parse_from_config_seperates_task_from_node_confifg(mocker, mo 
monkeypatch.setattr(nodes, "create_task", mock_create_task) task_node = nodes.TaskNode.parse_from_config({**node_config, **command_config}) - command_config["node_name"] = "test" - mock_create_task.assert_called_once_with(command_config) assert task_node.executable == base_task @@ -39,7 +36,7 @@ def test_task_node_mocks_if_mock_is_true(mocker, monkeypatch): monkeypatch.setattr(nodes.TaskNode, "_context", mock_context) mock_context.run_log_store.create_attempt_log = mocker.MagicMock(return_value=mock_attempt_log) - base_task = BaseTaskType(node_name="test", task_type="dummy") + base_task = BaseTaskType(task_type="dummy") task_node = nodes.TaskNode(name="test", internal_name="test", next_node="next_node", executable=base_task) attempt_log = task_node.execute(mock=True) diff --git a/tests/runnable/test_tasks.py b/tests/runnable/test_tasks.py deleted file mode 100644 index 145efbce..00000000 --- a/tests/runnable/test_tasks.py +++ /dev/null @@ -1,21 +0,0 @@ -import pytest - - -from runnable import tasks - - -@pytest.fixture -def configuration(): - return {"node_name": "dummy", "task_type": "dummy"} - - -def test_base_task_execute_command_raises_not_implemented_error(configuration): - base_execution_type = tasks.BaseTaskType(**configuration) - - with pytest.raises(NotImplementedError): - base_execution_type.execute_command() - - -def test_notebook_raises_exception_if_command_is_not_a_notebook(): - with pytest.raises(Exception): - tasks.NotebookTaskType(command="path to notebook") diff --git a/tests/test_examples.py b/tests/test_examples.py index 708ee43c..bff35f0b 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -56,7 +56,7 @@ def default_context(): @pytest.mark.parametrize("example", list_python_examples()) @pytest.mark.parametrize("context", contexts) -@pytest.mark.no_cover +# @pytest.mark.no_cover @pytest.mark.e2e def test_python_examples(example, context): print(f"Testing {example}...") @@ -77,7 +77,7 @@ def test_python_examples(example, context): @pytest.mark.parametrize("example", list_python_examples()) @pytest.mark.parametrize("context", contexts) -@pytest.mark.no_cover +# @pytest.mark.no_cover @pytest.mark.e2e def test_yaml_examples(example, context): print(f"Testing {example}...") From d3e75b2c7581954c5ec6c99be58b3976c3ad1793 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Thu, 2 May 2024 22:02:22 +0100 Subject: [PATCH 11/17] chore: gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1082f8ac..b772ee70 100644 --- a/.gitignore +++ b/.gitignore @@ -153,4 +153,4 @@ cov.xml data/ - examples/common/*_out* + *_out* From 7b41b71e56a1d92a0b8834357a6a3d268e7d35b7 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Thu, 2 May 2024 22:04:01 +0100 Subject: [PATCH 12/17] chore: gitignore --- .gitignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index b772ee70..f53c03b2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ __pycache__/ # C extensions *.so +# examples run time +*_out* + # Distribution / packaging .Python build/ @@ -152,5 +155,3 @@ cov.xml .DS_Store data/ - - *_out* From 02c7a05343a16b37f08aee5e126d99f813c6039b Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Sun, 5 May 2024 09:10:59 +0100 Subject: [PATCH 13/17] fix: Bug in SDK with nested nodes --- examples/04-catalog/catalog.py | 4 +- examples/04-catalog/catalog.yaml | 16 +-- examples/06-parallel/parallel.py | 2 +- examples/08-mocking/default.yaml | 2 + examples/configs/local-container.yaml | 
10 +- runnable/entrypoints.py | 4 + runnable/extensions/executor/__init__.py | 5 - .../local_container/implementation.py | 65 +++++++--- .../executor/mocked/implementation.py | 18 +++ runnable/extensions/nodes.py | 66 ++++++---- runnable/graph.py | 1 + runnable/parameters.py | 2 +- runnable/sdk.py | 7 +- tests/test_examples.py | 113 ++++++++++++++---- tox.ini | 2 +- 15 files changed, 225 insertions(+), 92 deletions(-) create mode 100644 examples/08-mocking/default.yaml diff --git a/examples/04-catalog/catalog.py b/examples/04-catalog/catalog.py index 63280ae6..f6288031 100644 --- a/examples/04-catalog/catalog.py +++ b/examples/04-catalog/catalog.py @@ -61,8 +61,8 @@ def main(): ) delete_files_command = """ - rm df.csv && \ - rm data_folder/data.txt + rm df.csv || true && \ + rm data_folder/data.txt || true """ # delete from local files after generate # since its local catalog, we delete to show "get from catalog" diff --git a/examples/04-catalog/catalog.yaml b/examples/04-catalog/catalog.yaml index 16a8c06d..64d90e24 100644 --- a/examples/04-catalog/catalog.yaml +++ b/examples/04-catalog/catalog.yaml @@ -60,8 +60,8 @@ dag: type: task command_type: shell command: | - rm df.csv && \ - rm data_folder/data.txt + rm df.csv || true && \ + rm data_folder/data.txt || true next: read_data_python read_data_python: type: task @@ -76,8 +76,8 @@ dag: type: task command_type: shell command: | - rm df.csv && \ - rm data_folder/data.txt + rm df.csv || true && \ + rm data_folder/data.txt || true next: read_data_shell read_data_shell: type: task @@ -94,8 +94,8 @@ dag: type: task command_type: shell command: | - rm df.csv && \ - rm data_folder/data.txt + rm df.csv || true && \ + rm data_folder/data.txt || true next: read_data_notebook read_data_notebook: type: task @@ -110,8 +110,8 @@ dag: type: task command_type: shell command: | - rm df.csv && \ - rm data_folder/data.txt + rm df.csv || true && \ + rm data_folder/data.txt || true next: success success: type: success diff --git a/examples/06-parallel/parallel.py b/examples/06-parallel/parallel.py index 05995117..89be2d77 100644 --- a/examples/06-parallel/parallel.py +++ b/examples/06-parallel/parallel.py @@ -54,7 +54,7 @@ def traversal(execute: bool = True): def main(): parallel_step = Parallel( - name="parallel step", + name="parallel_step", terminate_with_success=True, branches={"branch1": traversal(execute=False), "branch2": traversal(execute=False)}, ) diff --git a/examples/08-mocking/default.yaml b/examples/08-mocking/default.yaml new file mode 100644 index 00000000..e293f455 --- /dev/null +++ b/examples/08-mocking/default.yaml @@ -0,0 +1,2 @@ +executor: + type: mocked diff --git a/examples/configs/local-container.yaml b/examples/configs/local-container.yaml index b16589a4..ac46db23 100644 --- a/examples/configs/local-container.yaml +++ b/examples/configs/local-container.yaml @@ -2,14 +2,6 @@ executor: type: "local-container" # (1) config: docker_image: runnable:latest # (2) - environment: - key: value # (3) run_log_store: # (4) - type: file-system - -catalog: - type: file-system - -secrets: - type: do-nothing + type: chunked-fs diff --git a/runnable/entrypoints.py b/runnable/entrypoints.py index a21a6a6d..1fd8be98 100644 --- a/runnable/entrypoints.py +++ b/runnable/entrypoints.py @@ -207,6 +207,10 @@ def execute( except Exception as e: # noqa: E722 console.print(e, style=defaults.error_style) progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True) + run_log = 
run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False) + run_log.status = defaults.FAIL + run_context.run_log_store.add_branch_log(run_log, run_context.run_id) + raise e executor.send_return_code() diff --git a/runnable/extensions/executor/__init__.py b/runnable/extensions/executor/__init__.py index 791c483f..4353c9e3 100644 --- a/runnable/extensions/executor/__init__.py +++ b/runnable/extensions/executor/__init__.py @@ -185,14 +185,11 @@ def _sync_catalog(self, stage: str, synced_catalogs=None) -> Optional[List[DataC data_catalogs = [] for name_pattern in node_catalog_settings.get(stage) or []: if stage == "get": - get_catalog_progress = self._context.progress.add_task(f"Getting from catalog {name_pattern}", total=1) data_catalog = self._context.catalog_handler.get( name=name_pattern, run_id=self._context.run_id, compute_data_folder=compute_data_folder ) - self._context.progress.update(get_catalog_progress, completed=True, visible=False, refresh=True) elif stage == "put": - put_catalog_progress = self._context.progress.add_task(f"Putting in catalog {name_pattern}", total=1) data_catalog = self._context.catalog_handler.put( name=name_pattern, run_id=self._context.run_id, @@ -200,8 +197,6 @@ def _sync_catalog(self, stage: str, synced_catalogs=None) -> Optional[List[DataC synced_catalogs=synced_catalogs, ) - self._context.progress.update(put_catalog_progress, completed=True, visible=False) - logger.debug(f"Added data catalog: {data_catalog} to step log") data_catalogs.extend(data_catalog) diff --git a/runnable/extensions/executor/local_container/implementation.py b/runnable/extensions/executor/local_container/implementation.py index f1e794f1..be711283 100644 --- a/runnable/extensions/executor/local_container/implementation.py +++ b/runnable/extensions/executor/local_container/implementation.py @@ -5,7 +5,7 @@ from pydantic import Field from rich import print -from runnable import defaults, integration, utils +from runnable import defaults, utils from runnable.datastore import StepLog from runnable.defaults import TypeMapVariable from runnable.extensions.executor import GenericExecutor @@ -145,16 +145,6 @@ def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kw logger.debug("Here is the resolved executor config") logger.debug(executor_config) - if executor_config.get("run_in_local", False): - # Do not change config but only validate the configuration. - # Trigger the job on local system instead of a container - integration.validate(self, self._context.run_log_store) - integration.validate(self, self._context.catalog_handler) - integration.validate(self, self._context.secrets_handler) - - self.execute_node(node=node, map_variable=map_variable, **kwargs) - return - command = utils.get_node_execution_command(node, map_variable=map_variable) self._spin_container( @@ -172,7 +162,7 @@ def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kw "Note: If you do not see any docker issue from your side and the code works properly on local execution" "please raise a bug report." ) - logger.warning(msg) + logger.error(msg) step_log.status = defaults.FAIL self._context.run_log_store.add_step_log(step_log, self._context.run_id) @@ -212,6 +202,7 @@ def _spin_container( f"Please provide a docker_image using executor_config of the step {node.name} or at global config" ) + print("container", self._volumes) # TODO: Should consider using getpass.getuser() when running the docker container? 
Volume permissions container = client.containers.create( image=docker_image, @@ -260,7 +251,9 @@ class LocalContainerComputeFileSystemRunLogstore(BaseIntegration): service_provider = "file-system" # The actual implementation of the service def configure_for_traversal(self, **kwargs): - from runnable.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore + from runnable.extensions.run_log_store.file_system.implementation import ( + FileSystemRunLogstore, + ) self.executor = cast(LocalContainerExecutor, self.executor) self.service = cast(FileSystemRunLogstore, self.service) @@ -272,7 +265,9 @@ def configure_for_traversal(self, **kwargs): } def configure_for_execution(self, **kwargs): - from runnable.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore + from runnable.extensions.run_log_store.file_system.implementation import ( + FileSystemRunLogstore, + ) self.executor = cast(LocalContainerExecutor, self.executor) self.service = cast(FileSystemRunLogstore, self.service) @@ -280,6 +275,40 @@ def configure_for_execution(self, **kwargs): self.service.log_folder = self.executor._container_log_location +class LocalContainerComputeChunkedFS(BaseIntegration): + """ + Integration pattern between Local container and File System catalog + """ + + executor_type = "local-container" + service_type = "run_log_store" # One of secret, catalog, datastore + service_provider = "chunked-fs" # The actual implementation of the service + + def configure_for_traversal(self, **kwargs): + from runnable.extensions.run_log_store.chunked_file_system.implementation import ( + ChunkedFileSystemRunLogStore, + ) + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(ChunkedFileSystemRunLogStore, self.service) + + write_to = self.service.log_folder + self.executor._volumes[str(Path(write_to).resolve())] = { + "bind": f"{self.executor._container_log_location}", + "mode": "rw", + } + + def configure_for_execution(self, **kwargs): + from runnable.extensions.run_log_store.chunked_file_system.implementation import ( + ChunkedFileSystemRunLogStore, + ) + + self.executor = cast(LocalContainerExecutor, self.executor) + self.service = cast(ChunkedFileSystemRunLogStore, self.service) + + self.service.log_folder = self.executor._container_log_location + + class LocalContainerComputeFileSystemCatalog(BaseIntegration): """ Integration pattern between Local container and File System catalog @@ -290,7 +319,9 @@ class LocalContainerComputeFileSystemCatalog(BaseIntegration): service_provider = "file-system" # The actual implementation of the service def configure_for_traversal(self, **kwargs): - from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog + from runnable.extensions.catalog.file_system.implementation import ( + FileSystemCatalog, + ) self.executor = cast(LocalContainerExecutor, self.executor) self.service = cast(FileSystemCatalog, self.service) @@ -302,7 +333,9 @@ def configure_for_traversal(self, **kwargs): } def configure_for_execution(self, **kwargs): - from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog + from runnable.extensions.catalog.file_system.implementation import ( + FileSystemCatalog, + ) self.executor = cast(LocalContainerExecutor, self.executor) self.service = cast(FileSystemCatalog, self.service) diff --git a/runnable/extensions/executor/mocked/implementation.py b/runnable/extensions/executor/mocked/implementation.py index ab6c6dd3..79d2c86b 100644 --- 
a/runnable/extensions/executor/mocked/implementation.py +++ b/runnable/extensions/executor/mocked/implementation.py @@ -64,6 +64,10 @@ def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = Non step_log.step_type = node.node_type step_log.status = defaults.PROCESSING + self._context.run_log_store.add_step_log(step_log, self._context.run_id) + + logger.info(f"Executing node: {node.get_summary()}") + # Add the step log to the database as per the situation. # If its a terminal node, complete it now if node.node_type in ["success", "fail"]: @@ -132,3 +136,17 @@ def _resolve_executor_config(self, node: BaseNode): def execute_job(self, node: TaskNode): pass + + def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): + """ + The entry point for all executors apart from local. + We have already prepared for node execution. + + Args: + node (BaseNode): The node to execute + map_variable (dict, optional): If the node is part of a map, send in the map dictionary. Defaults to None. + + Raises: + NotImplementedError: _description_ + """ + ... diff --git a/runnable/extensions/nodes.py b/runnable/extensions/nodes.py index 28104e05..dd76bd5f 100644 --- a/runnable/extensions/nodes.py +++ b/runnable/extensions/nodes.py @@ -5,7 +5,7 @@ from collections import OrderedDict from copy import deepcopy from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Union, cast +from typing import Annotated, Any, Callable, Dict, List, Optional, Tuple, Union, cast from pydantic import ( ConfigDict, @@ -14,10 +14,15 @@ field_serializer, field_validator, ) -from typing_extensions import Annotated from runnable import datastore, defaults, utils -from runnable.datastore import JsonParameter, MetricParameter, ObjectParameter, StepLog +from runnable.datastore import ( + JsonParameter, + MetricParameter, + ObjectParameter, + Parameter, + StepLog, +) from runnable.defaults import TypeMapVariable from runnable.graph import Graph, create_graph from runnable.nodes import CompositeNode, ExecutableNode, TerminalNode @@ -541,10 +546,14 @@ def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs): iterate_on = None try: iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on].get_value() - except KeyError: + except KeyError as e: raise Exception( - f"Expected parameter {self.iterate_on} not present in Run Log parameters, was it ever set before?" - ) + ( + f"Expected parameter {self.iterate_on}", + "not present in Run Log parameters", + "was it ever set before?", + ) + ) from e if not isinstance(iterate_on, list): raise Exception("Only list is allowed as a valid iterator type") @@ -597,29 +606,44 @@ def fan_in(self, map_variable: TypeMapVariable = None, **kwargs): # The final value of the parameter is the result of the reduce function. reducer_f = self.get_reducer_function() - if map_variable: - # If we are in a map state already, the param should have an index of the map variable. 
- for _, v in map_variable.items(): - for branch_return in self.branch_returns: - param_name, _ = branch_return - to_reduce = [] - for iter_variable in iterate_on: - to_reduce.append(params[f"{iter_variable}_{param_name}"].get_value()) + def update_param(params: Dict[str, Parameter], reducer_f: Callable, map_prefix: str = ""): + from runnable.extensions.executor.mocked.implementation import ( + MockedExecutor, + ) - param_name = f"{v}_{param_name}" - params[param_name].value = reducer_f(to_reduce) - params[param_name].reduced = True - else: for branch_return in self.branch_returns: param_name, _ = branch_return to_reduce = [] for iter_variable in iterate_on: - to_reduce.append(params[f"{iter_variable}_{param_name}"].get_value()) - - params[param_name].value = reducer_f(*to_reduce) + try: + to_reduce.append(params[f"{iter_variable}_{param_name}"].get_value()) + except KeyError as e: + if isinstance(self._context.executor, MockedExecutor): + pass + else: + raise Exception( + ( + f"Expected parameter {iter_variable}_{param_name}", + "not present in Run Log parameters", + "was it ever set before?", + ) + ) from e + + param_name = f"{map_prefix}{param_name}" + if to_reduce: + params[param_name].value = reducer_f(*to_reduce) + else: + params[param_name].value = "" params[param_name].reduced = True + if map_variable: + # If we are in a map state already, the param should have an index of the map variable. + for _, v in map_variable.items(): + update_param(params, reducer_f, map_prefix=f"{v}_") + else: + update_param(params, reducer_f) + self._context.run_log_store.set_parameters(parameters=params, run_id=self._context.run_id) diff --git a/runnable/graph.py b/runnable/graph.py index f9a54172..e66de8e6 100644 --- a/runnable/graph.py +++ b/runnable/graph.py @@ -74,6 +74,7 @@ def get_node_by_internal_name(self, internal_name: str) -> "BaseNode": for _, value in self.nodes.items(): if value.internal_name == internal_name: return value + print("graph", internal_name) raise exceptions.NodeNotFoundError(internal_name) def __str__(self): # pragma: no cover diff --git a/runnable/parameters.py b/runnable/parameters.py index 7cc59680..d1767644 100644 --- a/runnable/parameters.py +++ b/runnable/parameters.py @@ -36,7 +36,7 @@ def get_user_set_parameters(remove: bool = False) -> Dict[str, JsonParameter]: try: parameters[key.lower()] = JsonParameter(kind="json", value=json.loads(value)) except json.decoder.JSONDecodeError: - logger.error(f"Parameter {key} could not be JSON decoded, adding the literal value") + logger.warning(f"Parameter {key} could not be JSON decoded, adding the literal value") parameters[key.lower()] = JsonParameter(kind="json", value=value) if remove: diff --git a/runnable/sdk.py b/runnable/sdk.py index 741c7fcf..1d185ff0 100644 --- a/runnable/sdk.py +++ b/runnable/sdk.py @@ -2,6 +2,7 @@ import logging import os +import re from abc import ABC, abstractmethod from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Union @@ -636,7 +637,8 @@ def model_post_init(self, __context: Any) -> None: self._dag.check_graph() def return_dag(self) -> graph.Graph: - return self._dag + dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True) + return graph.create_graph(dag_definition) def execute( self, @@ -707,7 +709,8 @@ def execute( caller_stack = inspect.stack()[1] relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd())) - module_to_call = f"{relative_to_root.replace('/', '.').replace('.py', '')}.{caller_stack.function}" + module_name = 
re.sub(r"\b.py\b", "", relative_to_root.replace("/", ".")) + module_to_call = f"{module_name}.{caller_stack.function}" run_context.pipeline_file = f"{module_to_call}.py" diff --git a/tests/test_examples.py b/tests/test_examples.py index bff35f0b..bbec5209 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -7,34 +7,21 @@ from runnable import exceptions from runnable.entrypoints import execute -# # (file, is_fail?) -python_examples = [ - ("01-tasks/notebook", False), - ("01-tasks/python_tasks", False), - ("01-tasks/scripts", False), - ("01-tasks/stub", False), - ("02-sequential/default_fail", True), - ("02-sequential/on_failure_fail", True), - ("02-sequential/on_failure_succeed", False), - ("02-sequential/traversal", False), - ("03-parameters/passing_parameters_notebook", False), - ("03-parameters/passing_parameters_python", False), - ("03-parameters/passing_parameters_shell", False), - ("03-parameters/static_parameters_non_python", False), - ("03-parameters/static_parameters_python", False), - ("04-catalog/catalog", False), - ("06-parallel/parallel", False), - ("06-parallel/nesting", False), - ("07-map/map", False), - ("07-map/custom_reducer", False), -] - def list_python_examples(): for example in python_examples: yield example +@contextmanager +def container_context(): + os.environ["RUNNABLE_CONFIGURATION_FILE"] = "examples/configs/local-container.yaml" + os.environ["RUNNABLE_PRM_envvar"] = "from env" + yield + del os.environ["RUNNABLE_CONFIGURATION_FILE"] + del os.environ["RUNNABLE_PRM_envvar"] + + @contextmanager def chunked_fs_context(): os.environ["RUNNABLE_CONFIGURATION_FILE"] = "examples/configs/chunked-fs-run_log.yaml" @@ -44,6 +31,15 @@ def chunked_fs_context(): del os.environ["RUNNABLE_PRM_envvar"] +@contextmanager +def mocked_context(): + os.environ["RUNNABLE_CONFIGURATION_FILE"] = "examples/08-mocking/default.yaml" + os.environ["RUNNABLE_PRM_envvar"] = "from env" + yield + del os.environ["RUNNABLE_CONFIGURATION_FILE"] + del os.environ["RUNNABLE_PRM_envvar"] + + @contextmanager def default_context(): os.environ["RUNNABLE_PRM_envvar"] = "from env" @@ -51,7 +47,27 @@ def default_context(): del os.environ["RUNNABLE_PRM_envvar"] -contexts = [default_context, chunked_fs_context] +contexts = [default_context, chunked_fs_context, mocked_context] +python_examples = [ + ("01-tasks/notebook", False, []), + ("01-tasks/python_tasks", False, []), + ("01-tasks/scripts", False, []), + ("01-tasks/stub", False, []), + ("02-sequential/default_fail", True, []), + ("02-sequential/on_failure_fail", True, []), + ("02-sequential/on_failure_succeed", False, []), + ("02-sequential/traversal", False, []), + ("03-parameters/passing_parameters_notebook", False, []), + ("03-parameters/passing_parameters_python", False, []), + ("03-parameters/passing_parameters_shell", False, []), + ("03-parameters/static_parameters_non_python", False, []), + ("03-parameters/static_parameters_python", False, []), + ("04-catalog/catalog", False, [mocked_context]), + ("06-parallel/parallel", False, []), + ("06-parallel/nesting", False, []), + ("07-map/map", False, []), + ("07-map/custom_reducer", False, []), +] @pytest.mark.parametrize("example", list_python_examples()) @@ -61,7 +77,10 @@ def default_context(): def test_python_examples(example, context): print(f"Testing {example}...") - mod, status = example + mod, status, ignore_contexts = example + if context in ignore_contexts: + return + context = context() imported_module = importlib.import_module(f"examples.{mod.replace('/', '.')}") @@ -81,7 +100,11 @@ 
def test_python_examples(example, context): @pytest.mark.e2e def test_yaml_examples(example, context): print(f"Testing {example}...") - file, status = example + file, status, ignore_contexts = example + + if context in ignore_contexts: + return + context = context() example_file = f"examples/{file}.yaml" parameters_file = "examples/common/initial_parameters.yaml" @@ -94,4 +117,42 @@ def test_yaml_examples(example, context): raise -# TODO: Need to test argo and local container +@pytest.mark.parametrize("example", list_python_examples()) +@pytest.mark.container +def test_python_examples_container(example): + print(f"Testing {example}...") + + mod, status, _ = example + context = container_context() + + imported_module = importlib.import_module(f"examples.{mod.replace('/', '.')}") + f = getattr(imported_module, "main") + with context: + try: + f() + except exceptions.ExecutionFailedError: + print("Example failed") + if not status: + raise + + +@pytest.mark.parametrize("example", list_python_examples()) +@pytest.mark.container +def test_yaml_examples_container(example): + print(f"Testing {example}...") + file, status, _ = example + + context = container_context() + + example_file = f"examples/{file}.yaml" + parameters_file = "examples/common/initial_parameters.yaml" + + with context: + try: + execute(pipeline_file=example_file, parameters_file=parameters_file) + except exceptions.ExecutionFailedError: + if not status: + raise + + +# TODO: Need to test argo diff --git a/tox.ini b/tox.ini index 32ca062d..1ddb73a9 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,7 @@ envlist = python3.9, mypy whitelist_externals = poetry commands = poetry install -E docker -E notebook --without docs,binary,perf,tutorial - poetry run python -m pytest -m "not e2e_container" --cov=runnable/ tests/ + poetry run python -m pytest -m "not container" --cov=runnable/ tests/ [testenv:mypy] whitelist_externals = poetry From 6cf3347bc727a70bbf806cb59e50975c00bbb3e0 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Mon, 6 May 2024 06:31:54 +0100 Subject: [PATCH 14/17] fix: Bug in argo with failure nodes --- runnable/entrypoints.py | 4 ++++ .../extensions/executor/argo/implementation.py | 4 +++- runnable/sdk.py | 3 +++ tests/test_examples.py | 15 +++++++++++---- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/runnable/entrypoints.py b/runnable/entrypoints.py index 1fd8be98..4a1b6da8 100644 --- a/runnable/entrypoints.py +++ b/runnable/entrypoints.py @@ -198,6 +198,10 @@ def execute( run_context.progress = progress executor.execute_graph(dag=run_context.dag) # type: ignore + if not executor._local: + executor.send_return_code(stage="traversal") + return + run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False) if run_log.status == defaults.SUCCESS: diff --git a/runnable/extensions/executor/argo/implementation.py b/runnable/extensions/executor/argo/implementation.py index 135f3e6b..ef894e7b 100644 --- a/runnable/extensions/executor/argo/implementation.py +++ b/runnable/extensions/executor/argo/implementation.py @@ -1033,6 +1033,9 @@ def _gather_task_templates_of_dag( if working_on.node_type not in ["success", "fail"] and working_on._get_on_failure_node(): failure_node = dag.get_node_by_name(working_on._get_on_failure_node()) + render_obj = get_renderer(working_on)(executor=self, node=failure_node) + render_obj.render(list_of_iter_values=list_of_iter_values.copy()) + failure_template_name = self.get_clean_name(failure_node) # If a task template for clean name exists, retrieve it 
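+            # if none exists yet, fall back to a fresh DagTaskTemplate for the failure node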
failure_template = templates.get( @@ -1040,7 +1043,6 @@ def _gather_task_templates_of_dag( DagTaskTemplate(name=failure_template_name, template=failure_template_name), ) failure_template.depends.append(f"{clean_name}.Failed") - templates[failure_template_name] = failure_template # If we are in a map node, we need to add the values as arguments diff --git a/runnable/sdk.py b/runnable/sdk.py index 1d185ff0..fd736f85 100644 --- a/runnable/sdk.py +++ b/runnable/sdk.py @@ -730,6 +730,9 @@ def execute( pipeline_execution_task = progress.add_task("[dark_orange] Starting execution .. ", total=1) run_context.executor.execute_graph(dag=run_context.dag) + if not run_context.executor._local: + return {} + run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False) if run_log.status == defaults.SUCCESS: diff --git a/tests/test_examples.py b/tests/test_examples.py index bbec5209..4f591cc1 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,5 +1,6 @@ import importlib import os +import subprocess from contextlib import contextmanager import pytest @@ -47,7 +48,16 @@ def default_context(): del os.environ["RUNNABLE_PRM_envvar"] -contexts = [default_context, chunked_fs_context, mocked_context] +@contextmanager +def argo_context(): + os.environ["RUNNABLE_CONFIGURATION_FILE"] = "examples/configs/argo-config.yaml" + yield + subprocess.run(["argo", "lint", "--offline", "argo-pipeline.yaml"], check=True) + del os.environ["RUNNABLE_CONFIGURATION_FILE"] + + +contexts = [default_context, chunked_fs_context, mocked_context, argo_context] + python_examples = [ ("01-tasks/notebook", False, []), ("01-tasks/python_tasks", False, []), @@ -153,6 +163,3 @@ def test_yaml_examples_container(example): except exceptions.ExecutionFailedError: if not status: raise - - -# TODO: Need to test argo From 4dce0e651619897015930640684ad91ec8a740f7 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Mon, 6 May 2024 08:45:10 +0100 Subject: [PATCH 15/17] docs: Added patching --- examples/08-mocking/patching.yaml | 10 ++++ examples/common/functions.py | 5 ++ examples/common/simple_notebook_mocked.ipynb | 46 +++++++++++++++++++ .../executor/mocked/implementation.py | 2 +- tests/test_examples.py | 9 ++++ 5 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 examples/08-mocking/patching.yaml create mode 100644 examples/common/simple_notebook_mocked.ipynb diff --git a/examples/08-mocking/patching.yaml b/examples/08-mocking/patching.yaml new file mode 100644 index 00000000..0e87550c --- /dev/null +++ b/examples/08-mocking/patching.yaml @@ -0,0 +1,10 @@ +executor: + type: mocked + config: + patches: + hello python: + command: examples.common.functions.mocked_hello + hello shell: + command: echo "hello from mocked" + hello notebook: + command: examples/common/simple_notebook_mocked.ipynb diff --git a/examples/common/functions.py b/examples/common/functions.py index fadfa1d2..f090c034 100644 --- a/examples/common/functions.py +++ b/examples/common/functions.py @@ -10,6 +10,11 @@ def hello(): print("Hello World!") +def mocked_hello(): + "Mock of the hello function" + print("Hello from mock") + + def raise_ex(): "A function that raises an exception" raise Exception("This is an exception") diff --git a/examples/common/simple_notebook_mocked.ipynb b/examples/common/simple_notebook_mocked.ipynb new file mode 100644 index 00000000..cb0218d1 --- /dev/null +++ b/examples/common/simple_notebook_mocked.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + 
"id": "3e98e89e-765c-42d4-81ea-c371c2eab14d", + "metadata": {}, + "outputs": [], + "source": [ + "def function():\n", + " print(\"hello from mock\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8eac7a3f", + "metadata": {}, + "outputs": [], + "source": [ + "function()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/runnable/extensions/executor/mocked/implementation.py b/runnable/extensions/executor/mocked/implementation.py index 79d2c86b..60a6a739 100644 --- a/runnable/extensions/executor/mocked/implementation.py +++ b/runnable/extensions/executor/mocked/implementation.py @@ -18,7 +18,7 @@ def create_executable(params: Dict[str, Any], model: Type[BaseTaskType], node_na class EasyModel(model): # type: ignore model_config = ConfigDict(extra="ignore") - swallow_all = EasyModel(**params, node_name=node_name) + swallow_all = EasyModel(node_name=node_name, **params) return swallow_all diff --git a/tests/test_examples.py b/tests/test_examples.py index 4f591cc1..974a4263 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -41,6 +41,15 @@ def mocked_context(): del os.environ["RUNNABLE_PRM_envvar"] +@contextmanager +def patched_context(): + os.environ["RUNNABLE_CONFIGURATION_FILE"] = "examples/08-mocking/patching.yaml" + os.environ["RUNNABLE_PRM_envvar"] = "from env" + yield + del os.environ["RUNNABLE_CONFIGURATION_FILE"] + del os.environ["RUNNABLE_PRM_envvar"] + + @contextmanager def default_context(): os.environ["RUNNABLE_PRM_envvar"] = "from env" From c491e9561615686fe81105a3a3337ef81100af22 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Tue, 7 May 2024 09:40:19 +0100 Subject: [PATCH 16/17] fix: Bug with mocked and retry implementation --- examples/08-mocking/default.yaml | 11 ++++ examples/08-mocking/patching.yaml | 9 +++ examples/09-retry/config.yaml | 4 ++ examples/09-retry/python_tasks.py | 60 +++++++++++++++++++ examples/09-retry/python_tasks.yaml | 37 ++++++++++++ examples/common/functions.py | 1 + runnable/datastore.py | 5 +- .../executor/mocked/implementation.py | 2 + .../executor/retry/implementation.py | 12 +++- 9 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 examples/09-retry/config.yaml create mode 100644 examples/09-retry/python_tasks.py create mode 100644 examples/09-retry/python_tasks.yaml diff --git a/examples/08-mocking/default.yaml b/examples/08-mocking/default.yaml index e293f455..deca7551 100644 --- a/examples/08-mocking/default.yaml +++ b/examples/08-mocking/default.yaml @@ -1,2 +1,13 @@ +# Mocked executors are handy to temporarily disable +# all executions in the pipeline. +# Traversal nodes, like task, parallel, map work +# still function + +# Any executor can be made mocking by changing the type to "mocked" +# Arbitrary key words are allowed in mocked executor. 
+ + executor: type: mocked + config: + what: I am allowed diff --git a/examples/08-mocking/patching.yaml b/examples/08-mocking/patching.yaml index 0e87550c..c81e64a0 100644 --- a/examples/08-mocking/patching.yaml +++ b/examples/08-mocking/patching.yaml @@ -1,3 +1,12 @@ +# Mocked executors are handy to temporarily disable +# all executions in the pipeline. +# Traversal nodes, like task, parallel, map work +# still function + +# Any executor can be made mocking by changing the type to "mocked" +# Arbitrary key words are allowed in mocked executor. + + executor: type: mocked config: diff --git a/examples/09-retry/config.yaml b/examples/09-retry/config.yaml new file mode 100644 index 00000000..60aaa083 --- /dev/null +++ b/examples/09-retry/config.yaml @@ -0,0 +1,4 @@ +executor: + type: retry + config: + run_id: grating-hugle-0551 diff --git a/examples/09-retry/python_tasks.py b/examples/09-retry/python_tasks.py new file mode 100644 index 00000000..7c4648f5 --- /dev/null +++ b/examples/09-retry/python_tasks.py @@ -0,0 +1,60 @@ +""" +The below example showcases setting up known initial parameters for a pipeline +of only python tasks + +The initial parameters as defined in the yaml file are: + simple: 1 + complex_param: + x: 10 + y: "hello world!!" + +runnable allows using pydantic models for deeply nested parameters and +casts appropriately based on annotation. eg: read_initial_params_as_pydantic + +If no annotation is provided, the parameter is assumed to be a dictionary. +eg: read_initial_params_as_json + +You can set the initial parameters from environment variables as well. +eg: Any environment variable prefixed by "RUNNABLE_PRM_" will be picked up by runnable + +Run this pipeline as: + python examples/03-parameters/static_parameters_python.py + +""" + +import os + +from examples.common.functions import ( + read_initial_params_as_json, + read_initial_params_as_pydantic, +) +from runnable import Pipeline, PythonTask + + +def main(): + read_params_as_pydantic = PythonTask( + function=read_initial_params_as_pydantic, + name="read_params_as_pydantic", + ) + + read_params_as_json = PythonTask( + function=read_initial_params_as_json, + terminate_with_success=True, + name="read_params_json", + ) + + pipeline = Pipeline( + steps=[read_params_as_pydantic, read_params_as_json], + add_terminal_nodes=True, + ) + + _ = pipeline.execute(parameters_file="examples/common/initial_parameters.yaml") + + return pipeline + + +if __name__ == "__main__": + # Any parameter prefixed by "RUNNABLE_PRM_" will be picked up by runnable + os.environ["RUNNABLE_PRM_envvar"] = "from env" + main() + del os.environ["RUNNABLE_PRM_envvar"] diff --git a/examples/09-retry/python_tasks.yaml b/examples/09-retry/python_tasks.yaml new file mode 100644 index 00000000..f86302d7 --- /dev/null +++ b/examples/09-retry/python_tasks.yaml @@ -0,0 +1,37 @@ +dag: + description: | + The below example showcases setting up known initial parameters for a pipeline + of only python tasks + + The initial parameters as defined in the yaml file are: + simple: 1 + complex_param: + x: 10 + y: "hello world!!" + + runnable allows using pydantic models for deeply nested parameters and + casts appropriately based on annotation. eg: read_initial_params_as_pydantic + + If no annotation is provided, the parameter is assumed to be a dictionary. + eg: read_initial_params_as_json + + You can set the initial parameters from environment variables as well. 
+ eg: Any environment variable prefixed by "RUNNABLE_PRM_" will be picked up by runnable + + Run this pipeline by: + runnable execute -f 03-parameters/static_parameters_python.yaml \ + -p examples/common/initial_parameters.yaml + start_at: read_params_as_pydantic + steps: + read_params_as_pydantic: + type: task + command: examples.common.functions.read_initial_params_as_pydantic + next: read_params_json + read_params_json: + type: task + command: examples.common.functions.read_initial_params_as_json + next: success + success: + type: success + fail: + type: fail diff --git a/examples/common/functions.py b/examples/common/functions.py index f090c034..489f1a64 100644 --- a/examples/common/functions.py +++ b/examples/common/functions.py @@ -32,6 +32,7 @@ def read_initial_params_as_pydantic( pydantic_param: ComplexParams, envvar: str, ): + print(envvar) assert integer == 1 assert floater == 3.14 assert stringer == "hello" diff --git a/runnable/datastore.py b/runnable/datastore.py index bdf41a9e..c7125351 100644 --- a/runnable/datastore.py +++ b/runnable/datastore.py @@ -402,7 +402,10 @@ def search_step_by_internal_name(self, i_name: str) -> Tuple[StepLog, Union[Bran """ dot_path = i_name.split(".") if len(dot_path) == 1: - return self.steps[i_name], None + try: + return self.steps[i_name], None + except KeyError as e: + raise exceptions.StepLogNotFoundError(self.run_id, i_name) from e current_steps = self.steps current_step = None diff --git a/runnable/extensions/executor/mocked/implementation.py b/runnable/extensions/executor/mocked/implementation.py index 60a6a739..e6e74802 100644 --- a/runnable/extensions/executor/mocked/implementation.py +++ b/runnable/extensions/executor/mocked/implementation.py @@ -26,6 +26,8 @@ class MockedExecutor(GenericExecutor): service_name: str = "mocked" _local_executor: bool = True + model_config = ConfigDict(extra="ignore") + patches: Dict[str, Any] = Field(default_factory=dict) @property diff --git a/runnable/extensions/executor/retry/implementation.py b/runnable/extensions/executor/retry/implementation.py index 09256dd1..4f4cb376 100644 --- a/runnable/extensions/executor/retry/implementation.py +++ b/runnable/extensions/executor/retry/implementation.py @@ -6,6 +6,7 @@ from runnable.datastore import RunLog from runnable.defaults import TypeMapVariable from runnable.extensions.executor import GenericExecutor +from runnable.extensions.nodes import TaskNode from runnable.nodes import BaseNode logger = logging.getLogger(defaults.LOGGER_NAME) @@ -31,6 +32,7 @@ class RetryExecutor(GenericExecutor): _local: bool = True _original_run_log: Optional[RunLog] = None + _restart_initiated: bool = False @property def _context(self): @@ -38,7 +40,7 @@ def _context(self): @cached_property def original_run_log(self): - self.original_run_log = self._context.run_log_store.get_run_log_by_id( + return self._context.run_log_store.get_run_log_by_id( run_id=self.run_id, full=True, ) @@ -140,10 +142,14 @@ def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVaria node_step_log_name = node._get_step_log_name(map_variable=map_variable) logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}") + if self._restart_initiated: + return True + try: previous_attempt_log, _ = self.original_run_log.search_step_by_internal_name(node_step_log_name) except exceptions.StepLogNotFoundError: logger.warning(f"Did not find the node {node.name} in previous run log") + self._restart_initiated = True return True # We should re-run the node. 
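+        # The step was found in the previous run log: skip it only if it succeeded;
+        # otherwise restart the graph from this node onwards.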
logger.info(f"The original step status: {previous_attempt_log.status}") @@ -152,7 +158,11 @@ def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVaria return False # We need not run the node logger.info(f"The new execution should start executing graph from this node {node.name}") + self._restart_initiated = True return True def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs): self._execute_node(node, map_variable=map_variable, **kwargs) + + def execute_job(self, node: TaskNode): + pass From 04755c49cd4e6ecc5bf475f7e61ba28215ca5d86 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Tue, 7 May 2024 11:05:06 +0100 Subject: [PATCH 17/17] fix: disable posthog --- runnable/__init__.py | 3 +++ tox.ini | 2 ++ 2 files changed, 5 insertions(+) diff --git a/runnable/__init__.py b/runnable/__init__.py index e21c7739..2228f6e6 100644 --- a/runnable/__init__.py +++ b/runnable/__init__.py @@ -2,6 +2,7 @@ # TODO: Might need to add Rich to pyinstaller part import logging +import os from logging.config import dictConfig from rich.console import Console @@ -29,6 +30,8 @@ pickled, ) +os.environ["_PLOOMBER_TELEMETRY_DEBUG"] = "false" + ## TODO: Summary should be a bit better for catalog. ## If the execution fails, hint them about the retry executor. # Make the retry executor loose! diff --git a/tox.ini b/tox.ini index 1ddb73a9..e303c793 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,8 @@ envlist = python3.9, mypy [testenv] whitelist_externals = poetry +setenv = + _PLOOMBER_TELEMETRY_DEBUG = false commands = poetry install -E docker -E notebook --without docs,binary,perf,tutorial poetry run python -m pytest -m "not container" --cov=runnable/ tests/