From d13394d066795867b0038b4fa2e93ce51f724448 Mon Sep 17 00:00:00 2001 From: Vijay Vammi Date: Tue, 11 Jun 2024 09:38:10 +0100 Subject: [PATCH] docs: updating docs --- README.md | 461 ++++++++++++++++-------------------------------------- 1 file changed, 139 insertions(+), 322 deletions(-) diff --git a/README.md b/README.md index 707c0377..f768c278 100644 --- a/README.md +++ b/README.md @@ -2,25 +2,6 @@ -

- - ,////, - /// 6| - // _| - _/_,-' - _.-/'/ \ ,/;, - ,-' /' \_ \ / _/ - `\ / _/\ ` / - | /, `\_/ - | \' - /\_ /` /\ - /' /_``--.__/\ `,. / \ - |_/` `-._ `\/ `\ `. - `-.__/' `\ | - `\ \ - `\ \ - \_\__ - \___)


@@ -32,364 +13,200 @@ Code style: black MyPy Checked Tests: -Docs:


-runnable is a simplified workflow definition language that helps in: - -- **Streamlined Design Process:** runnable enables users to efficiently plan their pipelines with -[stubbed nodes](https://astrazeneca.github.io/runnable-core/concepts/stub), along with offering support for various structures such as -[tasks](https://astrazeneca.github.io/runnable-core/concepts/task), [parallel branches](https://astrazeneca.github.io/runnable-core/concepts/parallel), and [loops or map branches](https://astrazeneca.github.io/runnable-core/concepts/map) -in both [yaml](https://astrazeneca.github.io/runnable-core/concepts/pipeline) or a [python SDK](https://astrazeneca.github.io/runnable-core/sdk) for maximum flexibility. - -- **Incremental Development:** Build your pipeline piece by piece with runnable, which allows for the -implementation of tasks as [python functions](https://astrazeneca.github.io/runnable-core/concepts/task/#python_functions), -[notebooks](https://astrazeneca.github.io/runnable-core/concepts/task/#notebooks), or [shell scripts](https://astrazeneca.github.io/runnable-core/concepts/task/#shell), -adapting to the developer's preferred tools and methods. -- **Robust Testing:** Ensure your pipeline performs as expected with the ability to test using sampled data. runnable -also provides the capability to [mock and patch tasks](https://astrazeneca.github.io/runnable-core/configurations/executors/mocked) -for thorough evaluation before full-scale deployment. +[Please check here for complete documentation](https://astrazeneca.github.io/runnable/) -- **Seamless Deployment:** Transition from the development stage to production with ease. -runnable simplifies the process by requiring [only configuration changes](https://astrazeneca.github.io/runnable-core/configurations/overview) -to adapt to different environments, including support for [argo workflows](https://astrazeneca.github.io/runnable-core/configurations/executors/argo). 
- -- **Efficient Debugging:** Quickly identify and resolve issues in pipeline execution with runnable's local -debugging features. Retrieve data from failed tasks and [retry failures](https://astrazeneca.github.io/runnable-core/concepts/run-log/#retrying_failures) -using your chosen debugging tools to maintain a smooth development experience. - -Along with the developer friendly features, runnable also acts as an interface to production grade concepts -such as [data catalog](https://astrazeneca.github.io/runnable-core/concepts/catalog), [reproducibility](https://astrazeneca.github.io/runnable-core/concepts/run-log), -[experiment tracking](https://astrazeneca.github.io/runnable-core/concepts/experiment-tracking) -and secure [access to secrets](https://astrazeneca.github.io/runnable-core/concepts/secrets). - -
- -## What does it do? +## Example +The data science flavored code below is the well-known +[iris example from scikit-learn](https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html). -![works](assets/work.png) -
+```python +""" +Example of Logistic regression using scikit-learn +https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html +""" + +import matplotlib.pyplot as plt +import numpy as np +from sklearn import datasets +from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import LogisticRegression + + +def load_data(): + # import some data to play with + iris = datasets.load_iris() + X = iris.data[:, :2] # we only take the first two features. + Y = iris.target + + return X, Y + + +def model_fit(X: np.ndarray, Y: np.ndarray, C: float = 1e5): + logreg = LogisticRegression(C=C) + logreg.fit(X, Y) + + return logreg + + +def generate_plots(X: np.ndarray, Y: np.ndarray, logreg: LogisticRegression): + _, ax = plt.subplots(figsize=(4, 3)) + DecisionBoundaryDisplay.from_estimator( + logreg, + X, + cmap=plt.cm.Paired, + ax=ax, + response_method="predict", + plot_method="pcolormesh", + shading="auto", + xlabel="Sepal length", + ylabel="Sepal width", + eps=0.5, + ) -## Documentation + # Plot also the training points + plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired) -[More details about the project and how to use it available here](https://astrazeneca.github.io/runnable-core/). + plt.xticks(()) + plt.yticks(()) -
+ plt.savefig("iris_logistic.png") -## Installation + # TODO: What is the right value? + return 0.6 -The minimum python version that runnable supports is 3.8 -```shell -pip install runnable -``` +## Without any orchestration +def main(): + X, Y = load_data() + logreg = model_fit(X, Y, C=1.0) + generate_plots(X, Y, logreg) -Please look at the [installation guide](https://astrazeneca.github.io/runnable-core/usage) -for more information. -
+## With runnable orchestration +def runnable_pipeline(): + # The below code can be anywhere + from runnable import Catalog, Pipeline, PythonTask, metric, pickled -## Example + # X, Y = load_data() + load_data_task = PythonTask( + function=load_data, + name="load_data", + returns=[pickled("X"), pickled("Y")], # (1) + ) -Your application code. Use pydantic models as DTO. + # logreg = model_fit(X, Y, C=1.0) + model_fit_task = PythonTask( + function=model_fit, + name="model_fit", + returns=[pickled("logreg")], + ) -Assumed to be present at ```functions.py``` -```python -from pydantic import BaseModel + # generate_plots(X, Y, logreg) + generate_plots_task = PythonTask( + function=generate_plots, + name="generate_plots", + terminate_with_success=True, + catalog=Catalog(put=["iris_logistic.png"]), # (2) + returns=[metric("score")], + ) -class InnerModel(BaseModel): - """ - A pydantic model representing a group of related parameters. - """ + pipeline = Pipeline( + steps=[load_data_task, model_fit_task, generate_plots_task], + ) # (4) - foo: int - bar: str + pipeline.execute() + return pipeline -class Parameter(BaseModel): - """ - A pydantic model representing the parameters of the whole pipeline. - """ - x: int - y: InnerModel +if __name__ == "__main__": + # main() + runnable_pipeline() +``` -def return_parameter() -> Parameter: - """ - The annotation of the return type of the function is not mandatory - but it is a good practice. - Returns: - Parameter: The parameters that should be used in downstream steps. - """ - # Return type of a function should be a pydantic model - return Parameter(x=1, y=InnerModel(foo=10, bar="hello world")) +1. Return two serialized objects X and Y. +2. Store the file `iris_logistic.png` for future reference. +3. Define the sequence of tasks. +4. 
Define a pipeline with the tasks +The difference between the native driver and the runnable orchestration: -def display_parameter(x: int, y: InnerModel): - """ - Annotating the arguments of the function is important for - runnable to understand the type of parameters you want. +!!! tip inline end "Notebooks and Shell scripts" - Input args can be a pydantic model or the individual attributes. - """ - print(x) - # >>> prints 1 - print(y) - # >>> prints InnerModel(foo=10, bar="hello world") -``` + You can execute notebooks and shell scripts too! -### Application code using driver functions. + They can be written just as you would want them, *plain old notebooks and scripts*. -The code is runnable without any orchestration framework. -```python -from functions import return_parameter, display_parameter -my_param = return_parameter() -display_parameter(my_param.x, my_param.y) -``` -### Orchestration using runnable +
- - - - - - - - - +## Documentation - -
python SDKyaml

+```diff -Example present at: ```examples/python-tasks.py``` +- X, Y = load_data() ++load_data_task = PythonTask( ++ function=load_data, ++ name="load_data", ++ returns=[pickled("X"), pickled("Y")], (1) ++ ) -Run it as: ```python examples/python-tasks.py``` +-logreg = model_fit(X, Y, C=1.0) ++model_fit_task = PythonTask( ++ function=model_fit, ++ name="model_fit", ++ returns=[pickled("logreg")], ++ ) -```python -from runnable import Pipeline, Task +-generate_plots(X, Y, logreg) ++generate_plots_task = PythonTask( ++ function=generate_plots, ++ name="generate_plots", ++ terminate_with_success=True, ++ catalog=Catalog(put=["iris_logistic.png"]), (2) ++ ) -def main(): - step1 = Task( - name="step1", - command="examples.functions.return_parameter", - ) - step2 = Task( - name="step2", - command="examples.functions.display_parameter", - terminate_with_success=True, - ) - step1 >> step2 ++pipeline = Pipeline( ++ steps=[load_data_task, model_fit_task, generate_plots_task], (3) - pipeline = Pipeline( - start_at=step1, - steps=[step1, step2], - add_terminal_nodes=True, - ) +``` + - pipeline.execute() +--- -if __name__ == "__main__": - main() -``` +- [x] ```Domain``` code remains completely independent of ```driver``` code. +- [x] The ```driver``` function has an equivalent and intuitive runnable expression +- [x] Reproducible by default, runnable stores metadata about code/data/config for every execution. +- [x] The pipeline is `runnable` in any environment. -

- -Example present at: ```examples/python-tasks.yaml``` - - -Execute via the cli: ```runnable execute -f examples/python-tasks.yaml``` - -```yaml -dag: - description: | - This is a simple pipeline that does 3 steps in sequence. - In this example: - 1. First step: returns a "parameter" x as a Pydantic model - 2. Second step: Consumes that parameter and prints it - - This pipeline demonstrates one way to pass small data from one step to another. - - start_at: step 1 - steps: - step 1: - type: task - command_type: python # (2) - command: examples.functions.return_parameter # (1) - next: step 2 - step 2: - type: task - command_type: python - command: examples.functions.display_parameter - next: success - success: - type: success - fail: - type: fail -``` -

+[More details about the project and how to use it are available here](https://astrazeneca.github.io/runnable/). -### Transpile to argo workflows +
-No code change, just change the configuration. +## Installation -```yaml -executor: - type: "argo" - config: - image: runnable:demo - persistent_volumes: - - name: runnable-volume - mount_path: /mnt +The minimum python version that runnable supports is 3.8 -run_log_store: - type: file-system - config: - log_folder: /mnt/run_log_store +```shell +pip install runnable ``` -More details can be found in [argo configuration](https://astrazeneca.github.io/runnable-core/configurations/executors/argo). - -Execute the code as ```runnable execute -f examples/python-tasks.yaml -c examples/configs/argo-config.yam``` - -
- Expand - -```yaml -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: runnable-dag- - annotations: {} - labels: {} -spec: - activeDeadlineSeconds: 172800 - entrypoint: runnable-dag - podGC: - strategy: OnPodCompletion - retryStrategy: - limit: '0' - retryPolicy: Always - backoff: - duration: '120' - factor: 2 - maxDuration: '3600' - serviceAccountName: default-editor - templates: - - name: runnable-dag - failFast: true - dag: - tasks: - - name: step-1-task-uvdp7h - template: step-1-task-uvdp7h - depends: '' - - name: step-2-task-772vg3 - template: step-2-task-772vg3 - depends: step-1-task-uvdp7h.Succeeded - - name: success-success-igzq2e - template: success-success-igzq2e - depends: step-2-task-772vg3.Succeeded - - name: step-1-task-uvdp7h - container: - image: runnable:demo - command: - - runnable - - execute_single_node - - '{{workflow.parameters.run_id}}' - - step%1 - - --log-level - - WARNING - - --file - - examples/python-tasks.yaml - - --config-file - - examples/configs/argo-config.yaml - volumeMounts: - - name: executor-0 - mountPath: /mnt - imagePullPolicy: '' - resources: - limits: - memory: 1Gi - cpu: 250m - requests: - memory: 1Gi - cpu: 250m - - name: step-2-task-772vg3 - container: - image: runnable:demo - command: - - runnable - - execute_single_node - - '{{workflow.parameters.run_id}}' - - step%2 - - --log-level - - WARNING - - --file - - examples/python-tasks.yaml - - --config-file - - examples/configs/argo-config.yaml - volumeMounts: - - name: executor-0 - mountPath: /mnt - imagePullPolicy: '' - resources: - limits: - memory: 1Gi - cpu: 250m - requests: - memory: 1Gi - cpu: 250m - - name: success-success-igzq2e - container: - image: runnable:demo - command: - - runnable - - execute_single_node - - '{{workflow.parameters.run_id}}' - - success - - --log-level - - WARNING - - --file - - examples/python-tasks.yaml - - --config-file - - examples/configs/argo-config.yaml - volumeMounts: - - name: executor-0 - mountPath: /mnt - 
imagePullPolicy: '' - resources: - limits: - memory: 1Gi - cpu: 250m - requests: - memory: 1Gi - cpu: 250m - templateDefaults: - activeDeadlineSeconds: 7200 - timeout: 10800s - arguments: - parameters: - - name: run_id - value: '{{workflow.uid}}' - volumes: - - name: executor-0 - persistentVolumeClaim: - claimName: runnable-volume - -``` +Please look at the [installation guide](https://astrazeneca.github.io/runnable-core/usage) +for more information. -
## Pipelines can be: