From d13394d066795867b0038b4fa2e93ce51f724448 Mon Sep 17 00:00:00 2001
From: Vijay Vammi <vijay.vammi@astrazeneca.com>
Date: Tue, 11 Jun 2024 09:38:10 +0100
Subject: [PATCH] docs: updating docs

---
 README.md | 461 ++++++++++++++++--------------------------------------
 1 file changed, 139 insertions(+), 322 deletions(-)
diff --git a/README.md b/README.md
index 707c0377..f768c278 100644
--- a/README.md
+++ b/README.md
@@ -2,25 +2,6 @@
 
 
 
-<p align="center">
-
-                                                      ,////,
-                                                      /// 6|
-                                                      //  _|
-                                                    _/_,-'
-                                                _.-/'/   \   ,/;,
-                                            ,-' /'  \_   \ / _/
-                                            `\ /     _/\  ` /
-                                               |     /,  `\_/
-                                               |     \'
-                                    /\_        /`      /\
-                                  /' /_``--.__/\  `,. /  \
-                                  |_/`  `-._     `\/  `\   `.
-                                            `-.__/'     `\   |
-                                                        `\  \
-                                                          `\ \
-                                                            \_\__
-                                                              \___)
 
 </p>
 <hr style="border:2px dotted orange">
@@ -32,364 +13,200 @@
 <a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
 <a href="https://github.com/python/mypy"><img alt="MyPy Checked" src="https://www.mypy-lang.org/static/mypy_badge.svg"></a>
 <a href="https://github.com/vijayvammi/runnable/actions/workflows/release.yaml"><img alt="Tests:" src="https://github.com/vijayvammi/runnable/actions/workflows/release.yaml/badge.svg">
-<a href="https://github.com/vijayvammi/runnable/actions/workflows/docs.yaml"><img alt="Docs:" src="https://github.com/vijayvammi/runnable/actions/workflows/docs.yaml/badge.svg">
 </p>
 <hr style="border:2px dotted orange">
 
-runnable is a simplified workflow definition language that helps in:
-
-- **Streamlined Design Process:** runnable enables users to efficiently plan their pipelines with
-[stubbed nodes](https://astrazeneca.github.io/runnable-core/concepts/stub), along with offering support for various structures such as
-[tasks](https://astrazeneca.github.io/runnable-core/concepts/task), [parallel branches](https://astrazeneca.github.io/runnable-core/concepts/parallel), and [loops or map branches](https://astrazeneca.github.io/runnable-core/concepts/map)
-in both [yaml](https://astrazeneca.github.io/runnable-core/concepts/pipeline) or a [python SDK](https://astrazeneca.github.io/runnable-core/sdk) for maximum flexibility.
-
-- **Incremental Development:** Build your pipeline piece by piece with runnable, which allows for the
-implementation of tasks as [python functions](https://astrazeneca.github.io/runnable-core/concepts/task/#python_functions),
-[notebooks](https://astrazeneca.github.io/runnable-core/concepts/task/#notebooks), or [shell scripts](https://astrazeneca.github.io/runnable-core/concepts/task/#shell),
-adapting to the developer's preferred tools and methods.
 
-- **Robust Testing:** Ensure your pipeline performs as expected with the ability to test using sampled data. runnable
-also provides the capability to [mock and patch tasks](https://astrazeneca.github.io/runnable-core/configurations/executors/mocked)
-for thorough evaluation before full-scale deployment.
+[Please check here for complete documentation](https://astrazeneca.github.io/runnable/)
 
-- **Seamless Deployment:** Transition from the development stage to production with ease.
-runnable simplifies the process by requiring [only configuration changes](https://astrazeneca.github.io/runnable-core/configurations/overview)
-to adapt to different environments, including support for [argo workflows](https://astrazeneca.github.io/runnable-core/configurations/executors/argo).
-
-- **Efficient Debugging:** Quickly identify and resolve issues in pipeline execution with runnable's local
-debugging features. Retrieve data from failed tasks and [retry failures](https://astrazeneca.github.io/runnable-core/concepts/run-log/#retrying_failures)
-using your chosen debugging tools to maintain a smooth development experience.
-
-Along with the developer friendly features, runnable also acts as an interface to production grade concepts
-such as [data catalog](https://astrazeneca.github.io/runnable-core/concepts/catalog), [reproducibility](https://astrazeneca.github.io/runnable-core/concepts/run-log),
-[experiment tracking](https://astrazeneca.github.io/runnable-core/concepts/experiment-tracking)
-and secure [access to secrets](https://astrazeneca.github.io/runnable-core/concepts/secrets).
-
-<hr style="border:2px dotted orange">
-
-## What does it do?
+## Example
 
+The data-science-flavored code below is based on the well-known
+[iris example from scikit-learn](https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html).
 
-![works](assets/work.png)
 
-<hr style="border:2px dotted orange">
+```python
+"""
+Example of Logistic regression using scikit-learn
+https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn import datasets
+from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.linear_model import LogisticRegression
+
+
+def load_data():
+    # import some data to play with
+    iris = datasets.load_iris()
+    X = iris.data[:, :2]  # we only take the first two features.
+    Y = iris.target
+
+    return X, Y
+
+
+def model_fit(X: np.ndarray, Y: np.ndarray, C: float = 1e5):
+    logreg = LogisticRegression(C=C)
+    logreg.fit(X, Y)
+
+    return logreg
+
+
+def generate_plots(X: np.ndarray, Y: np.ndarray, logreg: LogisticRegression):
+    _, ax = plt.subplots(figsize=(4, 3))
+    DecisionBoundaryDisplay.from_estimator(
+        logreg,
+        X,
+        cmap=plt.cm.Paired,
+        ax=ax,
+        response_method="predict",
+        plot_method="pcolormesh",
+        shading="auto",
+        xlabel="Sepal length",
+        ylabel="Sepal width",
+        eps=0.5,
+    )
 
-## Documentation
+    # Plot also the training points
+    plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired)
 
-[More details about the project and how to use it available here](https://astrazeneca.github.io/runnable-core/).
+    plt.xticks(())
+    plt.yticks(())
 
-<hr style="border:2px dotted orange">
+    plt.savefig("iris_logistic.png")
 
-## Installation
+    # TODO: What is the right value?
+    return 0.6
 
-The minimum python version that runnable supports is 3.8
 
-```shell
-pip install runnable
-```
+## Without any orchestration
+def main():
+    X, Y = load_data()
+    logreg = model_fit(X, Y, C=1.0)
+    generate_plots(X, Y, logreg)
 
-Please look at the [installation guide](https://astrazeneca.github.io/runnable-core/usage)
-for more information.
 
-<hr style="border:2px dotted orange">
+## With runnable orchestration
+def runnable_pipeline():
+    # The below code can be anywhere
+    from runnable import Catalog, Pipeline, PythonTask, metric, pickled
 
-## Example
+    # X, Y = load_data()
+    load_data_task = PythonTask(
+        function=load_data,
+        name="load_data",
+        returns=[pickled("X"), pickled("Y")],  # (1)
+    )
 
-Your application code. Use pydantic models as DTO.
+    # logreg = model_fit(X, Y, C=1.0)
+    model_fit_task = PythonTask(
+        function=model_fit,
+        name="model_fit",
+        returns=[pickled("logreg")],
+    )
 
-Assumed to be present at ```functions.py```
-```python
-from pydantic import BaseModel
+    # generate_plots(X, Y, logreg)
+    generate_plots_task = PythonTask(
+        function=generate_plots,
+        name="generate_plots",
+        terminate_with_success=True,
+        catalog=Catalog(put=["iris_logistic.png"]),  # (2)
+        returns=[metric("score")],
+    )
 
-class InnerModel(BaseModel):
-    """
-    A pydantic model representing a group of related parameters.
-    """
+    pipeline = Pipeline(
+        steps=[load_data_task, model_fit_task, generate_plots_task],  # (3)
+    )  # (4)
 
-    foo: int
-    bar: str
+    pipeline.execute()
 
+    return pipeline
 
-class Parameter(BaseModel):
-    """
-    A pydantic model representing the parameters of the whole pipeline.
-    """
 
-    x: int
-    y: InnerModel
+if __name__ == "__main__":
+    # main()
+    runnable_pipeline()
 
+```
 
-def return_parameter() -> Parameter:
-    """
-    The annotation of the return type of the function is not mandatory
-    but it is a good practice.
 
-    Returns:
-        Parameter: The parameters that should be used in downstream steps.
-    """
-    # Return type of a function should be a pydantic model
-    return Parameter(x=1, y=InnerModel(foo=10, bar="hello world"))
+1. Return two serialized objects X and Y.
+2. Store the file `iris_logistic.png` for future reference.
+3. Define the sequence of tasks.
+4. Define a pipeline with the tasks.
 
+The difference between the native driver and the runnable orchestration:
 
-def display_parameter(x: int, y: InnerModel):
-    """
-    Annotating the arguments of the function is important for
-    runnable to understand the type of parameters you want.
+!!! tip inline end "Notebooks and Shell scripts"
 
-    Input args can be a pydantic model or the individual attributes.
-    """
-    print(x)
-    # >>> prints 1
-    print(y)
-    # >>> prints InnerModel(foo=10, bar="hello world")
-```
+    You can execute notebooks and shell scripts too!!
 
-### Application code using driver functions.
+    They can be written just as you would want them, *plain old notebooks and scripts*.
 
-The code is runnable without any orchestration framework.
 
-```python
-from functions import return_parameter, display_parameter
 
-my_param = return_parameter()
-display_parameter(my_param.x, my_param.y)
-```
 
-### Orchestration using runnable
+<div class="annotate" markdown>
 
-<table>
-<tr>
-    <th>python SDK</th>
-    <th>yaml</th>
-</tr>
-<tr>
-<td valign="top"><p>
+```diff
 
-Example present at: ```examples/python-tasks.py```
+-X, Y = load_data()
++load_data_task = PythonTask(
++   function=load_data,
++   name="load_data",
++   returns=[pickled("X"), pickled("Y")], (1)
++   )
 
-Run it as: ```python examples/python-tasks.py```
+-logreg = model_fit(X, Y, C=1.0)
++model_fit_task = PythonTask(
++   function=model_fit,
++   name="model_fit",
++   returns=[pickled("logreg")],
++   )
 
-```python
-from runnable import Pipeline, Task
+-generate_plots(X, Y, logreg)
++generate_plots_task = PythonTask(
++   function=generate_plots,
++   name="generate_plots",
++   terminate_with_success=True,
++   catalog=Catalog(put=["iris_logistic.png"]), (2)
++   )
 
-def main():
-    step1 = Task(
-        name="step1",
-        command="examples.functions.return_parameter",
-    )
-    step2 = Task(
-        name="step2",
-        command="examples.functions.display_parameter",
-        terminate_with_success=True,
-    )
 
-    step1 >> step2
++pipeline = Pipeline(
++   steps=[load_data_task, model_fit_task, generate_plots_task], (3)
 
-    pipeline = Pipeline(
-        start_at=step1,
-        steps=[step1, step2],
-        add_terminal_nodes=True,
-    )
+```
+</div>
 
-    pipeline.execute()
 
+---
 
-if __name__ == "__main__":
-    main()
-```
+- [x] ```Domain``` code remains completely independent of ```driver``` code.
+- [x] The ```driver``` function has an equivalent and intuitive runnable expression.
+- [x] Reproducible by default, runnable stores metadata about code/data/config for every execution.
+- [x] The pipeline is `runnable` in any environment.
 
-</p></td>
-
-<td valign="top"><p>
-
-Example present at: ```examples/python-tasks.yaml```
-
-
-Execute via the cli: ```runnable execute -f examples/python-tasks.yaml```
-
-```yaml
-dag:
-  description: |
-    This is a simple pipeline that does 3 steps in sequence.
-    In this example:
-      1. First step: returns a "parameter" x as a Pydantic model
-      2. Second step: Consumes that parameter and prints it
-
-    This pipeline demonstrates one way to pass small data from one step to another.
-
-  start_at: step 1
-  steps:
-    step 1:
-      type: task
-      command_type: python # (2)
-      command: examples.functions.return_parameter # (1)
-      next: step 2
-    step 2:
-      type: task
-      command_type: python
-      command: examples.functions.display_parameter
-      next: success
-    success:
-      type: success
-    fail:
-      type: fail
-```
 
-</p></td>
+## Documentation
 
-</tr>
-</table>
+[More details about the project and how to use it are available here](https://astrazeneca.github.io/runnable/).
 
-### Transpile to argo workflows
+<hr style="border:2px dotted orange">
 
-No code change, just change the configuration.
+## Installation
 
-```yaml
-executor:
-  type: "argo"
-  config:
-    image: runnable:demo
-    persistent_volumes:
-      - name: runnable-volume
-        mount_path: /mnt
+The minimum Python version that runnable supports is 3.8.
 
-run_log_store:
-  type: file-system
-  config:
-    log_folder: /mnt/run_log_store
+```shell
+pip install runnable
 ```
 
-More details can be found in [argo configuration](https://astrazeneca.github.io/runnable-core/configurations/executors/argo).
-
-Execute the code as ```runnable execute -f examples/python-tasks.yaml -c examples/configs/argo-config.yam```
-
-<details>
-  <summary>Expand</summary>
-
-```yaml
-apiVersion: argoproj.io/v1alpha1
-kind: Workflow
-metadata:
-  generateName: runnable-dag-
-  annotations: {}
-  labels: {}
-spec:
-  activeDeadlineSeconds: 172800
-  entrypoint: runnable-dag
-  podGC:
-    strategy: OnPodCompletion
-  retryStrategy:
-    limit: '0'
-    retryPolicy: Always
-    backoff:
-      duration: '120'
-      factor: 2
-      maxDuration: '3600'
-  serviceAccountName: default-editor
-  templates:
-    - name: runnable-dag
-      failFast: true
-      dag:
-        tasks:
-          - name: step-1-task-uvdp7h
-            template: step-1-task-uvdp7h
-            depends: ''
-          - name: step-2-task-772vg3
-            template: step-2-task-772vg3
-            depends: step-1-task-uvdp7h.Succeeded
-          - name: success-success-igzq2e
-            template: success-success-igzq2e
-            depends: step-2-task-772vg3.Succeeded
-    - name: step-1-task-uvdp7h
-      container:
-        image: runnable:demo
-        command:
-          - runnable
-          - execute_single_node
-          - '{{workflow.parameters.run_id}}'
-          - step%1
-          - --log-level
-          - WARNING
-          - --file
-          - examples/python-tasks.yaml
-          - --config-file
-          - examples/configs/argo-config.yaml
-        volumeMounts:
-          - name: executor-0
-            mountPath: /mnt
-        imagePullPolicy: ''
-        resources:
-          limits:
-            memory: 1Gi
-            cpu: 250m
-          requests:
-            memory: 1Gi
-            cpu: 250m
-    - name: step-2-task-772vg3
-      container:
-        image: runnable:demo
-        command:
-          - runnable
-          - execute_single_node
-          - '{{workflow.parameters.run_id}}'
-          - step%2
-          - --log-level
-          - WARNING
-          - --file
-          - examples/python-tasks.yaml
-          - --config-file
-          - examples/configs/argo-config.yaml
-        volumeMounts:
-          - name: executor-0
-            mountPath: /mnt
-        imagePullPolicy: ''
-        resources:
-          limits:
-            memory: 1Gi
-            cpu: 250m
-          requests:
-            memory: 1Gi
-            cpu: 250m
-    - name: success-success-igzq2e
-      container:
-        image: runnable:demo
-        command:
-          - runnable
-          - execute_single_node
-          - '{{workflow.parameters.run_id}}'
-          - success
-          - --log-level
-          - WARNING
-          - --file
-          - examples/python-tasks.yaml
-          - --config-file
-          - examples/configs/argo-config.yaml
-        volumeMounts:
-          - name: executor-0
-            mountPath: /mnt
-        imagePullPolicy: ''
-        resources:
-          limits:
-            memory: 1Gi
-            cpu: 250m
-          requests:
-            memory: 1Gi
-            cpu: 250m
-  templateDefaults:
-    activeDeadlineSeconds: 7200
-    timeout: 10800s
-  arguments:
-    parameters:
-      - name: run_id
-        value: '{{workflow.uid}}'
-  volumes:
-    - name: executor-0
-      persistentVolumeClaim:
-        claimName: runnable-volume
-
-```
+Please look at the [installation guide](https://astrazeneca.github.io/runnable/usage)
+for more information.
 
-</details>
 
 ## Pipelines can be: