zenml-io · strickvl · Jul 16, 2024 · Jul 3, 2024 · Jul 5, 2024 · Jul 5, 2024
diff --git a/.gitignore b/.gitignore
@@ -19,6 +19,7 @@ out/
 build/
 *.tsbuildinfo
 .history/
+dag-packed.js
 
 # env
 .env
@@ -38,4 +39,4 @@ build/
 bundled/libs/
 **/__pycache__
 **/.pytest_cache
-**/.vs
+**/.vs
diff --git a/README.md b/README.md
@@ -9,6 +9,7 @@ The ZenML VSCode extension seamlessly integrates with [ZenML](https://github.com
 ## Features
 
 - **Server, Stacks, and Pipeline Runs Views**: Interact directly with ML stacks, pipeline runs, and server configurations from the Activity Bar.
+- **DAG Visualization for Pipeline Runs**: Explore the Directed Acyclic Graph (DAG) of each pipeline run directly from a command on the Activity Bar.
 - **Python Tool Integration**: Utilizes a Language Server Protocol (LSP) server for real-time synchronization with the ZenML environment.
 - **Real-Time Configuration Monitoring**: Leverages `watchdog` to dynamically update configurations, keeping the extension in sync with your ZenML setup.
 - **Status Bar**: Display the current stack name and connection status. You can
@@ -27,9 +28,22 @@ this extension and your Python version needs to be 3.8 or greater.
 
 - **Manage Server Connections**: Connect or disconnect from ZenML servers and refresh server status.
 - **Stack Operations**: View stack details, rename, copy, or set active stacks directly from VSCode.
-- **Pipeline Runs**: Monitor and manage pipeline runs, including deleting runs from the system.
+- **Pipeline Runs**: Monitor and manage pipeline runs, including deleting runs from the system and rendering DAGs.
 - **Environment Information**: Get detailed snapshots of the development environment, aiding troubleshooting.
 
+### DAG Rendering
+
+![DAG Rendering Example](resources/zenml-extension-dag.gif)
+
+- **Directed Acyclic Graph rendering**
+  - Click the Render DAG context action (labeled 1 in the above image) next to the pipeline run you want to render. This will render the DAG in the editor window.
+- **Graph maneuvering**
+  - Panning the graph can be done by clicking and dragging anywhere on the graph.
+  - Zooming can be controlled with the mouse wheel, the control panel (labeled 2 in the above image), or by double-clicking anywhere that is not a node.
+  - Mousing over a node will highlight all edges being output by that node.
+  - Clicking a node will display the data related to it in the ZenML panel view (labeled 3 in the above image).
+  - Double-clicking a node will open the dashboard in a web browser to either the pipeline run or the artifact version.
+
 ## Requirements
 
 - **ZenML Installation:** ZenML needs to be installed in the local Python environment associated with the Python interpreter selected in the current VS Code workspace. This extension interacts directly with your ZenML environment, so ensuring that ZenML is installed and properly configured is essential.

diff --git a/bundled/tool/lsp_zenml.py b/bundled/tool/lsp_zenml.py
@@ -273,3 +273,27 @@ def fetch_pipeline_runs(wrapper_instance, args):
         def delete_pipeline_run(wrapper_instance, args):
             """Deletes a specified ZenML pipeline run."""
             return wrapper_instance.delete_pipeline_run(args)
+
+        @self.command(f"{TOOL_MODULE_NAME}.getPipelineRun")
+        @self.zenml_command(wrapper_name="pipeline_runs_wrapper")
+        def get_pipeline_run(wrapper_instance, args):
+            """Gets a specified ZenML pipeline run.
+
+            Registered as the `getPipelineRun` command under the tool module
+            prefix. `args` is passed through unchanged to
+            `wrapper_instance.get_pipeline_run`, whose result is returned as-is.
+            """
+            return wrapper_instance.get_pipeline_run(args)
+
+        @self.command(f"{TOOL_MODULE_NAME}.getPipelineRunStep")
+        @self.zenml_command(wrapper_name="pipeline_runs_wrapper")
+        def get_run_step(wrapper_instance, args):
+            """Gets a specified ZenML pipeline run step.
+
+            Registered as the `getPipelineRunStep` command under the tool module
+            prefix. `args` is passed through unchanged to
+            `wrapper_instance.get_run_step`, whose result is returned as-is.
+            """
+            return wrapper_instance.get_run_step(args)
+
+        @self.command(f"{TOOL_MODULE_NAME}.getPipelineRunArtifact")
+        @self.zenml_command(wrapper_name="pipeline_runs_wrapper")
+        def get_run_artifact(wrapper_instance, args):
+            """Gets a specified ZenML pipeline artifact.
+
+            Registered as the `getPipelineRunArtifact` command under the tool
+            module prefix. `args` is passed through unchanged to
+            `wrapper_instance.get_run_artifact`, whose result is returned as-is.
+            """
+            return wrapper_instance.get_run_artifact(args)
+
+        @self.command(f"{TOOL_MODULE_NAME}.getPipelineRunDag")
+        @self.zenml_command(wrapper_name="pipeline_runs_wrapper")
+        def get_run_dag(wrapper_instance, args):
+            """Gets graph data for a specified ZenML pipeline run.
+
+            Registered as the `getPipelineRunDag` command under the tool module
+            prefix. Note the name change on the way down: this command delegates
+            to `wrapper_instance.get_pipeline_run_graph`, passing `args` through
+            unchanged and returning its result as-is.
+            """
+            return wrapper_instance.get_pipeline_run_graph(args)
diff --git a/bundled/tool/zen_watcher.py b/bundled/tool/zen_watcher.py
@@ -49,6 +49,14 @@ def __init__(self, lsp_server):
             "always",
         ]
 
+        try:
+            with suppress_stdout_temporarily():
+                config_wrapper_instance = self.LSP_SERVER.zenml_client.config_wrapper
+                self.config_path = config_wrapper_instance.get_global_config_file_path()
+        except Exception as e:
+            self.log_error(f"Failed to retrieve global config file path: {e}")
+
+
     def process_config_change(self, config_file_path: str):
         """Process the configuration file change."""
         with suppress_stdout_temporarily():
@@ -88,20 +96,20 @@ def on_modified(self, event):
         """
         Handles the modification event triggered when the global configuration file is changed.
         """
+        if event.src_path != self.config_path:
+            return
+
         if self._timer is not None:
             self._timer.cancel()
+
         self._timer = Timer(self.debounce_interval, self.process_event, [event])
         self._timer.start()
 
     def process_event(self, event):
         """
         Processes the event with a debounce mechanism.
         """
-        with suppress_stdout_temporarily():
-            config_wrapper_instance = self.LSP_SERVER.zenml_client.config_wrapper
-            config_file_path = config_wrapper_instance.get_global_config_file_path()
-            if event.src_path == str(config_file_path):
-                self.process_config_change(config_file_path)
+        self.process_config_change(event.src_path)
 
     def watch_zenml_config_yaml(self):
         """

diff --git a/bundled/tool/zenml_grapher.py b/bundled/tool/zenml_grapher.py
@@ -0,0 +1,97 @@
+#  Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+#  or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+"""This module contains a tool to mimic LineageGraph output for pipeline runs"""
+
+class Grapher:
+    """Quick and dirty implementation of ZenML/LineageGraph to reduce number of api calls.
+
+    Turns the steps of one pipeline run into a flat list of nodes (one per
+    step, one per distinct artifact) plus a list of directed edges, and
+    exposes the result through `to_dict`.
+    """
+
+    def __init__(self, run):
+        """Stores the run and starts with an empty graph.
+
+        Args:
+            run: Pipeline run whose `metadata.steps` mapping and `body` are
+                read by the build methods and by `to_dict`.
+        """
+        self.run = run
+        self.nodes = []
+        # Ids of artifacts that already have a node (id -> True).
+        self.artifacts = {}
+        # A list, matching what build_edges_from_steps and add_edge work with.
+        self.edges = []
+        # Ids of the edges currently in self.edges, for O(1) duplicate checks.
+        self._edge_ids = set()
+
+    def build_nodes_from_steps(self) -> None:
+        """Builds internal node list from run steps.
+
+        The list is rebuilt from scratch on every call, so calling this method
+        more than once does not duplicate step nodes.
+        """
+        self.nodes = []
+        self.artifacts = {}
+
+        for step_name, step_data in self.run.metadata.steps.items():
+            step_id = str(step_data.id)
+            self.nodes.append({
+                "id": step_id,
+                "type": "step",
+                "data": {
+                    "execution_id": step_id,
+                    "name": step_name,
+                    "status": step_data.body.status,
+                },
+            })
+            self.add_artifacts_from_list(step_data.body.inputs)
+            self.add_artifacts_from_list(step_data.body.outputs)
+
+    def add_artifacts_from_list(self, artifacts_list) -> None:
+        """Used to add unique artifacts to the internal nodes list by build_nodes_from_steps.
+
+        Args:
+            artifacts_list: Mapping of artifact name to artifact version, as
+                found in a step's `body.inputs` / `body.outputs`. Artifacts
+                whose id already has a node are skipped.
+        """
+        for artifact_name, artifact_version in artifacts_list.items():
+            artifact_id = str(artifact_version.body.artifact.id)
+            if artifact_id in self.artifacts:
+                continue
+
+            self.artifacts[artifact_id] = True
+
+            self.nodes.append({
+                "type": "artifact",
+                "id": artifact_id,
+                "data": {
+                    "name": artifact_name,
+                    "artifact_type": artifact_version.body.type,
+                    "execution_id": str(artifact_version.id),
+                },
+            })
+
+    def build_edges_from_steps(self) -> None:
+        """Builds internal edges list from run steps.
+
+        Each input artifact gets an edge pointing at the step, and the step
+        gets an edge pointing at each of its output artifacts.
+        """
+        self.edges = []
+        self._edge_ids = set()
+
+        for step_data in self.run.metadata.steps.values():
+            step_id = str(step_data.id)
+
+            for input_version in step_data.body.inputs.values():
+                self.add_edge(str(input_version.body.artifact.id), step_id)
+
+            for output_version in step_data.body.outputs.values():
+                self.add_edge(step_id, str(output_version.body.artifact.id))
+
+    def add_edge(self, v: str, w: str) -> None:
+        """Helper method to add an edge to the internal edges list.
+
+        Args:
+            v: Id of the source node.
+            w: Id of the target node.
+
+        An edge that is already present (same source and target) is not added
+        a second time, so edge ids stay unique.
+        """
+        edge_id = f"{v}_{w}"
+        if edge_id in self._edge_ids:
+            return
+
+        self._edge_ids.add(edge_id)
+        self.edges.append({
+            "id": edge_id,
+            "source": v,
+            "target": w,
+        })
+
+    def to_dict(self) -> dict:
+        """Returns dictionary containing graph data.
+
+        Returns:
+            dict: The built `nodes` and `edges` plus the run's `status` and the
+                `name` and `version` of its pipeline.
+        """
+        return {
+            "nodes": self.nodes,
+            "edges": self.edges,
+            "status": self.run.body.status,
+            "name": self.run.body.pipeline.name,
+            "version": self.run.body.pipeline.body.version,
+        }
diff --git a/bundled/tool/zenml_wrappers.py b/bundled/tool/zenml_wrappers.py
@@ -15,6 +15,7 @@
 import json
 import pathlib
 from typing import Any
+from zenml_grapher import Grapher
 
 
 class GlobalConfigWrapper:
@@ -324,6 +325,145 @@ def delete_pipeline_run(self, args) -> dict:
             return {"message": f"Pipeline run `{run_id}` deleted successfully."}
         except self.ZenMLBaseException as e:
             return {"error": f"Failed to delete pipeline run: {str(e)}"}
+
+    def get_pipeline_run(self, args) -> dict:
+        """Fetches one hydrated ZenML pipeline run and flattens it into a dict.
+
+        Args:
+            args (list): List of arguments; the first element is the run id.
+        Returns:
+            dict: Summary of the run, or a single `error` entry if ZenML raised.
+        """
+        try:
+            pipeline_run = self.client.get_pipeline_run(args[0], hydrate=True)
+            body = pipeline_run.body
+            meta = pipeline_run.metadata
+            environment = meta.client_environment
+
+            started = meta.start_time.isoformat() if meta.start_time else None
+            ended = meta.end_time.isoformat() if meta.end_time else None
+            # Fall back to the macOS-specific key when no generic OS version is recorded.
+            os_version = environment.get(
+                "os_version",
+                environment.get("mac_version", "Unknown Version"),
+            )
+
+            return {
+                "id": str(pipeline_run.id),
+                "name": body.pipeline.name,
+                "status": body.status,
+                "version": body.pipeline.body.version,
+                "stackName": body.stack.name,
+                "startTime": started,
+                "endTime": ended,
+                "os": environment.get("os", "Unknown OS"),
+                "osVersion": os_version,
+                "pythonVersion": environment.get("python_version", "Unknown"),
+            }
+        except self.ZenMLBaseException as e:
+            return {"error": f"Failed to retrieve pipeline run: {str(e)}"}
+
+    def get_pipeline_run_graph(self, args) -> dict:
+        """Builds the step/artifact DAG of a ZenML pipeline run.
+
+        Args:
+            args (list): List of arguments; the first element is the run id.
+        Returns:
+            dict: Graph data produced by `Grapher.to_dict`, or a single `error`
+                entry if ZenML raised.
+        """
+        try:
+            pipeline_run = self.client.get_pipeline_run(args[0], hydrate=True)
+            dag = Grapher(pipeline_run)
+            # Nodes first, then edges, then serialise.
+            dag.build_nodes_from_steps()
+            dag.build_edges_from_steps()
+            graph_data = dag.to_dict()
+            return graph_data
+        except self.ZenMLBaseException as e:
+            return {"error": f"Failed to retrieve pipeline run graph: {str(e)}"}
+
+    def get_run_step(self, args) -> dict:
+        """Gets a ZenML pipeline run step.
+
+        Args:
+            args (list): List of arguments; the first element is the step run id.
+        Returns:
+            dict: Step details together with a summary of the pipeline run the
+                step belongs to, or a single `error` entry if ZenML raised.
+                `logsUri` is None when the step has no logs attached.
+        """
+        try:
+            step_run_id = args[0]
+            step = self.client.get_run_step(step_run_id, hydrate=True)
+            run = self.client.get_pipeline_run(step.metadata.pipeline_run_id, hydrate=True)
+
+            start_time = step.metadata.start_time
+            end_time = step.metadata.end_time
+            # `logs` may be None on a step run. Dereferencing it blindly would
+            # raise AttributeError, which the handler below does not catch.
+            logs = step.metadata.logs
+
+            step_data = {
+                "name": step.name,
+                "id": str(step.id),
+                "status": step.body.status,
+                "author": {
+                    "fullName": step.body.user.body.full_name,
+                    "email": step.body.user.name,
+                },
+                "startTime": start_time.isoformat() if start_time else None,
+                "endTime": end_time.isoformat() if end_time else None,
+                "duration": (
+                    str(end_time - start_time) if end_time and start_time else None
+                ),
+                "stackName": run.body.stack.name,
+                "orchestrator": {
+                    "runId": str(run.metadata.orchestrator_run_id)
+                },
+                "pipeline": {
+                    "name": run.body.pipeline.name,
+                    "status": run.body.status,
+                    "version": run.body.pipeline.body.version,
+                },
+                "cacheKey": step.metadata.cache_key,
+                "sourceCode": step.metadata.source_code,
+                "logsUri": logs.body.uri if logs is not None else None,
+            }
+            return step_data
+        except self.ZenMLBaseException as e:
+            return {"error": f"Failed to retrieve pipeline run step: {str(e)}"}
+
+    def get_run_artifact(self, args) -> dict:
+        """Fetches one hydrated artifact version and flattens it into a dict.
+
+        Args:
+            args (list): List of arguments; the first element is the artifact
+                version id.
+        Returns:
+            dict: Artifact details including its run metadata values, or a
+                single `error` entry if ZenML raised.
+        """
+        try:
+            artifact = self.client.get_artifact_version(args[0], hydrate=True)
+            body = artifact.body
+
+            # Keep only the plain value of every run-metadata entry.
+            run_metadata = artifact.metadata.run_metadata
+            metadata = {key: run_metadata[key].body.value for key in run_metadata}
+
+            author = {
+                "fullName": body.user.body.full_name,
+                "email": body.user.name,
+            }
+            return {
+                "name": body.artifact.name,
+                "version": body.version,
+                "id": str(artifact.id),
+                "type": body.type,
+                "author": author,
+                "updated": body.updated.isoformat(),
+                "data": {
+                    "uri": body.uri,
+                    "dataType": body.data_type.attribute,
+                },
+                "metadata": metadata,
+            }
+
+        except self.ZenMLBaseException as e:
+            return {"error": f"Failed to retrieve pipeline run artifact: {str(e)}"}
 
 
 class StacksWrapper: