From 6c84e46eda7b24c79bc0352c0be06fe3da479b46 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Wed, 13 Dec 2023 13:44:29 -0500 Subject: [PATCH] auto-generate toctree from flytesnacks index.md docs (#4587) * auto-generate toctree from flytesnacks index.md docs Signed-off-by: Niels Bantilan * fix environment_setup ref Signed-off-by: Niels Bantilan * make ListTableToc more generic Signed-off-by: Niels Bantilan --------- Signed-off-by: Niels Bantilan --- docs/_ext/import_projects.py | 59 ++++++++++ docs/conf.py | 12 +- docs/index.md | 12 +- docs/integrations.md | 216 ----------------------------------- docs/tutorials.md | 96 ---------------- docs/userguide.md | 65 ----------- 6 files changed, 74 insertions(+), 386 deletions(-) delete mode 100644 docs/integrations.md delete mode 100644 docs/tutorials.md delete mode 100644 docs/userguide.md diff --git a/docs/_ext/import_projects.py b/docs/_ext/import_projects.py index 8962c40855..8acb756b8e 100644 --- a/docs/_ext/import_projects.py +++ b/docs/_ext/import_projects.py @@ -1,3 +1,4 @@ +import inspect import os import re import shutil @@ -9,8 +10,11 @@ from typing import Optional, List, Union from git import Repo +from docutils import nodes +from docutils.statemachine import StringList, string2lines from sphinx.application import Sphinx from sphinx.config import Config +from sphinx.util.docutils import SphinxDirective __version__ = "0.0.0" @@ -20,6 +24,7 @@ class ImportProjectsConfig: clone_dir: str flytekit_api_dir: str source_regex_mapping: dict = field(default_factory=dict) + list_table_toc: List[str] = field(default_factory=list) @dataclass @@ -32,6 +37,46 @@ class Project: refresh: bool = False +TOC_TEMPLATE = """ +```{{toctree}} +:maxdepth: 1 +:hidden: +{toc} +``` +""" + +TABLE_TEMPLATE = """ +```{{list-table}} +{content} +``` +""" + + +class ListTableToc(SphinxDirective): + """Custom directive to convert list-table into both list-table and toctree.""" + + has_content = True + + def run(self) -> list: + return [self.parse()] + + def parse(self): + """Parses the list-table and adds a toctree""" + toc = "" + + # finds all doc references in the form + for doc in re.findall(r"<(.+)>", self.block_text): + toc += f"\n{doc}" + + container = nodes.container("") + toc = inspect.cleandoc(TOC_TEMPLATE.format(toc=toc)) + table = inspect.cleandoc(TABLE_TEMPLATE.format(content=self.block_text)) + content = f"{toc}\n\n{table}" + + self.state.nested_parse(StringList(string2lines(content)), 0, container) + return container + + def update_sys_path_for_flytekit(import_project_config: ImportProjectsConfig): # create flytekit/_version.py file manually with open(f"{import_project_config.flytekit_api_dir}/flytekit/_version.py", "w") as f: @@ -132,10 +177,24 @@ def replace_refs_in_docstrings( lines[i] = text +def add_list_table_toc(app: Sphinx, docname: str, source: List[str]): + """This replaces list-table directives in specific documents with list-table-toc. + + This is important for automatically generating a toctree and list-table from + a list-table. + """ + if docname in app.config.import_projects_config["list_table_toc"]: + text = source[0] + text = re.sub(r"{list-table}", r"{list-table-toc}", text) + source[0] = text + + def setup(app: Sphinx) -> dict: app.add_config_value("import_projects_config", None, False) app.add_config_value("import_projects", None, False) + app.add_directive("list-table-toc", ListTableToc) app.connect("config-inited", import_projects, priority=0) + app.connect("source-read", add_list_table_toc, priority=0) return { "version": __version__, diff --git a/docs/conf.py b/docs/conf.py index 904e216bfa..31af6f6cc1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -128,10 +128,7 @@ "flytesnacks/bioinformatics_examples.md", "flytesnacks/feature_engineering.md", "flytesnacks/flyte_lab.md", - "flytesnacks/integrations.md", "flytesnacks/ml_training.md", - "flytesnacks/tutorials.md", - "flytesnacks/userguide.md", "flytesnacks/README.md", "flytekit/**/README.md", "flytekit/_templates/**", @@ -313,8 +310,10 @@ # These patterns are used to replace values in source files that are imported # from other repos. REPLACE_PATTERNS = { + r"": r"", r"": r"", INTERSPHINX_REFS_PATTERN: INTERSPHINX_REFS_REPLACE, + r"": r"", r"": r"", r"`": r"bioinformatics", PROTO_REF_PATTERN: PROTO_REF_REPLACE, r"/protos/docs/service/index": r"/protos/docs/service/service", + r"": r"", + r"": r"" } import_projects_config = { "clone_dir": "_projects", "flytekit_api_dir": "_src/flytekit/", "source_regex_mapping": REPLACE_PATTERNS, + "list_table_toc": [ + "flytesnacks/userguide", + "flytesnacks/tutorials", + "flytesnacks/integrations", + ], } # Define these environment variables to use local copies of the projects. This diff --git a/docs/index.md b/docs/index.md index e09c819175..c2bf0f9087 100644 --- a/docs/index.md +++ b/docs/index.md @@ -79,12 +79,12 @@ contribute its architecture and design. You can also access the * - {doc}`๐Ÿ”ค Intro to Flyte ` - Get your first workflow running, learn about the Flyte development lifecycle and core use cases. -* - {doc}`๐Ÿ“– User Guide ` +* - {doc}`๐Ÿ“– User Guide ` - A comprehensive view of Flyte's functionality for data and ML practitioners. -* - {doc}`๐Ÿ“š Tutorials ` +* - {doc}`๐Ÿ“š Tutorials ` - End-to-end examples of Flyte for data/feature engineering, machine learning, bioinformatics, and more. -* - {doc}`๐Ÿ”Œ Integrations ` +* - {doc}`๐Ÿ”Œ Integrations ` - Leverage a rich ecosystem of third-party tools and libraries to make your Flyte workflows even more effective. * - {ref}`๐Ÿš€ Deployment Guide ` @@ -145,9 +145,9 @@ Core Use Cases :name: examples-guides :hidden: -User Guide -Tutorials -Integrations +User Guide +Tutorials +Integrations ``` ```{toctree} diff --git a/docs/integrations.md b/docs/integrations.md deleted file mode 100644 index f2b2f76cc4..0000000000 --- a/docs/integrations.md +++ /dev/null @@ -1,216 +0,0 @@ -(integrations)= - -# Integrations - -Flyte is designed to be highly extensible and can be customized in multiple ways. - -```{note} -Want to contribute an example? Check out the {doc}`Example Contribution Guide `. -``` - -## Flytekit Plugins - -Flytekit plugins are simple plugins that can be implemented purely in python, unit tested locally and allow extending -Flytekit functionality. These plugins can be anything and for comparison can be thought of like -[Airflow Operators](https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/index.html). - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`SQL ` - - Execute SQL queries as tasks. -* - {doc}`Great Expectations ` - - Validate data with `great_expectations`. -* - {doc}`Papermill ` - - Execute Jupyter Notebooks with `papermill`. -* - {doc}`Pandera ` - - Validate pandas dataframes with `pandera`. -* - {doc}`Modin ` - - Scale pandas workflows with `modin`. -* - {doc}`Dolt ` - - Version your SQL database with `dolt`. -* - {doc}`DBT ` - - Run and test your `dbt` pipelines in Flyte. -* - {doc}`WhyLogs ` - - `whylogs`: the open standard for data logging. -* - {doc}`MLFlow ` - - `mlflow`: the open standard for model tracking. -* - {doc}`ONNX ` - - Convert ML models to ONNX models seamlessly. -* - {doc}`DuckDB ` - - Run analytical queries using DuckDB. -``` - -:::{dropdown} {fa}`info-circle` Using flytekit plugins -:animate: fade-in-slide-down - -Data is automatically marshalled and unmarshalled in and out of the plugin. Users should mostly implement the -{py:class}`~flytekit.core.base_task.PythonTask` API defined in Flytekit. - -Flytekit Plugins are lazily loaded and can be released independently like libraries. We follow a convention to name the -plugin like `flytekitplugins-*`, where \* indicates the package to be integrated into Flytekit. For example -`flytekitplugins-papermill` enables users to author Flytekit tasks using [Papermill](https://papermill.readthedocs.io/en/latest/). - -You can find the plugins maintained by the core Flyte team [here](https://github.com/flyteorg/flytekit/tree/master/plugins). -::: - -## Native Backend Plugins - -Native Backend Plugins are the plugins that can be executed without any external service dependencies because the compute is -orchestrated by Flyte itself, within its provisioned Kubernetes clusters. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`K8s Pods ` - - Execute K8s pods for arbitrary workloads. -* - {doc}`K8s Cluster Dask Jobs ` - - Run Dask jobs on a K8s Cluster. -* - {doc}`K8s Cluster Spark Jobs ` - - Run Spark jobs on a K8s Cluster. -* - {doc}`Kubeflow PyTorch ` - - Run distributed PyTorch training jobs using `Kubeflow`. -* - {doc}`Kubeflow TensorFlow ` - - Run distributed TensorFlow training jobs using `Kubeflow`. -* - {doc}`MPI Operator ` - - Run distributed deep learning training jobs using Horovod and MPI. -* - {doc}`Ray Task ` - - Run Ray jobs on a K8s Cluster. -``` - -(external_service_backend_plugins)= - -## External Service Backend Plugins - -As the term suggests, external service backend plugins relies on external services like -[AWS Sagemaker](https://aws.amazon.com/sagemaker), -[Hive](https://docs.qubole.com/en/latest/user-guide/engines/hive/index.html) or -[Snowflake](https://www.snowflake.com/) for handling the workload defined in -the Flyte task that use the respective plugin. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`AWS Sagemaker: Model Training ` - - Train models with built-in or define your own custom algorithms. -* - {doc}`AWS Sagemaker: Pytorch Training ` - - Train Pytorch models using Sagemaker, with support for distributed training. -* - {doc}`AWS Athena ` - - Execute queries using AWS Athena -* - {doc}`AWS Batch ` - - Running tasks and workflows on AWS batch service -* - {doc}`Flyte Interactive ` - - Execute tasks using Flyte Interactive to debug. -* - {doc}`Hive ` - - Run Hive jobs in your workflows. -* - {doc}`MMCloud ` - - Execute tasks using MemVerge Memory Machine Cloud -* - {doc}`Snowflake ` - - Run Snowflake jobs in your workflows. -* - {doc}`Databricks ` - - Run Databricks jobs in your workflows. -* - {doc}`BigQuery ` - - Run BigQuery jobs in your workflows. -``` - -(enable-backend-plugins)= - -::::{dropdown} {fa}`info-circle` Enabling Backend Plugins -:animate: fade-in-slide-down - -To enable a backend plugin you have to add the `ID` of the plugin to the enabled plugins list. The `enabled-plugins` is available under the `tasks > task-plugins` section of FlytePropeller's configuration. -The plugin configuration structure is defined [here](https://pkg.go.dev/github.com/flyteorg/flytepropeller@v0.6.1/pkg/controller/nodes/task/config#TaskPluginConfig). An example of the config follows, - -:::{rli} https://raw.githubusercontent.com/flyteorg/flyte/master/kustomize/overlays/sandbox/flyte/config/propeller/enabled_plugins.yaml -:language: yaml -::: - -**Finding the `ID` of the Backend Plugin** - -This is a little tricky since you have to look at the source code of the plugin to figure out the `ID`. In the case of Spark, for example, the value of `ID` is used [here](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L424) here, defined as [spark](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L41). - -**Enabling a Specific Backend Plugin in Your Own Kustomize Generator** - -Flyte uses Kustomize to generate the the deployment configuration which can be leveraged to [kustomize your own deployment](https://github.com/flyteorg/flyte/tree/master/kustomize). - -:::: - -## Custom Container Tasks - -Because Flyte uses executable docker containers as the smallest unit of compute, you can write custom tasks with the -{py:class}`flytekit.ContainerTask` via the [flytekit](https://github.com/flyteorg/flytekit) SDK. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`Raw Container Tasks ` - - Execute arbitrary containers: You can write C++ code, bash scripts and any containerized program. -``` - -## SDKs for Writing Tasks and Workflows - -The {ref}`community ` would love to help you with your own ideas of building a new SDK. Currently the available SDKs are: - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - [flytekit](https://flytekit.readthedocs.io) - - The Python SDK for Flyte. -* - [flytekit-java](https://github.com/spotify/flytekit-java) - - The Java/Scala SDK for Flyte. -``` - -## Flyte Operators - -Flyte can be integrated with other orchestrators to help you leverage Flyte's -constructs natively within other orchestration tools. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`Airflow ` - - Trigger Flyte executions from Airflow. -``` - -```{toctree} -:maxdepth: -1 -:caption: Integrations -:hidden: - -flytesnacks/examples/sql_plugin/index -flytesnacks/examples/greatexpectations_plugin/index -flytesnacks/examples/papermill_plugin/index -flytesnacks/examples/pandera_plugin/index -flytesnacks/examples/modin_plugin/index -flytesnacks/examples/dolt_plugin/index -flytesnacks/examples/dbt_plugin/index -flytesnacks/examples/whylogs_plugin/index -flytesnacks/examples/mlflow_plugin/index -flytesnacks/examples/onnx_plugin/index -flytesnacks/examples/duckdb_plugin/index -flytesnacks/examples/k8s_pod_plugin/index -flytesnacks/examples/k8s_dask_plugin/index -flytesnacks/examples/k8s_spark_plugin/index -flytesnacks/examples/kfpytorch_plugin/index -flytesnacks/examples/kftensorflow_plugin/index -flytesnacks/examples/kfmpi_plugin/index -flytesnacks/examples/ray_plugin/index -flytesnacks/examples/sagemaker_training_plugin/index -flytesnacks/examples/sagemaker_pytorch_plugin/index -flytesnacks/examples/athena_plugin/index -flytesnacks/examples/aws_batch_plugin/index -flytesnacks/examples/hive_plugin/index -flytesnacks/examples/mmcloud_plugin/index -flytesnacks/examples/sensor/index -flytesnacks/examples/snowflake_plugin/index -flytesnacks/examples/databricks_plugin/index -flytesnacks/examples/bigquery_plugin/index -flytesnacks/examples/airflow_plugin/index -flytesnacks/examples/flyin_plugin/index -``` diff --git a/docs/tutorials.md b/docs/tutorials.md deleted file mode 100644 index b3a3081dff..0000000000 --- a/docs/tutorials.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -next-page: ml_training -next-page-title: Model Training ---- - -(tutorials)= - -# Tutorials - -This section showcases step-by-step case studies of how to combine the different -features of Flyte to achieve everything from data processing, feature engineering, -model training, to batch predictions. Code for all of the examples in the user -guide can be found in the [flytesnacks](https://github.com/flyteorg/flytesnacks) repo. - -It comes with a highly customized environment to make running, documenting and -contributing samples easy. If this is your first time running these examples, follow the -{ref}`setup guide ` to get started. - -```{note} -Want to contribute an example? Check out the {doc}`Example Contribution Guide `. -``` - -## ๐Ÿค– Model Training - -Train machine learning models from using your framework of choice. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`Diabetes Classification ` - - Train an XGBoost model on the Pima Indians Diabetes Dataset. -* - {doc}`House Price Regression ` - - Use dynamic workflows to train a multiregion house price prediction model using XGBoost. -* - {doc}`MNIST Classification ` - - Train a neural network on MNIST with PyTorch and W&B -* - {doc}`NLP Processing with Gensim ` - - Word embedding and topic modelling on lee background corpus with Gensim -* - {doc}`Sales Forecasting ` - - Use the Rossmann Store data to forecast sales with distributed training using Horovod on Spark. -``` - -## ๐Ÿ›  Feature Engineering - -Engineer the data features to improve your model accuracy. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`EDA and Feature Engineering With Papermill ` - - How to use Jupyter notebook within Flyte -* - {doc}`Data Cleaning and Feature Serving With Feast ` - - How to use Feast to serve data in Flyte -``` - -## ๐Ÿงช Bioinformatics - -Perform computational biology with Flyte. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`Nucleotide Sequence Querying with BLASTX ` - - Use BLASTX to Query a Nucleotide Sequence Against a Local Protein Database -``` - -## ๐Ÿ”ฌ Flytelab - -The open-source repository of machine learning projects using Flyte. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`Weather Forecasting ` - - Build an online weather forecasting application. -``` - - -```{toctree} -:maxdepth: -1 -:caption: Tutorials -:hidden: - -flytesnacks/examples/pima_diabetes/index -flytesnacks/examples/house_price_prediction/index -flytesnacks/examples/mnist_classifier/index -flytesnacks/examples/nlp_processing/index -flytesnacks/examples/forecasting_sales/index -flytesnacks/examples/exploratory_data_analysis/index -flytesnacks/examples/feast_integration/index -flytesnacks/examples/blast/index -flytesnacks/weather_forecasting -``` diff --git a/docs/userguide.md b/docs/userguide.md deleted file mode 100644 index 1ab9ea4a60..0000000000 --- a/docs/userguide.md +++ /dev/null @@ -1,65 +0,0 @@ -(userguide)= - -# User Guide - -If this is your first time using Flyte, check out the {doc}`Getting Started ` guide. - -This *User Guide*, the {doc}`Tutorials `, and the {doc}`Integrations ` examples cover all of -the key features of Flyte for data analytics, data science and machine learning practitioners, organized by topic. Each -section below introduces a core feature of Flyte and how you can use it to address specific use cases. Code for all -of the examples can be found in the [flytesnacks repo](https://github.com/flyteorg/flytesnacks). - -It comes with a specific environment to make running, documenting -and contributing samples easy. If this is your first time running these examples, follow the -{doc}`environment setup guide ` to get started. - -```{tip} -To learn about how to spin up and manage a Flyte cluster in the cloud, see the -{doc}`Deployment Guides `. -``` - -```{note} -Want to contribute an example? Check out the {doc}`Example Contribution Guide `. -``` - -## Table of Contents - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`๐ŸŒณ Environment Setup ` - - Set up a development environment to run the examples in the user guide. -* - {doc}`๐Ÿ”ค Basics ` - - Learn about tasks, workflows, launch plans, caching and managing files and directories. -* - {doc}`โŒจ๏ธ Data Types and IO ` - - Improve pipeline robustness with Flyte's portable and extensible type system. -* - {doc}`๐Ÿ”ฎ Advanced Composition ` - - Implement conditionals, nested and dynamic workflows, map tasks and even recursion! -* - {doc}`๐Ÿงฉ Customizing Dependencies ` - - Provide custom dependencies to run your Flyte entities. -* - {doc}`๐Ÿก Development Lifecycle ` - - Develop and test locally on the demo cluster. -* - {doc}`โš—๏ธ Testing ` - - Test tasks and workflows with Flyte's testing utilities. -* - {doc}`๐Ÿšข Productionizing ` - - Ship and configure your machine learning pipelines on a production Flyte installation. -* - {doc}`๐Ÿ— Extending ` - - Define custom plugins that aren't currently supported in the Flyte ecosystem. -``` - -```{toctree} -:maxdepth: -1 -:caption: User Guide -:hidden: - -Environment Setup -Basics -Data Types and IO -Advanced Composition -Customizing Dependencies -Development Lifecycle -Testing -Productionizing -Extending -```