Reorganize configs #145

Merged: 16 commits, Nov 11, 2024
Changes from 5 commits
2 changes: 1 addition & 1 deletion .github/workflows/production_run_complete_llm.yml
@@ -56,4 +56,4 @@ jobs:
- name: Run pipeline, create pipeline, configure trigger (Production)
working-directory: ./llm-complete-guide
run: |
- python gh_action_rag.py --no-cache --create-template ----event-source-id --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --action-id ${{ env.ZENML_ACTION_ID }} --config rag_gcp.yaml
+ python gh_action_rag.py --no-cache --create-template ----event-source-id --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --action-id ${{ env.ZENML_ACTION_ID }} --config production/rag.yaml
2 changes: 1 addition & 1 deletion .github/workflows/staging_run_complete_llm.yml
@@ -52,4 +52,4 @@ jobs:
- name: Run pipeline (Staging)
working-directory: ./llm-complete-guide
run: |
- python gh_action_rag.py --no-cache --config rag_local_dev.yaml
+ python gh_action_rag.py --no-cache --config staging/rag.yaml
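
Both workflows now pass environment-scoped paths (production/rag.yaml, staging/rag.yaml) to gh_action_rag.py instead of the old flat filenames. Below is a minimal sketch of how such a `--config` value could be resolved against the reorganized configs directory; the argument names mirror the workflow calls above, but the script internals are assumed here, since the real gh_action_rag.py is not shown in this diff.

```python
# Hypothetical sketch only; the real gh_action_rag.py is not part of this PR's diff.
import argparse
from pathlib import Path

CONFIG_ROOT = Path("configs")  # assumption: configs/ now holds production/ and staging/ subfolders


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Trigger the RAG pipeline from CI")
    parser.add_argument("--config", required=True,
                        help="Path relative to configs/, e.g. production/rag.yaml or staging/rag.yaml")
    parser.add_argument("--no-cache", action="store_true", help="Disable pipeline caching")
    parser.add_argument("--create-template", action="store_true")
    parser.add_argument("--service-account-id", default=None)
    parser.add_argument("--action-id", default=None)
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    config_path = CONFIG_ROOT / args.config  # e.g. configs/production/rag.yaml
    if not config_path.exists():
        raise FileNotFoundError(f"Run config not found: {config_path}")
    print(f"Would run pipeline with {config_path} (cache disabled: {args.no_cache})")
```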
10 changes: 5 additions & 5 deletions llm-complete-guide/README.md
@@ -85,7 +85,7 @@ to run the pipelines in the correct order. You can run the script with the
following command:

```shell
- python run.py --rag
+ python run.py rag
```

This will run the basic RAG pipeline, which scrapes the ZenML documentation and
@@ -100,7 +100,7 @@ use for the LLM.
When you're ready to make the query, run the following command:

```shell
- python run.py --query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
+ python run.py query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
```

Alternative options for LLMs to use include:
@@ -119,7 +119,7 @@ this up.
To run the evaluation pipeline, you can use the following command:

```shell
- python run.py --evaluation
+ python run.py evaluation
```

You'll need to have first run the RAG pipeline to have the necessary assets in
@@ -137,7 +137,7 @@ To run the `distilabel` synthetic data generation pipeline, you can use the foll

```shell
pip install -r requirements-argilla.txt # special requirements
- python run.py --synthetic
+ python run.py synthetic
```

You will also need to have set up and connected to an Argilla instance for this
@@ -177,7 +177,7 @@ commands:

```shell
pip install -r requirements-argilla.txt # special requirements
- python run.py --embeddings
+ python run.py embeddings
```

*Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka
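
The README changes switch run.py from flag-style invocation (--rag, --query, --evaluation, --synthetic, --embeddings) to subcommands. Below is a hedged sketch of what a subcommand-style CLI could look like; click is an assumed choice and the command bodies are placeholders. Only the command names and the --model option come from the README text above.

```python
# Sketch of a subcommand-style run.py; click is assumed, bodies are placeholders.
import click


@click.group()
def cli() -> None:
    """Entry point for the llm-complete-guide pipelines."""


@cli.command()
def rag() -> None:
    """Run the basic RAG ingestion pipeline."""
    click.echo("Running RAG pipeline...")


@cli.command()
@click.argument("question")
@click.option("--model", default="gpt4", help="LLM used to answer, e.g. gpt4")
def query(question: str, model: str) -> None:
    """Ask QUESTION against the indexed documentation."""
    click.echo(f"Querying {model}: {question}")


@cli.command()
def evaluation() -> None:
    """Run the evaluation pipeline (requires a prior rag run)."""
    click.echo("Running evaluation pipeline...")


@cli.command()
def synthetic() -> None:
    """Run the distilabel synthetic data generation pipeline."""
    click.echo("Running synthetic data pipeline...")


@cli.command()
def embeddings() -> None:
    """Run the embeddings finetuning pipeline."""
    click.echo("Running embeddings finetuning...")


if __name__ == "__main__":
    cli()
```

With a layout like this, `python run.py query "..." --model=gpt4` parses exactly as shown in the updated README.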
@@ -33,7 +33,7 @@ settings:

# configuration of the Model Control Plane
model:
- name: finetuned-zenml-docs-embeddings
+ name: dev_finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
@@ -23,7 +23,7 @@ settings:

# configuration of the Model Control Plane
model:
- name: finetuned-zenml-docs-embeddings
+ name: dev_finetuned-zenml-docs-embeddings
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
@@ -16,7 +16,7 @@ settings:

# configuration of the Model Control Plane
model:
- name: finetuned-zenml-docs-embeddings
+ name: dev_finetuned-zenml-docs-embeddings
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
@@ -31,7 +31,7 @@ settings:

# configuration of the Model Control Plane
model:
- name: finetuned-zenml-docs-embeddings
+ name: dev_finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
48 changes: 48 additions & 0 deletions llm-complete-guide/configs/production/embeddings.yaml
@@ -0,0 +1,48 @@
# enable_cache: False

# environment configuration
settings:
docker:
parent_image: "zenmldocker/prepare-release:base-0.68.1"
requirements:
- langchain-community
- ratelimit
- langchain>=0.0.325
- langchain-openai
- pgvector
- psycopg2-binary
- beautifulsoup4
- unstructured
- pandas
- numpy
- sentence-transformers>=3
- transformers[torch]==4.43.1
- litellm
- ollama
- tiktoken
- umap-learn
- matplotlib
- pyarrow
- rerankers[flashrank]
- datasets
- torch
- pygithub
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete


# configuration of the Model Control Plane
model:
name: prod_finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]

steps:
finetune:
step_operator: "gcp_a100"
settings:
step_operator.vertex:
accelerator_count: 1
accelerator_type: NVIDIA_TESLA_A100
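
The new production/embeddings.yaml pins the finetune step to a gcp_a100 step operator with a single NVIDIA_TESLA_A100 on Vertex AI. For reference, here is a hedged sketch of how such a file is typically consumed via ZenML's `with_options(config_path=...)`; the pipeline and step names are placeholders, not necessarily the ones defined in this repository.

```python
# Placeholder pipeline/step names; only the config path refers to this PR's new file.
from zenml import pipeline, step


@step
def finetune() -> None:
    """Finetuning step; under the production config this runs on the gcp_a100 step operator."""


@pipeline
def finetune_embeddings() -> None:
    finetune()


if __name__ == "__main__":
    # The YAML supplies the Docker parent image and requirements, the Model
    # Control Plane model (prod_finetuned-zenml-docs-embeddings) and the
    # Vertex accelerator settings for the finetune step.
    finetune_embeddings.with_options(
        config_path="configs/production/embeddings.yaml"
    )()
```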
@@ -1,3 +1,5 @@
enable_cache: False

# environment configuration
settings:
docker:
@@ -11,31 +13,20 @@ settings:
- psycopg2-binary
- tiktoken
- ratelimit
- rerankers
- rerankers[flashrank]
- matplotlib
- pillow
- pygithub
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
ZENML_ENABLE_RICH_TRACEBACK: FALSE
ZENML_LOGGING_VERBOSITY: INFO

steps:
url_scraper:
parameters:
docs_url: https://docs.zenml.io
repo_url: https://github.com/zenml-io/zenml
website_url: https://zenml.io

# generate_embeddings:
# step_operator: "terraform-gcp-6c0fd52233ca"
# settings:
# step_operator.vertex:
# accelerator_type: "NVIDIA_TESLA_P100"
# accelerator_count: 1
# machine_type: "n1-standard-8"

# configuration of the Model Control Plane
model:
- name: finetuned-zenml-docs-embeddings
+ name: prod_finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
tags: ["rag", "finetuned"]
limitations: "Only works for ZenML documentation. Not generalizable to other domains. Entirely build with synthetic data. The data is also quite noisy on account of how the chunks were split."
44 changes: 44 additions & 0 deletions llm-complete-guide/configs/production/rag.yaml
@@ -0,0 +1,44 @@
enable_cache: True

# environment configuration
settings:
docker:
requirements:
- unstructured
- sentence-transformers>=3
- pgvector
- datasets
- litellm
- numpy
- psycopg2-binary
- tiktoken
- ratelimit
- rerankers
- pygithub
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
ZENML_ENABLE_RICH_TRACEBACK: FALSE
ZENML_LOGGING_VERBOSITY: INFO


# configuration of the Model Control Plane
model:
name: prod_finetuned-zenml-docs-embeddings
license: Apache 2.0
description: A fine-tuned embeddings model for ZenML documentation. Used for RAG retrieval.
tags: ["rag", "finetuned"]
limitations: Only works for ZenML documentation. Not generalizable to other domains. Entirely built with synthetic data. The data is also quite noisy on account of how the chunks were split.
trade_offs: Focused on a specific RAG retrieval use case. Not generalizable to other domains.
audience: ZenML users
use_cases: RAG retrieval

steps:
url_scraper:
parameters:
docs_url: https://docs.zenml.io
generate_embeddings:
step_operator: "gcp_a100"
settings:
step_operator.vertex:
accelerator_count: 1
accelerator_type: NVIDIA_TESLA_A100
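
production/rag.yaml (and its staging counterpart later in this diff) also fills in the descriptive Model Control Plane fields: limitations, trade_offs, audience and use_cases. For reference, roughly the same metadata expressed in code with zenml.Model is sketched below; the field names match recent ZenML releases, though the exact version pinned in this repository should be checked.

```python
# Sketch: the Model Control Plane metadata from the YAML above, expressed in code.
from zenml import Model

rag_model = Model(
    name="prod_finetuned-zenml-docs-embeddings",
    license="Apache 2.0",
    description=(
        "A fine-tuned embeddings model for ZenML documentation. "
        "Used for RAG retrieval."
    ),
    tags=["rag", "finetuned"],
    limitations="Only works for ZenML documentation. Not generalizable to other domains.",
    trade_offs="Focused on a specific RAG retrieval use case.",
    audience="ZenML users",
    use_cases="RAG retrieval",
)
```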
39 changes: 39 additions & 0 deletions llm-complete-guide/configs/production/synthetic.yaml
@@ -0,0 +1,39 @@
# environment configuration
settings:
docker:
requirements:
- langchain-community
- ratelimit
- langchain>=0.0.325
- langchain-openai
- pgvector
- psycopg2-binary
- beautifulsoup4
- unstructured
- pandas
- numpy
- sentence-transformers>=3
- transformers==4.43.1
- litellm
- ollama
- tiktoken
- umap-learn
- matplotlib
- pyarrow
- rerankers[flashrank]
- datasets
- torch
- distilabel
- argilla
- pygithub
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete


# configuration of the Model Control Plane
model:
name: prod_finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
40 changes: 40 additions & 0 deletions llm-complete-guide/configs/staging/embeddings.yaml
@@ -0,0 +1,40 @@
# enable_cache: False

# environment configuration
settings:
docker:
parent_image: "zenmldocker/prepare-release:base-0.68.0"
requirements:
- langchain-community
- ratelimit
- langchain>=0.0.325
- langchain-openai
- pgvector
- psycopg2-binary
- beautifulsoup4
- unstructured
- pandas
- numpy
- sentence-transformers>=3
- transformers[torch]==4.43.1
- litellm
- ollama
- tiktoken
- umap-learn
- matplotlib
- pyarrow
- rerankers[flashrank]
- datasets
- torch
- pygithub
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete


# configuration of the Model Control Plane
model:
name: staging_finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
32 changes: 32 additions & 0 deletions llm-complete-guide/configs/staging/eval.yaml
@@ -0,0 +1,32 @@
enable_cache: False

# environment configuration
settings:
docker:
requirements:
- unstructured
- sentence-transformers>=3
- pgvector
- datasets
- litellm
- numpy
- psycopg2-binary
- tiktoken
- ratelimit
- rerankers[flashrank]
- matplotlib
- pillow
- pygithub
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
ZENML_ENABLE_RICH_TRACEBACK: FALSE
ZENML_LOGGING_VERBOSITY: INFO

# configuration of the Model Control Plane
model:
name: staging_finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
limitations: "Only works for ZenML documentation. Not generalizable to other domains. Entirely build with synthetic data. The data is also quite noisy on account of how the chunks were split."
38 changes: 38 additions & 0 deletions llm-complete-guide/configs/staging/rag.yaml
@@ -0,0 +1,38 @@
enable_cache: False

# environment configuration
settings:
docker:
requirements:
- unstructured
- sentence-transformers>=3
- pgvector
- datasets
- litellm
- numpy
- psycopg2-binary
- tiktoken
- ratelimit
- rerankers
- pygithub
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
ZENML_ENABLE_RICH_TRACEBACK: FALSE
ZENML_LOGGING_VERBOSITY: INFO


# configuration of the Model Control Plane
model:
name: staging_finetuned-zenml-docs-embeddings
license: Apache 2.0
description: A fine-tuned embeddings model for ZenML documentation. Used for RAG retrieval.
tags: ["rag", "finetuned"]
limitations: Only works for ZenML documentation. Not generalizable to other domains. Entirely built with synthetic data. The data is also quite noisy on account of how the chunks were split.
trade_offs: Focused on a specific RAG retrieval use case. Not generalizable to other domains.
audience: ZenML users
use_cases: RAG retrieval

steps:
url_scraper:
parameters:
docs_url: https://docs.zenml.io