diff --git a/llm-agents/agent/agent_executor_materializer.py b/llm-agents/agent/agent_executor_materializer.py index 6089ae1f..a77606cf 100644 --- a/llm-agents/agent/agent_executor_materializer.py +++ b/llm-agents/agent/agent_executor_materializer.py @@ -105,4 +105,4 @@ def _save_python_version(self) -> None: """Saves the Python version used to materialize the artifact.""" filepath = os.path.join(self.uri, DEFAULT_PYTHON_VERSION_FILENAME) current_python_version = Environment().python_version() - write_file_contents_as_string(filepath, current_python_version) \ No newline at end of file + write_file_contents_as_string(filepath, current_python_version) diff --git a/llm-agents/configs/agent_config.yaml b/llm-agents/configs/agent_config.yaml new file mode 100644 index 00000000..33263262 --- /dev/null +++ b/llm-agents/configs/agent_config.yaml @@ -0,0 +1,21 @@ +# environment configuration +settings: + docker: + requirements: requirements.txt + required_integrations: ["langchain", "openai", "pillow"] + +model: + name: zenml_agent + license: Apache 2.0 + description: "ZenML Agent with a vector store tool." 
+ tags: ["llm", "agent", "rag"] + +steps: + url_scraper: + enable_cache: False + parameters: + docs_url: "https://docs.zenml.io" + website_url: "https://zenml.io" + repo_url: "https://github.com/zenml-io/zenml/tree/0.55.0/examples" + agent_creator: + enable_cache: False \ No newline at end of file diff --git a/llm-agents/pipelines/agent_creator.py b/llm-agents/pipelines/agent_creator.py index 351c6a6e..9efcdab7 100644 --- a/llm-agents/pipelines/agent_creator.py +++ b/llm-agents/pipelines/agent_creator.py @@ -17,32 +17,11 @@ from steps.index_generator import index_generator from steps.url_scraper import url_scraper from steps.web_url_loader import web_url_loader -from zenml import pipeline, Model -from zenml.config import DockerSettings -from zenml.integrations.constants import LANGCHAIN, OPEN_AI, PILLOW +from zenml import pipeline -PIPELINE_NAME = "zenml_agent_creation_pipeline" -docker_settings = DockerSettings( - requirements="requirements.txt", - required_integrations=[LANGCHAIN, OPEN_AI, PILLOW], -) - -@pipeline(name=PIPELINE_NAME, - enable_cache=True, - settings={"docker": docker_settings}, - model=Model( - name="zenml_agent", - license="Apache", - description="ZenML Agent with a vector store tool.", - tags=["llm", "agent", "rag"] - )) -def docs_to_agent_pipeline( - docs_url: str = "", - repo_url: str = "", - release_notes_url: str = "", - website_url: str = "", -) -> None: +@pipeline +def zenml_agent_creation_pipeline(): """Generate index for ZenML. Args: @@ -51,7 +30,7 @@ def docs_to_agent_pipeline( release_notes_url: URL to the release notes. website_url: URL to the website. 
""" - urls = url_scraper(docs_url, repo_url, release_notes_url, website_url) + urls = url_scraper() documents = web_url_loader(urls) vector_store = index_generator(documents) - agent = agent_creator(vector_store=vector_store) + _ = agent_creator(vector_store=vector_store) diff --git a/llm-agents/run.ipynb b/llm-agents/run.ipynb index 102007e2..94f6d8a2 100644 --- a/llm-agents/run.ipynb +++ b/llm-agents/run.ipynb @@ -465,7 +465,7 @@ " description=\"ZenML Agent with a vector store tool.\",\n", " tags=[\"llm\", \"agent\", \"rag\"]\n", " ))\n", - "def docs_to_agent_pipeline(\n", + "def zenml_agent_creation_pipeline(\n", " docs_url: str = \"\",\n", " repo_url: str = \"\",\n", " release_notes_url: str = \"\",\n", @@ -519,7 +519,7 @@ " f\"https://github.com/zenml-io/zenml/blob/{version}/RELEASE_NOTES.md\"\n", ")\n", "\n", - "docs_to_agent_pipeline(\n", + "zenml_agent_creation_pipeline(\n", " website_url=website_url,\n", " docs_url=docs_url,\n", " repo_url=repo_url,\n", diff --git a/llm-agents/run.py b/llm-agents/run.py index 10f653b9..1682487b 100644 --- a/llm-agents/run.py +++ b/llm-agents/run.py @@ -12,29 +12,50 @@ # or implied. See the License for the specific language governing # permissions and limitations under the License. -import logging +import os +from pipelines.agent_creator import zenml_agent_creation_pipeline -from pipelines.agent_creator import docs_to_agent_pipeline +import click +from zenml.logger import get_logger +logger = get_logger(__name__) -def main(): - version = "0.55.0" - docs_url = f"https://docs.zenml.io/v/{version}/" - website_url = "https://zenml.io" - repo_url = f"https://github.com/zenml-io/zenml/tree/{version}/examples" - release_notes_url = ( - f"https://github.com/zenml-io/zenml/blob/{version}/RELEASE_NOTES.md" - ) - docs_to_agent_pipeline( - website_url=website_url, - docs_url=docs_url, - repo_url=repo_url, - release_notes_url=release_notes_url, - ) +@click.command( + help=""" +ZenML Starter project. 
+ +Run the ZenML starter project with basic options. +Examples: + + \b + # Run the pipeline with agent_config.yaml in the configs folder + python run.py --config agent_config.yaml + +""" +) +@click.option( + "--config", + type=str, + default="agent_config.yaml", + help="Path to the YAML config file.", +) +def main( + config: str = "agent_config.yaml", +): + """Main entry point for the pipeline execution.""" + config_folder = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "configs", + ) + pipeline_args = {} + if config: + pipeline_args["config_path"] = os.path.join( + config_folder, config + ) + zenml_agent_creation_pipeline.with_options(**pipeline_args)() + if __name__ == "__main__": - logging.basicConfig(level="INFO") - logging.getLogger().setLevel(logging.INFO) main() diff --git a/llm-agents/steps/agent_creator.py b/llm-agents/steps/agent_creator.py index 1374a640..77e53680 100644 --- a/llm-agents/steps/agent_creator.py +++ b/llm-agents/steps/agent_creator.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Dict from typing_extensions import Annotated from agent.agent_executor_materializer import AgentExecutorMaterializer @@ -31,7 +31,7 @@ class Config: extra = "ignore" -@step(output_materializers=AgentExecutorMaterializer, enable_cache=False) +@step(output_materializers=AgentExecutorMaterializer) def agent_creator( vector_store: VectorStore, config: AgentParameters = AgentParameters() ) -> Annotated[ diff --git a/llm-agents/steps/index_generator.py b/llm-agents/steps/index_generator.py index 5e0ad6bc..0f08f369 100644 --- a/llm-agents/steps/index_generator.py +++ b/llm-agents/steps/index_generator.py @@ -25,7 +25,7 @@ from zenml import step, log_artifact_metadata -@step(enable_cache=True) +@step def index_generator( documents: List[Document], ) -> Annotated[VectorStore, "vector_store"]: diff --git a/llm-agents/steps/url_scraper.py b/llm-agents/steps/url_scraper.py index b7341cca..0ab4436d 100644 --- a/llm-agents/steps/url_scraper.py +++ 
b/llm-agents/steps/url_scraper.py @@ -18,12 +18,11 @@ from zenml import step, log_artifact_metadata -@step(enable_cache=False) +@step def url_scraper( - docs_url: str = "", - repo_url: str = "", - release_notes_url: str = "", - website_url: str = "", + docs_url: str = "https://docs.zenml.io", + repo_url: str = "https://github.com/zenml-io/zenml", + website_url: str = "https://zenml.io", ) -> Annotated[List[str], "urls"]: """Generates a list of relevant URLs to scrape. @@ -36,13 +35,14 @@ def url_scraper( Returns: List of URLs to scrape. """ + + # We comment this out to make this pipeline faster # examples_readme_urls = get_nested_readme_urls(repo_url) # docs_urls = get_all_pages(docs_url) # website_urls = get_all_pages(website_url) - # all_urls = docs_urls + website_urls + [release_notes_url] + # all_urls = docs_urls + website_urls + examples_readme_urls all_urls = [website_url] log_artifact_metadata( - artifact_name="urls", metadata={ "count": len(all_urls), }, diff --git a/llm-agents/steps/web_url_loader.py b/llm-agents/steps/web_url_loader.py index 0fbdb679..9ae8e302 100644 --- a/llm-agents/steps/web_url_loader.py +++ b/llm-agents/steps/web_url_loader.py @@ -19,7 +19,7 @@ from zenml import step -@step(enable_cache=True) +@step def web_url_loader(urls: List[str]) -> List[Document]: """Loads documents from a list of URLs.