diff --git a/langchain-llamaindex-slackbot/.gitignore b/langchain-llamaindex-slackbot/.gitignore index 6fc2f233..84ad0a94 100644 --- a/langchain-llamaindex-slackbot/.gitignore +++ b/langchain-llamaindex-slackbot/.gitignore @@ -129,7 +129,7 @@ dmypy.json .pyre/ # Zenml -.zen/ +src/.zen/ # MLflow mlruns/ diff --git a/langchain-llamaindex-slackbot/src/local_testing_slackbot.py b/langchain-llamaindex-slackbot/src/local_testing_slackbot.py index f492a5ff..3c1fc5f9 100644 --- a/langchain-llamaindex-slackbot/src/local_testing_slackbot.py +++ b/langchain-llamaindex-slackbot/src/local_testing_slackbot.py @@ -18,10 +18,15 @@ get_vector_store, ) from zenml.logger import get_logger +from zenml.client import Client + +SLACK_BOT_TOKEN = (Client().get_secret("langchain_project_secret") + .secret_values["slack_bot_token"]) +SLACK_APP_TOKEN = (Client().get_secret("langchain_project_secret") + .secret_values["slack_app_token"]) +OPENAI_API_KEY = (Client().get_secret("langchain_project_secret") + .secret_values["openai_api_key"]) -SLACK_BOT_TOKEN = os.getenv("SLACK_BOT_TOKEN") -SLACK_APP_TOKEN = os.getenv("SLACK_APP_TOKEN") -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") PIPELINE_NAME = os.getenv("PIPELINE_NAME", "zenml_docs_index_generation") logger = get_logger(__name__) @@ -77,7 +82,7 @@ def reply_in_thread(body: dict, say, context): thread_ts = event.get("thread_ts", None) or event["ts"] if context["bot_user_id"] in event["text"]: - logger.debug(f"Received message: {event['text']}") + logger.info(f"Received message: {event['text']}") if event.get("thread_ts", None): full_thread = [ f"{msg['text']}" @@ -107,6 +112,7 @@ def reply_in_thread(body: dict, say, context): question=event["text"], verbose=True, ) + logger.info(output) say(text=output, thread_ts=thread_ts) diff --git a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py index f0275226..1335c3b1 100644 --- a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py +++ b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py @@ -11,17 +11,35 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing # permissions and limitations under the License. - +import os from steps.index_generator import index_generator from steps.url_scraper import url_scraper from steps.web_url_loader import web_url_loader from zenml import pipeline +from zenml.config import DockerSettings +from zenml.config.docker_settings import SourceFileMode pipeline_name = "zenml_docs_index_generation" +docker_settings = DockerSettings( + requirements=[ + "langchain==0.0.263", + "openai==0.27.2", + "slack-bolt==1.16.2", + "slack-sdk==3.20.0", + "fastapi", + "flask", + "uvicorn", + "gcsfs==2023.5.0", + "faiss-cpu==1.7.3", + "unstructured==0.5.7", + "tiktoken", + "bs4" + ], + source_files=SourceFileMode.DOWNLOAD +) - -@pipeline(name=pipeline_name) +@pipeline(name=pipeline_name, settings={"docker": docker_settings}) def docs_to_index_pipeline( docs_url: str = "", repo_url: str = "", diff --git a/langchain-llamaindex-slackbot/src/requirements-slackbot.txt b/langchain-llamaindex-slackbot/src/requirements-slackbot.txt index 62066bd2..24f3dffa 100644 --- a/langchain-llamaindex-slackbot/src/requirements-slackbot.txt +++ b/langchain-llamaindex-slackbot/src/requirements-slackbot.txt @@ -2,7 +2,7 @@ langchain==0.0.263 openai==0.27.2 slack-bolt==1.16.2 slack-sdk==3.20.0 -zenml[connectors-gcp]==0.45.3 +zenml[connectors-gcp]==0.45.5 fastapi flask uvicorn diff --git a/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt b/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt index d0ceb3dc..1fc6508c 100644 --- a/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt +++ b/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt @@ -2,7 +2,7 @@ langchain>=0.0.125,<=0.0.263 openai>=0.27.2,<=0.27.8 slack-bolt==1.16.2 slack-sdk==3.20.0 -zenml==0.44.1 +zenml==0.45.6 fastapi flask uvicorn @@ -11,3 +11,4 @@ faiss-cpu>=1.7.3,<=1.7.4 unstructured>=0.5.7,<=0.7.8 lanarky==0.7.12 tiktoken +bs4 \ No newline at end of file diff --git a/langchain-llamaindex-slackbot/src/steps/index_generator.py b/langchain-llamaindex-slackbot/src/steps/index_generator.py index 1b3d065d..7e57b888 100644 --- a/langchain-llamaindex-slackbot/src/steps/index_generator.py +++ b/langchain-llamaindex-slackbot/src/steps/index_generator.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing # permissions and limitations under the License. +import os from typing import List @@ -21,10 +22,12 @@ ) from langchain.vectorstores import FAISS, VectorStore from zenml import step +from zenml.client import Client @step(enable_cache=False) def index_generator(documents: List[Document]) -> VectorStore: + os.environ["OPENAI_API_KEY"] = Client().get_secret("langchain_project_secret").secret_values["openai_api_key"] embeddings = OpenAIEmbeddings() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) diff --git a/langchain-llamaindex-slackbot/src/steps/url_scraper.py b/langchain-llamaindex-slackbot/src/steps/url_scraper.py index fe376aee..cbd8b2be 100644 --- a/langchain-llamaindex-slackbot/src/steps/url_scraper.py +++ b/langchain-llamaindex-slackbot/src/steps/url_scraper.py @@ -16,6 +16,7 @@ from steps.url_scraping_utils import get_all_pages from zenml import step +from zenml.client import Client @step(enable_cache=True) @@ -36,5 +37,4 @@ def url_scraper( Returns: List of URLs to scrape. """ - # examples_readme_urls = get_nested_readme_urls(repo_url) return get_all_pages(docs_url)