diff --git a/apps/voice_rag/.env.template b/apps/voice_rag/.env.template
new file mode 100644
index 000000000..e49e23b06
--- /dev/null
+++ b/apps/voice_rag/.env.template
@@ -0,0 +1,8 @@
+OPENAI_API_KEY=
+DEEPGRAM_API_KEY=
+AZURE_SPEECH_KEY=
+AZURE_SPEECH_REGION=
+
+PINECONE_API_KEY=
+PINECONE_ENVIRONMENT=
+PINECONE_INDEX_NAME=
\ No newline at end of file
diff --git a/apps/voice_rag/Dockerfile b/apps/voice_rag/Dockerfile
new file mode 100644
index 000000000..7a7ad834a
--- /dev/null
+++ b/apps/voice_rag/Dockerfile
@@ -0,0 +1,54 @@
+# Use the official micromamba image as a base
+FROM docker.io/mambaorg/micromamba:1.5-jammy
+
+# Copy the conda environment spec into the image (owned by $MAMBA_USER)
+COPY --chown=$MAMBA_USER:$MAMBA_USER environment.docker.yml /tmp/environment.yml
+
+# Install the specified packages using micromamba
+RUN micromamba install -y -n base -f /tmp/environment.yml && \
+    micromamba clean --all --yes
+
+USER root
+WORKDIR /usr/local/src
+
+ARG VOCODE_USER=vocode
+ARG VOCODE_UID=8476
+ARG VOCODE_GID=8476
+
+RUN groupadd --gid $VOCODE_GID $VOCODE_USER && \
+    useradd --uid $VOCODE_UID --gid $VOCODE_GID --shell /bin/bash --create-home $VOCODE_USER
+
+# COPY --chown=$VOCODE_USER:$VOCODE_USER ../../../ /vocode-python
+# WORKDIR /usr/local/src/vocode
+# RUN poetry install -E all
+
+# Copy the rest of the application files into the Docker image
+COPY --chown=$VOCODE_USER:$VOCODE_USER . /vocode
+WORKDIR /vocode
+
+#USER vocode
+USER root
+
+ENV DOCKER_ENV="docker"
+
+# Expose the port the FastAPI app will run on
+EXPOSE 19002
+
+# Set build arguments
+ARG BUILD_DATE
+ARG VCS_REF
+ARG VERSION
+
+# Set labels
+LABEL org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.name="vocode" \
+      org.label-schema.description="Vocode Docker Image" \
+      org.label-schema.url="https://vocode.dev/" \
+      org.label-schema.vcs-url="https://github.com/vocodedev" \
+      org.label-schema.maintainer="vocode@arpagon.co" \
+      org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.vendor="Vocode" \
+      org.label-schema.version=$VERSION
+
+# Start the FastAPI app using Uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "19002"]
\ No newline at end of file
diff --git a/apps/voice_rag/README.md b/apps/voice_rag/README.md
new file mode 100644
index 000000000..75208faca
--- /dev/null
+++ b/apps/voice_rag/README.md
@@ -0,0 +1,37 @@
+# voice_rag
+
+## Docker
+
+1. Set up the configuration for your agent in `main.py`.
+2. Set up a `.env` file using the template:
+
+```
+cp .env.template .env
+```
+
+Fill in your API keys in `.env`.
+
+3. Build the Docker image:
+
+```bash
+docker build --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \
+  --build-arg VCS_REF=$(git rev-parse --short HEAD) \
+  --build-arg VERSION=0.1.0 \
+  -t vocode/vocode-voice-rag:0.1.0 .
+```
+
+4. Run the image and forward the port (the container listens on 19002):
+
+```bash
+docker run --env-file=.env -p 3000:19002 -t vocode/vocode-voice-rag:0.1.0
+```
+
+Now you have a client backend hosted at localhost:3000 to pass into the Vocode React SDK. You'll likely need to tunnel port 3000 via ngrok (or host your server) in order to use it from the React SDK.
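+
+If the container exits complaining about missing credentials, a quick way to check your `.env` before rebuilding is to confirm that every key from `.env.template` has a value. Below is a minimal sketch using python-dotenv (already in the environment); `check_env.py` is a hypothetical helper, not a file shipped with this app:
+
+```python
+# check_env.py: hypothetical helper script, not part of the app itself
+from dotenv import dotenv_values
+
+# Keys expected by .env.template
+REQUIRED_KEYS = [
+    "OPENAI_API_KEY",
+    "DEEPGRAM_API_KEY",
+    "AZURE_SPEECH_KEY",
+    "AZURE_SPEECH_REGION",
+    "PINECONE_API_KEY",
+    "PINECONE_ENVIRONMENT",
+    "PINECONE_INDEX_NAME",
+]
+
+values = dotenv_values(".env")  # parse .env without touching os.environ
+missing = [key for key in REQUIRED_KEYS if not values.get(key)]
+if missing:
+    raise SystemExit(f"Missing values in .env: {', '.join(missing)}")
+print("All required keys are set.")
+```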
+
+## Non-docker setup
+
+`main.py` sets up a FastAPI server, so you can run it directly with uvicorn:
+
+```
+uvicorn main:app
+```
diff --git a/apps/voice_rag/environment.docker.yml b/apps/voice_rag/environment.docker.yml
new file mode 100644
index 000000000..1a0995602
--- /dev/null
+++ b/apps/voice_rag/environment.docker.yml
@@ -0,0 +1,21 @@
+name: vocode-rag
+channels:
+  - conda-forge
+  - pytorch
+dependencies:
+  - python=3.10
+  - openssl=1.1.1w
+  - portaudio
+  - ffmpeg
+  - git
+  - pip
+  - pip:
+    # Installing vocode from the git repository
+    - git+https://github.com/ArtisanLabs/vocode-python/@461-VectorDB-OpenSource-Documentation#egg=vocode
+    - azure-cognitiveservices-speech==1.31.0
+    - python-dotenv
+    - ipython
+    - deepgram-sdk
+    - uvicorn
+    - pinecone-client
+    - poetry
\ No newline at end of file
diff --git a/apps/voice_rag/main.py b/apps/voice_rag/main.py
new file mode 100644
index 000000000..d952d688f
--- /dev/null
+++ b/apps/voice_rag/main.py
@@ -0,0 +1,85 @@
+import os
+import logging
+from fastapi import FastAPI
+
+from vocode.streaming.models.agent import ChatGPTAgentConfig
+from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
+from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
+
+from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
+from vocode.streaming.client_backend.conversation import ConversationRouter
+from vocode.streaming.models.message import BaseMessage
+from vocode.streaming.vector_db.factory import VectorDBFactory
+from vocode.streaming.vector_db.pinecone import PineconeConfig
+from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
+
+from vocode.streaming.models.transcriber import (
+    DeepgramTranscriberConfig,
+    TimeEndpointingConfig
+)
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+app = FastAPI(docs_url=None)
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+vector_db_config = PineconeConfig(
+    index=os.getenv('PINECONE_INDEX_NAME')
+)
+
+INITIAL_MESSAGE = "Hello!"
+PROMPT_PREAMBLE = '''
+I want you to act as an IT Architect.
+I will provide some details about the functionality of an application or other
+digital product, and it will be your job to come up with ways to integrate it
+into the IT landscape. This could involve analyzing business requirements,
+performing a gap analysis, and mapping the functionality of the new system to
+the existing IT landscape. The next steps are to create a solution design.
+
+You are an expert in these technologies:
+- Langchain
+- Supabase
+- Next.js
+- Fastapi
+- Vocode.
+
+'''
+
+TIME_ENDPOINTING_CONFIG = TimeEndpointingConfig()
+TIME_ENDPOINTING_CONFIG.time_cutoff_seconds = 2
+
+AZURE_SYNTHESIZER_THUNK = lambda output_audio_config: AzureSynthesizer(
+    AzureSynthesizerConfig.from_output_audio_config(output_audio_config),
+    logger=logger
+)
+
+DEEPGRAM_TRANSCRIBER_THUNK = lambda input_audio_config: DeepgramTranscriber(
+    DeepgramTranscriberConfig.from_input_audio_config(
+        input_audio_config=input_audio_config,
+        endpointing_config=TIME_ENDPOINTING_CONFIG,
+        min_interrupt_confidence=0.9,
+    ),
+    logger=logger
+)
+
+conversation_router = ConversationRouter(
+    agent_thunk=lambda: ChatGPTAgent(
+        ChatGPTAgentConfig(
+            initial_message=BaseMessage(text=INITIAL_MESSAGE),
+            prompt_preamble=PROMPT_PREAMBLE,
+            vector_db_config=vector_db_config,
+            logger=logger,
+        ),
+        logger=logger
+    ),
+    synthesizer_thunk=AZURE_SYNTHESIZER_THUNK,
+    transcriber_thunk=DEEPGRAM_TRANSCRIBER_THUNK,
+    logger=logger,
+)
+
+app.include_router(conversation_router.get_router())
diff --git a/apps/voice_rag/manual_pinecone_ingestor.ipynb b/apps/voice_rag/manual_pinecone_ingestor.ipynb
new file mode 100644
index 000000000..a4a0543b3
--- /dev/null
+++ b/apps/voice_rag/manual_pinecone_ingestor.ipynb
@@ -0,0 +1,223 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Colab_Link: https://colab.research.google.com/github/ArtisanLabs/vocode-python/blob/main/apps/voice_rag/manual_pinecone_ingestor.ipynb\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install the required libraries via pip:\n",
+    "# langchain\n",
+    "# spacy\n",
+    "# unstructured = {extras = [\"local-inference\"]}\n",
+    "# layoutparser = {extras = [\"layoutmodels\", \"tesseract\"]}\n",
+    "# pinecone-client\n",
+    "# openai\n",
+    "# torch\n",
+    "# tiktoken\n",
+    "# (git itself is a system tool, not a pip package; it is already available in Colab)\n",
+    "\n",
+    "%pip install langchain spacy unstructured[local-inference] layoutparser[layoutmodels,tesseract] pinecone-client openai torch tiktoken\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "Importing necessary modules and functions:\n",
+    "- os module to interact with the OS\n",
+    "- pinecone module for vector database operations\n",
+    "- OpenAIEmbeddings from langchain.embeddings.openai for generating embeddings\n",
+    "- SpacyTextSplitter from langchain.text_splitter for splitting text into chunks\n",
+    "- Pinecone from langchain.vectorstores for storing and retrieving vectors\n",
+    "- DirectoryLoader and UnstructuredFileLoader from langchain.document_loaders for loading documents from directories and unstructured files\n",
+    "'''\n",
+    "import os\n",
+    "import pinecone\n",
+    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+    "from langchain.text_splitter import SpacyTextSplitter\n",
+    "from langchain.vectorstores import Pinecone\n",
+    "from langchain.document_loaders import DirectoryLoader, UnstructuredFileLoader\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "We are using Google Colab's secret manager to securely input our API keys.\n",
+    "This ensures that the keys are not visible in the notebook and are not stored in the notebook's history.\n",
+    "We also retrieve the Pinecone index name from the secret manager.\n",
+    "Then, we add these keys to the environment variables.\n",
+    "'''\n",
+    "from google.colab import userdata\n",
+    "import os\n",
+    "\n",
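+    "# Note: `google.colab.userdata` is Colab-specific. When running this notebook\n",
+    "# locally, the same keys could instead be loaded from a .env file (for example\n",
+    "# with python-dotenv) or exported in the shell before starting Jupyter.\n",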
"PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')\n", + "os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY\n", + "\n", + "PINECONE_ENVIRONMENT = userdata.get('PINECONE_ENVIRONMENT')\n", + "os.environ['PINECONE_ENVIRONMENT'] = PINECONE_ENVIRONMENT\n", + "\n", + "PINECONE_INDEX = userdata.get('PINECONE_INDEX')\n", + "os.environ['PINECONE_INDEX'] = PINECONE_INDEX\n", + "\n", + "OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')\n", + "os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "'''\n", + "We are defining a list of libraries from which we want to fetch the code.\n", + "Each library is represented as a dictionary with three keys:\n", + " - 'name': the name of the library\n", + " - 'code': the URL of the library's GitHub repository\n", + " - 'documentation_path': the path to the documentation within the repository\n", + "'''\n", + "libraries = [\n", + " {\n", + " 'name': 'langchain',\n", + " 'code': 'https://github.com/langchain-ai/langchain',\n", + " 'documentation_path': 'docs'\n", + " },\n", + " {\n", + " 'name': 'supabase',\n", + " 'code': 'https://github.com/supabase/supabase',\n", + " 'documentation_path': 'apps/docs'\n", + " },\n", + " {\n", + " 'name': 'next.js',\n", + " 'code': 'https://github.com/vercel/next.js',\n", + " 'documentation_path': 'docs'\n", + " },\n", + " {\n", + " 'name': 'fastapi',\n", + " 'code': 'https://github.com/tiangolo/fastapi',\n", + " 'documentation_path': 'docs/en/docs'\n", + " },\n", + " {\n", + " 'name': 'vocode-python',\n", + " 'code': 'https://github.com/vocodedev/vocode-python',\n", + " 'documentation_path': 'docs'\n", + " }\n", + "]\n", + "\n", + "'''\n", + "The function get_code_from_github is defined to fetch the code from the GitHub repositories.\n", + "It iterates over the libraries list and for each library, it clones the repository to a temporary directory.\n", + "Then, it moves the documentation to a directory named 'libraries_documentation/{name}'.\n", + "'''\n", + "import os\n", + "import shutil\n", + "import tempfile\n", + "import subprocess\n", + "\n", + "def get_code_from_github():\n", + " # Create a temporary directory\n", + " with tempfile.TemporaryDirectory() as temp_dir:\n", + " for library in libraries:\n", + " # Define the clone directory path\n", + " clone_dir = os.path.join(temp_dir, library['name'])\n", + "\n", + " # Clone the repository using the git command\n", + " subprocess.run(['git', 'clone', library['code'], clone_dir], check=True)\n", + " \n", + " # Define the source and destination for moving documentation\n", + " doc_source = os.path.join(clone_dir, library['documentation_path'])\n", + " doc_dest = os.path.join('libraries_documentation', library['name'])\n", + "\n", + " # Create the destination directory if it doesn't exist\n", + " os.makedirs(doc_dest, exist_ok=True)\n", + "\n", + " # Move the documentation to the destination directory\n", + " shutil.move(doc_source, doc_dest)\n", + "\n", + "get_code_from_github()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "'''\n", + "The following code block is responsible for loading, splitting, and indexing the documents.\n", + "'''\n", + "\n", + "'''\n", + "Create a DirectoryLoader object to load all .md and .mdx files from the 'libraries_documentation' directory.\n", + "The glob pattern \"**/*.md*\" is used to match any markdown file in the directory or its subdirectories.\n", + "The 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "The following code block is responsible for loading, splitting, and indexing the documents.\n",
+    "'''\n",
+    "\n",
+    "'''\n",
+    "Create a DirectoryLoader object to load all .md and .mdx files from the 'libraries_documentation' directory.\n",
+    "The glob pattern \"**/*.md*\" is used to match any markdown file in the directory or its subdirectories.\n",
+    "The UnstructuredFileLoader class is used to load the files.\n",
+    "'''\n",
+    "loader = DirectoryLoader('./libraries_documentation', glob=\"**/*.md*\", show_progress=True, loader_cls=UnstructuredFileLoader)\n",
+    "\n",
+    "'''\n",
+    "Load the documents from the directory.\n",
+    "'''\n",
+    "print(\"Loading documents...\")\n",
+    "documents = loader.load()\n",
+    "\n",
+    "'''\n",
+    "Create a SpacyTextSplitter object to split the documents into chunks of 1000 characters.\n",
+    "'''\n",
+    "text_splitter = SpacyTextSplitter(chunk_size=1000)\n",
+    "\n",
+    "'''\n",
+    "Split the documents into chunks.\n",
+    "'''\n",
+    "print(\"Splitting documents...\")\n",
+    "docs = text_splitter.split_documents(documents)\n",
+    "\n",
+    "'''\n",
+    "Create an OpenAIEmbeddings object to generate embeddings for the documents.\n",
+    "'''\n",
+    "embeddings = OpenAIEmbeddings()\n",
+    "\n",
+    "'''\n",
+    "Initialize the Pinecone client with the API key and environment variables.\n",
+    "'''\n",
+    "pinecone.init(\n",
+    "    api_key=PINECONE_API_KEY,\n",
+    "    environment=PINECONE_ENVIRONMENT,\n",
+    ")\n",
+    "\n",
+    "'''\n",
+    "Set the name of the Pinecone index.\n",
+    "'''\n",
+    "index_name = PINECONE_INDEX\n",
+    "\n",
+    "'''\n",
+    "Upsert the documents and their embeddings into the Pinecone index.\n",
+    "'''\n",
+    "print(\"Creating index...\")\n",
+    "docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/images/vectodb/pinecone-create-index.png b/docs/images/vectodb/pinecone-create-index.png
new file mode 100644
index 000000000..11b84c0ba
Binary files /dev/null and b/docs/images/vectodb/pinecone-create-index.png differ
diff --git a/docs/open-source/rag.mdx b/docs/open-source/rag.mdx
new file mode 100644
index 000000000..0c0b4f5b7
--- /dev/null
+++ b/docs/open-source/rag.mdx
@@ -0,0 +1,57 @@
+---
+title: "Retrieval-augmented generation (RAG)"
+description: "Give agents knowledge via embeddings in the open-source library"
+---
+# Vocode retrieval-augmented generation (RAG)
+## Introduction
+
+For open-source developers building advanced conversational AI models, one challenge looms large:
+limited context windows in prompts. Past interactions quickly vanish, forcing bots to rely on incomplete
+information. Vocode introduces a novel solution: retrieval-augmented generation (RAG) powered by vector databases.
+
+## Here's how it works:
+
+- **Pre-computed embeddings:** Vocode leverages vector databases like Pinecone to store concise representations
+  of relevant knowledge. These bite-sized "memory chunks" efficiently encode factual details,
+  contexts, and even emotional nuances.
+- **Dynamic retrieval:** As your bot interacts, Vocode queries the database for the embeddings most similar to the
+  current conversation. Think of it as a smart librarian fetching the most relevant books on the fly.
+- **Contextual guidance:** The retrieved embeddings are then fed to the language model, enriching its
+  understanding of the ongoing dialogue. This enables the bot to generate more informed, contextually rich,
+  and ultimately **smarter responses.**
+
+## Benefits for builders:
+- **Scalability:** Vector databases grow seamlessly with your needs, accommodating vast knowledge
+  repositories without straining your system.
+- **Efficiency:** Say goodbye to bloated prompts filled with redundant information. Vocode delivers precise
+  relevance with minimal data overhead.
+- **Openness:** Built on open-source principles, Vocode empowers developers to contribute, customize, and
+  build upon its foundation.
+- **Transparency:** Explore the reasoning behind retrieved embeddings and gain valuable insights into your
+  bot's decision-making process.
+
+## How to set up your Pinecone database
+
+In this guide, we'll get your Pinecone database fired up and ready to fuel your retrieval-augmented
+chatbot with a robust knowledge base. Buckle up, developers!
+
+### 1. Create Your Vector Index:
+
+1. Navigate to the Pinecone dashboard and click the "Create Index" button.
+2. Name your index something meaningful, like "vocode-documentation-vector" for our Vocode-documentation bot.
+3. Set the dimensions to 1536 to match OpenAI's text-embedding-ada-002 embeddings (the default used by the ingestion notebook below).
+4. Stick with the cosine metric for consistency.
+5. Hit "Create Index" and let the magic happen!
+
+![Setup](images/vectodb/pinecone-create-index.png)
+
+## Document loading script
+
+You can manually add documents to Pinecone any way you like, as long as you include the required metadata.
+If you have a folder of PDFs, docx files, text files, etc. that you want to add to Pinecone, you can use
+the script below, which uses [Unstructured](https://github.com/Unstructured-IO/unstructured) to parse
+many kinds of file types, extract the text, and add it to Pinecone:
+
+`apps/voice_rag/manual_pinecone_ingestor.ipynb`
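+
+For reference, the core of that notebook boils down to four steps: load the files with Unstructured, split them into chunks, embed the chunks with OpenAI, and upsert everything into your Pinecone index. Here is a condensed sketch of the same flow (assuming your documents live in a local `./docs_to_ingest` folder, a placeholder path, and that the `PINECONE_API_KEY`, `PINECONE_ENVIRONMENT`, `PINECONE_INDEX`, and `OPENAI_API_KEY` environment variables are set, as in the notebook):
+
+```python
+import os
+import pinecone
+from langchain.document_loaders import DirectoryLoader, UnstructuredFileLoader
+from langchain.text_splitter import SpacyTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Pinecone
+
+# Load every markdown file in the folder ("./docs_to_ingest" is a placeholder path)
+loader = DirectoryLoader("./docs_to_ingest", glob="**/*.md*", loader_cls=UnstructuredFileLoader)
+documents = loader.load()
+
+# Split into ~1000-character chunks so each embedding stays focused
+docs = SpacyTextSplitter(chunk_size=1000).split_documents(documents)
+
+# Embed the chunks and upsert them into the 1536-dimension index created above
+pinecone.init(
+    api_key=os.environ["PINECONE_API_KEY"],
+    environment=os.environ["PINECONE_ENVIRONMENT"],
+)
+Pinecone.from_documents(docs, OpenAIEmbeddings(), index_name=os.environ["PINECONE_INDEX"])
+```
+
+The notebook itself adds one more step: it clones the documentation folders of langchain, supabase, next.js, fastapi, and vocode-python from GitHub before ingesting them, which is how the Vocode-documentation bot described above gets its knowledge base.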