diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..d65e930
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "open-webui"]
+	path = open-webui
+	url = https://github.com/open-webui/open-webui.git
+[submodule "ollama-webui"]
+	path = ollama-webui
+	url = https://github.com/Sunwood-ai-labs/ollama-webui.git
diff --git a/Dockerfile.openweb.ui b/Dockerfile.openweb.ui
new file mode 100644
index 0000000..59c7da9
--- /dev/null
+++ b/Dockerfile.openweb.ui
@@ -0,0 +1,137 @@
+# syntax=docker/dockerfile:1
+# Initialize device type args
+# use build args in the docker build command with --build-arg="BUILDARG=true"
+ARG USE_CUDA=false
+ARG USE_OLLAMA=false
+# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
+ARG USE_CUDA_VER=cu121
+# any sentence-transformers model; available models are listed at https://huggingface.co/models?library=sentence-transformers
+# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
+# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
+# IMPORTANT: If you change the embedding model (default: sentence-transformers/all-MiniLM-L6-v2), RAG Chat will no longer work with documents already loaded in the WebUI; they need to be re-embedded.
+ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+ARG USE_RERANKING_MODEL=""
+
+######## WebUI frontend ########
+FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build
+
+WORKDIR /app
+
+COPY ./ollama-webui/package.json ./ollama-webui/package-lock.json ./
+RUN npm ci
+
+COPY ./ollama-webui/ .
+RUN npm run build
+
+######## WebUI backend ########
+FROM python:3.11-slim-bookworm as base
+
+# Use args
+ARG USE_CUDA
+ARG USE_OLLAMA
+ARG USE_CUDA_VER
+ARG USE_EMBEDDING_MODEL
+ARG USE_RERANKING_MODEL
+
+## Basis ##
+ENV ENV=prod \
+    PORT=8080 \
+    # pass build args to the build
+    USE_OLLAMA_DOCKER=${USE_OLLAMA} \
+    USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
+    USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
+    USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
+
+## Basis URL Config ##
+ENV OLLAMA_BASE_URL="/ollama" \
+    OPENAI_API_BASE_URL=""
+
+## API Key and Security Config ##
+ENV OPENAI_API_KEY="" \
+    WEBUI_SECRET_KEY="" \
+    SCARF_NO_ANALYTICS=true \
+    DO_NOT_TRACK=true \
+    ANONYMIZED_TELEMETRY=false
+
+# Use the locally bundled version of the LiteLLM cost map JSON
+# to avoid repetitive startup connections
+ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
+
+
+#### Other models #########################################################
+## whisper TTS model settings ##
+ENV WHISPER_MODEL="base" \
+    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
+
+## RAG Embedding model settings ##
+ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
+    RAG_RERANKING_MODEL="$USE_RERANKING_MODEL_DOCKER" \
+    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
+
+## Hugging Face download cache ##
+ENV HF_HOME="/app/backend/data/cache/embedding/models"
+#### Other models ##########################################################
+
+WORKDIR /app/backend
+
+ENV HOME /root
+RUN mkdir -p $HOME/.cache/chroma
+RUN echo -n 00000000-0000-0000-0000-000000000000 > $HOME/.cache/chroma/telemetry_user_id
+
+RUN if [ "$USE_OLLAMA" = "true" ]; then \
+    apt-get update && \
+    # Install pandoc and netcat
+    apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
+    # for RAG OCR
+    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
+    # install helper tools
+    apt-get install -y --no-install-recommends curl && \
+    # install ollama
+    curl -fsSL https://ollama.com/install.sh | sh && \
+    # cleanup
+    rm -rf /var/lib/apt/lists/*; \
+    else \
+    apt-get update && \
+    # Install pandoc and netcat
+    apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
+    # for RAG OCR
+    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
+    # cleanup
+    rm -rf /var/lib/apt/lists/*; \
+    fi
+
+# install python dependencies
+COPY ./data/litellm/requirements.txt ./requirements.txt
+
+RUN pip3 install uv && \
+    if [ "$USE_CUDA" = "true" ]; then \
+    # If you use CUDA, the whisper and embedding models will be downloaded on first use
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir && \
+    python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+    python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+    else \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir && \
+    python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+    python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+    fi
+
+
+
+# copy embedding weight from build
+# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
+# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
+
+# copy built frontend files
+COPY --from=build /app/build /app/build
+COPY --from=build /app/CHANGELOG.md /app/CHANGELOG.md
+COPY --from=build /app/package.json /app/package.json
+
+# copy backend files
+COPY ./ollama-webui/backend .
+
+EXPOSE 8080
+
+CMD [ "bash", "start.sh"]
\ No newline at end of file
diff --git a/data/litellm/config.yaml b/data/litellm/config.yaml
index e3f420c..481c443 100644
--- a/data/litellm/config.yaml
+++ b/data/litellm/config.yaml
@@ -25,13 +25,14 @@ model_list:
 - litellm_params:
     api_key: "os.environ/ANTHROPIC_API_KEY"
     model: claude-3-haiku-20240307
-  model_name: claude-3-haiku-20240307
+    max_tokens: 2000
+  model_name: claude-3-haiku-20240307-2k
 
-- litellm_params:
-    model: anthropic.claude-3-haiku-20240307-v1:0
-    ### [OPTIONAL] SET AWS REGION ###
-    aws_region_name: us-west-2
-  model_name: aws_anthropic.claude-3-haiku-20240307-v1:0
+- litellm_params:
+    api_key: "os.environ/ANTHROPIC_API_KEY"
+    model: claude-3-sonnet-20240229
+    max_tokens: 2000
+  model_name: claude-3-sonnet-20240229-2k
 
 - litellm_params:
     model: anthropic.claude-3-haiku-20240307-v1:0
@@ -39,14 +40,20 @@ model_list:
     aws_region_name: us-west-2
   model_name: aws_anthropic.claude-3-haiku-20240307-v1:0
 
+# - litellm_params:
+#     model: anthropic.claude-3-haiku-20240307-v1:0
+#     ### [OPTIONAL] SET AWS REGION ###
+#     aws_region_name: us-west-2
+#   model_name: aws_anthropic.claude-3-haiku-20240307-v1:0
+
 - litellm_params:
     model: anthropic.claude-3-sonnet-20240229-v1:0
     ### [OPTIONAL] SET AWS REGION ###
     aws_region_name: us-west-2
   model_name: aws_anthropic.claude-3-sonnet-20240229-v1:0
 
-- litellm_params:
-    model: anthropic.claude-3-sonnet-20240229-v1:0
-    ### [OPTIONAL] SET AWS REGION ###
-    aws_region_name: us-west-2
-  model_name: aws_anthropic.claude-3-sonnet-20240229-v1:0
\ No newline at end of file
+# - litellm_params:
+#     model: anthropic.claude-3-sonnet-20240229-v1:0
+#     ### [OPTIONAL] SET AWS REGION ###
+#     aws_region_name: us-west-2
+#   model_name: aws_anthropic.claude-3-sonnet-20240229-v1:0
\ No newline at end of file
diff --git a/data/litellm/requirements.txt b/data/litellm/requirements.txt
new file mode 100644
index 0000000..0b04c4c
--- /dev/null
+++ b/data/litellm/requirements.txt
@@ -0,0 +1,62 @@
+fastapi==0.109.2
+uvicorn[standard]==0.22.0
+pydantic==2.7.1
+python-multipart==0.0.9
+
+Flask==3.0.3
+Flask-Cors==4.0.0
+
+python-socketio==5.11.2
+python-jose==3.3.0
+passlib[bcrypt]==1.7.4
+uuid==1.30
+
+requests==2.31.0
+aiohttp==3.9.5
+peewee==3.17.3
+peewee-migrate==1.12.2
+psycopg2-binary==2.9.9
+PyMySQL==1.1.0
+bcrypt==4.1.2
+
+litellm
+litellm[proxy]
+
+boto3==1.34.95
+
+argon2-cffi==23.1.0
+APScheduler==3.10.4
+google-generativeai==0.5.2
+
+langchain==0.1.16
+langchain-community==0.0.34
+langchain-chroma==0.1.0
+
+fake-useragent==1.5.1
+chromadb==0.4.24
+sentence-transformers==2.7.0
+pypdf==4.2.0
+docx2txt==0.8
+unstructured==0.11.8
+Markdown==3.6
+pypandoc==1.13
+pandas==2.2.2
+openpyxl==3.1.2
+pyxlsb==1.0.10
+xlrd==2.0.1
+validators==0.28.1
+
+opencv-python-headless==4.9.0.80
+rapidocr-onnxruntime==1.2.3
+
+fpdf2==2.7.8
+rank-bm25==0.2.2
+
+faster-whisper==1.0.1
+
+PyJWT==2.8.0
+PyJWT[crypto]==2.8.0
+
+black==24.4.2
+langfuse==2.27.3
+youtube-transcript-api
diff --git a/docker-compose.ollama.yml b/docker-compose.ollama.yml
index 6d53286..3586e4a 100644
--- a/docker-compose.ollama.yml
+++ b/docker-compose.ollama.yml
@@ -6,7 +6,7 @@ services:
     ports:
       - "11434:11434"
     volumes:
-      - ./data/ollama:/root/.ollama
+      - ollama-moa2:/root/.ollama
     env_file:
       - .env
     pull_policy: always
@@ -26,23 +26,40 @@ services:
       context: .
       args:
         OLLAMA_BASE_URL: '/ollama'
-      dockerfile: Dockerfile
-    image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
+      dockerfile: Dockerfile.openweb.ui
+      # dockerfile: Dockerfile
+    image: ghcr.io/open-webui/open-webui:main
     volumes:
-      - open-webui:/app/backend/data
+      - open-webui-moa2:/app/backend/data
       - ./data/litellm/config.yaml:/app/backend/data/litellm/config.yaml
+      - ./:/work
     depends_on:
       - ollama
     ports:
      - ${OPEN_WEBUI_PORT-3000}:8080
-      - 14365:14365
+      # - 14365:14365
     env_file:
       - .env
-    extra_hosts:
-      - host.docker.internal:host-gateway
+    # extra_hosts:
+    #   - host.docker.internal:host-gateway
     restart: unless-stopped
 
+  litellm:
+    image: ghcr.io/berriai/litellm:main-latest
+    volumes:
+      - ./data/litellm/config.yaml:/app/config.yaml
+    ports:
+      - "14365:14365"
+    command:
+      - /bin/sh
+      - -c
+      - |
+        pip install async_generator
+        litellm --config '/app/config.yaml' --debug --host 0.0.0.0 --port 14365 --num_workers 8
+    entrypoint: []
+    env_file:
+      - .env
 
 volumes:
-  ollama: {}
-  open-webui: {}
\ No newline at end of file
+  ollama-moa2: {}
+  open-webui-moa2: {}
\ No newline at end of file
diff --git a/ollama-webui b/ollama-webui
new file mode 160000
index 0000000..855f2dc
--- /dev/null
+++ b/ollama-webui
@@ -0,0 +1 @@
+Subproject commit 855f2dc6a3dfa4ff1778e2d6b7729f9d74e4872b
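
A minimal sketch of how this stack might be built and started once the patch is applied. The file names (docker-compose.ollama.yml, Dockerfile.openweb.ui, data/litellm/config.yaml, .env) come from the diff above; the commands themselves and the curl smoke test are illustrative assumptions, not part of this change.

    # fetch the ollama-webui submodule referenced in .gitmodules
    git submodule update --init --recursive

    # build the Open WebUI image from Dockerfile.openweb.ui and start the ollama, open-webui and litellm services
    docker compose -f docker-compose.ollama.yml build
    docker compose -f docker-compose.ollama.yml up -d

    # the new litellm service publishes the proxy on port 14365; listing models is a quick check
    # (an Authorization header may be required depending on the LiteLLM master key settings in config.yaml)
    curl http://localhost:14365/v1/models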