Skip to content

Commit

Permalink
feat: sglang integration
Browse files Browse the repository at this point in the history
  • Loading branch information
av committed Sep 13, 2024
1 parent d570b95 commit eb51529
Show file tree
Hide file tree
Showing 13 changed files with 141 additions and 3 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Harbor is a containerized LLM toolkit that allows you to run LLMs and additional

##### Backends

[Ollama](https://github.com/av/harbor/wiki/Services#ollama) ⦁︎ [llama.cpp](https://github.com/av/harbor/wiki/Services#llamacpp) ⦁︎ [vLLM](https://github.com/av/harbor/wiki/Services#vllm) ⦁︎ [TabbyAPI](https://github.com/av/harbor/wiki/Services#tabbyapi) ⦁︎ [Aphrodite Engine](https://github.com/av/harbor/wiki/Services#aphrodite-engine) ⦁︎ [mistral.rs](https://github.com/av/harbor/wiki/Services#mistralrs) ⦁︎ [openedai-speech](https://github.com/av/harbor/wiki/Services#openedai-speech) ⦁︎ [Parler](https://github.com/av/harbor/wiki/Services#parler) ⦁︎ [text-generation-inference](https://github.com/av/harbor/wiki/Services#text-generation-inference) ⦁︎ [LMDeploy](https://github.com/av/harbor/wiki/Services#lmdeploy) ⦁︎ [AirLLM](https://github.com/av/harbor/wiki/Services#airllm)
[Ollama](https://github.com/av/harbor/wiki/Services#ollama) ⦁︎ [llama.cpp](https://github.com/av/harbor/wiki/Services#llamacpp) ⦁︎ [vLLM](https://github.com/av/harbor/wiki/Services#vllm) ⦁︎ [TabbyAPI](https://github.com/av/harbor/wiki/Services#tabbyapi) ⦁︎ [Aphrodite Engine](https://github.com/av/harbor/wiki/Services#aphrodite-engine) ⦁︎ [mistral.rs](https://github.com/av/harbor/wiki/Services#mistralrs) ⦁︎ [openedai-speech](https://github.com/av/harbor/wiki/Services#openedai-speech) ⦁︎ [Parler](https://github.com/av/harbor/wiki/Services#parler) ⦁︎ [text-generation-inference](https://github.com/av/harbor/wiki/Services#text-generation-inference) ⦁︎ [LMDeploy](https://github.com/av/harbor/wiki/Services#lmdeploy) ⦁︎ [AirLLM](https://github.com/av/harbor/wiki/Services#airllm) ⦁︎ [SGLang](https://github.com/av/harbor/wiki/Services#sglang)

##### Satellites

Expand All @@ -33,7 +33,7 @@ harbor up searxng

# Run additional/alternative LLM Inference backends
# Open Webui is automatically connected to them.
harbor up llamacpp tgi litellm vllm tabbyapi aphrodite
harbor up llamacpp tgi litellm vllm tabbyapi aphrodite sglang

# Run different Frontends
harbor up librechat chatui bionicgpt hollama
Expand All @@ -57,6 +57,7 @@ harbor aphrodite model google/gemma-2-2b-it
harbor tabbyapi model google/gemma-2-2b-it-exl2
harbor mistralrs model google/gemma-2-2b-it
harbor opint model google/gemma-2-2b-it
harbor sglang model google/gemma-2-2b-it

# Convenience tools for docker setup
harbor logs llamacpp
Expand Down
8 changes: 8 additions & 0 deletions aichat/configs/aichat.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
clients:
  # OpenAI-compatible client pointing at the SGLang container's API
  # (SGLang listens on port 30000 inside the compose network — see compose.sglang.yml)
  - type: openai-compatible
    name: sglang
    api_base: http://sglang:30000/v1
    # Placeholder key; presumably not validated by SGLang — NOTE(review): confirm
    api_key: sk-sglang
    models:
      # Quoted so an empty or special-character expansion cannot break YAML parsing
      - name: "${HARBOR_AICHAT_MODEL}"

4 changes: 4 additions & 0 deletions aider/configs/aider.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# aider client config for the SGLang backend (OpenAI-compatible API on port 30000)
openai-api-base: http://sglang:30000/v1
# Placeholder key; presumably not validated by SGLang — NOTE(review): confirm
openai-api-key: sk-sglang
# 'openai/' prefix — presumably selects aider's OpenAI-compatible provider; verify
model: openai/${HARBOR_AIDER_MODEL}
# Plain HTTP inside the compose network, so TLS verification is disabled
verify-ssl: false
23 changes: 23 additions & 0 deletions compose.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
services:
  sglang:
    image: lmsysorg/sglang:${HARBOR_SGLANG_VERSION}
    container_name: ${HARBOR_CONTAINER_PREFIX}.sglang
    volumes:
      # Shared Hugging Face cache so models are downloaded once across backends
      - ${HARBOR_HF_CACHE}:/root/.cache/huggingface
      # vLLM cache is mounted too — presumably SGLang reuses vLLM internals; confirm
      - ${HARBOR_VLLM_CACHE}:/root/.cache/vllm
    ports:
      # Quoted: unquoted host:container pairs can hit YAML's base-60 integer parsing
      - "${HARBOR_SGLANG_HOST_PORT}:30000"
    environment:
      # Needed for gated models (e.g. google/gemma-2-2b-it)
      - HF_TOKEN=${HARBOR_HF_TOKEN}
    entrypoint: python3 -m sglang.launch_server
    # Folded scalar: rendered as a single argument line for launch_server
    command: >
      --model-path ${HARBOR_SGLANG_MODEL}
      --host 0.0.0.0
      --port 30000
      ${HARBOR_SGLANG_EXTRA_ARGS}
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    networks:
      - harbor-network
4 changes: 4 additions & 0 deletions compose.x.aider.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Compose cross-file: mounts the aider SGLang client config into the aider container
services:
  aider:
    volumes:
      - ./aider/configs/aider.sglang.yml:/root/.aider/sglang.yml
9 changes: 9 additions & 0 deletions compose.x.sglang.nvidia.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Compose cross-file: reserves all NVIDIA GPUs for the sglang container
services:
  sglang:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
4 changes: 4 additions & 0 deletions compose.x.webui.sglang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Compose cross-file: mounts the Open WebUI SGLang connection config into the webui container
services:
  webui:
    volumes:
      - ./open-webui/configs/config.sglang.json:/app/configs/config.sglang.json
6 changes: 6 additions & 0 deletions default.env
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,12 @@ HARBOR_LMEVAL_EXTRA_ARGS=""
HARBOR_LMEVAL_MODEL_SPECIFIER=""
HARBOR_LMEVAL_MODEL_ARGS=""

# SGLang
# Host port the SGLang API is published on (maps to container port 30000)
HARBOR_SGLANG_HOST_PORT=34091
# Tag of the lmsysorg/sglang image to run
HARBOR_SGLANG_VERSION="latest"
# Hugging Face repo served by default (passed as --model-path)
HARBOR_SGLANG_MODEL="google/gemma-2-2b-it"
# Extra args appended to the sglang.launch_server command line
HARBOR_SGLANG_EXTRA_ARGS=""

# ============================================
# Service Configuration.
# You can specify any of the service's own environment variables here.
Expand Down
30 changes: 30 additions & 0 deletions harbor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ show_help() {
echo " chatui - Configure HuggingFace ChatUI service"
echo " comfyui - Configure ComfyUI service"
echo " parler - Configure Parler service"
echo " sglang - Configure SGLang CLI"
echo " omnichain - Work with Omnichain service"
echo
echo "Service CLIs:"
Expand All @@ -55,6 +56,7 @@ show_help() {
echo " plandex - Launch Plandex CLI"
echo " cmdh - Run cmdh CLI"
echo " parllama - Launch Parllama - TUI for chatting with Ollama models"
echo " bench - Run and manage Harbor Bench"
echo " hf - Run the Harbor's Hugging Face CLI. Expanded with a few additional commands."
echo " hf dl - HuggingFaceModelDownloader CLI"
echo " hf parse-url - Parse file URL from Hugging Face"
Expand Down Expand Up @@ -2707,6 +2709,30 @@ run_lm_eval_command() {
lmeval "$@"
}

# Harbor CLI handler for the 'sglang' subcommand.
# Manages Harbor's SGLang service configuration (not the sglang CLI itself).
run_sglang_command() {
  case "$1" in
    model)
      shift
      # Get or set the model repo to serve (sglang.model in Harbor's env config)
      env_manager_alias sglang.model "$@"
      return 0
      ;;
    args)
      shift
      # Get or set extra args passed to sglang.launch_server
      env_manager_alias sglang.extra.args "$@"
      return 0
      ;;
    -h|--help|help|"")
      # Show usage for help flags and also when called with no subcommand
      echo "Please note that this is not sglang CLI, but a Harbor CLI to manage sglang service."
      echo
      echo "Usage: harbor sglang <command>"
      echo
      echo "Commands:"
      echo " harbor sglang model [user/repo] - Get or set the sglang model repository to run"
      echo " harbor sglang args [args] - Get or set extra args to pass to the sglang CLI"
      ;;
    *)
      # Previously unknown subcommands were silently ignored (fell through, exit 0);
      # fail loudly instead so typos are caught.
      echo "Unknown command: harbor sglang $1" >&2
      echo "Run 'harbor sglang help' for usage." >&2
      return 1
      ;;
  esac
}

# ========================================================================
# == Main script
# ========================================================================
Expand Down Expand Up @@ -2952,6 +2978,10 @@ main_entrypoint() {
shift
run_lm_eval_command "$@"
;;
sglang)
shift
run_sglang_command "$@"
;;
tunnel|t)
shift
establish_tunnel "$@"
Expand Down
18 changes: 18 additions & 0 deletions http-catalog/sglang.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SGLang request catalog — targets the default host port from default.env (34091)
@host = http://localhost:34091

###

# List models exposed by the OpenAI-compatible API
curl {{host}}/v1/models

###

# Chat-completion smoke test; "model" value is arbitrary here — presumably the
# server serves whichever model it was launched with — NOTE(review): confirm
curl {{host}}/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer sk-sglang" -d '{
"model": "anything",
"messages": [
{
"role": "user",
"content": "Bobby was born in Paris. How old is Bobby?"
}
],
"max_tokens": 30
}'
20 changes: 20 additions & 0 deletions librechat/librechat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,23 @@ endpoints:
summaryModel: "togethercomputer/llama-2-7b-chat"
forcePrompt: false
modelDisplayLabel: "together.ai"


# SGLang — custom OpenAI-compatible endpoint for LibreChat
- name: "SGLang"
apiKey: "sk-sglang"
# use 'host.docker.internal' instead of localhost if running LibreChat in a docker container
baseURL: "http://sglang:30000/v1/chat/completions"
models:
# NOTE(review): empty default entry — presumably superseded by the fetched list; confirm
default: [
""
]
# fetch the list of available models from the endpoint's /v1/models route
# (the previous comment about an `ollama` name prefix was a copy-paste leftover
# from the Ollama endpoint and does not apply here)
fetch: true
titleConvo: true
titleModel: "current_model"
summarize: false
summaryModel: "current_model"
forcePrompt: false
modelDisplayLabel: "SGLang"
2 changes: 1 addition & 1 deletion open-webui/configs/config.airllm.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
{
"openai": {
"api_base_urls": [
"http://airllm:5000/v1"
Expand Down
11 changes: 11 additions & 0 deletions open-webui/configs/config.sglang.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"openai": {
"api_base_urls": [
"http://sglang:30000/v1"
],
"api_keys": [
"sk-sglang"
],
"enabled": true
}
}

0 comments on commit eb51529

Please sign in to comment.