# feat: add --split-mode CLI option (#137)
# Note: this file contains bidirectional Unicode text that may be interpreted or
# compiled differently than it appears. To review it, open the file in an editor
# that reveals hidden Unicode characters.
# Workflow that builds rag-api-server and runs its Hurl API tests.
name: Test API Server

# Triggers:
#   - push to dev, main, or any release-/feat-/ci-/refactor-/fix-/test- branch
#   - pull requests (opened, synchronize, reopened) targeting dev or main
# In both cases only when at least one of the listed paths changed.
on:
  push:
    branches:
      - dev
      - main
      - 'release-*'
      - 'feat-*'
      - 'ci-*'
      - 'refactor-*'
      - 'fix-*'
      - 'test-*'
    paths:
      - '.github/workflows/test_api_server.yml'
      - '**/Cargo.toml'
      - '**/Cargo.lock'
      - '**/*.rs'
      - '**/*.sh'
      - '**/.cargo/config.toml'
      - 'tests/*.hurl'
  pull_request:
    branches:
      - dev
      - main
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/**'
      - '**/Cargo.toml'
      - '**/*.rs'
      - '**/*.sh'
      - 'tests/*.hurl'

# Four jobs run the same pipeline on different runners, chained with `needs`
# so they execute one after another:
#   ubuntu-latest -> macos-13 -> macos-14 -> macos-15
# Each job: install toolchains, WasmEdge, Hurl and Qdrant; build
# rag-api-server to wasm; download the models; start Qdrant and the server;
# run the embeddings Hurl test; stop both processes.
jobs:
  test-api-server-ubuntu:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        wasmedge_version: ['0.14.1']
        ggml_version: ['b4419']
    steps:
      - name: Clone project
        id: checkout
        uses: actions/checkout@v3

      - name: Install Rust-nightly
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: nightly
          target: wasm32-wasip1
          components: rustfmt, clippy

      - name: Install Rust-stable
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          target: wasm32-wasip1

      - name: Install WasmEdge
        run: |
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install_v2.sh | bash -s -- -v ${{ matrix.wasmedge_version }} --ggmlbn=${{ matrix.ggml_version }}
          ls -al $HOME/.wasmedge/bin

      - name: Install Hurl
        run: |
          curl --location --remote-name https://github.com/Orange-OpenSource/hurl/releases/download/5.0.1/hurl_5.0.1_amd64.deb
          # -y: the runner is non-interactive, so never wait on a confirmation prompt.
          sudo apt update && sudo apt install -y ./hurl_5.0.1_amd64.deb

      - name: Install Qdrant and download snapshot
        run: |
          # Download Qdrant
          curl -LO https://github.com/qdrant/qdrant/releases/download/v1.11.4/qdrant-x86_64-unknown-linux-musl.tar.gz
          tar -xvf qdrant-x86_64-unknown-linux-musl.tar.gz
          rm qdrant-x86_64-unknown-linux-musl.tar.gz
          # Download snapshot
          curl -LO https://huggingface.co/datasets/gaianet/paris/resolve/main/paris_768_nomic-embed-text-v1.5-f16.snapshot
          mv paris_768_nomic-embed-text-v1.5-f16.snapshot default.snapshot
          ls -al

      - name: Build rag-api-server on linux
        env:
          RUSTFLAGS: "--cfg wasmedge --cfg tokio_unstable"
        run: |
          cargo build -p rag-api-server --release
          cp target/wasm32-wasip1/release/rag-api-server.wasm ./rag-api-server.wasm

      - name: Download models
        run: |
          curl -LO https://huggingface.co/second-state/Qwen2-1.5B-Instruct-GGUF/resolve/main/Qwen2-1.5B-Instruct-Q3_K_M.gguf
          curl -LO https://huggingface.co/second-state/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5-f16.gguf

      - name: Start Qdrant
        run: |
          nohup ./qdrant > ./start-qdrant.log 2>&1 &
          sleep 5
          cat start-qdrant.log

      - name: Import the default.snapshot file to Qdrant
        run: |
          # Upload default.snapshot as the multipart `snapshot` form field.
          # The URL is quoted so the shell never treats `?` as a glob character.
          curl -s -X POST 'http://localhost:6333/collections/default/snapshots/upload?priority=snapshot' -H 'Content-Type:multipart/form-data' -F 'snapshot=@default.snapshot'

      - name: Start rag-api-server for testing chat completions
        run: |
          nohup $HOME/.wasmedge/bin/wasmedge --dir .:. \
            --nn-preload default:GGML:AUTO:Qwen2-1.5B-Instruct-Q3_K_M.gguf \
            --nn-preload embedding:GGML:AUTO:nomic-embed-text-v1.5-f16.gguf \
            rag-api-server.wasm \
            --model-name Qwen2-1.5B-Instruct,nomic-embed-text-v1.5 \
            --ctx-size 4096,512 \
            --batch-size 16,512 \
            --prompt-template chatml,embedding \
            --rag-policy last-user-message \
            --socket-addr 0.0.0.0:8080 > ./start-llamaedge.log 2>&1 &
          sleep 30
          cat start-llamaedge.log

      # - name: Run test_chat.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_chat.hurl

      - name: Run test_embeddings.hurl
        run: |
          hurl --test --jobs 1 ./tests/test_embeddings.hurl

      # - name: Run test_rag.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_rag.hurl

      - name: Stop rag-api-server for testing chat completions
        run: |
          pkill -f wasmedge

      - name: Stop Qdrant
        run: |
          pkill -f qdrant

  # Same pipeline on the macos-13 runner (x86_64 wasi-sdk and Qdrant builds).
  test-api-server-macos-13:
    runs-on: macos-13
    needs: test-api-server-ubuntu
    strategy:
      matrix:
        wasmedge_version: ['0.14.1']
        ggml_version: ['b4419']
    steps:
      - name: Clone project
        id: checkout
        uses: actions/checkout@v3

      - name: Install Rust-nightly
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: nightly
          target: wasm32-wasip1
          components: rustfmt, clippy

      - name: Install Rust-stable
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          target: wasm32-wasip1

      - name: Download wasi-sdk for x86_64-macos
        run: |
          curl -LO https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sdk-24.0-x86_64-macos.tar.gz
          tar -xzvf wasi-sdk-24.0-x86_64-macos.tar.gz
          mv wasi-sdk-24.0-x86_64-macos wasi-sdk-24.0

      - name: Install WasmEdge
        run: |
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install_v2.sh | bash -s -- -v ${{ matrix.wasmedge_version }} --ggmlbn=${{ matrix.ggml_version }}
          ls -al $HOME/.wasmedge/bin

      - name: Install Hurl
        run: |
          brew install hurl

      - name: Install Qdrant and download snapshot
        run: |
          # Download Qdrant
          curl -LO https://github.com/qdrant/qdrant/releases/download/v1.11.4/qdrant-x86_64-apple-darwin.tar.gz
          tar -xzvf qdrant-x86_64-apple-darwin.tar.gz
          rm qdrant-x86_64-apple-darwin.tar.gz
          # Download snapshot
          curl -LO https://huggingface.co/datasets/gaianet/paris/resolve/main/paris_768_nomic-embed-text-v1.5-f16.snapshot
          mv paris_768_nomic-embed-text-v1.5-f16.snapshot default.snapshot
          ls -al

      - name: Build rag-api-server on macos-13
        env:
          # wasi-sdk was unpacked into the workspace by the step above;
          # github.workspace keeps the paths valid whatever the repository is named.
          WASI_SDK_PATH: "${{ github.workspace }}/wasi-sdk-24.0"
          CC: "${{ github.workspace }}/wasi-sdk-24.0/bin/clang --sysroot=${{ github.workspace }}/wasi-sdk-24.0/share/wasi-sysroot"
          RUSTFLAGS: "--cfg wasmedge --cfg tokio_unstable"
        run: |
          cargo build -p rag-api-server --release
          cp target/wasm32-wasip1/release/rag-api-server.wasm ./rag-api-server.wasm

      - name: Download models
        run: |
          curl -LO https://huggingface.co/second-state/Qwen2-1.5B-Instruct-GGUF/resolve/main/Qwen2-1.5B-Instruct-Q3_K_M.gguf
          curl -LO https://huggingface.co/second-state/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5-f16.gguf

      - name: Start Qdrant
        run: |
          nohup ./qdrant > ./start-qdrant.log 2>&1 &
          sleep 5
          cat start-qdrant.log

      - name: Import the default.snapshot file to Qdrant
        run: |
          # Upload default.snapshot as the multipart `snapshot` form field.
          # The URL is quoted so the shell never treats `?` as a glob character.
          curl -s -X POST 'http://localhost:6333/collections/default/snapshots/upload?priority=snapshot' -H 'Content-Type:multipart/form-data' -F 'snapshot=@default.snapshot'

      - name: Start rag-api-server for testing chat completions
        run: |
          nohup $HOME/.wasmedge/bin/wasmedge --dir .:. \
            --nn-preload default:GGML:AUTO:Qwen2-1.5B-Instruct-Q3_K_M.gguf \
            --nn-preload embedding:GGML:AUTO:nomic-embed-text-v1.5-f16.gguf \
            rag-api-server.wasm \
            --model-name Qwen2-1.5B-Instruct,nomic-embed-text-v1.5 \
            --ctx-size 4096,512 \
            --batch-size 16,512 \
            --prompt-template chatml,embedding \
            --rag-policy last-user-message \
            --socket-addr 0.0.0.0:8080 > ./start-llamaedge.log 2>&1 &
          sleep 30
          cat start-llamaedge.log

      # - name: Run test_chat.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_chat.hurl

      - name: Run test_embeddings.hurl
        run: |
          hurl --test --jobs 1 ./tests/test_embeddings.hurl

      # - name: Run test_rag.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_rag.hurl

      - name: Stop rag-api-server for testing chat completions
        run: |
          pkill -f wasmedge

      - name: Stop Qdrant
        run: |
          pkill -f qdrant

  # Same pipeline on the macos-14 runner (arm64 wasi-sdk, aarch64 Qdrant build).
  test-api-server-macos-14:
    runs-on: macos-14
    needs: test-api-server-macos-13
    strategy:
      matrix:
        wasmedge_version: ['0.14.1']
        ggml_version: ['b4419']
    steps:
      - name: Clone project
        id: checkout
        uses: actions/checkout@v3

      - name: Install Rust-nightly
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: nightly
          target: wasm32-wasip1
          components: rustfmt, clippy

      - name: Install Rust-stable
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          target: wasm32-wasip1

      - name: Download wasi-sdk for arm64-macos
        run: |
          curl -LO https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sdk-24.0-arm64-macos.tar.gz
          tar -xzvf wasi-sdk-24.0-arm64-macos.tar.gz
          mv wasi-sdk-24.0-arm64-macos wasi-sdk-24.0

      - name: Install WasmEdge
        run: |
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install_v2.sh | bash -s -- -v ${{ matrix.wasmedge_version }} --ggmlbn=${{ matrix.ggml_version }}
          ls -al $HOME/.wasmedge/bin

      - name: Install Hurl
        run: |
          brew install hurl

      - name: Install Qdrant and download snapshot
        run: |
          # Download Qdrant
          curl -LO https://github.com/qdrant/qdrant/releases/download/v1.11.4/qdrant-aarch64-apple-darwin.tar.gz
          tar -xzvf qdrant-aarch64-apple-darwin.tar.gz
          rm qdrant-aarch64-apple-darwin.tar.gz
          # Download snapshot
          curl -LO https://huggingface.co/datasets/gaianet/paris/resolve/main/paris_768_nomic-embed-text-v1.5-f16.snapshot
          mv paris_768_nomic-embed-text-v1.5-f16.snapshot default.snapshot
          ls -al

      - name: Build rag-api-server on macos-14
        env:
          # wasi-sdk was unpacked into the workspace by the step above;
          # github.workspace keeps the paths valid whatever the repository is named.
          WASI_SDK_PATH: "${{ github.workspace }}/wasi-sdk-24.0"
          CC: "${{ github.workspace }}/wasi-sdk-24.0/bin/clang --sysroot=${{ github.workspace }}/wasi-sdk-24.0/share/wasi-sysroot"
          RUSTFLAGS: "--cfg wasmedge --cfg tokio_unstable"
        run: |
          cargo build -p rag-api-server --release
          cp target/wasm32-wasip1/release/rag-api-server.wasm ./rag-api-server.wasm

      - name: Download models
        run: |
          curl -LO https://huggingface.co/second-state/Qwen2-1.5B-Instruct-GGUF/resolve/main/Qwen2-1.5B-Instruct-Q3_K_M.gguf
          curl -LO https://huggingface.co/second-state/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5-f16.gguf

      - name: Start Qdrant
        run: |
          nohup ./qdrant > ./start-qdrant.log 2>&1 &
          sleep 5
          cat start-qdrant.log

      - name: Import the default.snapshot file to Qdrant
        run: |
          # Upload default.snapshot as the multipart `snapshot` form field.
          # The URL is quoted so the shell never treats `?` as a glob character.
          curl -s -X POST 'http://localhost:6333/collections/default/snapshots/upload?priority=snapshot' -H 'Content-Type:multipart/form-data' -F 'snapshot=@default.snapshot'

      - name: Start rag-api-server for testing chat completions
        run: |
          nohup $HOME/.wasmedge/bin/wasmedge --dir .:. \
            --nn-preload default:GGML:AUTO:Qwen2-1.5B-Instruct-Q3_K_M.gguf \
            --nn-preload embedding:GGML:AUTO:nomic-embed-text-v1.5-f16.gguf \
            rag-api-server.wasm \
            --model-name Qwen2-1.5B-Instruct,nomic-embed-text-v1.5 \
            --ctx-size 4096,512 \
            --batch-size 16,512 \
            --prompt-template chatml,embedding \
            --rag-policy last-user-message \
            --socket-addr 0.0.0.0:8080 > ./start-llamaedge.log 2>&1 &
          sleep 30
          cat start-llamaedge.log

      # - name: Run test_chat.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_chat.hurl

      - name: Run test_embeddings.hurl
        run: |
          hurl --test --jobs 1 ./tests/test_embeddings.hurl

      # - name: Run test_rag.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_rag.hurl

      - name: Stop rag-api-server for testing chat completions
        run: |
          pkill -f wasmedge

      - name: Stop Qdrant
        run: |
          pkill -f qdrant

  # Same pipeline on the macos-15 runner (arm64 wasi-sdk, aarch64 Qdrant build).
  test-api-server-macos-15:
    runs-on: macos-15
    needs: test-api-server-macos-14
    strategy:
      matrix:
        wasmedge_version: ['0.14.1']
        ggml_version: ['b4419']
    steps:
      - name: Clone project
        id: checkout
        uses: actions/checkout@v3

      - name: Install Rust-nightly
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: nightly
          target: wasm32-wasip1
          components: rustfmt, clippy

      - name: Install Rust-stable
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          target: wasm32-wasip1

      - name: Download wasi-sdk for arm64-macos
        run: |
          curl -LO https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sdk-24.0-arm64-macos.tar.gz
          tar -xzvf wasi-sdk-24.0-arm64-macos.tar.gz
          mv wasi-sdk-24.0-arm64-macos wasi-sdk-24.0

      - name: Install WasmEdge
        run: |
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install_v2.sh | bash -s -- -v ${{ matrix.wasmedge_version }} --ggmlbn=${{ matrix.ggml_version }}
          ls -al $HOME/.wasmedge/bin

      - name: Install Hurl
        run: |
          brew install hurl

      - name: Install Qdrant and download snapshot
        run: |
          # Download Qdrant
          curl -LO https://github.com/qdrant/qdrant/releases/download/v1.11.4/qdrant-aarch64-apple-darwin.tar.gz
          tar -xzvf qdrant-aarch64-apple-darwin.tar.gz
          rm qdrant-aarch64-apple-darwin.tar.gz
          # Download snapshot
          curl -LO https://huggingface.co/datasets/gaianet/paris/resolve/main/paris_768_nomic-embed-text-v1.5-f16.snapshot
          mv paris_768_nomic-embed-text-v1.5-f16.snapshot default.snapshot
          ls -al

      - name: Build rag-api-server on macos-15
        env:
          # wasi-sdk was unpacked into the workspace by the step above;
          # github.workspace keeps the paths valid whatever the repository is named.
          WASI_SDK_PATH: "${{ github.workspace }}/wasi-sdk-24.0"
          CC: "${{ github.workspace }}/wasi-sdk-24.0/bin/clang --sysroot=${{ github.workspace }}/wasi-sdk-24.0/share/wasi-sysroot"
          RUSTFLAGS: "--cfg wasmedge --cfg tokio_unstable"
        run: |
          cargo build -p rag-api-server --release
          cp target/wasm32-wasip1/release/rag-api-server.wasm ./rag-api-server.wasm

      - name: Download models
        run: |
          curl -LO https://huggingface.co/second-state/Qwen2-1.5B-Instruct-GGUF/resolve/main/Qwen2-1.5B-Instruct-Q3_K_M.gguf
          curl -LO https://huggingface.co/second-state/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5-f16.gguf

      - name: Start Qdrant
        run: |
          nohup ./qdrant > ./start-qdrant.log 2>&1 &
          sleep 5
          cat start-qdrant.log

      - name: Import the default.snapshot file to Qdrant
        run: |
          # Upload default.snapshot as the multipart `snapshot` form field.
          # The URL is quoted so the shell never treats `?` as a glob character.
          curl -s -X POST 'http://localhost:6333/collections/default/snapshots/upload?priority=snapshot' -H 'Content-Type:multipart/form-data' -F 'snapshot=@default.snapshot'

      - name: Start rag-api-server for testing chat completions
        run: |
          nohup $HOME/.wasmedge/bin/wasmedge --dir .:. \
            --nn-preload default:GGML:AUTO:Qwen2-1.5B-Instruct-Q3_K_M.gguf \
            --nn-preload embedding:GGML:AUTO:nomic-embed-text-v1.5-f16.gguf \
            rag-api-server.wasm \
            --model-name Qwen2-1.5B-Instruct,nomic-embed-text-v1.5 \
            --ctx-size 4096,512 \
            --batch-size 16,512 \
            --prompt-template chatml,embedding \
            --rag-policy last-user-message \
            --socket-addr 0.0.0.0:8080 > ./start-llamaedge.log 2>&1 &
          sleep 30
          cat start-llamaedge.log

      # - name: Run test_chat.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_chat.hurl

      - name: Run test_embeddings.hurl
        run: |
          hurl --test --jobs 1 ./tests/test_embeddings.hurl

      # - name: Run test_rag.hurl
      #   run: |
      #     hurl --test --jobs 1 ./tests/test_rag.hurl

      - name: Stop rag-api-server for testing chat completions
        run: |
          pkill -f wasmedge

      - name: Stop Qdrant
        run: |
          pkill -f qdrant