Resources aware load balance method #6

Workflow file for this run

.github/workflows/pr-test.yml at 28cbe68

	# Display name of the workflow.
	name: PR Test

	# Triggers: pushes to main and pull requests targeting main, limited to
	# changes under python/sglang/ or test/, plus manual dispatch.
	on:
	  push:
	    branches: [main]
	    paths:
	      - "python/sglang/**"
	      - "test/**"
	  pull_request:
	    branches: [main]
	    paths:
	      - "python/sglang/**"
	      - "test/**"
	  # No inputs are declared for manual runs.
	  workflow_dispatch:

	# Runs are grouped per git ref; starting a new run in a group cancels the
	# run of that group that is still in progress.
	concurrency:
	  group: pr-test-${{ github.ref }}
	  cancel-in-progress: true

	# Every test job shares the same guard: it runs when the workflow executes in
	# the sgl-project/sglang repository, or when the triggering event is a
	# pull request. Each test job checks out the code, installs the package in
	# editable mode together with flashinfer from the flashinfer.ai wheel index
	# (cu121 / torch2.4 path), and then runs its tests.
	jobs:
	  # Frontend language tests: the "minimal" suite under test/lang.
	  unit-test-frontend:
	    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
	    runs-on: 1-gpu-runner
	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v3

	      - name: Install dependencies
	        run: |
	          pip install --upgrade pip
	          pip install -e "python[dev]"
	          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

	      - name: Run test
	        timeout-minutes: 20
	        run: |
	          cd test/lang
	          python3 run_suite.py --suite minimal

	  # Backend tests, first slice of the "minimal" suite under test/srt.
	  # NOTE(review): presumably --range-end is exclusive so that part-0 and
	  # part-1 do not overlap; confirm against test/srt/run_suite.py.
	  unit-test-backend-part-0:
	    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
	    runs-on: 1-gpu-runner
	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v3

	      - name: Install dependencies
	        run: |
	          pip install --upgrade pip
	          pip install -e "python[dev]"
	          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

	      - name: Run test
	        timeout-minutes: 20
	        run: |
	          cd test/srt
	          python3 run_suite.py --suite minimal --range-begin 0 --range-end 8

	  # Backend tests, remaining slice of the "minimal" suite (from index 8 on).
	  unit-test-backend-part-1:
	    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
	    runs-on: 1-gpu-runner
	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v3

	      - name: Install dependencies
	        run: |
	          pip install --upgrade pip
	          pip install -e "python[dev]"
	          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

	      - name: Run test
	        timeout-minutes: 20
	        run: |
	          cd test/srt
	          python3 run_suite.py --suite minimal --range-begin 8

	  # Serving benchmarks on the 1-GPU runner. Each step runs one unittest
	  # case from test/srt with its own 10-minute timeout.
	  performance-test-1-gpu:
	    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
	    runs-on: 1-gpu-runner
	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v3

	      - name: Install dependencies
	        run: |
	          pip install --upgrade pip
	          pip install -e "python[all]"
	          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

	      - name: Benchmark Serving Throughput
	        timeout-minutes: 10
	        run: |
	          cd test/srt
	          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default

	      - name: Benchmark Serving Latency
	        timeout-minutes: 10
	        run: |
	          cd test/srt
	          python3 -m unittest test_serving_latency.TestServingLatency.test_default

	      - name: Benchmark Serving Throughput (w/o RadixAttention)
	        timeout-minutes: 10
	        run: |
	          cd test/srt
	          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache

	      - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
	        timeout-minutes: 10
	        run: |
	          cd test/srt
	          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill

	  # Serving benchmarks on the 2-GPU runner (steps are labelled TP=2), using
	  # the test_moe_* modules from test/srt.
	  performance-test-2-gpu:
	    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
	    runs-on: 2-gpu-runner
	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v3

	      - name: Install dependencies
	        run: |
	          pip install --upgrade pip
	          pip install -e "python[all]"
	          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

	      - name: Benchmark Serving Throughput (TP=2)
	        timeout-minutes: 10
	        run: |
	          cd test/srt
	          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default

	      - name: Benchmark Serving Latency (TP=2)
	        timeout-minutes: 10
	        run: |
	          cd test/srt
	          python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default

	      - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2)
	        timeout-minutes: 10
	        run: |
	          cd test/srt
	          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache

	  # Accuracy evaluation on the 1-GPU runner. The install step additionally
	  # clones merrymercy/human-eval and installs it in editable mode.
	  accuracy-test-1-gpu:
	    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
	    runs-on: 1-gpu-runner
	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v3

	      - name: Install dependencies
	        run: |
	          pip install --upgrade pip
	          pip install -e "python[all]"
	          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

	          git clone https://github.com/merrymercy/human-eval.git
	          cd human-eval
	          pip install -e .

	      - name: Evaluate Accuracy
	        timeout-minutes: 20
	        run: |
	          cd test/srt
	          python3 test_eval_accuracy_large.py

	  # Accuracy evaluation on the 2-GPU runner, using the test_moe_* script.
	  accuracy-test-2-gpu:
	    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
	    runs-on: 2-gpu-runner
	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v3

	      - name: Install dependencies
	        run: |
	          pip install --upgrade pip
	          pip install -e "python[all]"
	          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

	          git clone https://github.com/merrymercy/human-eval.git
	          cd human-eval
	          pip install -e .

	      - name: Evaluate Accuracy
	        timeout-minutes: 20
	        run: |
	          cd test/srt
	          python3 test_moe_eval_accuracy_large.py

	  # Aggregation job: depends on every test job above and only echoes a
	  # message, giving the workflow a single job that completes last.
	  finish:
	    needs:
	      - unit-test-frontend
	      - unit-test-backend-part-0
	      - unit-test-backend-part-1
	      - performance-test-1-gpu
	      - performance-test-2-gpu
	      - accuracy-test-1-gpu
	      - accuracy-test-2-gpu
	    runs-on: ubuntu-latest
	    steps:
	      - name: Finish
	        run: echo "This is an empty step to ensure that all jobs are completed."

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Resources aware load balance method #6

Workflow file

Resources aware load balance method #6

Jobs

Run details

Workflow file for this run