remove HF token from build log (intel#253)
* remove HF token from build log

Signed-off-by: Jiafu Zhang <[email protected]>
jiafuzha authored Jun 18, 2024
1 parent cdce225 commit 320922f
Showing 19 changed files with 52 additions and 58 deletions.
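In short, this commit removes the Hugging Face access token from every place it could surface in CI build logs: the finetune and inference workflows no longer load HF_ACCESS_TOKEN from the runner's .env file or pass it to start_docker, dev/scripts/ci-functions.sh drops its in-container huggingface-cli login, and the Gaudi2 workflow stops injecting the token into the served YAML config. The model configs switch from the gated meta-llama repositories to the ungated NousResearch mirrors so no token is needed at all, the predictors now pass None rather than an empty use_auth_token string to Hugging Face loaders, the default CI proxy moves from port 911 to 912, and numpy is pinned below 2.0.0.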
4 changes: 2 additions & 2 deletions .github/workflows/config/llama-2-7b-chat-hf-vllm-fp32.yaml
@@ -14,7 +14,7 @@ ipex:
   enabled: false
   precision: bf16
 model_description:
-  model_id_or_path: meta-llama/Llama-2-7b-chat-hf
-  tokenizer_name_or_path: meta-llama/Llama-2-7b-chat-hf
+  model_id_or_path: NousResearch/Llama-2-7b-chat-hf
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-chat-hf
   config:
     use_auth_token: ''
13 changes: 5 additions & 8 deletions .github/workflows/workflow_finetune.yml
@@ -11,10 +11,10 @@ on:
         default: '10.1.2.13:5000/llmray-build'
       http_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'
       https_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'
       runner_config_path:
         type: string
         default: '/home/ci/llm-ray-actions-runner'
@@ -34,15 +34,15 @@ jobs:
     name: finetune
     strategy:
       matrix:
-        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b, meta-llama/Llama-2-7b-hf, mistralai/Mistral-7B-v0.1, google/gemma-2b]
+        model: [ EleutherAI/gpt-j-6b, NousResearch/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b, NousResearch/Llama-2-7b-hf, mistralai/Mistral-7B-v0.1, google/gemma-2b]
         isPR:
           - ${{inputs.ci_type == 'pr'}}

         exclude:
           - { isPR: true }
         include:
           - { model: "EleutherAI/gpt-j-6b"}
-          - { model: "meta-llama/Llama-2-7b-chat-hf"}
+          - { model: "NousResearch/Llama-2-7b-chat-hf"}
           - { model: "mistralai/Mistral-7B-v0.1"}
           - { model: "google/gemma-2b"}
@@ -65,9 +65,6 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4

-      - name: Load environment variables
-        run: cat /root/actions-runner-config/.env >> $GITHUB_ENV
-
       - name: Build Docker Image
         run: |
           DF_SUFFIX=".cpu_and_deepspeed"
@@ -83,7 +80,7 @@ jobs:
           model_cache_path=${{ inputs.model_cache_path }}
           USE_PROXY="1"
           source dev/scripts/ci-functions.sh
-          start_docker ${TARGET} ${code_checkout_path} ${model_cache_path} ${USE_PROXY} ${{env.HF_ACCESS_TOKEN}}
+          start_docker ${TARGET} ${code_checkout_path} ${model_cache_path} ${USE_PROXY}
       - name: Run Finetune Test
         run: |
6 changes: 3 additions & 3 deletions .github/workflows/workflow_finetune_gpu.yml
@@ -8,17 +8,17 @@ on:
         default: '10.1.2.13:5000/llmray-build'
       http_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'
       https_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'

 jobs:
   finetune-gpu:
     name: finetune-gpu
     strategy:
       matrix:
-        model: [ meta-llama/Llama-2-7b-chat-hf ]
+        model: [ NousResearch/Llama-2-7b-chat-hf ]
     runs-on: self-hosted

     defaults:
9 changes: 3 additions & 6 deletions .github/workflows/workflow_inference.yml
@@ -11,10 +11,10 @@ on:
         default: '10.1.2.13:5000/llmray-build'
       http_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'
       https_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'
       runner_config_path:
         type: string
         default: '/home/ci/llm-ray-actions-runner'
@@ -67,9 +67,6 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4

-      - name: Load environment variables
-        run: cat /root/actions-runner-config/.env >> $GITHUB_ENV
-
       - name: Determine Target
         id: "target"
         run: |
@@ -94,7 +91,7 @@ jobs:
           model_cache_path=${{ inputs.model_cache_path }}
           USE_PROXY="1"
           source dev/scripts/ci-functions.sh
-          start_docker ${TARGET} ${code_checkout_path} ${model_cache_path} ${USE_PROXY} ${{env.HF_ACCESS_TOKEN}}
+          start_docker ${TARGET} ${code_checkout_path} ${model_cache_path} ${USE_PROXY}
       - name: Start Ray Cluster
         run: |
6 changes: 0 additions & 6 deletions .github/workflows/workflow_inference_gaudi2.yml
@@ -73,9 +73,6 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4

-      - name: Load environment variables
-        run: cat /root/actions-runner-config/.env >> $GITHUB_ENV
-
       - name: Build Docker Image
         run: |
           DF_SUFFIX=".gaudi2"
@@ -98,7 +95,6 @@ jobs:
           cid=$(docker ps -a -q --filter "name=${TARGET}")
           if [[ ! -z "$cid" ]]; then docker rm $cid; fi
           docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
       - name: Start Ray Cluster
         run: |
           TARGET=${{steps.target.outputs.target}}
@@ -117,7 +113,6 @@ jobs:
           conf_path = "llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-vllm-hpu.yaml"
           with open(conf_path, encoding="utf-8") as reader:
               result = yaml.load(reader, Loader=yaml.FullLoader)
-          result['model_description']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
           with open(conf_path, 'w') as output:
               yaml.dump(result, output, sort_keys=False)
           EOF
@@ -128,7 +123,6 @@ jobs:
           elif [[ ${{ matrix.model }} == "llama-2-70b-chat-hf" ]]; then
             docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-70b-chat-hf-hpu.yaml --keep_serve_terminal"
           elif [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
-            docker exec "${TARGET}" bash -c "huggingface-cli login --token ${{ env.HF_ACCESS_TOKEN }}"
             docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-vllm-hpu.yaml --keep_serve_terminal"
           fi
           echo Streaming query:
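Since the workflow no longer writes a token into llama-2-7b-chat-hf-vllm-hpu.yaml, the config that gets served is exactly what ships in the repo. A hypothetical sanity check along these lines (not part of this commit) could confirm that no token ever lands in the rendered config:

# Hypothetical check, not part of this commit: verify the served HPU vLLM
# config still carries only the empty use_auth_token placeholder, so no
# real token can end up echoed into the CI log.
import yaml

conf_path = "llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-vllm-hpu.yaml"
with open(conf_path, encoding="utf-8") as reader:
    conf = yaml.safe_load(reader)

token = conf["model_description"]["config"].get("use_auth_token")
assert not token, "use_auth_token must stay empty in checked-in configs"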
4 changes: 2 additions & 2 deletions .github/workflows/workflow_test_benchmark.yml
@@ -11,10 +11,10 @@ on:
         default: '10.1.2.13:5000/llmray-build'
       http_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'
       https_proxy:
         type: string
-        default: 'http://10.24.221.169:911'
+        default: 'http://10.24.221.169:912'
       runner_config_path:
         type: string
         default: '/home/ci/llm-ray-actions-runner'
12 changes: 3 additions & 9 deletions dev/scripts/ci-functions.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
 set -eo pipefail

-HTTP_PROXY='http://10.24.221.169:911'
-HTTPS_PROXY='http://10.24.221.169:911'
+HTTP_PROXY='http://10.24.221.169:912'
+HTTPS_PROXY='http://10.24.221.169:912'
 MODEL_CACHE_PATH_LOACL='/root/.cache/huggingface/hub'
 CODE_CHECKOUT_PATH_LOCAL='/root/llm-on-ray'

@@ -39,7 +39,6 @@ start_docker() {
     local code_checkout_path=$2
     local model_cache_path=$3
     local USE_PROXY=$4
-    local HF_TOKEN=$5

     cid=$(docker ps -q --filter "name=${TARGET}")
     if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
@@ -66,12 +65,7 @@ start_docker() {
     fi

     echo "docker run -tid "${docker_args[@]}" "${TARGET}:latest""
-    docker run -tid "${docker_args[@]}" "${TARGET}:latest"
-    if [ -z "$HF_TOKEN" ]; then
-        echo "no hf token"
-    else
-        docker exec "${TARGET}" bash -c "huggingface-cli login --token ${HF_TOKEN}"
-    fi
+    docker run -tid "${docker_args[@]}" "${TARGET}:latest"
 }

 install_dependencies(){
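The net effect in ci-functions.sh is that start_docker no longer takes a fifth token argument and never runs huggingface-cli login inside the container, so the token value has no remaining path into commands that CI might echo or trace into the build log. Callers in the workflows were updated to pass only the four remaining arguments: start_docker ${TARGET} ${code_checkout_path} ${model_cache_path} ${USE_PROXY}.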
4 changes: 2 additions & 2 deletions llm_on_ray/inference/models/hpu/llama-2-70b-chat-hf-hpu.yaml
@@ -8,7 +8,7 @@ deepspeed: true
 workers_per_group: 8
 device: hpu
 model_description:
-  model_id_or_path: meta-llama/Llama-2-70b-chat-hf
-  tokenizer_name_or_path: meta-llama/Llama-2-70b-chat-hf
+  model_id_or_path: NousResearch/Llama-2-70b-chat-hf
+  tokenizer_name_or_path: NousResearch/Llama-2-70b-chat-hf
   config:
     use_auth_token: ''
4 changes: 2 additions & 2 deletions llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-hpu.yaml
@@ -6,7 +6,7 @@ cpus_per_worker: 8
 hpus_per_worker: 1
 device: hpu
 model_description:
-  model_id_or_path: meta-llama/Llama-2-7b-chat-hf
-  tokenizer_name_or_path: meta-llama/Llama-2-7b-chat-hf
+  model_id_or_path: NousResearch/Llama-2-7b-chat-hf
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-chat-hf
   config:
     use_auth_token: ''
@@ -16,7 +16,7 @@ ipex:
   enabled: false
   precision: bf16
 model_description:
-  model_id_or_path: meta-llama/Llama-2-7b-chat-hf
-  tokenizer_name_or_path: meta-llama/Llama-2-7b-chat-hf
+  model_id_or_path: NousResearch/Llama-2-7b-chat-hf
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-chat-hf
   config:
     use_auth_token: ''
4 changes: 2 additions & 2 deletions llm_on_ray/inference/models/hpu/llama-3-70b-chat-hf-hpu.yaml
@@ -7,7 +7,7 @@ deepspeed: true
 workers_per_group: 8
 device: hpu
 model_description:
-  model_id_or_path: meta-llama/Meta-Llama-3-70b-Instruct
-  tokenizer_name_or_path: meta-llama/Meta-Llama-3-70b-Instruct
+  model_id_or_path: NousResearch/Meta-Llama-3-70B-Instruct
+  tokenizer_name_or_path: NousResearch/Meta-Llama-3-70B-Instruct
   config:
     use_auth_token: ''
4 changes: 2 additions & 2 deletions llm_on_ray/inference/models/hpu/llama-3-8b-instruct-hpu.yaml
@@ -6,7 +6,7 @@ cpus_per_worker: 8
 hpus_per_worker: 1
 device: hpu
 model_description:
-  model_id_or_path: meta-llama/Meta-Llama-3-8b-Instruct
-  tokenizer_name_or_path: meta-llama/Meta-Llama-3-8b-Instruct
+  model_id_or_path: NousResearch/Meta-Llama-3-8B-Instruct
+  tokenizer_name_or_path: NousResearch/Meta-Llama-3-8B-Instruct
   config:
     use_auth_token: ''
4 changes: 2 additions & 2 deletions llm_on_ray/inference/models/llama-2-7b-chat-hf.yaml
@@ -12,7 +12,7 @@ ipex:
   enabled: false
   precision: bf16
 model_description:
-  model_id_or_path: meta-llama/Llama-2-7b-chat-hf
-  tokenizer_name_or_path: meta-llama/Llama-2-7b-chat-hf
+  model_id_or_path: NousResearch/Llama-2-7b-chat-hf
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-chat-hf
   config:
     use_auth_token: ''
@@ -22,7 +22,7 @@ ipex:
   enabled: false
   precision: bf16
 model_description:
-  model_id_or_path: meta-llama/Llama-2-7b-chat-hf
-  tokenizer_name_or_path: meta-llama/Llama-2-7b-chat-hf
+  model_id_or_path: NousResearch/Llama-2-7b-chat-hf
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-chat-hf
   config:
     use_auth_token: ''
4 changes: 2 additions & 2 deletions llm_on_ray/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml
@@ -15,7 +15,7 @@ ipex:
   enabled: false
   precision: bf16
 model_description:
-  model_id_or_path: meta-llama/Llama-2-7b-chat-hf
-  tokenizer_name_or_path: meta-llama/Llama-2-7b-chat-hf
+  model_id_or_path: NousResearch/Llama-2-7b-chat-hf
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-chat-hf
   config:
     use_auth_token: ''
8 changes: 6 additions & 2 deletions llm_on_ray/inference/predictors/deepspeed_predictor.py
@@ -53,11 +53,15 @@ def __init__(self, infer_conf: InferenceConfig, pad_token_id, stopping_criteria)

         model_desc = infer_conf.model_description
         model_config = model_desc.config
+        if infer_conf.model_description.config.use_auth_token:
+            auth_token = infer_conf.model_description.config.use_auth_token
+        else:
+            auth_token = None
         hf_config = AutoConfig.from_pretrained(
             model_desc.model_id_or_path,
             torchscript=True,
             trust_remote_code=model_config.trust_remote_code,
-            use_auth_token=infer_conf.model_description.config.use_auth_token,
+            use_auth_token=auth_token,
         )

         # decide correct torch type for loading HF model
@@ -75,7 +79,7 @@ def __init__(self, infer_conf: InferenceConfig, pad_token_id, stopping_criteria)
             self.model = PeftModel.from_pretrained(
                 self.model,
                 model_desc.peft_model_id_or_path,
-                use_auth_token=infer_conf.model_description.config.use_auth_token,
+                use_auth_token=auth_token,
             )

             self.model = self.model.merge_and_unload()
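The pattern added here (and repeated in the other predictors below) converts a falsy use_auth_token — typically the empty string from the YAML configs — into None before it reaches Hugging Face loaders. A minimal standalone sketch of that behavior, with an illustrative helper name that is not part of this change:

# Minimal sketch of the auth-token normalization now used by the predictors.
# resolve_auth_token is an illustrative name, not a function in this repo.
from typing import Optional


def resolve_auth_token(use_auth_token: Optional[str]) -> Optional[str]:
    """Return the token if it is a non-empty string, otherwise None."""
    return use_auth_token if use_auth_token else None


if __name__ == "__main__":
    assert resolve_auth_token("") is None        # empty placeholder from YAML
    assert resolve_auth_token(None) is None      # token not configured at all
    assert resolve_auth_token("hf_example") == "hf_example"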
6 changes: 5 additions & 1 deletion llm_on_ray/inference/predictors/hpu_predictor.py
@@ -314,11 +314,15 @@ def load_model(self):
                 model = AutoModelForCausalLM.from_config(config, torch_dtype=model_dtype)

                 checkpoints_json = tempfile.NamedTemporaryFile(suffix=".json", mode="+w")
+                if model_desc.config.use_auth_token:
+                    auth_token = model_desc.config.use_auth_token
+                else:
+                    auth_token = None
                 write_checkpoints_json(
                     model_desc.model_id_or_path,
                     self.local_rank,
                     checkpoints_json,
-                    token=model_desc.config.use_auth_token,
+                    token=auth_token,
                 )
             else:
                 with deepspeed.OnDevice(dtype=model_dtype, device="cpu"):
8 changes: 6 additions & 2 deletions llm_on_ray/inference/predictors/transformer_predictor.py
@@ -37,11 +37,15 @@ def __init__(self, infer_conf: InferenceConfig):
         super().__init__(infer_conf)
         model_desc = infer_conf.model_description
         model_config = model_desc.config
+        if infer_conf.model_description.config.use_auth_token:
+            auth_token = infer_conf.model_description.config.use_auth_token
+        else:
+            auth_token = None
         hf_config = AutoConfig.from_pretrained(
             model_desc.model_id_or_path,
             torchscript=True,
             trust_remote_code=model_config.trust_remote_code,
-            use_auth_token=infer_conf.model_description.config.use_auth_token,
+            use_auth_token=auth_token,
         )

         # decide correct torch type for loading HF model
@@ -74,7 +78,7 @@ def __init__(self, infer_conf: InferenceConfig):
             model = PeftModel.from_pretrained(
                 model,
                 model_desc.peft_model_id_or_path,
-                use_auth_token=infer_conf.model_description.config.use_auth_token,
+                use_auth_token=auth_token,
             )

             model = model.merge_and_unload()
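The same use_auth_token normalization now appears in all three predictors (deepspeed_predictor.py, hpu_predictor.py, transformer_predictor.py); this commit keeps the change local to each call site rather than factoring it into a shared helper.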
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
 dependencies = [
     "accelerate",
     "datasets>=2.14.6",
-    "numpy",
+    "numpy<2.0.0",
     "ray>=2.10",
     "ray[serve,tune]>=2.10",
     "typing>=3.7.4.3",
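The numpy<2.0.0 pin is unrelated to the token cleanup; it most likely guards against the NumPy 2.0 release (June 2024) breaking dependencies built against the 1.x ABI, though the commit itself does not state a reason.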
