Add CPU oga support and update hyperparameters
ramkrishna2910 committed Oct 11, 2024
1 parent dbbea14 commit 394cf9d
Showing 2 changed files with 53 additions and 10 deletions.
7 changes: 6 additions & 1 deletion src/turnkeyml/llm/README.md
@@ -99,8 +99,13 @@ You can also try Phi-3-Mini-128k-Instruct with the following commands:
 
 `lemonade -i microsoft/Phi-3-mini-128k-instruct oga-load --device igpu --dtype int4 llm-prompt -p "Hello, my thoughts are"`
 
+You can also try out the CPU with:
 
-> Note: no other models or devices are officially supported by `lemonade` on OGA at this time. Contributions appreciated!
+`huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir REPO_ROOT/src/turnkeyml/llm/tools/ort_genai/models/phi-3-mini-128k-instruct`
+
+`lemonade -i microsoft/Phi-3-mini-128k-instruct oga-load --device cpu --dtype int4 llm-prompt -p "Hello, my thoughts are"`
+
+> Note: no other models or devices are officially supported by `lemonade` on OGA at this time. Contributions appreciated! It only takes a few minutes to add a new model, we just need to add a path to the downloaded model folder to the supported models dictionary in [oga.py](https://github.com/onnx/turnkeyml/blob/v4.0.2/src/turnkeyml/llm/tools/ort_genai/oga.py).
 ## Install RyzenAI NPU

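The note above points at the supported models dictionary in `oga.py` as the place to register new models. A minimal sketch of what such an entry might look like, assuming a made-up checkpoint name and folder layout (the real dictionary, including the new CPU entry, appears in the `oga.py` diff below):

```python
import os

# Hypothetical entry only: "my-org/My-Model-7B" and the folder names are
# placeholders, not models supported by lemonade. The real dictionary lives
# in src/turnkeyml/llm/tools/ort_genai/oga.py.
supported_models = {
    "cpu": {
        "int4": {
            "my-org/My-Model-7B": os.path.join(
                "my-model-7b",
                "cpu_and_mobile",
                "cpu-int4-rtn-block-32-acc-level-4",
            ),
        }
    },
}
```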
56 changes: 47 additions & 9 deletions src/turnkeyml/llm/tools/ort_genai/oga.py
@@ -5,6 +5,7 @@
 import argparse
 import os
 import time
+import json
 from queue import Queue
 import onnxruntime_genai as og
 from turnkeyml.state import State
@@ -70,6 +71,14 @@ def __init__(self, input_folder):
         super().__init__()
         self.model = og.Model(input_folder)
         self.type = "ort-genai"
+        self.config = self.load_config(input_folder)
+
+    def load_config(self, input_folder):
+        config_path = os.path.join(input_folder, 'genai_config.json')
+        if os.path.exists(config_path):
+            with open(config_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        return None
 
     def generate(
         self,
@@ -90,14 +99,34 @@ def generate(
         max_length = len(input_ids) + max_new_tokens
 
         params.input_ids = input_ids
-        params.set_search_options(
-            do_sample=do_sample,
-            top_k=top_k,
-            top_p=top_p,
-            temperature=temperature,
-            max_length=max_length,
-            min_length=max_length,
-        )
+        if self.config and 'search' in self.config:
+            search_config = self.config['search']
+            params.set_search_options(
+                do_sample=search_config.get('do_sample', do_sample),
+                top_k=search_config.get('top_k', top_k),
+                top_p=search_config.get('top_p', top_p),
+                temperature=search_config.get('temperature', temperature),
+                max_length=max_length,
+                min_length=0,
+                early_stopping=search_config.get('early_stopping', False),
+                length_penalty=search_config.get('length_penalty', 1.0),
+                num_beams=search_config.get('num_beams', 1),
+                num_return_sequences=search_config.get('num_return_sequences', 1),
+                repetition_penalty=search_config.get('repetition_penalty', 1.0),
+                past_present_share_buffer=search_config.get('past_present_share_buffer', True),
+                # Not currently supported by OGA
+                # diversity_penalty=search_config.get('diversity_penalty', 0.0),
+                # no_repeat_ngram_size=search_config.get('no_repeat_ngram_size', 0),
+            )
+        else:
+            params.set_search_options(
+                do_sample=do_sample,
+                top_k=top_k,
+                top_p=top_p,
+                temperature=temperature,
+                max_length=max_length,
+                min_length=max_length,
+            )
         params.try_graph_capture_with_max_batch_size(1)
 
         generator = og.Generator(self.model, params)
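The new branch above gives file-provided search options precedence over the arguments passed to `generate()`, falling back via `dict.get` when a key is absent. A small self-contained sketch of that precedence rule, with illustrative values rather than any real model's `genai_config.json`:

```python
# Illustrative only: mimics the .get() fallback pattern used above.
search_config = {"do_sample": True, "temperature": 0.7}  # pretend file contents

# Stand-ins for the arguments passed into generate()
do_sample, temperature, top_k = False, 1.0, 50

print(search_config.get("do_sample", do_sample))      # True (from config)
print(search_config.get("temperature", temperature))  # 0.7  (from config)
print(search_config.get("top_k", top_k))              # 50   (argument fallback)
```

Note also that the config-driven path sets `min_length=0`, while the fallback path keeps the previous behavior of pinning `min_length` to `max_length`, which forces generation of exactly `max_new_tokens` tokens.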
@@ -190,7 +219,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
     parser.add_argument(
         "-d",
         "--device",
-        choices=["igpu", "npu"],
+        choices=["igpu", "npu", "cpu"],
         default="igpu",
         help="Which device to load the model on to (default: igpu)",
     )
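This hunk simply widens the CLI surface: `cpu` becomes a valid `--device` value. A runnable fragment mirroring the option above (a standalone argparse sketch, not the full lemonade parser):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "-d",
    "--device",
    choices=["igpu", "npu", "cpu"],  # "cpu" is the newly added choice
    default="igpu",
    help="Which device to load the model on to (default: igpu)",
)

print(parser.parse_args(["--device", "cpu"]).device)  # cpu
```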
@@ -237,6 +266,15 @@ def run(
                 qwen_1dot5: "qwen1.5-7b-int4",
             }
         },
+        "cpu": {
+            "int4": {
+                phi_3_mini_4k: os.path.join(
+                    "phi-3-mini-4k-instruct",
+                    "cpu_and_mobile",
+                    "cpu-int4-rtn-block-32-acc-level-4",
+                ),
+            }
+        },
     }
 
     try:
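Taken together, the changes wire up the CPU path end to end: `--device cpu --dtype int4` selects the new dictionary entry, and the model folder it points at is loaded through `og.Model`, whose `genai_config.json` then drives the search options. A hedged sketch of that flow; the lookup code itself is elided from this diff, so the resolution shown here is an assumption:

```python
import os
import onnxruntime_genai as og  # same OGA import used by oga.py

# Assumed lookup: device/dtype select the folder registered in the
# supported models dictionary shown above.
rel_path = os.path.join(
    "phi-3-mini-4k-instruct", "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4"
)
models_root = os.path.join("src", "turnkeyml", "llm", "tools", "ort_genai", "models")

# Same call OGAModel.__init__ makes; genai_config.json is read from this folder.
model = og.Model(os.path.join(models_root, rel_path))
```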
