Added OLMo support to builder.py #1061

Open · wants to merge 14 commits into main
2 changes: 1 addition & 1 deletion README.md
@@ -15,7 +15,7 @@ See documentation at https://onnxruntime.ai/docs/genai.

| Support matrix | Supported now | Under development | On the roadmap |
| -------------- | ------------- | ----------------- | -------------- |
-| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> | Whisper | Stable diffusion |
+| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo <br/> | Whisper | Stable diffusion |
| API | Python <br/> C# <br/> C/C++ <br/> Java ^ | Objective-C | |
| Platform | Linux <br/> Windows <br/> Mac ^ <br/> Android ^ | | iOS |
| Architecture | x86 <br/> x64 <br/> Arm64 ~ | | |
2 changes: 1 addition & 1 deletion cmake/deps.txt
@@ -14,4 +14,4 @@ pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6a
googletest;https://github.com/google/googletest/archive/530d5c8c84abd2a46f38583ee817743c9b3a42b4.zip;5e3a61db2aa975cfd0f97ba92c818744e7fa7034
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
-onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;77a1812f55dbfed1fc4d9d219bdc4951ef7a6db2
+onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;4e10ee046a2f035351f3fe88740bd8215a18fdb9
2 changes: 1 addition & 1 deletion src/models/model.cpp
@@ -590,7 +590,7 @@ std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, const char* config_path, con
}

std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, std::unique_ptr<Config> config) {
-  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
+  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2", "olmo"};
Review comment (Contributor). Suggested change (moves "olmo" to its alphabetical position):

-  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2", "olmo"};
+  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "olmo", "phi", "phimoe", "phi3", "phi3small", "qwen2"};

  if (config->model.type == "gpt2")
    return std::make_shared<Gpt_Model>(std::move(config), ort_env);
  if (llm_types.find(config->model.type) != llm_types.end())
1 change: 1 addition & 0 deletions src/python/py/models/README.md
@@ -39,6 +39,7 @@ The tool currently supports the following model architectures.
- Nemotron
- Phi
- Qwen
+- AMD OLMo

It is intended to support the latest, popular state-of-the-art models.

18 changes: 17 additions & 1 deletion src/python/py/models/builder.py
@@ -330,7 +330,7 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):

        genai_config = {
            "model": {
"bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") else 1, # config.bos_token_id not present in ChatGLM model configs.
"bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") and config.bos_token_id != None else 1, # config.bos_token_id not present in ChatGLM model configs.
"context_length": self.context_length,
"decoder": {
"session_options" : {
@@ -3085,6 +3085,14 @@ def make_layer(self, layer_id, layer):
        layer.self_attn = layer.self_attn if hasattr(layer, 'self_attn') else layer.self_attention
        super().make_layer(layer_id, layer)

+class OLMoModel(Model):
+    def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
+        super().__init__(config, io_dtype, onnx_dtype, ep, cache_dir, extra_options)
+
+    def make_layernorm(self, layer_id, layernorm, skip, simple, location):
+        layernorm.weight = torch.ones(self.hidden_size)
+        layernorm.bias = torch.zeros(self.hidden_size)
+        super().make_layernorm(layer_id, layernorm, skip, simple, location)

class GraniteModel(MistralModel):
    def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
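Note for reviewers, on the `make_layernorm` override in `OLMoModel` above: OLMo checkpoints use a non-parametric LayerNorm (no learned scale or bias), while the base class reads `layernorm.weight` and `layernorm.bias` when emitting the node, so the override supplies an identity weight and a zero bias. Mathematically this changes nothing, as the following PyTorch sketch shows (the hidden size is illustrative):

```python
import torch

hidden_size = 2048  # illustrative; AMD-OLMo-1B reportedly uses 2048
x = torch.randn(2, 8, hidden_size)

# Non-parametric LayerNorm, as in OLMo (no learned scale/bias).
ln_plain = torch.nn.LayerNorm(hidden_size, elementwise_affine=False)

# Parametric LayerNorm with identity weight and zero bias,
# matching what the override feeds to the base class.
ln_affine = torch.nn.LayerNorm(hidden_size)
with torch.no_grad():
    ln_affine.weight.fill_(1.0)
    ln_affine.bias.zero_()

assert torch.allclose(ln_plain(x), ln_affine(x))
```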
@@ -3244,6 +3252,14 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid
        onnx_model = Phi3VModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif config.architectures[0] == "Qwen2ForCausalLM":
        onnx_model = QwenModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "NemotronForCausalLM":
onnx_model = NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "ChatGLMForConditionalGeneration" or config.architectures[0] == "ChatGLMModel":
# Quantized ChatGLM model has ChatGLMForConditionalGeneration as architecture whereas HF model as the latter
Review comment (Contributor):
This appears to be a merge conflict issue. Nemotron and ChatGLM are checked earlier so they don't need to be checked again here. Can you also insert OLMo into the checks so that the alphabetical order is still maintained?

+        config.hidden_act = "swiglu"
+        onnx_model = ChatGLMModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+    elif config.architectures[0] == "OlmoForCausalLM":
+        onnx_model = OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    else:
        raise NotImplementedError(f"The {hf_name} model is not currently supported.")
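To make the review request above concrete, here is a sketch (not part of the diff) of the dispatch with the duplicated Nemotron and ChatGLM branches removed and OLMo inserted so the chain stays alphabetical; `select_model` is a hypothetical stand-in for the surrounding `create_model` logic:

```python
# Sketch only: the model class names come from builder.py, but this wrapper
# function is hypothetical and exists just to show the branch ordering.
def select_model(config, io_dtype, precision, execution_provider, cache_dir, extra_options):
    arch = config.architectures[0]
    if arch == "NemotronForCausalLM":
        return NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif arch == "OlmoForCausalLM":
        return OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif arch == "Qwen2ForCausalLM":
        return QwenModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    else:
        raise NotImplementedError(f"The {arch} model is not currently supported.")
```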

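With the dispatch in place, reviewers can exercise the new path end to end against the checkpoint used by the tests below. A sketch, assuming the package layout documented in src/python/py/models/README.md (output and cache paths are illustrative):

```python
# Hypothetical smoke test for the OLMo path; argument names follow the
# create_model signature shown in the hunk header above.
from onnxruntime_genai.models.builder import create_model

create_model(
    model_name="amd/AMD-OLMo-1B-SFT-DPO",
    input_path="",  # empty: pull weights from Hugging Face by model name
    output_dir="./amd-olmo-1b-int4",
    precision="int4",
    execution_provider="cpu",
    cache_dir="./cache",
)
```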
2 changes: 2 additions & 0 deletions test/python/_test_utils.py
@@ -58,6 +58,8 @@ def get_model_paths():
"phi-3.5": "microsoft/Phi-3.5-mini-instruct",
# "llama-3.2": "meta-llama/Llama-3.2-1B-instruct",
"granite-3.0": "ibm-granite/granite-3.0-2b-instruct",
"olmo": "amd/AMD-OLMo-1B-SFT-DPO",
"qwen": "Qwen/Qwen2.5-0.5B",
    }

    ci_data_path = os.path.join("/", "data", "ortgenai", "pytorch")
2 changes: 1 addition & 1 deletion test/python/requirements.txt
@@ -6,5 +6,5 @@ protobuf==5.27
sympy
pytest
onnx
-transformers
+transformers==4.44.2
huggingface_hub[cli]