Added OLMo support to builder.py #1061

Open · wants to merge 14 commits into main
2 changes: 1 addition & 1 deletion README.md
@@ -15,7 +15,7 @@ See documentation at https://onnxruntime.ai/docs/genai.

| Support matrix | Supported now | Under development | On the roadmap |
| -------------- | ------------- | ----------------- | -------------- |
-| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> | Whisper | Stable diffusion |
+| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo <br/> | Whisper | Stable diffusion |
| API | Python <br/> C# <br/> C/C++ <br/> Java ^ | Objective-C | |
| Platform | Linux <br/> Windows <br/> Mac ^ <br/> Android ^ | | iOS |
| Architecture | x86 <br/> x64 <br/> Arm64 ~ | | |
2 changes: 1 addition & 1 deletion cmake/deps.txt
@@ -14,4 +14,4 @@ pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6a
googletest;https://github.com/google/googletest/archive/530d5c8c84abd2a46f38583ee817743c9b3a42b4.zip;5e3a61db2aa975cfd0f97ba92c818744e7fa7034
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
-onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;77a1812f55dbfed1fc4d9d219bdc4951ef7a6db2
+onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;4e10ee046a2f035351f3fe88740bd8215a18fdb9
2 changes: 1 addition & 1 deletion src/models/model.cpp
@@ -590,7 +590,7 @@ std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, const char* config_path, con
}

std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, std::unique_ptr<Config> config) {
-  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
+  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2", "olmo"};
Review comment (Contributor). Suggested change (moves "olmo" to its alphabetical position):

-  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2", "olmo"};
+  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "olmo", "phi", "phimoe", "phi3", "phi3small", "qwen2"};

  if (config->model.type == "gpt2")
    return std::make_shared<Gpt_Model>(std::move(config), ort_env);
  if (llm_types.find(config->model.type) != llm_types.end())
1 change: 1 addition & 0 deletions src/python/py/models/README.md
@@ -39,6 +39,7 @@ The tool currently supports the following model architectures.
- Nemotron
- Phi
- Qwen
+- AMD OLMo

It is intended to support the latest, popular state-of-the-art models.

18 changes: 17 additions & 1 deletion src/python/py/models/builder.py
@@ -330,7 +330,7 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):

        genai_config = {
            "model": {
"bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") else 1, # config.bos_token_id not present in ChatGLM model configs.
"bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") and config.bos_token_id != None else 1, # config.bos_token_id not present in ChatGLM model configs.
"context_length": self.context_length,
"decoder": {
"session_options" : {
@@ -3085,6 +3085,14 @@ def make_layer(self, layer_id, layer):
        layer.self_attn = layer.self_attn if hasattr(layer, 'self_attn') else layer.self_attention
        super().make_layer(layer_id, layer)

+class OLMoModel(Model):
+    def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
+        super().__init__(config, io_dtype, onnx_dtype, ep, cache_dir, extra_options)
+
+    def make_layernorm(self, layer_id, layernorm, skip, simple, location):
+        layernorm.weight = torch.ones(self.hidden_size)
+        layernorm.bias = torch.zeros(self.hidden_size)
+        super().make_layernorm(layer_id, layernorm, skip, simple, location)

class GraniteModel(MistralModel):
    def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
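Note for reviewers, on the `make_layernorm` override in `OLMoModel` above: OLMo checkpoints use a non-parametric LayerNorm (no learned scale or bias), while the base class reads `layernorm.weight` and `layernorm.bias` when emitting the node, so the override supplies an identity weight and a zero bias. Mathematically this changes nothing, as the following PyTorch sketch shows (the hidden size is illustrative):

```python
import torch

hidden_size = 2048  # illustrative; AMD-OLMo-1B reportedly uses 2048
x = torch.randn(2, 8, hidden_size)

# Non-parametric LayerNorm, as in OLMo (no learned scale/bias).
ln_plain = torch.nn.LayerNorm(hidden_size, elementwise_affine=False)

# Parametric LayerNorm with identity weight and zero bias,
# matching what the override feeds to the base class.
ln_affine = torch.nn.LayerNorm(hidden_size)
with torch.no_grad():
    ln_affine.weight.fill_(1.0)
    ln_affine.bias.zero_()

assert torch.allclose(ln_plain(x), ln_affine(x))
```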
@@ -3244,6 +3252,14 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid
        onnx_model = Phi3VModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif config.architectures[0] == "Qwen2ForCausalLM":
        onnx_model = QwenModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "NemotronForCausalLM":
onnx_model = NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "ChatGLMForConditionalGeneration" or config.architectures[0] == "ChatGLMModel":
# Quantized ChatGLM model has ChatGLMForConditionalGeneration as architecture whereas HF model as the latter
Review comment (Contributor):
This appears to be a merge conflict issue. Nemotron and ChatGLM are checked earlier so they don't need to be checked again here. Can you also insert OLMo into the checks so that the alphabetical order is still maintained?

+        config.hidden_act = "swiglu"
+        onnx_model = ChatGLMModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+    elif config.architectures[0] == "OlmoForCausalLM":
+        onnx_model = OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    else:
        raise NotImplementedError(f"The {hf_name} model is not currently supported.")
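To make the review request above concrete, here is a sketch (not part of the diff) of the dispatch with the duplicated Nemotron and ChatGLM branches removed and OLMo inserted so the chain stays alphabetical; `select_model` is a hypothetical stand-in for the surrounding `create_model` logic:

```python
# Sketch only: the model class names come from builder.py, but this wrapper
# function is hypothetical and exists just to show the branch ordering.
def select_model(config, io_dtype, precision, execution_provider, cache_dir, extra_options):
    arch = config.architectures[0]
    if arch == "NemotronForCausalLM":
        return NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif arch == "OlmoForCausalLM":
        return OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif arch == "Qwen2ForCausalLM":
        return QwenModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    else:
        raise NotImplementedError(f"The {arch} model is not currently supported.")
```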

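With the dispatch in place, reviewers can exercise the new path end to end against the checkpoint used by the tests below. A sketch, assuming the package layout documented in src/python/py/models/README.md (output and cache paths are illustrative):

```python
# Hypothetical smoke test for the OLMo path; argument names follow the
# create_model signature shown in the hunk header above.
from onnxruntime_genai.models.builder import create_model

create_model(
    model_name="amd/AMD-OLMo-1B-SFT-DPO",
    input_path="",  # empty: pull weights from Hugging Face by model name
    output_dir="./amd-olmo-1b-int4",
    precision="int4",
    execution_provider="cpu",
    cache_dir="./cache",
)
```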
2 changes: 2 additions & 0 deletions test/python/_test_utils.py
@@ -58,6 +58,8 @@ def get_model_paths():
"phi-3.5": "microsoft/Phi-3.5-mini-instruct",
# "llama-3.2": "meta-llama/Llama-3.2-1B-instruct",
"granite-3.0": "ibm-granite/granite-3.0-2b-instruct",
"olmo": "amd/AMD-OLMo-1B-SFT-DPO",
"qwen": "Qwen/Qwen2.5-0.5B",
    }

    ci_data_path = os.path.join("/", "data", "ortgenai", "pytorch")
2 changes: 1 addition & 1 deletion test/python/requirements.txt
@@ -6,5 +6,5 @@ protobuf==5.27
sympy
pytest
onnx
-transformers
+transformers==4.44.2
huggingface_hub[cli]