Add CPU support to OGA tooling
jeremyfowers committed Sep 4, 2024
1 parent 642af9e commit 8ed2e6b
Showing 2 changed files with 16 additions and 2 deletions.
7 changes: 6 additions & 1 deletion src/turnkeyml/llm/README.md
@@ -99,8 +99,13 @@ You can also try Phi-3-Mini-128k-Instruct with the following commands:
 
 `lemonade -i microsoft/Phi-3-mini-128k-instruct oga-load --device igpu --dtype int4 llm-prompt -p "Hello, my thoughts are"`
 
+You can also try out the CPU with:
+
-> Note: no other models or devices are officially supported by `lemonade` on OGA at this time. Contributions appreciated!
+`huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir REPO_ROOT/src/turnkeyml/llm/tools/ort_genai/models/phi-3-mini-128k-instruct`
+
+`lemonade -i microsoft/Phi-3-mini-128k-instruct oga-load --device cpu --dtype int4 llm-prompt -p "Hello, my thoughts are"`
 
+> Note: no other models or devices are officially supported by `lemonade` on OGA at this time. Contributions appreciated! It only takes a few minutes to add a new model, we just need to add a path to the downloaded model folder to the supported models dictionary in [oga.py](https://github.com/onnx/turnkeyml/blob/v4.0.2/src/turnkeyml/llm/tools/ort_genai/oga.py).
 ## Install RyzenAI NPU
 
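The README note above says that supporting another model only requires adding the path of a downloaded model folder to the supported models dictionary in oga.py. As a rough illustration of what such an entry could look like (the dictionary variable name, the `llama_3_8b` key, and the folder names below are hypothetical and not part of this commit):

```python
import os

# Hypothetical example only: registering another downloaded checkpoint for CPU.
# The variable name `supported_models`, the `llama_3_8b` key, and the folder
# layout are assumptions for illustration, not code from this commit.
llama_3_8b = "meta-llama/Meta-Llama-3-8B-Instruct"

supported_models = {
    "cpu": {
        "int4": {
            # Path is relative to lemonade's local OGA models directory
            llama_3_8b: os.path.join(
                "llama-3-8b-instruct",
                "cpu_and_mobile",
                "cpu-int4-rtn-block-32-acc-level-4",
            ),
        }
    },
}
```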
11 changes: 10 additions & 1 deletion src/turnkeyml/llm/tools/ort_genai/oga.py
@@ -190,7 +190,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
         parser.add_argument(
             "-d",
             "--device",
-            choices=["igpu", "npu"],
+            choices=["igpu", "npu", "cpu"],
             default="igpu",
             help="Which device to load the model on to (default: igpu)",
         )
@@ -237,6 +237,15 @@ def run(
                     qwen_1dot5: "qwen1.5-7b-int4",
                 }
             },
+            "cpu": {
+                "int4": {
+                    phi_3_mini_4k: os.path.join(
+                        "phi-3-mini-4k-instruct",
+                        "cpu_and_mobile",
+                        "cpu-int4-rtn-block-32-acc-level-4",
+                    ),
+                }
+            },
         }
 
         try:
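For readers unfamiliar with OGA, here is a minimal sketch of how a dictionary entry like the one added above could be resolved and loaded with onnxruntime-genai. This is not the implementation in oga.py; the function name, the `supported_models` and `models_dir` parameters, and the error handling are assumptions for illustration:

```python
import os

import onnxruntime_genai as og


def load_oga_model(supported_models, checkpoint, device, dtype, models_dir):
    """Sketch: map (device, dtype, checkpoint) to a local folder and load it."""
    try:
        subfolder = supported_models[device][dtype][checkpoint]
    except KeyError:
        raise ValueError(
            f"{checkpoint} is not supported on device={device} with dtype={dtype}"
        )

    model_folder = os.path.join(models_dir, subfolder)
    model = og.Model(model_folder)  # onnxruntime-genai loads a folder of ONNX files
    tokenizer = og.Tokenizer(model)
    return model, tokenizer
```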
