From 01bbab687fa4d33f15af94815fb56232a17a701c Mon Sep 17 00:00:00 2001 From: kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:09:38 -0800 Subject: [PATCH 1/3] Update phi3-qa.py --- examples/python/phi3-qa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python/phi3-qa.py b/examples/python/phi3-qa.py index 4e3439faf..6d4abfd96 100644 --- a/examples/python/phi3-qa.py +++ b/examples/python/phi3-qa.py @@ -8,7 +8,7 @@ def main(args): started_timestamp = 0 first_token_timestamp = 0 - config = og.Config(args.model) + config = og.Config(args.model_path) config.clear_providers() if args.execution_provider != "cpu": if args.verbose: print(f"Setting model to {args.execution_provider}") From 2367e3105271abe2e73850d616e08dbde071c038 Mon Sep 17 00:00:00 2001 From: kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> Date: Sat, 4 Jan 2025 22:07:13 -0800 Subject: [PATCH 2/3] Update phi-3-tutorial.md --- examples/python/phi-3-tutorial.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/python/phi-3-tutorial.md b/examples/python/phi-3-tutorial.md index 16b0d1fd2..769a0209e 100644 --- a/examples/python/phi-3-tutorial.md +++ b/examples/python/phi-3-tutorial.md @@ -70,7 +70,7 @@ Are you on a Windows machine with GPU? ```bash curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py - python phi3-qa.py -m directml\directml-int4-awq-block-128 + python phi3-qa.py -m directml\directml-int4-awq-block-128 -e dml ``` Once the script has loaded the model, it will ask you for input in a loop, streaming the output as it is produced the model. For example: @@ -109,7 +109,7 @@ Are you on a Windows machine with GPU? ```bash curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py - python phi3-qa.py -m cuda/cuda-int4-rtn-block-32 + python phi3-qa.py -m cuda/cuda-int4-rtn-block-32 -e cuda ``` Once the script has loaded the model, it will ask you for input in a loop, streaming the output as it is produced the model. For example: @@ -142,7 +142,7 @@ Are you on a Windows machine with GPU? ```bash curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py - python phi3-qa.py -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 + python phi3-qa.py -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -p cpu ``` Once the script has loaded the model, it will ask you for input in a loop, streaming the output as it is produced the model. For example: From f4ad7da336db8cd2209fc70b9d89e103e05fc0f6 Mon Sep 17 00:00:00 2001 From: kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> Date: Sat, 4 Jan 2025 22:08:09 -0800 Subject: [PATCH 3/3] Update phi-3-tutorial.md --- examples/python/phi-3-tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python/phi-3-tutorial.md b/examples/python/phi-3-tutorial.md index 769a0209e..5b32f68d8 100644 --- a/examples/python/phi-3-tutorial.md +++ b/examples/python/phi-3-tutorial.md @@ -142,7 +142,7 @@ Are you on a Windows machine with GPU? ```bash curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py - python phi3-qa.py -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -p cpu + python phi3-qa.py -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -e cpu ``` Once the script has loaded the model, it will ask you for input in a loop, streaming the output as it is produced the model. For example: