From 65db012efabe3dec8170608b87700699a80132ee Mon Sep 17 00:00:00 2001
From: Nikolay
Date: Tue, 6 Aug 2024 17:54:35 +0200
Subject: [PATCH] New metrics for weight compression with dynamic quantization

---
 examples/llm_compression/openvino/tiny_llama/main.py    | 2 +-
 .../openvino/tiny_llama_find_hyperparams/main.py         | 1 -
 tests/post_training/pipelines/lm_weight_compression.py   | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/llm_compression/openvino/tiny_llama/main.py b/examples/llm_compression/openvino/tiny_llama/main.py
index dd03a4361c6..f2be54ce1aa 100644
--- a/examples/llm_compression/openvino/tiny_llama/main.py
+++ b/examples/llm_compression/openvino/tiny_llama/main.py
@@ -67,7 +67,7 @@ def transform_fn(data, model, tokenizer):
     )
     model.save_pretrained(OUTPUT_DIR)
 
-    model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"})
+    model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR)
 
     input_ids = tokenizer("What is PyTorch?", return_tensors="pt").to(device=model.device)
     start_t = time.time()
diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py
index 7ab0176eb85..6b57b9481f2 100644
--- a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py
+++ b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py
@@ -245,7 +245,6 @@ def main():
         "PERFORMANCE_HINT": "LATENCY",
         "NUM_STREAMS": "1",
         "CACHE_DIR": "",
-        "DYNAMIC_QUANTIZATION_GROUP_SIZE": "0",
     }
     model = OVModelForCausalLM.from_pretrained(
         model_id,
diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py
index 27479fe6a50..06074701b42 100644
--- a/tests/post_training/pipelines/lm_weight_compression.py
+++ b/tests/post_training/pipelines/lm_weight_compression.py
@@ -275,7 +275,6 @@ def _validate(self):
                 load_in_8bit=False,
                 compile=False,
                 stateful=is_stateful,
-                ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"},
             )
         print("Evaluation of the target model")
         _, all_metrics = evaluator.score(compressed_model_hf)