New metrics for weight compression with dynamic quantization

openvinotoolkit · Aug 6, 2024 · 65db012
1 parent b108455
commit 65db012
Show file tree

Hide file tree

Showing 3 changed files with 1 addition and 3 deletions.
diff --git a/examples/llm_compression/openvino/tiny_llama/main.py b/examples/llm_compression/openvino/tiny_llama/main.py
@@ -67,7 +67,7 @@ def transform_fn(data, model, tokenizer):
     )
     model.save_pretrained(OUTPUT_DIR)
 
-    model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"})
+    model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR)
     input_ids = tokenizer("What is PyTorch?", return_tensors="pt").to(device=model.device)
 
     start_t = time.time()

diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py
@@ -245,7 +245,6 @@ def main():
         "PERFORMANCE_HINT": "LATENCY",
         "NUM_STREAMS": "1",
         "CACHE_DIR": "",
-        "DYNAMIC_QUANTIZATION_GROUP_SIZE": "0",
     }
     model = OVModelForCausalLM.from_pretrained(
         model_id,

diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py
@@ -275,7 +275,6 @@ def _validate(self):
                 load_in_8bit=False,
                 compile=False,
                 stateful=is_stateful,
-                ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"},
             )
         print("Evaluation of the target model")
         _, all_metrics = evaluator.score(compressed_model_hf)