Skip to content

Commit

Permalink
New metrics for weight compression with dynamic quantization
Browse files Browse the repository at this point in the history
  • Loading branch information
ljaljushkin committed Aug 6, 2024
1 parent b108455 commit 65db012
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 3 deletions.
2 changes: 1 addition & 1 deletion examples/llm_compression/openvino/tiny_llama/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def transform_fn(data, model, tokenizer):
)
model.save_pretrained(OUTPUT_DIR)

- model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"})
+ model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR)
input_ids = tokenizer("What is PyTorch?", return_tensors="pt").to(device=model.device)

start_t = time.time()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ def main():
"PERFORMANCE_HINT": "LATENCY",
"NUM_STREAMS": "1",
"CACHE_DIR": "",
- "DYNAMIC_QUANTIZATION_GROUP_SIZE": "0",
}
model = OVModelForCausalLM.from_pretrained(
model_id,
Expand Down
1 change: 0 additions & 1 deletion tests/post_training/pipelines/lm_weight_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ def _validate(self):
load_in_8bit=False,
compile=False,
stateful=is_stateful,
- ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"},
)
print("Evaluation of the target model")
_, all_metrics = evaluator.score(compressed_model_hf)
Expand Down

0 comments on commit 65db012

Please sign in to comment.