Update convert_and_optimize_asr.py #1659

Merged · 2 commits · Feb 7, 2024
recipes/conversational_voice_agent/convert_and_optimize_asr.py (30 changes: 20 additions & 10 deletions)
@@ -1,29 +1,38 @@
 import argparse
 from pathlib import Path

 import openvino as ov
 from optimum.intel.openvino import OVModelForSpeechSeq2Seq
 from transformers import AutoProcessor

 MODEL_NAME = "distil-whisper/distil-large-v2"


-def convert_asr_model(model_dir: Path) -> Path:
+def convert_asr_model(use_quantization: bool, model_dir: Path) -> Path:
     """
     Convert speech-to-text model

     Params:
+        use_quantization: whether to quantize weights to INT8
         model_dir: dir to export model
     Returns:
         Path to exported model dir
     """
     output_dir = model_dir / (MODEL_NAME.rsplit("/")[-1] + "-FP16")

-    # load model and convert it to OpenVINO
-    model = OVModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME, export=True, compile=False)
-    # change precision to FP16
-    model.half()
-    # save model to disk
-    model.save_pretrained(output_dir)
+    # load the model and convert it to OpenVINO, keeping weight compression off for the FP16 export
+    ov_model = OVModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME, export=True, compile=False, load_in_8bit=False)
+    # change precision to FP16 and save the model to disk
+    ov_model.half()
+    ov_model.save_pretrained(output_dir)
+
+    if use_quantization:
+        # use Optimum Intel to directly quantize the weights of the ASR model into INT8
+        quantized_distil_model_path = model_dir / (MODEL_NAME.rsplit("/")[-1] + "-INT8")
+        quantized_ov_model = OVModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME, export=True, compile=False, load_in_8bit=True)
+        quantized_ov_model.save_pretrained(quantized_distil_model_path)
+        output_dir = quantized_distil_model_path

     # export also processor
     asr_processor = AutoProcessor.from_pretrained(MODEL_NAME)
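
As a usage note, the directory produced above can be loaded straight back through Optimum Intel for inference. The snippet below is a minimal sketch rather than part of this change: the INT8 path assumes the script ran with quantization enabled, "sample.wav" is a placeholder audio file, and it presumes the processor is saved into the same directory (as the export step here does).

from pathlib import Path

from optimum.intel.openvino import OVModelForSpeechSeq2Seq
from transformers import AutoProcessor, pipeline

# hypothetical output of convert_asr_model(True, Path("model"))
model_dir = Path("model") / "distil-large-v2-INT8"

ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir)  # loads the exported IR, no re-export
processor = AutoProcessor.from_pretrained(model_dir)

asr = pipeline(
    "automatic-speech-recognition",
    model=ov_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)
print(asr("sample.wav")["text"])  # placeholder audio file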
@@ -34,7 +43,8 @@ def convert_asr_model(model_dir: Path) -> Path:

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
+    parser.add_argument("--use_quantization", default=True, help="Whether to quantize the ASR model to INT8")
     parser.add_argument("--model_dir", type=str, default="model", help="Directory to place the model in")

     args = parser.parse_args()
-    convert_asr_model(Path(args.model_dir))
+    convert_asr_model(args.use_quantization, Path(args.model_dir))
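
One caveat with the new flag: argparse stores whatever follows --use_quantization as a string, and any non-empty string (including "False") is truthy, so with default=True the quantization step cannot actually be switched off from the command line. The sketch below is one possible follow-up fix, not what this PR does, using the paired flags generated by argparse.BooleanOptionalAction (Python 3.9+):

import argparse

parser = argparse.ArgumentParser()
# BooleanOptionalAction registers both --use_quantization and --no-use_quantization,
# so the default of True can genuinely be overridden from the command line
parser.add_argument("--use_quantization", action=argparse.BooleanOptionalAction, default=True,
                    help="Quantize the ASR model weights to INT8")
parser.add_argument("--model_dir", type=str, default="model", help="Directory to place the model in")

# explicit argv so the example is deterministic when run as a script
args = parser.parse_args(["--no-use_quantization"])
print(args.use_quantization)  # False

On older interpreters, a common alternative is parser.add_argument("--no_quantization", dest="use_quantization", action="store_false"), which keeps quantization on by default while still allowing an explicit opt-out.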