Commit 138b818

[Tokenizers] Revise Parameters (openvinotoolkit#777)

* Refactoring

Del node_factory.py
Move from os to Path
Support -e install for _ext_libs_path
Wrap Core.__init__

* Rename with_decoder to with_detokenizer
apaniukov authored Dec 8, 2023
1 parent 11b8b72 commit 138b818
Showing 6 changed files with 44 additions and 67 deletions.
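
The user-visible part of this commit is the keyword rename from the second bullet of the commit message; a minimal before/after sketch of a call site (not taken verbatim from the diff; the checkpoint name is borrowed from the test file below):

from transformers import AutoTokenizer
from ov_tokenizer import convert_tokenizer

hf_tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b_v2")

# before this commit:
# ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_decoder=True)
# after this commit:
ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
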
@@ -121,14 +121,13 @@ import numpy as np
from openvino import compile_model, convert_model
from transformers import AutoModelForCausalLM, AutoTokenizer
from ov_tokenizer import (
    add_greedy_decoding,
    convert_tokenizer,
    init_extension,
    pack_strings,
    unpack_strings,
)


init_extension("path/to/libuser_ov_extensions.so")

# Use different repo for the tokenizer because the original repo doesn't have .model file
@@ -140,7 +139,7 @@ hf_model = AutoModelForCausalLM.from_pretrained(model_checkpoint, use_cache=Fals

# convert hf tokenizer
text_input = ["Quick brown fox was"]
-ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_decoder=True)
+ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
compiled_tokenizer = compile_model(ov_tokenizer)

# transform input text into tokens
@@ -156,20 +155,20 @@ compiled_model = compile_model(ov_model_with_greedy_decoding)
new_tokens_size = 10
prompt_size = ov_input["input_ids"].shape[-1]
input_dict = {
    output.any_name: np.hstack([tensor, np.zeros(shape=(1, new_tokens_size), dtype=np.int_)])
    for output, tensor in ov_input.items()
}
for idx in range(prompt_size, prompt_size + new_tokens_size):
    output = compiled_model(input_dict)["token_ids"]
    input_dict["input_ids"][:, idx] = output[:, idx - 1]
    input_dict["attention_mask"][:, idx] = 1
ov_token_ids = input_dict["input_ids"]

hf_token_ids = hf_model.generate(
    **hf_input,
    min_new_tokens=new_tokens_size,
    max_new_tokens=new_tokens_size,
    temperature=0,  # greedy decoding
)

# decode model output
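
The hunk stops at the decoding comment; a hedged sketch of how that step plausibly continues, using only names already defined above (the actual README lines are outside this diff):

compiled_detokenizer = compile_model(ov_detokenizer)
ov_text = unpack_strings(list(compiled_detokenizer(ov_token_ids).values())[0])  # first output holds the packed strings
hf_text = hf_tokenizer.batch_decode(hf_token_ids, skip_special_tokens=True)
print(ov_text, hf_text)
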
@@ -1,48 +1,51 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import functools
import os
import sys
import sysconfig
from pathlib import Path

import openvino
from openvino.runtime.utils.node_factory import NodeFactory

from .convert_tokenizer import convert_tokenizer
-from .node_factory import _extension_path, init_extension
from .str_pack import pack_strings, unpack_strings
from .utils import add_greedy_decoding, connect_models


_extension_path = os.environ.get("OV_TOKENIZER_PREBUILD_EXTENSION_PATH")
_ext_name = "user_ov_extensions"
if _extension_path:
    # when the path to extension set manually
-    _ext_libs_path = os.path.dirname(_extension_path)
+    _ext_libs_path = Path(_extension_path).parent
else:
    # python installation case
-    _ext_libs_path = os.path.join(os.path.dirname(__file__), "libs")
+    _ext_libs_path = Path(sysconfig.get_paths()["purelib"]) / __name__ / "libs"

if sys.platform == "win32":
-    _ext_path = os.path.join(_ext_libs_path, f"{_ext_name}.dll")
-    if os.path.isdir(_ext_libs_path):
+    _ext_path = _ext_libs_path / f"{_ext_name}.dll"
+    if _ext_libs_path.is_dir():
        # On Windows, with Python >= 3.8, DLLs are no longer imported from the PATH.
-        os.add_dll_directory(os.path.abspath(_ext_libs_path))
+        os.add_dll_directory(str(_ext_libs_path.absolute()))
    else:
        sys.exit(f"Error: extention libriary path {_ext_libs_path} not found")
elif sys.platform == "darwin":
-    _ext_path = os.path.join(_ext_libs_path, f"lib{_ext_name}.dylib")
+    _ext_path = _ext_libs_path / f"lib{_ext_name}.dylib"
elif sys.platform == "linux":
-    _ext_path = os.path.join(_ext_libs_path, f"lib{_ext_name}.so")
+    _ext_path = _ext_libs_path / f"lib{_ext_name}.so"
else:
sys.exit(f"Error: extention does not support platform {sys.platform}")
sys.exit(f"Error: extension does not support platform {sys.platform}")

# patching openvino
old_core_init = openvino.runtime.Core.__init__


-def new_core_init(self, *k, **kw):
-    old_core_init(self, *k, **kw)
-    self.add_extension(_ext_path)
+@functools.wraps(old_core_init)
+def new_core_init(self, *args, **kwargs):
+    old_core_init(self, *args, **kwargs)
+    self.add_extension(str(_ext_path))  # Core.add_extension doesn't support Path object


openvino.runtime.Core.__init__ = new_core_init
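
With the constructor wrapped this way, importing the package is all a user needs for subsequently created Core objects to carry the tokenizer extension; a rough usage sketch (the IR path and device are placeholders, not part of this commit):

import openvino
import ov_tokenizer  # importing the package applies the patch above

core = openvino.runtime.Core()  # the wrapped __init__ has already added the extension
compiled_tokenizer = core.compile_model("openvino_tokenizer.xml", "CPU")  # placeholder path to a converted tokenizer IR
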
@@ -18,7 +18,7 @@
def convert_tokenizer(
    tokenizer_object: Any,
    number_of_inputs: int = 1,
-    with_decoder: bool = False,
+    with_detokenizer: bool = False,
    streaming_decoder: bool = False,
    tokenizer_output_type: Type = Type.i64,
    detokenizer_input_type: Type = Type.i64,
@@ -46,21 +46,21 @@ def convert_tokenizer(
        ov_tokenizers = convert_sentencepiece_model_tokenizer(
            tokenizer_object,
            add_attention_mask=True,
-            with_decoder=with_decoder,
+            with_detokenizer=with_detokenizer,
            streaming_decoder=streaming_decoder,
        )
    elif is_tiktoken_model(tokenizer_object):
        logger.info("Convert tiktoken-based tokenizer")
        ov_tokenizers = convert_tiktoken_model_tokenizer(
            tokenizer_object,
-            with_decoder=with_decoder,
+            with_detokenizer=with_detokenizer,
        )
    elif isinstance(tokenizer_object, PreTrainedTokenizerFast):
        logger.info("Convert Huggingface Fast tokenizer pipeline.")
        ov_tokenizers = convert_fast_tokenizer(
            tokenizer_object,
            number_of_inputs=number_of_inputs,
-            with_decoder=with_decoder,
+            with_detokenizer=with_detokenizer,
        )

    if ov_tokenizers is None:
@@ -275,7 +275,7 @@ def decoding(self) -> None:
def convert_fast_tokenizer(
hf_tokenizer: "PreTrainedTokenizerBase",
number_of_inputs: int = 1,
with_decoder: bool = False,
with_detokenizer: bool = False,
) -> Union[Model, Tuple[Model, Model]]:
pipeline = TransformersTokenizerPipelineParser(hf_tokenizer).parse(number_of_inputs=number_of_inputs)
ov_tokenizer = pipeline.get_encoder_ov_subgraph()
@@ -300,7 +300,7 @@ def convert_fast_tokenizer(
        filtered_outputs.append(ov_tokenizer.output(i))

    tokenizer_model = Model(filtered_outputs, ov_tokenizer.get_parameters(), TOKENIZER_ENCODER_NAME)
-    if with_decoder:
+    if with_detokenizer:
        return tokenizer_model, pipeline.get_decoder_ov_subgraph()

    return tokenizer_model
@@ -329,7 +329,7 @@ def add_tokens_to_sentencepiece_model(sp_model_path: Path, hf_tokenizer: "PreTra
def convert_sentencepiece_model_tokenizer(
hf_tokenizer: "PreTrainedTokenizerBase",
add_attention_mask: bool = True,
with_decoder: bool = False,
with_detokenizer: bool = False,
streaming_decoder: bool = False,
) -> Union[Model, Tuple[Model, Model]]:
if not is_sentencepiece_model(hf_tokenizer):
@@ -423,7 +423,7 @@ def convert_sentencepiece_model_tokenizer(
    tokenizer_encoder = Model(outputs, [input_node], TOKENIZER_ENCODER_NAME)
    tokenizer_encoder.validate_nodes_and_infer_types()

-    if not with_decoder:
+    if not with_detokenizer:
        return tokenizer_encoder

    return tokenizer_encoder, get_sp_decoder(sp_model_node, streaming_decoder=streaming_decoder)
@@ -460,7 +460,7 @@ def is_tiktoken_model(hf_tokenizer: "PreTrainedTokenizerBase") -> bool:

def convert_tiktoken_model_tokenizer(
hf_tokenizer: "PreTrainedTokenizerBase",
with_decoder: bool = False,
with_detokenizer: bool = False,
) -> Union[Model, Tuple[Model, Model]]:
encoding = getattr(hf_tokenizer, "tokenizer", None) or hf_tokenizer.encoder
split_pattern = encoding._pat_str
@@ -480,7 +480,7 @@ def convert_tiktoken_model_tokenizer(
            CharsToBytesStep(),
        ]
    )
-    if not with_decoder:
+    if not with_detokenizer:
        return pipeline.get_encoder_ov_subgraph()

    return pipeline.get_encoder_ov_subgraph(), pipeline.get_decoder_ov_subgraph()

node_factory.py: This file was deleted.

@@ -124,7 +124,7 @@ def get_tokenizer(request, fast_tokenizer=True, trust_remote_code=False):
    hf_tokenizer = AutoTokenizer.from_pretrained(
        request.param, use_fast=fast_tokenizer, trust_remote_code=trust_remote_code
    )
-    ov_tokenizer = convert_tokenizer(hf_tokenizer, with_decoder=False)
+    ov_tokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=False)
    compiled_tokenizer = core.compile_model(ov_tokenizer)
    return hf_tokenizer, compiled_tokenizer

@@ -133,7 +133,7 @@ def get_tokenizer_detokenizer(request, fast_tokenizer=True, trust_remote_code=Fa
    hf_tokenizer = AutoTokenizer.from_pretrained(
        request.param, use_fast=fast_tokenizer, trust_remote_code=trust_remote_code
    )
-    ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_decoder=True)
+    ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
    compiled_tokenizer = core.compile_model(ov_tokenizer)
    compiled_detokenizer = core.compile_model(ov_detokenizer)
    return hf_tokenizer, compiled_tokenizer, compiled_detokenizer
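
For context, a hedged sketch (not part of this diff) of how a test might exercise the fixture above; the output names and string-packing helpers follow the README example earlier in this commit:

from ov_tokenizer import pack_strings, unpack_strings

def check_round_trip(hf_tokenizer, compiled_tokenizer, compiled_detokenizer, text="test string"):
    token_ids = compiled_tokenizer(pack_strings([text]))["input_ids"]
    detok_result = compiled_detokenizer(token_ids)
    ov_text = unpack_strings(list(detok_result.values())[0])[0]  # first output holds the packed strings
    hf_text = hf_tokenizer.decode(hf_tokenizer(text)["input_ids"], skip_special_tokens=True)
    assert ov_text.strip() == hf_text.strip()
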
@@ -326,7 +326,7 @@ def test_tiktoken_detokenizer(tiktoken_tokenizers, test_string):

def test_streaming_detokenizer():
    hf_tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b_v2")
-    _, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_decoder=True, streaming_decoder=True)
+    _, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True, streaming_decoder=True)
    ov_detokenizer = core.compile_model(ov_detokenizer)

    test_string = "this is a test string"