From 3aac1fb018bfcc48d8a489638099762a087934d6 Mon Sep 17 00:00:00 2001
From: Vidya S Galli
Date: Wed, 10 Jul 2024 19:56:25 +0000
Subject: [PATCH] added chat template option

---
 examples/text-generation/README.md         | 44 +++++++++++++++++++++++
 examples/text-generation/run_generation.py | 16 ++++++----
 .../sample_command_r_template.json          |  1 +
 tests/test_text_generation_example.py       |  5 ++++
 4 files changed, 61 insertions(+), 5 deletions(-)
 create mode 100644 examples/text-generation/sample_command_r_template.json

diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md
index e020e72a79..805a903908 100755
--- a/examples/text-generation/README.md
+++ b/examples/text-generation/README.md
@@ -544,3 +544,47 @@ deepspeed --num_gpus 8 run_lm_eval.py \
 
 ## Text-Generation Pipeline
 A Transformers-like pipeline is defined and provided [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation/text-generation-pipeline). It is optimized for Gaudi and can be called to generate text in your scripts.
+
+## Conversation generation
+
+For models that support chat, such as `CohereForAI/c4ai-command-r-v01`, you can pass `--chat_template <file>`, where `<file>` is a JSON file containing the chat messages; the tokenizer's chat template is applied to these messages to build the prompt.
+
+### Examples
+
+A sample messages file, `sample_command_r_template.json`, for [CohereForAI/c4ai-command-r-v01](https://huggingface.co/CohereForAI/c4ai-command-r-v01) is shown below:
+
+```json
+[{"role": "user", "content": "Hello, how are you?"}]
+```
+
+Command to run chat generation:
+
+```bash
+python run_generation.py \
+--model_name_or_path CohereForAI/c4ai-command-r-v01 \
+--use_hpu_graphs \
+--use_kv_cache \
+--max_new_tokens 100 \
+--do_sample \
+--chat_template sample_command_r_template.json \
+--bf16 \
+--batch_size 2
+```
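+
+Note that the file passed to `--chat_template` holds the list of chat messages, not the template itself; the tokenizer's own chat template turns them into a prompt. As a minimal sketch of what the script does with this file (assuming a `transformers` tokenizer that ships a chat template), it is roughly equivalent to:
+
+```python
+import json
+
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")
+
+# Load the chat messages from the JSON file passed via --chat_template
+with open("sample_command_r_template.json") as fh:
+    messages = json.load(fh)
+
+# Render the messages into a single prompt string using the model's chat template
+prompt = tokenizer.apply_chat_template(messages, tokenize=False)
+print(prompt)
+```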
diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py
index 28cd82633e..11c33b5a55 100755
--- a/examples/text-generation/run_generation.py
+++ b/examples/text-generation/run_generation.py
@@ -287,6 +287,12 @@ def setup_parser(parser):
         action="store_true",
         help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
     )
+    parser.add_argument(
+        "--chat_template",
+        default=None,
+        type=str,
+        help="Optional JSON input file containing chat messages to which the tokenizer's chat template is applied.",
+    )
 
     args = parser.parse_args()
     if args.torch_compile:
@@ -369,11 +375,11 @@ def assemble_prompt(prompt_size, book_path):
             "Peace is the only way",
         ]
 
-        # Format message with the command-r chat template
-        if model.config.model_type == "cohere":
-            for i, sentence in enumerate(input_sentences):
-                message = [{"role": "user", "content": sentence}]
-                input_sentences[i] = tokenizer.apply_chat_template(message, tokenize=False)
+        # Apply the tokenizer's chat template to the messages read from the input file
+        if args.chat_template and hasattr(tokenizer, "chat_template"):
+            with open(args.chat_template, "r") as fh:
+                messages = json.load(fh)
+            input_sentences = [tokenizer.apply_chat_template(messages, tokenize=False)]
 
         if args.batch_size > len(input_sentences):
             # Dynamically extends to support larger batch sizes
diff --git a/examples/text-generation/sample_command_r_template.json b/examples/text-generation/sample_command_r_template.json
new file mode 100644
index 0000000000..ddfd802b2d
--- /dev/null
+++ b/examples/text-generation/sample_command_r_template.json
@@ -0,0 +1 @@
+[{"role": "user", "content": "Hello, how are you?"}]
\ No newline at end of file
diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py
index 72a11bc351..9f67757f6f 100644
--- a/tests/test_text_generation_example.py
+++ b/tests/test_text_generation_example.py
@@ -190,6 +190,11 @@ def _test_text_generation(
             "--limit_hpu_graphs",
         ]
 
+    if "command_r" in model_name.lower():
+        path_to_template = os.path.join(
+            path_to_example_dir, "text-generation/sample_command_r_template.json")
+        command += [f"--chat_template {path_to_template}"]
+
     with TemporaryDirectory() as tmp_dir:
         command.append(f"--output_dir {tmp_dir}")
         command.append(f"--token {token.value}")