Skip to content

Commit

Permalink
clean code
Browse files Browse the repository at this point in the history
  • Loading branch information
vietanh125 committed Nov 2, 2023
1 parent fa377e1 commit 30dd7be
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/axolotl/prompt_tokenizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def tokenize_prompt(self, prompt):
LOG.warning(f"assistant turn has empty text: {prompt}")
res = self._tokenize(
turn,
add_eos_token=False if conversation.name == 'chatml' else True,
add_eos_token=conversation.name != "chatml",
strip_bos_token=True,
)
role_res = self._tokenize(
Expand Down
10 changes: 6 additions & 4 deletions src/axolotl/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,11 @@ def train(
resume_from_checkpoint = cfg.resume_from_checkpoint

if dist.get_rank() == 0:
print('\n\n*********** INPUT SANITY CHECK ***********')
print(tokenizer.decode(train_dataset[0]['input_ids'], skip_special_tokens=False))
print('******************************************\n\n')
print("\n\n*********** INPUT SANITY CHECK ***********")
print(
tokenizer.decode(train_dataset[0]["input_ids"], skip_special_tokens=False)
)
print("******************************************\n\n")

trainer = setup_trainer(
cfg, train_dataset, eval_dataset, model, tokenizer, total_num_steps
Expand Down Expand Up @@ -171,7 +173,7 @@ def terminate_handler(_, __, model):
if not cfg.hub_model_id:
trainer.create_model_card(model_name=cfg.output_dir.lstrip("./"))
else:
dataset = [d['path'] for d in cfg.datasets]
dataset = [d["path"] for d in cfg.datasets]
trainer.push_to_hub(dataset=dataset, dataset_tags=dataset)

return model, tokenizer
Expand Down

0 comments on commit 30dd7be

Please sign in to comment.