Skip to content

Commit

Permalink
Fix bug in build (#75)
Browse files Browse the repository at this point in the history
* fix

* fix bug in build
  • Loading branch information
loubbrad authored Dec 5, 2023
1 parent 94680ec commit 722d4be
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
6 changes: 4 additions & 2 deletions aria/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,8 @@ def _get_seqs(_entry: MidiDict | dict, _tokenizer: Tokenizer):
try:
_tokenized_seq = _tokenizer.tokenize(_midi_dict)
except Exception as e:
- logger.error(f"Failed to tokenize midi_dict: {e}")
+ logger.info(f"Skipping midi_dict: {e}")
+ return
else:
if _tokenizer.unk_tok in _tokenized_seq:
logger.warning("Unknown token seen while tokenizing midi_dict")
Expand Down Expand Up @@ -601,7 +602,8 @@ def _build_epoch(_save_path, _midi_dataset):

buffer = []
for entry in get_seqs(tokenizer, _midi_dataset):
- buffer += entry
+ if entry is not None:
+     buffer += entry
while len(buffer) >= max_seq_len:
writer.write(buffer[:max_seq_len])
buffer = buffer[max_seq_len:]
Expand Down
6 changes: 3 additions & 3 deletions aria/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def _parse_pretrain_dataset_args():
return argp.parse_args(sys.argv[2:])


- def build_tokenized_dataset(args):
+ def build_pretraining_dataset(args):
from aria.tokenizer import TokenizerLazy
from aria.data.datasets import PretrainingDataset

Expand Down Expand Up @@ -318,9 +318,9 @@ def main():
elif args.command == "midi-dataset":
build_midi_dataset(args=_parse_midi_dataset_args())
elif args.command == "pretrain-dataset":
- build_tokenized_dataset(args=_parse_pretrain_dataset_args())
+ build_pretraining_dataset(args=_parse_pretrain_dataset_args())
elif args.command == "finetune-dataset":
- build_tokenized_dataset(args=_parse_finetune_dataset_args())
+ build_finetune_dataset(args=_parse_finetune_dataset_args())
else:
print("Unrecognized command")
parser.print_help()
Expand Down

0 comments on commit 722d4be

Please sign in to comment.