From 2b18c58ad2a9e40a2a17c5705c46cd9c702a5b2e Mon Sep 17 00:00:00 2001 From: sichu Date: Wed, 4 Dec 2024 20:58:51 +0000 Subject: [PATCH] increase sequence length to avoid nan grad --- .../bionemo/esm2/scripts/test_train_esm2.py | 18 ++++++++++++------- .../scripts/test_train_geneformer.py | 18 +++++++++++++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/sub-packages/bionemo-esm2/tests/bionemo/esm2/scripts/test_train_esm2.py b/sub-packages/bionemo-esm2/tests/bionemo/esm2/scripts/test_train_esm2.py index 9023e399eb..70a516cce5 100644 --- a/sub-packages/bionemo-esm2/tests/bionemo/esm2/scripts/test_train_esm2.py +++ b/sub-packages/bionemo-esm2/tests/bionemo/esm2/scripts/test_train_esm2.py @@ -162,15 +162,15 @@ def test_val_dataloader_in_main_runs_with_limit_val_batches( valid_database_path=dummy_protein_dataset, num_nodes=1, devices=1, - min_seq_length=None, + min_seq_length=128, max_seq_length=128, result_dir=result_dir, wandb_project=None, wandb_offline=True, - num_steps=10, + num_steps=5, warmup_steps=2, limit_val_batches=limit_val_batches, - val_check_interval=1, + val_check_interval=2, log_every_n_steps=None, num_dataset_workers=1, biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec, @@ -219,13 +219,17 @@ def test_pretrain_cli(tmpdir, dummy_protein_dataset, dummy_parquet_train_val_inp --experiment-name test_experiment \ --num-gpus 1 \ --num-nodes 1 \ - --val-check-interval 10 \ + --val-check-interval 2 \ --num-dataset-workers 1 \ - --num-steps 55 \ + --num-steps 5 \ --max-seq-length 128 \ - --limit-val-batches 2 \ + --limit-val-batches 1 \ --micro-batch-size 2 \ - --accumulate-grad-batches 2 + --accumulate-grad-batches 2 \ + --num-layers 2 \ + --num-attention-heads 2 \ + --hidden-size 4 \ + --ffn-hidden-size 8 """.strip() # a local copy of the environment diff --git a/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/scripts/test_train_geneformer.py 
b/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/scripts/test_train_geneformer.py index 4bfec250f7..2112a7ba89 100644 --- a/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/scripts/test_train_geneformer.py +++ b/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/scripts/test_train_geneformer.py @@ -49,9 +49,9 @@ def test_main_runs(tmpdir): result_dir=result_dir, wandb_project=None, wandb_offline=True, - num_steps=55, + num_steps=5, limit_val_batches=1, - val_check_interval=1, + val_check_interval=2, num_dataset_workers=0, biobert_spec_option=BiobertSpecOption.bert_layer_local_spec, lr=1e-4, @@ -63,6 +63,10 @@ def test_main_runs(tmpdir): experiment_name="test_experiment", resume_if_exists=False, create_tensorboard_logger=False, + num_layers=2, + num_attention_heads=2, + hidden_size=4, + ffn_hidden_size=4 * 2, ) assert (result_dir / "test_experiment").exists(), "Could not find test experiment directory." @@ -91,13 +95,17 @@ def test_pretrain_cli(tmpdir): --experiment-name test_experiment \ --num-gpus 1 \ --num-nodes 1 \ - --val-check-interval 10 \ + --val-check-interval 2 \ --num-dataset-workers 0 \ - --num-steps 55 \ + --num-steps 5 \ --seq-length 128 \ --limit-val-batches 2 \ --micro-batch-size 2 \ - --accumulate-grad-batches 2 + --accumulate-grad-batches 2 \ + --num-layers 2 \ + --num-attention-heads 2 \ + --hidden-size 4 \ + --ffn-hidden-size 8 """.strip() env = dict(**os.environ) # a local copy of the environment env["MASTER_PORT"] = str(open_port)