Skip to content

Commit

Permalink
Increase sequence length to avoid NaN gradients
Browse files: browse the repository at this point in the history
  • Loading branch information
sichu2023 committed Dec 4, 2024
1 parent 2a7706a commit 2b18c58
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 12 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -162,15 +162,15 @@ def test_val_dataloader_in_main_runs_with_limit_val_batches(
valid_database_path=dummy_protein_dataset,
num_nodes=1,
devices=1,
min_seq_length=None,
min_seq_length=128,
max_seq_length=128,
result_dir=result_dir,
wandb_project=None,
wandb_offline=True,
num_steps=10,
num_steps=5,
warmup_steps=2,
limit_val_batches=limit_val_batches,
val_check_interval=1,
val_check_interval=2,
log_every_n_steps=None,
num_dataset_workers=1,
biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
Expand Down Expand Up @@ -219,13 +219,18 @@ def test_pretrain_cli(tmpdir, dummy_protein_dataset, dummy_parquet_train_val_inp
--experiment-name test_experiment \
--num-gpus 1 \
--num-nodes 1 \
--val-check-interval 10 \
--val-check-interval 2 \
--num-dataset-workers 1 \
--num-steps 55 \
--num-steps 5 \
--max-seq-length 128 \
--limit-val-batches 2 \
--limit-val-batches 1 \
--val-check-interval 2 \
--micro-batch-size 2 \
--accumulate-grad-batches 2
--accumulate-grad-batches 2 \
--num-layers 2 \
--num-attention-heads 2 \
--hidden-size 4 \
--ffn-hidden-size 8
""".strip()

# a local copy of the environment
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -49,9 +49,9 @@ def test_main_runs(tmpdir):
result_dir=result_dir,
wandb_project=None,
wandb_offline=True,
num_steps=55,
num_steps=5,
limit_val_batches=1,
val_check_interval=1,
val_check_interval=2,
num_dataset_workers=0,
biobert_spec_option=BiobertSpecOption.bert_layer_local_spec,
lr=1e-4,
Expand All @@ -63,6 +63,10 @@ def test_main_runs(tmpdir):
experiment_name="test_experiment",
resume_if_exists=False,
create_tensorboard_logger=False,
num_layers=2,
num_attention_heads=2,
hidden_size=4,
ffn_hidden_size=4 * 2,
)

assert (result_dir / "test_experiment").exists(), "Could not find test experiment directory."
Expand Down Expand Up @@ -91,13 +95,17 @@ def test_pretrain_cli(tmpdir):
--experiment-name test_experiment \
--num-gpus 1 \
--num-nodes 1 \
--val-check-interval 10 \
--val-check-interval 2 \
--num-dataset-workers 0 \
--num-steps 55 \
--num-steps 5 \
--seq-length 128 \
--limit-val-batches 2 \
--micro-batch-size 2 \
--accumulate-grad-batches 2
--accumulate-grad-batches 2 \
--num-layers 2 \
--num-attention-heads 2 \
--hidden-size 4 \
--ffn-hidden-size 8
""".strip()
env = dict(**os.environ) # a local copy of the environment
env["MASTER_PORT"] = str(open_port)
Expand Down

0 comments on commit 2b18c58

Please sign in to comment.