diff --git a/sub-packages/bionemo-llm/src/bionemo/llm/model/biobert/model.py b/sub-packages/bionemo-llm/src/bionemo/llm/model/biobert/model.py index 057859c388..dee9e002ee 100644 --- a/sub-packages/bionemo-llm/src/bionemo/llm/model/biobert/model.py +++ b/sub-packages/bionemo-llm/src/bionemo/llm/model/biobert/model.py @@ -477,13 +477,13 @@ class BioBertConfig( # From megatron.core.models.gpt.bert_model.GPTModel kv_channels: int | None = None fp16_lm_cross_entropy: bool = False - apply_rope_fusion: bool = True + apply_rope_fusion: bool = False parallel_output: bool = True - bias_dropout_fusion: bool = True - bias_activation_fusion: bool = True + bias_dropout_fusion: bool = False + bias_activation_fusion: bool = False masked_softmax_fusion: bool = True persist_layer_norm: bool = True - get_attention_mask_from_fusion: bool = True + get_attention_mask_from_fusion: bool = False share_embeddings_and_output_weights: bool = False # try True make_vocab_size_divisible_by: int = 128 position_embedding_type: PositionEmbeddingKinds = "learned_absolute"