diff --git a/ch05/03_bonus_pretraining_on_gutenberg/pretraining_simple.py b/ch05/03_bonus_pretraining_on_gutenberg/pretraining_simple.py index a4cb7de8..0bbf2b15 100644 --- a/ch05/03_bonus_pretraining_on_gutenberg/pretraining_simple.py +++ b/ch05/03_bonus_pretraining_on_gutenberg/pretraining_simple.py @@ -180,7 +180,7 @@ def train_model_simple(model, optimizer, device, n_epochs, "emb_dim": 12, # Embedding dimension "n_heads": 2, # Number of attention heads "n_layers": 2, # Number of layers - "drop_rate": 0.0, # Dropout rate + "drop_rate": 0.0, # Dropout rate, deactivated via 0.0 as dropout in LLMs is not recommended anymore "qkv_bias": False # Query-key-value bias }