From fb60d7a7eb93b2f957e728aeee8e997b3cb8b7c4 Mon Sep 17 00:00:00 2001
From: Sami Jaghouar
Date: Tue, 16 Jul 2024 17:16:42 +0000
Subject: [PATCH] fix config

---
 open_diloco/configs/config_150m.json | 21 +++++++++------------
 open_diloco/configs/config_1b.json   |  5 +++--
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/open_diloco/configs/config_150m.json b/open_diloco/configs/config_150m.json
index 7e27472..80e118d 100644
--- a/open_diloco/configs/config_150m.json
+++ b/open_diloco/configs/config_150m.json
@@ -1,12 +1,9 @@
-{
-    "architectures": [
-        "LlamaForCausalLM"
-    ],
-    "model_type": "llama",
-    "hidden_size": 1024,
-    "intermediate_size": 2688,
-    "num_attention_heads": 16,
-    "num_hidden_layers": 12,
-    "use_cache": false,
-    "rms_norm_eps": 1e-05
-}
+{
+    "name": "llama150m",
+    "n_embd": 1024,
+    "intermediate_size": 4096,
+    "n_head": 16,
+    "n_layer": 12,
+    "vocab_size": 32000,
+    "block_size": 1024
+}
\ No newline at end of file
diff --git a/open_diloco/configs/config_1b.json b/open_diloco/configs/config_1b.json
index 8a7584d..d2a98d8 100644
--- a/open_diloco/configs/config_1b.json
+++ b/open_diloco/configs/config_1b.json
@@ -1,9 +1,10 @@
 {
-    "name": "llama",
+    "name": "llama1b",
     "n_embd": 2048,
     "intermediate_size": 5632,
     "n_head": 32,
     "n_layer": 22,
     "n_query_groups": 4,
-    "vocab_size": 1024
+    "vocab_size": 32000,
+    "block_size": 1024
 }
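
As a quick sanity check on the new configs, a minimal sketch follows. It assumes
only what the diff shows: plain JSON files at the repo-relative paths above,
using lit-gpt style field names (n_embd, n_head, n_layer, intermediate_size,
n_query_groups, vocab_size). The parameter-count arithmetic is the standard
LLaMA-style estimate, not anything defined in this repo, and it ignores norm
and bias weights.

import json

def approx_llama_params(cfg: dict) -> int:
    """Rough parameter count for a LLaMA-style decoder from a lit-gpt
    style config dict. Illustrative only: skips RMSNorm weights and
    does not count a separate (untied) LM head."""
    d = cfg["n_embd"]
    heads = cfg["n_head"]
    head_dim = d // heads
    # Grouped-query attention (n_query_groups) shrinks the K/V projections;
    # absent the key, fall back to full multi-head attention.
    kv_heads = cfg.get("n_query_groups", heads)
    attn = d * d                          # Q projection
    attn += 2 * d * kv_heads * head_dim   # K and V projections
    attn += d * d                         # output projection
    mlp = 3 * d * cfg["intermediate_size"]  # SwiGLU gate/up/down projections
    return cfg["n_layer"] * (attn + mlp) + cfg["vocab_size"] * d

for path in ("open_diloco/configs/config_150m.json",
             "open_diloco/configs/config_1b.json"):
    with open(path) as f:
        cfg = json.load(f)
    print(f'{cfg["name"]}: ~{approx_llama_params(cfg) / 1e6:.0f}M params')

Run from the repo root, this prints an order-of-magnitude figure for each
config, which makes it easy to catch typos like the old "vocab_size": 1024
that this patch corrects.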