diff --git a/open_diloco/configs/config_150m.json b/open_diloco/configs/config_150m.json
index 7e27472..80e118d 100644
--- a/open_diloco/configs/config_150m.json
+++ b/open_diloco/configs/config_150m.json
@@ -1,13 +1,10 @@
-{
-    "architectures": [
-        "LlamaForCausalLM"
-    ],
-    "model_type": "llama",
-    "hidden_size": 1024,
-    "intermediate_size": 2688,
-    "num_attention_heads": 16,
-    "num_hidden_layers": 12,
-    "use_cache": false,
-    "rms_norm_eps": 1e-05
-}
+{
+    "name": "llama150m",
+    "n_embd": 1024,
+    "intermediate_size": 4096,
+    "n_head": 16,
+    "n_layer": 12,
+    "vocab_size": 32000,
+    "block_size": 1024
+}
\ No newline at end of file
diff --git a/open_diloco/configs/config_1b.json b/open_diloco/configs/config_1b.json
index 8a7584d..d2a98d8 100644
--- a/open_diloco/configs/config_1b.json
+++ b/open_diloco/configs/config_1b.json
@@ -1,9 +1,10 @@
 {
-    "name": "llama",
+    "name": "llama1b",
     "n_embd": 2048,
     "intermediate_size": 5632,
     "n_head": 32,
     "n_layer": 22,
     "n_query_groups": 4,
-    "vocab_size": 1024
+    "vocab_size": 32000,
+    "block_size": 1024
 }
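
The diff moves both configs from Hugging Face `LlamaConfig` field names (`hidden_size`, `num_attention_heads`, ...) to lit-gpt-style names (`n_embd`, `n_head`, ...) and fixes the 1B config's `vocab_size` (1024 was a placeholder; 32000 matches the Llama tokenizer). Below is a minimal sketch of how a training script might consume these files; the `ModelConfig` dataclass and `load_config` helper are hypothetical, not actual OpenDiLoCo code, and only the JSON field names are taken from the diff.

```python
# Hypothetical loader sketch for the JSON configs above.
# ModelConfig and load_config are illustrative names, not OpenDiLoCo APIs.
import json
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelConfig:
    name: str                # e.g. "llama150m" or "llama1b"
    n_embd: int              # hidden size
    intermediate_size: int   # MLP inner dimension
    n_head: int              # attention heads
    n_layer: int             # transformer blocks
    vocab_size: int          # 32000 for the Llama tokenizer
    block_size: int          # max sequence length
    # Only the 1B config sets this; grouped-query attention with 4 KV groups.
    n_query_groups: Optional[int] = None


def load_config(path: str) -> ModelConfig:
    with open(path) as f:
        return ModelConfig(**json.load(f))


cfg = load_config("open_diloco/configs/config_150m.json")
print(cfg.n_embd, cfg.n_layer)  # 1024 12
```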