From 306d99a852dfc9e49e61bb47a79daaa6575e2a6d Mon Sep 17 00:00:00 2001
From: weiyueli7
Date: Fri, 8 Dec 2023 03:30:34 -0800
Subject: [PATCH] update estimations

---
 estimations/utils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/estimations/utils.py b/estimations/utils.py
index 840201d..b8b218a 100644
--- a/estimations/utils.py
+++ b/estimations/utils.py
@@ -1,4 +1,4 @@
-def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio):
+def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio=4):
     """
     Calculate the number of parameters (in Millions) in the GPT-3 models.
     Args:
@@ -9,7 +9,7 @@ def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidd
         d_model (int): Embedding size.
         n_heads (int): Number of heads.
         d_head (int): Size of each head.
-        hidden_ratio (float): Ratio of hidden size to embedding size.
+        hidden_ratio (float): Ratio of hidden size to embedding size. GPT-3 uses 4 and LLaMA uses 2.3
 
     Returns:
         float: Number of parameters (in Millions).
@@ -77,4 +77,7 @@ def estimate_backward_flops(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head):
         * n_ctx * d_model) * \
         n_layers * n_heads + d_model * n_ctx + \
         d_model * n_ctx * s_vocab
-    return total
\ No newline at end of file
+    return total
+
+def estimate_memory(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio=4):
+    return 20 * estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio)
\ No newline at end of file