Skip to content

Commit

Permalink
Merge pull request #15 from weiyueli7/wl_estimation
Browse files Browse the repository at this point in the history
update estimations
  • Loading branch information
weiyueli7 authored Dec 8, 2023
2 parents 717c57f + 306d99a commit 8a3cf09
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions estimations/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio):
def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio=4):
"""
Calculate the number of parameters (in Millions) in the GPT-3 models.
Args:
Expand All @@ -9,7 +9,7 @@ def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidd
d_model (int): Embedding size.
n_heads (int): Number of heads.
d_head (int): Size of each head.
hidden_ratio (float): Ratio of hidden size to embedding size.
hidden_ratio (float): Ratio of hidden size to embedding size. GPT-3 uses 4 and LLaMA uses 2.3
Returns:
float: Number of parameters (in Millions).
Expand Down Expand Up @@ -77,4 +77,7 @@ def estimate_backward_flops(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head):
* n_ctx * d_model) * \
n_layers * n_heads + d_model * n_ctx + \
d_model * n_ctx * s_vocab
return total
return total

def estimate_memory(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio=4):
    """
    Estimate the memory footprint (rule-of-thumb) of a GPT-3 style model.

    Uses the common heuristic of ~20 bytes per parameter (weights, gradients,
    and optimizer states for mixed-precision Adam training) multiplied by the
    parameter count from estimate_parameters.

    Args:
        s_vocab (int): Vocabulary size.
        n_ctx (int): Context length.
        n_layers (int): Number of layers.
        d_model (int): Embedding size.
        n_heads (int): Number of heads.
        d_head (int): Size of each head.
        hidden_ratio (float): Ratio of hidden size to embedding size. GPT-3 uses 4.

    Returns:
        float: Estimated memory, in the same (Millions-scaled) units as
        estimate_parameters.
    """
    # Bug fix: the original `def` line was missing its trailing colon,
    # which made the whole module a SyntaxError.
    return 20 * estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio)

0 comments on commit 8a3cf09

Please sign in to comment.