From 306d99a852dfc9e49e61bb47a79daaa6575e2a6d Mon Sep 17 00:00:00 2001
From: weiyueli7
Date: Fri, 8 Dec 2023 03:30:34 -0800
Subject: [PATCH] update estimations

---
 estimations/utils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/estimations/utils.py b/estimations/utils.py
index 840201d..b8b218a 100644
--- a/estimations/utils.py
+++ b/estimations/utils.py
@@ -1,4 +1,4 @@
-def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio):
+def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio=4):
     """
     Calculate the number of parameters (in Millions) in the GPT-3 models.
     Args:
@@ -9,7 +9,7 @@ def estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidd
         d_model (int): Embedding size.
         n_heads (int): Number of heads.
         d_head (int): Size of each head.
-        hidden_ratio (float): Ratio of hidden size to embedding size.
+        hidden_ratio (float): Ratio of hidden size to embedding size. GPT-3 uses 4 and LLaMA uses 2.3
 
     Returns:
         float: Number of parameters (in Millions).
@@ -77,4 +77,7 @@ def estimate_backward_flops(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head):
         * n_ctx * d_model) * \
         n_layers * n_heads + d_model * n_ctx + \
         d_model * n_ctx * s_vocab
-    return total
\ No newline at end of file
+    return total
+
+def estimate_memory(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio=4):
+    return 20 * estimate_parameters(s_vocab, n_ctx, n_layers, d_model, n_heads, d_head, hidden_ratio)
\ No newline at end of file