From 78451ef5ed0e8a0b211880093d077e65ab9c18c0 Mon Sep 17 00:00:00 2001 From: Sujee Maniyam Date: Thu, 24 Oct 2024 23:23:49 -0700 Subject: [PATCH 1/4] updating RAG example to use IBM granite model Signed-off-by: Sujee Maniyam --- examples/notebooks/rag/README.md | 2 +- examples/notebooks/rag/my_config.py | 6 ++- .../rag/rag_1A_dpk_process_ray.ipynb | 7 ++-- ...ate.ipynb => rag_1D_query_replicate.ipynb} | 42 ++++++++++++------- 4 files changed, 35 insertions(+), 22 deletions(-) rename examples/notebooks/rag/{rag_1D_query_llama_replicate.ipynb => rag_1D_query_replicate.ipynb} (92%) diff --git a/examples/notebooks/rag/README.md b/examples/notebooks/rag/README.md index f4a3460a1..16ffdb15e 100644 --- a/examples/notebooks/rag/README.md +++ b/examples/notebooks/rag/README.md @@ -76,7 +76,7 @@ REPLICATE_API_TOKEN=your REPLICATE token goes here ### 5.2 - Run the query code -Code: [rag_1D_query_llama_replicate.ipynb](rag_1D_query_llama_replicate.ipynb) +Code: [rag_1D_query_replicate.ipynb](rag_1D_query_replicate.ipynb) diff --git a/examples/notebooks/rag/my_config.py b/examples/notebooks/rag/my_config.py index ba9ea89fd..66fc1ecf7 100644 --- a/examples/notebooks/rag/my_config.py +++ b/examples/notebooks/rag/my_config.py @@ -23,8 +23,10 @@ class MyConfig: MY_CONFIG.EMBEDDING_LENGTH = 384 ## LLM Model -MY_CONFIG.LLM_MODEL = "meta/meta-llama-3-8b-instruct" - +# MY_CONFIG.LLM_MODEL = "meta/meta-llama-3-8b-instruct" +# MY_CONFIG.LLM_MODEL = "meta/meta-llama-3-70b-instruct" +# MY_CONFIG.LLM_MODEL = "ibm-granite/granite-3.0-2b-instruct" +MY_CONFIG.LLM_MODEL = "ibm-granite/granite-3.0-8b-instruct" ## RAY CONFIGURATION diff --git a/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb b/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb index 8a8942b1f..e41bc0613 100644 --- a/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb +++ b/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb @@ -303,7 +303,8 @@ " \"data_files_to_use\": ast.literal_eval(\"['.pdf']\"),\n", " # orchestrator\n", " \"runtime_worker_options\": ParamsUtils.convert_to_ast(worker_options),\n", - " \"runtime_num_workers\": MY_CONFIG.RAY_RUNTIME_WORKERS,\n", + " # \"runtime_num_workers\": MY_CONFIG.RAY_RUNTIME_WORKERS,\n", + " \"runtime_num_workers\": 1 , # Setting it to 1 for this particular run\n", " \"runtime_pipeline_id\": \"pipeline_id\",\n", " \"runtime_job_id\": \"job_id\",\n", " \"runtime_code_location\": ParamsUtils.convert_to_ast(code_location),\n", @@ -2159,7 +2160,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "data-prep-kit-3-py312", "language": "python", "name": "python3" }, @@ -2173,7 +2174,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/examples/notebooks/rag/rag_1D_query_llama_replicate.ipynb b/examples/notebooks/rag/rag_1D_query_replicate.ipynb similarity index 92% rename from examples/notebooks/rag/rag_1D_query_llama_replicate.ipynb rename to examples/notebooks/rag/rag_1D_query_replicate.ipynb index 532b7ef4d..33b732d24 100644 --- a/examples/notebooks/rag/rag_1D_query_llama_replicate.ipynb +++ b/examples/notebooks/rag/rag_1D_query_replicate.ipynb @@ -267,15 +267,25 @@ "cell_type": "code", "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using model: ibm-granite/granite-3.0-8b-instruct\n" + ] + } + ], "source": [ "import os\n", - "os.environ[\"REPLICATE_API_TOKEN\"] = MY_CONFIG.REPLICATE_API_TOKEN" + "os.environ[\"REPLICATE_API_TOKEN\"] = MY_CONFIG.REPLICATE_API_TOKEN\n", + "\n", + "print ('Using model:', MY_CONFIG.LLM_MODEL)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -335,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -351,11 +361,11 @@ "Mayank Mishra ⋆ Matt Stallone ⋆ Gaoyuan Zhang ⋆ Yikang Shen Aditya Prasad Adriana Meza Soria Michele Merler Parameswaran Selvam Saptha Surendran Shivdeep Singh Manish Sethi Xuan-Hong Dang Pengyuan Li Kun-Lung Wu Syed Zawad Andrew Coleman Matthew White Mark Lewis Raju Pavuluri Yan Koyfman Boris Lublinsky Maximilien de Bayser Ibrahim Abdelaziz Kinjal Basu Mayank Agarwal Yi Zhou Chris Johnson Aanchal Goyal Hima Patel Yousaf Shah Petros Zerfos Heiko Ludwig Asim Munawar Maxwell Crouse Pavan Kapanipathi Shweta Salaria Bob Calio Sophia Wen Seetharami Seelam Brian Belgodere Carlos Fonseca Amith Singhee Nirmit Desai David D. Cox Ruchir Puri † Rameswar Panda †\n", "============ end context ============\n", "============ here is the answer from LLM... STREAMING... =====\n", - "Based on the provided context, the training data used to train Granite models is not explicitly mentioned. However, it is mentioned that the 20B model was used after 1.6T tokens to start training of 34B model with the same code pretraining data without any changes to the training and inference framework. This implies that the same code pretraining data was used for both models, but the exact nature of this data is not specified.\n", + "The context does not provide specific details about the training data used to train the Granite models. It only mentions that the 20B model was trained after 1.6T tokens and then used to start training the 34B model with the same code pretraining data. However, it does not specify what this code pretraining data is.\n", "====== end LLM answer ======\n", "\n", - "CPU times: user 75.3 ms, sys: 37.8 ms, total: 113 ms\n", - "Wall time: 1.95 s\n" + "CPU times: user 63.6 ms, sys: 12 ms, total: 75.6 ms\n", + "Wall time: 1.43 s\n" ] } ], @@ -369,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -385,11 +395,11 @@ "We are excited about the future of attention-based models and plan to apply them to other tasks. We plan to extend the Transformer to problems involving input and output modalities other than text and to investigate local, restricted attention mechanisms to efficiently handle large inputs and outputs such as images, audio and video. Making generation less sequential is another research goals of ours.\n", "============ end context ============\n", "============ here is the answer from LLM... STREAMING... =====\n", - "Based on the provided context, an attention mechanism can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is computed as a weighted sum.\n", + "An attention mechanism is a method used in sequence modeling and transduction models to model dependencies between elements in input or output sequences, regardless of their distance. It maps a query and a set of key-value pairs to an output, which is computed as a weighted sum.\n", "====== end LLM answer ======\n", "\n", - "CPU times: user 41.1 ms, sys: 28.7 ms, total: 69.8 ms\n", - "Wall time: 1.58 s\n" + "CPU times: user 30.6 ms, sys: 17.3 ms, total: 47.9 ms\n", + "Wall time: 880 ms\n" ] } ], @@ -403,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -419,11 +429,11 @@ "The Granite Code models achieve relatively high accuracy across all sizes (e.g., outperforming CodeGemma at 2B-3B scale, StarCoder2 at 7B-8B scale and CodeLlama models with half of the sizes). This shows that our Granite Code models are not only capable of generating good code but also of using libraries more accurately in real data science workflows.\n", "============ end context ============\n", "============ here is the answer from LLM... STREAMING... =====\n", - "I apologize, but the provided context does not mention the moon landing. The context appears to be about code generation and evaluation benchmarks, specifically discussing the MBPP and MBPP+ benchmarks, and the performance of different code models. There is no mention of the moon landing. If you provide a different context or question, I'll be happy to help.\n", + "I'm sorry, the provided context does not contain information about the moon landing.\n", "====== end LLM answer ======\n", "\n", - "CPU times: user 41.5 ms, sys: 21 ms, total: 62.5 ms\n", - "Wall time: 2.13 s\n" + "CPU times: user 45 ms, sys: 3.19 ms, total: 48.2 ms\n", + "Wall time: 412 ms\n" ] } ], @@ -445,7 +455,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "data-prep-kit-4-021", "language": "python", "name": "python3" }, From 0fc1dbc16b2776efdb47a9d6235dcdef321e1cbc Mon Sep 17 00:00:00 2001 From: Sujee Maniyam Date: Fri, 25 Oct 2024 13:24:13 -0700 Subject: [PATCH 2/4] Updated documentation for LLM choices at Replicate Signed-off-by: Sujee Maniyam --- .../rag/rag_1D_query_replicate.ipynb | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/examples/notebooks/rag/rag_1D_query_replicate.ipynb b/examples/notebooks/rag/rag_1D_query_replicate.ipynb index 33b732d24..4dfcba626 100644 --- a/examples/notebooks/rag/rag_1D_query_replicate.ipynb +++ b/examples/notebooks/rag/rag_1D_query_replicate.ipynb @@ -249,18 +249,23 @@ "\n", "### LLM Choices at Replicate\n", "\n", - "- llama 3.1 : Latest\n", - " - **meta/meta-llama-3.1-405b-instruct** : Meta's flagship 405 billion parameter language model, fine-tuned for chat completions\n", - "- Base version of llama-3 from meta\n", - " - [meta/meta-llama-3-8b](https://replicate.com/meta/meta-llama-3-8b) : Base version of Llama 3, an 8 billion parameter language model from Meta.\n", - " - **meta/meta-llama-3-70b** : 70 billion\n", - "- Instruct versions of llama-3 from meta, fine tuned for chat completions\n", - " - **meta/meta-llama-3-8b-instruct** : An 8 billion parameter language model from Meta, \n", - " - **meta/meta-llama-3-70b-instruct** : 70 billion\n", + "\n", + "| Model | Publisher | Params | $ / 1M input | $ / 1M output | Description |\n", + "|-------------------------------------|-----------|--------|--------------|---------------|------------------------------------------------------|\n", + "| ibm-granite/granite-3.0-8b-instruct | IBM | 8 B | $ 0.05 | $ 0.25 | IBM's newest Granite Model v3.0 (default) |\n", + "| ibm-granite/granite-3.0-2b-instruct | IBM | 2 B | $ 0.03 | $ 0.25 | IBM's newest Granite Model v3.0 |\n", + "| meta/meta-llama-3.1-405b-instruct | Meta | 405 B | $ 9.5 | $ 9.5 | Meta's flagship 405 billion parameter language model |\n", + "| meta/meta-llama-3-8b-instruct | Meta | 8 B | $ 0.05 | $ 0.05 | |\n", + "| meta/meta-llama-3-70b-instruct | Meta | 70 B | $ 0.65 | $ 2.75 | |\n", + "\n", + "\n", + "(Prices are as of Oct 2024)\n", "\n", "References \n", "\n", - "- https://docs.llamaindex.ai/en/stable/examples/llm/llama_2/?h=replicate" + "- https://www.ibm.com/granite\n", + "- https://www.llama.com/\n", + "- https://replicate.com/ , https://replicate.com/pricing" ] }, { @@ -455,7 +460,7 @@ ], "metadata": { "kernelspec": { - "display_name": "data-prep-kit-4-021", + "display_name": "data-prep-kit-3-py312", "language": "python", "name": "python3" }, @@ -469,7 +474,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.7" } }, "nbformat": 4, From 6f2f9af37239c5a4401cc4db5ccb5436bca0f199 Mon Sep 17 00:00:00 2001 From: Sujee Maniyam Date: Tue, 29 Oct 2024 21:40:45 -0700 Subject: [PATCH 3/4] Updating model table description Signed-off-by: Sujee Maniyam --- .../rag/rag_1D_query_replicate.ipynb | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/examples/notebooks/rag/rag_1D_query_replicate.ipynb b/examples/notebooks/rag/rag_1D_query_replicate.ipynb index 4dfcba626..04bd4a0e5 100644 --- a/examples/notebooks/rag/rag_1D_query_replicate.ipynb +++ b/examples/notebooks/rag/rag_1D_query_replicate.ipynb @@ -250,22 +250,19 @@ "### LLM Choices at Replicate\n", "\n", "\n", - "| Model | Publisher | Params | $ / 1M input | $ / 1M output | Description |\n", - "|-------------------------------------|-----------|--------|--------------|---------------|------------------------------------------------------|\n", - "| ibm-granite/granite-3.0-8b-instruct | IBM | 8 B | $ 0.05 | $ 0.25 | IBM's newest Granite Model v3.0 (default) |\n", - "| ibm-granite/granite-3.0-2b-instruct | IBM | 2 B | $ 0.03 | $ 0.25 | IBM's newest Granite Model v3.0 |\n", - "| meta/meta-llama-3.1-405b-instruct | Meta | 405 B | $ 9.5 | $ 9.5 | Meta's flagship 405 billion parameter language model |\n", - "| meta/meta-llama-3-8b-instruct | Meta | 8 B | $ 0.05 | $ 0.05 | |\n", - "| meta/meta-llama-3-70b-instruct | Meta | 70 B | $ 0.65 | $ 2.75 | |\n", - "\n", - "\n", - "(Prices are as of Oct 2024)\n", + "| Model | Publisher | Params | Description |\n", + "|-------------------------------------|-----------|--------|------------------------------------------------------|\n", + "| ibm-granite/granite-3.0-8b-instruct | IBM | 8 B | IBM's newest Granite Model v3.0 (default) |\n", + "| ibm-granite/granite-3.0-2b-instruct | IBM | 2 B | IBM's newest Granite Model v3.0 |\n", + "| meta/meta-llama-3.1-405b-instruct | Meta | 405 B | Meta's flagship 405 billion parameter language model |\n", + "| meta/meta-llama-3-8b-instruct | Meta | 8 B | |\n", + "| meta/meta-llama-3-70b-instruct | Meta | 70 B | |\n", "\n", "References \n", "\n", "- https://www.ibm.com/granite\n", "- https://www.llama.com/\n", - "- https://replicate.com/ , https://replicate.com/pricing" + "- https://replicate.com/ " ] }, { @@ -460,7 +457,7 @@ ], "metadata": { "kernelspec": { - "display_name": "data-prep-kit-3-py312", + "display_name": "data-prep-kit-4-021", "language": "python", "name": "python3" }, @@ -474,7 +471,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.11.9" } }, "nbformat": 4, From 8155bb70100110899f3ac04f0eb365f087d9f2af Mon Sep 17 00:00:00 2001 From: Sujee Maniyam Date: Wed, 30 Oct 2024 22:20:42 -0700 Subject: [PATCH 4/4] fixed model descriptions, clarified a comment Signed-off-by: Sujee Maniyam --- examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb | 5 ++--- examples/notebooks/rag/rag_1D_query_replicate.ipynb | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb b/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb index e41bc0613..8bdea1ff6 100644 --- a/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb +++ b/examples/notebooks/rag/rag_1A_dpk_process_ray.ipynb @@ -222,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "b0cd8ebd-bf71-42d6-a397-8df0c7b66a26", "metadata": {}, "outputs": [ @@ -303,8 +303,7 @@ " \"data_files_to_use\": ast.literal_eval(\"['.pdf']\"),\n", " # orchestrator\n", " \"runtime_worker_options\": ParamsUtils.convert_to_ast(worker_options),\n", - " # \"runtime_num_workers\": MY_CONFIG.RAY_RUNTIME_WORKERS,\n", - " \"runtime_num_workers\": 1 , # Setting it to 1 for this particular run\n", + " \"runtime_num_workers\": 1, # so model download to cleanup works properly\n", " \"runtime_pipeline_id\": \"pipeline_id\",\n", " \"runtime_job_id\": \"job_id\",\n", " \"runtime_code_location\": ParamsUtils.convert_to_ast(code_location),\n", diff --git a/examples/notebooks/rag/rag_1D_query_replicate.ipynb b/examples/notebooks/rag/rag_1D_query_replicate.ipynb index 04bd4a0e5..5e94ac0e8 100644 --- a/examples/notebooks/rag/rag_1D_query_replicate.ipynb +++ b/examples/notebooks/rag/rag_1D_query_replicate.ipynb @@ -255,8 +255,8 @@ "| ibm-granite/granite-3.0-8b-instruct | IBM | 8 B | IBM's newest Granite Model v3.0 (default) |\n", "| ibm-granite/granite-3.0-2b-instruct | IBM | 2 B | IBM's newest Granite Model v3.0 |\n", "| meta/meta-llama-3.1-405b-instruct | Meta | 405 B | Meta's flagship 405 billion parameter language model |\n", - "| meta/meta-llama-3-8b-instruct | Meta | 8 B | |\n", - "| meta/meta-llama-3-70b-instruct | Meta | 70 B | |\n", + "| meta/meta-llama-3-8b-instruct | Meta | 8 B | Meta's 8 billion parameter language model |\n", + "| meta/meta-llama-3-70b-instruct | Meta | 70 B | Meta's 70 billion parameter language model |\n", "\n", "References \n", "\n",