From 330079764bb60cc813086fd46ec595658d8c622d Mon Sep 17 00:00:00 2001
From: hydai
Date: Fri, 19 Apr 2024 11:12:06 +0800
Subject: [PATCH] [Example] ggml: Update llama README for llama3 support (#132)

Signed-off-by: hydai
---
 wasmedge-ggml/llama-stream/README.md | 42 +++++++++++++++++++++++++++-
 wasmedge-ggml/llama/README.md        | 42 +++++++++++++++++++++++++++-
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/wasmedge-ggml/llama-stream/README.md b/wasmedge-ggml/llama-stream/README.md
index fbae012..7bca7c7 100644
--- a/wasmedge-ggml/llama-stream/README.md
+++ b/wasmedge-ggml/llama-stream/README.md
@@ -1,6 +1,46 @@
 # `llama-stream`
 
-## Execute
+## Execute - llama 3
+
+### Model Download Link
+
+```console
+wget https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_M.gguf
+```
+
+### Execution Command
+
+Please make sure you have the `Meta-Llama-3-8B-Instruct.Q5_K_M.gguf` file in the current directory.
+Don't forget to set the `llama3` environment variable to `true` to enable the llama3 prompt template.
+If you want to enable GPU support, please set the `n_gpu_layers` environment variable.
+You can also enlarge the context window by setting `ctx_size`, e.g. `--env ctx_size=8192`; the default value is 1024.
+
+```console
+$ wasmedge --dir .:. \
+  --env llama3=true \
+  --env n_gpu_layers=100 \
+  --nn-preload default:GGML:AUTO:Meta-Llama-3-8B-Instruct.Q5_K_M.gguf \
+  wasmedge-ggml-llama-stream.wasm default
+
+USER:
+What's WasmEdge?
+ASSISTANT:
+WasmEdge is an open-source WebAssembly runtime and compiler that can run WebAssembly code in various environments, including web browsers, mobile devices, and server-side applications.
+USER:
+Does it support in Docker?
+ASSISTANT:
+Yes, WasmEdge supports running in Docker containers.
+USER:
+Does it support in Podman?
+ASSISTANT:
+Yes, WasmEdge also supports running in Podman containers.
+USER:
+Does it work with crun?
+ASSISTANT:
+ Yes, WasmEdge supports running in crun containers.
+```
+
+## Execute - llama 2
 
 ```console
 $ wasmedge --dir .:. \
diff --git a/wasmedge-ggml/llama/README.md b/wasmedge-ggml/llama/README.md
index f0e0c2c..670397a 100644
--- a/wasmedge-ggml/llama/README.md
+++ b/wasmedge-ggml/llama/README.md
@@ -1,6 +1,46 @@
 # `llama`
 
-## Execute
+## Execute - llama 3
+
+### Model Download Link
+
+```console
+wget https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_M.gguf
+```
+
+### Execution Command
+
+Please make sure you have the `Meta-Llama-3-8B-Instruct.Q5_K_M.gguf` file in the current directory.
+Don't forget to set the `llama3` environment variable to `true` to enable the llama3 prompt template.
+If you want to enable GPU support, please set the `n_gpu_layers` environment variable.
+You can also enlarge the context window by setting `ctx_size`, e.g. `--env ctx_size=8192`; the default value is 1024.
+
+```console
+$ wasmedge --dir .:. \
+  --env llama3=true \
+  --env n_gpu_layers=100 \
+  --nn-preload default:GGML:AUTO:Meta-Llama-3-8B-Instruct.Q5_K_M.gguf \
+  wasmedge-ggml-llama.wasm default
+
+USER:
+What's WasmEdge?
+ASSISTANT:
+WasmEdge is an open-source WebAssembly runtime and compiler that can run WebAssembly code in various environments, including web browsers, mobile devices, and server-side applications.
+USER:
+Does it support in Docker?
+ASSISTANT:
+Yes, WasmEdge supports running in Docker containers.
+USER:
+Does it support in Podman?
+ASSISTANT:
+Yes, WasmEdge also supports running in Podman containers.
+USER:
+Does it work with crun?
+ASSISTANT:
+ Yes, WasmEdge supports running in crun containers.
+```
+
+## Execute - llama 2
 
 ```console
 $ wasmedge --dir .:. \
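
A usage note on the flags documented in the hunks above: `n_gpu_layers` and `ctx_size` are independent, so the GPU offload setting can be dropped while still enlarging the context window. The sketch below simply recombines the flags, model file, and `.wasm` artifact shown in the patch into a hypothetical CPU-only invocation; it is an assumption, not a command taken from the patch itself:

```console
# Hypothetical CPU-only run: omit n_gpu_layers and raise ctx_size
# (default 1024) for a larger context window, per the README text above.
$ wasmedge --dir .:. \
  --env llama3=true \
  --env ctx_size=8192 \
  --nn-preload default:GGML:AUTO:Meta-Llama-3-8B-Instruct.Q5_K_M.gguf \
  wasmedge-ggml-llama.wasm default
```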