# llama2.yml
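#
# Runs the WASI-NN GGML llama2 demo end to end on the wasmedge runwasi shim,
# through both containerd's ctr and Docker: swap in containerd v1.7.5, enable
# Docker's containerd image store, fetch the model and demo image, install the
# WasmEdge plugin, then run inference both ways.
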
name: llama2

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true
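
# Triggers: manual dispatch (the logLevel input is accepted but not consumed by
# any step below), every push to main, and a daily cron ("0 0 */1 * *" fires at
# 00:00 UTC; the */1 step in the day-of-month field is equivalent to *).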
on:
  workflow_dispatch:
    inputs:
      logLevel:
        description: 'Log level'
        required: true
        default: 'info'
  push:
    branches:
      - "main"
  schedule:
    - cron: "0 0 */1 * *"
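
# A single job exercises the preview feature on a GitHub-hosted runner.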
jobs:
  preview-feature:
    runs-on: ubuntu-latest
    steps:
      # Replace the runner's preinstalled containerd with v1.7.5: extract the
      # release tarball over /usr and restart the service.
      - name: Manually update GitHub's containerd
        run: |
          wget https://github.com/containerd/containerd/releases/download/v1.7.5/containerd-1.7.5-linux-amd64.tar.gz
          sudo tar Czxvf /usr containerd-1.7.5-linux-amd64.tar.gz
          sudo systemctl restart containerd
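      # Optional sanity check (a sketch, not part of the original workflow):
      # - name: Verify containerd version
      #   run: sudo ctr version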
      # Reinstall Docker with debug logging and the containerd image store
      # ("containerd-snapshotter") enabled; Docker needs the containerd image
      # store to run Wasm workloads through a containerd shim.
      - name: Set up Docker
        uses: crazy-max/ghaction-setup-docker@v2
        with:
          daemon-config: |
            {
              "debug": true,
              "features": {
                "containerd-snapshotter": true
              }
            }
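      # Optional check that the image store is active (a sketch; assumes the
      # DriverStatus template field reports the snapshotter driver type):
      # - run: docker info -f '{{ .DriverStatus }}'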
      # Download the quantized model weights into the workspace ($PWD), which is
      # bind-mounted into the containers below as /resource.
      - name: Fetch Llama-2-7B-GGUF model
        run: curl -LO https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q5_K_M.gguf
      # ctr run does not pull images automatically, so fetch the demo image first.
      - name: Fetch WASI-NN GGML with LLAMA2 example image
        run: sudo ctr image pull ghcr.io/second-state/runwasi-demo:llama-simple
      # ctr install unpacks the plugin image onto the host: -l also installs
      # libraries (landing under /opt/containerd/lib) and -r replaces any
      # existing files in the install directory.
      - name: Install WASI-NN GGML plugin (preview)
        run: |
          sudo ctr content fetch ghcr.io/second-state/runwasi-wasmedge-plugin:allinone.wasi_nn-ggml
          sudo ctr install ghcr.io/second-state/runwasi-wasmedge-plugin:allinone.wasi_nn-ggml -l -r
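      # Optional check that the plugin library landed (a sketch; /opt/containerd
      # is ctr install's default target):
      # - run: ls -l /opt/containerd/lib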
      # Run the demo with ctr. Positional arguments after the flags are: image,
      # container id (testggml), command (/app.wasm), then the command's flags.
      # WASMEDGE_WASINN_PRELOAD is <alias>:<backend>:<target>:<model path>.
      - name: Run WASI-NN GGML with LLAMA2 example (preview) through containerd
        run: |
          sudo ctr run --rm --runtime=io.containerd.wasmedge.v1 \
            --mount type=bind,src=/opt/containerd/lib,dst=/opt/containerd/lib,options=bind:ro \
            --mount type=bind,src=$PWD,dst=/resource,options=bind:ro \
            --env WASMEDGE_PLUGIN_PATH=/opt/containerd/lib \
            --env WASMEDGE_WASINN_PRELOAD=default:GGML:CPU:/resource/llama-2-7b.Q5_K_M.gguf \
            ghcr.io/second-state/runwasi-demo:llama-simple testggml /app.wasm \
            --model-alias default --ctx-size 4096 --n-predict 128 --log-enable --stream-stdout \
            --prompt "Robert Oppenheimer's most important achievement is "
      # Same run through Docker. No command is given after the image, so the
      # image's entrypoint (the wasm module) runs with only its flags appended.
      - name: Run WASI-NN GGML with LLAMA2 example (preview) through docker
        run: |
          docker run --rm --runtime=io.containerd.wasmedge.v1 --platform wasi/wasm \
            -v /opt/containerd/lib:/opt/containerd/lib \
            -v $PWD:/resource \
            --env WASMEDGE_PLUGIN_PATH=/opt/containerd/lib \
            --env WASMEDGE_WASINN_PRELOAD=default:GGML:CPU:/resource/llama-2-7b.Q5_K_M.gguf \
            ghcr.io/second-state/runwasi-demo:llama-simple \
            --model-alias default --ctx-size 4096 --n-predict 128 --log-enable --stream-stdout \
            --prompt "Robert Oppenheimer's most important achievement is "
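      # Both runs pass --stream-stdout, so the generated completion is expected
      # to appear directly in the job log.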