From ed5734ae25edadb631e9de58d1f10f9c50e18c00 Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 18 Mar 2024 14:19:43 -0400 Subject: [PATCH] test/fix: OSX Test Repair (#1843) * test with gguf instead of ggml. Updates testPrompt to match? Adds debugging line to Dockerfile that I've found helpful recently. * fix testPrompt slightly * Sad Experiment: Test GH runner without metal? * break apart CGO_LDFLAGS * switch runner * upstream llama.cpp disables Metal on Github CI! * missed a dir from clean-tests * CGO_LDFLAGS * tmate failure + NO_ACCELERATE * whisper.cpp has a metal fix * do the exact opposite of the name of this branch, but keep it around for unrelated fixes? * add back newlines * add tmate to linux for testing * update fixtures * timeout for tmate --- .github/workflows/test.yml | 12 ++++++++++-- Dockerfile | 1 + Makefile | 19 +++++++++++++++---- backend/cpp/llama/Makefile | 5 +++++ core/http/api_test.go | 8 ++++---- tests/models_fixtures/config.yaml | 4 ++-- tests/models_fixtures/gpt4.yaml | 2 +- tests/models_fixtures/gpt4_2.yaml | 2 +- 8 files changed, 39 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2a2cc6c89ff0..8222508ab6bd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -105,9 +105,13 @@ jobs: - name: Test run: | GO_TAGS="stablediffusion tts" make test + - name: Setup tmate session if tests fail + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 tests-apple: - runs-on: macOS-latest + runs-on: macOS-14 strategy: matrix: go-version: ['1.21.x'] @@ -130,4 +134,8 @@ jobs: run: | export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include - CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test \ No newline at end of file + BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test + - name: Setup tmate session if tests fail + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index ebda80ba6b66..b083690efdce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -108,6 +108,7 @@ WORKDIR /build COPY . . COPY .git . +RUN echo "GO_TAGS: $GO_TAGS" RUN make prepare # If we are building with clblas support, we need the libraries for the builds diff --git a/Makefile b/Makefile index 8bbc0625b23f..ff7ec7971f77 100644 --- a/Makefile +++ b/Makefile @@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s) endif ifeq ($(OS),Darwin) - CGO_LDFLAGS += -lcblas -framework Accelerate + ifeq ($(OSX_SIGNING_IDENTITY),) OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/') endif @@ -81,6 +81,12 @@ ifeq ($(OS),Darwin) # disable metal if on Darwin and any other value is explicitly passed. else ifneq ($(BUILD_TYPE),metal) CMAKE_ARGS+=-DLLAMA_METAL=OFF + export LLAMA_NO_ACCELERATE=1 + endif + + ifeq ($(BUILD_TYPE),metal) +# -lcblas removed: it seems to always be listed as a duplicate flag. + CGO_LDFLAGS += -framework Accelerate endif endif @@ -286,6 +292,11 @@ clean: ## Remove build related file $(MAKE) -C backend/cpp/llama clean $(MAKE) dropreplace +clean-tests: + rm -rf test-models + rm -rf test-dir + rm -rf core/http/backend-assets + ## Build: build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) @@ -305,10 +316,10 @@ osx-signed: build run: prepare ## run local-ai CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./ -test-models/testmodel: +test-models/testmodel.ggml: mkdir test-models mkdir test-dir - wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel + wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav @@ -320,7 +331,7 @@ prepare-test: grpcs cp -rf backend-assets core/http cp tests/models_fixtures/* test-models -test: prepare test-models/testmodel grpcs +test: prepare test-models/testmodel.ggml grpcs @echo 'Running tests' export GO_TAGS="tts stablediffusion" $(MAKE) prepare-test diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index 8502ae2f4f20..3d31284a1bca 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -19,6 +19,11 @@ else ifeq ($(BUILD_TYPE),clblas) else ifeq ($(BUILD_TYPE),hipblas) CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON # If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation +# But if it's OSX without metal, disable it here +else ifeq ($(OS),darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DLLAMA_METAL=OFF + endif endif ifeq ($(BUILD_TYPE),sycl_f16) diff --git a/core/http/api_test.go b/core/http/api_test.go index b0579a19d6a3..ca69e8bf7bbd 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -666,15 +666,15 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred()) Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? }) - It("can generate completions", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt}) + It("can generate completions via ggml", func() { + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Text).ToNot(BeEmpty()) }) - It("can generate chat completions ", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) + It("can generate chat completions via ggml", func() { + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml index 749d1699415f..f61c2a7c0cfe 100644 --- a/tests/models_fixtures/config.yaml +++ b/tests/models_fixtures/config.yaml @@ -1,6 +1,6 @@ - name: list1 parameters: - model: testmodel + model: testmodel.ggml top_p: 80 top_k: 0.9 temperature: 0.1 @@ -19,7 +19,7 @@ top_p: 80 top_k: 0.9 temperature: 0.1 - model: testmodel + model: testmodel.ggml context_size: 200 stopwords: - "HUMAN:" diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml index 652a407ca343..43e77586d590 100644 --- a/tests/models_fixtures/gpt4.yaml +++ b/tests/models_fixtures/gpt4.yaml @@ -1,6 +1,6 @@ name: gpt4all parameters: - model: testmodel + model: testmodel.ggml top_p: 80 top_k: 0.9 temperature: 0.1 diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml index 904693ca5ed5..8a2111530165 100644 --- a/tests/models_fixtures/gpt4_2.yaml +++ b/tests/models_fixtures/gpt4_2.yaml @@ -1,6 +1,6 @@ name: gpt4all-2 parameters: - model: testmodel + model: testmodel.ggml top_p: 80 top_k: 0.9 temperature: 0.1