
Commit 78ccbba: Merge branch 'master' into falcon40b

jploski committed Jun 25, 2023 (2 parents: 8b22ea8 + 4a7db90)

Showing 50 changed files with 10,842 additions and 1,991 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,46 @@
name: CI

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]

    runs-on: ${{ matrix.os }}

    env:
      GGML_NLOOP: 3
      GGML_NITER: 1

    steps:
      - uses: actions/checkout@v2

      - name: Set GGML_N_THREADS for Ubuntu
        run: echo "GGML_N_THREADS=2" >> $GITHUB_ENV
        if: matrix.os == 'ubuntu-latest'

      - name: Set GGML_N_THREADS for MacOS
        run: echo "GGML_N_THREADS=2" >> $GITHUB_ENV
        if: matrix.os == 'macos-latest'

      - name: Create Build Environment
        run: mkdir build

      - name: Configure CMake
        working-directory: ./build
        run: cmake ..

      - name: Build
        working-directory: ./build
        run: make

      - name: Test
        working-directory: ./build
        run: ctest --verbose --timeout 900
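
The new workflow pins GGML_NLOOP, GGML_NITER, and GGML_N_THREADS so that test workloads stay small and bounded on CI runners. A minimal sketch of how a test binary could pick these variables up, assuming the tests read them via getenv (the env_int helper and the fallback defaults below are illustrative, not ggml code):

```cpp
#include <cstdlib>
#include <cstdio>

// Illustrative helper (not part of ggml): read an integer setting from the
// environment, falling back to a default when the variable is unset.
static int env_int(const char * name, int fallback) {
    const char * val = std::getenv(name);
    return val ? std::atoi(val) : fallback;
}

int main() {
    // Assumption: the test binaries consume the CI variables roughly this way.
    const int nloop    = env_int("GGML_NLOOP",     1);
    const int niter    = env_int("GGML_NITER",     1);
    const int nthreads = env_int("GGML_N_THREADS", 1);
    std::printf("nloop=%d niter=%d nthreads=%d\n", nloop, niter, nthreads);
    return 0;
}
```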
7 changes: 7 additions & 0 deletions .gitignore
@@ -1,8 +1,12 @@
build/
build-debug/
build-*/
out/

compile_commands.json
CMakeSettings.json
.vs/
.vscode/

.exrc
.cache
@@ -12,3 +16,6 @@ compile_commands.json

src/arm_neon.h
tests/arm_neon.h

zig-out/
zig-cache/
10 changes: 9 additions & 1 deletion README.md
@@ -1,5 +1,7 @@
# ggml

[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205)

Tensor library for machine learning

***Note that this project is under active development. \
@@ -17,7 +19,7 @@ Some of the development is currently happening in the [llama.cpp](https://github
- No third-party dependencies
- Zero memory allocations during runtime

## Roadmap
## Updates

- [X] Example of GPT-2 inference [examples/gpt-2](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2)
- [X] Example of GPT-J inference [examples/gpt-j](https://github.com/ggerganov/ggml/tree/master/examples/gpt-j)
@@ -36,6 +38,9 @@ Some of the development is currently happening in the [llama.cpp](https://github
- [X] Example of 💫 StarCoder inference [examples/starcoder](https://github.com/ggerganov/ggml/tree/master/examples/starcoder)
- [X] Example of MPT inference [examples/mpt](https://github.com/ggerganov/ggml/tree/master/examples/mpt)
- [X] Example of Replit inference [examples/replit](https://github.com/ggerganov/ggml/tree/master/examples/replit)
- [X] Example of BioGPT inference [PABannier/biogpt.cpp](https://github.com/PABannier/biogpt.cpp)
- [X] Example of Encodec inference [PABannier/encodec.cpp](https://github.com/PABannier/encodec.cpp)
- [X] Example of CLIP inference [monatis/clip.cpp](https://github.com/monatis/clip.cpp)

## Whisper inference (example)

@@ -73,6 +78,9 @@ make -j4 gpt-2 gpt-j
../examples/gpt-j/download-ggml-model.sh 6B
./bin/gpt-j -m models/gpt-j-6B/ggml-model.bin -p "This is an example"

# Install Python dependencies
python3 -m pip install -r ../requirements.txt

# Run the Cerebras-GPT 111M model
# Download from: https://huggingface.co/cerebras
python3 ../examples/gpt-2/convert-cerebras-to-ggml.py /path/to/Cerebras-GPT-111M/
113 changes: 113 additions & 0 deletions build.zig
@@ -0,0 +1,113 @@
const std = @import("std");

// Zig Version: 0.11.0-dev.3798+a5e15eced
// Zig Build Command: zig build
// Zig Run Command:
// zig build run_dolly-v2
// zig build run_gpt-2
// zig build run_gpt-j
// zig build run_gpt-neox
// zig build run_mnist
// zig build run_mpt
// zig build run_replit
// zig build run_starcoder
// zig build run_test-grad0
// zig build run_test-mul-mat0
// zig build run_test-mul-mat2
// zig build run_test-opt
// zig build run_test-vec1
// zig build run_test0
// zig build run_test1
// zig build run_test2
// zig build run_test3
pub fn build(b: *std.build.Builder) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
    const lib = b.addStaticLibrary(.{
        .name = "ggml",
        .target = target,
        .optimize = optimize,
    });
    lib.addIncludePath("./include");
    lib.addIncludePath("./include/ggml");
    lib.addCSourceFiles(&.{
        "src/ggml.c",
    }, &.{"-std=c11"});
    lib.linkLibC();
    lib.linkLibCpp();
    b.installArtifact(lib);

    // examples
    const examples = .{
        "dolly-v2",
        "gpt-2",
        "gpt-j",
        "gpt-neox",
        "mnist",
        "mpt",
        "replit",
        "starcoder",
        // "whisper",
    };
    inline for (examples) |name| {
        const exe = b.addExecutable(.{
            .name = name,
            .target = target,
            .optimize = optimize,
        });
        exe.addIncludePath("./include");
        exe.addIncludePath("./include/ggml");
        exe.addIncludePath("./examples");
        // exe.addIncludePath("./examples/whisper");
        exe.addCSourceFiles(&.{
            std.fmt.comptimePrint("examples/{s}/main.cpp", .{name}),
            "examples/common.cpp",
            "examples/common-ggml.cpp",
            // "examples/whisper/whisper.cpp",
        }, &.{"-std=c++11"});
        exe.linkLibrary(lib);
        b.installArtifact(exe);
        const run_cmd = b.addRunArtifact(exe);
        run_cmd.step.dependOn(b.getInstallStep());
        if (b.args) |args| run_cmd.addArgs(args);
        const run_step = b.step("run_" ++ name, "Run examples");
        run_step.dependOn(&run_cmd.step);
    }

    // tests
    const tests = .{
        // "test-blas0",
        "test-grad0",
        "test-mul-mat0",
        // "test-mul-mat1",
        "test-mul-mat2",
        "test-opt",
        // "test-svd0",
        // "test-vec0",
        "test-vec1",
        // "test-vec2",
        "test0",
        "test1",
        "test2",
        "test3",
    };
    inline for (tests) |name| {
        const exe = b.addExecutable(.{
            .name = name,
            .target = target,
            .optimize = optimize,
        });
        exe.addIncludePath("./include");
        exe.addIncludePath("./include/ggml");
        exe.addCSourceFiles(&.{
            std.fmt.comptimePrint("tests/{s}.c", .{name}),
        }, &.{"-std=c11"});
        exe.linkLibrary(lib);
        b.installArtifact(exe);
        const run_cmd = b.addRunArtifact(exe);
        run_cmd.step.dependOn(b.getInstallStep());
        if (b.args) |args| run_cmd.addArgs(args);
        const run_step = b.step("run_" ++ name, "Run tests");
        run_step.dependOn(&run_cmd.step);
    }
}
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -2,6 +2,7 @@ if (GGML_ALL_WARNINGS)
    if (NOT MSVC)
        set(cxx_flags
            # TODO(marella): Add other warnings.
            -Wpedantic
            -Wunused-variable
            -Wno-unused-function
            -Wno-multichar
11 changes: 11 additions & 0 deletions examples/common-ggml.cpp
@@ -52,6 +52,11 @@ bool ggml_common_quantize_0(
        case GGML_FTYPE_ALL_F32:
        case GGML_FTYPE_MOSTLY_F16:
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
        case GGML_FTYPE_MOSTLY_Q2_K:
        case GGML_FTYPE_MOSTLY_Q3_K:
        case GGML_FTYPE_MOSTLY_Q4_K:
        case GGML_FTYPE_MOSTLY_Q5_K:
        case GGML_FTYPE_MOSTLY_Q6_K:
            {
                fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
                return false;
@@ -187,6 +192,12 @@ bool ggml_common_quantize_0(
        case GGML_TYPE_I16:
        case GGML_TYPE_I32:
        case GGML_TYPE_Q8_1:
        case GGML_TYPE_Q2_K:
        case GGML_TYPE_Q3_K:
        case GGML_TYPE_Q4_K:
        case GGML_TYPE_Q5_K:
        case GGML_TYPE_Q6_K:
        case GGML_TYPE_Q8_K:
        case GGML_TYPE_COUNT:
            {
                fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
51 changes: 35 additions & 16 deletions examples/common.cpp
@@ -17,6 +17,10 @@
#define M_PI 3.14159265358979323846
#endif

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
@@ -232,43 +236,59 @@ std::wstring convert_to_wstring(const std::string & input) {
    return converter.from_bytes(input);
}

void gpt_split_words(std::string str, std::vector<std::string>& words) {
    const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
    const std::regex re(pattern);
    std::smatch m;

    while (std::regex_search(str, m, re)) {
        for (auto x : m) {
            words.push_back(x);
        }
        str = m.suffix();
    }
}

std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) {
    std::vector<std::string> words;

    // first split the text into words
    {
        std::string str = text;
        std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";

        // Generate the subpattern from the special_tokens vector if it's not empty
        if (!vocab.special_tokens.empty()) {
            const std::regex escape(R"([\[\\\^\$\.\|\?\*\+\(\)\{\}])");
            std::string special_tokens_subpattern;
            for (const auto & token : vocab.special_tokens) {
                if (!special_tokens_subpattern.empty()) {
                    special_tokens_subpattern += "|";
                }
                special_tokens_subpattern += token;
                special_tokens_subpattern += std::regex_replace(token, escape, R"(\$&)");
            }

            // Modify the regex pattern with the generated special tokens subpattern
            pat = special_tokens_subpattern + "|" + pat;
        }

        std::regex re(pat);
        std::smatch m;

        while (std::regex_search(str, m, re)) {
            for (auto x : m) {
                words.push_back(x);
            std::regex re(special_tokens_subpattern);
            std::smatch m;
            // Split the text by special tokens.
            while (std::regex_search(str, m, re)) {
                // Split the substrings in-between special tokens into words.
                gpt_split_words(m.prefix(), words);
                // Add matched special tokens as words.
                for (auto x : m) {
                    words.push_back(x);
                }
                str = m.suffix();
            }
            str = m.suffix();
            // Remaining text without special tokens will be handled below.
        }

        gpt_split_words(str, words);
    }

    // find the longest token that forms each word in words:
    std::vector<gpt_vocab::id> tokens;
    for (const auto & word : words) {
        for (int i = 0; i < word.size(); ){
        for (int i = 0; i < (int) word.size(); ){
            for (int j = word.size() - 1; j >= i; j--){
                auto cand = word.substr(i, j-i+1);
                auto it = vocab.token_to_id.find(cand);
@@ -285,7 +305,6 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
        }
    }


    return tokens;
}

@@ -350,7 +369,7 @@ void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test){
        }
    }

    fprintf(stderr, "%s : %lu tests failed out of %lu tests.\n", __func__, n_fails, tests.size());
    fprintf(stderr, "%s : %zu tests failed out of %zu tests.\n", __func__, n_fails, tests.size());
}

bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
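To see the refactored tokenizer above in action, here is a hypothetical usage sketch; the vocabulary contents are invented, and only the gpt_vocab fields visible in this diff (token_to_id, special_tokens) are assumed:

```cpp
// Hypothetical usage of gpt_tokenize() from examples/common.h; the
// vocabulary below is made up for illustration only.
#include "common.h"

#include <cstdio>

int main() {
    gpt_vocab vocab;
    vocab.token_to_id    = { {"Hello", 1}, {" world", 2}, {"<|end|>", 3} };
    // Special tokens are matched before the BPE-style word regex runs,
    // so "<|end|>" survives as a single word instead of being split up.
    vocab.special_tokens = { "<|end|>" };

    // Expected words: {"Hello", " world", "<|end|>"} -> ids {1, 2, 3}
    for (const auto id : gpt_tokenize(vocab, "Hello world<|end|>")) {
        std::printf("%d\n", id);
    }
    return 0;
}
```

The std::regex_replace escaping added in this commit is what makes this safe for special tokens containing regex metacharacters, such as the | in <|end|>.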
4 changes: 3 additions & 1 deletion examples/common.h
@@ -66,6 +66,8 @@ std::string convert_to_utf8(const std::wstring & input);

std::wstring convert_to_wstring(const std::string & input);

void gpt_split_words(std::string str, std::vector<std::string>& words);

// split text into tokens
//
// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
@@ -80,7 +82,7 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri

// test outputs of gpt_tokenize
//
// - compare with tokens generated by the huggingface tokenizer
// - compare with tokens generated by the huggingface tokenizer
// - test cases are chosen based on the model's main language (under 'prompt' directory)
// - if all sentences are tokenized identically, print 'All tests passed.'
// - otherwise, print sentence, huggingface tokens, ggml tokens
3 changes: 3 additions & 0 deletions examples/dolly-v2/README.md
@@ -21,6 +21,9 @@ make -j
# get the Dolly-V2 3B model
git clone https://huggingface.co/databricks/dolly-v2-3b

# install Python dependencies
python3 -m pip install -r ../requirements.txt

# convert model to FP16
python3 ../examples/dolly-v2/convert-h5-to-ggml.py ./dolly-v2-3b/ 1
