
Commit 78ccbba: Merge branch 'master' into falcon40b

jploski committed Jun 25, 2023 (2 parents: 8b22ea8 + 4a7db90)

Showing 50 changed files with 10,842 additions and 1,991 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,46 @@
name: CI

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]

    runs-on: ${{ matrix.os }}

    env:
      GGML_NLOOP: 3
      GGML_NITER: 1

    steps:
      - uses: actions/checkout@v2

      - name: Set GGML_N_THREADS for Ubuntu
        run: echo "GGML_N_THREADS=2" >> $GITHUB_ENV
        if: matrix.os == 'ubuntu-latest'

      - name: Set GGML_N_THREADS for MacOS
        run: echo "GGML_N_THREADS=2" >> $GITHUB_ENV
        if: matrix.os == 'macos-latest'

      - name: Create Build Environment
        run: mkdir build

      - name: Configure CMake
        working-directory: ./build
        run: cmake ..

      - name: Build
        working-directory: ./build
        run: make

      - name: Test
        working-directory: ./build
        run: ctest --verbose --timeout 900
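
The new workflow pins GGML_NLOOP, GGML_NITER, and GGML_N_THREADS so that test workloads stay small and bounded on CI runners. A minimal sketch of how a test binary could pick these variables up, assuming the tests read them via getenv (the env_int helper and the fallback defaults below are illustrative, not ggml code):

```cpp
#include <cstdlib>
#include <cstdio>

// Illustrative helper (not part of ggml): read an integer setting from the
// environment, falling back to a default when the variable is unset.
static int env_int(const char * name, int fallback) {
    const char * val = std::getenv(name);
    return val ? std::atoi(val) : fallback;
}

int main() {
    // Assumption: the test binaries consume the CI variables roughly this way.
    const int nloop    = env_int("GGML_NLOOP",     1);
    const int niter    = env_int("GGML_NITER",     1);
    const int nthreads = env_int("GGML_N_THREADS", 1);
    std::printf("nloop=%d niter=%d nthreads=%d\n", nloop, niter, nthreads);
    return 0;
}
```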
7 changes: 7 additions & 0 deletions .gitignore
@@ -1,8 +1,12 @@
build/
build-debug/
build-*/
out/

compile_commands.json
CMakeSettings.json
.vs/
.vscode/

.exrc
.cache
@@ -12,3 +16,6 @@ compile_commands.json

src/arm_neon.h
tests/arm_neon.h

zig-out/
zig-cache/
10 changes: 9 additions & 1 deletion README.md
@@ -1,5 +1,7 @@
# ggml

[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205)

Tensor library for machine learning

***Note that this project is under active development. \
@@ -17,7 +19,7 @@ Some of the development is currently happening in the [llama.cpp](https://github
- No third-party dependencies
- Zero memory allocations during runtime

## Roadmap
## Updates

- [X] Example of GPT-2 inference [examples/gpt-2](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2)
- [X] Example of GPT-J inference [examples/gpt-j](https://github.com/ggerganov/ggml/tree/master/examples/gpt-j)
@@ -36,6 +38,9 @@ Some of the development is currently happening in the [llama.cpp](https://github
- [X] Example of 💫 StarCoder inference [examples/starcoder](https://github.com/ggerganov/ggml/tree/master/examples/starcoder)
- [X] Example of MPT inference [examples/mpt](https://github.com/ggerganov/ggml/tree/master/examples/mpt)
- [X] Example of Replit inference [examples/replit](https://github.com/ggerganov/ggml/tree/master/examples/replit)
- [X] Example of BioGPT inference [PABannier/biogpt.cpp](https://github.com/PABannier/biogpt.cpp)
- [X] Example of Encodec inference [PABannier/encodec.cpp](https://github.com/PABannier/encodec.cpp)
- [X] Example of CLIP inference [monatis/clip.cpp](https://github.com/monatis/clip.cpp)

## Whisper inference (example)

@@ -73,6 +78,9 @@ make -j4 gpt-2 gpt-j
../examples/gpt-j/download-ggml-model.sh 6B
./bin/gpt-j -m models/gpt-j-6B/ggml-model.bin -p "This is an example"

# Install Python dependencies
python3 -m pip install -r ../requirements.txt

# Run the Cerebras-GPT 111M model
# Download from: https://huggingface.co/cerebras
python3 ../examples/gpt-2/convert-cerebras-to-ggml.py /path/to/Cerebras-GPT-111M/
113 changes: 113 additions & 0 deletions build.zig
@@ -0,0 +1,113 @@
const std = @import("std");

// Zig Version: 0.11.0-dev.3798+a5e15eced
// Zig Build Command: zig build
// Zig Run Command:
// zig build run_dolly-v2
// zig build run_gpt-2
// zig build run_gpt-j
// zig build run_gpt-neox
// zig build run_mnist
// zig build run_mpt
// zig build run_replit
// zig build run_starcoder
// zig build run_test-grad0
// zig build run_test-mul-mat0
// zig build run_test-mul-mat2
// zig build run_test-opt
// zig build run_test-vec1
// zig build run_test0
// zig build run_test1
// zig build run_test2
// zig build run_test3
pub fn build(b: *std.build.Builder) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
    const lib = b.addStaticLibrary(.{
        .name = "ggml",
        .target = target,
        .optimize = optimize,
    });
    lib.addIncludePath("./include");
    lib.addIncludePath("./include/ggml");
    lib.addCSourceFiles(&.{
        "src/ggml.c",
    }, &.{"-std=c11"});
    lib.linkLibC();
    lib.linkLibCpp();
    b.installArtifact(lib);

    // examples
    const examples = .{
        "dolly-v2",
        "gpt-2",
        "gpt-j",
        "gpt-neox",
        "mnist",
        "mpt",
        "replit",
        "starcoder",
        // "whisper",
    };
    inline for (examples) |name| {
        const exe = b.addExecutable(.{
            .name = name,
            .target = target,
            .optimize = optimize,
        });
        exe.addIncludePath("./include");
        exe.addIncludePath("./include/ggml");
        exe.addIncludePath("./examples");
        // exe.addIncludePath("./examples/whisper");
        exe.addCSourceFiles(&.{
            std.fmt.comptimePrint("examples/{s}/main.cpp", .{name}),
            "examples/common.cpp",
            "examples/common-ggml.cpp",
            // "examples/whisper/whisper.cpp",
        }, &.{"-std=c++11"});
        exe.linkLibrary(lib);
        b.installArtifact(exe);
        const run_cmd = b.addRunArtifact(exe);
        run_cmd.step.dependOn(b.getInstallStep());
        if (b.args) |args| run_cmd.addArgs(args);
        const run_step = b.step("run_" ++ name, "Run examples");
        run_step.dependOn(&run_cmd.step);
    }

    // tests
    const tests = .{
        // "test-blas0",
        "test-grad0",
        "test-mul-mat0",
        // "test-mul-mat1",
        "test-mul-mat2",
        "test-opt",
        // "test-svd0",
        // "test-vec0",
        "test-vec1",
        // "test-vec2",
        "test0",
        "test1",
        "test2",
        "test3",
    };
    inline for (tests) |name| {
        const exe = b.addExecutable(.{
            .name = name,
            .target = target,
            .optimize = optimize,
        });
        exe.addIncludePath("./include");
        exe.addIncludePath("./include/ggml");
        exe.addCSourceFiles(&.{
            std.fmt.comptimePrint("tests/{s}.c", .{name}),
        }, &.{"-std=c11"});
        exe.linkLibrary(lib);
        b.installArtifact(exe);
        const run_cmd = b.addRunArtifact(exe);
        run_cmd.step.dependOn(b.getInstallStep());
        if (b.args) |args| run_cmd.addArgs(args);
        const run_step = b.step("run_" ++ name, "Run tests");
        run_step.dependOn(&run_cmd.step);
    }
}
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -2,6 +2,7 @@ if (GGML_ALL_WARNINGS)
    if (NOT MSVC)
        set(cxx_flags
            # TODO(marella): Add other warnings.
            -Wpedantic
            -Wunused-variable
            -Wno-unused-function
            -Wno-multichar
11 changes: 11 additions & 0 deletions examples/common-ggml.cpp
@@ -52,6 +52,11 @@ bool ggml_common_quantize_0(
        case GGML_FTYPE_ALL_F32:
        case GGML_FTYPE_MOSTLY_F16:
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
        case GGML_FTYPE_MOSTLY_Q2_K:
        case GGML_FTYPE_MOSTLY_Q3_K:
        case GGML_FTYPE_MOSTLY_Q4_K:
        case GGML_FTYPE_MOSTLY_Q5_K:
        case GGML_FTYPE_MOSTLY_Q6_K:
            {
                fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
                return false;
@@ -187,6 +192,12 @@ bool ggml_common_quantize_0(
        case GGML_TYPE_I16:
        case GGML_TYPE_I32:
        case GGML_TYPE_Q8_1:
        case GGML_TYPE_Q2_K:
        case GGML_TYPE_Q3_K:
        case GGML_TYPE_Q4_K:
        case GGML_TYPE_Q5_K:
        case GGML_TYPE_Q6_K:
        case GGML_TYPE_Q8_K:
        case GGML_TYPE_COUNT:
            {
                fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
51 changes: 35 additions & 16 deletions examples/common.cpp
@@ -17,6 +17,10 @@
#define M_PI 3.14159265358979323846
#endif

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
@@ -232,43 +236,59 @@ std::wstring convert_to_wstring(const std::string & input) {
    return converter.from_bytes(input);
}

void gpt_split_words(std::string str, std::vector<std::string>& words) {
    const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
    const std::regex re(pattern);
    std::smatch m;

    while (std::regex_search(str, m, re)) {
        for (auto x : m) {
            words.push_back(x);
        }
        str = m.suffix();
    }
}

std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) {
    std::vector<std::string> words;

    // first split the text into words
    {
        std::string str = text;
        std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";

        // Generate the subpattern from the special_tokens vector if it's not empty
        if (!vocab.special_tokens.empty()) {
            const std::regex escape(R"([\[\\\^\$\.\|\?\*\+\(\)\{\}])");
            std::string special_tokens_subpattern;
            for (const auto & token : vocab.special_tokens) {
                if (!special_tokens_subpattern.empty()) {
                    special_tokens_subpattern += "|";
                }
                special_tokens_subpattern += token;
                special_tokens_subpattern += std::regex_replace(token, escape, R"(\$&)");
            }

            // Modify the regex pattern with the generated special tokens subpattern
            pat = special_tokens_subpattern + "|" + pat;
        }

        std::regex re(pat);
        std::smatch m;

        while (std::regex_search(str, m, re)) {
            for (auto x : m) {
                words.push_back(x);
            std::regex re(special_tokens_subpattern);
            std::smatch m;
            // Split the text by special tokens.
            while (std::regex_search(str, m, re)) {
                // Split the substrings in-between special tokens into words.
                gpt_split_words(m.prefix(), words);
                // Add matched special tokens as words.
                for (auto x : m) {
                    words.push_back(x);
                }
                str = m.suffix();
            }
            str = m.suffix();
            // Remaining text without special tokens will be handled below.
        }

        gpt_split_words(str, words);
    }

    // find the longest token that forms each word in words:
    std::vector<gpt_vocab::id> tokens;
    for (const auto & word : words) {
        for (int i = 0; i < word.size(); ){
        for (int i = 0; i < (int) word.size(); ){
            for (int j = word.size() - 1; j >= i; j--){
                auto cand = word.substr(i, j-i+1);
                auto it = vocab.token_to_id.find(cand);
@@ -285,7 +305,6 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
        }
    }


    return tokens;
}

@@ -350,7 +369,7 @@ void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test){
        }
    }

    fprintf(stderr, "%s : %lu tests failed out of %lu tests.\n", __func__, n_fails, tests.size());
    fprintf(stderr, "%s : %zu tests failed out of %zu tests.\n", __func__, n_fails, tests.size());
}

bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
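To see the refactored tokenizer above in action, here is a hypothetical usage sketch; the vocabulary contents are invented, and only the gpt_vocab fields visible in this diff (token_to_id, special_tokens) are assumed:

```cpp
// Hypothetical usage of gpt_tokenize() from examples/common.h; the
// vocabulary below is made up for illustration only.
#include "common.h"

#include <cstdio>

int main() {
    gpt_vocab vocab;
    vocab.token_to_id    = { {"Hello", 1}, {" world", 2}, {"<|end|>", 3} };
    // Special tokens are matched before the BPE-style word regex runs,
    // so "<|end|>" survives as a single word instead of being split up.
    vocab.special_tokens = { "<|end|>" };

    // Expected words: {"Hello", " world", "<|end|>"} -> ids {1, 2, 3}
    for (const auto id : gpt_tokenize(vocab, "Hello world<|end|>")) {
        std::printf("%d\n", id);
    }
    return 0;
}
```

The std::regex_replace escaping added in this commit is what makes this safe for special tokens containing regex metacharacters, such as the | in <|end|>.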
4 changes: 3 additions & 1 deletion examples/common.h
@@ -66,6 +66,8 @@ std::string convert_to_utf8(const std::wstring & input);

std::wstring convert_to_wstring(const std::string & input);

void gpt_split_words(std::string str, std::vector<std::string>& words);

// split text into tokens
//
// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
@@ -80,7 +82,7 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri

// test outputs of gpt_tokenize
//
// - compare with tokens generated by the huggingface tokenizer
// - compare with tokens generated by the huggingface tokenizer
// - test cases are chosen based on the model's main language (under 'prompt' directory)
// - if all sentences are tokenized identically, print 'All tests passed.'
// - otherwise, print sentence, huggingface tokens, ggml tokens
3 changes: 3 additions & 0 deletions examples/dolly-v2/README.md
@@ -21,6 +21,9 @@ make -j
# get the Dolly-V2 3B model
git clone https://huggingface.co/databricks/dolly-v2-3b

# install Python dependencies
python3 -m pip install -r ../requirements.txt

# convert model to FP16
python3 ../examples/dolly-v2/convert-h5-to-ggml.py ./dolly-v2-3b/ 1
