refactor gguf load
JohannesGaessler committed Sep 10, 2024
1 parent 035f0d7 commit 09d1703
Showing 5 changed files with 55 additions and 38 deletions.
45 changes: 7 additions & 38 deletions examples/mnist/mnist-common.cpp
@@ -160,20 +160,19 @@ mnist_model mnist_model_init_from_file(const std::string & fname, const std::str
mnist_model model(backend);
fprintf(stderr, "%s: loading model weights from '%s'\n", __func__, fname.c_str());

- struct gguf_context * ctx_be; // be == backend
-
+ struct gguf_context * ctx;
{
struct gguf_init_params params = {
/*.no_alloc =*/ true,
/*.ctx =*/ &model.ctx_weight,
};
- ctx_be = gguf_init_from_file(fname.c_str(), params);
- if (!ctx_be) {
+ ctx = gguf_init_from_file(fname.c_str(), params);
+ if (!ctx) {
fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
exit(1);
}
}
- model.arch = gguf_get_val_str(ctx_be, gguf_find_key(ctx_be, "general.architecture"));
+ model.arch = gguf_get_val_str(ctx, gguf_find_key(ctx, "general.architecture"));
fprintf(stderr, "%s: model arch is %s\n", __func__, model.arch.c_str());

if (model.arch == "mnist-fc") {
@@ -247,40 +246,10 @@ mnist_model mnist_model_init_from_file(const std::string & fname, const std::str
}
model.buf_weight = ggml_backend_alloc_ctx_tensors(model.ctx_weight, model.backend);

- void * buf_tmp = malloc(model.size_weight);
- struct ggml_context * ctx_ggml_tmp;
- {
- struct ggml_init_params params = {
- /*.mem_size =*/ model.size_weight,
- /*.mem_buffer =*/ buf_tmp,
- /*.no_alloc =*/ false,
- };
- ctx_ggml_tmp = ggml_init(params);
+ if(!ggml_backend_load_from_gguf(fname.c_str(), model.ctx_weight, ctx)) {
+ fprintf(stderr, "%s: loading weights from %s failed\n", __func__, fname.c_str());
+ exit(1);
}
- struct gguf_context * ctx_gguf_tmp;
- {
- struct gguf_init_params params = {
- /*.no_alloc =*/ false,
- /*.ctx =*/ &ctx_ggml_tmp,
- };
- ctx_gguf_tmp = gguf_init_from_file(fname.c_str(), params);
- if (!ctx_gguf_tmp) {
- fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
- exit(1);
- }
- }
- for (const std::string & s : {"fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias"}) {
- const struct ggml_tensor * src = ggml_get_tensor(ctx_ggml_tmp, s.c_str());
- struct ggml_tensor * dst = ggml_get_tensor(model.ctx_weight, s.c_str());
- GGML_ASSERT(ggml_nbytes(src) == ggml_nbytes(dst));
- ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(dst));
- }
-
- gguf_free(ctx_gguf_tmp);
- ggml_free(ctx_ggml_tmp);
- free(buf_tmp);
-
- gguf_free(ctx_be);

fprintf(stderr, "%s: successfully loaded weights from %s\n", __func__, fname.c_str());
return model;
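For reference, a minimal sketch of the load pattern the example now follows (illustrative only, not part of the commit; error handling is trimmed, the path model.gguf is made up, and a CPU backend is assumed):

#include <cstdio>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

int main() {
    const char * fname = "model.gguf"; // hypothetical file

    // read only the GGUF metadata; tensors are created in ctx_weight with data == NULL
    struct ggml_context * ctx_weight = nullptr;
    struct gguf_init_params params = {
        /*.no_alloc =*/ true,
        /*.ctx      =*/ &ctx_weight,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(fname, params);
    if (!ctx_gguf) {
        fprintf(stderr, "failed to read %s\n", fname);
        return 1;
    }

    // allocate backend buffers for every tensor in ctx_weight
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf_weight = ggml_backend_alloc_ctx_tensors(ctx_weight, backend);

    // stream the tensor data from the file into the backend buffers
    if (!ggml_backend_load_from_gguf(fname, ctx_weight, ctx_gguf)) {
        fprintf(stderr, "failed to load weights from %s\n", fname);
        return 1;
    }

    // ... run inference with the weights ...

    gguf_free(ctx_gguf);
    ggml_backend_buffer_free(buf_weight);
    ggml_free(ctx_weight);
    ggml_backend_free(backend);
    return 0;
}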
1 change: 1 addition & 0 deletions include/ggml-alloc.h
@@ -67,6 +67,7 @@ GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);

// Utils

// Create a buffer and allocate all the tensors in a ggml_context
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
1 change: 1 addition & 0 deletions include/ggml-backend.h
@@ -234,6 +234,7 @@ extern "C" {
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);

GGML_API bool ggml_backend_load_from_gguf(const char * fname, struct ggml_context * ctx_ggml, struct gguf_context * ctx_gguf);

#ifdef __cplusplus
}
45 changes: 45 additions & 0 deletions src/ggml-backend.c
@@ -2267,3 +2267,48 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t

return true;
}

bool ggml_backend_load_from_gguf(const char * fname, struct ggml_context * ctx_ggml, struct gguf_context * ctx_gguf) {
FILE * f = ggml_fopen(fname, "rb");
if (!f) {
return false;
}

const size_t buf_size = 4*1024*1024;
void * buf = malloc(buf_size);

const int n_tensors = gguf_get_n_tensors(ctx_gguf);
for (int i = 0; i < n_tensors; i++) {
const char * name = gguf_get_tensor_name(ctx_gguf, i);

struct ggml_tensor * tensor = ggml_get_tensor(ctx_ggml, name);
if (!tensor) {
fclose(f);
free(buf);
return false;
}

const size_t offs = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i);

if (fseek(f, offs, SEEK_SET) != 0) {
fclose(f);
free(buf);
return false;
}

const size_t nbytes = ggml_nbytes(tensor);
for (size_t pos = 0; pos < nbytes; pos += buf_size) {
const size_t nbytes_cpy = MIN(buf_size, nbytes - pos);

if (fread(buf, 1, nbytes_cpy, f) != nbytes_cpy) {
fclose(f);
free(buf);
return false;
}

ggml_backend_tensor_set(tensor, buf, pos, nbytes_cpy);
}
}

fclose(f);
free(buf);
return true;
}
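The new helper streams each tensor through a fixed 4 MiB staging buffer, so the data can be copied into backend (e.g. GPU) tensors chunk by chunk with ggml_backend_tensor_set instead of reading the whole file into host memory first. A possible way to exercise it (not part of this commit) is a small write/read round trip using the existing GGUF writer API; the file name test-weights.gguf and the tensor name test.weight are made up, and most error checks are omitted:

#include <cstdio>
#include <vector>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

int main() {
    const char * fname = "test-weights.gguf"; // hypothetical file
    const int64_t ne = 16;

    // write a GGUF file containing a single F32 tensor
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 1024*1024, // generous, enough for one small tensor
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);
        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ne);
        ggml_set_name(t, "test.weight");
        for (int64_t i = 0; i < ne; ++i) {
            ((float *) t->data)[i] = (float) i;
        }

        struct gguf_context * ctx_out = gguf_init_empty();
        gguf_set_val_str(ctx_out, "general.architecture", "test");
        gguf_add_tensor(ctx_out, t);
        gguf_write_to_file(ctx_out, fname, /*only_meta =*/ false);
        gguf_free(ctx_out);
        ggml_free(ctx);
    }

    // read it back through the new code path, same pattern as in the MNIST example above
    struct ggml_context * ctx_data = nullptr;
    struct gguf_init_params params = {
        /*.no_alloc =*/ true,
        /*.ctx      =*/ &ctx_data,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(fname, params);

    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_data, backend);

    if (!ggml_backend_load_from_gguf(fname, ctx_data, ctx_gguf)) {
        fprintf(stderr, "load failed\n");
        return 1;
    }

    std::vector<float> out(ne);
    ggml_backend_tensor_get(ggml_get_tensor(ctx_data, "test.weight"), out.data(), 0, ne*sizeof(float));
    printf("first/last element: %.1f %.1f\n", out[0], out[ne - 1]);

    gguf_free(ctx_gguf);
    ggml_backend_buffer_free(buf);
    ggml_free(ctx_data);
    ggml_backend_free(backend);
    return 0;
}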
1 change: 1 addition & 0 deletions src/ggml-cuda/out-prod.cu
@@ -1,3 +1,4 @@
#include "out-prod.cuh"
#include "opt-step-adam.cuh"
#include "vendors/cuda.h"
