From 09d1703ee81a0ff47cb43e5d44eb644769bed489 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Wed, 11 Sep 2024 00:15:31 +0200
Subject: [PATCH] refactor gguf load

---
 examples/mnist/mnist-common.cpp | 45 +++++----------------------
 include/ggml-alloc.h            |  1 +
 include/ggml-backend.h          |  1 +
 src/ggml-backend.c              | 45 +++++++++++++++++++++++++++++++++
 src/ggml-cuda/out-prod.cu       |  1 +
 5 files changed, 55 insertions(+), 38 deletions(-)

diff --git a/examples/mnist/mnist-common.cpp b/examples/mnist/mnist-common.cpp
index 4b583ac7e..4e6eedaf0 100644
--- a/examples/mnist/mnist-common.cpp
+++ b/examples/mnist/mnist-common.cpp
@@ -160,20 +160,19 @@ mnist_model mnist_model_init_from_file(const std::string & fname, const std::str
     mnist_model model(backend);
 
     fprintf(stderr, "%s: loading model weights from '%s'\n", __func__, fname.c_str());
-    struct gguf_context * ctx_be; // be == backend
-
+    struct gguf_context * ctx;
     {
         struct gguf_init_params params = {
             /*.no_alloc =*/ true,
             /*.ctx      =*/ &model.ctx_weight,
         };
-        ctx_be = gguf_init_from_file(fname.c_str(), params);
-        if (!ctx_be) {
+        ctx = gguf_init_from_file(fname.c_str(), params);
+        if (!ctx) {
             fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
             exit(1);
         }
     }
-    model.arch = gguf_get_val_str(ctx_be, gguf_find_key(ctx_be, "general.architecture"));
+    model.arch = gguf_get_val_str(ctx, gguf_find_key(ctx, "general.architecture"));
     fprintf(stderr, "%s: model arch is %s\n", __func__, model.arch.c_str());
 
     if (model.arch == "mnist-fc") {
@@ -247,40 +246,10 @@ mnist_model mnist_model_init_from_file(const std::string & fname, const std::str
     }
     model.buf_weight = ggml_backend_alloc_ctx_tensors(model.ctx_weight, model.backend);
 
-    void * buf_tmp = malloc(model.size_weight);
-    struct ggml_context * ctx_ggml_tmp;
-    {
-        struct ggml_init_params params = {
-            /*.mem_size   =*/ model.size_weight,
-            /*.mem_buffer =*/ buf_tmp,
-            /*.no_alloc   =*/ false,
-        };
-        ctx_ggml_tmp = ggml_init(params);
+    if(!ggml_backend_load_from_gguf(fname.c_str(), model.ctx_weight, ctx)) {
+        fprintf(stderr, "%s: loading weights from %s failed\n", __func__, fname.c_str());
+        exit(1);
     }
-    struct gguf_context * ctx_gguf_tmp;
-    {
-        struct gguf_init_params params = {
-            /*.no_alloc =*/ false,
-            /*.ctx      =*/ &ctx_ggml_tmp,
-        };
-        ctx_gguf_tmp = gguf_init_from_file(fname.c_str(), params);
-        if (!ctx_gguf_tmp) {
-            fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
-            exit(1);
-        }
-    }
-    for (const std::string & s : {"fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias"}) {
-        const struct ggml_tensor * src = ggml_get_tensor(ctx_ggml_tmp, s.c_str());
-        struct ggml_tensor * dst = ggml_get_tensor(model.ctx_weight, s.c_str());
-        GGML_ASSERT(ggml_nbytes(src) == ggml_nbytes(dst));
-        ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(dst));
-    }
-
-    gguf_free(ctx_gguf_tmp);
-    ggml_free(ctx_ggml_tmp);
-    free(buf_tmp);
-
-    gguf_free(ctx_be);
 
     fprintf(stderr, "%s: successfully loaded weights from %s\n", __func__, fname.c_str());
     return model;
diff --git a/include/ggml-alloc.h b/include/ggml-alloc.h
index 434c13b34..45732ea65 100644
--- a/include/ggml-alloc.h
+++ b/include/ggml-alloc.h
@@ -67,6 +67,7 @@ GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph
 GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
 
 // Utils
+// Create a buffer and allocate all the tensors in a ggml_context
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
 
diff --git a/include/ggml-backend.h b/include/ggml-backend.h
index 96af726e8..0742cff9c 100644
--- a/include/ggml-backend.h
+++ b/include/ggml-backend.h
@@ -234,6 +234,7 @@ extern "C" {
 
     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
     GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
+    GGML_API bool ggml_backend_load_from_gguf(const char * fname, struct ggml_context * ctx_ggml, struct gguf_context * ctx_gguf);
 
 #ifdef __cplusplus
 }
diff --git a/src/ggml-backend.c b/src/ggml-backend.c
index a938dc67e..798004a24 100644
--- a/src/ggml-backend.c
+++ b/src/ggml-backend.c
@@ -2267,3 +2267,48 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
 
     return true;
 }
+
+bool ggml_backend_load_from_gguf(const char * fname, struct ggml_context * ctx_ggml, struct gguf_context * ctx_gguf) {
+    FILE * f = ggml_fopen(fname, "rb");
+    if (!f) {
+        return false;
+    }
+
+    const size_t buf_size = 4*1024*1024;
+    void * buf = malloc(buf_size);
+
+    const int n_tensors = gguf_get_n_tensors(ctx_gguf);
+    for (int i = 0; i < n_tensors; i++) {
+        const char * name = gguf_get_tensor_name(ctx_gguf, i);
+
+        struct ggml_tensor * tensor = ggml_get_tensor(ctx_ggml, name);
+        if (!tensor) {
+            return false;
+        }
+
+        const size_t offs = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i);
+
+        if (fseek(f, offs, SEEK_SET) != 0) {
+            fclose(f);
+            free(buf);
+            return false;
+        }
+
+        const size_t nbytes = ggml_nbytes(tensor);
+        for (size_t pos = 0; pos < nbytes; pos += buf_size) {
+            const size_t nbytes_cpy = MIN(buf_size, nbytes - pos);
+
+            if (fread(buf, 1, nbytes_cpy, f) != nbytes_cpy) {
+                fclose(f);
+                free(buf);
+                return false;
+            }
+
+            ggml_backend_tensor_set(tensor, buf, pos, nbytes_cpy);
+        }
+    }
+
+    fclose(f);
+    free(buf);
+    return true;
+}
diff --git a/src/ggml-cuda/out-prod.cu b/src/ggml-cuda/out-prod.cu
index b36bd9777..5a19f08f2 100644
--- a/src/ggml-cuda/out-prod.cu
+++ b/src/ggml-cuda/out-prod.cu
@@ -1,3 +1,4 @@
+#include "out-prod.cuh"
 #include "opt-step-adam.cuh"
 #include "vendors/cuda.h"
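
Usage note (not part of the patch): a minimal sketch of how the new ggml_backend_load_from_gguf() helper is meant to be called, mirroring the mnist-common.cpp hunk above. The GGUF file is first parsed with no_alloc = true so the tensors are created in a ggml_context without data, the tensors are then allocated on the target backend, and the helper streams their data from the same file into the backend buffer. The load_weights() wrapper and its signature are illustrative only.

// Sketch: load all tensors of a GGUF file onto a backend via the new helper.
// load_weights() is a hypothetical wrapper, not part of ggml.
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

static bool load_weights(const char * fname, ggml_backend_t backend,
                         struct ggml_context ** ctx_weight, ggml_backend_buffer_t * buf_weight) {
    // Parse metadata only (no_alloc = true): tensors are created in *ctx_weight without data.
    struct gguf_init_params params = {
        /*.no_alloc =*/ true,
        /*.ctx      =*/ ctx_weight,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(fname, params);
    if (!ctx_gguf) {
        return false;
    }

    // Allocate the (still empty) tensors on the backend, then stream their data from the file.
    *buf_weight = ggml_backend_alloc_ctx_tensors(*ctx_weight, backend);
    const bool ok = *buf_weight && ggml_backend_load_from_gguf(fname, *ctx_weight, ctx_gguf);

    gguf_free(ctx_gguf);
    return ok;
}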