Skip to content

Commit

Permalink
adjust buffer size, thread count
Browse files — browse the repository at this point in the history
  • Loading branch information
JohannesGaessler committed Sep 15, 2024
1 parent d813691 commit cf0f60e
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 10 deletions.
4 changes: 2 additions & 2 deletions examples/mnist/mnist-common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ mnist_eval_result mnist_model_eval(mnist_model & model, const float * images, co
void mnist_model_train(mnist_model & model, const float * images, const float * labels, const int nex, const int nepoch, const float val_split) {
const int64_t t_start_us = ggml_time_us();

struct ggml_cgraph * gf = ggml_new_graph_custom(model.ctx_compute, 16384, true); // Forward pass.
struct ggml_cgraph * gf = ggml_new_graph_custom(model.ctx_compute, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true); // Forward pass.
ggml_build_forward_expand(gf, model.loss);

struct ggml_cgraph * gb_grad = ggml_graph_dup(model.ctx_compute, gf); // Backward pass, gradients.
Expand Down Expand Up @@ -634,7 +634,7 @@ void mnist_model_save(mnist_model & model, const std::string & fname) {
struct ggml_context * ggml_ctx;
{
struct ggml_init_params params = {
/*.mem_size =*/ model.size_weight,
/*.mem_size =*/ 100 * 1024*1024,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
};
Expand Down
13 changes: 7 additions & 6 deletions examples/mnist/mnist-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ struct mnist_model {
struct ggml_tensor * dense_weight = nullptr;
struct ggml_tensor * dense_bias = nullptr;

static const size_t size_weight = 100 * 1024*1024;
static const size_t size_compute = 1 * 1024*1024*1024;

struct ggml_context * ctx_weight = nullptr;
struct ggml_context * ctx_compute = nullptr;
ggml_backend_buffer_t buf_weight = nullptr;
Expand All @@ -70,21 +67,25 @@ struct mnist_model {
fprintf(stderr, "%s: using %s backend\n", __func__, backend_name.c_str());
backend = ggml_backend_reg_init_backend(backend_index, nullptr);
if (ggml_backend_is_cpu(backend)) {
ggml_backend_cpu_set_n_threads(backend, std::thread::hardware_concurrency());
const int ncores_logical = std::thread::hardware_concurrency();
ggml_backend_cpu_set_n_threads(backend, std::min(ncores_logical, (ncores_logical + 4)/2));
}

{
const size_t size_meta = 1024*ggml_tensor_overhead();
struct ggml_init_params params = {
/*.mem_size =*/ size_weight,
/*.mem_size =*/ size_meta,
/*.mem_buffer =*/ nullptr,
/*.no_alloc =*/ true,
};
ctx_weight = ggml_init(params);
}

{
// The compute context needs a total of 3 compute graphs: forward pass + backwards pass (with/without optimizer step).
const size_t size_meta = GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead();
struct ggml_init_params params = {
/*.mem_size =*/ size_compute,
/*.mem_size =*/ size_meta,
/*.mem_buffer =*/ nullptr,
/*.no_alloc =*/ true,
};
Expand Down
4 changes: 3 additions & 1 deletion examples/mnist/mnist-eval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ int main(int argc, char ** argv) {
mnist_eval_result result_eval;

if (backend == "CPU") {
result_eval = mnist_graph_eval(argv[1], images.data(), labels.data(), MNIST_NTEST, std::thread::hardware_concurrency());
const int ncores_logical = std::thread::hardware_concurrency();
result_eval = mnist_graph_eval(
argv[1], images.data(), labels.data(), MNIST_NTEST, std::min(ncores_logical, (ncores_logical + 4)/2));
if (result_eval.success) {
fprintf(stdout, "%s: predicted digit is %d\n", __func__, result_eval.pred[iex]);

Expand Down
2 changes: 1 addition & 1 deletion tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3229,7 +3229,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));

for (const int64_t & ne3 : {1, 3}) { // CUDA only supports ne3 == 1
for (const int64_t & ne3 : {1, 3}) { // CUDA backwards pass only supports ne3 == 1
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 1}));
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 2, 1, 1}));
Expand Down

0 comments on commit cf0f60e

Please sign in to comment.