increment iter per eval instead of epoch
JohannesGaessler committed Sep 13, 2024
1 parent 884431c commit edefe47
Showing 3 changed files with 31 additions and 25 deletions.
19 changes: 12 additions & 7 deletions examples/mnist/mnist-common.cpp
@@ -555,15 +555,20 @@ void mnist_model_train(mnist_model & model, const float * images, const float *
             // For the last iteration, calculate gradients and also apply the optimizer:
             ggml_backend_graph_compute(model.backend, gb_opt);
             ggml_graph_reset(gb_grad); // Set gradients to zero, do not reset optimizer.
         }
-        for (int j = 0; j < gb_grad->n_nodes; ++j) {
-            struct ggml_tensor * node = gb_grad->nodes[j];
-
-            if (node->op != GGML_OP_OPT_STEP_ADAM) {
-                continue;
-            }
+        // Increment iterations for the optimizer tensors:
+        for (int j = 0; j < gb_opt->n_nodes; ++j) {
+            struct ggml_tensor * node = gb_opt->nodes[j];
 
-            node->op_params[0]++;
+            if (node->op != GGML_OP_OPT_STEP_ADAM) {
+                continue;
+            }
+
+            int64_t iter;
+            memcpy(&iter, node->op_params + 0, sizeof(int64_t));
+            iter++;
+            memcpy(node->op_params + 0, &iter, sizeof(int64_t));
+        }
     }
 
     ggml_backend_tensor_get(model.loss, &loss, 0, ggml_nbytes(model.loss));
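The memcpy round-trip in the new loop is needed because ggml keeps per-op parameters in an int32_t array (ggml_tensor::op_params): a 64-bit counter spans two slots, so the old one-liner node->op_params[0]++ would now only touch its low half. A standalone sketch of the pattern (illustrative only, not repo code; the array size is a stand-in):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        int32_t op_params[16] = {0}; // stand-in for ggml_tensor::op_params

        for (int step = 0; step < 3; ++step) {
            int64_t iter;
            memcpy(&iter, op_params + 0, sizeof(int64_t)); // read slots 0 and 1
            iter++;
            memcpy(op_params + 0, &iter, sizeof(int64_t)); // write both back
        }

        int64_t iter;
        memcpy(&iter, op_params + 0, sizeof(int64_t));
        printf("iter = %lld\n", (long long) iter); // prints: iter = 3
        return 0;
    }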
12 changes: 6 additions & 6 deletions src/ggml-cuda/opt-step-adam.cu
@@ -63,12 +63,12 @@ void ggml_cuda_opt_step_adam(ggml_backend_cuda_context & ctx, ggml_tensor * dst)

     const int64_t ne = ggml_nelements(src0);
 
-    int32_t iter; memcpy(&iter, &dst->op_params[0], sizeof(float));
-    float alpha; memcpy(&alpha, &dst->op_params[1], sizeof(float));
-    float beta1; memcpy(&beta1, &dst->op_params[2], sizeof(float));
-    float beta2; memcpy(&beta2, &dst->op_params[3], sizeof(float));
-    float eps; memcpy(&eps, &dst->op_params[4], sizeof(float));
-    float l1; memcpy(&l1, &dst->op_params[5], sizeof(float));
+    int64_t iter; memcpy(&iter, &dst->op_params[0], sizeof(int64_t));
+    float alpha; memcpy(&alpha, &dst->op_params[2], sizeof(float));
+    float beta1; memcpy(&beta1, &dst->op_params[3], sizeof(float));
+    float beta2; memcpy(&beta2, &dst->op_params[4], sizeof(float));
+    float eps; memcpy(&eps, &dst->op_params[5], sizeof(float));
+    float l1; memcpy(&l1, &dst->op_params[6], sizeof(float));
 
     const float beta1h = alpha/(1.0f - powf(beta1, iter));
     const float beta2h = 1.0f/(1.0f - powf(beta2, iter));
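Two things are worth noting here. First, the old line copied sizeof(float) bytes into an int32_t iter, which only worked because both types happen to be 4 bytes wide; the new code reads a genuine 64-bit counter. Second, beta1h and beta2h are Adam's bias-correction factors folded into the step size, which is why the iteration count has to be tracked correctly: it enters the update through beta^t. In textbook notation (standard Adam, not code from this commit, up to the exact placement of epsilon):

    m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
    v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
    \theta_t = \theta_{t-1} - \frac{\alpha}{1 - \beta_1^t} \cdot \frac{m_t}{\sqrt{v_t / (1 - \beta_2^t)} + \epsilon}

so beta1h = alpha/(1 - beta1^t) and beta2h = 1/(1 - beta2^t) precompute the two t-dependent constants once per kernel launch.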
25 changes: 13 additions & 12 deletions src/ggml.c
@@ -8132,12 +8132,13 @@ struct ggml_tensor * ggml_opt_step_adam(
     result->src[2] = ggml_dup_tensor(ctx, a->grad);
     result->src[3] = ggml_dup_tensor(ctx, a->grad);
 
-    ggml_set_op_params_i32(result, 0, 1); // iteration
-    ggml_set_op_params_f32(result, 1, alpha);
-    ggml_set_op_params_f32(result, 2, beta1);
-    ggml_set_op_params_f32(result, 3, beta2);
-    ggml_set_op_params_f32(result, 4, eps);
-    ggml_set_op_params_f32(result, 5, l1);
+    const int64_t iter = 1;
+    memcpy(result->op_params + 0, &iter, sizeof(int64_t));
+    ggml_set_op_params_f32(result, 2, alpha);
+    ggml_set_op_params_f32(result, 3, beta1);
+    ggml_set_op_params_f32(result, 4, beta2);
+    ggml_set_op_params_f32(result, 5, eps);
+    ggml_set_op_params_f32(result, 6, l1);
 
     return result;
 }
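Assuming ggml_set_op_params_f32 addresses op_params in 4-byte slots (as the index arguments suggest), the resulting parameter layout for GGML_OP_OPT_STEP_ADAM after this change is (my reading of the diff, not documentation from the repo):

    slot (int32): 0-1           2      3      4      5    6
    parameter:    iter (int64)  alpha  beta1  beta2  eps  l1

The five floats shift up by one slot because the counter now consumes slots 0 and 1.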
@@ -17162,12 +17163,12 @@ static void ggml_compute_forward_opt_step_adam_f32(
     const int ir1 = MIN(ir0 + dr, nr);
 
     /* const float gnorm = 1.0f; */
-    const int32_t iter = ggml_get_op_params_i32(dst, 0);
-    const float alpha = ggml_get_op_params_f32(dst, 1);
-    const float beta1 = ggml_get_op_params_f32(dst, 2);
-    const float beta2 = ggml_get_op_params_f32(dst, 3);
-    const float eps = ggml_get_op_params_f32(dst, 4);
-    const float l1 = ggml_get_op_params_f32(dst, 5);
+    int64_t iter; memcpy(&iter, dst->op_params + 0, sizeof(int64_t));
+    const float alpha = ggml_get_op_params_f32(dst, 2);
+    const float beta1 = ggml_get_op_params_f32(dst, 3);
+    const float beta2 = ggml_get_op_params_f32(dst, 4);
+    const float eps = ggml_get_op_params_f32(dst, 5);
+    const float l1 = ggml_get_op_params_f32(dst, 6);
 
     const float beta1h = alpha/(1.0f - powf(beta1, iter));
     const float beta2h = 1.0f/(1.0f - powf(beta2, iter));
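To see why incrementing per optimizer eval rather than per epoch matters, consider how beta1h decays with the counter. A standalone sketch (alpha and beta1 are typical Adam defaults chosen for illustration, not values from this commit):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const float alpha = 1e-3f; // illustrative defaults
        const float beta1 = 0.9f;
        for (int64_t iter = 1; iter <= 4; ++iter) {
            const float beta1h = alpha / (1.0f - powf(beta1, iter));
            printf("iter=%lld beta1h=%.6f\n", (long long) iter, beta1h);
        }
        // iter=1 yields beta1h = 10*alpha; the factor decays toward alpha.
        // With a per-epoch counter, every batch of the first epoch would
        // run with the strongly inflated iter=1 correction.
        return 0;
    }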
