Feature: Circular Padding #638

Draft
wants to merge 3 commits into master
3 changes: 3 additions & 0 deletions build.zig
@@ -17,6 +17,7 @@ const builtin = @import("builtin");
// zig build run_test-mul-mat2
// zig build run_test-opt
// zig build run_test-vec1
// zig build run_test-pad-circular
// zig build run_test0
// zig build run_test1
// zig build run_test2
@@ -95,6 +96,7 @@ pub fn build(b: *std.build.Builder) void {
"test1",
"test2",
"test3",
"test-pad-circular",
} else .{
// "test-blas0",
// "test-grad0",
@@ -110,6 +112,7 @@ pub fn build(b: *std.build.Builder) void {
"test1",
"test2",
"test3",
"test-pad-circular",
};
inline for (tests) |name| {
const exe = b.addExecutable(.{
9 changes: 9 additions & 0 deletions include/ggml/ggml.h
@@ -450,6 +450,8 @@ extern "C" {
GGML_OP_CROSS_ENTROPY_LOSS,
GGML_OP_CROSS_ENTROPY_LOSS_BACK,

GGML_OP_PAD_CIRCULAR,

GGML_OP_COUNT,
};

@@ -1778,6 +1780,13 @@ extern "C" {
struct ggml_tensor * b,
struct ggml_tensor * c);

// Circular Padding

GGML_API struct ggml_tensor * ggml_pad_circular(
struct ggml_context * ctx,
struct ggml_tensor * a,
int padding);

//
// automatic differentiation
//
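As a quick orientation for reviewers, here is a minimal sketch of how the new API might be driven. It assumes the standard ggml context/graph helpers of this era (`ggml_init`, `ggml_new_tensor_2d`, `ggml_build_forward`, `ggml_graph_compute_with_ctx`); the exact graph-building helpers vary across ggml revisions, and this snippet is not part of the diff:

```c
#include "ggml/ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // 4x3 f32 input; in ggml, ne[0] is the innermost (contiguous) dimension
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    for (int i = 0; i < 4 * 3; ++i) {
        ((float *) a->data)[i] = (float) i;
    }

    // pad by 1 on every side: 4x3 -> 6x5
    struct ggml_tensor * p = ggml_pad_circular(ctx, a, 1);

    struct ggml_cgraph gf = ggml_build_forward(p);
    ggml_graph_compute_with_ctx(ctx, &gf, /*n_threads=*/ 1);

    printf("padded: %d x %d\n", (int) p->ne[0], (int) p->ne[1]);

    ggml_free(ctx);
    return 0;
}
```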
205 changes: 203 additions & 2 deletions src/ggml.c
@@ -1648,9 +1648,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {

"CROSS_ENTROPY_LOSS",
"CROSS_ENTROPY_LOSS_BACK",

"PAD_CIRCULAR"
};

static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");
static_assert(GGML_OP_COUNT == 71, "GGML_OP_COUNT != 71");

static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -1732,9 +1734,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {

"cross_entropy_loss(x,y)",
"cross_entropy_loss_back(x,y)",

"pad_circular(x)"
};

static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");
static_assert(GGML_OP_COUNT == 71, "GGML_OP_COUNT != 71");

static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

@@ -6303,6 +6307,44 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(
return result;
}

// ggml_pad_circular

static struct ggml_tensor * ggml_pad_circular_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
int padding) {
bool is_node = false;

if (a->grad) {
GGML_ASSERT(false); // TODO: backward pass not implemented
is_node = true;
}

// the wrap arithmetic in the compute kernels assumes the padding
// does not exceed either spatial dimension
GGML_ASSERT(padding >= 0 && padding <= a->ne[0] && padding <= a->ne[1]);

int64_t new_width = a->ne[0] + 2 * padding; // ne[0] is the innermost (contiguous) dimension
int64_t new_height = a->ne[1] + 2 * padding;

struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
new_width,
new_height,
a->ne[2], a->ne[3]);

result->op = GGML_OP_PAD_CIRCULAR;
result->op_params[0] = padding;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = NULL;

return result;
}

struct ggml_tensor * ggml_pad_circular(
struct ggml_context * ctx,
struct ggml_tensor * a,
int padding) {
return ggml_pad_circular_impl(ctx, a, padding);
}


////////////////////////////////////////////////////////////////////////////////

void ggml_set_param(
@@ -13834,6 +13876,152 @@ static void ggml_compute_forward_cross_entropy_loss_back(
}
}

// ggml_compute_forward_pad_circular

static void ggml_compute_forward_pad_circular_f16(
const struct ggml_compute_params * params,
const struct ggml_tensor * src,
struct ggml_tensor * dst) {

if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
const int padding = dst->op_params[0];
const int ith = params->ith;
const int nth = params->nth;

ggml_fp16_t * src_data = (ggml_fp16_t *) src->data;
ggml_fp16_t * dst_data = (ggml_fp16_t *) dst->data;

const int64_t orig_width = src->ne[0]; // ne[0] is the innermost (contiguous) dimension
const int64_t orig_height = src->ne[1];
const int64_t new_width = orig_width + 2 * padding;
const int64_t new_height = orig_height + 2 * padding;

// each thread takes a contiguous slice of the flattened destination indices;
// the last thread also picks up the remainder
const int64_t nplanes = src->ne[2] * src->ne[3];
const int64_t plane_elements = new_height * new_width;
const int64_t total_elements = nplanes * plane_elements;
const int64_t start_index = ith * (total_elements / nth);
const int64_t end_index = (ith + 1) == nth ? total_elements : (ith + 1) * (total_elements / nth);

for (int64_t idx = start_index; idx < end_index; ++idx) {
const int64_t plane = idx / plane_elements;
const int64_t rem = idx % plane_elements;
const int64_t i = rem / new_width;
const int64_t j = rem % new_width;

// shift back into source coordinates and wrap the pad bands around
const int64_t orig_i = (i - padding + orig_height) % orig_height;
const int64_t orig_j = (j - padding + orig_width) % orig_width;

dst_data[plane * plane_elements + i * new_width + j] = src_data[plane * orig_height * orig_width + orig_i * orig_width + orig_j];
}
}

static void ggml_compute_forward_pad_circular_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src,
struct ggml_tensor * dst) {

if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}

const int padding = dst->op_params[0];
const int ith = params->ith;
const int nth = params->nth;

const int64_t orig_width = src->ne[0]; // ne[0] is the innermost (contiguous) dimension
const int64_t orig_height = src->ne[1];
const int64_t new_width = orig_width + 2 * padding;
const int64_t new_height = orig_height + 2 * padding;

const float * src_data = (const float *) src->data;
float * dst_data = (float *) dst->data;

const int64_t nplanes = src->ne[2] * src->ne[3];
const int64_t plane_elements = new_height * new_width;
const int64_t total_elements = nplanes * plane_elements;
const int64_t start_index = ith * (total_elements / nth);
const int64_t end_index = (ith + 1) == nth ? total_elements : (ith + 1) * (total_elements / nth);

for (int64_t idx = start_index; idx < end_index; ++idx) {
const int64_t plane = idx / plane_elements;
const int64_t rem = idx % plane_elements;
const int64_t i = rem / new_width;
const int64_t j = rem % new_width;

const int64_t orig_i = (i - padding + orig_height) % orig_height;
const int64_t orig_j = (j - padding + orig_width) % orig_width;

dst_data[plane * plane_elements + i * new_width + j] = src_data[plane * orig_height * orig_width + orig_i * orig_width + orig_j];
}
}
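The wrap `(i - padding + orig) % orig` shifts a destination coordinate back into source space and folds the pad bands around: with padding 1 and 3 source rows, destination row 0 reads source row (0 - 1 + 3) % 3 = 2 (the bottom row) and destination row 4 reads (4 - 1 + 3) % 3 = 0 (the top row). The single `+ orig` term is also why the op requires padding not exceeding the source extent. A standalone check of that arithmetic, independent of ggml (illustration only):

```c
#include <assert.h>
#include <stdint.h>

// dst coordinate -> wrapped src coordinate, as in the kernels above
static int64_t wrap(int64_t dst_idx, int64_t padding, int64_t orig) {
    return (dst_idx - padding + orig) % orig;
}

int main(void) {
    // 3 source rows, padding 1 -> 5 destination rows
    assert(wrap(0, 1, 3) == 2); // top pad row copies the bottom source row
    assert(wrap(1, 1, 3) == 0); // interior rows map straight through
    assert(wrap(3, 1, 3) == 2);
    assert(wrap(4, 1, 3) == 0); // bottom pad row copies the top source row
    return 0;
}
```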

// quantized types are block-encoded, so copying raw floats element-wise (as the f32 path does)
// would both read garbage and overrun the block-quantized destination buffer; until a proper
// dequantize -> pad -> requantize path exists, reject quantized inputs explicitly
static void ggml_compute_forward_pad_circular_q_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src,
struct ggml_tensor * dst) {
(void) params;
(void) src;
(void) dst;

GGML_ASSERT(false); // TODO: not implemented for quantized tensors
}

static void ggml_compute_forward_pad_circular(
const struct ggml_compute_params * params,
const struct ggml_tensor * src,
struct ggml_tensor * dst) {

switch (src->type) {
case GGML_TYPE_F32:
ggml_compute_forward_pad_circular_f32(params, src, dst);
break;
case GGML_TYPE_F16:
ggml_compute_forward_pad_circular_f16(params, src, dst);
break;
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1:
case GGML_TYPE_Q5_0:
case GGML_TYPE_Q5_1:
case GGML_TYPE_Q8_0:
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q3_K:
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
{
ggml_compute_forward_pad_circular_q_f32(params, src, dst);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}


/////////////////////////////////

static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
@@ -14155,6 +14343,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
ggml_compute_forward_cross_entropy_loss_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
}
break;
case GGML_OP_PAD_CIRCULAR:
{
ggml_compute_forward_pad_circular(params, tensor->src[0], tensor);
}
break;
case GGML_OP_NONE:
{
// nop
@@ -15220,6 +15413,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
{
GGML_ASSERT(false); // not supported
} break;
case GGML_OP_PAD_CIRCULAR:
{
GGML_ASSERT(false); // TODO: not implemented
} break;
case GGML_OP_NONE:
{
// nop
@@ -15848,6 +16045,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
{
n_tasks = n_threads;
} break;
case GGML_OP_PAD_CIRCULAR:
{
n_tasks = n_threads;
} break; // without the break, control falls through to GGML_OP_NONE and resets n_tasks to 1
case GGML_OP_NONE:
{
n_tasks = 1;
9 changes: 9 additions & 0 deletions tests/CMakeLists.txt
@@ -404,3 +404,12 @@ add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")

#
# test-pad-circular
#

set(TEST_TARGET test-pad-circular)
add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
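The diff registers a `test-pad-circular` target, but `tests/test-pad-circular.cpp` itself is not shown on this page. A minimal sketch of what such a test could assert (hypothetical, using the same era-dependent graph helpers hedged above):

```c
#include "ggml/ggml.h"
#include <assert.h>

int main(void) {
    struct ggml_init_params params = { 16 * 1024 * 1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // 3x3 source, rows: 0 1 2 / 3 4 5 / 6 7 8
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3, 3);
    for (int i = 0; i < 9; ++i) {
        ((float *) a->data)[i] = (float) i;
    }

    struct ggml_tensor * p = ggml_pad_circular(ctx, a, 1); // 3x3 -> 5x5
    struct ggml_cgraph gf = ggml_build_forward(p);
    ggml_graph_compute_with_ctx(ctx, &gf, /*n_threads=*/ 2);

    const float * pd = (const float *) p->data;
    assert(pd[0 * 5 + 0] == 8.0f); // dst (0,0) wraps to src (2,2)
    assert(pd[1 * 5 + 1] == 0.0f); // dst (1,1) is src (0,0)
    assert(pd[4 * 5 + 4] == 0.0f); // dst (4,4) wraps to src (0,0)

    ggml_free(ctx);
    return 0;
}
```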