Skip to content

Commit

Permalink
Merge branch 'fix-the-unmatched-unit_size-of-bpf_mem_cache'
Browse files Browse the repository at this point in the history
Hou Tao says:

====================
Fix the unmatched unit_size of bpf_mem_cache

From: Hou Tao <[email protected]>

Hi,

The patchset aims to fix the reported warning [0] when the unit_size of
bpf_mem_cache is mismatched with the object size of underly slab-cache.

Patch #1 fixes the warning by adjusting size_index according to the
value of KMALLOC_MIN_SIZE, so bpf_mem_cache with unit_size which is
smaller than KMALLOC_MIN_SIZE or is not aligned with KMALLOC_MIN_SIZE
will be redirected to bpf_mem_cache with bigger unit_size. Patch #2
doesn't do prefill for these redirected bpf_mem_cache to save memory.
Patch #3 adds further error check in bpf_mem_alloc_init() to ensure the
unit_size and object_size are always matched and to prevent potential
issues due to the mismatch.

Please see individual patches for more details. And comments are always
welcome.

[0]: https://lore.kernel.org/bpf/[email protected]
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
  • Loading branch information
Alexei Starovoitov committed Sep 11, 2023
2 parents 7182e56 + f0a42ab commit 9458964
Show file tree
Hide file tree
Showing 3 changed files with 256 additions and 4 deletions.
87 changes: 83 additions & 4 deletions kernel/bpf/memalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
* Typical case will be between 11K and 116K closer to 11K.
* bpf progs can and should share bpf_mem_cache when possible.
*/

static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
static void init_refill_work(struct bpf_mem_cache *c)
{
init_irq_work(&c->refill_work, bpf_mem_refill);
if (c->unit_size <= 256) {
Expand All @@ -476,14 +475,35 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
c->high_watermark = max(96 * 256 / c->unit_size, 3);
}
c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
}

static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
{
/* To avoid consuming memory assume that 1st run of bpf
* prog won't be doing more than 4 map_update_elem from
* irq disabled region
*/
alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
}

static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
{
struct llist_node *first;
unsigned int obj_size;

first = c->free_llist.first;
if (!first)
return 0;

obj_size = ksize(first);
if (obj_size != c->unit_size) {
WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
idx, obj_size, c->unit_size);
return -EINVAL;
}
return 0;
}

/* When size != 0 bpf_mem_cache for each cpu.
* This is typical bpf hash map use case when all elements have equal size.
*
Expand All @@ -494,10 +514,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
int cpu, i, err, unit_size, percpu_size = 0;
struct bpf_mem_caches *cc, __percpu *pcc;
struct bpf_mem_cache *c, __percpu *pc;
struct obj_cgroup *objcg = NULL;
int cpu, i, unit_size, percpu_size = 0;

if (size) {
pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
Expand All @@ -521,6 +541,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c->objcg = objcg;
c->percpu_size = percpu_size;
c->tgt = c;
init_refill_work(c);
prefill_mem_cache(c, cpu);
}
ma->cache = pc;
Expand All @@ -534,6 +555,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
if (!pcc)
return -ENOMEM;
err = 0;
#ifdef CONFIG_MEMCG_KMEM
objcg = get_obj_cgroup_from_current();
#endif
Expand All @@ -544,11 +566,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c->unit_size = sizes[i];
c->objcg = objcg;
c->tgt = c;

init_refill_work(c);
/* Another bpf_mem_cache will be used when allocating
* c->unit_size in bpf_mem_alloc(), so doesn't prefill
* for the bpf_mem_cache because these free objects will
* never be used.
*/
if (i != bpf_mem_cache_idx(c->unit_size))
continue;
prefill_mem_cache(c, cpu);
err = check_obj_size(c, i);
if (err)
goto out;
}
}

out:
ma->caches = pcc;
return 0;
/* refill_work is either zeroed or initialized, so it is safe to
* call irq_work_sync().
*/
if (err)
bpf_mem_alloc_destroy(ma);
return err;
}

static void drain_mem_cache(struct bpf_mem_cache *c)
Expand Down Expand Up @@ -916,3 +957,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)

return !ret ? NULL : ret + LLIST_NODE_SZ;
}

/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
static __init int bpf_mem_cache_adjust_size(void)
{
unsigned int size, index;

/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
* up-to 256-bytes.
*/
size = KMALLOC_MIN_SIZE;
if (size <= 192)
index = size_index[(size - 1) / 8];
else
index = fls(size - 1) - 1;
for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
size_index[(size - 1) / 8] = index;

/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
* use 128-bytes cache instead.
*/
if (KMALLOC_MIN_SIZE >= 64) {
index = size_index[(128 - 1) / 8];
for (size = 64 + 8; size <= 96; size += 8)
size_index[(size - 1) / 8] = index;
}

/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
* use 256-bytes cache instead.
*/
if (KMALLOC_MIN_SIZE >= 128) {
index = fls(256 - 1) - 1;
for (size = 128 + 8; size <= 192; size += 8)
size_index[(size - 1) / 8] = index;
}

return 0;
}
subsys_initcall(bpf_mem_cache_adjust_size);
50 changes: 50 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
#define _GNU_SOURCE
#include <sched.h>
#include <pthread.h>
#include <stdbool.h>
#include <bpf/btf.h>
#include <test_progs.h>

#include "test_bpf_ma.skel.h"

void test_test_bpf_ma(void)
{
struct test_bpf_ma *skel;
struct btf *btf;
int i, err;

skel = test_bpf_ma__open();
if (!ASSERT_OK_PTR(skel, "open"))
return;

btf = bpf_object__btf(skel->obj);
if (!ASSERT_OK_PTR(btf, "btf"))
goto out;

for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) {
char name[32];
int id;

snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]);
id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT);
if (!ASSERT_GT(id, 0, "bin_data"))
goto out;
skel->rodata->data_btf_ids[i] = id;
}

err = test_bpf_ma__load(skel);
if (!ASSERT_OK(err, "load"))
goto out;

err = test_bpf_ma__attach(skel);
if (!ASSERT_OK(err, "attach"))
goto out;

skel->bss->pid = getpid();
usleep(1);
ASSERT_OK(skel->bss->err, "test error");
out:
test_bpf_ma__destroy(skel);
}
123 changes: 123 additions & 0 deletions tools/testing/selftests/bpf/progs/test_bpf_ma.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>

#include "bpf_experimental.h"
#include "bpf_misc.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif

struct generic_map_value {
void *data;
};

char _license[] SEC("license") = "GPL";

const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096};
const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {};

int err = 0;
int pid = 0;

#define DEFINE_ARRAY_WITH_KPTR(_size) \
struct bin_data_##_size { \
char data[_size - sizeof(void *)]; \
}; \
struct map_value_##_size { \
struct bin_data_##_size __kptr * data; \
/* To emit BTF info for bin_data_xx */ \
struct bin_data_##_size not_used; \
}; \
struct { \
__uint(type, BPF_MAP_TYPE_ARRAY); \
__type(key, int); \
__type(value, struct map_value_##_size); \
__uint(max_entries, 128); \
} array_##_size SEC(".maps");

static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch,
unsigned int idx)
{
struct generic_map_value *value;
unsigned int i, key;
void *old, *new;

for (i = 0; i < batch; i++) {
key = i;
value = bpf_map_lookup_elem(map, &key);
if (!value) {
err = 1;
return;
}
new = bpf_obj_new_impl(data_btf_ids[idx], NULL);
if (!new) {
err = 2;
return;
}
old = bpf_kptr_xchg(&value->data, new);
if (old) {
bpf_obj_drop(old);
err = 3;
return;
}
}
for (i = 0; i < batch; i++) {
key = i;
value = bpf_map_lookup_elem(map, &key);
if (!value) {
err = 4;
return;
}
old = bpf_kptr_xchg(&value->data, NULL);
if (!old) {
err = 5;
return;
}
bpf_obj_drop(old);
}
}

#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx)

DEFINE_ARRAY_WITH_KPTR(8);
DEFINE_ARRAY_WITH_KPTR(16);
DEFINE_ARRAY_WITH_KPTR(32);
DEFINE_ARRAY_WITH_KPTR(64);
DEFINE_ARRAY_WITH_KPTR(96);
DEFINE_ARRAY_WITH_KPTR(128);
DEFINE_ARRAY_WITH_KPTR(192);
DEFINE_ARRAY_WITH_KPTR(256);
DEFINE_ARRAY_WITH_KPTR(512);
DEFINE_ARRAY_WITH_KPTR(1024);
DEFINE_ARRAY_WITH_KPTR(2048);
DEFINE_ARRAY_WITH_KPTR(4096);

SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int test_bpf_mem_alloc_free(void *ctx)
{
if ((u32)bpf_get_current_pid_tgid() != pid)
return 0;

/* Alloc 128 8-bytes objects in batch to trigger refilling,
* then free 128 8-bytes objects in batch to trigger freeing.
*/
CALL_BATCH_ALLOC_FREE(8, 128, 0);
CALL_BATCH_ALLOC_FREE(16, 128, 1);
CALL_BATCH_ALLOC_FREE(32, 128, 2);
CALL_BATCH_ALLOC_FREE(64, 128, 3);
CALL_BATCH_ALLOC_FREE(96, 128, 4);
CALL_BATCH_ALLOC_FREE(128, 128, 5);
CALL_BATCH_ALLOC_FREE(192, 128, 6);
CALL_BATCH_ALLOC_FREE(256, 128, 7);
CALL_BATCH_ALLOC_FREE(512, 64, 8);
CALL_BATCH_ALLOC_FREE(1024, 32, 9);
CALL_BATCH_ALLOC_FREE(2048, 16, 10);
CALL_BATCH_ALLOC_FREE(4096, 8, 11);

return 0;
}

0 comments on commit 9458964

Please sign in to comment.