From ed47cb27586d2d339403563d3aa3a999a7d30927 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Sat, 28 Oct 2023 10:54:13 +0530 Subject: [PATCH 01/41] selftests/bpf: Convert CHECK macros to ASSERT_* macros in bpf_iter As it was pointed out by Yonghong Song [1], in the bpf selftests the use of the ASSERT_* series of macros is preferred over the CHECK macro. This patch replaces all CHECK calls in bpf_iter with the appropriate ASSERT_* macros. [1] https://lore.kernel.org/lkml/0a142924-633c-44e6-9a92-2dc019656bf2@linux.dev Suggested-by: Yonghong Song Signed-off-by: Yuran Pereira Acked-by: Yonghong Song Acked-by: Kui-Feng Lee Link: https://lore.kernel.org/r/DB3PR10MB6835E9C8DFCA226DD6FEF914E8A3A@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/bpf_iter.c | 79 ++++++++----------- 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index e3498f607b49d..5e334d3d7ac23 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -34,8 +34,6 @@ #include "bpf_iter_ksym.skel.h" #include "bpf_iter_sockmap.skel.h" -static int duration; - static void test_btf_id_or_null(void) { struct bpf_iter_test_kern3 *skel; @@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_ /* not check contents, but ensure read() ends without error */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + ASSERT_GE(len, 0, "read"); close(iter_fd); @@ -413,7 +411,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(err, 0, "read")) goto free_link; ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", @@ -526,11 +524,11 @@ static int do_read_with_fd(int iter_fd, const char *expected, start = 0; while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { start += len; - if (CHECK(start >= 16, "read", "read len %d\n", len)) + if (!ASSERT_LT(start, 16, "read")) return -1; read_buf_len = read_one_char ? 1 : 16 - start; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) return -1; if (!ASSERT_STREQ(buf, expected, "read")) @@ -571,8 +569,7 @@ static int do_read(const char *path, const char *expected) int err, iter_fd; iter_fd = open(path, O_RDONLY); - if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", - path, strerror(errno))) + if (!ASSERT_GE(iter_fd, 0, "open")) return -1; err = do_read_with_fd(iter_fd, expected, false); @@ -600,7 +597,7 @@ static void test_file_iter(void) unlink(path); err = bpf_link__pin(link, path); - if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + if (!ASSERT_OK(err, "pin_iter")) goto free_link; err = do_read(path, "abcd"); @@ -651,12 +648,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * overflow and needs restart. 
*/ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map1_fd, 0, "bpf_map_create")) goto out; map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map2_fd, 0, "bpf_map_create")) goto free_map1; /* bpf_seq_printf kernel buffer is 8 pages, so one map @@ -685,14 +680,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map1_id = map_info.id; err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map2_id = map_info.id; @@ -714,16 +707,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - CHECK(len != -1 || errno != E2BIG, "read", - "expected ret -1, errno E2BIG, but get ret %d, error %s\n", - len, strerror(errno)); + ASSERT_EQ(len, -1, "read"); + ASSERT_EQ(errno, E2BIG, "read"); goto free_buf; } else if (!ret1) { while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } else { do { @@ -732,8 +723,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) total_read_len += len; } while (len > 0 || len == -EAGAIN); - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } @@ -836,7 +826,7 @@ static void test_bpf_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -917,7 +907,7 @@ static void test_bpf_percpu_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -983,17 +973,14 @@ static void test_bpf_array_map(void) start = 0; while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) start += len; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ res_first_key = *(__u32 *)buf; res_first_val = *(__u64 *)(buf + sizeof(__u32)); - if (CHECK(res_first_key != 0 || res_first_val != first_val, - "bpf_seq_write", - "seq_write failure: first key %u vs expected 0, " - " first value %llu vs expected %llu\n", - res_first_key, res_first_val, first_val)) + if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") || + !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write")) goto close_iter; if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) @@ -1092,7 +1079,7 @@ static void test_bpf_percpu_array_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if 
(!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -1131,6 +1118,7 @@ static void test_bpf_sk_storage_delete(void) sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); if (!ASSERT_OK(err, "map_update")) goto out; @@ -1151,14 +1139,19 @@ static void test_bpf_sk_storage_delete(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) - goto close_iter; + + /* Note: The following assertions serve to ensure + * the value was deleted. It does so by asserting + * that bpf_map_lookup_elem has failed. This might + * seem counterintuitive at first. + */ + ASSERT_ERR(err, "bpf_map_lookup_elem"); + ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem"); close_iter: close(iter_fd); @@ -1203,17 +1196,15 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem") || + !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem")) goto close_socket; do_dummy_read(skel->progs.negate_socket_local_storage); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != -getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - -getpid(), val, err); + ASSERT_OK(err, "bpf_map_lookup_elem"); + ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem"); close_socket: close(sock_fd); @@ -1290,7 +1281,7 @@ static void test_bpf_sk_storage_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ From cb3c6a58be50c65014296aa3455cae0fa1e82eac Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Sat, 28 Oct 2023 10:54:14 +0530 Subject: [PATCH 02/41] selftests/bpf: Add malloc failure checks in bpf_iter Since some malloc calls in bpf_iter may at times fail, this patch adds the appropriate fail checks, and ensures that any previously allocated resource is appropriately destroyed before returning the function. 
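For illustration only, here is a minimal standalone sketch of the check-and-cleanup pattern this change follows. ASSERT_OK_PTR() below is a simplified stand-in for the selftests helper of the same name, and test_two_allocs() is a hypothetical function, not code from this patch:

  #include <stdio.h>
  #include <stdlib.h>

  /* simplified stand-in for the selftests' ASSERT_OK_PTR() helper */
  #define ASSERT_OK_PTR(ptr, name) \
          ((ptr) ? 1 : (fprintf(stderr, "%s: unexpected NULL\n", (name)), 0))

  static void test_two_allocs(void)
  {
          char *a, *b;

          a = malloc(64);
          if (!ASSERT_OK_PTR(a, "malloc a"))
                  return;           /* nothing allocated yet, just bail out */

          b = malloc(64);
          if (!ASSERT_OK_PTR(b, "malloc b"))
                  goto free_a;      /* free what was already allocated */

          /* ... use a and b ... */

          free(b);
  free_a:
          free(a);
  }

  int main(void)
  {
          test_two_allocs();
          return 0;
  }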
Signed-off-by: Yuran Pereira Acked-by: Yonghong Song Acked-by: Kui-Feng Lee Link: https://lore.kernel.org/r/DB3PR10MB6835F0ECA792265FA41FC39BE8A3A@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 5e334d3d7ac23..4e02093c2cbef 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -698,7 +698,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_link; buf = malloc(expected_read_len); - if (!buf) + if (!ASSERT_OK_PTR(buf, "malloc")) goto close_iter; /* do read */ @@ -868,6 +868,8 @@ static void test_bpf_percpu_hash_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_hash_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) @@ -1044,6 +1046,8 @@ static void test_bpf_percpu_array_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_array_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) From 3cda0779ded173572e7a23a5da174cdc69b5978b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:43 -0700 Subject: [PATCH 03/41] selftests/bpf: fix RELEASE=1 build for tc_opts Compiler complains about malloc(). We also don't need to dynamically allocate anything, so make the life easier by using statically sized buffer. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 51883ccb80206..196abf2234656 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void) const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); LIBBPF_OPTS(bpf_prog_load_opts, opts); const size_t log_buf_sz = 256; - char *log_buf; + char log_buf[log_buf_sz]; int fd = -1; - log_buf = malloc(log_buf_sz); - if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) - return fd; opts.log_buf = log_buf; opts.log_size = log_buf_sz; @@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void) prog_insns, prog_insn_cnt, &opts); ASSERT_STREQ(log_buf, "", "log_0"); ASSERT_GE(fd, 0, "prog_fd"); - free(log_buf); return fd; } From 7bcc07dcd835833e9ca38e4121653b6b6d4bb4a8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:44 -0700 Subject: [PATCH 04/41] selftests/bpf: satisfy compiler by having explicit return in btf test Some compilers complain about get_pprint_mapv_size() not returning value in some code paths. Fix with explicit return. 
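For context, a small standalone example of the kind of warning being silenced; the assumption here is a compiler that cannot prove assert(0) never returns (for instance an NDEBUG build, where assert() expands to nothing), and pick_size() is a made-up function, not the selftest code:

  #include <assert.h>

  /* Without the trailing return, some builds warn:
   * "control reaches end of non-void function".
   */
  static int pick_size(int kind)
  {
          switch (kind) {
          case 0:
                  return 4;
          case 1:
                  return 8;
          }
          assert(0);
          return 0;       /* explicit return keeps all code paths defined */
  }

  int main(void)
  {
          return pick_size(0) == 4 ? 0 : 1;
  }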
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 92d51f377fe59..8fb4a04fbbc04 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, From 2e74aef782d38f73b65c56c8ff5973488daa91c4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:45 -0700 Subject: [PATCH 05/41] bpf: derive smin/smax from umin/max bounds Add smin/smax derivation from appropriate umin/umax values. Previously the logic was surprisingly asymmetric, trying to derive umin/umax from smin/smax (if possible), but not trying to do the same in the other direction. A simple addition to __reg64_deduce_bounds() fixes this. Added also generic comment about u64/s64 ranges and their relationship. Hopefully that helps readers to understand all the bounds deductions a bit better. Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 71 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 857d766945171..8a4cdd2787ecc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2358,6 +2358,77 @@ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) static void __reg64_deduce_bounds(struct bpf_reg_state *reg) { + /* If u64 range forms a valid s64 range (due to matching sign bit), + * try to learn from that. Let's do a bit of ASCII art to see when + * this is happening. Let's take u64 range first: + * + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * + * Valid u64 range is formed when umin and umax are anywhere in the + * range [0, U64_MAX], and umin <= umax. u64 case is simple and + * straightforward. Let's see how s64 range maps onto the same range + * of values, annotated below the line for comparison: + * + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * 0 S64_MAX S64_MIN -1 + * + * So s64 values basically start in the middle and they are logically + * contiguous to the right of it, wrapping around from -1 to 0, and + * then finishing as S64_MAX (0x7fffffffffffffff) right before + * S64_MIN. We can try drawing the continuity of u64 vs s64 values + * more visually as mapped to sign-agnostic range of hex values. + * + * u64 start u64 end + * _______________________________________________________________ + * / \ + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * 0 S64_MAX S64_MIN -1 + * / \ + * >------------------------------ -------------------------------> + * s64 continues... s64 end s64 start s64 "midpoint" + * + * What this means is that, in general, we can't always derive + * something new about u64 from any random s64 range, and vice versa. + * + * But we can do that in two particular cases. 
One is when entire + * u64/s64 range is *entirely* contained within left half of the above + * diagram or when it is *entirely* contained in the right half. I.e.: + * + * |-------------------------------|--------------------------------| + * ^ ^ ^ ^ + * A B C D + * + * [A, B] and [C, D] are contained entirely in their respective halves + * and form valid contiguous ranges as both u64 and s64 values. [A, B] + * will be non-negative both as u64 and s64 (and in fact it will be + * identical ranges no matter the signedness). [C, D] treated as s64 + * will be a range of negative values, while in u64 it will be + * non-negative range of values larger than 0x8000000000000000. + * + * Now, any other range here can't be represented in both u64 and s64 + * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid + * contiguous u64 ranges, but they are discontinuous in s64. [B, C] + * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX], + * for example. Similarly, valid s64 range [D, A] (going from negative + * to positive values), would be two separate [D, U64_MAX] and [0, A] + * ranges as u64. Currently reg_state can't represent two segments per + * numeric domain, so in such situations we can only derive maximal + * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64). + * + * So we use these facts to derive umin/umax from smin/smax and vice + * versa only if they stay within the same "half". This is equivalent + * to checking sign bit: lower half will have sign bit as zero, upper + * half have sign bit 1. Below in code we simplify this by just + * casting umin/umax as smin/smax and checking if they form valid + * range, and vice versa. Those are equivalent checks. + */ + if ((s64)reg->umin_value <= (s64)reg->umax_value) { + reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value); + reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value); + } /* Learn sign from signed bounds. * If we cannot cross the sign boundary, then signed and unsigned bounds * are the same, so combine. This works even in the negative case, e.g. From f188765f23a58bebce48bd078effa95733fe37de Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:46 -0700 Subject: [PATCH 06/41] bpf: derive smin32/smax32 from umin32/umax32 bounds All the logic that applies to u64 vs s64, equally applies for u32 vs s32 relationships (just taken in a smaller 32-bit numeric space). So do the same deduction of smin32/smax32 from umin32/umax32, if we can. Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8a4cdd2787ecc..b93818abe7fc9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2324,6 +2324,13 @@ static void __update_reg_bounds(struct bpf_reg_state *reg) /* Uses signed min/max values to inform unsigned, and vice-versa */ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) { + /* if u32 range forms a valid s32 range (due to matching sign bit), + * try to learn from that + */ + if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value); + } /* Learn sign from signed bounds. 
* If we cannot cross the sign boundary, then signed and unsigned bounds * are the same, so combine. This works even in the negative case, e.g. From f404ef3b42c8e6719f0039d83668c837e5daca57 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:47 -0700 Subject: [PATCH 07/41] bpf: derive subreg bounds from full bounds when upper 32 bits are constant Comments in code try to explain the idea behind why this is correct. Please check the code and comments. Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b93818abe7fc9..e48a6180627bf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2324,6 +2324,51 @@ static void __update_reg_bounds(struct bpf_reg_state *reg) /* Uses signed min/max values to inform unsigned, and vice-versa */ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) { + /* If upper 32 bits of u64/s64 range don't change, we can use lower 32 + * bits to improve our u32/s32 boundaries. + * + * E.g., the case where we have upper 32 bits as zero ([10, 20] in + * u64) is pretty trivial, it's obvious that in u32 we'll also have + * [10, 20] range. But this property holds for any 64-bit range as + * long as upper 32 bits in that entire range of values stay the same. + * + * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311] + * in decimal) has the same upper 32 bits throughout all the values in + * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15]) + * range. + * + * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32, + * following the rules outlined below about u64/s64 correspondence + * (which equally applies to u32 vs s32 correspondence). In general it + * depends on actual hexadecimal values of 32-bit range. They can form + * only valid u32, or only valid s32 ranges in some cases. + * + * So we use all these insights to derive bounds for subregisters here. 
+ */ + if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) { + /* u64 to u32 casting preserves validity of low 32 bits as + * a range, if upper 32 bits are the same + */ + reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value); + reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value); + + if ((s32)reg->umin_value <= (s32)reg->umax_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); + } + } + if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) { + /* low 32 bits should form a proper u32 range */ + if ((u32)reg->smin_value <= (u32)reg->smax_value) { + reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value); + reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value); + } + /* low 32 bits should form a proper s32 range */ + if ((s32)reg->smin_value <= (s32)reg->smax_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); + } + } /* if u32 range forms a valid s32 range (due to matching sign bit), * try to learn from that */ From 6533e0acff58b9f141c0c7dc93114535ac5a3985 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:48 -0700 Subject: [PATCH 08/41] bpf: add special smin32/smax32 derivation from 64-bit bounds Add a special case where we can derive valid s32 bounds from umin/umax or smin/smax by stitching together negative s32 subrange and non-negative s32 subrange. That requires upper 32 bits to form a [N, N+1] range in u32 domain (taking into account wrap around, so 0xffffffff to 0x00000000 is a valid [N, N+1] range in this sense). See code comment for concrete examples. Eduard Zingerman also provided an alternative explanation ([0]) for more mathematically inclined readers: Suppose: . there are numbers a, b, c . 2**31 <= b < 2**32 . 0 <= c < 2**31 . umin = 2**32 * a + b . umax = 2**32 * (a + 1) + c The number of values in the range represented by [umin; umax] is: . N = umax - umin + 1 = 2**32 + c - b + 1 . min(N) = 2**32 + 0 - (2**32-1) + 1 = 2, with b = 2**32-1, c = 0 . max(N) = 2**32 + (2**31 - 1) - 2**31 + 1 = 2**32, with b = 2**31, c = 2**31-1 Hence [(s32)b; (s32)c] forms a valid range. [0] https://lore.kernel.org/bpf/d7af631802f0cfae20df77fe70068702d24bbd31.camel@gmail.com/ Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e48a6180627bf..08888784cbc8d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2369,6 +2369,29 @@ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); } } + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. E.g., let's say we + * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]). + * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff, + * 0x0000000000000000, 0x00000000000001}). 
Ignoring upper 32 bits, + * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]). + * Note that it doesn't have to be 0xffffffff going to 0x00000000 in + * upper 32 bits. As a random example, s64 range + * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range + * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister. + */ + if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) && + (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); + } + if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) && + (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); + } /* if u32 range forms a valid s32 range (due to matching sign bit), * try to learn from that */ From 3d6940ddd9b56b3fc376ee39656f6fb1b4e1a981 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:49 -0700 Subject: [PATCH 09/41] bpf: improve deduction of 64-bit bounds from 32-bit bounds Add a few interesting cases in which we can tighten 64-bit bounds based on newly learnt information about 32-bit bounds. E.g., when full u64/s64 registers are used in BPF program, and then eventually compared as u32/s32. The latter comparison doesn't change the value of full register, but it does impose new restrictions on possible lower 32 bits of such full registers. And we can use that to derive additional full register bounds information. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231102033759.2541186-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 08888784cbc8d..d0d0a1a1b662b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2536,10 +2536,54 @@ static void __reg64_deduce_bounds(struct bpf_reg_state *reg) } } +static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg) +{ + /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit + * values on both sides of 64-bit range in hope to have tigher range. + * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from + * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff]. + * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound + * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of + * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a + * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff]. + * We just need to make sure that derived bounds we are intersecting + * with are well-formed ranges in respecitve s64 or u64 domain, just + * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments. 
+ */ + __u64 new_umin, new_umax; + __s64 new_smin, new_smax; + + /* u32 -> u64 tightening, it's always well-formed */ + new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value; + new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value; + reg->umin_value = max_t(u64, reg->umin_value, new_umin); + reg->umax_value = min_t(u64, reg->umax_value, new_umax); + /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */ + new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value; + new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value; + reg->smin_value = max_t(s64, reg->smin_value, new_smin); + reg->smax_value = min_t(s64, reg->smax_value, new_smax); + + /* if s32 can be treated as valid u32 range, we can use it as well */ + if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { + /* s32 -> u64 tightening */ + new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value; + new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value; + reg->umin_value = max_t(u64, reg->umin_value, new_umin); + reg->umax_value = min_t(u64, reg->umax_value, new_umax); + /* s32 -> s64 tightening */ + new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value; + new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value; + reg->smin_value = max_t(s64, reg->smin_value, new_smin); + reg->smax_value = min_t(s64, reg->smax_value, new_smax); + } +} + static void __reg_deduce_bounds(struct bpf_reg_state *reg) { __reg32_deduce_bounds(reg); __reg64_deduce_bounds(reg); + __reg_deduce_mixed_bounds(reg); } /* Attempts to improve var_off based on unsigned min/max information */ From 558c06e551a3e2fd166e80aefb6bbd51c83737d1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:50 -0700 Subject: [PATCH 10/41] bpf: try harder to deduce register bounds from different numeric domains There are cases (caught by subsequent reg_bounds tests in selftests/bpf) where performing one round of __reg_deduce_bounds() doesn't propagate all the information from, say, s32 to u32 bounds and than from newly learned u32 bounds back to u64 and s64. So perform __reg_deduce_bounds() twice to make sure such derivations are propagated fully after reg_bounds_sync(). One such example is test `(s64)[0xffffffff00000001; 0] (u64)< 0xffffffff00000000` from selftest patch from this patch set. It demonstrates an intricate dance of u64 -> s64 -> u64 -> u32 bounds adjustments, which requires two rounds of __reg_deduce_bounds(). Here are corresponding refinement log from selftest, showing evolution of knowledge. REFINING (FALSE R1) (u64)SRC=[0xffffffff00000000; U64_MAX] (u64)DST_OLD=[0; U64_MAX] (u64)DST_NEW=[0xffffffff00000000; U64_MAX] REFINING (FALSE R1) (u64)SRC=[0xffffffff00000000; U64_MAX] (s64)DST_OLD=[0xffffffff00000001; 0] (s64)DST_NEW=[0xffffffff00000001; -1] REFINING (FALSE R1) (s64)SRC=[0xffffffff00000001; -1] (u64)DST_OLD=[0xffffffff00000000; U64_MAX] (u64)DST_NEW=[0xffffffff00000001; U64_MAX] REFINING (FALSE R1) (u64)SRC=[0xffffffff00000001; U64_MAX] (u32)DST_OLD=[0; U32_MAX] (u32)DST_NEW=[1; U32_MAX] R1 initially has smin/smax set to [0xffffffff00000001; -1], while umin/umax is unknown. After (u64)< comparison, in FALSE branch we gain knowledge that umin/umax is [0xffffffff00000000; U64_MAX]. That causes smin/smax to learn that zero can't happen and upper bound is -1. Then smin/smax is adjusted from umin/umax improving lower bound from 0xffffffff00000000 to 0xffffffff00000001. 
And then eventually umin32/umax32 bounds are derived from umin/umax and become [1; U32_MAX]. The selftest in the last patch actually implements multi-round fixed-point convergence logic, but so far all the tests are handled by two rounds of reg_bounds_sync() on the verifier state, so we keep it simple for now. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d0d0a1a1b662b..2991e9dd44755 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2605,6 +2605,7 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); /* We might have learned something about the sign bit. */ __reg_deduce_bounds(reg); + __reg_deduce_bounds(reg); /* We might have learned some bits from the bounds. */ __reg_bound_offset(reg); /* Intersecting with the old var_off might have improved our bounds From b929d4979b2be8ab0e6f539bebadbb4e38dc5e90 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:51 -0700 Subject: [PATCH 11/41] bpf: drop knowledge-losing __reg_combine_{32,64}_into_{64,32} logic When a 32-bit conditional operation operates on the lower 32 bits of a full 64-bit register, the register's full value isn't changed. We just potentially gain new knowledge about that register's lower 32 bits. Unfortunately, the __reg_combine_{32,64}_into_{64,32} logic that reg_set_min_max() performs as a last step can lose information in some cases due to __mark_reg64_unbounded() and __reg_assign_32_into_64(). That's bad and completely unnecessary. Especially __reg_assign_32_into_64() looks completely out of place here, because we are not performing a zero-extending subregister assignment during a conditional jump. So this patch replaces __reg_combine_* with a plain reg_bounds_sync(), which does a proper job of deriving u64/s64 bounds from u32/s32 bounds, and vice versa (among all other combinations). __reg_combine_64_into_32() is also used in one more place, coerce_reg_to_size(), while handling 1- and 2-byte register loads. Looking into this, it seems that besides marking the subregister as unbounded before performing reg_bounds_sync(), we were also deducing smin32/smax32 and umin32/umax32 bounds from the respective smin/smax and umin/umax bounds. That is now redundant, as reg_bounds_sync() performs all the same logic more generically (e.g., without the unnecessary assumption that the upper 32 bits of the full register should be zero). Long story short, we remove __reg_combine_64_into_32() completely, and coerce_reg_to_size() now only resets the subregister to unbounded and then performs reg_bounds_sync() to recover as much information as possible from the 64-bit umin/umax and smin/smax bounds, set explicitly in coerce_reg_to_size() earlier. 
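To see the idea outside the verifier, here is a deliberately simplified, standalone toy model of "reset the subregister, then re-derive it from the 64-bit bounds". struct toy_reg and toy_sync() are invented for illustration and implement only the single "upper 32 bits are constant" deduction rule, nothing like the full reg_bounds_sync():

  #include <stdio.h>
  #include <stdint.h>

  struct toy_reg {
          uint64_t umin, umax;            /* 64-bit unsigned bounds */
          uint32_t u32_min, u32_max;      /* 32-bit subregister bounds */
  };

  /* toy stand-in for __mark_reg32_unbounded() + reg_bounds_sync():
   * forget the subreg bounds, then re-derive them from the 64-bit
   * bounds when the upper 32 bits are constant across the whole range
   */
  static void toy_sync(struct toy_reg *r)
  {
          r->u32_min = 0;
          r->u32_max = UINT32_MAX;
          if ((r->umin >> 32) == (r->umax >> 32)) {
                  r->u32_min = (uint32_t)r->umin;
                  r->u32_max = (uint32_t)r->umax;
          }
  }

  int main(void)
  {
          struct toy_reg r = {
                  .umin = 0x10000000aULL,         /* 4294967306 */
                  .umax = 0x10000000fULL,         /* 4294967311 */
          };

          toy_sync(&r);
          printf("u32 bounds: [%u, %u]\n",
                 (unsigned int)r.u32_min, (unsigned int)r.u32_max); /* [10, 15] */
          return 0;
  }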
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231102033759.2541186-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 60 ++++++------------------------------------- 1 file changed, 8 insertions(+), 52 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2991e9dd44755..8802172fe8c9e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2639,51 +2639,6 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg) } } -static void __reg_combine_32_into_64(struct bpf_reg_state *reg) -{ - /* special case when 64-bit register has upper 32-bit register - * zeroed. Typically happens after zext or <<32, >>32 sequence - * allowing us to use 32-bit bounds directly, - */ - if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) { - __reg_assign_32_into_64(reg); - } else { - /* Otherwise the best we can do is push lower 32bit known and - * unknown bits into register (var_off set from jmp logic) - * then learn as much as possible from the 64-bit tnum - * known and unknown bits. The previous smin/smax bounds are - * invalid here because of jmp32 compare so mark them unknown - * so they do not impact tnum bounds calculation. - */ - __mark_reg64_unbounded(reg); - } - reg_bounds_sync(reg); -} - -static bool __reg64_bound_s32(s64 a) -{ - return a >= S32_MIN && a <= S32_MAX; -} - -static bool __reg64_bound_u32(u64 a) -{ - return a >= U32_MIN && a <= U32_MAX; -} - -static void __reg_combine_64_into_32(struct bpf_reg_state *reg) -{ - __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { - reg->s32_min_value = (s32)reg->smin_value; - reg->s32_max_value = (s32)reg->smax_value; - } - if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) { - reg->u32_min_value = (u32)reg->umin_value; - reg->u32_max_value = (u32)reg->umax_value; - } - reg_bounds_sync(reg); -} - /* Mark a register as having a completely unknown (scalar) value. */ static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) @@ -6380,9 +6335,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) * values are also truncated so we push 64-bit bounds into * 32-bit bounds. Above were truncated < 32-bits already. */ - if (size >= 4) - return; - __reg_combine_64_into_32(reg); + if (size < 4) { + __mark_reg32_unbounded(reg); + reg_bounds_sync(reg); + } } static void set_sext64_default_val(struct bpf_reg_state *reg, int size) @@ -14621,13 +14577,13 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, tnum_subreg(false_32off)); true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), tnum_subreg(true_32off)); - __reg_combine_32_into_64(false_reg); - __reg_combine_32_into_64(true_reg); + reg_bounds_sync(false_reg); + reg_bounds_sync(true_reg); } else { false_reg->var_off = false_64off; true_reg->var_off = true_64off; - __reg_combine_64_into_32(false_reg); - __reg_combine_64_into_32(true_reg); + reg_bounds_sync(false_reg); + reg_bounds_sync(true_reg); } } From cdeb5dab9238520c39c4701d551f5d66463c504b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:53 -0700 Subject: [PATCH 12/41] bpf: rename is_branch_taken reg arguments to prepare for the second one Just taking mundane refactoring bits out into a separate patch. No functional changes. 
Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231102033759.2541186-12-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 108 +++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8802172fe8c9e..725f327ce5eb8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14167,26 +14167,26 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, })); } -static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) +static int is_branch32_taken(struct bpf_reg_state *reg1, u32 val, u8 opcode) { - struct tnum subreg = tnum_subreg(reg->var_off); + struct tnum subreg = tnum_subreg(reg1->var_off); s32 sval = (s32)val; switch (opcode) { case BPF_JEQ: if (tnum_is_const(subreg)) return !!tnum_equals_const(subreg, val); - else if (val < reg->u32_min_value || val > reg->u32_max_value) + else if (val < reg1->u32_min_value || val > reg1->u32_max_value) return 0; - else if (sval < reg->s32_min_value || sval > reg->s32_max_value) + else if (sval < reg1->s32_min_value || sval > reg1->s32_max_value) return 0; break; case BPF_JNE: if (tnum_is_const(subreg)) return !tnum_equals_const(subreg, val); - else if (val < reg->u32_min_value || val > reg->u32_max_value) + else if (val < reg1->u32_min_value || val > reg1->u32_max_value) return 1; - else if (sval < reg->s32_min_value || sval > reg->s32_max_value) + else if (sval < reg1->s32_min_value || sval > reg1->s32_max_value) return 1; break; case BPF_JSET: @@ -14196,51 +14196,51 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) return 0; break; case BPF_JGT: - if (reg->u32_min_value > val) + if (reg1->u32_min_value > val) return 1; - else if (reg->u32_max_value <= val) + else if (reg1->u32_max_value <= val) return 0; break; case BPF_JSGT: - if (reg->s32_min_value > sval) + if (reg1->s32_min_value > sval) return 1; - else if (reg->s32_max_value <= sval) + else if (reg1->s32_max_value <= sval) return 0; break; case BPF_JLT: - if (reg->u32_max_value < val) + if (reg1->u32_max_value < val) return 1; - else if (reg->u32_min_value >= val) + else if (reg1->u32_min_value >= val) return 0; break; case BPF_JSLT: - if (reg->s32_max_value < sval) + if (reg1->s32_max_value < sval) return 1; - else if (reg->s32_min_value >= sval) + else if (reg1->s32_min_value >= sval) return 0; break; case BPF_JGE: - if (reg->u32_min_value >= val) + if (reg1->u32_min_value >= val) return 1; - else if (reg->u32_max_value < val) + else if (reg1->u32_max_value < val) return 0; break; case BPF_JSGE: - if (reg->s32_min_value >= sval) + if (reg1->s32_min_value >= sval) return 1; - else if (reg->s32_max_value < sval) + else if (reg1->s32_max_value < sval) return 0; break; case BPF_JLE: - if (reg->u32_max_value <= val) + if (reg1->u32_max_value <= val) return 1; - else if (reg->u32_min_value > val) + else if (reg1->u32_min_value > val) return 0; break; case BPF_JSLE: - if (reg->s32_max_value <= sval) + if (reg1->s32_max_value <= sval) return 1; - else if (reg->s32_min_value > sval) + else if (reg1->s32_min_value > sval) return 0; break; } @@ -14249,79 +14249,79 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) } -static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) +static int is_branch64_taken(struct bpf_reg_state *reg1, u64 val, u8 opcode) { s64 sval = (s64)val; switch (opcode) { case BPF_JEQ: - if 
(tnum_is_const(reg->var_off)) - return !!tnum_equals_const(reg->var_off, val); - else if (val < reg->umin_value || val > reg->umax_value) + if (tnum_is_const(reg1->var_off)) + return !!tnum_equals_const(reg1->var_off, val); + else if (val < reg1->umin_value || val > reg1->umax_value) return 0; - else if (sval < reg->smin_value || sval > reg->smax_value) + else if (sval < reg1->smin_value || sval > reg1->smax_value) return 0; break; case BPF_JNE: - if (tnum_is_const(reg->var_off)) - return !tnum_equals_const(reg->var_off, val); - else if (val < reg->umin_value || val > reg->umax_value) + if (tnum_is_const(reg1->var_off)) + return !tnum_equals_const(reg1->var_off, val); + else if (val < reg1->umin_value || val > reg1->umax_value) return 1; - else if (sval < reg->smin_value || sval > reg->smax_value) + else if (sval < reg1->smin_value || sval > reg1->smax_value) return 1; break; case BPF_JSET: - if ((~reg->var_off.mask & reg->var_off.value) & val) + if ((~reg1->var_off.mask & reg1->var_off.value) & val) return 1; - if (!((reg->var_off.mask | reg->var_off.value) & val)) + if (!((reg1->var_off.mask | reg1->var_off.value) & val)) return 0; break; case BPF_JGT: - if (reg->umin_value > val) + if (reg1->umin_value > val) return 1; - else if (reg->umax_value <= val) + else if (reg1->umax_value <= val) return 0; break; case BPF_JSGT: - if (reg->smin_value > sval) + if (reg1->smin_value > sval) return 1; - else if (reg->smax_value <= sval) + else if (reg1->smax_value <= sval) return 0; break; case BPF_JLT: - if (reg->umax_value < val) + if (reg1->umax_value < val) return 1; - else if (reg->umin_value >= val) + else if (reg1->umin_value >= val) return 0; break; case BPF_JSLT: - if (reg->smax_value < sval) + if (reg1->smax_value < sval) return 1; - else if (reg->smin_value >= sval) + else if (reg1->smin_value >= sval) return 0; break; case BPF_JGE: - if (reg->umin_value >= val) + if (reg1->umin_value >= val) return 1; - else if (reg->umax_value < val) + else if (reg1->umax_value < val) return 0; break; case BPF_JSGE: - if (reg->smin_value >= sval) + if (reg1->smin_value >= sval) return 1; - else if (reg->smax_value < sval) + else if (reg1->smax_value < sval) return 0; break; case BPF_JLE: - if (reg->umax_value <= val) + if (reg1->umax_value <= val) return 1; - else if (reg->umin_value > val) + else if (reg1->umin_value > val) return 0; break; case BPF_JSLE: - if (reg->smax_value <= sval) + if (reg1->smax_value <= sval) return 1; - else if (reg->smin_value > sval) + else if (reg1->smin_value > sval) return 0; break; } @@ -14336,11 +14336,11 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) * -1 - unknown. 
Example: "if (reg < 5)" is unknown when register value * range [0,10] */ -static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, +static int is_branch_taken(struct bpf_reg_state *reg1, u64 val, u8 opcode, bool is_jmp32) { - if (__is_pointer_value(false, reg)) { - if (!reg_not_null(reg)) + if (__is_pointer_value(false, reg1)) { + if (!reg_not_null(reg1)) return -1; /* If pointer is valid tests against zero will fail so we can @@ -14360,8 +14360,8 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, } if (is_jmp32) - return is_branch32_taken(reg, val, opcode); - return is_branch64_taken(reg, val, opcode); + return is_branch32_taken(reg1, val, opcode); + return is_branch64_taken(reg1, val, opcode); } static int flip_opcode(u32 opcode) From fc3615dd0ee9c2c9667a0d48e6f2376b50343c6e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:54 -0700 Subject: [PATCH 13/41] bpf: generalize is_branch_taken() to work with two registers While still assuming that second register is a constant, generalize is_branch_taken-related code to accept two registers instead of register plus explicit constant value. This also, as a side effect, allows to simplify check_cond_jmp_op() by unifying BPF_K case with BPF_X case, for which we use a fake register to represent BPF_K's imm constant as a register. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231102033759.2541186-13-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 57 ++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 725f327ce5eb8..5e722aaef7edb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14167,9 +14167,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, })); } -static int is_branch32_taken(struct bpf_reg_state *reg1, u32 val, u8 opcode) +/* + * , currently assuming reg2 is a constant + */ +static int is_branch32_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, u8 opcode) { struct tnum subreg = tnum_subreg(reg1->var_off); + u32 val = (u32)tnum_subreg(reg2->var_off).value; s32 sval = (s32)val; switch (opcode) { @@ -14249,8 +14253,12 @@ static int is_branch32_taken(struct bpf_reg_state *reg1, u32 val, u8 opcode) } -static int is_branch64_taken(struct bpf_reg_state *reg1, u64 val, u8 opcode) +/* + * , currently assuming reg2 is a constant + */ +static int is_branch64_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, u8 opcode) { + u64 val = reg2->var_off.value; s64 sval = (s64)val; switch (opcode) { @@ -14329,16 +14337,23 @@ static int is_branch64_taken(struct bpf_reg_state *reg1, u64 val, u8 opcode) return -1; } -/* compute branch direction of the expression "if (reg opcode val) goto target;" +/* compute branch direction of the expression "if ( opcode ) goto target;" * and return: * 1 - branch will be taken and "goto target" will be executed * 0 - branch will not be taken and fall-through to next insn - * -1 - unknown. Example: "if (reg < 5)" is unknown when register value + * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value * range [0,10] */ -static int is_branch_taken(struct bpf_reg_state *reg1, u64 val, u8 opcode, - bool is_jmp32) +static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) { + struct tnum reg2_tnum = is_jmp32 ? 
tnum_subreg(reg2->var_off) : reg2->var_off; + u64 val; + + if (!tnum_is_const(reg2_tnum)) + return -1; + val = reg2_tnum.value; + if (__is_pointer_value(false, reg1)) { if (!reg_not_null(reg1)) return -1; @@ -14360,8 +14375,8 @@ static int is_branch_taken(struct bpf_reg_state *reg1, u64 val, u8 opcode, } if (is_jmp32) - return is_branch32_taken(reg1, val, opcode); - return is_branch64_taken(reg1, val, opcode); + return is_branch32_taken(reg1, reg2, opcode); + return is_branch64_taken(reg1, reg2, opcode); } static int flip_opcode(u32 opcode) @@ -14832,6 +14847,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; struct bpf_reg_state *eq_branch_regs; + struct bpf_reg_state fake_reg = {}; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -14872,36 +14888,27 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } + src_reg = &fake_reg; + src_reg->type = SCALAR_VALUE; + __mark_reg_known(src_reg, insn->imm); } is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; if (BPF_SRC(insn->code) == BPF_K) { - pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32); + pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); } else if (src_reg->type == SCALAR_VALUE && is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) { - pred = is_branch_taken(dst_reg, - tnum_subreg(src_reg->var_off).value, - opcode, - is_jmp32); + pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); } else if (src_reg->type == SCALAR_VALUE && !is_jmp32 && tnum_is_const(src_reg->var_off)) { - pred = is_branch_taken(dst_reg, - src_reg->var_off.value, - opcode, - is_jmp32); + pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); } else if (dst_reg->type == SCALAR_VALUE && is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) { - pred = is_branch_taken(src_reg, - tnum_subreg(dst_reg->var_off).value, - flip_opcode(opcode), - is_jmp32); + pred = is_branch_taken(src_reg, dst_reg, flip_opcode(opcode), is_jmp32); } else if (dst_reg->type == SCALAR_VALUE && !is_jmp32 && tnum_is_const(dst_reg->var_off)) { - pred = is_branch_taken(src_reg, - dst_reg->var_off.value, - flip_opcode(opcode), - is_jmp32); + pred = is_branch_taken(src_reg, dst_reg, flip_opcode(opcode), is_jmp32); } else if (reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg) && !is_jmp32) { From dd2a2cc3c1bfaa9dd755fafcb7812eb6d26ca2f9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:55 -0700 Subject: [PATCH 14/41] bpf: move is_branch_taken() down Move is_branch_taken() slightly down. In subsequent patched we'll need both flip_opcode() and is_pkt_ptr_branch_taken() for is_branch_taken(), but instead of sprinkling forward declarations around, it makes more sense to move is_branch_taken() lower below is_pkt_ptr_branch_taken(), and also keep it closer to very tightly related reg_set_min_max(), as they are two critical parts of the same SCALAR range tracking logic. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-14-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 84 +++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5e722aaef7edb..c5d187d43fa1c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14337,48 +14337,6 @@ static int is_branch64_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *r return -1; } -/* compute branch direction of the expression "if ( opcode ) goto target;" - * and return: - * 1 - branch will be taken and "goto target" will be executed - * 0 - branch will not be taken and fall-through to next insn - * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value - * range [0,10] - */ -static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, - u8 opcode, bool is_jmp32) -{ - struct tnum reg2_tnum = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off; - u64 val; - - if (!tnum_is_const(reg2_tnum)) - return -1; - val = reg2_tnum.value; - - if (__is_pointer_value(false, reg1)) { - if (!reg_not_null(reg1)) - return -1; - - /* If pointer is valid tests against zero will fail so we can - * use this to direct branch taken. - */ - if (val != 0) - return -1; - - switch (opcode) { - case BPF_JEQ: - return 0; - case BPF_JNE: - return 1; - default: - return -1; - } - } - - if (is_jmp32) - return is_branch32_taken(reg1, reg2, opcode); - return is_branch64_taken(reg1, reg2, opcode); -} - static int flip_opcode(u32 opcode) { /* How can we transform "a b" into "b a"? */ @@ -14440,6 +14398,48 @@ static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, return -1; } +/* compute branch direction of the expression "if ( opcode ) goto target;" + * and return: + * 1 - branch will be taken and "goto target" will be executed + * 0 - branch will not be taken and fall-through to next insn + * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value + * range [0,10] + */ +static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) +{ + struct tnum reg2_tnum = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off; + u64 val; + + if (!tnum_is_const(reg2_tnum)) + return -1; + val = reg2_tnum.value; + + if (__is_pointer_value(false, reg1)) { + if (!reg_not_null(reg1)) + return -1; + + /* If pointer is valid tests against zero will fail so we can + * use this to direct branch taken. + */ + if (val != 0) + return -1; + + switch (opcode) { + case BPF_JEQ: + return 0; + case BPF_JNE: + return 1; + default: + return -1; + } + } + + if (is_jmp32) + return is_branch32_taken(reg1, reg2, opcode); + return is_branch64_taken(reg1, reg2, opcode); +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. From 171de12646d23b240fc73113acf1f62d85dd9d02 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:56 -0700 Subject: [PATCH 15/41] bpf: generalize is_branch_taken to handle all conditional jumps in one place Make is_branch_taken() a single entry point for branch pruning decision making, handling both pointer vs pointer, pointer vs scalar, and scalar vs scalar cases in one place. This also nicely cleans up check_cond_jmp_op(). 
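As a rough illustration of the tri-state decision is_branch_taken() computes (1 = always taken, 0 = never taken, -1 = unknown), here is a standalone sketch of just the scalar-range-vs-constant BPF_JGT case; toy_branch_jgt() is invented for this example and is not the verifier code:

  #include <stdio.h>
  #include <stdint.h>

  /* 1 = branch always taken, 0 = never taken, -1 = unknown */
  static int toy_branch_jgt(uint64_t umin, uint64_t umax, uint64_t val)
  {
          if (umin > val)
                  return 1;       /* whole range is above val */
          if (umax <= val)
                  return 0;       /* whole range is at or below val */
          return -1;              /* range straddles val, cannot prune */
  }

  int main(void)
  {
          printf("%d\n", toy_branch_jgt(6, 10, 5));       /* 1 */
          printf("%d\n", toy_branch_jgt(0, 5, 5));        /* 0 */
          printf("%d\n", toy_branch_jgt(0, 10, 5));       /* -1 */
          return 0;
  }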
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-15-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 49 ++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c5d187d43fa1c..d5213cef53891 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14167,6 +14167,19 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, })); } +/* check if register is a constant scalar value */ +static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32) +{ + return reg->type == SCALAR_VALUE && + tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off); +} + +/* assuming is_reg_const() is true, return constant value of a register */ +static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) +{ + return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; +} + /* * , currently assuming reg2 is a constant */ @@ -14408,12 +14421,20 @@ static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32) { - struct tnum reg2_tnum = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off; u64 val; - if (!tnum_is_const(reg2_tnum)) + if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32) + return is_pkt_ptr_branch_taken(reg1, reg2, opcode); + + /* try to make sure reg2 is a constant SCALAR_VALUE */ + if (!is_reg_const(reg2, is_jmp32)) { + opcode = flip_opcode(opcode); + swap(reg1, reg2); + } + /* for now we expect reg2 to be a constant to make any useful decisions */ + if (!is_reg_const(reg2, is_jmp32)) return -1; - val = reg2_tnum.value; + val = reg_const_value(reg2, is_jmp32); if (__is_pointer_value(false, reg1)) { if (!reg_not_null(reg1)) @@ -14894,27 +14915,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - - if (BPF_SRC(insn->code) == BPF_K) { - pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); - } else if (src_reg->type == SCALAR_VALUE && - is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) { - pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); - } else if (src_reg->type == SCALAR_VALUE && - !is_jmp32 && tnum_is_const(src_reg->var_off)) { - pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); - } else if (dst_reg->type == SCALAR_VALUE && - is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) { - pred = is_branch_taken(src_reg, dst_reg, flip_opcode(opcode), is_jmp32); - } else if (dst_reg->type == SCALAR_VALUE && - !is_jmp32 && tnum_is_const(dst_reg->var_off)) { - pred = is_branch_taken(src_reg, dst_reg, flip_opcode(opcode), is_jmp32); - } else if (reg_is_pkt_pointer_any(dst_reg) && - reg_is_pkt_pointer_any(src_reg) && - !is_jmp32) { - pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); - } - + pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); if (pred >= 0) { /* If we get here with a dst_reg pointer type it is because * above is_branch_taken() special cased the 0 comparison. From 761a9e560d0ca0a6a2235c426cca80926672467e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:57 -0700 Subject: [PATCH 16/41] bpf: unify 32-bit and 64-bit is_branch_taken logic Combine 32-bit and 64-bit is_branch_taken logic for SCALAR_VALUE registers. 
It makes it easier to see parallels between two domains (32-bit and 64-bit), and makes subsequent refactoring more straightforward. No functional changes. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-16-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 200 +++++++++++++----------------------------- 1 file changed, 59 insertions(+), 141 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d5213cef53891..b077dd99b1592 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14183,166 +14183,86 @@ static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) /* * , currently assuming reg2 is a constant */ -static int is_branch32_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, u8 opcode) +static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) { - struct tnum subreg = tnum_subreg(reg1->var_off); - u32 val = (u32)tnum_subreg(reg2->var_off).value; - s32 sval = (s32)val; + struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off; + u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value; + u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value; + s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value; + s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value; + u64 uval = is_jmp32 ? (u32)tnum_subreg(reg2->var_off).value : reg2->var_off.value; + s64 sval = is_jmp32 ? (s32)uval : (s64)uval; switch (opcode) { case BPF_JEQ: - if (tnum_is_const(subreg)) - return !!tnum_equals_const(subreg, val); - else if (val < reg1->u32_min_value || val > reg1->u32_max_value) + if (tnum_is_const(t1)) + return !!tnum_equals_const(t1, uval); + else if (uval < umin1 || uval > umax1) return 0; - else if (sval < reg1->s32_min_value || sval > reg1->s32_max_value) + else if (sval < smin1 || sval > smax1) return 0; break; case BPF_JNE: - if (tnum_is_const(subreg)) - return !tnum_equals_const(subreg, val); - else if (val < reg1->u32_min_value || val > reg1->u32_max_value) + if (tnum_is_const(t1)) + return !tnum_equals_const(t1, uval); + else if (uval < umin1 || uval > umax1) return 1; - else if (sval < reg1->s32_min_value || sval > reg1->s32_max_value) + else if (sval < smin1 || sval > smax1) return 1; break; case BPF_JSET: - if ((~subreg.mask & subreg.value) & val) + if ((~t1.mask & t1.value) & uval) return 1; - if (!((subreg.mask | subreg.value) & val)) + if (!((t1.mask | t1.value) & uval)) return 0; break; case BPF_JGT: - if (reg1->u32_min_value > val) + if (umin1 > uval ) return 1; - else if (reg1->u32_max_value <= val) + else if (umax1 <= uval) return 0; break; case BPF_JSGT: - if (reg1->s32_min_value > sval) + if (smin1 > sval) return 1; - else if (reg1->s32_max_value <= sval) + else if (smax1 <= sval) return 0; break; case BPF_JLT: - if (reg1->u32_max_value < val) + if (umax1 < uval) return 1; - else if (reg1->u32_min_value >= val) + else if (umin1 >= uval) return 0; break; case BPF_JSLT: - if (reg1->s32_max_value < sval) + if (smax1 < sval) return 1; - else if (reg1->s32_min_value >= sval) + else if (smin1 >= sval) return 0; break; case BPF_JGE: - if (reg1->u32_min_value >= val) + if (umin1 >= uval) return 1; - else if (reg1->u32_max_value < val) + else if (umax1 < uval) return 0; break; case BPF_JSGE: - if (reg1->s32_min_value >= sval) + if (smin1 >= sval) return 1; - else if (reg1->s32_max_value < sval) + else if (smax1 < sval) return 0; break; 
case BPF_JLE: - if (reg1->u32_max_value <= val) + if (umax1 <= uval) return 1; - else if (reg1->u32_min_value > val) + else if (umin1 > uval) return 0; break; case BPF_JSLE: - if (reg1->s32_max_value <= sval) + if (smax1 <= sval) return 1; - else if (reg1->s32_min_value > sval) - return 0; - break; - } - - return -1; -} - - -/* - * , currently assuming reg2 is a constant - */ -static int is_branch64_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, u8 opcode) -{ - u64 val = reg2->var_off.value; - s64 sval = (s64)val; - - switch (opcode) { - case BPF_JEQ: - if (tnum_is_const(reg1->var_off)) - return !!tnum_equals_const(reg1->var_off, val); - else if (val < reg1->umin_value || val > reg1->umax_value) - return 0; - else if (sval < reg1->smin_value || sval > reg1->smax_value) - return 0; - break; - case BPF_JNE: - if (tnum_is_const(reg1->var_off)) - return !tnum_equals_const(reg1->var_off, val); - else if (val < reg1->umin_value || val > reg1->umax_value) - return 1; - else if (sval < reg1->smin_value || sval > reg1->smax_value) - return 1; - break; - case BPF_JSET: - if ((~reg1->var_off.mask & reg1->var_off.value) & val) - return 1; - if (!((reg1->var_off.mask | reg1->var_off.value) & val)) - return 0; - break; - case BPF_JGT: - if (reg1->umin_value > val) - return 1; - else if (reg1->umax_value <= val) - return 0; - break; - case BPF_JSGT: - if (reg1->smin_value > sval) - return 1; - else if (reg1->smax_value <= sval) - return 0; - break; - case BPF_JLT: - if (reg1->umax_value < val) - return 1; - else if (reg1->umin_value >= val) - return 0; - break; - case BPF_JSLT: - if (reg1->smax_value < sval) - return 1; - else if (reg1->smin_value >= sval) - return 0; - break; - case BPF_JGE: - if (reg1->umin_value >= val) - return 1; - else if (reg1->umax_value < val) - return 0; - break; - case BPF_JSGE: - if (reg1->smin_value >= sval) - return 1; - else if (reg1->smax_value < sval) - return 0; - break; - case BPF_JLE: - if (reg1->umax_value <= val) - return 1; - else if (reg1->umin_value > val) - return 0; - break; - case BPF_JSLE: - if (reg1->smax_value <= sval) - return 1; - else if (reg1->smin_value > sval) + else if (smin1 > sval) return 0; break; } @@ -14456,9 +14376,7 @@ static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg } } - if (is_jmp32) - return is_branch32_taken(reg1, reg2, opcode); - return is_branch64_taken(reg1, reg2, opcode); + return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32); } /* Adjusts the register min/max values in the case that the dst_reg is the @@ -14468,15 +14386,15 @@ static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg */ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, - u64 val, u32 val32, + u64 uval, u32 uval32, u8 opcode, bool is_jmp32) { struct tnum false_32off = tnum_subreg(false_reg->var_off); struct tnum false_64off = false_reg->var_off; struct tnum true_32off = tnum_subreg(true_reg->var_off); struct tnum true_64off = true_reg->var_off; - s64 sval = (s64)val; - s32 sval32 = (s32)val32; + s64 sval = (s64)uval; + s32 sval32 = (s32)uval32; /* If the dst_reg is a pointer, we can't learn anything about its * variable offset from the compare (unless src_reg were a pointer into @@ -14499,49 +14417,49 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, */ case BPF_JEQ: if (is_jmp32) { - __mark_reg32_known(true_reg, val32); + __mark_reg32_known(true_reg, uval32); true_32off = tnum_subreg(true_reg->var_off); } else { - 
___mark_reg_known(true_reg, val); + ___mark_reg_known(true_reg, uval); true_64off = true_reg->var_off; } break; case BPF_JNE: if (is_jmp32) { - __mark_reg32_known(false_reg, val32); + __mark_reg32_known(false_reg, uval32); false_32off = tnum_subreg(false_reg->var_off); } else { - ___mark_reg_known(false_reg, val); + ___mark_reg_known(false_reg, uval); false_64off = false_reg->var_off; } break; case BPF_JSET: if (is_jmp32) { - false_32off = tnum_and(false_32off, tnum_const(~val32)); - if (is_power_of_2(val32)) + false_32off = tnum_and(false_32off, tnum_const(~uval32)); + if (is_power_of_2(uval32)) true_32off = tnum_or(true_32off, - tnum_const(val32)); + tnum_const(uval32)); } else { - false_64off = tnum_and(false_64off, tnum_const(~val)); - if (is_power_of_2(val)) + false_64off = tnum_and(false_64off, tnum_const(~uval)); + if (is_power_of_2(uval)) true_64off = tnum_or(true_64off, - tnum_const(val)); + tnum_const(uval)); } break; case BPF_JGE: case BPF_JGT: { if (is_jmp32) { - u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1; - u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32; + u32 false_umax = opcode == BPF_JGT ? uval32 : uval32 - 1; + u32 true_umin = opcode == BPF_JGT ? uval32 + 1 : uval32; false_reg->u32_max_value = min(false_reg->u32_max_value, false_umax); true_reg->u32_min_value = max(true_reg->u32_min_value, true_umin); } else { - u64 false_umax = opcode == BPF_JGT ? val : val - 1; - u64 true_umin = opcode == BPF_JGT ? val + 1 : val; + u64 false_umax = opcode == BPF_JGT ? uval : uval - 1; + u64 true_umin = opcode == BPF_JGT ? uval + 1 : uval; false_reg->umax_value = min(false_reg->umax_value, false_umax); true_reg->umin_value = max(true_reg->umin_value, true_umin); @@ -14570,16 +14488,16 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, case BPF_JLT: { if (is_jmp32) { - u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1; - u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32; + u32 false_umin = opcode == BPF_JLT ? uval32 : uval32 + 1; + u32 true_umax = opcode == BPF_JLT ? uval32 - 1 : uval32; false_reg->u32_min_value = max(false_reg->u32_min_value, false_umin); true_reg->u32_max_value = min(true_reg->u32_max_value, true_umax); } else { - u64 false_umin = opcode == BPF_JLT ? val : val + 1; - u64 true_umax = opcode == BPF_JLT ? val - 1 : val; + u64 false_umin = opcode == BPF_JLT ? uval : uval + 1; + u64 true_umax = opcode == BPF_JLT ? uval - 1 : uval; false_reg->umin_value = max(false_reg->umin_value, false_umin); true_reg->umax_value = min(true_reg->umax_value, true_umax); @@ -14628,7 +14546,7 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, */ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, - u64 val, u32 val32, + u64 uval, u32 uval32, u8 opcode, bool is_jmp32) { opcode = flip_opcode(opcode); @@ -14636,7 +14554,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, * BPF_JA, can't get here. */ if (opcode) - reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32); + reg_set_min_max(true_reg, false_reg, uval, uval32, opcode, is_jmp32); } /* Regs are known to be equal, so intersect their min/max/var_off */ From 4c617286771ed129dddbeb7d15cb1cc63b2ba08a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:58 -0700 Subject: [PATCH 17/41] bpf: prepare reg_set_min_max for second set of registers Similarly to is_branch_taken()-related refactorings, start preparing reg_set_min_max() to handle more generic case of two non-const registers. 
Start with renaming arguments to accommodate later addition of second register as an input argument. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-17-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 80 +++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b077dd99b1592..438bf96b1c2d1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14384,25 +14384,25 @@ static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg * simply doing a BPF_K check. * In JEQ/JNE cases we also adjust the var_off values. */ -static void reg_set_min_max(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, +static void reg_set_min_max(struct bpf_reg_state *true_reg1, + struct bpf_reg_state *false_reg1, u64 uval, u32 uval32, u8 opcode, bool is_jmp32) { - struct tnum false_32off = tnum_subreg(false_reg->var_off); - struct tnum false_64off = false_reg->var_off; - struct tnum true_32off = tnum_subreg(true_reg->var_off); - struct tnum true_64off = true_reg->var_off; + struct tnum false_32off = tnum_subreg(false_reg1->var_off); + struct tnum false_64off = false_reg1->var_off; + struct tnum true_32off = tnum_subreg(true_reg1->var_off); + struct tnum true_64off = true_reg1->var_off; s64 sval = (s64)uval; s32 sval32 = (s32)uval32; /* If the dst_reg is a pointer, we can't learn anything about its * variable offset from the compare (unless src_reg were a pointer into * the same object, but we don't bother with that. - * Since false_reg and true_reg have the same type by construction, we + * Since false_reg1 and true_reg1 have the same type by construction, we * only need to check one of them for pointerness. */ - if (__is_pointer_value(false, false_reg)) + if (__is_pointer_value(false, false_reg1)) return; switch (opcode) { @@ -14417,20 +14417,20 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, */ case BPF_JEQ: if (is_jmp32) { - __mark_reg32_known(true_reg, uval32); - true_32off = tnum_subreg(true_reg->var_off); + __mark_reg32_known(true_reg1, uval32); + true_32off = tnum_subreg(true_reg1->var_off); } else { - ___mark_reg_known(true_reg, uval); - true_64off = true_reg->var_off; + ___mark_reg_known(true_reg1, uval); + true_64off = true_reg1->var_off; } break; case BPF_JNE: if (is_jmp32) { - __mark_reg32_known(false_reg, uval32); - false_32off = tnum_subreg(false_reg->var_off); + __mark_reg32_known(false_reg1, uval32); + false_32off = tnum_subreg(false_reg1->var_off); } else { - ___mark_reg_known(false_reg, uval); - false_64off = false_reg->var_off; + ___mark_reg_known(false_reg1, uval); + false_64off = false_reg1->var_off; } break; case BPF_JSET: @@ -14453,16 +14453,16 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, u32 false_umax = opcode == BPF_JGT ? uval32 : uval32 - 1; u32 true_umin = opcode == BPF_JGT ? uval32 + 1 : uval32; - false_reg->u32_max_value = min(false_reg->u32_max_value, + false_reg1->u32_max_value = min(false_reg1->u32_max_value, false_umax); - true_reg->u32_min_value = max(true_reg->u32_min_value, + true_reg1->u32_min_value = max(true_reg1->u32_min_value, true_umin); } else { u64 false_umax = opcode == BPF_JGT ? uval : uval - 1; u64 true_umin = opcode == BPF_JGT ? 
uval + 1 : uval; - false_reg->umax_value = min(false_reg->umax_value, false_umax); - true_reg->umin_value = max(true_reg->umin_value, true_umin); + false_reg1->umax_value = min(false_reg1->umax_value, false_umax); + true_reg1->umin_value = max(true_reg1->umin_value, true_umin); } break; } @@ -14473,14 +14473,14 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1; s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32; - false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax); - true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin); + false_reg1->s32_max_value = min(false_reg1->s32_max_value, false_smax); + true_reg1->s32_min_value = max(true_reg1->s32_min_value, true_smin); } else { s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; - false_reg->smax_value = min(false_reg->smax_value, false_smax); - true_reg->smin_value = max(true_reg->smin_value, true_smin); + false_reg1->smax_value = min(false_reg1->smax_value, false_smax); + true_reg1->smin_value = max(true_reg1->smin_value, true_smin); } break; } @@ -14491,16 +14491,16 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, u32 false_umin = opcode == BPF_JLT ? uval32 : uval32 + 1; u32 true_umax = opcode == BPF_JLT ? uval32 - 1 : uval32; - false_reg->u32_min_value = max(false_reg->u32_min_value, + false_reg1->u32_min_value = max(false_reg1->u32_min_value, false_umin); - true_reg->u32_max_value = min(true_reg->u32_max_value, + true_reg1->u32_max_value = min(true_reg1->u32_max_value, true_umax); } else { u64 false_umin = opcode == BPF_JLT ? uval : uval + 1; u64 true_umax = opcode == BPF_JLT ? uval - 1 : uval; - false_reg->umin_value = max(false_reg->umin_value, false_umin); - true_reg->umax_value = min(true_reg->umax_value, true_umax); + false_reg1->umin_value = max(false_reg1->umin_value, false_umin); + true_reg1->umax_value = min(true_reg1->umax_value, true_umax); } break; } @@ -14511,14 +14511,14 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1; s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32; - false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin); - true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax); + false_reg1->s32_min_value = max(false_reg1->s32_min_value, false_smin); + true_reg1->s32_max_value = min(true_reg1->s32_max_value, true_smax); } else { s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; s64 true_smax = opcode == BPF_JSLT ? 
sval - 1 : sval; - false_reg->smin_value = max(false_reg->smin_value, false_smin); - true_reg->smax_value = min(true_reg->smax_value, true_smax); + false_reg1->smin_value = max(false_reg1->smin_value, false_smin); + true_reg1->smax_value = min(true_reg1->smax_value, true_smax); } break; } @@ -14527,17 +14527,17 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, } if (is_jmp32) { - false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), + false_reg1->var_off = tnum_or(tnum_clear_subreg(false_64off), tnum_subreg(false_32off)); - true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), + true_reg1->var_off = tnum_or(tnum_clear_subreg(true_64off), tnum_subreg(true_32off)); - reg_bounds_sync(false_reg); - reg_bounds_sync(true_reg); + reg_bounds_sync(false_reg1); + reg_bounds_sync(true_reg1); } else { - false_reg->var_off = false_64off; - true_reg->var_off = true_64off; - reg_bounds_sync(false_reg); - reg_bounds_sync(true_reg); + false_reg1->var_off = false_64off; + true_reg1->var_off = true_64off; + reg_bounds_sync(false_reg1); + reg_bounds_sync(true_reg1); } } From 9a14d62a2cdb33e983cddf2e994b9766c54ceedb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:59 -0700 Subject: [PATCH 18/41] bpf: generalize reg_set_min_max() to handle two sets of two registers Change reg_set_min_max() to take FALSE/TRUE sets of two registers each, instead of assuming that we are always comparing to a constant. For now we still assume that right-hand side registers are constants (and make sure that's the case by swapping src/dst regs, if necessary), but subsequent patches will remove this limitation. reg_set_min_max() is now called unconditionally for any register comparison, so that might include pointer vs pointer. This makes it consistent with is_branch_taken() generality. But we currently only support adjustments based on SCALAR vs SCALAR comparisons, so reg_set_min_max() has to guard itself againts pointers. Taking two by two registers allows to further unify and simplify check_cond_jmp_op() logic. We utilize fake register for BPF_K conditional jump case, just like with is_branch_taken() part. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-18-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 131 ++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 75 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 438bf96b1c2d1..2197385d91dc6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14379,32 +14379,50 @@ static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32); } -/* Adjusts the register min/max values in the case that the dst_reg is the - * variable register that we are working on, and src_reg is a constant or we're - * simply doing a BPF_K check. - * In JEQ/JNE cases we also adjust the var_off values. +/* Adjusts the register min/max values in the case that the dst_reg and + * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K + * check, in which case we havea fake SCALAR_VALUE representing insn->imm). + * Technically we can do similar adjustments for pointers to the same object, + * but we don't support that right now. 
*/ static void reg_set_min_max(struct bpf_reg_state *true_reg1, + struct bpf_reg_state *true_reg2, struct bpf_reg_state *false_reg1, - u64 uval, u32 uval32, + struct bpf_reg_state *false_reg2, u8 opcode, bool is_jmp32) { - struct tnum false_32off = tnum_subreg(false_reg1->var_off); - struct tnum false_64off = false_reg1->var_off; - struct tnum true_32off = tnum_subreg(true_reg1->var_off); - struct tnum true_64off = true_reg1->var_off; - s64 sval = (s64)uval; - s32 sval32 = (s32)uval32; - - /* If the dst_reg is a pointer, we can't learn anything about its - * variable offset from the compare (unless src_reg were a pointer into - * the same object, but we don't bother with that. - * Since false_reg1 and true_reg1 have the same type by construction, we - * only need to check one of them for pointerness. + struct tnum false_32off, false_64off; + struct tnum true_32off, true_64off; + u64 uval; + u32 uval32; + s64 sval; + s32 sval32; + + /* If either register is a pointer, we can't learn anything about its + * variable offset from the compare (unless they were a pointer into + * the same object, but we don't bother with that). */ - if (__is_pointer_value(false, false_reg1)) + if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE) + return; + + /* we expect right-hand registers (src ones) to be constants, for now */ + if (!is_reg_const(false_reg2, is_jmp32)) { + opcode = flip_opcode(opcode); + swap(true_reg1, true_reg2); + swap(false_reg1, false_reg2); + } + if (!is_reg_const(false_reg2, is_jmp32)) return; + false_32off = tnum_subreg(false_reg1->var_off); + false_64off = false_reg1->var_off; + true_32off = tnum_subreg(true_reg1->var_off); + true_64off = true_reg1->var_off; + uval = false_reg2->var_off.value; + uval32 = (u32)tnum_subreg(false_reg2->var_off).value; + sval = (s64)uval; + sval32 = (s32)uval32; + switch (opcode) { /* JEQ/JNE comparison doesn't change the register equivalence. * @@ -14541,22 +14559,6 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg1, } } -/* Same as above, but for the case that dst_reg holds a constant and src_reg is - * the variable reg. - */ -static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, - u64 uval, u32 uval32, - u8 opcode, bool is_jmp32) -{ - opcode = flip_opcode(opcode); - /* This uses zero as "not present in table"; luckily the zero opcode, - * BPF_JA, can't get here. - */ - if (opcode) - reg_set_min_max(true_reg, false_reg, uval, uval32, opcode, is_jmp32); -} - /* Regs are known to be equal, so intersect their min/max/var_off */ static void __reg_combine_min_max(struct bpf_reg_state *src_reg, struct bpf_reg_state *dst_reg) @@ -14881,53 +14883,32 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EFAULT; other_branch_regs = other_branch->frame[other_branch->curframe]->regs; - /* detect if we are comparing against a constant value so we can adjust - * our min/max values for our dst register. - * this is only legit if both are scalars (or pointers to the same - * object, I suppose, see the PTR_MAYBE_NULL related if block below), - * because otherwise the different base pointers mean the offsets aren't - * comparable. 
- */ if (BPF_SRC(insn->code) == BPF_X) { - struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + reg_set_min_max(&other_branch_regs[insn->dst_reg], + &other_branch_regs[insn->src_reg], + dst_reg, src_reg, opcode, is_jmp32); if (dst_reg->type == SCALAR_VALUE && - src_reg->type == SCALAR_VALUE) { - if (tnum_is_const(src_reg->var_off) || - (is_jmp32 && - tnum_is_const(tnum_subreg(src_reg->var_off)))) - reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, - src_reg->var_off.value, - tnum_subreg(src_reg->var_off).value, - opcode, is_jmp32); - else if (tnum_is_const(dst_reg->var_off) || - (is_jmp32 && - tnum_is_const(tnum_subreg(dst_reg->var_off)))) - reg_set_min_max_inv(&other_branch_regs[insn->src_reg], - src_reg, - dst_reg->var_off.value, - tnum_subreg(dst_reg->var_off).value, - opcode, is_jmp32); - else if (!is_jmp32 && - (opcode == BPF_JEQ || opcode == BPF_JNE)) - /* Comparing for equality, we can combine knowledge */ - reg_combine_min_max(&other_branch_regs[insn->src_reg], - &other_branch_regs[insn->dst_reg], - src_reg, dst_reg, opcode); - if (src_reg->id && - !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { - find_equal_scalars(this_branch, src_reg); - find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); - } - - } - } else if (dst_reg->type == SCALAR_VALUE) { + src_reg->type == SCALAR_VALUE && + !is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE)) { + /* Comparing for equality, we can combine knowledge */ + reg_combine_min_max(&other_branch_regs[insn->src_reg], + &other_branch_regs[insn->dst_reg], + src_reg, dst_reg, opcode); + } + } else /* BPF_SRC(insn->code) == BPF_K */ { reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, insn->imm, (u32)insn->imm, - opcode, is_jmp32); + src_reg /* fake one */, + dst_reg, src_reg /* same fake one */, + opcode, is_jmp32); } + if (BPF_SRC(insn->code) == BPF_X && + src_reg->type == SCALAR_VALUE && src_reg->id && + !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { + find_equal_scalars(this_branch, src_reg); + find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); + } if (dst_reg->type == SCALAR_VALUE && dst_reg->id && !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { find_equal_scalars(this_branch, dst_reg); From 3f1f234e677b81b2f57d5b01e55e0dff9871a986 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:53 +0800 Subject: [PATCH 19/41] selftests/bpf: Use value with enough-size when updating per-cpu map When updating per-cpu map in map_percpu_stats test, patch_map_thread() only passes 4-bytes-sized value to bpf_map_update_elem(). The expected size of the value is 8 * num_possible_cpus(), so fix it by passing a value with enough-size for per-cpu map update. 
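As background, a minimal user-space sketch of sizing the value buffer for a per-cpu map update (illustrative only, not code from the patch; the function and variable names here are hypothetical):

	#include <errno.h>
	#include <stdlib.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	static int update_percpu_elem(int map_fd, __u32 key)
	{
		int ncpus = libbpf_num_possible_cpus();
		/* per-cpu maps expect value_size * num_possible_cpus() bytes,
		 * i.e. 8 * num_possible_cpus() for an 8-byte value
		 */
		__u64 *vals = calloc(ncpus, sizeof(__u64));
		int ret;

		if (!vals)
			return -ENOMEM;
		ret = bpf_map_update_elem(map_fd, &key, vals, 0);
		free(vals);
		return ret;
	}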
Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-2-houtao@huaweicloud.com --- .../bpf/map_tests/map_percpu_stats.c | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 1a9eeefda9a87..8ad17d051ef8f 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -131,6 +131,12 @@ static bool is_lru(__u32 map_type) map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + struct upsert_opts { __u32 map_type; int map_fd; @@ -150,17 +156,26 @@ static int create_small_hash(void) static void *patch_map_thread(void *arg) { + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ + static __u8 blob[8 << 10]; struct upsert_opts *opts = arg; + void *val_ptr; int val; int ret; int i; for (i = 0; i < opts->n; i++) { - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { val = create_small_hash(); - else + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { val = rand(); - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + val_ptr = &val; + } + + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) From ff38534e8251cf42831c0d16df28ff9ca6c94e6d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:54 +0800 Subject: [PATCH 20/41] selftests/bpf: Export map_update_retriable() Export map_update_retriable() to make it usable for other map_test cases. These cases may only need retry for specific errno, so add a new callback parameter to let map_update_retriable() decide whether or not the errno is retriable. 
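A hedged usage sketch of the exported helper with a caller-defined retry predicate (the predicate, attempt count, and wrapper function below are hypothetical; the actual patches only add EAGAIN/EBUSY and ENOMEM predicates):

	#include <errno.h>
	#include <stdbool.h>
	#include <linux/bpf.h>
	#include "test_maps.h"

	/* hypothetical predicate: retry only while the map is busy */
	static bool retry_only_for_busy(int err)
	{
		return err == EBUSY;
	}

	static int update_with_retry(int map_fd, __u32 key, __u64 value)
	{
		return map_update_retriable(map_fd, &key, &value, BPF_NOEXIST,
					    10 /* attempts */, retry_only_for_busy);
	}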
Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-3-houtao@huaweicloud.com --- tools/testing/selftests/bpf/test_maps.c | 17 ++++++++++++----- tools/testing/selftests/bpf/test_maps.h | 5 +++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 7fc00e423e4dd..767e0693df106 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,13 +1396,18 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static int map_update_retriable(int map_fd, const void *key, const void *value, - int flags, int attempts) +static bool retry_for_again_or_busy(int err) +{ + return (err == EAGAIN || err == EBUSY); +} + +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry) { int delay = rand() % MIN_DELAY_RANGE_US; while (bpf_map_update_elem(map_fd, key, value, flags)) { - if (!attempts || (errno != EAGAIN && errno != EBUSY)) + if (!attempts || !need_retry(errno)) return -errno; if (delay <= MAX_DELAY_US / 2) @@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data) key = value = i; if (do_update) { - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index f6fbca761732f..e4ac704a536c1 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -4,6 +4,7 @@ #include #include +#include #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -16,4 +17,8 @@ extern int skips; +typedef bool (*retry_for_error_fn)(int err); +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry); + #endif From 57688b2a543be340d80c568b7f91c0660bb65c2f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:55 +0800 Subject: [PATCH 21/41] selftsets/bpf: Retry map update for non-preallocated per-cpu map BPF CI failed due to map_percpu_stats_percpu_hash from time to time [1]. It seems that the failure reason is per-cpu bpf memory allocator may not be able to allocate per-cpu pointer successfully and it can not refill free llist timely, and bpf_map_update_elem() will return -ENOMEM. So mitigate the problem by retrying the update operation for non-preallocated per-cpu map. 
[1]: https://github.com/kernel-patches/bpf/actions/runs/6713177520/job/18244865326?pr=5909 Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-4-houtao@huaweicloud.com --- .../bpf/map_tests/map_percpu_stats.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 8ad17d051ef8f..e152535e9e3ec 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -141,6 +141,7 @@ struct upsert_opts { __u32 map_type; int map_fd; __u32 n; + bool retry_for_nomem; }; static int create_small_hash(void) @@ -154,6 +155,11 @@ static int create_small_hash(void) return map_fd; } +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + static void *patch_map_thread(void *arg) { /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ @@ -175,7 +181,12 @@ static void *patch_map_thread(void *arg) val_ptr = &val; } - ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); + /* 2 seconds may be enough ? */ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) @@ -296,6 +307,13 @@ static void __test(int map_fd) else opts.n /= 2; + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + /* * Upsert keys [0, n) under some competition: with random values from * N_THREADS threads. Check values, then delete all elements and check From c4be30a8ce45da6cd361dad9e56b24c9ba4488b2 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Tue, 31 Oct 2023 14:27:17 -0700 Subject: [PATCH 22/41] selftests/bpf: Consolidate VIRTIO/9P configs in config.vm file Those configs are needed to be able to run VM somewhat consistently. For instance, ATM, s390x is missing the `CONFIG_VIRTIO_CONSOLE` which prevents s390x kernels built in CI to leverage qemu-guest-agent. By moving them to `config,vm`, we should have selftest kernels which are equal in term of VM functionalities when they include this file. The set of config unabled were picked using grep -h -E '(_9P|_VIRTIO)' config.x86_64 config | sort | uniq added to `config.vm` and then grep -vE '(_9P|_VIRTIO)' config.{x86_64,aarch64,s390x} as a side-effect, some config may have disappeared to the aarch64 and s390x kernels, but they should not be needed. CI will tell. 
Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231031212717.4037892-1-chantr4@gmail.com --- tools/testing/selftests/bpf/config.aarch64 | 16 ---------------- tools/testing/selftests/bpf/config.s390x | 9 --------- tools/testing/selftests/bpf/config.vm | 12 ++++++++++++ tools/testing/selftests/bpf/config.x86_64 | 12 ------------ tools/testing/selftests/bpf/vmtest.sh | 4 +++- 5 files changed, 15 insertions(+), 38 deletions(-) create mode 100644 tools/testing/selftests/bpf/config.vm diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 2538214948848..fa8ecf626c73e 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARM_SMMU_V3=y @@ -46,7 +45,6 @@ CONFIG_DEBUG_SG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_DRM=y CONFIG_DUMMY=y CONFIG_EXPERT=y @@ -67,7 +65,6 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HEADERS_INSTALL=y CONFIG_HIGH_RES_TIMERS=y CONFIG_HUGETLBFS=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HW_RANDOM=y CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y @@ -99,8 +96,6 @@ CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_NET_9P=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y @@ -140,7 +135,6 @@ CONFIG_SCHED_TRACER=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_VIRTIO=y CONFIG_SCSI=y CONFIG_SECURITY_NETWORK=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y @@ -167,16 +161,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_FS=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 2ba92167be358..e933303828494 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_AUDIT=y CONFIG_BLK_CGROUP=y @@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y @@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y CONFIG_SAMPLES=y CONFIG_SCHED_TRACER=y CONFIG_SCSI=y -CONFIG_SCSI_VIRTIO=y CONFIG_SECURITY_NETWORK=y CONFIG_STACK_TRACER=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -136,11 +132,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm new file mode 100644 index 0000000000000..a9746ca787773 --- /dev/null +++ b/tools/testing/selftests/bpf/config.vm @@ -0,0 +1,12 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y 
+CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a60482784..f7bfb2b09c82b 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -1,6 +1,3 @@ -CONFIG_9P_FS=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_CLS_CGROUP=y CONFIG_NET_EMATCH=y @@ -228,12 +222,6 @@ CONFIG_USER_NS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 6850345280184..65d14f3bbe301 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" + "tools/testing/selftests/bpf/config.vm" + "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" From 7d188e7d25e12deb727cbdd99ae377b99cb8ca3d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 6 Nov 2023 20:57:23 -0800 Subject: [PATCH 23/41] bpf: Add __bpf_dynptr_data* for in kernel use Different types of bpf dynptr have different internal data storage. Specifically, SKB and XDP type of dynptr may have non-continuous data. Therefore, it is not always safe to directly access dynptr->data. Add __bpf_dynptr_data and __bpf_dynptr_data_rw to replace direct access to dynptr->data. Update bpf_verify_pkcs7_signature to use __bpf_dynptr_data instead of dynptr->data. 
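A hedged sketch of the intended in-kernel calling pattern (illustrative fragment only; the specific error code is a hypothetical choice). The NULL check matters because, per the new helpers, NULL is returned when the dynptr does not have contiguous data up to len bytes, which can happen for SKB and XDP dynptrs:

	const void *data;
	u32 len;

	len = __bpf_dynptr_size(ptr);
	data = __bpf_dynptr_data(ptr, len);
	if (!data)
		return -EINVAL;	/* data not contiguous up to len bytes */
	/* 'data' may now be read for 'len' bytes */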
Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/bpf/20231107045725.2278852-2-song@kernel.org --- include/linux/bpf.h | 2 ++ kernel/bpf/helpers.c | 19 +++++++++++++++++++ kernel/trace/bpf_trace.c | 12 ++++++++---- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4825d3cdb292..eb84caf133df9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1222,6 +1222,8 @@ enum bpf_dynptr_type { int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e46ac288a1080..6b7c8163e128d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2611,3 +2611,22 @@ static int __init kfunc_init(void) } late_initcall(kfunc_init); + +/* Get a pointer to dynptr data up to len bytes for read only access. If + * the dynptr doesn't have continuous data up to len bytes, return NULL. + */ +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) +{ + return bpf_dynptr_slice(ptr, 0, NULL, len); +} + +/* Get a pointer to dynptr data up to len bytes for read write access. If + * the dynptr doesn't have continuous data up to len bytes, or the dynptr + * is read only, return NULL. + */ +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len) +{ + if (__bpf_dynptr_is_rdonly(ptr)) + return NULL; + return (void *)__bpf_dynptr_data(ptr, len); +} diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index df697c74d5197..d525a22b8d560 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1378,6 +1378,8 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, struct bpf_dynptr_kern *sig_ptr, struct bpf_key *trusted_keyring) { + const void *data, *sig; + u32 data_len, sig_len; int ret; if (trusted_keyring->has_ref) { @@ -1394,10 +1396,12 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, return ret; } - return verify_pkcs7_signature(data_ptr->data, - __bpf_dynptr_size(data_ptr), - sig_ptr->data, - __bpf_dynptr_size(sig_ptr), + data_len = __bpf_dynptr_size(data_ptr); + data = __bpf_dynptr_data(data_ptr, data_len); + sig_len = __bpf_dynptr_size(sig_ptr); + sig = __bpf_dynptr_data(sig_ptr, sig_len); + + return verify_pkcs7_signature(data, data_len, sig, sig_len, trusted_keyring->key, VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); From e1e43a420bd074eda92640607c65591a212fc54f Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Fri, 3 Nov 2023 09:11:26 +0100 Subject: [PATCH 24/41] bpftool: Fix prog object type in manpage bpftool's man page lists "program" as one of possible values for OBJECT, while in fact bpftool accepts "prog" instead. 
Reported-by: Jerry Snitselaar Signed-off-by: Artem Savkov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20231103081126.170034-1-asavkov@redhat.com --- tools/bpf/bpftool/Documentation/bpftool.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6965c94dfdafe..09e4f2ff5658b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -20,7 +20,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **link** | **cgroup** | **perf** | **net** | **feature** | + *OBJECT* := { **map** | **prog** | **link** | **cgroup** | **perf** | **net** | **feature** | **btf** | **gen** | **struct_ops** | **iter** } *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } From 042c604f07654d301c7ac08707bc9aad38ccf143 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 6 Nov 2023 20:57:24 -0800 Subject: [PATCH 25/41] bpf: Factor out helper check_reg_const_str() ARG_PTR_TO_CONST_STR is used to specify constant string args for BPF helpers. The logic that verifies a reg is ARG_PTR_TO_CONST_STR is implemented in check_func_arg(). As we introduce kfuncs with constant string args, it is necessary to do the same check for kfuncs (in check_kfunc_args). Factor out the logic for ARG_PTR_TO_CONST_STR to a new check_reg_const_str() so that it can be reused. check_func_arg() ensures check_reg_const_str() is only called with reg of type PTR_TO_MAP_VALUE. Add a redundent type check in check_reg_const_str() to avoid misuse in the future. Other than this redundent check, there is no change in behavior. Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/bpf/20231107045725.2278852-3-song@kernel.org --- kernel/bpf/verifier.c | 85 +++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2197385d91dc6..618446006d5a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8718,6 +8718,54 @@ static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env, return state->stack[spi].spilled_ptr.dynptr.type; } +static int check_reg_const_str(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno) +{ + struct bpf_map *map = reg->map_ptr; + int err; + int map_off; + u64 map_addr; + char *str_ptr; + + if (reg->type != PTR_TO_MAP_VALUE) + return -EINVAL; + + if (!bpf_map_is_rdonly(map)) { + verbose(env, "R%d does not point to a readonly map'\n", regno); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d is not a constant address'\n", regno); + return -EACCES; + } + + if (!map->ops->map_direct_value_addr) { + verbose(env, "no direct value access support for this map type\n"); + return -EACCES; + } + + err = check_map_access(env, regno, reg->off, + map->value_size - reg->off, false, + ACCESS_HELPER); + if (err) + return err; + + map_off = reg->off + reg->var_off.value; + err = map->ops->map_direct_value_addr(map, &map_addr, map_off); + if (err) { + verbose(env, "direct value access on string failed\n"); + return err; + } + + str_ptr = (char *)(long)(map_addr); + if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { + verbose(env, "string is not zero-terminated\n"); + return -EINVAL; + } + return 0; +} + static int 
check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn, @@ -8962,44 +9010,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, } case ARG_PTR_TO_CONST_STR: { - struct bpf_map *map = reg->map_ptr; - int map_off; - u64 map_addr; - char *str_ptr; - - if (!bpf_map_is_rdonly(map)) { - verbose(env, "R%d does not point to a readonly map'\n", regno); - return -EACCES; - } - - if (!tnum_is_const(reg->var_off)) { - verbose(env, "R%d is not a constant address'\n", regno); - return -EACCES; - } - - if (!map->ops->map_direct_value_addr) { - verbose(env, "no direct value access support for this map type\n"); - return -EACCES; - } - - err = check_map_access(env, regno, reg->off, - map->value_size - reg->off, false, - ACCESS_HELPER); + err = check_reg_const_str(env, reg, regno); if (err) return err; - - map_off = reg->off + reg->var_off.value; - err = map->ops->map_direct_value_addr(map, &map_addr, map_off); - if (err) { - verbose(env, "direct value access on string failed\n"); - return err; - } - - str_ptr = (char *)(long)(map_addr); - if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { - verbose(env, "string is not zero-terminated\n"); - return -EINVAL; - } break; } case ARG_PTR_TO_KPTR: From 47696c01057fca4e8a01c12db8bcbb55bebda587 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 3 Nov 2023 23:09:12 +0100 Subject: [PATCH 26/41] selftests/bpf: Disable CONFIG_DEBUG_INFO_REDUCED in config.aarch64 Building an arm64 kernel and seftests/bpf with defconfig + selftests/bpf/config and selftests/bpf/config.aarch64 the fragment CONFIG_DEBUG_INFO_REDUCED is enabled in arm64's defconfig, it should be disabled in file sefltests/bpf/config.aarch64 since if its not disabled CONFIG_DEBUG_INFO_BTF wont be enabled. Signed-off-by: Anders Roxell Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231103220912.333930-1-anders.roxell@linaro.org --- tools/testing/selftests/bpf/config.aarch64 | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index fa8ecf626c73e..29c8635c57220 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -36,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y From 24a46ed7ef03d3c74bdd69da9dadd04065313220 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 6 Nov 2023 20:57:25 -0800 Subject: [PATCH 27/41] bpf: Introduce KF_ARG_PTR_TO_CONST_STR Similar to ARG_PTR_TO_CONST_STR for BPF helpers, KF_ARG_PTR_TO_CONST_STR specifies kfunc args that point to const strings. Annotation "__str" is used to specify kfunc arg of type KF_ARG_PTR_TO_CONST_STR. Also, add documentation for the "__str" annotation. bpf_get_file_xattr() will be the first kfunc that uses this type. 
Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/bpf/20231107045725.2278852-4-song@kernel.org --- Documentation/bpf/kfuncs.rst | 24 ++++++++++++++++++++++++ kernel/bpf/verifier.c | 19 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 0d2647fb358d7..bfe065f7e23c8 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -137,6 +137,30 @@ Either way, the returned buffer is either NULL, or of size buffer_szk. Without t annotation, the verifier will reject the program if a null pointer is passed in with a nonzero size. +2.2.5 __str Annotation +---------------------------- +This annotation is used to indicate that the argument is a constant string. + +An example is given below:: + + __bpf_kfunc bpf_get_file_xattr(..., const char *name__str, ...) + { + ... + } + +In this case, ``bpf_get_file_xattr()`` can be called as:: + + bpf_get_file_xattr(..., "xattr_name", ...); + +Or:: + + const char name[] = "xattr_name"; /* This need to be global */ + int BPF_PROG(...) + { + ... + bpf_get_file_xattr(..., name, ...); + ... + } .. _BPF_kfunc_nodef: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 618446006d5a0..bf94ba50c6ee7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10803,6 +10803,11 @@ static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param return __kfunc_param_match_suffix(btf, arg, "__nullable"); } +static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__str"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -10946,6 +10951,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_RB_ROOT, KF_ARG_PTR_TO_RB_NODE, KF_ARG_PTR_TO_NULL, + KF_ARG_PTR_TO_CONST_STR, }; enum special_kfunc_type { @@ -11090,6 +11096,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno])) return KF_ARG_PTR_TO_RB_NODE; + if (is_kfunc_arg_const_str(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_CONST_STR; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11713,6 +11722,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + case KF_ARG_PTR_TO_CONST_STR: /* Trusted by default */ break; default: @@ -11984,6 +11994,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->arg_btf = reg->btf; meta->arg_btf_id = reg->btf_id; break; + case KF_ARG_PTR_TO_CONST_STR: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#%d doesn't point to a const string\n", i); + return -EINVAL; + } + ret = check_reg_const_str(env, reg, regno); + if (ret) + return ret; + break; } } From ef3b159f0603815d950fff4b068318b6afc0c0ae Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Sun, 5 Nov 2023 09:58:01 +0100 Subject: [PATCH 28/41] bpf, lpm: Fix check prefixlen before walking trie When looking up an element in LPM trie, the condition 'matchlen == trie->max_prefixlen' will never return true, if key->prefixlen is larger than trie->max_prefixlen. 
Consequently all elements in the LPM trie will be visited and no element is returned in the end. To resolve this, check key->prefixlen first before walking the LPM trie. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Signed-off-by: Florian Lehner Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231105085801.3742-1-dev@der-flo.net --- kernel/bpf/lpm_trie.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 17c7e7782a1f7..b32be680da6cd 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -231,6 +231,9 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) struct lpm_trie_node *node, *found = NULL; struct bpf_lpm_trie_key *key = _key; + if (key->prefixlen > trie->max_prefixlen) + return NULL; + /* Start walking the trie from the root node ... */ for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held()); From b4e59c17bed4ab14a50e03e2a577578fc54fc403 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 7 Nov 2023 12:15:11 -0800 Subject: [PATCH 29/41] libbpf: Fix potential uninitialized tail padding with LIBBPF_OPTS_RESET Martin reported that there is a libbpf complaining of non-zero-value tail padding with LIBBPF_OPTS_RESET macro if struct bpf_netkit_opts is modified to have a 4-byte tail padding. This only happens to clang compiler. The commend line is: ./test_progs -t tc_netkit_multi_links Martin and I did some investigation and found this indeed the case and the following are the investigation details. Clang: clang version 18.0.0 tools/lib/bpf/libbpf_common.h: #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ memset(&NAME, 0, sizeof(NAME)); \ NAME = (typeof(NAME)) { \ .sz = sizeof(NAME), \ __VA_ARGS__ \ }; \ } while (0) #endif tools/lib/bpf/libbpf.h: struct bpf_netkit_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; __u32 flags; __u32 relative_fd; __u32 relative_id; __u64 expected_revision; size_t :0; }; #define bpf_netkit_opts__last_field expected_revision In the above struct bpf_netkit_opts, there is no tail padding. prog_tests/tc_netkit.c: static void serial_test_tc_netkit_multi_links_target(int mode, int target) { ... LIBBPF_OPTS(bpf_netkit_opts, optl); ... LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, .relative_fd = bpf_program__fd(skel->progs.tc1), ); ... } Let us make the following source change, note that we have a 4-byte tailing padding now. 
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..0dd83910ae9a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -803,13 +803,13 @@ bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, struct bpf_netkit_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; - __u32 flags; __u32 relative_fd; __u32 relative_id; __u64 expected_revision; + __u32 flags; size_t :0; }; -#define bpf_netkit_opts__last_field expected_revision +#define bpf_netkit_opts__last_field flags The clang 18 generated asm code looks like below: ; LIBBPF_OPTS_RESET(optl, 55e3: 48 8d 7d 98 leaq -0x68(%rbp), %rdi 55e7: 31 f6 xorl %esi, %esi 55e9: ba 20 00 00 00 movl $0x20, %edx 55ee: e8 00 00 00 00 callq 0x55f3 55f3: 48 c7 85 10 fd ff ff 20 00 00 00 movq $0x20, -0x2f0(%rbp) 55fe: 48 8b 85 68 ff ff ff movq -0x98(%rbp), %rax 5605: 48 8b 78 18 movq 0x18(%rax), %rdi 5609: e8 00 00 00 00 callq 0x560e 560e: 89 85 18 fd ff ff movl %eax, -0x2e8(%rbp) 5614: c7 85 1c fd ff ff 00 00 00 00 movl $0x0, -0x2e4(%rbp) 561e: 48 c7 85 20 fd ff ff 00 00 00 00 movq $0x0, -0x2e0(%rbp) 5629: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) 5633: 48 8b 85 10 fd ff ff movq -0x2f0(%rbp), %rax 563a: 48 89 45 98 movq %rax, -0x68(%rbp) 563e: 48 8b 85 18 fd ff ff movq -0x2e8(%rbp), %rax 5645: 48 89 45 a0 movq %rax, -0x60(%rbp) 5649: 48 8b 85 20 fd ff ff movq -0x2e0(%rbp), %rax 5650: 48 89 45 a8 movq %rax, -0x58(%rbp) 5654: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565b: 48 89 45 b0 movq %rax, -0x50(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); At -O0 level, the clang compiler creates an intermediate copy. We have below to store 'flags' with 4-byte store and leave another 4 byte in the same 8-byte-aligned storage undefined, 5629: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) and later we store 8-byte to the original zero'ed buffer 5654: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565b: 48 89 45 b0 movq %rax, -0x50(%rbp) This caused a problem as the 4-byte value at [%rbp-0x2dc, %rbp-0x2e0) may be garbage. gcc (gcc 11.4) does not have this issue as it does zeroing struct first before doing assignments: ; LIBBPF_OPTS_RESET(optl, 50fd: 48 8d 85 40 fc ff ff leaq -0x3c0(%rbp), %rax 5104: ba 20 00 00 00 movl $0x20, %edx 5109: be 00 00 00 00 movl $0x0, %esi 510e: 48 89 c7 movq %rax, %rdi 5111: e8 00 00 00 00 callq 0x5116 5116: 48 8b 45 f0 movq -0x10(%rbp), %rax 511a: 48 8b 40 18 movq 0x18(%rax), %rax 511e: 48 89 c7 movq %rax, %rdi 5121: e8 00 00 00 00 callq 0x5126 5126: 48 c7 85 40 fc ff ff 00 00 00 00 movq $0x0, -0x3c0(%rbp) 5131: 48 c7 85 48 fc ff ff 00 00 00 00 movq $0x0, -0x3b8(%rbp) 513c: 48 c7 85 50 fc ff ff 00 00 00 00 movq $0x0, -0x3b0(%rbp) 5147: 48 c7 85 58 fc ff ff 00 00 00 00 movq $0x0, -0x3a8(%rbp) 5152: 48 c7 85 40 fc ff ff 20 00 00 00 movq $0x20, -0x3c0(%rbp) 515d: 89 85 48 fc ff ff movl %eax, -0x3b8(%rbp) 5163: c7 85 58 fc ff ff 08 00 00 00 movl $0x8, -0x3a8(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); It is not clear how to resolve the compiler code generation as the compiler generates correct code w.r.t. how to handle unnamed padding in C standard. So this patch changed LIBBPF_OPTS_RESET macro to avoid uninitialized tail padding. We already knows LIBBPF_OPTS macro works on both gcc and clang, even with tail padding. So LIBBPF_OPTS_RESET is changed to be a LIBBPF_OPTS followed by a memcpy(), thus avoiding uninitialized tail padding. 
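A standalone illustration of the underlying C semantics (a hedged, minimal reproduction sketch, not libbpf code; the struct layout is hypothetical but mirrors the 4-byte tail padding discussed above):

	#include <string.h>

	struct opts {
		size_t sz;
		unsigned int flags;	/* 4 bytes of tail padding follow on LP64 */
	};

	static void reset_opts(struct opts *o)
	{
		memset(o, 0, sizeof(*o));		/* padding bytes are 0 here */
		*o = (struct opts){ .sz = sizeof(*o) };
		/* After the structure assignment, C11 6.2.6.1p6 allows the
		 * padding bytes to take unspecified values again, so clang may
		 * legitimately copy garbage into them while gcc happens to
		 * zero the whole object first.
		 */
	}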
The below is asm code generated with this patch and with clang compiler: ; LIBBPF_OPTS_RESET(optl, 55e3: 48 8d bd 10 fd ff ff leaq -0x2f0(%rbp), %rdi 55ea: 31 f6 xorl %esi, %esi 55ec: ba 20 00 00 00 movl $0x20, %edx 55f1: e8 00 00 00 00 callq 0x55f6 55f6: 48 c7 85 10 fd ff ff 20 00 00 00 movq $0x20, -0x2f0(%rbp) 5601: 48 8b 85 68 ff ff ff movq -0x98(%rbp), %rax 5608: 48 8b 78 18 movq 0x18(%rax), %rdi 560c: e8 00 00 00 00 callq 0x5611 5611: 89 85 18 fd ff ff movl %eax, -0x2e8(%rbp) 5617: c7 85 1c fd ff ff 00 00 00 00 movl $0x0, -0x2e4(%rbp) 5621: 48 c7 85 20 fd ff ff 00 00 00 00 movq $0x0, -0x2e0(%rbp) 562c: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) 5636: 48 8b 85 10 fd ff ff movq -0x2f0(%rbp), %rax 563d: 48 89 45 98 movq %rax, -0x68(%rbp) 5641: 48 8b 85 18 fd ff ff movq -0x2e8(%rbp), %rax 5648: 48 89 45 a0 movq %rax, -0x60(%rbp) 564c: 48 8b 85 20 fd ff ff movq -0x2e0(%rbp), %rax 5653: 48 89 45 a8 movq %rax, -0x58(%rbp) 5657: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565e: 48 89 45 b0 movq %rax, -0x50(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); In the above code, a temporary buffer is zeroed and then has proper value assigned. Finally, values in temporary buffer are copied to the original variable buffer, hence tail padding is guaranteed to be 0. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Tested-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20231107201511.2548645-1-yonghong.song@linux.dev --- tools/lib/bpf/libbpf_common.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index b7060f2544861..8fe248e14eb63 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -79,11 +79,14 @@ */ #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ - memset(&NAME, 0, sizeof(NAME)); \ - NAME = (typeof(NAME)) { \ - .sz = sizeof(NAME), \ - __VA_ARGS__ \ - }; \ + typeof(NAME) ___##NAME = ({ \ + memset(&___##NAME, 0, sizeof(NAME)); \ + (typeof(NAME)) { \ + .sz = sizeof(NAME), \ + __VA_ARGS__ \ + }; \ + }); \ + memcpy(&NAME, &___##NAME, sizeof(NAME)); \ } while (0) #endif /* __LIBBPF_LIBBPF_COMMON_H */ From dae6c6b3b79f6fe23b1be5f2b6d58e5d1151a092 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 7 Nov 2023 21:14:29 -0800 Subject: [PATCH 30/41] veristat: add ability to sort by stat's absolute value Add ability to sort results by absolute values of specified stats. This is especially useful to find biggest deviations in comparison mode. When comparing verifier change effect against a large base of BPF object files, it's necessary to see big changes both in positive and negative directions, as both might be a signal for regressions or bugs. The syntax is natural, e.g., adding `-s '|insns_diff|'^` will instruct veristat to sort by absolute value of instructions difference in ascending order. 
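For reference, a self-contained sketch of the comparison rule being added is below. This is not veristat code, just a qsort() callback that mirrors the absolute-value handling; the names and the global ascending flag are made up for illustration.

#include <stdio.h>
#include <stdlib.h>

static int asc = 1;	/* ascending order, like the trailing '^' in the sort spec */

static int cmp_abs(const void *a, const void *b)
{
	long v1 = *(const long *)a, v2 = *(const long *)b;
	int cmp = 0;

	/* compare magnitudes; the stored values keep their sign */
	v1 = v1 < 0 ? -v1 : v1;
	v2 = v2 < 0 ? -v2 : v2;
	if (v1 != v2)
		cmp = v1 < v2 ? -1 : 1;
	return asc ? cmp : -cmp;
}

int main(void)
{
	long diffs[] = { -80, 3, 0, -7, 42 };
	size_t i, n = sizeof(diffs) / sizeof(diffs[0]);

	qsort(diffs, n, sizeof(diffs[0]), cmp_abs);
	for (i = 0; i < n; i++)
		printf("%ld ", diffs[i]);	/* prints: 0 3 -7 42 -80 */
	printf("\n");
	return 0;
}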
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231108051430.1830950-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 68 +++++++++++++++++++++----- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 655095810d4a1..102914f705737 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -99,6 +100,7 @@ struct stat_specs { enum stat_id ids[ALL_STATS_CNT]; enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; + bool abs[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -133,6 +135,7 @@ struct filter { int stat_id; enum stat_variant stat_var; long value; + bool abs; }; static struct env { @@ -455,7 +458,8 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -488,13 +492,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) long val; const char *end = str; const char *op_str; + bool is_abs; op_str = operators[i].op_str; p = strstr(str, op_str); if (!p) continue; - if (!parse_stat_id_var(str, p - str, &id, &var)) { + if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -533,6 +538,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f->stat_id = id; f->stat_var = var; f->op = operators[i].op_kind; + f->abs = true; f->value = val; *cnt += 1; @@ -657,7 +663,8 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs) { static const char *var_sfxs[] = { [VARIANT_A] = "_a", @@ -667,6 +674,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v }; int i, j, k; + /* || means we take absolute value of given stat */ + *is_abs = false; + if (len > 2 && name[0] == '|' && name[len - 1] == '|') { + *is_abs = true; + name += 1; + len -= 2; + } + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; size_t alias_len, sfx_len; @@ -722,7 +737,7 @@ static bool is_desc_sym(char c) static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; - bool has_order = false, is_asc = false; + bool has_order = false, is_asc = false, is_abs = false; size_t len = strlen(stat_name); enum stat_variant var; @@ -737,7 +752,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id_var(stat_name, len, &id, &var)) { + if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } @@ -745,6 +760,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) specs->ids[specs->spec_cnt] = id; specs->variants[specs->spec_cnt] = var; specs->asc[specs->spec_cnt] = has_order ? 
is_asc : stat_defs[id].asc_by_default; + specs->abs[specs->spec_cnt] = is_abs; specs->spec_cnt++; return 0; @@ -1103,7 +1119,7 @@ static int process_obj(const char *filename) } static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, - enum stat_id id, bool asc) + enum stat_id id, bool asc, bool abs) { int cmp = 0; @@ -1124,6 +1140,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, long v1 = s1->stats[id]; long v2 = s2->stats[id]; + if (abs) { + v1 = v1 < 0 ? -v1 : v1; + v2 = v2 < 0 ? -v2 : v2; + } + if (v1 != v2) cmp = v1 < v2 ? -1 : 1; break; @@ -1142,7 +1163,8 @@ static int cmp_prog_stats(const void *v1, const void *v2) int i, cmp; for (i = 0; i < env.sort_spec.spec_cnt; i++) { - cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]); + cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], + env.sort_spec.asc[i], env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1211,7 +1233,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s, static int cmp_join_stat(const struct verif_stats_join *s1, const struct verif_stats_join *s2, - enum stat_id id, enum stat_variant var, bool asc) + enum stat_id id, enum stat_variant var, + bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; double v1, v2; @@ -1220,6 +1243,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1, fetch_join_stat_value(s1, id, var, &str1, &v1); fetch_join_stat_value(s2, id, var, &str2, &v2); + if (abs) { + v1 = fabs(v1); + v2 = fabs(v2); + } + if (str1) cmp = strcmp(str1, str2); else if (v1 != v2) @@ -1237,7 +1265,8 @@ static int cmp_join_stats(const void *v1, const void *v2) cmp = cmp_join_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.variants[i], - env.sort_spec.asc[i]); + env.sort_spec.asc[i], + env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1720,6 +1749,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + if (f->abs) + value = fabs(value); + switch (f->op) { case OP_EQ: return value > f->value - eps && value < f->value + eps; case OP_NEQ: return value < f->value - eps || value > f->value + eps; @@ -1766,7 +1798,7 @@ static int handle_comparison_mode(void) struct stat_specs base_specs = {}, comp_specs = {}; struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j, last_idx; + int err, i, j, last_idx, cnt; if (env.filename_cnt != 2) { fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); @@ -1879,7 +1911,7 @@ static int handle_comparison_mode(void) env.join_stat_cnt += 1; } - /* now sort joined results accorsing to sort spec */ + /* now sort joined results according to sort spec */ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); /* for human-readable table output we need to do extra pass to @@ -1896,16 +1928,22 @@ static int handle_comparison_mode(void) output_comp_headers(cur_fmt); last_idx = -1; + cnt = 0; for (i = 0; i < env.join_stat_cnt; i++) { const struct verif_stats_join *join = &env.join_stats[i]; if (!should_output_join_stats(join)) continue; + if (env.top_n && cnt >= env.top_n) + break; + if (cur_fmt == RESFMT_TABLE_CALCLEN) last_idx = i; output_comp_stats(join, cur_fmt, i == last_idx); + + cnt++; } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1920,6 +1958,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s { long value = stats->stats[f->stat_id]; + if (f->abs) + value = value < 0 ? 
-value : value; + switch (f->op) { case OP_EQ: return value == f->value; case OP_NEQ: return value != f->value; @@ -1964,7 +2005,7 @@ static bool should_output_stats(const struct verif_stats *stats) static void output_prog_stats(void) { const struct verif_stats *stats; - int i, last_stat_idx = 0; + int i, last_stat_idx = 0, cnt = 0; if (env.out_fmt == RESFMT_TABLE) { /* calculate column widths */ @@ -1984,7 +2025,10 @@ static void output_prog_stats(void) stats = &env.prog_stats[i]; if (!should_output_stats(stats)) continue; + if (env.top_n && cnt >= env.top_n) + break; output_stats(stats, env.out_fmt, i == last_stat_idx); + cnt++; } } From 0ca98fca84b33ca250798d6625cf3b1b607d4877 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 7 Nov 2023 21:14:30 -0800 Subject: [PATCH 31/41] veristat: add ability to filter top N results Add ability to filter top B results, both in replay/verifier mode and comparison mode. Just adding `-n10` will emit only first 10 rows, or less, if there is not enough rows. This is not just a shortcut instead of passing veristat output through `head`, though. Filtering out all the other rows influences final table formatting, as table column widths are calculated based on actual emitted test. To demonstrate the difference, compare two "equivalent" forms below, one using head and another using -n argument. TOP N FEATURE ============= [vmuser@archvm bpf]$ sudo ./veristat -C ~/baseline-results-selftests.csv ~/sanity2-results-selftests.csv -e file,prog,insns,states -s '|insns_diff|' -n10 File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ---------------------------------------- --------------------- --------- --------- ------------ ---------- ---------- ------------- test_seg6_loop.bpf.linked3.o __add_egr_x 12440 12360 -80 (-0.64%) 364 357 -7 (-1.92%) async_stack_depth.bpf.linked3.o async_call_root_check 145 145 +0 (+0.00%) 3 3 +0 (+0.00%) async_stack_depth.bpf.linked3.o pseudo_call_check 139 139 +0 (+0.00%) 3 3 +0 (+0.00%) atomic_bounds.bpf.linked3.o sub 7 7 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o kmalloc 5 5 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o sched_process_fork 22 22 +0 (+0.00%) 2 2 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o socket_post_create 23 23 +0 (+0.00%) 2 2 +0 (+0.00%) bind4_prog.bpf.linked3.o bind_v4_prog 358 358 +0 (+0.00%) 33 33 +0 (+0.00%) bind6_prog.bpf.linked3.o bind_v6_prog 429 429 +0 (+0.00%) 37 37 +0 (+0.00%) bind_perm.bpf.linked3.o bind_v4_prog 15 15 +0 (+0.00%) 1 1 +0 (+0.00%) PIPING TO HEAD ============== [vmuser@archvm bpf]$ sudo ./veristat -C ~/baseline-results-selftests.csv ~/sanity2-results-selftests.csv -e file,prog,insns,states -s '|insns_diff|' | head -n12 File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ----------------------------------------------------- ---------------------------------------------------- --------- --------- ------------ ---------- ---------- ------------- test_seg6_loop.bpf.linked3.o __add_egr_x 12440 12360 -80 (-0.64%) 364 357 -7 (-1.92%) async_stack_depth.bpf.linked3.o async_call_root_check 145 145 +0 (+0.00%) 3 3 +0 (+0.00%) async_stack_depth.bpf.linked3.o pseudo_call_check 139 139 +0 (+0.00%) 3 3 +0 (+0.00%) atomic_bounds.bpf.linked3.o sub 7 7 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o kmalloc 5 5 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o sched_process_fork 22 22 +0 (+0.00%) 2 2 +0 (+0.00%) 
bench_local_storage_create.bpf.linked3.o socket_post_create 23 23 +0 (+0.00%) 2 2 +0 (+0.00%) bind4_prog.bpf.linked3.o bind_v4_prog 358 358 +0 (+0.00%) 33 33 +0 (+0.00%) bind6_prog.bpf.linked3.o bind_v6_prog 429 429 +0 (+0.00%) 37 37 +0 (+0.00%) bind_perm.bpf.linked3.o bind_v4_prog 15 15 +0 (+0.00%) 1 1 +0 (+0.00%) Note all the wasted whitespace in the "PIPING TO HEAD" variant. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231108051430.1830950-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 102914f705737..443a29fc6a62a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -149,6 +149,7 @@ static struct env { bool show_version; bool comparison_mode; bool replay_mode; + int top_n; int log_level; int log_size; @@ -215,6 +216,7 @@ static const struct argp_option opts[] = { { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, { "test-states", 't', NULL, 0, "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -293,6 +295,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'n': + errno = 0; + env.top_n = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top N specifier: %s\n", arg); + argp_usage(state); + } + break; case 'C': env.comparison_mode = true; break; From 3815f89592d523792d3b9c87e496cf768f115b8b Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Wed, 8 Nov 2023 22:00:41 +0800 Subject: [PATCH 32/41] bpf: replace register_is_const() with is_reg_const() The addition of is_reg_const() in commit 171de12646d2 ("bpf: generalize is_branch_taken to handle all conditional jumps in one place") has made the register_is_const() redundant. Give the former has more feature, plus the fact the latter is only used in one place, replace register_is_const() with is_reg_const(), and remove the definition of register_is_const. This requires moving the definition of is_reg_const() further up. And since the comment of reg_const_value() reference is_reg_const(), move it up as well. Signed-off-by: Shung-Hsi Yu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231108140043.12282-1-shung-hsi.yu@suse.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf94ba50c6ee7..e3d5d465c2f23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4685,9 +4685,17 @@ static bool register_is_null(struct bpf_reg_state *reg) return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); } -static bool register_is_const(struct bpf_reg_state *reg) +/* check if register is a constant scalar value */ +static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32) +{ + return reg->type == SCALAR_VALUE && + tnum_is_const(subreg32 ? 
tnum_subreg(reg->var_off) : reg->var_off); +} + +/* assuming is_reg_const() is true, return constant value of a register */ +static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) { - return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); + return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; } static bool __is_scalar_unbounded(struct bpf_reg_state *reg) @@ -10043,7 +10051,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, val = reg->var_off.value; max = map->max_entries; - if (!(register_is_const(reg) && val < max)) { + if (!(is_reg_const(reg, false) && val < max)) { bpf_map_key_store(aux, BPF_MAP_KEY_POISON); return 0; } @@ -14199,19 +14207,6 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, })); } -/* check if register is a constant scalar value */ -static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32) -{ - return reg->type == SCALAR_VALUE && - tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off); -} - -/* assuming is_reg_const() is true, return constant value of a register */ -static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) -{ - return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; -} - /* * , currently assuming reg2 is a constant */ From 8a53f8c65c9fa6670fd27a8510ce8b52f244d2b9 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:34 -0800 Subject: [PATCH 33/41] bpf: Add KF_RCU flag to bpf_refcount_acquire_impl Refcounted local kptrs are kptrs to user-defined types with a bpf_refcount field. Recent commits ([0], [1]) modified the lifetime of refcounted local kptrs such that the underlying memory is not reused until RCU grace period has elapsed. Separately, verification of bpf_refcount_acquire calls currently succeeds for MAYBE_NULL non-owning reference input, which is a problem as bpf_refcount_acquire_impl has no handling for this case. This patch takes advantage of aforementioned lifetime changes to tag bpf_refcount_acquire_impl kfunc KF_RCU, thereby preventing MAYBE_NULL input to the kfunc. The KF_RCU flag applies to all kfunc params; it's fine for it to apply to the void *meta__ign param as that's populated by the verifier and is tagged __ign regardless. 
[0]: commit 7e26cd12ad1c ("bpf: Use bpf_mem_free_rcu when bpf_obj_dropping refcounted nodes") is the actual change to allocation behavior [1]: commit 0816b8c6bf7f ("bpf: Consider non-owning refs to refcounted nodes RCU protected") modified verifier understanding of refcounted local kptrs to match [0]'s changes Signed-off-by: Dave Marchevsky Fixes: 7c50b1cb76ac ("bpf: Add bpf_refcount_acquire kfunc") Link: https://lore.kernel.org/r/20231107085639.3016113-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6b7c8163e128d..f305b09c82991 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2515,7 +2515,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) From 315bbb93dafc11f6be77306532c1861b30389058 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:35 -0800 Subject: [PATCH 34/41] selftests/bpf: Add test passing MAYBE_NULL reg to bpf_refcount_acquire The test added in this patch exercises the logic fixed in the previous patch in this series. Before the previous patch's changes, bpf_refcount_acquire accepted MAYBE_NULL local kptrs; after the change the verifier correctly rejects such a call. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/refcounted_kptr_fail.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee580d..1553b9c16aa7f 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -53,6 +53,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) return 0; } +SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) From cf1ec20de34b6741e898b02c5f44c792a0437008 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:36 -0800 Subject: [PATCH 35/41] bpf: Use bpf_mem_free_rcu when bpf_obj_dropping non-refcounted nodes The use of bpf_mem_free_rcu to free refcounted local kptrs was added in commit 7e26cd12ad1c ("bpf: Use bpf_mem_free_rcu when bpf_obj_dropping refcounted nodes"). In the cover letter for the series containing that patch [0] I commented: Perhaps it makes sense to move to mem_free_rcu for _all_ non-owning refs in the future, not just refcounted.
This might allow custom non-owning ref lifetime + invalidation logic to be entirely subsumed by MEM_RCU handling. IMO this needs a bit more thought and should be tackled outside of a fix series, so it's not attempted here. It's time to start moving in the "non-owning refs have MEM_RCU lifetime" direction. As mentioned in that comment, using bpf_mem_free_rcu for all local kptrs - not just refcounted - is necessarily the first step towards that goal. This patch does so. After this patch the memory pointed to by all local kptrs will not be reused until RCU grace period elapses. The verifier's understanding of non-owning ref validity and the clobbering logic it uses to enforce that understanding are not changed here, that'll happen gradually in future work, including further patches in the series. [0]: https://lore.kernel.org/all/20230821193311.3290257-1-davemarchevsky@fb.com/ Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f305b09c82991..3f79bc87b70f9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1932,10 +1932,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) ma = &bpf_global_percpu_ma; else ma = &bpf_global_ma; - if (rec && rec->refcount_off >= 0) - bpf_mem_free_rcu(ma, p); - else - bpf_mem_free(ma, p); + bpf_mem_free_rcu(ma, p); } __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) From a7b6ab9481d03f593aac9074b0be7ea773d78d17 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:37 -0800 Subject: [PATCH 36/41] bpf: Move GRAPH_{ROOT,NODE}_MASK macros into btf_field_type enum This refactoring patch removes the unused BPF_GRAPH_NODE_OR_ROOT btf_field_type and moves BPF_GRAPH_{NODE,ROOT} macros into the btf_field_type enum. Further patches in the series will use BPF_GRAPH_NODE, so let's move this useful definition out of btf.c. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-5-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- kernel/bpf/btf.c | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eb84caf133df9..4001d11be1516 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,8 +186,8 @@ enum btf_field_type { BPF_LIST_NODE = (1 << 6), BPF_RB_ROOT = (1 << 7), BPF_RB_NODE = (1 << 8), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | - BPF_RB_NODE | BPF_RB_ROOT, + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 15d71d2986d3a..63cf4128fc059 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3840,9 +3840,6 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type return ERR_PTR(ret); } -#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT) -#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE) - int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) { int i; @@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles. 
*/ - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT)) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; u32 btf_id; - if (!(rec->fields[i].type & GRAPH_ROOT_MASK)) + if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) continue; btf_id = rec->fields[i].graph_root.value_btf_id; meta = btf_find_struct_meta(btf, btf_id); @@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * to check ownership cycle for a type unless it's also a * node type. */ - if (!(rec->field_mask & GRAPH_NODE_MASK)) + if (!(rec->field_mask & BPF_GRAPH_NODE)) continue; /* We need to ensure ownership acyclicity among all types. The @@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * - A is both an root and node. * - B is only an node. */ - if (meta->record->field_mask & GRAPH_ROOT_MASK) + if (meta->record->field_mask & BPF_GRAPH_ROOT) return -ELOOP; } return 0; From 6eba51e33182f47e7b727cb9d118ba8f1ca7a5e9 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:38 -0800 Subject: [PATCH 37/41] bpf: Mark direct ld of stashed bpf_{rb,list}_node as non-owning ref This patch enables the following pattern: /* mapval contains a __kptr pointing to refcounted local kptr */ mapval = bpf_map_lookup_elem(&map, &idx); if (!mapval || !mapval->some_kptr) { /* omitted */ } p = bpf_refcount_acquire(&mapval->some_kptr); Currently this doesn't work because bpf_refcount_acquire expects an owning or non-owning ref. The verifier defines non-owning ref as a type: PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF while mapval->some_kptr is PTR_TO_BTF_ID | PTR_UNTRUSTED. It's possible to do the refcount_acquire by first bpf_kptr_xchg'ing mapval->some_kptr into a temp kptr, refcount_acquiring that, and xchg'ing back into mapval, but this is unwieldy and shouldn't be necessary. This patch modifies btf_ld_kptr_type such that user-allocated types are marked MEM_ALLOC and if those types have a bpf_{rb,list}_node they're marked NON_OWN_REF as well. Additionally, due to changes to bpf_obj_drop_impl earlier in this series, rcu_protected_object now returns true for all user-allocated types, resulting in mapval->some_kptr being marked MEM_RCU. After this patch's changes, mapval->some_kptr is now: PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU which results in it passing the non-owning ref test, and the motivating example passing verification. Future work will likely get rid of special non-owning ref lifetime logic in the verifier, at which point we'll be able to delete the NON_OWN_REF flag entirely. 
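To make the "unwieldy" alternative concrete, below is a rough sketch of the bpf_kptr_xchg() dance described above, which this patch makes unnecessary. All type, map, and program names are hypothetical, and the program is illustrative only; it has not been run through the verifier.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_experimental.h"

struct refcounted_obj {
	long data;
	struct bpf_rb_node rb_node;
	struct bpf_refcount refcount;
};

struct map_val {
	struct refcounted_obj __kptr *some_kptr;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, int);
	__type(value, struct map_val);
	__uint(max_entries, 1);
} some_map SEC(".maps");

SEC("tc")
long acquire_via_kptr_xchg(void *ctx)
{
	struct refcounted_obj *tmp, *p;
	struct map_val *mapval;
	int idx = 0;

	mapval = bpf_map_lookup_elem(&some_map, &idx);
	if (!mapval)
		return 0;

	/* Move the kptr out of the map value into a temporary owning ref. */
	tmp = bpf_kptr_xchg(&mapval->some_kptr, NULL);
	if (!tmp)
		return 0;

	/* Take the extra reference on the owning ref ... */
	p = bpf_refcount_acquire(tmp);

	/* ... then stash the original back; drop whatever raced in meanwhile. */
	tmp = bpf_kptr_xchg(&mapval->some_kptr, tmp);
	if (tmp)
		bpf_obj_drop(tmp);

	if (p)
		bpf_obj_drop(p);	/* a real program would use p here */
	return 0;
}

char _license[] SEC("license") = "GPL";

With this patch applied, the same acquire can instead be done directly on &mapval->some_kptr, as in the snippet at the top of this commit message.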
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-6-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e3d5d465c2f23..b058d3a3bb6a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5550,10 +5550,23 @@ BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) { if (!btf_is_kernel(btf)) - return false; + return true; return btf_id_set_contains(&rcu_protected_types, btf_id); } +static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field) +{ + struct btf_struct_meta *meta; + + if (btf_is_kernel(kptr_field->kptr.btf)) + return NULL; + + meta = btf_find_struct_meta(kptr_field->kptr.btf, + kptr_field->kptr.btf_id); + + return meta ? meta->record : NULL; +} + static bool rcu_safe_kptr(const struct btf_field *field) { const struct btf_field_kptr *kptr = &field->kptr; @@ -5564,12 +5577,25 @@ static bool rcu_safe_kptr(const struct btf_field *field) static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field) { + struct btf_record *rec; + u32 ret; + + ret = PTR_MAYBE_NULL; if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) { - if (kptr_field->type != BPF_KPTR_PERCPU) - return PTR_MAYBE_NULL | MEM_RCU; - return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU; + ret |= MEM_RCU; + if (kptr_field->type == BPF_KPTR_PERCPU) + ret |= MEM_PERCPU; + else if (!btf_is_kernel(kptr_field->kptr.btf)) + ret |= MEM_ALLOC; + + rec = kptr_pointee_btf_record(kptr_field); + if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE)) + ret |= NON_OWN_REF; + } else { + ret |= PTR_UNTRUSTED; } - return PTR_MAYBE_NULL | PTR_UNTRUSTED; + + return ret; } static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, From 3975f7e7494b9f692518ade0686aba9fead010b7 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:39 -0800 Subject: [PATCH 38/41] selftests/bpf: Test bpf_refcount_acquire of node obtained via direct ld This patch demonstrates that verifier changes earlier in this series result in bpf_refcount_acquire(mapval->stashed_kptr) passing verification. The added test additionally validates that stashing a kptr in mapval and - in a separate BPF program - refcount_acquiring the kptr without unstashing works as expected at runtime. 
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-7-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/local_kptr_stash.c | 33 +++++++++ .../selftests/bpf/progs/local_kptr_stash.c | 71 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87ba9..e6e50a394472c 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -86,6 +117,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2b87..1769fdff6aeae 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,6 +14,24 @@ struct node_data { struct bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; @@ -38,6 +56,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no 
refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL"; From e80742d917492f10926b46b0caca050c6c9231d6 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 3 Nov 2023 19:49:00 -0700 Subject: [PATCH 39/41] bpf: Use named fields for certain bpf uapi structs Martin and Vadim reported a verifier failure with bpf_dynptr usage. The issue is mentioned but Vadim worked around the issue with a source change ([1]). The below describes what the issue is and why there is a verification failure. int BPF_PROG(skb_crypto_setup) { struct bpf_dynptr algo, key; ... bpf_dynptr_from_mem(..., ..., 0, &algo); ... } The bpf program is using vmlinux.h, so we have the following definition in vmlinux.h: struct bpf_dynptr { long: 64; long: 64; }; Note that in uapi header bpf.h, we have struct bpf_dynptr { long: 64; long: 64; } __attribute__((aligned(8))); So we lost alignment information for struct bpf_dynptr by using vmlinux.h. Let us take a look at a simple program below: $ cat align.c typedef unsigned long long __u64; struct bpf_dynptr_no_align { __u64 :64; __u64 :64; }; struct bpf_dynptr_yes_align { __u64 :64; __u64 :64; } __attribute__((aligned(8))); void bar(void *, void *); int foo() { struct bpf_dynptr_no_align a; struct bpf_dynptr_yes_align b; bar(&a, &b); return 0; } $ clang --target=bpf -O2 -S -emit-llvm align.c Look at the generated IR file align.ll: ... %a = alloca %struct.bpf_dynptr_no_align, align 1 %b = alloca %struct.bpf_dynptr_yes_align, align 8 ... The compiler dictates that the alignment for struct bpf_dynptr_no_align is 1 and the alignment for struct bpf_dynptr_yes_align is 8. So theoretically the compiler could allocate variable %a with alignment 1, although in reality the compiler may choose a different alignment by considering other local variables. In [1], the verification failure happens because variable 'algo' is allocated on the stack with alignment 4 (fp-28). But the verifier wants its alignment to be 8. To fix the issue, the RFC patch ([1]) tried to add '__attribute__((aligned(8)))' to struct bpf_dynptr plus other similar structs. Andrii suggested that we could directly modify the uapi struct with named fields like struct 'bpf_iter_num': struct bpf_iter_num { /* opaque iterator state; having __u64 here allows to preserve correct * alignment requirements in vmlinux.h, generated from BTF */ __u64 __opaque[1]; } __attribute__((aligned(8))); Indeed, adding named fields for those affected structs in this patch can preserve alignment when a bpf program references them via vmlinux.h. With this patch, the verification failure in [1] can also be resolved.
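A quick way to see the alignment difference outside of BPF is the small user-space sketch below. The struct names are hypothetical, and the printed values assume a typical 64-bit ABI such as x86-64, where unnamed bit-fields do not contribute to struct alignment but a named __u64 member does.

#include <stdio.h>
#include <stdalign.h>

typedef unsigned long long __u64;

struct dynptr_bitfields {	/* shape vmlinux.h used to regenerate from BTF */
	__u64 :64;
	__u64 :64;
};

struct dynptr_named {		/* shape of the named-field version, minus the attribute */
	__u64 __opaque[2];
};

int main(void)
{
	/* On x86-64 this typically prints 1 and 8: the aligned(8) attribute is
	 * lost either way, but the named member alone restores 8-byte alignment.
	 */
	printf("bitfields-only: align %zu, named member: align %zu\n",
	       alignof(struct dynptr_bitfields), alignof(struct dynptr_named));
	return 0;
}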
[1] https://lore.kernel.org/bpf/1b100f73-7625-4c1f-3ae5-50ecf84d3ff0@linux.dev/ [2] https://lore.kernel.org/bpf/20231103055218.2395034-1-yonghong.song@linux.dev/ Cc: Vadim Fedorenko Cc: Martin KaFai Lau Suggested-by: Andrii Nakryiko Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231104024900.1539182-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 23 +++++++---------------- tools/include/uapi/linux/bpf.h | 23 +++++++---------------- 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f6cdf52b1dab..095ca7238ac20 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0f6cdf52b1dab..095ca7238ac20 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { From 260b428a2806b11be5038d4cd4af28c1fa640d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Thu, 9 Nov 2023 13:14:48 -0800 Subject: [PATCH 40/41] adding ci files --- .github/scripts/veristat-compare.py | 202 ++++++++ .github/workflows/lint.yml | 22 + .github/workflows/test.yml | 451 ++++++++++++++++++ README | 18 - ci/diffs/.keep | 0 ...x-broken-BuildID-for-arm64-and-riscv.patch | 30 ++ ...btf_put-to-register_btf_id_dtor_kfun.patch | 41 ++ ...issing-nospec.h-to-avoid-build-error.patch | 45 ++ ...-pointer-dereference-when-pin-PROG-M.patch | 45 ++ ...type_data_check_overflow-needs-to-co.patch | 115 +++++ ...les-bpf-drop-unnecessary-fallthrough.patch | 32 ++ ...001-selftests-bpf-Add-config.aarch64.patch | 207 ++++++++ ...dd-json-summary-option-to-test_progs.patch | 357 ++++++++++++++ ...just-expected-error-message-for-test.patch | 43 ++ ...x-compilation-errors-Assign-a-value-.patch | 50 ++ ...ests-bpf-Fix-decap_sanity_ns-cleanup.patch | 36 ++ ...sts-bpf-Initial-DENYLIST-for-aarch64.patch | 118 +++++ 
...ftests-bpf-Panic-on-hard-soft-lockup.patch | 57 +++ ...iptables-iptables-legacy-in-the-bpf_.patch | 77 +++ ...lect-CONFIG_FUNCTION_ERROR_INJECTION.patch | 45 ++ ...nitialize-ret-valiable-to-fix-smatch.patch | 68 +++ ...ccount-peer-device-for-NETDEV_XDP_AC.patch | 83 ++++ ...ally-export-__vdso_sgx_enter_enclave.patch | 44 ++ ...Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch | 39 ++ ...pi-pull-in-stddef.h-to-fix-BPF-selft.patch | 104 ++++ ci/vmtest/configs/DENYLIST | 7 + ci/vmtest/configs/DENYLIST.aarch64 | 4 + ci/vmtest/configs/DENYLIST.s390x | 5 + ci/vmtest/configs/DENYLIST.x86_64 | 1 + ci/vmtest/helpers.sh | 38 ++ ci/vmtest/run_selftests.sh | 168 +++++++ 31 files changed, 2534 insertions(+), 18 deletions(-) create mode 100644 .github/scripts/veristat-compare.py create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/test.yml create mode 100644 ci/diffs/.keep create mode 100644 ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch create mode 100644 ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch create mode 100644 ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch create mode 100644 ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch create mode 100644 ci/diffs/0001-libbpf-btf_dump_type_data_check_overflow-needs-to-co.patch create mode 100644 ci/diffs/0001-samples-bpf-drop-unnecessary-fallthrough.patch create mode 100644 ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch create mode 100644 ci/diffs/0001-selftests-bpf-Add-json-summary-option-to-test_progs.patch create mode 100644 ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch create mode 100644 ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch create mode 100644 ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch create mode 100644 ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch create mode 100644 ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch create mode 100644 ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch create mode 100644 ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch create mode 100644 ci/diffs/0001-tracing-fprobe-Initialize-ret-valiable-to-fix-smatch.patch create mode 100644 ci/diffs/0001-veth-take-into-account-peer-device-for-NETDEV_XDP_AC.patch create mode 100644 ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch create mode 100644 ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch create mode 100644 ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch create mode 100644 ci/vmtest/configs/DENYLIST create mode 100644 ci/vmtest/configs/DENYLIST.aarch64 create mode 100644 ci/vmtest/configs/DENYLIST.s390x create mode 100644 ci/vmtest/configs/DENYLIST.x86_64 create mode 100755 ci/vmtest/helpers.sh create mode 100755 ci/vmtest/run_selftests.sh diff --git a/.github/scripts/veristat-compare.py b/.github/scripts/veristat-compare.py new file mode 100644 index 0000000000000..588fa186597ad --- /dev/null +++ b/.github/scripts/veristat-compare.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 + +# This script reads a CSV file produced by the following invocation: +# +# veristat --emit file,prog,verdict,states \ +# --output-format csv \ +# --compare ... +# +# And produces a markdown summary for the file. +# The summary is printed to standard output and appended to a file +# pointed to by GITHUB_STEP_SUMMARY variable. 
+# +# Script exits with return code 1 if there are new failures in the +# veristat results. +# +# For testing purposes invoke as follows: +# +# GITHUB_STEP_SUMMARY=/dev/null python3 veristat-compare.py test.csv +# +# File format (columns): +# 0. file_name +# 1. prog_name +# 2. verdict_base +# 3. verdict_comp +# 4. verdict_diff +# 5. total_states_base +# 6. total_states_comp +# 7. total_states_diff +# +# Records sample: +# file-a,a,success,failure,MISMATCH,12,12,+0 (+0.00%) +# file-b,b,success,success,MATCH,67,67,+0 (+0.00%) +# +# For better readability suffixes '_OLD' and '_NEW' +# are used instead of '_base' and '_comp' for variable +# names etc. + +import io +import os +import sys +import csv +import logging +import argparse +from functools import reduce +from dataclasses import dataclass + +TRESHOLD_PCT = 0 + +HEADERS = ['file_name', 'prog_name', 'verdict_base', 'verdict_comp', + 'verdict_diff', 'total_states_base', 'total_states_comp', + 'total_states_diff'] + +FILE = 0 +PROG = 1 +VERDICT_OLD = 2 +VERDICT_NEW = 3 +STATES_OLD = 5 +STATES_NEW = 6 + +# Given a table row, compute relative increase in the number of +# processed states. +def compute_diff(v): + old = int(v[STATES_OLD]) if v[STATES_OLD] != 'N/A' else 0 + new = int(v[STATES_NEW]) if v[STATES_NEW] != 'N/A' else 0 + if old == 0: + return 1 + return (new - old) / old + +@dataclass +class VeristatInfo: + table: list + changes: bool + new_failures: bool + +# Read CSV table expecting the above described format. +# Return VeristatInfo instance. +def parse_table(csv_filename): + new_failures = False + changes = False + table = [] + + with open(csv_filename, newline='') as file: + reader = csv.reader(file) + headers = next(reader) + if headers != HEADERS: + raise Exception(f'Unexpected table header for {filename}: {headers}') + + for v in reader: + add = False + verdict = v[VERDICT_NEW] + diff = compute_diff(v) + + if v[VERDICT_OLD] != v[VERDICT_NEW]: + changes = True + add = True + verdict = f'{v[VERDICT_OLD]} -> {v[VERDICT_NEW]}' + if v[VERDICT_NEW] == 'failure': + new_failures = True + verdict += ' (!!)' + + if abs(diff * 100) > TRESHOLD_PCT: + changes = True + add = True + + if not add: + continue + + diff_txt = '{:+.1f} %'.format(diff * 100) + table.append([v[FILE], v[PROG], verdict, diff_txt]) + + return VeristatInfo(table=table, + changes=changes, + new_failures=new_failures) + +def format_table(headers, rows, html_mode): + def decorate(val, width): + s = str(val) + if html_mode: + s = s.replace(' -> ', ' → '); + s = s.replace(' (!!)', ' :bangbang: '); + return s.ljust(width) + + column_widths = list(reduce(lambda acc, row: map(max, map(len, row), acc), + rows, + map(len, headers))) + + with io.StringIO() as out: + def print_row(row): + out.write('| ') + out.write(' | '.join(map(decorate, row, column_widths))) + out.write(' |\n') + + print_row(headers) + + out.write('|') + out.write('|'.join(map(lambda w: '-' * (w + 2), column_widths))) + out.write('|\n') + + for row in rows: + print_row(row) + + return out.getvalue() + +def format_section_name(info): + if info.new_failures: + return 'There are new veristat failures' + if info.changes: + return 'There are changes in verification performance' + return 'No changes in verification performance' + +SUMMARY_HEADERS = ['File', 'Program', 'Verdict', 'States Diff (%)'] + +def format_html_summary(info): + section_name = format_section_name(info) + if not info.table: + return f'# {section_name}\n' + + table = format_table(SUMMARY_HEADERS, info.table, True) + return f''' +# {section_name} + +
+<details> +<summary>Click to expand</summary> + +{table} +</details>
+'''.lstrip() + +def format_text_summary(info): + section_name = format_section_name(info) + table = format_table(SUMMARY_HEADERS, info.table, False) + if not info.table: + return f'# {section_name}\n' + + return f''' +# {section_name} + +{table} +'''.lstrip() + +def main(compare_csv_filename, summary_filename): + info = parse_table(compare_csv_filename) + sys.stdout.write(format_text_summary(info)) + with open(summary_filename, 'a') as f: + f.write(format_html_summary(info)) + + if info.new_failures: + return 1 + + return 0 + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="""Print veristat comparison output as markdown step summary""" + ) + parser.add_argument('filename') + args = parser.parse_args() + summary_filename = os.getenv('GITHUB_STEP_SUMMARY') + if not summary_filename: + logging.error('GITHUB_STEP_SUMMARY environment variable is not set') + sys.exit(1) + sys.exit(main(args.filename, summary_filename)) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000000..8805283a42271 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,22 @@ +name: "lint" + +on: + pull_request: + push: + branches: + - master + +jobs: + shellcheck: + # This workflow gets injected into other Linux repositories, but we don't + # want it to run there. + if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: ShellCheck + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master + env: + SHELLCHECK_OPTS: --severity=warning --exclude=SC1091 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000000000..6989bad93093e --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,451 @@ +name: bpf-ci + +on: + pull_request: + push: + branches: + - bpf_base + - bpf-next_base + +env: + veristat_arch: x86_64 + veristat_toolchain: gcc + +concurrency: + group: ci-test-${{ github.ref_name }} + cancel-in-progress: true + +jobs: + + set-matrix: + # FIXME: set-matrix is lightweight, run it on any self-hosted machines for kernel-patches org + # so we do not wait for GH hosted runners when there potentially all are busy because of bpf-rc + # repo for instance. + # This could be somehow fixed long term by making this action/workflow re-usable and letting the called + # specify what to run on. + runs-on: ${{ github.repository_owner == 'kernel-patches' && 'x86_64' || 'ubuntu-latest' }} + outputs: + build-matrix: ${{ steps.set-matrix-impl.outputs.build_matrix }} + test-matrix: ${{ steps.set-matrix-impl.outputs.test_matrix }} + veristat-runs-on: ${{ steps.set-matrix-impl.outputs.veristat_runs_on }} + steps: + - id: set-matrix-impl + shell: python3 -I {0} + run: | + from json import dumps + from enum import Enum + import os + + class Arch(Enum): + """ + CPU architecture supported by CI. + """ + aarch64 = "aarch64" + s390x = "s390x" + x86_64 = "x86_64" + + def set_output(name, value): + """Write an output variable to the GitHub output file.""" + with open(os.getenv("GITHUB_OUTPUT"), "a") as f: + f.write(f"{name}={value}\n") + + def generate_test_config(test): + """Create the configuration for the provided test.""" + experimental = test.endswith("_parallel") + config = { + "test": test, + "continue_on_error": experimental, + # While in experimental mode, parallel jobs may get stuck + # anywhere, including in user space where the kernel won't detect + # a problem and panic. 
We add a second layer of (smaller) timeouts + # here such that if we get stuck in a parallel run, we hit this + # timeout and fail without affecting the overall job success (as + # would be the case if we hit the job-wide timeout). For + # non-experimental jobs, 360 is the default which will be + # superseded by the overall workflow timeout (but we need to + # specify something). + "timeout_minutes": 30 if experimental else 360, + } + return config + + matrix = [ + {"kernel": "LATEST", "runs_on": [], "arch": Arch.x86_64.value, "toolchain": "gcc", "llvm-version": "16"}, + ] + self_hosted_repos = [ + "kernel-patches/bpf", + "kernel-patches/vmtest", + ] + + for idx in range(len(matrix) - 1, -1, -1): + if matrix[idx]['toolchain'] == 'gcc': + matrix[idx]['toolchain_full'] = 'gcc' + else: + matrix[idx]['toolchain_full'] = 'llvm-' + matrix[idx]['llvm-version'] + # Only a few repository within "kernel-patches" use self-hosted runners. + if "${{ github.repository_owner }}" != "kernel-patches" or "${{ github.repository }}" not in self_hosted_repos: + # Outside of those repositories, we only run on x86_64 GH hosted runners (ubuntu-latest) + for idx in range(len(matrix) - 1, -1, -1): + if matrix[idx]["arch"] != Arch.x86_64.value: + del matrix[idx] + else: + matrix[idx]["runs_on"] = ["ubuntu-latest"] + else: + # Otherwise, run on (self-hosted, arch) runners + for idx in range(len(matrix) - 1, -1, -1): + matrix[idx]["runs_on"].extend(["self-hosted", matrix[idx]["arch"]]) + + build_matrix = {"include": matrix} + set_output("build_matrix", dumps(build_matrix)) + + def get_tests(config): + tests = [ + "test_progs", + ] + if config.get("parallel_tests", True): + return tests + return [test for test in tests if not test.endswith("parallel") ] + + test_matrix = {"include": [{**config, **generate_test_config(test)} + for config in matrix + for test in get_tests(config) + ]} + set_output("test_matrix", dumps(test_matrix)) + + veristat_runs_on = next(x['runs_on'] + for x in matrix + if x['arch'] == "${{env.veristat_arch}}" and + x['toolchain'] == "${{env.veristat_toolchain}}") + set_output("veristat_runs_on", veristat_runs_on) + build: + name: build for ${{ matrix.arch }} with ${{ matrix.toolchain_full }} + needs: set-matrix + runs-on: ${{ matrix.runs_on }} + timeout-minutes: 100 + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.set-matrix.outputs.build-matrix) }} + env: + KERNEL: ${{ matrix.kernel }} + REPO_ROOT: ${{ github.workspace }} + REPO_PATH: "" + KBUILD_OUTPUT: kbuild-output/ + steps: + - uses: actions/checkout@v3 + # We fetch an actual bit of history here to facilitate incremental + # builds (which may check out some earlier upstream change). + with: + fetch-depth: 50 + - if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: Download bpf-next tree + uses: libbpf/ci/get-linux-source@main + with: + dest: '.kernel' + - if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: Move linux source in place + shell: bash + run: | + rm -rf .kernel/.git + cp -rf .kernel/. . 
+ rm -rf .kernel + - name: Get commit meta-data + id: get-commit-metadata + shell: bash + run: | + if [ ${{ github.event_name }} = 'push' ]; then + branch="${{ github.ref_name }}" + echo "branch=${branch}" >> "${GITHUB_OUTPUT}" + else + branch="${{ github.base_ref }}" + echo "branch=${branch}" >> "${GITHUB_OUTPUT}" + fi + + upstream=$(echo "${branch}" | sed 's@_base$@@') + commit="$( + git rev-parse "origin/${upstream}" &> /dev/null \ + || ( + git fetch --quiet --prune --no-tags --depth=1 --no-recurse-submodules origin +refs/heads/${upstream}:refs/remotes/origin/${upstream} \ + && git rev-parse "origin/${upstream}" + ) + )" + + echo "timestamp=$(TZ=utc git show --format='%cd' --no-patch --date=iso-strict-local ${commit})" >> "${GITHUB_OUTPUT}" + echo "commit=${commit}" >> "${GITHUB_OUTPUT}" + echo "Most recent upstream commit is ${commit}" + - name: Pull recent KBUILD_OUTPUT contents + uses: actions/cache@v3 + with: + path: ${{ env.KBUILD_OUTPUT }} + key: kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}-${{ steps.get-commit-metadata.outputs.timestamp }}-${{ steps.get-commit-metadata.outputs.commit }} + restore-keys: | + kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}-${{ steps.get-commit-metadata.outputs.timestamp }}- + kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}- + kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}- + - name: Prepare incremental build + shell: bash + run: | + set -e -u + + # $1 - the SHA-1 to fetch and check out + fetch_and_checkout() { + local build_base_sha="${1}" + + # If cached artifacts became stale for one reason or another, we + # may not have the build base SHA available. Fetch it and retry. + git fetch origin "${build_base_sha}" && git checkout --quiet "${build_base_sha}" + } + + # $1 - value of KBUILD_OUTPUT + clear_cache_artifacts() { + local kbuild_output="${1}" + echo "Unable to find earlier upstream ref. Discarding KBUILD_OUTPUT contents..." 
+ rm --recursive --force "${kbuild_output}" + mkdir "${kbuild_output}" + false + } + + # $1 - value of KBUILD_OUTPUT + # $2 - current time in ISO 8601 format + restore_source_code_times() { + local kbuild_output="${1}" + local current_time="${2}" + local src_time="$(date --iso-8601=ns --date="${current_time} - 2 minutes")" + local obj_time="$(date --iso-8601=ns --date="${current_time} - 1 minute")" + + git ls-files | xargs --max-args=10000 touch -m --no-create --date="${src_time}" + find "${kbuild_output}" -type f | xargs --max-args=10000 touch -m --no-create --date="${obj_time}" + git checkout --quiet - + echo "Adjusted src and obj time stamps relative to system time" + } + + mkdir --parents "${KBUILD_OUTPUT}" + current_time="$(date --iso-8601=ns)" + + if [ -f "${KBUILD_OUTPUT}/.build-base-sha" ]; then + build_base_sha="$(cat "${KBUILD_OUTPUT}/.build-base-sha")" + echo "Setting up base build state for ${build_base_sha}" + + ( + git checkout --quiet "${build_base_sha}" \ + || fetch_and_checkout "${build_base_sha}" \ + || clear_cache_artifacts "${KBUILD_OUTPUT}" + ) && restore_source_code_times "${KBUILD_OUTPUT}" "${current_time}" + else + echo "No previous build data found" + fi + + echo -n "${{ steps.get-commit-metadata.outputs.commit }}" > "${KBUILD_OUTPUT}/.build-base-sha" + - uses: libbpf/ci/patch-kernel@main + with: + patches-root: '${{ github.workspace }}/ci/diffs' + repo-root: '${{ github.workspace }}' + - name: Setup build environment + uses: libbpf/ci/setup-build-env@main + with: + llvm-version: ${{ matrix.llvm-version }} + - name: Build kernel image + uses: libbpf/ci/build-linux@main + with: + arch: ${{ matrix.arch }} + toolchain: ${{ matrix.toolchain }} + kbuild-output: ${{ env.KBUILD_OUTPUT }} + max-make-jobs: 32 + llvm-version: ${{ matrix.llvm-version }} + - name: Build selftests + uses: libbpf/ci/build-selftests@main + with: + toolchain: ${{ matrix.toolchain }} + kbuild-output: ${{ env.KBUILD_OUTPUT }} + max-make-jobs: 32 + llvm-version: ${{ matrix.llvm-version }} + - if: ${{ github.event_name != 'push' }} + name: Build samples + uses: libbpf/ci/build-samples@main + with: + toolchain: ${{ matrix.toolchain }} + kbuild-output: ${{ env.KBUILD_OUTPUT }} + max-make-jobs: 32 + llvm-version: ${{ matrix.llvm-version }} + - name: Tar artifacts + run: | + # Remove intermediate object files that we have no use for. Ideally + # we'd just exclude them from tar below, but it does not provide + # options to express the precise constraints. + find selftests/ -name "*.o" -a ! -name "*.bpf.o" -print0 | \ + xargs --null --max-args=10000 rm + + # Strip debug information, which is excessively large (consuming + # bandwidth) while not actually being used (the kernel does not use + # DWARF to symbolize stacktraces). + strip --strip-debug "${KBUILD_OUTPUT}"/vmlinux + + file_list="" + if [ "${{ github.repository }}" == "kernel-patches/vmtest" ]; then + # Package up a bunch of additional infrastructure to support running + # 'make kernelrelease' and bpf tool checks later on. + file_list="$(find . -iname Makefile | xargs) \ + scripts/ \ + tools/testing/selftests/bpf/ \ + tools/include/ \ + tools/bpf/bpftool/"; + fi + # zstd is installed by default in the runner images. 
+ tar -cf - \ + "${KBUILD_OUTPUT}"/.config \ + "${KBUILD_OUTPUT}"/$(KBUILD_OUTPUT="${KBUILD_OUTPUT}" make -s image_name) \ + "${KBUILD_OUTPUT}"/include/config/auto.conf \ + "${KBUILD_OUTPUT}"/include/generated/autoconf.h \ + "${KBUILD_OUTPUT}"/vmlinux \ + ${file_list} \ + --exclude '*.cmd' \ + --exclude '*.d' \ + --exclude '*.h' \ + --exclude '*.output' \ + selftests/bpf/ | zstd -T0 -19 -o vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst + - if: ${{ github.event_name != 'push' }} + name: Remove KBUILD_OUTPUT contents + shell: bash + run: | + # Remove $KBUILD_OUTPUT to prevent cache creation for pull requests. + # Only on pushed changes are build artifacts actually cached, because + # of github.com/actions/cache's cache isolation logic. + rm -rf "${KBUILD_OUTPUT}" + - uses: actions/upload-artifact@v3 + with: + name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }} + if-no-files-found: error + path: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst + test: + if: ${{ github.event_name != 'push' }} + name: ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }} + needs: [set-matrix, build] + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.set-matrix.outputs.test-matrix) }} + runs-on: ${{ matrix.runs_on }} + timeout-minutes: 100 + env: + KERNEL: ${{ matrix.kernel }} + REPO_ROOT: ${{ github.workspace }} + REPO_PATH: "" + KBUILD_OUTPUT: kbuild-output/ + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }} + path: . + - name: Untar artifacts + # zstd is installed by default in the runner images. + run: zstd -d -T0 vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst --stdout | tar -xf - + - name: Prepare rootfs + uses: libbpf/ci/prepare-rootfs@main + with: + project-name: 'libbpf' + arch: ${{ matrix.arch }} + kernel: ${{ matrix.kernel }} + kernel-root: '.' + kbuild-output: ${{ env.KBUILD_OUTPUT }} + image-output: '/tmp/root.img' + test: ${{ matrix.test }} + - name: Run selftests + uses: libbpf/ci/run-qemu@main + continue-on-error: ${{ matrix.continue_on_error }} + timeout-minutes: ${{ matrix.timeout_minutes }} + with: + arch: ${{ matrix.arch}} + img: '/tmp/root.img' + vmlinuz: '${{ github.workspace }}/vmlinuz' + kernel-root: '.' + max-cpu: 8 + kernel-test: ${{ matrix.test }} + veristat: + name: veristat + needs: [set-matrix, build] + runs-on: ${{ fromJSON(needs.set-matrix.outputs.veristat-runs-on) }} + timeout-minutes: 100 + env: + KERNEL: LATEST + REPO_ROOT: ${{ github.workspace }} + REPO_PATH: "" + KBUILD_OUTPUT: kbuild-output/ + steps: + - name: Setup environment variables + run: | + echo arch_and_tool=${{ env.veristat_arch }}-${{ env.veristat_toolchain }} > \ + ${GITHUB_ENV} + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: vmlinux-${{ env.arch_and_tool }} + path: . + - name: Untar artifacts + # zstd is installed by default in the runner images. + run: zstd -d -T0 vmlinux-${{ env.arch_and_tool }}.tar.zst --stdout | tar -xf - + + - name: Prepare rootfs + uses: libbpf/ci/prepare-rootfs@main + with: + project-name: 'libbpf' + arch: x86_64 + kernel: LATEST + kernel-root: '.' + kbuild-output: ${{ env.KBUILD_OUTPUT }} + image-output: '/tmp/root.img' + test: run_veristat + + - name: Run veristat + uses: libbpf/ci/run-qemu@main + timeout-minutes: 10 + with: + arch: x86_64 + img: '/tmp/root.img' + vmlinuz: '${{ github.workspace }}/vmlinuz' + kernel-root: '.' 
+ max-cpu: 8 + kernel-test: run_veristat + output-dir: '${{ github.workspace }}' + + # veristat.csv is produced by run-qemu run_veristat action + - uses: actions/upload-artifact@v3 + with: + name: ${{ env.arch_and_tool }}-veristat-log + if-no-files-found: error + path: '${{ github.workspace }}/veristat.csv' + + # For pull request: + # - get baseline log from cache + # - compare it to current run + - if: ${{ github.event_name == 'pull_request' }} + uses: actions/cache/restore@v3 + with: + key: ${{ env.arch_and_tool }}-veristat-baseline + restore-keys: | + ${{ env.arch_and_tool }}-veristat-baseline- + path: '${{ github.workspace }}/veristat-baseline.csv' + + - if: ${{ github.event_name == 'pull_request' }} + name: Show veristat comparison + run: | + cd ${{ github.workspace }} + if [[ ! -f veristat-baseline.csv ]]; then + echo "No veristat-baseline.csv available" + echo "# No veristat-baseline.csv available" >> $GITHUB_STEP_SUMMARY + exit + fi + selftests/bpf/veristat \ + --output-format csv \ + --emit file,prog,verdict,states \ + --compare veristat-baseline.csv veristat.csv > compare.csv + python3 ./.github/scripts/veristat-compare.py compare.csv + + # For push: just put baseline log to cache + - if: ${{ github.event_name == 'push' }} + run: | + mv '${{ github.workspace }}/veristat.csv' \ + '${{ github.workspace }}/veristat-baseline.csv' + + - if: ${{ github.event_name == 'push' }} + uses: actions/cache/save@v3 + with: + key: ${{ env.arch_and_tool }}-veristat-baseline-${{ github.run_id }} + path: '${{ github.workspace }}/veristat-baseline.csv' diff --git a/README b/README index 669ac7c322927..e69de29bb2d1d 100644 --- a/README +++ b/README @@ -1,18 +0,0 @@ -Linux kernel -============ - -There are several guides for kernel developers and users. These guides can -be rendered in a number of formats, like HTML and PDF. Please read -Documentation/admin-guide/README.rst first. - -In order to build the documentation, use ``make htmldocs`` or -``make pdfdocs``. The formatted documentation can also be read online at: - - https://www.kernel.org/doc/html/latest/ - -There are various text files in the Documentation/ subdirectory, -several of them using the Restructured Text markup notation. - -Please read the Documentation/process/changes.rst file, as it contains the -requirements for building and running the kernel, and information about -the problems which may result by upgrading your kernel. diff --git a/ci/diffs/.keep b/ci/diffs/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch b/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch new file mode 100644 index 0000000000000..3d8ea87a1dbda --- /dev/null +++ b/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch @@ -0,0 +1,30 @@ +From cb50dac513235c6996b9d26f959886ba1d7be607 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Fri, 6 Jan 2023 13:59:26 +0200 +Subject: [PATCH] Revert "arch: fix broken BuildID for arm64 and riscv" + +This reverts commit 99cb0d917ffa1ab628bb67364ca9b162c07699b1. +--- + include/asm-generic/vmlinux.lds.h | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 659bf3b31c91..a94219e9916f 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -891,12 +891,7 @@ + #define PRINTK_INDEX + #endif + +-/* +- * Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler. 
+- * Otherwise, the type of .notes section would become PROGBITS instead of NOTES. +- */ + #define NOTES \ +- /DISCARD/ : { *(.note.GNU-stack) } \ + .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ + BOUNDED_SECTION_BY(.note.*, _notes) \ + } NOTES_HEADERS \ +-- +2.39.0 + diff --git a/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch b/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch new file mode 100644 index 0000000000000..4fcc0146effc3 --- /dev/null +++ b/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch @@ -0,0 +1,41 @@ +From 74bc3a5acc82f020d2e126f56c535d02d1e74e37 Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Fri, 20 Jan 2023 13:21:48 +0100 +Subject: [PATCH] bpf: Add missing btf_put to register_btf_id_dtor_kfuncs + +We take the BTF reference before we register dtors and we need +to put it back when it's done. + +We probably won't se a problem with kernel BTF, but module BTF +would stay loaded (because of the extra ref) even when its module +is removed. + +Cc: Kumar Kartikeya Dwivedi +Fixes: 5ce937d613a4 ("bpf: Populate pairs of btf_id and destructor kfunc in btf") +Acked-by: Kumar Kartikeya Dwivedi +Signed-off-by: Jiri Olsa +Link: https://lore.kernel.org/r/20230120122148.1522359-1-jolsa@kernel.org +Signed-off-by: Alexei Starovoitov +--- + kernel/bpf/btf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c +index f7dd8af06413..b7017cae6fd1 100644 +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c + + sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); + +- return 0; + end: +- btf_free_dtor_kfunc_tab(btf); ++ if (ret) ++ btf_free_dtor_kfunc_tab(btf); + btf_put(btf); + return ret; + } +-- +2.39.1 + diff --git a/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch b/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch new file mode 100644 index 0000000000000..669bde57d04f0 --- /dev/null +++ b/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch @@ -0,0 +1,45 @@ +From 345d24a91c79f408e355c8b7e873ccde0f097eea Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Wed, 22 Feb 2023 10:50:48 +0800 +Subject: [PATCH] bpf: Include missing nospec.h to avoid build error. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Commit 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()") +defines a default barrier_nospec() and removes the +such a build error: + + CC kernel/bpf/core.o +kernel/bpf/core.c: In function ‘___bpf_prog_run’: +kernel/bpf/core.c:1913:3: error: implicit declaration of function ‘barrier_nospec’; did you mean ‘barrier_data’? [-Werror=implicit-function-declaration] + barrier_nospec(); + ^~~~~~~~~~~~~~ + barrier_data +cc1: some warnings being treated as errors + +So include nospec.h to avoid the build error. 
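As an aside, the diagnostic being fixed here is the generic implicit-declaration error; it can be reproduced in isolation with any undeclared function. The file name and compiler flag below are illustrative only and are not taken from the kernel build:

```sh
# Sketch: the class of error fixed by adding the missing #include.
cat > implicit.c <<'EOF'
int main(void) { return undeclared_fn(); }   /* no declaration in scope */
EOF
gcc -Werror=implicit-function-declaration -c implicit.c
# -> error: implicit declaration of function 'undeclared_fn'
```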
+ +Fixes: 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()") +Signed-off-by: Huacai Chen +Link: https://lore.kernel.org/r/20230222025048.3677315-1-chenhuacai@loongson.cn +Signed-off-by: Alexei Starovoitov +--- + kernel/bpf/core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 933869983e2a..b297e9f60ca1 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include + #include + +-- +2.30.2 + diff --git a/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch b/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch new file mode 100644 index 0000000000000..bfb7de10b4793 --- /dev/null +++ b/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch @@ -0,0 +1,45 @@ +From 0dd340f3549863e1289a872057743c9a177d1e3f Mon Sep 17 00:00:00 2001 +From: Pu Lehui +Date: Wed, 2 Nov 2022 16:40:34 +0800 +Subject: [PATCH 1/2] bpftool: Fix NULL pointer dereference when pin {PROG, + MAP, LINK} without FILE + +When using bpftool to pin {PROG, MAP, LINK} without FILE, +segmentation fault will occur. The reson is that the lack +of FILE will cause strlen to trigger NULL pointer dereference. +The corresponding stacktrace is shown below: + +do_pin + do_pin_any + do_pin_fd + mount_bpffs_for_pin + strlen(name) <- NULL pointer dereference + +Fix it by adding validation to the common process. + +Fixes: 75a1e792c335 ("tools: bpftool: Allow all prog/map handles for pinning objects") +Signed-off-by: Pu Lehui +Signed-off-by: Daniel Borkmann +Reviewed-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20221102084034.3342995-1-pulehui@huaweicloud.com +--- + tools/bpf/bpftool/common.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c +index e4d33bc8bbbf..653c130a0aaa 100644 +--- a/tools/bpf/bpftool/common.c ++++ b/tools/bpf/bpftool/common.c +@@ -302,6 +302,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) + int err; + int fd; + ++ if (!REQ_ARGS(3)) ++ return -EINVAL; ++ + fd = get_fd(&argc, &argv); + if (fd < 0) + return fd; +-- +2.30.2 + diff --git a/ci/diffs/0001-libbpf-btf_dump_type_data_check_overflow-needs-to-co.patch b/ci/diffs/0001-libbpf-btf_dump_type_data_check_overflow-needs-to-co.patch new file mode 100644 index 0000000000000..1b8842db87594 --- /dev/null +++ b/ci/diffs/0001-libbpf-btf_dump_type_data_check_overflow-needs-to-co.patch @@ -0,0 +1,115 @@ +From c39028b333f3a3a765c5c0b9726b8e38aedf0ba1 Mon Sep 17 00:00:00 2001 +From: Martin KaFai Lau +Date: Thu, 27 Apr 2023 18:36:38 -0700 +Subject: [PATCH] libbpf: btf_dump_type_data_check_overflow needs to consider + BTF_MEMBER_BITFIELD_SIZE + +The btf_dump/struct_data selftest is failing with: + + [...] + test_btf_dump_struct_data:FAIL:unexpected return value dumping fs_context unexpected unexpected return value dumping fs_context: actual -7 != expected 264 + [...] + +The reason is in btf_dump_type_data_check_overflow(). It does not use +BTF_MEMBER_BITFIELD_SIZE from the struct's member (btf_member). Instead, +it is using the enum size which is 4. It had been working till the recent +commit 4e04143c869c ("fs_context: drop the unused lsm_flags member") +removed an integer member which also removed the 4 bytes padding at the +end of the fs_context. Missing this 4 bytes padding exposed this bug. 
In +particular, when btf_dump_type_data_check_overflow() reaches the member +'phase', -E2BIG is returned. + +The fix is to pass bit_sz to btf_dump_type_data_check_overflow(). In +btf_dump_type_data_check_overflow(), it does a different size check when +bit_sz is not zero. + +The current fs_context: + +[3600] ENUM 'fs_context_purpose' encoding=UNSIGNED size=4 vlen=3 + 'FS_CONTEXT_FOR_MOUNT' val=0 + 'FS_CONTEXT_FOR_SUBMOUNT' val=1 + 'FS_CONTEXT_FOR_RECONFIGURE' val=2 +[3601] ENUM 'fs_context_phase' encoding=UNSIGNED size=4 vlen=7 + 'FS_CONTEXT_CREATE_PARAMS' val=0 + 'FS_CONTEXT_CREATING' val=1 + 'FS_CONTEXT_AWAITING_MOUNT' val=2 + 'FS_CONTEXT_AWAITING_RECONF' val=3 + 'FS_CONTEXT_RECONF_PARAMS' val=4 + 'FS_CONTEXT_RECONFIGURING' val=5 + 'FS_CONTEXT_FAILED' val=6 +[3602] STRUCT 'fs_context' size=264 vlen=21 + 'ops' type_id=3603 bits_offset=0 + 'uapi_mutex' type_id=235 bits_offset=64 + 'fs_type' type_id=872 bits_offset=1216 + 'fs_private' type_id=21 bits_offset=1280 + 'sget_key' type_id=21 bits_offset=1344 + 'root' type_id=781 bits_offset=1408 + 'user_ns' type_id=251 bits_offset=1472 + 'net_ns' type_id=984 bits_offset=1536 + 'cred' type_id=1785 bits_offset=1600 + 'log' type_id=3621 bits_offset=1664 + 'source' type_id=42 bits_offset=1792 + 'security' type_id=21 bits_offset=1856 + 's_fs_info' type_id=21 bits_offset=1920 + 'sb_flags' type_id=20 bits_offset=1984 + 'sb_flags_mask' type_id=20 bits_offset=2016 + 's_iflags' type_id=20 bits_offset=2048 + 'purpose' type_id=3600 bits_offset=2080 bitfield_size=8 + 'phase' type_id=3601 bits_offset=2088 bitfield_size=8 + 'need_free' type_id=67 bits_offset=2096 bitfield_size=1 + 'global' type_id=67 bits_offset=2097 bitfield_size=1 + 'oldapi' type_id=67 bits_offset=2098 bitfield_size=1 + +Fixes: 920d16af9b42 ("libbpf: BTF dumper support for typed data") +Signed-off-by: Martin KaFai Lau +Signed-off-by: Daniel Borkmann +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20230428013638.1581263-1-martin.lau@linux.dev +--- + tools/lib/bpf/btf_dump.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c +index 580985ee5545..4d9f30bf7f01 100644 +--- a/tools/lib/bpf/btf_dump.c ++++ b/tools/lib/bpf/btf_dump.c +@@ -2250,9 +2250,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data, +- __u8 bits_offset) ++ __u8 bits_offset, ++ __u8 bit_sz) + { +- __s64 size = btf__resolve_size(d->btf, id); ++ __s64 size; ++ ++ if (bit_sz) { ++ /* bits_offset is at most 7. bit_sz is at most 128. */ ++ __u8 nr_bytes = (bits_offset + bit_sz + 7) / 8; ++ ++ /* When bit_sz is non zero, it is called from ++ * btf_dump_struct_data() where it only cares about ++ * negative error value. ++ * Return nr_bytes in success case to make it ++ * consistent as the regular integer case below. ++ */ ++ return data + nr_bytes > d->typed_dump->data_end ? 
-E2BIG : nr_bytes; ++ } ++ ++ size = btf__resolve_size(d->btf, id); + + if (size < 0 || size >= INT_MAX) { + pr_warn("unexpected size [%zu] for id [%u]\n", +@@ -2407,7 +2423,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, + { + int size, err = 0; + +- size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); ++ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz); + if (size < 0) + return size; + err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); +-- +2.34.1 + diff --git a/ci/diffs/0001-samples-bpf-drop-unnecessary-fallthrough.patch b/ci/diffs/0001-samples-bpf-drop-unnecessary-fallthrough.patch new file mode 100644 index 0000000000000..bb02a27e1592a --- /dev/null +++ b/ci/diffs/0001-samples-bpf-drop-unnecessary-fallthrough.patch @@ -0,0 +1,32 @@ +From f23a4ed043dfd36b758e627bdb30fc8e686f330d Mon Sep 17 00:00:00 2001 +From: Andrii Nakryiko +Date: Mon, 15 May 2023 13:00:20 -0700 +Subject: [PATCH bpf] samples/bpf: drop unnecessary fallthrough + +__fallthrough is now not supported. Instead of renaming it to +now-canonical ([0]) fallthrough pseudo-keyword, just get rid of it and +equate 'h' case to default case, as both emit usage information and +succeed. + + [0] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through + +Signed-off-by: Andrii Nakryiko +--- + samples/bpf/hbm.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c +index 6448b7826107..bf66277115e2 100644 +--- a/samples/bpf/hbm.c ++++ b/samples/bpf/hbm.c +@@ -498,7 +498,6 @@ int main(int argc, char **argv) + "Option -%c requires an argument.\n\n", + optopt); + case 'h': +- __fallthrough; + default: + Usage(); + return 0; +-- +2.34.1 + diff --git a/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch b/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch new file mode 100644 index 0000000000000..1797384c1b5c8 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch @@ -0,0 +1,207 @@ +From ec99451f0a488e50aaf0ce467db8771411edc407 Mon Sep 17 00:00:00 2001 +From: Manu Bretelle +Date: Fri, 21 Oct 2022 14:06:59 -0700 +Subject: [PATCH] selftests/bpf: Add config.aarch64 + +config.aarch64, similarly to config.{s390x,x86_64} is a config enabling +building a kernel on aarch64 to be used in bpf's +selftests/kernel-patches CI. 
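As a rough sketch of how such a fragment is typically consumed, the arch-specific file is concatenated on top of the common selftests config before building the CI kernel; the paths, cross-compiler prefix, and use of olddefconfig below are assumptions for illustration, not taken from this patch:

```sh
# Sketch only: merge the common and aarch64 config fragments, then let
# Kconfig resolve the remaining defaults (paths and toolchain assumed).
cd "${KERNEL_SRC:?}"
cat tools/testing/selftests/bpf/config \
    tools/testing/selftests/bpf/config.aarch64 > .config
make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- olddefconfig
```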
+ +Signed-off-by: Manu Bretelle +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20221021210701.728135-3-chantr4@gmail.com +--- + tools/testing/selftests/bpf/config.aarch64 | 181 +++++++++++++++++++++ + 1 file changed, 181 insertions(+) + create mode 100644 tools/testing/selftests/bpf/config.aarch64 + +diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 +new file mode 100644 +index 000000000000..1f0437644186 +--- /dev/null ++++ b/tools/testing/selftests/bpf/config.aarch64 +@@ -0,0 +1,181 @@ ++CONFIG_9P_FS=y ++CONFIG_ARCH_VEXPRESS=y ++CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y ++CONFIG_ARM_SMMU_V3=y ++CONFIG_ATA=y ++CONFIG_AUDIT=y ++CONFIG_BINFMT_MISC=y ++CONFIG_BLK_CGROUP=y ++CONFIG_BLK_DEV_BSGLIB=y ++CONFIG_BLK_DEV_INITRD=y ++CONFIG_BLK_DEV_IO_TRACE=y ++CONFIG_BLK_DEV_RAM=y ++CONFIG_BLK_DEV_SD=y ++CONFIG_BONDING=y ++CONFIG_BPFILTER=y ++CONFIG_BPF_JIT_ALWAYS_ON=y ++CONFIG_BPF_JIT_DEFAULT_ON=y ++CONFIG_BPF_PRELOAD_UMD=y ++CONFIG_BPF_PRELOAD=y ++CONFIG_BRIDGE=m ++CONFIG_CGROUP_CPUACCT=y ++CONFIG_CGROUP_DEVICE=y ++CONFIG_CGROUP_FREEZER=y ++CONFIG_CGROUP_HUGETLB=y ++CONFIG_CGROUP_NET_CLASSID=y ++CONFIG_CGROUP_PERF=y ++CONFIG_CGROUP_PIDS=y ++CONFIG_CGROUP_SCHED=y ++CONFIG_CGROUPS=y ++CONFIG_CHECKPOINT_RESTORE=y ++CONFIG_CHR_DEV_SG=y ++CONFIG_COMPAT=y ++CONFIG_CPUSETS=y ++CONFIG_CRASH_DUMP=y ++CONFIG_CRYPTO_USER_API_RNG=y ++CONFIG_CRYPTO_USER_API_SKCIPHER=y ++CONFIG_DEBUG_ATOMIC_SLEEP=y ++CONFIG_DEBUG_INFO_BTF=y ++CONFIG_DEBUG_INFO_DWARF4=y ++CONFIG_DEBUG_LIST=y ++CONFIG_DEBUG_LOCKDEP=y ++CONFIG_DEBUG_NOTIFIERS=y ++CONFIG_DEBUG_PAGEALLOC=y ++CONFIG_DEBUG_SECTION_MISMATCH=y ++CONFIG_DEBUG_SG=y ++CONFIG_DETECT_HUNG_TASK=y ++CONFIG_DEVTMPFS_MOUNT=y ++CONFIG_DEVTMPFS=y ++CONFIG_DRM_VIRTIO_GPU=y ++CONFIG_DRM=y ++CONFIG_DUMMY=y ++CONFIG_EXPERT=y ++CONFIG_EXT4_FS_POSIX_ACL=y ++CONFIG_EXT4_FS_SECURITY=y ++CONFIG_EXT4_FS=y ++CONFIG_FANOTIFY=y ++CONFIG_FB=y ++CONFIG_FUNCTION_PROFILER=y ++CONFIG_FUSE_FS=y ++CONFIG_FW_CFG_SYSFS_CMDLINE=y ++CONFIG_FW_CFG_SYSFS=y ++CONFIG_GDB_SCRIPTS=y ++CONFIG_HAVE_EBPF_JIT=y ++CONFIG_HAVE_KPROBES_ON_FTRACE=y ++CONFIG_HAVE_KPROBES=y ++CONFIG_HAVE_KRETPROBES=y ++CONFIG_HEADERS_INSTALL=y ++CONFIG_HIGH_RES_TIMERS=y ++CONFIG_HUGETLBFS=y ++CONFIG_HW_RANDOM_VIRTIO=y ++CONFIG_HW_RANDOM=y ++CONFIG_HZ_100=y ++CONFIG_IDLE_PAGE_TRACKING=y ++CONFIG_IKHEADERS=y ++CONFIG_INET6_ESP=y ++CONFIG_INET_ESP=y ++CONFIG_INET=y ++CONFIG_INPUT_EVDEV=y ++CONFIG_IP_ADVANCED_ROUTER=y ++CONFIG_IP_MULTICAST=y ++CONFIG_IP_MULTIPLE_TABLES=y ++CONFIG_IP_NF_IPTABLES=y ++CONFIG_IPV6_SEG6_LWTUNNEL=y ++CONFIG_IPVLAN=y ++CONFIG_JUMP_LABEL=y ++CONFIG_KERNEL_UNCOMPRESSED=y ++CONFIG_KPROBES_ON_FTRACE=y ++CONFIG_KPROBES=y ++CONFIG_KRETPROBES=y ++CONFIG_KSM=y ++CONFIG_LATENCYTOP=y ++CONFIG_LIVEPATCH=y ++CONFIG_LOCK_STAT=y ++CONFIG_MACVLAN=y ++CONFIG_MACVTAP=y ++CONFIG_MAGIC_SYSRQ=y ++CONFIG_MAILBOX=y ++CONFIG_MEMCG=y ++CONFIG_MEMORY_HOTPLUG=y ++CONFIG_MEMORY_HOTREMOVE=y ++CONFIG_NAMESPACES=y ++CONFIG_NET_9P_VIRTIO=y ++CONFIG_NET_9P=y ++CONFIG_NET_ACT_BPF=y ++CONFIG_NET_ACT_GACT=y ++CONFIG_NETDEVICES=y ++CONFIG_NETFILTER_XT_MATCH_BPF=y ++CONFIG_NETFILTER_XT_TARGET_MARK=y ++CONFIG_NET_KEY=y ++CONFIG_NET_SCH_FQ=y ++CONFIG_NET_VRF=y ++CONFIG_NET=y ++CONFIG_NF_TABLES=y ++CONFIG_NLMON=y ++CONFIG_NO_HZ_IDLE=y ++CONFIG_NR_CPUS=256 ++CONFIG_NUMA=y ++CONFIG_OVERLAY_FS=y ++CONFIG_PACKET_DIAG=y ++CONFIG_PACKET=y ++CONFIG_PANIC_ON_OOPS=y ++CONFIG_PARTITION_ADVANCED=y ++CONFIG_PCI_HOST_GENERIC=y ++CONFIG_PCI=y ++CONFIG_PL320_MBOX=y ++CONFIG_POSIX_MQUEUE=y 
++CONFIG_PROC_KCORE=y ++CONFIG_PROFILING=y ++CONFIG_PROVE_LOCKING=y ++CONFIG_PTDUMP_DEBUGFS=y ++CONFIG_RC_DEVICES=y ++CONFIG_RC_LOOPBACK=y ++CONFIG_RTC_CLASS=y ++CONFIG_RTC_DRV_PL031=y ++CONFIG_RT_GROUP_SCHED=y ++CONFIG_SAMPLE_SECCOMP=y ++CONFIG_SAMPLES=y ++CONFIG_SCHED_AUTOGROUP=y ++CONFIG_SCHED_TRACER=y ++CONFIG_SCSI_CONSTANTS=y ++CONFIG_SCSI_LOGGING=y ++CONFIG_SCSI_SCAN_ASYNC=y ++CONFIG_SCSI_VIRTIO=y ++CONFIG_SCSI=y ++CONFIG_SECURITY_NETWORK=y ++CONFIG_SERIAL_AMBA_PL011_CONSOLE=y ++CONFIG_SERIAL_AMBA_PL011=y ++CONFIG_STACK_TRACER=y ++CONFIG_STATIC_KEYS_SELFTEST=y ++CONFIG_SYSVIPC=y ++CONFIG_TASK_DELAY_ACCT=y ++CONFIG_TASK_IO_ACCOUNTING=y ++CONFIG_TASKSTATS=y ++CONFIG_TASK_XACCT=y ++CONFIG_TCG_TIS=y ++CONFIG_TCG_TPM=y ++CONFIG_TCP_CONG_ADVANCED=y ++CONFIG_TCP_CONG_DCTCP=y ++CONFIG_TLS=y ++CONFIG_TMPFS_POSIX_ACL=y ++CONFIG_TMPFS=y ++CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y ++CONFIG_TRANSPARENT_HUGEPAGE=y ++CONFIG_TUN=y ++CONFIG_UNIX=y ++CONFIG_UPROBES=y ++CONFIG_USELIB=y ++CONFIG_USER_NS=y ++CONFIG_VETH=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_CONSOLE=y ++CONFIG_VIRTIO_FS=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VLAN_8021Q=y ++CONFIG_VSOCKETS=y ++CONFIG_XFRM_USER=y +-- +2.38.1 + diff --git a/ci/diffs/0001-selftests-bpf-Add-json-summary-option-to-test_progs.patch b/ci/diffs/0001-selftests-bpf-Add-json-summary-option-to-test_progs.patch new file mode 100644 index 0000000000000..ec424feb15c4e --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Add-json-summary-option-to-test_progs.patch @@ -0,0 +1,357 @@ +From 2be7aa76cc69633930fb747e1d85d33a63a60c02 Mon Sep 17 00:00:00 2001 +From: Manu Bretelle +Date: Fri, 17 Mar 2023 09:32:56 -0700 +Subject: [PATCH] selftests/bpf: Add --json-summary option to test_progs + +Currently, test_progs outputs all stdout/stderr as it runs, and when it +is done, prints a summary. + +It is non-trivial for tooling to parse that output and extract meaningful +information from it. + +This change adds a new option, `--json-summary`/`-J` that let the caller +specify a file where `test_progs{,-no_alu32}` can write a summary of the +run in a json format that can later be parsed by tooling. + +Currently, it creates a summary section with successes/skipped/failures +followed by a list of failed tests and subtests. + +A test contains the following fields: +- name: the name of the test +- number: the number of the test +- message: the log message that was printed by the test. +- failed: A boolean indicating whether the test failed or not. Currently +we only output failed tests, but in the future, successful tests could +be added. +- subtests: A list of subtests associated with this test. + +A subtest contains the following fields: +- name: same as above +- number: sanme as above +- message: the log message that was printed by the subtest. 
+- failed: same as above but for the subtest + +An example run and json content below: +``` +$ sudo ./test_progs -a $(grep -v '^#' ./DENYLIST.aarch64 | awk '{print +$1","}' | tr -d '\n') -j -J /tmp/test_progs.json +$ jq < /tmp/test_progs.json | head -n 30 +{ + "success": 29, + "success_subtest": 23, + "skipped": 3, + "failed": 28, + "results": [ + { + "name": "bpf_cookie", + "number": 10, + "message": "test_bpf_cookie:PASS:skel_open 0 nsec\n", + "failed": true, + "subtests": [ + { + "name": "multi_kprobe_link_api", + "number": 2, + "message": "kprobe_multi_link_api_subtest:PASS:load_kallsyms 0 nsec\nlibbpf: extern 'bpf_testmod_fentry_test1' (strong): not resolved\nlibbpf: failed to load object 'kprobe_multi'\nlibbpf: failed to load BPF skeleton 'kprobe_multi': -3\nkprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3\n", + "failed": true + }, + { + "name": "multi_kprobe_attach_api", + "number": 3, + "message": "libbpf: extern 'bpf_testmod_fentry_test1' (strong): not resolved\nlibbpf: failed to load object 'kprobe_multi'\nlibbpf: failed to load BPF skeleton 'kprobe_multi': -3\nkprobe_multi_attach_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3\n", + "failed": true + }, + { + "name": "lsm", + "number": 8, + "message": "lsm_subtest:PASS:lsm.link_create 0 nsec\nlsm_subtest:FAIL:stack_mprotect unexpected stack_mprotect: actual 0 != expected -1\n", + "failed": true + } +``` + +The file can then be used to print a summary of the test run and list of +failing tests/subtests: + +``` +$ jq -r < /tmp/test_progs.json '"Success: \(.success)/\(.success_subtest), Skipped: \(.skipped), Failed: \(.failed)"' + +Success: 29/23, Skipped: 3, Failed: 28 +$ jq -r < /tmp/test_progs.json '.results | map([ + if .failed then "#\(.number) \(.name)" else empty end, + ( + . 
as {name: $tname, number: $tnum} | .subtests | map( + if .failed then "#\($tnum)/\(.number) \($tname)/\(.name)" else empty end + ) + ) +]) | flatten | .[]' | head -n 20 + #10 bpf_cookie + #10/2 bpf_cookie/multi_kprobe_link_api + #10/3 bpf_cookie/multi_kprobe_attach_api + #10/8 bpf_cookie/lsm + #15 bpf_mod_race + #15/1 bpf_mod_race/ksym (used_btfs UAF) + #15/2 bpf_mod_race/kfunc (kfunc_btf_tab UAF) + #36 cgroup_hierarchical_stats + #61 deny_namespace + #61/1 deny_namespace/unpriv_userns_create_no_bpf + #73 fexit_stress + #83 get_func_ip_test + #99 kfunc_dynptr_param + #99/1 kfunc_dynptr_param/dynptr_data_null + #99/4 kfunc_dynptr_param/dynptr_data_null + #100 kprobe_multi_bench_attach + #100/1 kprobe_multi_bench_attach/kernel + #100/2 kprobe_multi_bench_attach/modules + #101 kprobe_multi_test + #101/1 kprobe_multi_test/skel_api +``` + +Signed-off-by: Manu Bretelle +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20230317163256.3809328-1-chantr4@gmail.com +--- + tools/testing/selftests/bpf/Makefile | 4 +- + tools/testing/selftests/bpf/json_writer.c | 1 + + tools/testing/selftests/bpf/json_writer.h | 1 + + tools/testing/selftests/bpf/test_progs.c | 83 +++++++++++++++++++++-- + tools/testing/selftests/bpf/test_progs.h | 1 + + 5 files changed, 84 insertions(+), 6 deletions(-) + create mode 120000 tools/testing/selftests/bpf/json_writer.c + create mode 120000 tools/testing/selftests/bpf/json_writer.h + +diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile +index b677dcd0b77a..59173eb636f5 100644 +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -234,6 +234,7 @@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) + CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o + TESTING_HELPERS := $(OUTPUT)/testing_helpers.o + TRACE_HELPERS := $(OUTPUT)/trace_helpers.o ++JSON_WRITER := $(OUTPUT)/json_writer.o + CAP_HELPERS := $(OUTPUT)/cap_helpers.o + + $(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS) +@@ -558,7 +559,8 @@ TRUNNER_BPF_PROGS_DIR := progs + TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ + network_helpers.c testing_helpers.c \ + btf_helpers.c flow_dissector_load.h \ +- cap_helpers.c test_loader.c xsk.c ++ cap_helpers.c test_loader.c xsk.c \ ++ json_writer.c + TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ + $(OUTPUT)/liburandom_read.so \ + $(OUTPUT)/xdp_synproxy \ +diff --git a/tools/testing/selftests/bpf/json_writer.c b/tools/testing/selftests/bpf/json_writer.c +new file mode 120000 +index 000000000000..5effa31e2f39 +--- /dev/null ++++ b/tools/testing/selftests/bpf/json_writer.c +@@ -0,0 +1 @@ ++../../../bpf/bpftool/json_writer.c +\ No newline at end of file +diff --git a/tools/testing/selftests/bpf/json_writer.h b/tools/testing/selftests/bpf/json_writer.h +new file mode 120000 +index 000000000000..e0a264c26752 +--- /dev/null ++++ b/tools/testing/selftests/bpf/json_writer.h +@@ -0,0 +1 @@ ++../../../bpf/bpftool/json_writer.h +\ No newline at end of file +diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c +index 6d5e3022c75f..d903e6a72a96 100644 +--- a/tools/testing/selftests/bpf/test_progs.c ++++ b/tools/testing/selftests/bpf/test_progs.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include "json_writer.h" + + static bool verbose(void) + { +@@ -269,10 +270,23 @@ static void print_subtest_name(int test_num, int subtest_num, + fprintf(env.stdout, "\n"); + } + ++static void 
jsonw_write_log_message(json_writer_t *w, char *log_buf, size_t log_cnt) ++{ ++ /* open_memstream (from stdio_hijack_init) ensures that log_bug is terminated by a ++ * null byte. Yet in parallel mode, log_buf will be NULL if there is no message. ++ */ ++ if (log_cnt) { ++ jsonw_string_field(w, "message", log_buf); ++ } else { ++ jsonw_string_field(w, "message", ""); ++ } ++} ++ + static void dump_test_log(const struct prog_test_def *test, + const struct test_state *test_state, + bool skip_ok_subtests, +- bool par_exec_result) ++ bool par_exec_result, ++ json_writer_t *w) + { + bool test_failed = test_state->error_cnt > 0; + bool force_log = test_state->force_log; +@@ -296,6 +310,16 @@ static void dump_test_log(const struct prog_test_def *test, + if (test_state->log_cnt && print_test) + print_test_log(test_state->log_buf, test_state->log_cnt); + ++ if (w && print_test) { ++ jsonw_start_object(w); ++ jsonw_string_field(w, "name", test->test_name); ++ jsonw_uint_field(w, "number", test->test_num); ++ jsonw_write_log_message(w, test_state->log_buf, test_state->log_cnt); ++ jsonw_bool_field(w, "failed", test_failed); ++ jsonw_name(w, "subtests"); ++ jsonw_start_array(w); ++ } ++ + for (i = 0; i < test_state->subtest_num; i++) { + subtest_state = &test_state->subtest_states[i]; + subtest_failed = subtest_state->error_cnt; +@@ -314,6 +338,20 @@ static void dump_test_log(const struct prog_test_def *test, + test->test_name, subtest_state->name, + test_result(subtest_state->error_cnt, + subtest_state->skipped)); ++ ++ if (w && print_subtest) { ++ jsonw_start_object(w); ++ jsonw_string_field(w, "name", subtest_state->name); ++ jsonw_uint_field(w, "number", i+1); ++ jsonw_write_log_message(w, subtest_state->log_buf, subtest_state->log_cnt); ++ jsonw_bool_field(w, "failed", subtest_failed); ++ jsonw_end_object(w); ++ } ++ } ++ ++ if (w && print_test) { ++ jsonw_end_array(w); ++ jsonw_end_object(w); + } + + print_test_result(test, test_state); +@@ -715,6 +753,7 @@ enum ARG_KEYS { + ARG_TEST_NAME_GLOB_DENYLIST = 'd', + ARG_NUM_WORKERS = 'j', + ARG_DEBUG = -1, ++ ARG_JSON_SUMMARY = 'J' + }; + + static const struct argp_option opts[] = { +@@ -740,6 +779,7 @@ static const struct argp_option opts[] = { + "Number of workers to run in parallel, default to number of cpus." }, + { "debug", ARG_DEBUG, NULL, 0, + "print extra debug information for test_progs." 
}, ++ { "json-summary", ARG_JSON_SUMMARY, "FILE", 0, "Write report in json format to this file."}, + {}, + }; + +@@ -870,6 +910,13 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) + case ARG_DEBUG: + env->debug = true; + break; ++ case ARG_JSON_SUMMARY: ++ env->json = fopen(arg, "w"); ++ if (env->json == NULL) { ++ perror("Failed to open json summary file"); ++ return -errno; ++ } ++ break; + case ARGP_KEY_ARG: + argp_usage(state); + break; +@@ -1017,7 +1064,7 @@ void crash_handler(int signum) + stdio_restore(); + if (env.test) { + env.test_state->error_cnt++; +- dump_test_log(env.test, env.test_state, true, false); ++ dump_test_log(env.test, env.test_state, true, false, NULL); + } + if (env.worker_id != -1) + fprintf(stderr, "[%d]: ", env.worker_id); +@@ -1124,7 +1171,7 @@ static void run_one_test(int test_num) + + stdio_restore(); + +- dump_test_log(test, state, false, false); ++ dump_test_log(test, state, false, false, NULL); + } + + struct dispatch_data { +@@ -1283,7 +1330,7 @@ static void *dispatch_thread(void *ctx) + } while (false); + + pthread_mutex_lock(&stdout_output_lock); +- dump_test_log(test, state, false, true); ++ dump_test_log(test, state, false, true, NULL); + pthread_mutex_unlock(&stdout_output_lock); + } /* while (true) */ + error: +@@ -1308,6 +1355,7 @@ static void calculate_summary_and_print_errors(struct test_env *env) + { + int i; + int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0; ++ json_writer_t *w = NULL; + + for (i = 0; i < prog_test_cnt; i++) { + struct test_state *state = &test_states[i]; +@@ -1324,6 +1372,22 @@ static void calculate_summary_and_print_errors(struct test_env *env) + succ_cnt++; + } + ++ if (env->json) { ++ w = jsonw_new(env->json); ++ if (!w) ++ fprintf(env->stderr, "Failed to create new JSON stream."); ++ } ++ ++ if (w) { ++ jsonw_start_object(w); ++ jsonw_uint_field(w, "success", succ_cnt); ++ jsonw_uint_field(w, "success_subtest", sub_succ_cnt); ++ jsonw_uint_field(w, "skipped", skip_cnt); ++ jsonw_uint_field(w, "failed", fail_cnt); ++ jsonw_name(w, "results"); ++ jsonw_start_array(w); ++ } ++ + /* + * We only print error logs summary when there are failed tests and + * verbose mode is not enabled. Otherwise, results may be incosistent. 
+@@ -1340,10 +1404,19 @@ static void calculate_summary_and_print_errors(struct test_env *env) + if (!state->tested || !state->error_cnt) + continue; + +- dump_test_log(test, state, true, true); ++ dump_test_log(test, state, true, true, w); + } + } + ++ if (w) { ++ jsonw_end_array(w); ++ jsonw_end_object(w); ++ jsonw_destroy(&w); ++ } ++ ++ if (env->json) ++ fclose(env->json); ++ + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt); + +diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h +index 3cbf005747ed..4b06b8347cd4 100644 +--- a/tools/testing/selftests/bpf/test_progs.h ++++ b/tools/testing/selftests/bpf/test_progs.h +@@ -114,6 +114,7 @@ struct test_env { + FILE *stdout; + FILE *stderr; + int nr_cpus; ++ FILE *json; + + int succ_cnt; /* successful tests */ + int sub_succ_cnt; /* successful sub-tests */ +-- +2.39.2 + diff --git a/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch b/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch new file mode 100644 index 0000000000000..11d5233552b07 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch @@ -0,0 +1,43 @@ +From fa95252a62bc120fb1f939c46991280ba1375196 Mon Sep 17 00:00:00 2001 +From: Song Liu +Date: Thu, 2 Mar 2023 13:49:44 -0800 +Subject: [PATCH] selftests/bpf: Adjust expected error message for + test_global_func10.c + +For test programs that are expected to be failed verifier, we use +__failure __msg(...) to specify the expected error message. However, the +error message may change slightly among different versions of llvm. For +example, in [1], the program compiled by llvm-17 gets + + "invalid indirect access to stack ..." + +but the same program compile by llvm-16 gets + + "invalid indirect read from stack ..." + +To avoid such issues, only compares "invalid indirect" part of the error +message for test_global_func10.c. + +[1] https://github.com/kernel-patches/bpf/actions/runs/4288572350/jobs/7533052993 + +Signed-off-by: Song Liu +--- + tools/testing/selftests/bpf/progs/test_global_func10.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c +index 98327bdbbfd2..7a591d946027 100644 +--- a/tools/testing/selftests/bpf/progs/test_global_func10.c ++++ b/tools/testing/selftests/bpf/progs/test_global_func10.c +@@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big) + } + + SEC("cgroup_skb/ingress") +-__failure __msg("invalid indirect read from stack") ++__failure __msg("invalid indirect") + int global_func10(struct __sk_buff *skb) + { + const struct Small small = {.x = skb->len }; +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch b/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch new file mode 100644 index 0000000000000..14a62c2d5d6c8 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch @@ -0,0 +1,50 @@ +From 11e456cae91e9044cb12c2b037b52c9b268925f7 Mon Sep 17 00:00:00 2001 +From: Rong Tao +Date: Fri, 24 Feb 2023 23:10:02 +0800 +Subject: [PATCH bpf] selftests/bpf: Fix compilation errors: Assign a value to + a constant + +Commit bc292ab00f6c("mm: introduce vma->vm_flags wrapper functions") +turns the vm_flags into a const variable. 
+ +Added bpf_find_vma test in commit f108662b27c9("selftests/bpf: Add tests +for bpf_find_vma") to assign values to variables that declare const in +find_vma_fail1.c programs, which is an error to the compiler and does not +test BPF verifiers. It is better to replace 'const vm_flags_t vm_flags' +with 'unsigned long vm_start' for testing. + + $ make -C tools/testing/selftests/bpf/ -j8 + ... + progs/find_vma_fail1.c:16:16: error: cannot assign to non-static data + member 'vm_flags' with const-qualified type 'const vm_flags_t' (aka + 'const unsigned long') + vma->vm_flags |= 0x55; + ~~~~~~~~~~~~~ ^ + ../tools/testing/selftests/bpf/tools/include/vmlinux.h:1898:20: + note: non-static data member 'vm_flags' declared const here + const vm_flags_t vm_flags; + ~~~~~~~~~~~`~~~~~~^~~~~~~~ + +Signed-off-by: Rong Tao +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/tencent_CB281722B3C1BD504C16CDE586CACC2BE706@qq.com +--- + tools/testing/selftests/bpf/progs/find_vma_fail1.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c +index b3b326b8e2d1..47d5dedff554 100644 +--- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c ++++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c +@@ -13,7 +13,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma, + struct callback_ctx *data) + { + /* writing to vma, which is illegal */ +- vma->vm_flags |= 0x55; ++ vma->vm_start = 0xffffffffff600000; + + return 0; + } +-- +2.39.0 + diff --git a/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch b/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch new file mode 100644 index 0000000000000..41fd6e38e8678 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch @@ -0,0 +1,36 @@ +From: Ilya Leoshkevich +Subject: [PATCH bpf-next 07/24] selftests/bpf: Fix decap_sanity_ns cleanup +Date: Wed, 25 Jan 2023 22:38:00 +0100 + +decap_sanity prints the following on the 1st run: + + decap_sanity: sh: 1: Syntax error: Bad fd number + +and the following on the 2nd run: + + Cannot create namespace file "/run/netns/decap_sanity_ns": File exists + +The problem is that the cleanup command has a typo and does nothing. +Fix the typo. 
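The difference between the two redirections is easy to reproduce on its own; the snippet below only illustrates the shell behaviour and assumes dash is what /bin/sh resolves to (as the quoted "sh: 1:" error suggests):

```sh
# ">&" in POSIX sh duplicates a file descriptor, so the word after it must
# be an fd number; "/dev/null" is not, and the command is rejected outright:
dash -c 'ip netns del decap_sanity_ns >& /dev/null'
# -> "Syntax error: Bad fd number"; the namespace is never deleted.

# With "&>" the command does run (bash redirects stdout and stderr; dash
# parses it as a background job plus a separate "> /dev/null"), so the
# cleanup takes effect either way:
bash -c 'ip netns del decap_sanity_ns &> /dev/null'
```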
+ +Signed-off-by: Ilya Leoshkevich +--- + tools/testing/selftests/bpf/prog_tests/decap_sanity.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +index 0b2f73b88c53..2853883b7cbb 100644 +--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c ++++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +@@ -80,6 +80,6 @@ void test_decap_sanity(void) + bpf_tc_hook_destroy(&qdisc_hook); + close_netns(nstoken); + } +- system("ip netns del " NS_TEST " >& /dev/null"); ++ system("ip netns del " NS_TEST " &> /dev/null"); + decap_sanity__destroy(skel); + } +-- +2.39.1 + + diff --git a/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch b/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch new file mode 100644 index 0000000000000..7d3a35de2a636 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch @@ -0,0 +1,118 @@ +From 94d52a19180726ee8ddc70bea75d6605e1dd6029 Mon Sep 17 00:00:00 2001 +From: Manu Bretelle +Date: Fri, 21 Oct 2022 14:07:01 -0700 +Subject: [PATCH] selftests/bpf: Initial DENYLIST for aarch64 + +Those tests are currently failing on aarch64, ignore them until they are +individually addressed. + +Using this deny list, vmtest.sh ran successfully using + +LLVM_STRIP=llvm-strip-16 CLANG=clang-16 \ + tools/testing/selftests/bpf/vmtest.sh -- \ + ./test_progs -d \ + \"$(cat tools/testing/selftests/bpf/DENYLIST{,.aarch64} \ + | cut -d'#' -f1 \ + | sed -e 's/^[[:space:]]*//' \ + -e 's/[[:space:]]*$//' \ + | tr -s '\n' ','\ + )\" + +Signed-off-by: Manu Bretelle +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20221021210701.728135-5-chantr4@gmail.com +--- + tools/testing/selftests/bpf/DENYLIST.aarch64 | 81 ++++++++++++++++++++ + 1 file changed, 81 insertions(+) + create mode 100644 tools/testing/selftests/bpf/DENYLIST.aarch64 + +diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 +new file mode 100644 +index 000000000000..09416d5d2e33 +--- /dev/null ++++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 +@@ -0,0 +1,81 @@ ++bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 ++bpf_cookie/lsm ++bpf_cookie/multi_kprobe_attach_api ++bpf_cookie/multi_kprobe_link_api ++bpf_cookie/trampoline ++bpf_loop/check_callback_fn_stop # link unexpected error: -524 ++bpf_loop/check_invalid_flags ++bpf_loop/check_nested_calls ++bpf_loop/check_non_constant_callback ++bpf_loop/check_nr_loops ++bpf_loop/check_null_callback_ctx ++bpf_loop/check_stack ++bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) ++bpf_tcp_ca/dctcp_fallback ++btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 ++cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) ++d_path/basic # setup attach failed: -524 ++deny_namespace # attach unexpected error: -524 (errno 524) ++fentry_fexit # fentry_attach unexpected error: -1 (errno 524) ++fentry_test # fentry_attach unexpected error: -1 (errno 524) ++fexit_sleep # fexit_attach fexit attach failed: -1 ++fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 ++fexit_test # fexit_attach unexpected error: -1 (errno 524) ++get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) ++get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) 
++htab_update/reenter_update ++kfree_skb # attach fentry unexpected error: -524 (trampoline) ++kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF ++kfunc_call/subprog_lskel # skel unexpected error: -2 ++kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 ++kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 ++kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 ++kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) ++ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF ++ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 ++libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) ++lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) ++lru_bug # lru_bug__attach unexpected error: -524 (errno 524) ++modify_return # modify_return__attach failed unexpected error: -524 (errno 524) ++module_attach # skel_attach skeleton attach failed: -524 ++mptcp/base # run_test mptcp unexpected error: -524 (errno 524) ++netcnt # packets unexpected packets: actual 10001 != expected 10000 ++recursion # skel_attach unexpected error: -524 (errno 524) ++ringbuf # skel_attach skeleton attachment failed: -1 ++setget_sockopt # attach_cgroup unexpected error: -524 ++sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) ++skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) ++socket_cookie # prog_attach unexpected error: -524 ++stacktrace_build_id # compare_stack_ips stackmap vs. 
stack_amap err -1 errno 2 ++task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) ++task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) ++test_bprm_opts # attach attach failed: -524 ++test_ima # attach attach failed: -524 ++test_local_storage # attach lsm attach failed: -524 ++test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) ++test_overhead # attach_fentry unexpected error: -524 ++timer # timer unexpected error: -524 (errno 524) ++timer_crash # timer_crash__attach unexpected error: -524 (errno 524) ++timer_mim # timer_mim unexpected error: -524 (errno 524) ++trace_printk # trace_printk__attach unexpected error: -1 (errno 524) ++trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) ++tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) ++trampoline_count # attach_prog unexpected error: -524 ++unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) ++user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) ++user_ringbuf/test_user_ringbuf_post_producer_wrong_offset ++user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz ++user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) ++user_ringbuf/test_user_ringbuf_sample_full_ring_buffer ++user_ringbuf/test_user_ringbuf_post_alignment_autoadjust ++user_ringbuf/test_user_ringbuf_overfill ++user_ringbuf/test_user_ringbuf_discards_properly_ignored ++user_ringbuf/test_user_ringbuf_loop ++user_ringbuf/test_user_ringbuf_msg_protocol ++user_ringbuf/test_user_ringbuf_blocking_reserve ++verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) ++vmlinux # skel_attach skeleton attach failed: -524 +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch b/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch new file mode 100644 index 0000000000000..08f2352bc1992 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch @@ -0,0 +1,57 @@ +From 5ed88f81511ce695692f0510ab3ca17eee68eff6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20M=C3=BCller?= +Date: Tue, 25 Oct 2022 23:15:46 +0000 +Subject: [PATCH] selftests/bpf: Panic on hard/soft lockup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When running tests, we should probably accept any help we can get when +it comes to detecting issues early or making them more debuggable. We +have seen a few cases where a test_progs_noalu32 run, for example, +encountered a soft lockup and stopped making progress. It was only +interrupted once we hit the overall test timeout [0]. We can not and do +not want to necessarily rely on test timeouts, because those rely on +infrastructure provided by the environment we run in (and which is not +present in tools/testing/selftests/bpf/vmtest.sh, for example). +To that end, let's enable panics on soft as well as hard lockups to fail +fast should we encounter one. That's happening in the configuration +indented to be used for selftests (including when using vmtest.sh or +when running in BPF CI). 
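For reference, the same behaviour can also be switched on at runtime on a kernel that already has the lockup detectors built in; the sysctls below are the standard knobs and are shown only as an illustration:

```sh
# Runtime equivalents of the two config options enabled above
# (requires the soft/hard lockup detectors to be available).
sysctl -w kernel.softlockup_panic=1
sysctl -w kernel.hardlockup_panic=1
```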
+ +[0] https://github.com/kernel-patches/bpf/runs/7844499997 + +Signed-off-by: Daniel Müller +Link: https://lore.kernel.org/r/20221025231546.811766-1-deso@posteo.net +Signed-off-by: Alexei Starovoitov +--- + tools/testing/selftests/bpf/config | 2 ++ + tools/testing/selftests/bpf/config.x86_64 | 1 - + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config +index 921356..7a99a6 100644 +--- a/tools/testing/selftests/bpf/config ++++ b/tools/testing/selftests/bpf/config +@@ -1,4 +1,6 @@ + CONFIG_BLK_DEV_LOOP=y ++CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y ++CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y + CONFIG_BPF=y + CONFIG_BPF_EVENTS=y + CONFIG_BPF_JIT=y +diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 +index 21ce5e..dd97d6 100644 +--- a/tools/testing/selftests/bpf/config.x86_64 ++++ b/tools/testing/selftests/bpf/config.x86_64 +@@ -18,7 +18,6 @@ CONFIG_BLK_DEV_RAM=y + CONFIG_BLK_DEV_RAM_SIZE=16384 + CONFIG_BLK_DEV_THROTTLING=y + CONFIG_BONDING=y +-CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y + CONFIG_BOOTTIME_TRACING=y + CONFIG_BPF_JIT_ALWAYS_ON=y + CONFIG_BPF_KPROBE_OVERRIDE=y +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch new file mode 100644 index 0000000000000..e1e5f01a59930 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch @@ -0,0 +1,77 @@ +From de9c8d848d90cf2e53aced50b350827442ca5a4f Mon Sep 17 00:00:00 2001 +From: Martin KaFai Lau +Date: Wed, 12 Oct 2022 15:12:35 -0700 +Subject: [PATCH] selftests/bpf: S/iptables/iptables-legacy/ in the bpf_nf and + xdp_synproxy test + +The recent vm image in CI has reported error in selftests that use +the iptables command. Manu Bretelle has pointed out the difference +in the recent vm image that the iptables is sym-linked to the iptables-nft. +With this knowledge, I can also reproduce the CI error by manually running +with the 'iptables-nft'. + +This patch is to replace the iptables command with iptables-legacy +to unblock the CI tests. 
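A quick way to confirm which backend an image's iptables points at is shown below; the symlink target is an assumption and varies by distribution:

```sh
# The version banner names the backend explicitly:
iptables --version            # e.g. "iptables v1.8.7 (nf_tables)" vs "(legacy)"
# On Debian-style images the binary is usually an alternatives symlink:
readlink -f "$(command -v iptables)"   # e.g. /usr/sbin/xtables-nft-multi
```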
+ +Signed-off-by: Martin KaFai Lau +Signed-off-by: Andrii Nakryiko +Acked-by: David Vernet +Link: https://lore.kernel.org/bpf/20221012221235.3529719-1-martin.lau@linux.dev +--- + tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 6 +++--- + tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +index 8a838ea8bdf3..c8ba4009e4ab 100644 +--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +@@ -49,14 +49,14 @@ static int connect_to_server(int srv_fd) + + static void test_bpf_nf_ct(int mode) + { +- const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; ++ const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; + int srv_fd = -1, client_fd = -1, srv_client_fd = -1; + struct sockaddr_in peer_addr = {}; + struct test_bpf_nf *skel; + int prog_fd, err; + socklen_t len; + u16 srv_port; +- char cmd[64]; ++ char cmd[128]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), +@@ -69,7 +69,7 @@ static void test_bpf_nf_ct(int mode) + + /* Enable connection tracking */ + snprintf(cmd, sizeof(cmd), iptables, "-A"); +- if (!ASSERT_OK(system(cmd), "iptables")) ++ if (!ASSERT_OK(system(cmd), cmd)) + goto end; + + srv_port = (mode == TEST_XDP) ? 5005 : 5006; +diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +index 75550a40e029..c72083885b6d 100644 +--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +@@ -94,12 +94,12 @@ static void test_synproxy(bool xdp) + SYS("sysctl -w net.ipv4.tcp_syncookies=2"); + SYS("sysctl -w net.ipv4.tcp_timestamps=1"); + SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); +- SYS("iptables -t raw -I PREROUTING \ ++ SYS("iptables-legacy -t raw -I PREROUTING \ + -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); +- SYS("iptables -t filter -A INPUT \ ++ SYS("iptables-legacy -t filter -A INPUT \ + -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ + -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); +- SYS("iptables -t filter -A INPUT \ ++ SYS("iptables-legacy -t filter -A INPUT \ + -i tmp1 -m state --state INVALID -j DROP"); + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch b/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch new file mode 100644 index 0000000000000..b4fc1bb37dbdc --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch @@ -0,0 +1,45 @@ +From e561fc8365da0215f68cfcffb6c309d1d7eb8c2b Mon Sep 17 00:00:00 2001 +From: Song Liu +Date: Tue, 13 Dec 2022 14:05:00 -0800 +Subject: [PATCH bpf-next] selftests/bpf: Select + CONFIG_FUNCTION_ERROR_INJECTION +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +BPF selftests require CONFIG_FUNCTION_ERROR_INJECTION to work. However, +CONFIG_FUNCTION_ERROR_INJECTION is no longer 'y' by default after recent +changes. 
As a result, we are seeing errors like the following from BPF CI: + + bpf_testmod_test_read() is not modifiable + __x64_sys_setdomainname is not sleepable + __x64_sys_getpgid is not sleepable + +Fix this by explicitly selecting CONFIG_FUNCTION_ERROR_INJECTION in the +selftest config. + +Fixes: a4412fdd49dc ("error-injection: Add prompt for function error injection") +Reported-by: Daniel Müller +Signed-off-by: Song Liu +Signed-off-by: Andrii Nakryiko +Acked-by: Daniel Müller +Link: https://lore.kernel.org/bpf/20221213220500.3427947-1-song@kernel.org +--- + tools/testing/selftests/bpf/config | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config +index 612f699dc4f7..63cd4ab70171 100644 +--- a/tools/testing/selftests/bpf/config ++++ b/tools/testing/selftests/bpf/config +@@ -16,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y + CONFIG_DYNAMIC_FTRACE=y + CONFIG_FPROBE=y + CONFIG_FTRACE_SYSCALLS=y ++CONFIG_FUNCTION_ERROR_INJECTION=y + CONFIG_FUNCTION_TRACER=y + CONFIG_GENEVE=y + CONFIG_IKCONFIG=y +-- +2.30.2 + diff --git a/ci/diffs/0001-tracing-fprobe-Initialize-ret-valiable-to-fix-smatch.patch b/ci/diffs/0001-tracing-fprobe-Initialize-ret-valiable-to-fix-smatch.patch new file mode 100644 index 0000000000000..9547c62c40bf9 --- /dev/null +++ b/ci/diffs/0001-tracing-fprobe-Initialize-ret-valiable-to-fix-smatch.patch @@ -0,0 +1,68 @@ +From d3484f640bc82cff459beb85a00f7ebab20f0a41 Mon Sep 17 00:00:00 2001 +From: "Masami Hiramatsu (Google)" +Date: Sun, 9 Apr 2023 11:28:31 +0900 +Subject: [PATCH] tracing: fprobe: Initialize ret valiable to fix smatch error + +The commit 39d954200bf6 ("fprobe: Skip exit_handler if entry_handler returns +!0") introduced a hidden dependency of 'ret' local variable in the +fprobe_handler(), Smatch warns the `ret` can be accessed without +initialization. + + kernel/trace/fprobe.c:59 fprobe_handler() + error: uninitialized symbol 'ret'. + +kernel/trace/fprobe.c + 49 fpr->entry_ip = ip; + 50 if (fp->entry_data_size) + 51 entry_data = fpr->data; + 52 } + 53 + 54 if (fp->entry_handler) + 55 ret = fp->entry_handler(fp, ip, ftrace_get_regs(fregs), entry_data); + +ret is only initialized if there is an ->entry_handler + + 56 + 57 /* If entry_handler returns !0, nmissed is not counted. */ + 58 if (rh) { + +rh is only true if there is an ->exit_handler. Presumably if you have +and ->exit_handler that means you also have a ->entry_handler but Smatch +is not smart enough to figure it out. + +--> 59 if (ret) + ^^^ +Warning here. 
+ + 60 rethook_recycle(rh); + 61 else + 62 rethook_hook(rh, ftrace_get_regs(fregs), true); + 63 } + 64 out: + 65 ftrace_test_recursion_unlock(bit); + 66 } + +Reported-by: Dan Carpenter +Link: https://lore.kernel.org/all/85429a5c-a4b9-499e-b6c0-cbd313291c49@kili.mountain +Fixes: 39d954200bf6 ("fprobe: Skip exit_handler if entry_handler returns !0") +Signed-off-by: Masami Hiramatsu (Google) +--- + kernel/trace/fprobe.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c +index 9abb3905bc8e..293184227394 100644 +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -27,7 +27,7 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip, + struct rethook_node *rh = NULL; + struct fprobe *fp; + void *entry_data = NULL; +- int bit, ret; ++ int bit, ret = 0; + + fp = container_of(ops, struct fprobe, ops); + if (fprobe_disabled(fp)) +-- +2.34.1 + diff --git a/ci/diffs/0001-veth-take-into-account-peer-device-for-NETDEV_XDP_AC.patch b/ci/diffs/0001-veth-take-into-account-peer-device-for-NETDEV_XDP_AC.patch new file mode 100644 index 0000000000000..b97dba0accaee --- /dev/null +++ b/ci/diffs/0001-veth-take-into-account-peer-device-for-NETDEV_XDP_AC.patch @@ -0,0 +1,83 @@ +From 8267fc71abb2dc47338570e56dd3473a58313fce Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Mon, 17 Apr 2023 23:53:22 +0200 +Subject: [PATCH] veth: take into account peer device for + NETDEV_XDP_ACT_NDO_XMIT xdp_features flag + +For veth pairs, NETDEV_XDP_ACT_NDO_XMIT is supported by the current +device if the peer one is running a XDP program or if it has GRO enabled. +Fix the xdp_features flags reporting considering peer device and not +current one for NETDEV_XDP_ACT_NDO_XMIT. + +Fixes: fccca038f300 ("veth: take into account device reconfiguration for xdp_features flag") +Signed-off-by: Lorenzo Bianconi +Link: https://lore.kernel.org/r/4f1ca6f6f6b42ae125bfdb5c7782217c83968b2e.1681767806.git.lorenzo@kernel.org +Signed-off-by: Alexei Starovoitov +--- + drivers/net/veth.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index e1b38fbf1dd9..4b3c6647edc6 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -1262,11 +1262,12 @@ static void veth_set_xdp_features(struct net_device *dev) + + peer = rtnl_dereference(priv->peer); + if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) { ++ struct veth_priv *priv_peer = netdev_priv(peer); + xdp_features_t val = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG; + +- if (priv->_xdp_prog || veth_gro_requested(dev)) ++ if (priv_peer->_xdp_prog || veth_gro_requested(peer)) + val |= NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_set_features_flag(dev, val); +@@ -1504,19 +1505,23 @@ static int veth_set_features(struct net_device *dev, + { + netdev_features_t changed = features ^ dev->features; + struct veth_priv *priv = netdev_priv(dev); ++ struct net_device *peer; + int err; + + if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) + return 0; + ++ peer = rtnl_dereference(priv->peer); + if (features & NETIF_F_GRO) { + err = veth_napi_enable(dev); + if (err) + return err; + +- xdp_features_set_redirect_target(dev, true); ++ if (peer) ++ xdp_features_set_redirect_target(peer, true); + } else { +- xdp_features_clear_redirect_target(dev); ++ if (peer) ++ xdp_features_clear_redirect_target(peer); + veth_napi_del(dev); + } + return 0; +@@ -1598,13 
+1603,13 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, + peer->max_mtu = max_mtu; + } + +- xdp_features_set_redirect_target(dev, true); ++ xdp_features_set_redirect_target(peer, true); + } + + if (old_prog) { + if (!prog) { +- if (!veth_gro_requested(dev)) +- xdp_features_clear_redirect_target(dev); ++ if (peer && !veth_gro_requested(dev)) ++ xdp_features_clear_redirect_target(peer); + + if (dev->flags & IFF_UP) + veth_disable_xdp(dev); +-- +2.34.1 + diff --git a/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch b/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch new file mode 100644 index 0000000000000..c5f90daa56d3b --- /dev/null +++ b/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch @@ -0,0 +1,44 @@ +Recently, ld.lld moved from '--undefined-version' to +'--no-undefined-version' as the default, which breaks building the vDSO +when CONFIG_X86_SGX is not set: + + ld.lld: error: version script assignment of 'LINUX_2.6' to symbol '__vdso_sgx_enter_enclave' failed: symbol not defined + +__vdso_sgx_enter_enclave is only included in the vDSO when +CONFIG_X86_SGX is set. Only export it if it will be present in the final +object, which clears up the error. + +Link: https://github.com/ClangBuiltLinux/linux/issues/1756 +Signed-off-by: Nathan Chancellor +--- + +It would be nice if this could be picked up for an -rc release but I +won't argue otherwise. + +Alternatively, we could add '--undefined-version' to the vDSO ldflags +but this does not seem unreasonable to me. + + arch/x86/entry/vdso/vdso.lds.S | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S +index 4bf48462fca7..e8c60ae7a7c8 100644 +--- a/arch/x86/entry/vdso/vdso.lds.S ++++ b/arch/x86/entry/vdso/vdso.lds.S +@@ -27,7 +27,9 @@ VERSION { + __vdso_time; + clock_getres; + __vdso_clock_getres; ++#ifdef CONFIG_X86_SGX + __vdso_sgx_enter_enclave; ++#endif + local: *; + }; + } + +base-commit: f0c4d9fc9cc9462659728d168387191387e903cc + +-- +2.38.1 + + diff --git a/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch b/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch new file mode 100644 index 0000000000000..2db04e0b9670c --- /dev/null +++ b/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch @@ -0,0 +1,39 @@ +From 91c614a38376374ff39c4cc678c2c5cd22cbf8fc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20M=C3=BCller?= +Date: Wed, 26 Oct 2022 13:52:28 -0700 +Subject: [PATCH] selftests/bpf: Set CONFIG_BOOTPARAM_HUNG_TASK_PANIC +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +With commit 5ed88f81511ce ("selftests/bpf: Panic on hard/soft lockup") +we enabled the means to panic test runs quickly when they are stuck +because of a hard or soft lockup. What we did not include is the means +to do the same when a hung task is detected. The reasoning there was +that virtualization effects may lead to delays and, hence, spurious +failures. +However, we see the occasional CI timeout when running the test_progs +selftest with internal parallelism enabled (-j) that is not caused by a +hard or soft lockup but due to a hung task. Hence, it makes sense to +enable this detection as well. But let's give it some mileage first +before upstreaming, though, and only include it in BPF CI. 
+ +Signed-off-by: Daniel Müller +--- + tools/testing/selftests/bpf/config | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config +index 7a99a6..6c6821a 100644 +--- a/tools/testing/selftests/bpf/config ++++ b/tools/testing/selftests/bpf/config +@@ -1,5 +1,6 @@ + CONFIG_BLK_DEV_LOOP=y + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y ++CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y + CONFIG_BPF=y + CONFIG_BPF_EVENTS=y +-- +2.30.2 + diff --git a/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch b/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch new file mode 100644 index 0000000000000..9070b76442dda --- /dev/null +++ b/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch @@ -0,0 +1,104 @@ +From 038fafe1d1c92b8488e5e71ebea819050219dd6f Mon Sep 17 00:00:00 2001 +From: Andrii Nakryiko +Date: Wed, 2 Nov 2022 11:04:17 -0700 +Subject: [PATCH 2/2] tools headers uapi: pull in stddef.h to fix BPF selftests + build in CI + +With recent sync of linux/in.h tools/include headers are now relying on +__DECLARE_FLEX_ARRAY macro, which isn't itself defined inside +tools/include headers anywhere and is instead assumed to be present in +system-wide UAPI header. This breaks isolated environments that don't +have kernel UAPI headers installed system-wide, like BPF CI ([0]). + +To fix this, bring in include/uapi/linux/stddef.h into tools/include. We +can't just copy/paste it, though, it has to be processed with +scripts/headers_install.sh, which has a dependency on scripts/unifdef. +So the full command to (re-)generate stddef.h for inclusion into +tools/include directory is: + + $ make scripts_unifdef && \ + cp $KBUILD_OUTPUT/scripts/unifdef scripts/ && \ + scripts/headers_install.sh include/uapi/linux/stddef.h tools/include/uapi/linux/stddef.h + +This assumes KBUILD_OUTPUT envvar is set and used for out-of-tree builds. 
+ + [0] https://github.com/kernel-patches/bpf/actions/runs/3379432493/jobs/5610982609 + +Cc: Jakub Kicinski +Cc: Arnaldo Carvalho de Melo +Fixes: 036b8f5b8970 ("tools headers uapi: Update linux/in.h copy") +Signed-off-by: Andrii Nakryiko +--- + tools/include/uapi/linux/in.h | 1 + + tools/include/uapi/linux/stddef.h | 47 +++++++++++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + create mode 100644 tools/include/uapi/linux/stddef.h + +diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h +index f243ce665f74..07a4cb149305 100644 +--- a/tools/include/uapi/linux/in.h ++++ b/tools/include/uapi/linux/in.h +@@ -20,6 +20,7 @@ + #define _UAPI_LINUX_IN_H + + #include ++#include + #include + #include + +diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h +new file mode 100644 +index 000000000000..bb6ea517efb5 +--- /dev/null ++++ b/tools/include/uapi/linux/stddef.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _LINUX_STDDEF_H ++#define _LINUX_STDDEF_H ++ ++ ++ ++#ifndef __always_inline ++#define __always_inline __inline__ ++#endif ++ ++/** ++ * __struct_group() - Create a mirrored named and anonyomous struct ++ * ++ * @TAG: The tag name for the named sub-struct (usually empty) ++ * @NAME: The identifier name of the mirrored sub-struct ++ * @ATTRS: Any struct attributes (usually empty) ++ * @MEMBERS: The member declarations for the mirrored structs ++ * ++ * Used to create an anonymous union of two structs with identical layout ++ * and size: one anonymous and one named. The former's members can be used ++ * normally without sub-struct naming, and the latter can be used to ++ * reason about the start, end, and size of the group of struct members. ++ * The named struct can also be explicitly tagged for layer reuse, as well ++ * as both having struct attributes appended. ++ */ ++#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ ++ union { \ ++ struct { MEMBERS } ATTRS; \ ++ struct TAG { MEMBERS } ATTRS NAME; \ ++ } ++ ++/** ++ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union ++ * ++ * @TYPE: The type of each flexible array element ++ * @NAME: The name of the flexible array member ++ * ++ * In order to have a flexible array member in a union or alone in a ++ * struct, it needs to be wrapped in an anonymous struct with at least 1 ++ * named member, but that member can be empty. 
++ */ ++#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ ++ struct { \ ++ struct { } __empty_ ## NAME; \ ++ TYPE NAME[]; \ ++ } ++#endif +-- +2.30.2 + diff --git a/ci/vmtest/configs/DENYLIST b/ci/vmtest/configs/DENYLIST new file mode 100644 index 0000000000000..e53b4640180e8 --- /dev/null +++ b/ci/vmtest/configs/DENYLIST @@ -0,0 +1,7 @@ +# TEMPORARY +btf_dump/btf_dump: syntax +kprobe_multi_bench_attach +core_reloc/enum64val +core_reloc/size___diff_sz +core_reloc/type_based___diff_sz +test_ima # All of CI is broken on it following 6.3-rc1 merge diff --git a/ci/vmtest/configs/DENYLIST.aarch64 b/ci/vmtest/configs/DENYLIST.aarch64 new file mode 100644 index 0000000000000..487b19ede4b61 --- /dev/null +++ b/ci/vmtest/configs/DENYLIST.aarch64 @@ -0,0 +1,4 @@ +cgrp_local_storage # libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 +core_reloc_btfgen # run_core_reloc_tests:FAIL:run_btfgen unexpected error: 32512 (errno 22) +usdt/multispec # usdt_300_bad_attach unexpected pointer: 0x558c63d8f0 +xdp_bonding # whole test suite is very unstable on aarch64 diff --git a/ci/vmtest/configs/DENYLIST.s390x b/ci/vmtest/configs/DENYLIST.s390x new file mode 100644 index 0000000000000..e6829c94bdaae --- /dev/null +++ b/ci/vmtest/configs/DENYLIST.s390x @@ -0,0 +1,5 @@ +deny_namespace # not yet in bpf denylist +tc_redirect/tc_redirect_dtime # very flaky +lru_bug # not yet in bpf-next denylist +usdt/basic # failing verifier due to bounds check after LLVM update +usdt/multispec # same as above diff --git a/ci/vmtest/configs/DENYLIST.x86_64 b/ci/vmtest/configs/DENYLIST.x86_64 new file mode 100644 index 0000000000000..6fc3413daab9f --- /dev/null +++ b/ci/vmtest/configs/DENYLIST.x86_64 @@ -0,0 +1 @@ +netcnt # with kvm enabled, fail with packets unexpected packets: actual 10001 != expected 10000 diff --git a/ci/vmtest/helpers.sh b/ci/vmtest/helpers.sh new file mode 100755 index 0000000000000..c44d0983156d0 --- /dev/null +++ b/ci/vmtest/helpers.sh @@ -0,0 +1,38 @@ +# shellcheck shell=bash + +# $1 - start or end +# $2 - fold identifier, no spaces +# $3 - fold section description +foldable() { + local YELLOW='\033[1;33m' + local NOCOLOR='\033[0m' + if [ $1 = "start" ]; then + line="::group::$2" + if [ ! -z "${3:-}" ]; then + line="$line - ${YELLOW}$3${NOCOLOR}" + fi + else + line="::endgroup::" + fi + echo -e "$line" +} + +__print() { + local TITLE="" + if [[ -n $2 ]]; then + TITLE=" title=$2" + fi + echo "::$1${TITLE}::$3" +} + +# $1 - title +# $2 - message +print_error() { + __print error $1 $2 +} + +# $1 - title +# $2 - message +print_notice() { + __print notice $1 $2 +} diff --git a/ci/vmtest/run_selftests.sh b/ci/vmtest/run_selftests.sh new file mode 100755 index 0000000000000..1cc1bf834837f --- /dev/null +++ b/ci/vmtest/run_selftests.sh @@ -0,0 +1,168 @@ +#!/bin/bash + +# run_selftest.sh will run the tests within /${PROJECT_NAME}/selftests/bpf +# If no specific test names are given, all test will be ran, otherwise, it will +# run the test passed as parameters. +# There is 2 ways to pass test names. +# 1) command-line arguments to this script +# 2) a comma-separated list of test names passed as `run_tests` boot parameters. +# test names passed as any of those methods will be ran. 
+ +set -euo pipefail + +source "$(cd "$(dirname "$0")" && pwd)/helpers.sh" + +ARCH=$(uname -m) + +STATUS_FILE=/exitstatus +OUTPUT_DIR=/command_output + +declare -a TEST_NAMES=() + +read_lists() { + (for path in "$@"; do + if [[ -s "$path" ]]; then + cat "$path" + fi; + done) | cut -d'#' -f1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | tr -s '\n' ',' +} + +read_test_names() { + foldable start read_test_names "Reading test names from boot parameters and command line arguments" + # Check if test names were passed as boot parameter. + # We expect `run_tests` to be a comma-separated list of test names. + IFS=',' read -r -a test_names_from_boot <<< \ + "$(sed -n 's/.*run_tests=\([^ ]*\).*/\1/p' /proc/cmdline)" + + echo "${#test_names_from_boot[@]} tests extracted from boot parameters: ${test_names_from_boot[*]}" + # Sort and only keep unique test names from both boot params and arguments + # TEST_NAMES will contain a sorted list of uniq tests to be ran. + # Only do this if any of $test_names_from_boot[@] or $@ has elements as + # "printf '%s\0'" will otherwise generate an empty element. + if [[ ${#test_names_from_boot[@]} -gt 0 || $# -gt 0 ]] + then + readarray -t TEST_NAMES < \ + <(printf '%s\0' "${test_names_from_boot[@]}" "$@" | \ + sort --zero-terminated --unique | \ + xargs --null --max-args=1) + fi + foldable end read_test_names +} + +test_progs_helper() { + local selftest="test_progs${1}" + local args="$2" + + json_file=${selftest/-/_} + if [ "$2" == "-j" ] + then + json_file+="_parallel" + fi + json_file="/${json_file}.json" + + foldable start ${selftest} "Testing ${selftest}" + # "&& true" does not change the return code (it is not executed + # if the Python script fails), but it prevents exiting on a + # failure due to the "set -e". + ./${selftest} ${args} ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} --json-summary "${json_file}" && true + echo "${selftest}:$?" >>"${STATUS_FILE}" + foldable end ${selftest} +} + +test_progs() { + test_progs_helper "" "" +} + +test_progs_parallel() { + test_progs_helper "" "-j" +} + +test_progs_no_alu32() { + test_progs_helper "-no_alu32" "" +} + +test_progs_no_alu32_parallel() { + test_progs_helper "-no_alu32" "-j" +} + +test_maps() { + foldable start test_maps "Testing test_maps" + taskset 0xF ./test_maps && true + echo "test_maps:$?" >>"${STATUS_FILE}" + foldable end test_maps +} + +test_verifier() { + foldable start test_verifier "Testing test_verifier" + ./test_verifier && true + echo "test_verifier:$?" >>"${STATUS_FILE}" + foldable end test_verifier +} + +run_veristat() { + foldable start run_veristat "Running veristat" + + # Make veristat commands visible in the log + if [ -o xtrace ]; then + xtrace_was_on="1" + else + xtrace_was_on="" + set -x + fi + + globs=$(awk '/^#/ { next; } { print $0 ".bpf.o"; }' ./veristat.cfg) + mkdir -p ${OUTPUT_DIR} + ./veristat -o csv -q -e file,prog,verdict,states ${globs} > ${OUTPUT_DIR}/veristat.csv + echo "run_veristat:$?" 
>> ${STATUS_FILE} + + # Hide commands again + if [ -z "$xtrace_was_on" ]; then + set +x + fi + + foldable end run_veristat +} + +foldable end vm_init + +foldable start kernel_config "Kconfig" + +zcat /proc/config.gz + +foldable end kernel_config + +configs_path=${PROJECT_NAME}/selftests/bpf +local_configs_path=${PROJECT_NAME}/vmtest/configs +DENYLIST=$(read_lists \ + "$configs_path/DENYLIST" \ + "$configs_path/DENYLIST.${ARCH}" \ + "$local_configs_path/DENYLIST" \ + "$local_configs_path/DENYLIST.${ARCH}" \ +) +ALLOWLIST=$(read_lists \ + "$configs_path/ALLOWLIST" \ + "$configs_path/ALLOWLIST.${ARCH}" \ + "$local_configs_path/ALLOWLIST" \ + "$local_configs_path/ALLOWLIST.${ARCH}" \ +) + +echo "DENYLIST: ${DENYLIST}" +echo "ALLOWLIST: ${ALLOWLIST}" + +cd ${PROJECT_NAME}/selftests/bpf + +# populate TEST_NAMES +read_test_names "$@" +# if we don't have any test name provided to the script, we run all tests. +if [ ${#TEST_NAMES[@]} -eq 0 ]; then + test_progs + test_progs_no_alu32 + test_maps + test_verifier +else + # else we run the tests passed as command-line arguments and through boot + # parameter. + for test_name in "${TEST_NAMES[@]}"; do + "${test_name}" + done +fi From c7bf4e6f36de808765a1f7f77b6c9085eb8c3bf2 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 9 Nov 2023 18:43:28 +0100 Subject: [PATCH 41/41] selftests: bpf: xskxceiver: ksft_print_msg: fix format type error Crossbuilding selftests/bpf for architecture arm64, format specifies type error show up like. xskxceiver.c:912:34: error: format specifies type 'int' but the argument has type '__u64' (aka 'unsigned long long') [-Werror,-Wformat] ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", ~~ %llu __func__, pkt->pkt_nb, meta->count); ^~~~~~~~~~~ xskxceiver.c:929:55: error: format specifies type 'unsigned long long' but the argument has type 'u64' (aka 'unsigned long') [-Werror,-Wformat] ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); ~~~~ ^~~~ Fixing the issues by casting to (unsigned long long) and changing the specifiers to be %llu from %d and %u, since with u64s it might be %llx or %lx, depending on architecture. 
Signed-off-by: Anders Roxell --- tools/testing/selftests/bpf/xskxceiver.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 591ca9637b23e..b604c570309a7 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) struct xdp_info *meta = data - sizeof(struct xdp_info); if (meta->count != pkt->pkt_nb) { - ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->pkt_nb, meta->count); + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", + __func__, pkt->pkt_nb, + (unsigned long long)meta->count); return false; } @@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp if (addr >= umem->num_frames * umem->frame_size || addr + len > umem->num_frames * umem->frame_size) { - ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag invalid addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { - ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } @@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); ksft_print_msg("[%s] Too many packets completed\n", __func__); - ksft_print_msg("Last completion address: %llx\n", addr); + ksft_print_msg("Last completion address: %llx\n", + (unsigned long long)addr); return TEST_FAILURE; } @@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) } if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { - ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", + __func__, + (unsigned long long)stats.tx_invalid_descs, ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; }