From a07d882196358e02ccd25f663698d04c03bc1abd Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Wed, 17 Apr 2024 11:09:06 -0700 Subject: [PATCH 1/2] Rewrite sock trace with socket cookies --- examples/sock-trace.bpf.c | 179 ++++++++++++----------------------- examples/sock-trace.yaml | 10 +- tracing/demos/sock/main.go | 2 - tracing/demos/sock/stitch.go | 21 ++-- 4 files changed, 71 insertions(+), 141 deletions(-) diff --git a/examples/sock-trace.bpf.c b/examples/sock-trace.bpf.c index 91ef598e..da602e9b 100644 --- a/examples/sock-trace.bpf.c +++ b/examples/sock-trace.bpf.c @@ -1,15 +1,16 @@ #include #include -#include #include #include "tracing.bpf.h" -u32 yes = true; +// Skipping 3 frames off the top as they are just bpf trampoline +#define SKIP_FRAMES (3 & BPF_F_SKIP_FIELD_MASK) + +extern int LINUX_KERNEL_VERSION __kconfig; struct stitch_span_t { struct span_base_t span_base; - u32 fd; - u64 addr; + u64 socket_cookie; }; struct sock_release_span_t { @@ -17,16 +18,11 @@ struct sock_release_span_t { u64 span_id; }; -struct skb_span_t { +struct sk_span_t { struct span_base_t span_base; u64 ksym; }; -struct file_key_t { - u32 tgid; - u32 fd; -}; - struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 256 * 1024); @@ -40,101 +36,23 @@ struct { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 256 * 1024); -} skb_spans SEC(".maps"); +} sk_spans SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, 1024 * 10); - __type(key, u32); - __type(value, bool); -} traced_tgids SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 1024 * 10); - __type(key, struct sock *); + __type(key, u64); __type(value, struct span_parent_t); -} traced_socks SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 1024 * 10); - __type(key, struct file_key_t); - __type(value, struct sock *); -} fd_to_sock SEC(".maps"); - -SEC("fentry/fd_install") -int BPF_PROG(fd_install, unsigned 
int fd, struct file *file) -{ - u32 tgid = bpf_get_current_pid_tgid() >> 32; - struct file_key_t key = { .tgid = tgid, .fd = fd }; - bool *traced = bpf_map_lookup_elem(&traced_tgids, &tgid); - struct sock *sk; - - if (!traced) { - return 0; - } - - sk = BPF_CORE_READ((struct socket *) file->private_data, sk); - - bpf_map_update_elem(&fd_to_sock, &key, &sk, BPF_ANY); - - return 0; -} - -SEC("fentry/close_fd") -int BPF_PROG(close_fd, unsigned int fd) -{ - u32 tgid = bpf_get_current_pid_tgid() >> 32; - struct file_key_t key = { .tgid = tgid, .fd = fd }; - - bpf_map_delete_elem(&traced_socks, &key); - - return 0; -} - -SEC("usdt/./tracing/demos/sock/demo:ebpf_exporter:enable_kernel_tracing") -int BPF_USDT(enable_kernel_tracing) -{ - u32 tgid = bpf_get_current_pid_tgid() >> 32; - - bpf_map_update_elem(&traced_tgids, &tgid, &yes, BPF_NOEXIST); - - return 0; -} - -SEC("tp_btf/sched_process_exit") -int BPF_PROG(sched_process_exit, struct task_struct *p) -{ - u32 tgid = p->tgid; - - if (p->pid != p->tgid) { - return 0; - } - - bpf_map_delete_elem(&traced_tgids, &tgid); - - return 0; -} +} traced_socket_cookies SEC(".maps"); SEC("usdt/./tracing/demos/sock/demo:ebpf_exporter:sock_set_parent_span") -int BPF_USDT(sock_set_parent_span, int fd, u64 trace_id_hi, u64 trace_id_lo, u64 span_id) +int BPF_USDT(sock_set_parent_span, u64 socket_cookie, u64 trace_id_hi, u64 trace_id_lo, u64 span_id) { - u32 tgid = bpf_get_current_pid_tgid() >> 32; struct span_parent_t parent = { .trace_id_hi = trace_id_hi, .trace_id_lo = trace_id_lo, .span_id = span_id }; - struct file_key_t key = { .tgid = tgid, .fd = fd }; - struct sock **sk = bpf_map_lookup_elem(&fd_to_sock, &key); - - if (!sk) { - return 0; - } - bpf_map_update_elem(&traced_socks, sk, &parent, BPF_ANY); + bpf_map_update_elem(&traced_socket_cookies, &socket_cookie, &parent, BPF_ANY); - submit_span(&stitch_spans, struct stitch_span_t, &parent, { - span->fd = fd; - span->addr = (u64) *sk; - }); + submit_span(&stitch_spans, struct 
stitch_span_t, &parent, { span->socket_cookie = socket_cookie; }); return 0; } @@ -142,8 +60,8 @@ int BPF_USDT(sock_set_parent_span, int fd, u64 trace_id_hi, u64 trace_id_lo, u64 SEC("fentry/__sock_release") int BPF_PROG(__sock_release, struct socket *sock) { - struct sock *sk = BPF_CORE_READ(sock, sk); - struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socks, &sk); + u64 socket_cookie = bpf_get_socket_cookie(sock->sk); + struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); if (!parent) { return 0; @@ -151,64 +69,85 @@ int BPF_PROG(__sock_release, struct socket *sock) submit_span(&sock_release_spans, struct sock_release_span_t, parent, { span->span_id = 0xdead; }); - bpf_map_delete_elem(&traced_socks, &sk); + bpf_map_delete_elem(&traced_socket_cookies, &socket_cookie); return 0; } -static int handle_skb(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb) +static int handle_sk(struct pt_regs *ctx, u64 socket_cookie) { - struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socks, &sk); + struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); if (!parent) { return 0; } - submit_span(&skb_spans, struct skb_span_t, parent, { span->ksym = PT_REGS_IP_CORE(ctx); }); + submit_span(&sk_spans, struct sk_span_t, parent, { + // FIXME: PT_REGS_IP_CORE(ctx) does not work for fentry, so we abuse kstack + bpf_get_stack(ctx, &span->ksym, sizeof(span->ksym), SKIP_FRAMES); + span->ksym -= 8; + }); return 0; } -SEC("kprobe/tcp_v4_do_rcv") +SEC("fentry/tcp_v4_do_rcv") int BPF_PROG(tcp_v4_do_rcv, struct sock *sk, struct sk_buff *skb) { - return handle_skb((struct pt_regs *) ctx, sk, skb); + return handle_sk((struct pt_regs *) ctx, bpf_get_socket_cookie(sk)); } -SEC("kprobe/nf_hook_slow") -int BPF_PROG(nf_hook_slow, struct sk_buff *skb) +SEC("fentry/__ip_local_out") +int BPF_PROG(__ip_local_out, struct net *net, struct sock *sk, struct sk_buff *skb) { - return handle_skb((struct pt_regs *) 
ctx, BPF_CORE_READ(skb, sk), skb); + return handle_sk((struct pt_regs *) ctx, bpf_get_socket_cookie(sk)); } -SEC("kprobe/__ip_local_out") -int BPF_PROG(__ip_local_out, struct net *net, struct sock *sk, struct sk_buff *skb) +SEC("fentry/ip_finish_output") +int BPF_PROG(ip_finish_output, struct net *net, struct sock *sk, struct sk_buff *skb) { - return handle_skb((struct pt_regs *) ctx, sk, skb); + return handle_sk((struct pt_regs *) ctx, bpf_get_socket_cookie(sk)); } -SEC("kprobe/ip_finish_output") -int BPF_PROG(ip_finish_output, struct net *net, struct sock *sk, struct sk_buff *skb) +SEC("fentry/__tcp_retransmit_skb") +int BPF_PROG(__tcp_retransmit_skb, struct sock *sk, struct sk_buff *skb) { - return handle_skb((struct pt_regs *) ctx, sk, skb); + return handle_sk((struct pt_regs *) ctx, bpf_get_socket_cookie(sk)); } -SEC("kprobe/__dev_queue_xmit") -int BPF_PROG(__dev_queue_xmit, struct sk_buff *skb) +// Older kernels are not happy with calls to bpf_get_socket_cookie(skb->sk): +// +// ; return handle_sk((struct pt_regs *) ctx, bpf_get_socket_cookie(skb->sk)); +// 3: (85) call bpf_get_socket_cookie#46 +// R1 type=untrusted_ptr_ expected=sock_common, sock, tcp_sock, xdp_sock, ptr_, trusted_ptr_ +// +// I'm not sure which is the oldest kernel that accepts this call, but I know it doesn't work on v6.5 +// in GitHub Actions, while it runs fine on v6.9-rc3 locally. I'm too lazy to bisect. 
+static int handle_skb(struct pt_regs *ctx, struct sk_buff *skb) +{ + if (LINUX_KERNEL_VERSION < KERNEL_VERSION(6, 9, 0)) { + return 0; + } + + return handle_sk(ctx, bpf_get_socket_cookie(skb->sk)); +} + +SEC("fentry/nf_hook_slow") +int BPF_PROG(nf_hook_slow, struct sk_buff *skb) { - return handle_skb((struct pt_regs *) ctx, BPF_CORE_READ(skb, sk), skb); + return handle_skb((struct pt_regs *) ctx, skb); } -SEC("kprobe/dev_hard_start_xmit") -int BPF_PROG(dev_hard_start_xmit, struct sk_buff *skb) +SEC("fentry/__dev_queue_xmit") +int BPF_PROG(__dev_queue_xmit, struct sk_buff *skb) { - return handle_skb((struct pt_regs *) ctx, BPF_CORE_READ(skb, sk), skb); + return handle_skb((struct pt_regs *) ctx, skb); } -SEC("kprobe/__tcp_retransmit_skb") -int BPF_PROG(__tcp_retransmit_skb, struct sock *sk, struct sk_buff *skb) +SEC("fentry/dev_hard_start_xmit") +int BPF_PROG(dev_hard_start_xmit, struct sk_buff *skb) { - return handle_skb((struct pt_regs *) ctx, sk, skb); + return handle_skb((struct pt_regs *) ctx, skb); } char LICENSE[] SEC("license") = "GPL"; diff --git a/examples/sock-trace.yaml b/examples/sock-trace.yaml index 08a9ed53..b1e3ba5a 100644 --- a/examples/sock-trace.yaml +++ b/examples/sock-trace.yaml @@ -23,11 +23,7 @@ tracing: size: 8 decoders: - name: uint - - name: fd - size: 8 - decoders: - - name: uint - - name: addr_bytes # will look weird in little endian + - name: socket_cookie size: 8 decoders: - name: hex @@ -55,8 +51,8 @@ tracing: size: 8 decoders: - name: uint - - name: skb - ringbuf: skb_spans + - name: sk_spans + ringbuf: sk_spans service: kernel labels: - name: trace_id diff --git a/tracing/demos/sock/main.go b/tracing/demos/sock/main.go index 8fc83e1e..2841419d 100644 --- a/tracing/demos/sock/main.go +++ b/tracing/demos/sock/main.go @@ -12,8 +12,6 @@ import ( ) func main() { - enableKernelTracing() - processor, err := demos.SetupTracing() if err != nil { log.Fatalf("Error setting up tracing: %v", err) diff --git a/tracing/demos/sock/stitch.go 
b/tracing/demos/sock/stitch.go index efc79820..4251e1d9 100644 --- a/tracing/demos/sock/stitch.go +++ b/tracing/demos/sock/stitch.go @@ -4,31 +4,28 @@ package main #include #include -void enable_kernel_tracing() +void sock_set_parent_span(uint64_t socket_cookie, uint64_t trace_id_hi, uint64_t trace_id_lo, uint64_t span_id) { - DTRACE_PROBE(ebpf_exporter, enable_kernel_tracing); -} - -void sock_set_parent_span(int sock_fd, uint64_t trace_id_hi, uint64_t trace_id_lo, uint64_t span_id) -{ - DTRACE_PROBE4(ebpf_exporter, sock_set_parent_span, sock_fd, trace_id_hi, trace_id_lo, span_id); + DTRACE_PROBE4(ebpf_exporter, sock_set_parent_span, socket_cookie, trace_id_hi, trace_id_lo, span_id); } */ import "C" import ( "github.com/cloudflare/ebpf_exporter/v2/tracing/demos" "go.opentelemetry.io/otel/trace" + "golang.org/x/sys/unix" ) -func enableKernelTracing() { - C.enable_kernel_tracing() -} - func sockSentParentSpan(fd uintptr, span trace.Span) { traceIDHi, traceIDLo, spanID := demos.PropagationArgs(span) + cookie, err := unix.GetsockoptUint64(int(fd), unix.SOL_SOCKET, unix.SO_COOKIE) + if err != nil { + panic(err) + } + C.sock_set_parent_span( - C.int(fd), + C.uint64_t(cookie), C.uint64_t(traceIDHi), C.uint64_t(traceIDLo), C.uint64_t(spanID), From 34aea784eb29c81e0b5929900cb6762f6738c4b3 Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Thu, 18 Apr 2024 14:24:35 -0700 Subject: [PATCH 2/2] Add sk_error_report span to sock-trace --- examples/sock-trace.bpf.c | 33 +++++++++++++++++++++++++++++++++ examples/sock-trace.yaml | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/examples/sock-trace.bpf.c b/examples/sock-trace.bpf.c index da602e9b..efe8d105 100644 --- a/examples/sock-trace.bpf.c +++ b/examples/sock-trace.bpf.c @@ -3,6 +3,8 @@ #include #include "tracing.bpf.h" +#define MAX_STACK_DEPTH 8 + // Skipping 3 frames off the top as they are just bpf trampoline #define SKIP_FRAMES (3 & BPF_F_SKIP_FIELD_MASK) @@ -23,6 +25,12 @@ struct sk_span_t 
{ u64 ksym; }; +struct sk_error_report_span_t { + struct span_base_t span_base; + u64 kstack[MAX_STACK_DEPTH]; + u32 sk_err; +}; + struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 256 * 1024); @@ -38,6 +46,11 @@ struct { __uint(max_entries, 256 * 1024); } sk_spans SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); +} sk_error_report_spans SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, 1024 * 10); @@ -150,4 +163,24 @@ int BPF_PROG(dev_hard_start_xmit, struct sk_buff *skb) return handle_skb((struct pt_regs *) ctx, skb); } +// bpf_get_socket_cookie is not available in raw_tp: +// * https://github.com/torvalds/linux/blob/v6.6/kernel/trace/bpf_trace.c#L1926-L1939 +SEC("fentry/sk_error_report") +int BPF_PROG(sk_error_report, struct sock *sk) +{ + u64 socket_cookie = bpf_get_socket_cookie(sk); + struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); + + if (!parent) { + return 0; + } + + submit_span(&sk_error_report_spans, struct sk_error_report_span_t, parent, { + bpf_get_stack(ctx, &span->kstack, sizeof(span->kstack), SKIP_FRAMES); + span->sk_err = sk->sk_err; + }); + + return 0; +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/examples/sock-trace.yaml b/examples/sock-trace.yaml index b1e3ba5a..83279e42 100644 --- a/examples/sock-trace.yaml +++ b/examples/sock-trace.yaml @@ -79,3 +79,36 @@ tracing: size: 8 decoders: - name: ksym + - name: sk_error_report + ringbuf: sk_error_report_spans + service: kernel + labels: + - name: trace_id + size: 16 + decoders: + - name: hex + - name: parent_span_id + size: 8 + decoders: + - name: hex + - name: span_id + size: 8 + decoders: + - name: hex + - name: span_monotonic_timestamp_ns + size: 8 + decoders: + - name: uint + - name: span_duration_ns + size: 8 + decoders: + - name: uint + - name: kstack + size: 64 + decoders: + - name: kstack + - name: sk_err + size: 4 + decoders: + - name: uint + - 
name: errno