-
Notifications
You must be signed in to change notification settings - Fork 240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add an eBPF program to measure synchronous connect() calls latencies #254
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#include <vmlinux.h> | ||
#include <bpf/bpf_helpers.h> | ||
#include <bpf/bpf_tracing.h> | ||
#include <bpf/bpf_core_read.h> | ||
#include "bits.bpf.h" | ||
#include "maps.bpf.h" | ||
|
||
#define MAX_LATENCY_SLOT 26 | ||
|
||
// Key of the connect_start map: identifies the task whose connect() call is in flight. | ||
struct connect_start_key_t { | ||
// Filled from bpf_get_current_pid_tgid() in both the entry and the return probe. | ||
u64 pid_tgid; | ||
}; | ||
|
||
// Value of the connect_start map: what the entry probe records about one connect() call. | ||
struct connect_start_val_t { | ||
// bpf_ktime_get_ns() timestamp taken by the entry probe. | ||
u64 ts; | ||
// Copy of the addrlen argument passed to connect(). | ||
// NOTE(review): stored but never read back by the return probe in this file. | ||
int addrlen; | ||
u32 d_ip; // Destination IPv4 address, stored exactly as read from the sockaddr | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
u16 d_port; // Destination port number as read from the sockaddr; byte-swapped later by the return probe | ||
}; | ||
|
||
/*
 * Key of the connect_latency_seconds histogram map.
 *
 * d_port is deliberately 32 bits wide: the exporter configuration decodes
 * the port label as a 4-byte unsigned integer, and a 16-bit field here would
 * leave two bytes of implicit padding in front of `slot` whose contents are
 * not guaranteed to be zero. Offsets and total size are unchanged compared
 * to the padded u16 layout.
 */
struct connect_latency_key_t {
    u32 d_ip;   // Destination IPv4 address, as copied from the sockaddr
    u32 d_port; // Destination port number in host byte order
    u64 slot;   // log2 latency bucket, or MAX_LATENCY_SLOT + 1 for the running sum
};
|
||
// In-flight connect() calls: the entry probe inserts an element keyed by pid_tgid, | ||
// the return probe looks it up and deletes it. | ||
struct { | ||
__uint(type, BPF_MAP_TYPE_HASH); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
// Upper bound on the number of connect() calls tracked at the same time. | ||
__uint(max_entries, 10240); | ||
__type(key, struct connect_start_key_t); | ||
__type(value, struct connect_start_val_t); | ||
} connect_start SEC(".maps"); | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_HASH); | ||
__uint(max_entries, MAX_LATENCY_SLOT + 2); | ||
__type(key, struct connect_latency_key_t); | ||
__type(value, u64); | ||
} connect_latency_seconds SEC(".maps"); | ||
|
||
/*
 * Convert a 16-bit value from network (big-endian) byte order to host byte
 * order.
 *
 * The bytes are swapped only when the target is not big-endian; on a
 * big-endian target network order already is host order and the value is
 * returned untouched. libbpf ships the same conversion as bpf_ntohs() in
 * <bpf/bpf_endian.h>.
 */
static inline __u16 ntohs(__u16 value)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    return value;
#else
    return (__u16)(((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8));
#endif
}
|
||
// vmlinux.h carries kernel types but no preprocessor constants, so the
// address families handled below are spelled out here.
#ifndef AF_INET
#define AF_INET 2
#endif
#ifndef AF_INET6
#define AF_INET6 10
#endif

/*
 * Entry probe for __sys_connect().
 *
 * Records the start timestamp and the IPv4 destination of the call in
 * connect_start, keyed by pid_tgid, so the return probe can compute the
 * latency. Only AF_INET destinations and IPv4-mapped AF_INET6 destinations
 * (::ffff:a.b.c.d) are recorded; every other address family, including
 * native IPv6, is ignored because the map key only has room for a 4-byte
 * address (the review thread asks for native IPv6 to go into a separate map).
 *
 * sockfd:  socket file descriptor passed to connect() (unused)
 * addr:    user-space pointer to the destination sockaddr
 * addrlen: size of the buffer behind addr, as passed by the caller
 *
 * Always returns 0.
 */
SEC("kprobe/__sys_connect")
int BPF_KPROBE(kprobe__sys_connect, int sockfd, const struct sockaddr *addr, int addrlen)
{
    struct connect_start_key_t start_key = {};
    struct connect_start_val_t start_val = {};
    struct sockaddr_in6 v6 = {};
    struct sockaddr_in v4 = {};
    u16 family = 0;

    // addr points into user memory, hence the _user flavour of the helper.
    if (addrlen < (int) sizeof(family)) {
        return 0;
    }

    if (bpf_probe_read_user(&family, sizeof(family), &addr->sa_family)) {
        return 0;
    }

    // Dispatch on the address family rather than on addrlen: callers may
    // pass a buffer larger than the sockaddr that is actually in use.
    switch (family) {
    case AF_INET:
        if (addrlen < (int) sizeof(v4)) {
            return 0;
        }

        if (bpf_probe_read_user(&v4, sizeof(v4), addr)) {
            return 0;
        }

        start_val.d_ip = v4.sin_addr.s_addr;
        start_val.d_port = v4.sin_port;
        break;
    case AF_INET6:
        if (addrlen < (int) sizeof(v6)) {
            return 0;
        }

        if (bpf_probe_read_user(&v6, sizeof(v6), addr)) {
            return 0;
        }

        // IPv4-mapped prefix is 80 zero bits followed by 16 one bits. The
        // 16-bit comparison against 0xFFFF reads the same on either byte
        // order, unlike a 32-bit comparison against 0x0000FFFF.
        if (v6.sin6_addr.in6_u.u6_addr32[0] != 0 ||
            v6.sin6_addr.in6_u.u6_addr32[1] != 0 ||
            v6.sin6_addr.in6_u.u6_addr16[4] != 0 ||
            v6.sin6_addr.in6_u.u6_addr16[5] != 0xFFFF) {
            return 0; // Native IPv6 destination, not recorded
        }

        start_val.d_ip = v6.sin6_addr.in6_u.u6_addr32[3];
        start_val.d_port = v6.sin6_port;
        break;
    default:
        return 0; // UNIX domain sockets, netlink, etc.
    }

    start_key.pid_tgid = bpf_get_current_pid_tgid();
    start_val.ts = bpf_ktime_get_ns();
    start_val.addrlen = addrlen;
    bpf_map_update_elem(&connect_start, &start_key, &start_val, BPF_ANY);

    return 0;
}
|
||
/*
 * Return probe for __sys_connect().
 *
 * Looks up the entry recorded by the kprobe for the current pid_tgid and
 * always removes it from connect_start, so failed or non-blocking calls do
 * not leave stale elements behind. For calls that returned 0 the elapsed
 * time in microseconds is added to the destination's log2 histogram bucket
 * (capped at MAX_LATENCY_SLOT) and to the running sum kept in slot
 * MAX_LATENCY_SLOT + 1.
 *
 * ret: return value of __sys_connect()
 *
 * Always returns 0.
 */
SEC("kretprobe/__sys_connect")
int BPF_KRETPROBE(kretprobe__sys_connect, int ret)
{
    struct connect_start_key_t start_key = {};
    struct connect_latency_key_t key = {};
    struct connect_start_val_t *start_val;
    u64 delta_us, latency_slot, start_ts;

    start_key.pid_tgid = bpf_get_current_pid_tgid();

    start_val = bpf_map_lookup_elem(&connect_start, &start_key);
    if (!start_val) {
        return 0; // The entry probe chose not to record this call
    }

    // Copy what is needed out of the element before deleting it; the
    // pointer must not be used once the element is gone.
    start_ts = start_val->ts;
    key.d_ip = start_val->d_ip;
    key.d_port = ntohs(start_val->d_port);

    bpf_map_delete_elem(&connect_start, &start_key);

    if (ret != 0) {
        return 0; // Filter out non-blocking sockets and errors
    }

    delta_us = (bpf_ktime_get_ns() - start_ts) / 1000;

    latency_slot = log2l(delta_us);
    if (latency_slot > MAX_LATENCY_SLOT) {
        latency_slot = MAX_LATENCY_SLOT;
    }

    key.slot = latency_slot;
    increment_map(&connect_latency_seconds, &key, 1);

    key.slot = MAX_LATENCY_SLOT + 1;
    increment_map(&connect_latency_seconds, &key, delta_us);

    return 0;
}
|
||
char LICENSE[] SEC("license") = "GPL"; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
metrics: | ||
histograms: | ||
- name: connect_latency_seconds | ||
help: Latency histogram for TCP connect() syscall | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it's TCP only, then let's call the metric There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we do this:
It's probably a good idea to add |
||
bucket_type: exp2 | ||
bucket_min: 0 | ||
bucket_max: 26 | ||
bucket_multiplier: 0.000001 # microseconds to seconds | ||
labels: | ||
- name: ip | ||
size: 4 | ||
decoders: | ||
- name: inet_ip | ||
- name: port | ||
size: 4 | ||
decoders: | ||
- name: uint | ||
- name: bucket | ||
size: 8 | ||
decoders: | ||
- name: uint |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You aren't really using `addrlen`.