Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an eBPF program to measure synchronous connect() calls latencies #254

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions examples/connect-latency.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "bits.bpf.h"
#include "maps.bpf.h"

#define MAX_LATENCY_SLOT 26

// Key of the connect_start map: identifies one in-flight call by the value
// bpf_get_current_pid_tgid() returns in the entry and return probes.
struct connect_start_key_t {
u64 pid_tgid;
};

// Per-call state that the entry probe stores in connect_start and the return
// probe reads back.
struct connect_start_val_t {
u64 ts; // bpf_ktime_get_ns() value taken in the entry probe
int addrlen; // addrlen argument of the traced call; stored but not read by the return probe
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You aren't really using addrlen.

u32 d_ip; // Destination IPv4 address, copied from the sockaddr without a byte swap
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clang-format is not happy here

u16 d_port; // Destination port number, copied from the sockaddr; the return probe applies ntohs()
};

// Key of the connect_latency_seconds map: one entry per
// (destination address, destination port, slot) combination.
// NOTE(review): the u16 followed by a u64 presumably leaves implicit padding
// after d_port; the "port" label size of 4 in the YAML config appears to
// account for it — confirm the layout.
struct connect_latency_key_t {
u32 d_ip; // Destination IPv4 address
u16 d_port; // Destination port number (after ntohs() in the return probe)
u64 slot; // Histogram slot 0..MAX_LATENCY_SLOT, or MAX_LATENCY_SLOT + 1 for the latency sum
};

// In-flight calls: the entry probe inserts an element keyed by pid_tgid, the
// return probe looks it up and deletes it.
// NOTE(review): the return probe only deletes the element when the call
// returned 0, so elements of failed calls stay until overwritten by the same
// pid_tgid.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BPF_MAP_TYPE_LRU_HASH is preferred for resiliency against map getting full.

__uint(max_entries, 10240);
__type(key, struct connect_start_key_t);
__type(value, struct connect_start_val_t);
} connect_start SEC(".maps");

// Upper bound on the number of distinct (d_ip, d_port) destinations whose
// histograms can be held at the same time.
#define MAX_CONNECT_DESTINATIONS 1024

// Latency histogram keyed by (d_ip, d_port, slot).
//
// Each destination uses up to MAX_LATENCY_SLOT + 2 keys: slots
// 0..MAX_LATENCY_SLOT for the buckets plus slot MAX_LATENCY_SLOT + 1 for the
// latency sum. The capacity therefore has to be that per-destination count
// multiplied by the number of destinations; sizing it to a single histogram
// leaves room for only one destination.
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, (MAX_LATENCY_SLOT + 2) * MAX_CONNECT_DESTINATIONS);
    __type(key, struct connect_latency_key_t);
    __type(value, u64);
} connect_latency_seconds SEC(".maps");

// Returns `value` with its two bytes swapped.
// NOTE(review): the swap is unconditional, so despite the name this only
// converts network to host order on a little-endian host.
static inline __u16 ntohs(__u16 value) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason not to use bpf_ntohs?

return ((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8);
}

// Entry probe on __sys_connect.
//
// Copies the destination address from `addr`, extracts an IPv4 address and
// port (either from a sockaddr_in or from a sockaddr_in6 that passes the
// IPv4-mapped check below) and stores them with the current timestamp in
// connect_start, keyed by the caller's pid_tgid. All other calls return 0
// without recording anything.
SEC("kprobe/__sys_connect")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fentry is a lot faster:

You also get:

  • Function arguments in fexit, allowing you to key on addr rather than pid
  • No need for BPF_CORE_READ as BTF allows direct reads

int BPF_KPROBE(kprobe__sys_connect, int sockfd, const struct sockaddr *addr, int addrlen)
{
struct sockaddr sa;
struct connect_start_val_t start_val = {};

// Copy the generic sockaddr header so sa_family can be inspected.
// NOTE(review): the return value of bpf_probe_read() is not checked here or
// in the copies below.
bpf_probe_read(&sa, sizeof(sa), addr);

if (sa.sa_family == 1) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are lots of families and you probably only care about AF_INET:

I suggest you add #define AF_INET 2 and use it here.

return 0; // Ignore UNIX domain sockets
}

// The address type is inferred from addrlen rather than from sa_family.
if (addrlen == sizeof(struct sockaddr_in)) {
struct sockaddr_in v4;
bpf_probe_read(&v4, sizeof(v4), addr);
start_val.d_ip = v4.sin_addr.s_addr;
start_val.d_port = v4.sin_port;
} else if (addrlen == sizeof(struct sockaddr_in6)) {
const char debug_str[] = "This is ipv6!\n";
bpf_trace_printk(debug_str, sizeof(debug_str));
struct sockaddr_in6 v6;
bpf_probe_read(&v6, sizeof(v6), addr);

// IPv4-mapped check: first two 32-bit words zero, third word 0x0000FFFF.
// NOTE(review): the address words are presumably stored in network byte
// order, in which case a little-endian host would read 0xFFFF0000 for an
// IPv4-mapped address and this comparison would not match — confirm.
if (BPF_CORE_READ(&v6.sin6_addr.in6_u, u6_addr32[0]) == 0x00000000 &&
BPF_CORE_READ(&v6.sin6_addr.in6_u, u6_addr32[1]) == 0x00000000 &&
BPF_CORE_READ(&v6.sin6_addr.in6_u, u6_addr32[2]) == 0x0000FFFF) {

// The last 32-bit word carries the embedded IPv4 address.
start_val.d_ip = BPF_CORE_READ(&v6.sin6_addr.in6_u, u6_addr32[3]);
start_val.d_port = v6.sin6_port;
} else {
const char debug_str[] = "This is native ipv6, I'm giving up!\n";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please implement IPv6 as well. Normally it's in a separate map: #251.

bpf_trace_printk(debug_str, sizeof(debug_str));
return 0;
}
} else {
const char debug_str[] = "Unexpected addrlen: %d, address family: %d\n";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We generally don't leave debug statements around.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would recommend bpf_printk() when you debug locally:

bpf_trace_printk(debug_str, sizeof(debug_str), addrlen, sa.sa_family);
return 0;
}

// Record the call under the caller's pid_tgid so the return probe can find
// it; BPF_ANY overwrites any element already stored for this pid_tgid.
struct connect_start_key_t start_key = {};
start_key.pid_tgid = bpf_get_current_pid_tgid();
start_val.ts = bpf_ktime_get_ns();
start_val.addrlen = addrlen;
bpf_map_update_elem(&connect_start, &start_key, &start_val, BPF_ANY);

return 0;
}

// Return probe on __sys_connect.
//
// Looks up the connect_start element stored for the current pid_tgid. When
// the call returned 0, it adds the elapsed time to the
// connect_latency_seconds histogram of the recorded destination and deletes
// the connect_start element.
SEC("kretprobe/__sys_connect")
int BPF_KRETPROBE(kretprobe__sys_connect, int ret)
{
u64 delta_us, latency_slot;
struct connect_start_key_t start_key = {};
start_key.pid_tgid = bpf_get_current_pid_tgid();
struct connect_start_val_t *start_val;
start_val = bpf_map_lookup_elem(&connect_start, &start_key);
if (!start_val) {
const char debug_str[] = "Did not find anything in the map!\n";
bpf_trace_printk(debug_str, sizeof(debug_str));
return 0;
}
// NOTE(review): this path returns without deleting the connect_start
// element that was just found.
if (ret != 0) {
return 0; // Filter out non-blocking sockets and errors
}
const char debug_str[] = "Return code is: %d\n";
bpf_trace_printk(debug_str, sizeof(debug_str), ret);
struct connect_latency_key_t key = {};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please put all definitions at the top of the function.

key.d_ip = start_val->d_ip;
key.d_port = ntohs(start_val->d_port);

// Elapsed time in microseconds (nanosecond difference divided by 1000).
delta_us = (bpf_ktime_get_ns() - start_val->ts) / 1000;
// log2l() is not defined in this file — presumably provided by bits.bpf.h.
// Its result is clamped to MAX_LATENCY_SLOT.
latency_slot = log2l(delta_us);
if (latency_slot > MAX_LATENCY_SLOT) {
latency_slot = MAX_LATENCY_SLOT;
}

// Count this call in its latency slot. increment_map() is not defined in
// this file — presumably provided by maps.bpf.h.
key.slot = latency_slot;
increment_map(&connect_latency_seconds, &key, 1);

// Slot MAX_LATENCY_SLOT + 1 accumulates the latency sum in microseconds.
key.slot = MAX_LATENCY_SLOT + 1;
increment_map(&connect_latency_seconds, &key, delta_us);

bpf_map_delete_elem(&connect_start, &start_key);

return 0;
}

// License string of the BPF object, emitted into the "license" section.
char LICENSE[] SEC("license") = "GPL";
21 changes: 21 additions & 0 deletions examples/connect-latency.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Exporter configuration for the connect_latency_seconds histogram map.
metrics:
histograms:
- name: connect_latency_seconds
help: Latency histogram for TCP connect() syscall
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's TCP only, then let's call the metric tcp_connect_latency_seconds and rename the file into tcp-connect-latency.yaml.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we do this:

// Filter out non-blocking sockets and errors

It's probably a good idea to add blocking in the name as well.

bucket_type: exp2
bucket_min: 0
# Same value as MAX_LATENCY_SLOT in the BPF program.
bucket_max: 26
bucket_multiplier: 0.000001 # microseconds to seconds
# NOTE(review): the label sizes (4 + 4 + 8 bytes) appear to follow the layout
# of struct connect_latency_key_t, with "port" covering the u16 field plus
# its padding — confirm.
labels:
- name: ip
size: 4
decoders:
- name: inet_ip
- name: port
size: 4
decoders:
- name: uint
- name: bucket
size: 8
decoders:
- name: uint
Loading