replace pid with task pointer
likewhatevs committed Oct 4, 2024
1 parent e60f8b9 commit 146e7e8
Showing 3 changed files with 116 additions and 71 deletions.
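At a glance, the commit rekeys scx_rusty's per-task state: BPF maps that were keyed by pid_t are now keyed by the u64 value of the task's task_struct pointer (a "tptr"), which stays fixed for the task's lifetime. The sketch below is illustrative rather than code from the commit; identifiers mirror the diff, task_ctx_of is a hypothetical helper name, and the direct cast stands in for the bpf_probe_read_kernel() round trip the commit performs in t_to_tptr().

/* Illustrative sketch, not part of the commit: per-task state keyed by
 * the task_struct pointer widened to u64. */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, u64);                 /* tptr: task_struct pointer as u64 */
    __type(value, struct task_ctx);
    __uint(max_entries, 1000000);
} task_data SEC(".maps");

static struct task_ctx *task_ctx_of(struct task_struct *p)
{
    u64 tptr = (u64)p;                /* stable key for the task's lifetime */

    return bpf_map_lookup_elem(&task_data, &tptr);
}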
4 changes: 2 additions & 2 deletions scheds/rust/scx_rusty/src/bpf/intf.h
@@ -63,7 +63,7 @@ enum consts {
* this isn't a practical problem as the LB rounds are best-effort
* anyway and will be retried until loads are balanced.
*/
-MAX_DOM_ACTIVE_PIDS = 1024,
+MAX_DOM_ACTIVE_TPTRS = 1024,
};

/* Statistics */
@@ -106,7 +106,7 @@ struct task_ctx {
u32 dom_id;
u32 weight;
bool runnable;
-u64 dom_active_pids_gen;
+u64 dom_active_tptrs_gen;
u64 deadline;

u64 sum_runtime;
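The renamed constant sizes the per-domain ring of recently active tasks that main.bpf.c fills in rusty_running() below; gen, read_idx, and write_idx are free-running counters. As a hedged sketch of the consumer side, assuming only the struct layout shown in the diff (drain_active_tptrs and visit are hypothetical names; the real consumer lives in scx_rusty's Rust userspace):

/* Hypothetical consumer: drain one domain's ring of recently active
 * tptrs, then bump gen so running tasks re-add themselves next round. */
static void drain_active_tptrs(struct dom_active_tptrs *d,
                               void (*visit)(u64 tptr))
{
    u64 nr = d->write_idx - d->read_idx;

    if (nr > MAX_DOM_ACTIVE_TPTRS)      /* writer lapped the reader */
        nr = MAX_DOM_ACTIVE_TPTRS;

    for (u64 i = 0; i < nr; i++)
        visit(d->tptrs[(d->read_idx + i) % MAX_DOM_ACTIVE_TPTRS]);

    d->read_idx = d->write_idx;
    d->gen++;
}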
137 changes: 91 additions & 46 deletions scheds/rust/scx_rusty/src/bpf/main.bpf.c
@@ -149,21 +149,21 @@ struct {
__uint(map_flags, 0);
} dom_dcycle_locks SEC(".maps");

-struct dom_active_pids {
+struct dom_active_tptrs {
u64 gen;
u64 read_idx;
u64 write_idx;
-s32 pids[MAX_DOM_ACTIVE_PIDS];
+u64 tptrs[MAX_DOM_ACTIVE_TPTRS];
};

-struct dom_active_pids dom_active_pids[MAX_DOMS];
+struct dom_active_tptrs dom_active_tptrs[MAX_DOMS];

const u64 ravg_1 = 1 << RAVG_FRAC_BITS;

-/* Map pid -> task_ctx */
+/* Map tptr -> task_ctx */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
-__type(key, pid_t);
+__type(key, u64);
__type(value, struct task_ctx);
__uint(max_entries, 1000000);
__uint(map_flags, 0);
@@ -185,20 +185,37 @@ static struct dom_ctx *lookup_dom_ctx(u32 dom_id)
return domc;
}

+static u64 t_to_tptr(struct task_struct *p)
+{
+u64 tptr;
+int err;
+
+err = bpf_probe_read_kernel(&tptr, sizeof(tptr), &p);
+
+if (err) {
+scx_bpf_error("Failed to cast task_struct addr to tptr");
+return 0;
+}
+return tptr;
+}

static struct task_ctx *try_lookup_task_ctx(struct task_struct *p)
{
-s32 pid = p->pid;
+u64 tptr;
+tptr = t_to_tptr(p);

-return bpf_map_lookup_elem(&task_data, &pid);
+return bpf_map_lookup_elem(&task_data, &tptr);
}

static struct task_ctx *lookup_task_ctx(struct task_struct *p)
{
struct task_ctx *taskc;
+u64 tptr;
+tptr = t_to_tptr(p);

taskc = try_lookup_task_ctx(p);
if (!taskc)
scx_bpf_error("task_ctx lookup failed for pid %d", p->pid);
scx_bpf_error("task_ctx lookup failed for tptr %llu", tptr);

return taskc;
}
@@ -397,31 +414,49 @@ static u64 dom_min_vruntime(struct dom_ctx *domc)
return READ_ONCE(domc->min_vruntime);
}

-int dom_xfer_task(pid_t pid, u32 new_dom_id, u64 now)
+static struct task_struct *tptr_to_task(u64 tptr)
+{
+struct task_struct *task, *p;
+int err;
+
+task = (struct task_struct *)tptr;
+
+err = bpf_probe_read_kernel(&p, sizeof(p), &task);
+if (err) {
+scx_bpf_error("Failed to retrieve task_struct for tptr %llu", tptr);
+return NULL;
+}
+if (p)
+return p;
+
+scx_bpf_error("Failed to retrieve task_struct for tptr %llu", tptr);
+return NULL;
+}

+int dom_xfer_task(u64 tptr, u32 new_dom_id, u64 now)
{
struct dom_ctx *from_domc, *to_domc;
struct task_ctx *taskc;
struct task_struct *p;

-p = bpf_task_from_pid(pid);
+p = tptr_to_task(tptr);

if (!p) {
scx_bpf_error("Failed to lookup task %d", pid);
scx_bpf_error("Failed to lookup task %llu", tptr);
return 0;
}

taskc = lookup_task_ctx(p);
if (!taskc)
-goto free_task;
+return 0;

from_domc = lookup_dom_ctx(taskc->dom_id);
to_domc = lookup_dom_ctx(new_dom_id);

if (!from_domc || !to_domc || !taskc)
-goto free_task;
+return 0;

dom_dcycle_xfer_task(p, taskc, from_domc, to_domc, now);
-free_task:
-bpf_task_release(p);

return 0;
}

@@ -445,12 +480,12 @@ static inline void stat_add(enum stat_idx idx, u64 addend)
}

/*
- * This is populated from userspace to indicate which pids should be reassigned
+ * This is populated from userspace to indicate which tptrs should be reassigned
* to new doms.
*/
struct {
__uint(type, BPF_MAP_TYPE_HASH);
-__type(key, pid_t);
+__type(key, u64);
__type(value, u32);
__uint(max_entries, 1000);
__uint(map_flags, 0);
@@ -752,8 +787,11 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p,
{
struct dom_ctx *old_domc, *new_domc;
struct bpf_cpumask *d_cpumask, *t_cpumask;
+u64 tptr;
u32 old_dom_id = taskc->dom_id;

+tptr = t_to_tptr(p);
+
t_cpumask = taskc->cpumask;
if (!t_cpumask) {
scx_bpf_error("Failed to look up task cpumask");
@@ -788,9 +826,10 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p,
if (bpf_cpumask_intersects((const struct cpumask *)d_cpumask,
p->cpus_ptr)) {
u64 now = bpf_ktime_get_ns();

-if (!init_dsq_vtime)
-dom_xfer_task(p->pid, new_dom_id, now);
+
+if (!init_dsq_vtime) {
+dom_xfer_task(tptr, new_dom_id, now);
+}
taskc->dom_id = new_dom_id;
p->scx.dsq_vtime = dom_min_vruntime(new_domc);
taskc->deadline = p->scx.dsq_vtime +
@@ -1095,9 +1134,10 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags)
{
struct task_ctx *taskc;
struct bpf_cpumask *p_cpumask;
-pid_t pid = p->pid;
+u64 tptr;
u32 *new_dom;
s32 cpu;
+tptr = t_to_tptr(p);

if (!(taskc = lookup_task_ctx(p)))
return;
@@ -1109,7 +1149,7 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags)
/*
* Migrate @p to a new domain if requested by userland through lb_data.
*/
-new_dom = bpf_map_lookup_elem(&lb_data, &pid);
+new_dom = bpf_map_lookup_elem(&lb_data, &tptr);
if (new_dom && *new_dom != taskc->dom_id &&
task_set_domain(taskc, p, *new_dom, false)) {
stat_add(RUSTY_STAT_LOAD_BALANCE, 1);
@@ -1405,21 +1445,21 @@ void BPF_STRUCT_OPS(rusty_running, struct task_struct *p)
* consider recently active tasks. Access synchronization rules aren't
* strict. We just need to be right most of the time.
*/
-dap_gen = dom_active_pids[dom_id].gen;
-if (taskc->dom_active_pids_gen != dap_gen) {
-u64 idx = __sync_fetch_and_add(&dom_active_pids[dom_id].write_idx, 1) %
-MAX_DOM_ACTIVE_PIDS;
-s32 *pidp;
-
-pidp = MEMBER_VPTR(dom_active_pids, [dom_id].pids[idx]);
-if (!pidp) {
-scx_bpf_error("dom_active_pids[%u][%llu] indexing failed",
+dap_gen = dom_active_tptrs[dom_id].gen;
+if (taskc->dom_active_tptrs_gen != dap_gen) {
+u64 idx = __sync_fetch_and_add(&dom_active_tptrs[dom_id].write_idx, 1) %
+MAX_DOM_ACTIVE_TPTRS;
+u64 *tptrp;
+
+tptrp = MEMBER_VPTR(dom_active_tptrs, [dom_id].tptrs[idx]);
+if (!tptrp) {
+scx_bpf_error("dom_active_tptrs[%u][%llu] indexing failed",
dom_id, idx);
return;
}

-*pidp = p->pid;
-taskc->dom_active_pids_gen = dap_gen;
+*tptrp = t_to_tptr(p);
+taskc->dom_active_tptrs_gen = dap_gen;
}

if (fifo_sched)
@@ -1496,12 +1536,14 @@ void BPF_STRUCT_OPS(rusty_quiescent, struct task_struct *p, u64 deq_flags)
void BPF_STRUCT_OPS(rusty_set_weight, struct task_struct *p, u32 weight)
{
struct task_ctx *taskc;
+u64 tptr;
+tptr = t_to_tptr(p);

if (!(taskc = lookup_task_ctx(p)))
return;

if (debug >= 2)
bpf_printk("%s[%d]: SET_WEIGHT %u -> %u", p->comm, p->pid,
bpf_printk("%s[%llu]: SET_WEIGHT %u -> %u", p->comm, tptr,
taskc->weight, weight);

taskc->weight = weight;
@@ -1549,14 +1591,17 @@ static void task_pick_and_set_domain(struct task_ctx *taskc,
const struct cpumask *cpumask,
bool init_dsq_vtime)
{
+u64 tptr;
u32 dom_id = 0;

+tptr = t_to_tptr(p);
+
if (nr_doms > 1)
dom_id = task_pick_domain(taskc, p, cpumask);

if (!task_set_domain(taskc, p, dom_id, init_dsq_vtime))
scx_bpf_error("Failed to set dom%d for %s[%d]",
dom_id, p->comm, p->pid);
scx_bpf_error("Failed to set dom%d for %s[%llu]",
dom_id, p->comm, tptr);
}

void BPF_STRUCT_OPS(rusty_set_cpumask, struct task_struct *p,
@@ -1597,50 +1642,50 @@ s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p,
{
u64 now = bpf_ktime_get_ns();
struct task_ctx taskc = {
-.dom_active_pids_gen = -1,
+.dom_active_tptrs_gen = -1,
.last_blocked_at = now,
.last_woke_at = now,
.preferred_dom_mask = 0,

};
struct task_ctx *map_value;
long ret;
-pid_t pid;
+u64 tptr;

-pid = p->pid;
+tptr = t_to_tptr(p);

/*
* XXX - We want BPF_NOEXIST but bpf_map_delete_elem() in .disable() may
* fail spuriously due to BPF recursion protection triggering
* unnecessarily.
*/
-ret = bpf_map_update_elem(&task_data, &pid, &taskc, 0 /*BPF_NOEXIST*/);
+ret = bpf_map_update_elem(&task_data, &tptr, &taskc, 0 /*BPF_NOEXIST*/);
if (ret) {
stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
return ret;
}

if (debug >= 2)
bpf_printk("%s[%d]: INIT (weight %u))", p->comm, p->pid, p->scx.weight);
bpf_printk("%s[%llu]: INIT (weight %u))", p->comm, tptr, p->scx.weight);

/*
* Read the entry from the map immediately so we can add the cpumask
* with bpf_kptr_xchg().
*/
-map_value = bpf_map_lookup_elem(&task_data, &pid);
+map_value = bpf_map_lookup_elem(&task_data, &tptr);
if (!map_value)
/* Should never happen -- it was just inserted above. */
return -EINVAL;

ret = create_save_cpumask(&map_value->cpumask);
if (ret) {
-bpf_map_delete_elem(&task_data, &pid);
+bpf_map_delete_elem(&task_data, &tptr);
return ret;
}

ret = create_save_cpumask(&map_value->tmp_cpumask);
if (ret) {
-bpf_map_delete_elem(&task_data, &pid);
+bpf_map_delete_elem(&task_data, &tptr);
return ret;
}

@@ -1652,7 +1697,7 @@ s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p,
void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p,
struct scx_exit_task_args *args)
{
-pid_t pid = p->pid;
+u64 tptr = t_to_tptr(p);
long ret;

/*
Expand All @@ -1661,7 +1706,7 @@ void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p,
* deletions aren't reliable means that we sometimes leak task_ctx and
* can't use BPF_NOEXIST on allocation in .prep_enable().
*/
-ret = bpf_map_delete_elem(&task_data, &pid);
+ret = bpf_map_delete_elem(&task_data, &tptr);
if (ret) {
stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
return;
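For the userspace half of the lb_data handoff above (rusty_enqueue() consumes the entry on the task's next enqueue and calls task_set_domain()), a hedged sketch using standard libbpf calls; request_migration is a hypothetical helper, not code from this repository, and map_fd is assumed to be lb_data's file descriptor obtained from the loaded skeleton:

#include <bpf/bpf.h>

/* Ask the scheduler to move the task identified by tptr to new_dom on
 * its next enqueue; the map is keyed by tptr now rather than pid. */
static int request_migration(int map_fd, __u64 tptr, __u32 new_dom)
{
    return bpf_map_update_elem(map_fd, &tptr, &new_dom, BPF_ANY);
}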
