Merge pull request #56 from arighi/scx-rustland-reduce-scheduler-overhead

scx_rustland: reduce scheduler overhead
htejun authored Dec 29, 2023
2 parents 3206464 + e90bc92 commit 474a149
Showing 2 changed files with 83 additions and 17 deletions.
65 changes: 53 additions & 12 deletions scheds/rust/scx_rustland/src/bpf/main.bpf.c
@@ -52,8 +52,25 @@ u32 usersched_pid; /* User-space scheduler PID */
const volatile bool switch_partial; /* Switch all tasks or SCHED_EXT tasks */
const volatile u64 slice_ns = SCX_SLICE_DFL; /* Base time slice duration */

/* Statistics */
u64 nr_queued, nr_user_dispatches, nr_kernel_dispatches, nr_sched_congested;
/*
* Number of tasks that are queued for scheduling.
*
* This number is incremented by the BPF component when a task is queued to the
* user-space scheduler and it must be decremented by the user-space scheduler
* when a task is consumed.
*/
volatile u64 nr_queued;

/*
* Number of tasks that are waiting for scheduling.
*
* This number must be updated by the user-space scheduler to keep track if
* there is still some scheduling work to do.
*/
volatile u64 nr_scheduled;

/* Misc statistics */
volatile u64 nr_user_dispatches, nr_kernel_dispatches, nr_sched_congested;

/* Report additional debugging information */
const volatile bool debug;
@@ -162,7 +179,7 @@ static void set_cpu_owner(u32 cpu, u32 pid)
scx_bpf_error("Failed to look up cpu_map for cpu %u", cpu);
return;
}
*owner= pid;
*owner = pid;
}

/*
@@ -417,7 +434,6 @@ void BPF_STRUCT_OPS(rustland_dispatch, s32 cpu, struct task_struct *prev)
/* Pop first task from the dispatched queue */
if (bpf_map_pop_elem(&dispatched, &task))
break;
__sync_fetch_and_sub(&nr_queued, 1);

/* Ignore entry if the task doesn't exist anymore */
p = bpf_task_from_pid(task.pid);
@@ -444,9 +460,11 @@ void BPF_STRUCT_OPS(rustland_running, struct task_struct *p)
{
dbg_msg("start: pid=%d (%s)", p->pid, p->comm);
/*
* Mark the CPU as busy by setting the pid as owner.
* Mark the CPU as busy by setting the pid as owner (ignoring the
* user-space scheduler).
*/
set_cpu_owner(scx_bpf_task_cpu(p), p->pid);
if (!is_usersched_task(p))
set_cpu_owner(scx_bpf_task_cpu(p), p->pid);
}

/* Task @p releases a CPU */
@@ -456,7 +474,8 @@ void BPF_STRUCT_OPS(rustland_stopping, struct task_struct *p, bool runnable)
/*
* Mark the CPU as idle by setting the owner to 0.
*/
set_cpu_owner(scx_bpf_task_cpu(p), 0);
if (!is_usersched_task(p))
set_cpu_owner(scx_bpf_task_cpu(p), 0);
}

/*
@@ -480,13 +499,35 @@ void BPF_STRUCT_OPS(rustland_update_idle, s32 cpu, bool idle)
return;
/*
* A CPU is now available, notify the user-space scheduler that tasks
* can be dispatched, if there is at least one task queued (ready to be
* scheduled).
* can be dispatched, if there is at least one task waiting to be
* scheduled, either queued (accounted in nr_queued) or scheduled
* (accounted in nr_scheduled).
*
* NOTE: nr_queued is incremented by the BPF component, more exactly in
* enqueue(), when a task is sent to the user-space scheduler, then
* the scheduler drains the queued tasks (updating nr_queued) and adds
* them to its internal data structures / state; at this point tasks
* become "scheduled" and the user-space scheduler will take care of
* updating nr_scheduled accordingly; lastly tasks will be dispatched
* and the user-space scheduler will update nr_scheduled again.
*
* Moreover, kick the CPU to make it immediately ready to accept
* dispatched tasks.
* Checking both counters allows to determine if there is still some
* pending work to do for the scheduler: new tasks have been queued
* since last check, or there are still tasks "queued" or "scheduled"
* since the previous user-space scheduler run. If the counters are
* both zero it is pointless to wake-up the scheduler (even if a CPU
* becomes idle), because there is nothing to do.
*
* Keep in mind that update_idle() doesn't run concurrently with the
* user-space scheduler (that is single-threaded): this function is
* naturally serialized with the user-space scheduler code, therefore
* this check here is also safe from a concurrency perspective.
*/
if (nr_queued > 0) {
if (nr_queued || nr_scheduled) {
/*
* Kick the CPU to make it immediately ready to accept
* dispatched tasks.
*/
scx_bpf_kick_cpu(cpu, 0);
set_usersched_needed();
}
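
The wake-up rule described in the comment above reduces to a single check on the two shared counters. The following is a minimal, self-contained Rust sketch of that rule and of how the counters could evolve across one scheduling round (this is a model, not the actual BPF code, and the counter values in the walkthrough are hypothetical):

fn should_wake_scheduler(nr_queued: u64, nr_scheduled: u64) -> bool {
    // An idle CPU only needs the user-space scheduler if tasks are still
    // queued (not yet consumed) or scheduled (consumed but not dispatched).
    nr_queued > 0 || nr_scheduled > 0
}

fn main() {
    // 1) The BPF component queues three tasks for the user-space scheduler.
    assert!(should_wake_scheduler(3, 0));
    // 2) The scheduler drains the queue into its task pool:
    //    nr_queued -> 0, nr_scheduled -> 3. Work is still pending.
    assert!(should_wake_scheduler(0, 3));
    // 3) All pooled tasks have been dispatched: nr_scheduled -> 0.
    //    Nothing is left to do, so idle CPUs stop kicking the scheduler.
    assert!(!should_wake_scheduler(0, 0));
}
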
35 changes: 30 additions & 5 deletions scheds/rust/scx_rustland/src/main.rs
@@ -387,7 +394,14 @@ impl<'a> Scheduler<'a> {
self.task_pool.push(task.pid, cpu, self.min_vruntime);
}
}
Ok(None) => break,
Ok(None) => {
// Reset nr_queued and update nr_scheduled, to notify the dispatcher that
// queued tasks are drained, but there is still some work left to do in the
// scheduler.
self.skel.bss_mut().nr_queued = 0;
self.skel.bss_mut().nr_scheduled = self.task_pool.tasks.len() as u64;
break;
}
Err(err) => {
warn!("Error: {}", err);
break;
@@ -429,6 +436,10 @@ impl<'a> Scheduler<'a> {
None => break,
}
}
// Reset nr_scheduled to notify the dispatcher that all the tasks received by the scheduler
// has been dispatched, so there is no reason to re-activate the scheduler, unless more
// tasks are queued.
self.skel.bss_mut().nr_scheduled = self.task_pool.tasks.len() as u64;
}

// Main scheduling function (called in a loop to periodically drain tasks from the queued list
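
Taken together, the two nr_scheduled updates above (one when the queued tasks are drained, one after dispatching) keep nr_scheduled equal to the number of tasks still sitting in the scheduler's task pool. Here is a rough, self-contained Rust model of that bookkeeping (the SchedModel type and its methods are made up for illustration; in the real scheduler the counters live in the BPF skeleton and are updated through self.skel.bss_mut()):

struct SchedModel {
    task_pool: Vec<u32>, // pids consumed from BPF but not yet dispatched
    nr_queued: u64,      // shared counter: tasks queued by BPF, not yet drained
    nr_scheduled: u64,   // shared counter: tasks drained but not yet dispatched
}

impl SchedModel {
    // Drain everything the BPF component queued into the local task pool.
    fn drain_queued(&mut self, queued: Vec<u32>) {
        self.task_pool.extend(queued);
        self.nr_queued = 0;
        self.nr_scheduled = self.task_pool.len() as u64;
    }

    // Dispatch up to n tasks, then publish how many are still pending.
    fn dispatch(&mut self, n: usize) {
        let n = n.min(self.task_pool.len());
        for _pid in self.task_pool.drain(..n) {
            // The real scheduler would push each task to the BPF "dispatched"
            // map here; the model simply drops it.
        }
        self.nr_scheduled = self.task_pool.len() as u64;
    }
}

fn main() {
    let mut s = SchedModel { task_pool: Vec::new(), nr_queued: 3, nr_scheduled: 0 };
    s.drain_queued(vec![101, 102, 103]);
    assert_eq!((s.nr_queued, s.nr_scheduled), (0, 3));
    s.dispatch(2);
    // One task is still pending, so idle CPUs keep waking the scheduler.
    assert_eq!(s.nr_scheduled, 1);
}
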
@@ -443,18 +454,32 @@ impl<'a> Scheduler<'a> {

// Print internal scheduler statistics (fetched from the BPF part)
fn print_stats(&mut self) {
let nr_queued = self.skel.bss().nr_queued as u64;
// Show minimum vruntime (this should be constantly incrementing).
info!("vruntime={}", self.min_vruntime);

// Show general statistics.
let nr_user_dispatches = self.skel.bss().nr_user_dispatches as u64;
let nr_kernel_dispatches = self.skel.bss().nr_kernel_dispatches as u64;
let nr_sched_congested = self.skel.bss().nr_sched_congested as u64;
info!(
" nr_user_dispatched={} nr_kernel_dispatches={} nr_sched_congested={}",
nr_user_dispatches, nr_kernel_dispatches, nr_sched_congested
);

// Show tasks that are waiting to be dispatched.
let nr_queued = self.skel.bss().nr_queued as u64;
let nr_scheduled = self.skel.bss().nr_scheduled as u64;
let nr_waiting = nr_queued + nr_scheduled;
info!(
"min_vtime={} nr_queued={} nr_user_dispatched={} nr_kernel_dispatches={} nr_sched_congested={}",
self.min_vruntime, nr_queued, nr_user_dispatches, nr_kernel_dispatches, nr_sched_congested
" nr_waiting={} [nr_queued={} + nr_scheduled={}]",
nr_waiting, nr_queued, nr_scheduled
);

// Show tasks that are currently running.
info!("Running tasks:");
for cpu in 0..self.nr_cpus_online {
let pid = self.get_cpu_pid(cpu as u32);
info!("cpu={} pid={}", cpu, pid);
info!(" cpu={} pid={}", cpu, pid);
}

log::logger().flush();
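
For reference, a single iteration of the info!() calls above would produce output along these lines (all values are made up, and any prefix added by the logger is omitted):

vruntime=152214490
 nr_user_dispatched=8210 nr_kernel_dispatches=413 nr_sched_congested=0
 nr_waiting=4 [nr_queued=1 + nr_scheduled=3]
Running tasks:
 cpu=0 pid=1234
 cpu=1 pid=0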
