From 49f2e7ce06bc7d90c8d4949b561f129a28e851e2 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 31 Dec 2023 15:13:36 +0100 Subject: [PATCH 1/4] scx_rustland: enable SCX_OPS_ENQ_LAST Make sure the scheduler is not activated if we are dealing with the last task running. This allows scx_rustland to consistently reduce its CPU usage in systems that are mostly idle (and avoid unnecessary power consumption). Signed-off-by: Andrea Righi --- scheds/rust/scx_rustland/src/bpf/main.bpf.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scheds/rust/scx_rustland/src/bpf/main.bpf.c b/scheds/rust/scx_rustland/src/bpf/main.bpf.c index 70f1c3b3..d8647f09 100644 --- a/scheds/rust/scx_rustland/src/bpf/main.bpf.c +++ b/scheds/rust/scx_rustland/src/bpf/main.bpf.c @@ -309,7 +309,7 @@ s32 BPF_STRUCT_OPS(rustland_select_cpu, struct task_struct *p, s32 prev_cpu, * .select_cpu()), since this function may be called on a different CPU (so we * cannot check the current CPU directly). */ -static bool is_task_cpu_available(struct task_struct *p) +static bool is_task_cpu_available(struct task_struct *p, u64 enq_flags) { struct task_ctx *tctx; @@ -322,6 +322,12 @@ static bool is_task_cpu_available(struct task_struct *p) if (is_kthread(p) && p->nr_cpus_allowed == 1) return true; + /* + * No scheduling required if it's the last task running. + */ + if (enq_flags & SCX_ENQ_LAST) + return true; + /* * For regular tasks always rely on force_local to determine if we can * bypass the scheduler. @@ -365,7 +371,7 @@ void BPF_STRUCT_OPS(rustland_enqueue, struct task_struct *p, u64 enq_flags) * Dispatch the task on the local FIFO directly if the selected task's * CPU is available (no scheduling decision required). 
*/ - if (is_task_cpu_available(p)) { + if (is_task_cpu_available(p, enq_flags)) { dispatch_local(p, enq_flags); __sync_fetch_and_add(&nr_kernel_dispatches, 1); return; @@ -626,6 +632,7 @@ struct sched_ext_ops rustland = { .prep_enable = (void *)rustland_prep_enable, .init = (void *)rustland_init, .exit = (void *)rustland_exit, + .flags = SCX_OPS_ENQ_LAST, .timeout_ms = 5000, .name = "rustland", }; From 405a11308ee9348fa7c2cb0237f9894d29845a37 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 31 Dec 2023 16:03:11 +0100 Subject: [PATCH 2/4] scx_rustland: always use dispatch_on_cpu() when possible Use dispatch_on_cpu() when possible, so that all tasks dispatched by the user-space scheduler get the same priority, instead of having some of them dispatched to the global DSQ and others dispatched to the per-CPU DSQ. Signed-off-by: Andrea Righi --- scheds/rust/scx_rustland/src/bpf/main.bpf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scheds/rust/scx_rustland/src/bpf/main.bpf.c b/scheds/rust/scx_rustland/src/bpf/main.bpf.c index d8647f09..2b827cd9 100644 --- a/scheds/rust/scx_rustland/src/bpf/main.bpf.c +++ b/scheds/rust/scx_rustland/src/bpf/main.bpf.c @@ -450,11 +450,9 @@ void BPF_STRUCT_OPS(rustland_dispatch, s32 cpu, struct task_struct *prev) * task and migrate (if possible); otherwise, dispatch on the * global DSQ. 
*/ - prev_cpu = scx_bpf_task_cpu(p); - dbg_msg("usersched: pid=%d prev_cpu=%d cpu=%d payload=%llu", + dbg_msg("usersched: pid=%d cpu=%d payload=%llu", task.pid, task.cpu, task.payload); - if ((task.cpu != prev_cpu) && - bpf_cpumask_test_cpu(task.cpu, p->cpus_ptr)) + if (bpf_cpumask_test_cpu(task.cpu, p->cpus_ptr)) dispatch_on_cpu(p, task.cpu, 0); else dispatch_global(p, 0); From a7677fdf281ef9ad26fdd0be109936a45e5363bb Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 31 Dec 2023 16:35:44 +0100 Subject: [PATCH 3/4] scx_rustland: bypass user-space scheduler for short-lived kthreads Bypass the user-space scheduler for kthreads that still have more than half of their runtime budget. As they are likely to release the CPU soon, granting them a substantial priority boost can enhance the overall system performance. In the event that one of these kthreads turns into a CPU hog, it will deplete its runtime budget and therefore it will be scheduled like any other normal task through the user-space scheduler. Signed-off-by: Andrea Righi --- scheds/rust/scx_rustland/src/bpf/main.bpf.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scheds/rust/scx_rustland/src/bpf/main.bpf.c b/scheds/rust/scx_rustland/src/bpf/main.bpf.c index 2b827cd9..dd806bb8 100644 --- a/scheds/rust/scx_rustland/src/bpf/main.bpf.c +++ b/scheds/rust/scx_rustland/src/bpf/main.bpf.c @@ -314,7 +314,7 @@ static bool is_task_cpu_available(struct task_struct *p, u64 enq_flags) struct task_ctx *tctx; /* - * Always dispatch per-CPU kthread on the same CPU, bypassing the + * Always dispatch per-CPU kthreads on the same CPU, bypassing the * user-space scheduler (in this way we can to prioritize critical * kernel threads that may potentially slow down the entire system if * they are blocked for too long). 
@@ -322,6 +322,19 @@ static bool is_task_cpu_available(struct task_struct *p, u64 enq_flags) if (is_kthread(p) && p->nr_cpus_allowed == 1) return true; + /* + * Moreover, immediately dispatch kthreads that still have more than + * half of their runtime budget. As they are likely to release the CPU + * soon, granting them a substantial priority boost can enhance the + * overall system performance. + * + * In the event that one of these kthreads turns into a CPU hog, it + * will deplete its runtime budget and therefore it will be scheduled + * like any other normal task. + */ + if (is_kthread(p) && p->scx.slice > slice_ns / 2) + return true; + /* * No scheduling required if it's the last task running. */ From 1cdcb8af6052046481e33b23525168b87ad66816 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 31 Dec 2023 17:03:30 +0100 Subject: [PATCH 4/4] scx_rustland: show the CPU where the scheduler is running In the scheduler statistics reported periodically to stdout, instead of showing "pid=0" for the CPU where the scheduler is running (like an idle CPU), show "[self]". This helps to identify exactly where the user-space scheduler is running (when and where it migrates, etc.). Signed-off-by: Andrea Righi --- scheds/rust/scx_rustland/src/main.rs | 45 +++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/scheds/rust/scx_rustland/src/main.rs b/scheds/rust/scx_rustland/src/main.rs index 91ca5391..5b1b3faf 100644 --- a/scheds/rust/scx_rustland/src/main.rs +++ b/scheds/rust/scx_rustland/src/main.rs @@ -17,6 +17,10 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::time::{Duration, SystemTime}; +use std::fs::File; +use std::io::{self, Read}; +use std::path::Path; + use anyhow::bail; use anyhow::Context; use anyhow::Result; @@ -449,6 +453,37 @@ impl<'a> Scheduler<'a> { thread::yield_now(); } + // Get the current CPU where the scheduler is running. 
+ fn get_current_cpu() -> io::Result<i32> { + // Open /proc/self/stat file + let path = Path::new("/proc/self/stat"); + let mut file = File::open(path)?; + + // Read the content of the file into a String + let mut content = String::new(); + file.read_to_string(&mut content)?; + + // Split the content into fields using whitespace as the delimiter + let fields: Vec<&str> = content.split_whitespace().collect(); + + // Parse the 39th field as an i32 and return it. + if let Some(field) = fields.get(38) { + if let Ok(value) = field.parse::<i32>() { + Ok(value) + } else { + Err(io::Error::new( + io::ErrorKind::InvalidData, + "Unable to parse current CPU information as i32", + )) + } + } else { + Err(io::Error::new( + io::ErrorKind::InvalidData, + "Unable to get current CPU information", + )) + } + } + // Print internal scheduler statistics (fetched from the BPF part) fn print_stats(&mut self) { // Show minimum vruntime (this should be constantly incrementing). @@ -473,9 +508,17 @@ impl<'a> Scheduler<'a> { ); // Show tasks that are currently running. + let sched_cpu = match Self::get_current_cpu() { + Ok(cpu_info) => cpu_info, + Err(_) => -1, + }; info!("Running tasks:"); for cpu in 0..self.nr_cpus_online { - let pid = self.get_cpu_pid(cpu); + let pid = if cpu == sched_cpu { + "[self]".to_string() + } else { + self.get_cpu_pid(cpu).to_string() + }; info!(" cpu={} pid={}", cpu, pid); }