Skip to content

Commit

Permalink
Merge pull request #702 from multics69/lavd-dyn-pc-thr
Browse files Browse the repository at this point in the history
scx_lavd: more accurately determine the performance criticality threshold
  • Loading branch information
multics69 authored Sep 28, 2024
2 parents c19b391 + cd7846f commit e8ebc09
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 12 deletions.
7 changes: 6 additions & 1 deletion scheds/rust/scx_lavd/src/bpf/intf.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ struct sys_stat {
volatile u32 max_lat_cri; /* maximum latency criticality (LC) */
volatile u32 thr_lat_cri; /* latency criticality threshold for kicking */

volatile u32 min_perf_cri; /* minimum performance criticality */
volatile u32 avg_perf_cri; /* average performance criticality */
volatile u32 max_perf_cri; /* maximum performance criticality */
volatile u32 thr_perf_cri; /* performance criticality threshold */

volatile u32 nr_violation; /* number of utilization violation */
volatile u32 nr_active; /* number of active cores */
Expand Down Expand Up @@ -187,6 +190,8 @@ struct cpu_ctx {
* Information used to keep track of performance criticality
*/
volatile u64 sum_perf_cri; /* sum of performance criticality */
volatile u64 min_perf_cri; /* minimum performance criticality */
volatile u64 max_perf_cri; /* maximum performance criticality */

/*
* Information of a current running task for preemption
Expand Down Expand Up @@ -289,7 +294,7 @@ struct task_ctx_x {
u16 static_prio; /* nice priority */
u32 cpu_id; /* where a task ran */
u64 cpu_util; /* cpu utilization in [0..100] */
u32 avg_perf_cri; /* average performance criticality */
u32 thr_perf_cri; /* performance criticality threshold */
u32 avg_lat_cri; /* average latency criticality */
u32 nr_active; /* number of active cores */
u32 cpuperf_cur; /* CPU's current performance target */
Expand Down
139 changes: 133 additions & 6 deletions scheds/rust/scx_lavd/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,16 @@ static u64 cur_logical_clk;
*/
static u64 cur_svc_time;

/*
* Big core's compute ratio among currently active cores
*/
static u32 cur_big_core_ratio;

/*
* Big core's compute ratio when all cores are active
*/
static u32 default_big_core_ratio;

/*
* Options
*/
Expand Down Expand Up @@ -438,7 +448,7 @@ static bool is_lat_cri(struct task_ctx *taskc, struct sys_stat *stat_cur)
static bool is_perf_cri(struct task_ctx *taskc, struct sys_stat *stat_cur)
{
	/*
	 * Decide whether a task counts as performance-critical.
	 *
	 * A task allowed on both big and little cores is performance-critical
	 * only when its criticality score reaches the system-wide threshold
	 * (thr_perf_cri). A task restricted to big cores is always treated as
	 * performance-critical; a task restricted to little cores never is.
	 *
	 * on_big/on_little are read with READ_ONCE since they can be updated
	 * concurrently from other CPUs.
	 */
	if (READ_ONCE(taskc->on_big) && READ_ONCE(taskc->on_little))
		return taskc->perf_cri >= stat_cur->thr_perf_cri;
	return READ_ONCE(taskc->on_big);
}

Expand Down Expand Up @@ -475,7 +485,7 @@ int submit_task_ctx(struct task_struct *p, struct task_ctx *taskc, u32 cpu_id)
m->taskc_x.cpu_util = cpuc->util / 10;
m->taskc_x.cpu_id = cpu_id;
m->taskc_x.avg_lat_cri = stat_cur->avg_lat_cri;
m->taskc_x.avg_perf_cri = stat_cur->avg_perf_cri;
m->taskc_x.thr_perf_cri = stat_cur->thr_perf_cri;
m->taskc_x.nr_active = stat_cur->nr_active;
m->taskc_x.cpuperf_cur = cpuc->cpuperf_cur;

Expand Down Expand Up @@ -612,8 +622,11 @@ struct sys_stat_ctx {
u32 nr_big;
u32 nr_pc_on_big;
u32 nr_lc_on_big;
u64 min_perf_cri;
u64 avg_perf_cri;
u64 max_perf_cri;
u64 sum_perf_cri;
u32 avg_perf_cri;
u32 thr_perf_cri;
u64 new_util;
u32 nr_violation;
};
Expand All @@ -624,6 +637,7 @@ static void init_sys_stat_ctx(struct sys_stat_ctx *c)

c->stat_cur = get_sys_stat_cur();
c->stat_next = get_sys_stat_next();
c->min_perf_cri = 1000;
c->now = bpf_ktime_get_ns();
c->duration = c->now - c->stat_cur->last_update_clk;
c->stat_next->last_update_clk = c->now;
Expand Down Expand Up @@ -692,6 +706,14 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
/*
 * Accumulate task's performance criticality information.
 */
if (cpuc->min_perf_cri < c->min_perf_cri)
c->min_perf_cri = cpuc->min_perf_cri;
cpuc->min_perf_cri = 1000;

if (cpuc->max_perf_cri > c->max_perf_cri)
c->max_perf_cri = cpuc->max_perf_cri;
cpuc->max_perf_cri = 0;

c->sum_perf_cri += cpuc->sum_perf_cri;
cpuc->sum_perf_cri = 0;

Expand Down Expand Up @@ -761,6 +783,9 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
*/
c->max_lat_cri = c->stat_cur->max_lat_cri;
c->avg_lat_cri = c->stat_cur->avg_lat_cri;

c->min_perf_cri = c->stat_cur->min_perf_cri;
c->max_perf_cri = c->stat_cur->max_perf_cri;
c->avg_perf_cri = c->stat_cur->avg_perf_cri;
}
else {
Expand Down Expand Up @@ -791,8 +816,15 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
calc_avg32(stat_cur->avg_lat_cri, c->avg_lat_cri);
stat_next->thr_lat_cri = stat_next->max_lat_cri -
((stat_next->max_lat_cri - stat_next->avg_lat_cri) >> 1);

stat_next->min_perf_cri =
calc_avg32(stat_cur->min_perf_cri, c->min_perf_cri);
stat_next->avg_perf_cri =
calc_avg32(stat_cur->avg_perf_cri, c->avg_perf_cri);
stat_next->max_perf_cri =
calc_avg32(stat_cur->max_perf_cri, c->max_perf_cri);
stat_next->thr_perf_cri =
c->stat_cur->thr_perf_cri; /* will be updated later */

stat_next->nr_violation =
calc_avg32(stat_cur->nr_violation, c->nr_violation);
Expand Down Expand Up @@ -900,6 +932,7 @@ static void do_core_compaction(void)
struct cpu_ctx *cpuc;
struct bpf_cpumask *active, *ovrflw;
int nr_cpus, nr_active, nr_active_old, cpu, i;
u32 sum_capacity = 0, big_capacity = 0;
bool clear;
const volatile u16 *cpu_order;

Expand Down Expand Up @@ -957,6 +990,13 @@ static void do_core_compaction(void)
bpf_cpumask_clear_cpu(cpu, active);
}
scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);

/*
* Calculate big capacity ratio among active cores.
*/
sum_capacity += cpuc->capacity;
if (cpuc->big_core)
big_capacity += cpuc->capacity;
}
else {
if (i < nr_active_old) {
Expand Down Expand Up @@ -986,6 +1026,7 @@ static void do_core_compaction(void)
}
}

cur_big_core_ratio = (1000 * big_capacity) / sum_capacity;
stat_cur->nr_active = nr_active;

unlock_out:
Expand Down Expand Up @@ -1104,6 +1145,83 @@ static int do_autopilot(void)
return do_set_power_profile(LAVD_PM_PERFORMANCE, stat_cur->util);
}

static void update_thr_perf_cri(void)
{
	struct sys_stat *stat_cur = get_sys_stat_cur();
	u32 lc_ratio, span, portion, threshold;

	/*
	 * When core compaction is off, or there are no little cores at all,
	 * fall back to the capacity split measured across all cores.
	 */
	if (no_core_compaction || !have_little_core)
		cur_big_core_ratio = default_big_core_ratio;

	/*
	 * If every active core is a big core, nothing needs to be filtered:
	 * all tasks can run on big cores, so the threshold is zero.
	 */
	if (cur_big_core_ratio == 1000) {
		stat_cur->thr_perf_cri = 0;
		return;
	}

	/*
	 * Approximate the distribution of task performance criticality using
	 * the min, avg, and max observed over the last period, and assume
	 * half of the compute demand comes from below-average tasks
	 * (< avg_perf_cri) and half from at-or-above-average tasks. The
	 * little-core share of total capacity (per mille) is then mapped onto
	 * that distribution to pick the criticality threshold above which a
	 * task should prefer a big core.
	 */
	lc_ratio = 1000 - cur_big_core_ratio;
	if (lc_ratio < 500) {
		/*
		 * Little cores cover less than the below-average half, so the
		 * threshold falls inside [min, avg]: advance from the minimum
		 * by the little-core share of that span.
		 */
		span = stat_cur->avg_perf_cri - stat_cur->min_perf_cri;
		portion = (span * lc_ratio) / 1000;
		threshold = stat_cur->min_perf_cri + portion;
	} else {
		/*
		 * Little cores cover the whole below-average half and part of
		 * the rest, so the threshold falls inside [avg, max]: back off
		 * from the maximum by the big-core share of that span.
		 */
		span = stat_cur->max_perf_cri - stat_cur->avg_perf_cri;
		portion = (span * cur_big_core_ratio) / 1000;
		threshold = stat_cur->max_perf_cri - portion;
	}

	stat_cur->thr_perf_cri = threshold;
}

static void update_sys_stat(void)
{
do_update_sys_stat();
Expand All @@ -1114,6 +1232,8 @@ static void update_sys_stat(void)
if (!no_core_compaction)
do_core_compaction();

update_thr_perf_cri();

if (reinit_cpumask_for_performance) {
reinit_cpumask_for_performance = false;
reinit_active_cpumask_for_performance();
Expand Down Expand Up @@ -1455,6 +1575,10 @@ static void update_stat_for_running(struct task_struct *p,
taskc->wakeup_ft = 0;

taskc->perf_cri = perf_cri;
if (cpuc->max_perf_cri < taskc->perf_cri)
cpuc->max_perf_cri = taskc->perf_cri;
if (cpuc->min_perf_cri > taskc->perf_cri)
cpuc->min_perf_cri = taskc->perf_cri;
cpuc->sum_perf_cri += taskc->perf_cri;

/*
Expand Down Expand Up @@ -2554,7 +2678,7 @@ static int calc_cpuperf_target(struct sys_stat *stat_cur,
* current CPU utilization (cpuc->util) and 2) the current task's
* performance criticality (taskc->perf_cri) compared to the
* system-wide average performance criticality
* (stat_cur->avg_perf_cri).
* (stat_cur->thr_perf_cri).
*
* When a current CPU utilization is 85% and the current task's
* performance criticality is the same as the system-wide average
Expand All @@ -2567,7 +2691,7 @@ static int calc_cpuperf_target(struct sys_stat *stat_cur,
* high when a non-performance-critical task is running (i.e.,
* deboosting CPU frequency).
*/
max_load = stat_cur->avg_perf_cri * LAVD_CPU_UTIL_MAX_FOR_CPUPERF;
max_load = stat_cur->thr_perf_cri * LAVD_CPU_UTIL_MAX_FOR_CPUPERF;
cpu_load = taskc->perf_cri * cpuc->util;
cpuperf_target = (cpu_load * SCX_CPUPERF_ONE) / max_load;
cpuperf_target = min(cpuperf_target, SCX_CPUPERF_ONE);
Expand Down Expand Up @@ -3215,7 +3339,7 @@ static s32 init_per_cpu_ctx(u64 now)
struct cpdom_ctx *cpdomc;
int cpu, i, j, err = 0;
u64 cpdom_id;
u32 sum_capacity = 0, avg_capacity;
u32 sum_capacity = 0, avg_capacity, big_capacity = 0;
u16 turbo_cap;

bpf_rcu_read_lock();
Expand Down Expand Up @@ -3265,6 +3389,7 @@ static s32 init_per_cpu_ctx(u64 now)
cpuc->capacity = get_cpuperf_cap(cpu);
cpuc->offline_clk = now;
cpuc->cpdom_poll_pos = cpu % LAVD_CPDOM_MAX_NR;
cpuc->min_perf_cri = 1000;

sum_capacity += cpuc->capacity;
}
Expand All @@ -3289,6 +3414,7 @@ static s32 init_per_cpu_ctx(u64 now)
cpuc->big_core = cpuc->capacity >= avg_capacity;
if (cpuc->big_core) {
nr_cpus_big++;
big_capacity += cpuc->capacity;
bpf_cpumask_set_cpu(cpu, big);
/*
* Initially, all big cores are in the active domain
Expand All @@ -3309,6 +3435,7 @@ static s32 init_per_cpu_ctx(u64 now)
debugln("CPU %d is a turbo core.", cpu);
}
}
default_big_core_ratio = (1000 * big_capacity) / sum_capacity;

/*
* Initialize compute domain id.
Expand Down
2 changes: 1 addition & 1 deletion scheds/rust/scx_lavd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ impl<'a> Scheduler<'a> {
wait_freq: tc.wait_freq,
wake_freq: tc.wake_freq,
perf_cri: tc.perf_cri,
avg_perf_cri: tx.avg_perf_cri,
thr_perf_cri: tx.thr_perf_cri,
cpuperf_cur: tx.cpuperf_cur,
cpu_util: tx.cpu_util,
nr_active: tx.nr_active,
Expand Down
8 changes: 4 additions & 4 deletions scheds/rust/scx_lavd/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ pub struct SchedSample {
pub wake_freq: u64,
#[stat(desc = "Performance criticality of this task")]
pub perf_cri: u32,
#[stat(desc = "Average performance criticality in a system")]
pub avg_perf_cri: u32,
#[stat(desc = "Performance criticality threshold")]
pub thr_perf_cri: u32,
#[stat(desc = "Target performance level of this CPU")]
pub cpuperf_cur: u32,
#[stat(desc = "CPU utilization of this particular CPU")]
Expand Down Expand Up @@ -205,7 +205,7 @@ impl SchedSample {
"WAIT_FREQ",
"WAKE_FREQ",
"PERF_CRI",
"AVG_PC",
"THR_PC",
"CPUFREQ",
"CPU_UTIL",
"NR_ACT",
Expand Down Expand Up @@ -246,7 +246,7 @@ impl SchedSample {
self.wait_freq,
self.wake_freq,
self.perf_cri,
self.avg_perf_cri,
self.thr_perf_cri,
self.cpuperf_cur,
self.cpu_util,
self.nr_active,
Expand Down

0 comments on commit e8ebc09

Please sign in to comment.