Commit cfb0a78

Merge pull request #1622 from JakeHillion/pr1622
chaos: guarantee dispatch of tasks on quiet cpus
2 parents 4d8b509 + 276b7bd commit cfb0a78
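
scx_chaos parks delayed tasks on per-CPU delay DSQs and, before this change, only drained them from chaos_dispatch; on a quiet CPU with no scheduling activity, a delayed task could sit well past its intended release time. This commit adds a periodic BPF timer that walks every CPU's delay DSQ, kicks any CPU whose queued tasks are due (or nearly due, within a slack window), and re-arms itself for the earliest upcoming deadline, clamped between configurable minimum and maximum check intervals. (Summary reconstructed from the diff below.)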

File tree: 2 files changed, +133 -12 lines


scheds/rust/scx_chaos/src/bpf/main.bpf.c

Lines changed: 131 additions & 10 deletions
@@ -12,9 +12,30 @@
 
 #include <stdbool.h>
 
+const volatile u64 chaos_timer_check_queues_min_ns = 500000;
+const volatile u64 chaos_timer_check_queues_max_ns = 2000000;
+const volatile u64 chaos_timer_check_queues_slack_ns = 2500000;
+
 const volatile u32 random_delays_freq_frac32 = 1; /* for veristat */
-const volatile u32 random_delays_min_ns = 1; /* for veristat */
-const volatile u32 random_delays_max_ns = 2; /* for veristat */
+const volatile u64 random_delays_min_ns = 1; /* for veristat */
+const volatile u64 random_delays_max_ns = 2; /* for veristat */
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+
+#define MAX_ITERS_IN_DISPATCH 8
+
+enum chaos_timer_callbacks {
+	CHAOS_TIMER_CHECK_QUEUES,
+	CHAOS_MAX_TIMERS,
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CHAOS_MAX_TIMERS);
+	__type(key, int);
+	__type(value, struct timer_wrapper);
+} chaos_timers SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
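
The new chaos_timers array stores its timer inside a struct timer_wrapper, which is defined elsewhere in the scx tree and not shown in this diff. Based purely on how the new code uses it (a bpf_timer plus a stored map key), a minimal sketch would be:

	/* Hypothetical sketch inferred from usage below; the real definition
	 * lives elsewhere in the scx tree. */
	struct timer_wrapper {
		struct bpf_timer timer;	/* armed in chaos_init, fires the callback */
		int key;		/* map index, recorded so the callback can find its slot */
	};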
@@ -36,14 +57,16 @@ static __always_inline enum chaos_trait_kind choose_chaos()
 	return CHAOS_TRAIT_NONE;
 }
 
-static __always_inline u32 get_current_cpu_delay_dsq()
+static __always_inline u64 get_cpu_delay_dsq(int cpu_idx)
 {
+	if (cpu_idx >= 0)
+		return CHAOS_DSQ_BASE | cpu_idx;
+
 	// use current processor so enqueue runs here next time too
 	// TODO: this assumes CPU IDs are linear, and probably needs to be mapped
 	// into linear IDs with topology information passed from userspace
-	u32 cpu = bpf_get_smp_processor_id();
-
-	return CHAOS_DSQ_BASE | cpu;
+	cpu_idx = bpf_get_smp_processor_id();
+	return CHAOS_DSQ_BASE | cpu_idx;
 }
 
 __weak s32 enqueue_random_delay(struct task_struct *p __arg_trusted, u64 enq_flags,
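
Note the refactor here: get_current_cpu_delay_dsq() becomes get_cpu_delay_dsq(int cpu_idx), where a negative index means "the current CPU" and preserves the old behaviour for the enqueue and dispatch paths, while the new timer callback passes an explicit index so it can inspect other CPUs' delay DSQs. The DSQ id is still formed as CHAOS_DSQ_BASE | cpu_idx, hence the standing TODO about non-linear CPU ids.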
@@ -56,7 +79,7 @@ __weak s32 enqueue_random_delay(struct task_struct *p __arg_trusted, u64 enq_fla
 		vtime += rand64 % (random_delays_max_ns - random_delays_min_ns);
 	}
 
-	scx_bpf_dsq_insert_vtime(p, get_current_cpu_delay_dsq(), 0, vtime, enq_flags);
+	scx_bpf_dsq_insert_vtime(p, get_cpu_delay_dsq(-1), 0, vtime, enq_flags);
 
 	return true;
 }
@@ -81,11 +104,90 @@ __weak s32 enqueue_chaotic(struct task_struct *p __arg_trusted, u64 enq_flags,
 	return out;
 }
 
+enum dsq_check_result {
+	CHAOS_DSQ_CHECK_OKAY,
+	CHAOS_DSQ_CHECK_DISPATCH_NOW,
+	CHAOS_DSQ_CHECK_DISPATCH_SLOW,
+};
+
+/*
+ * Walk a CPU's delay dsq and kick it if the task should already have been
+ * scheduled. Use a slack time to avoid preempting for small differences. Return
+ * the next time a task in this DSQ might need kicking. The next time is
+ * obviously very racy and may return 0 if the DSQ will all be handled by the
+ * next dispatch, so should be clamped before being relied on.
+ */
+__weak u64 check_dsq_times(int cpu_idx)
+{
+	struct task_struct *p;
+	u64 next_trigger_time = 0;
+	u64 now = bpf_ktime_get_ns();
+	int i = 0;
+	bool has_kicked = false;
+
+	bpf_rcu_read_lock();
+	bpf_for_each(scx_dsq, p, get_cpu_delay_dsq(cpu_idx), 0) {
+		p = bpf_task_from_pid(p->pid);
+		if (!p)
+			break;
+
+		if (i++ >= MAX_ITERS_IN_DISPATCH) {
+			next_trigger_time = p->scx.dsq_vtime;
+			bpf_task_release(p);
+			break;
+		}
+
+		if (has_kicked) {
+			bpf_task_release(p);
+			continue;
+		}
+
+		if (p->scx.dsq_vtime < now + chaos_timer_check_queues_slack_ns) {
+			has_kicked = true;
+			scx_bpf_kick_cpu(cpu_idx, SCX_KICK_PREEMPT);
+		} else if (p->scx.dsq_vtime < now) {
+			has_kicked = true;
+			scx_bpf_kick_cpu(cpu_idx, SCX_KICK_IDLE);
+		}
+
+		bpf_task_release(p);
+	}
+	bpf_rcu_read_unlock();
+
+	return next_trigger_time;
+}
+
+static int chaos_timer_check_queues_callback(void *map, int key, struct timer_wrapper *timerw)
+{
+	u64 started_at = bpf_ktime_get_ns();
+	u64 next_trigger_time = 0;
+	u64 this_next_trigger_time;
+	int cpu_idx;
+
+	bpf_for(cpu_idx, 0, nr_cpus) {
+		this_next_trigger_time = check_dsq_times(cpu_idx);
+		next_trigger_time = MAX(next_trigger_time, this_next_trigger_time);
+	}
+
+	if (next_trigger_time == 0) {
+		bpf_timer_start(&timerw->timer, chaos_timer_check_queues_max_ns, 0);
+		return 0;
+	}
+
+	next_trigger_time = MAX(next_trigger_time, started_at + chaos_timer_check_queues_min_ns);
+	next_trigger_time = MIN(next_trigger_time, started_at + chaos_timer_check_queues_max_ns);
+
+	bpf_timer_start(&timerw->timer, next_trigger_time, BPF_F_TIMER_ABS);
+	return 0;
+}
+
 s32 BPF_STRUCT_OPS_SLEEPABLE(chaos_init)
 {
+	struct timer_wrapper *timerw;
 	struct llc_ctx *llcx;
 	struct cpu_ctx *cpuc;
-	int i, ret;
+	int timer_id, ret, i;
 
 	bpf_for(i, 0, nr_cpus) {
 		if (!(cpuc = lookup_cpu_ctx(i)) ||
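
The two MAX/MIN lines at the end of the callback clamp the re-arm time into [started_at + min, started_at + max]: the timer never refires sooner than the minimum interval after this run, and never sleeps longer than the maximum even when no deadline is near. A standalone userspace sketch of the same arithmetic, with illustrative values (not part of the scheduler):

	#include <stdio.h>
	#include <stdint.h>

	#define MIN(x, y) ((x) < (y) ? (x) : (y))
	#define MAX(x, y) ((x) > (y) ? (x) : (y))

	int main(void)
	{
		uint64_t min_ns = 500000, max_ns = 2000000;	/* the shipped defaults */
		uint64_t started_at = 1000000000;	/* stand-in for bpf_ktime_get_ns() */
		uint64_t next = started_at + 100000;	/* earliest deadline seen in the walk */

		next = MAX(next, started_at + min_ns);	/* don't re-check too soon */
		next = MIN(next, started_at + max_ns);	/* don't sleep past the max interval */
		printf("re-arm at %llu ns (absolute)\n", (unsigned long long)next);	/* 1000500000 */
		return 0;
	}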
@@ -97,6 +199,25 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(chaos_init)
 		return ret;
 	}
 
+	timer_id = CHAOS_TIMER_CHECK_QUEUES;
+	timerw = bpf_map_lookup_elem(&chaos_timers, &timer_id);
+	if (!timerw)
+		return -1;
+
+	timerw->key = timer_id;
+
+	ret = bpf_timer_init(&timerw->timer, &chaos_timers, CLOCK_BOOTTIME);
+	if (ret)
+		return -1;
+
+	ret = bpf_timer_set_callback(&timerw->timer, &chaos_timer_check_queues_callback);
+	if (ret)
+		return -1;
+
+	ret = bpf_timer_start(&timerw->timer, chaos_timer_check_queues_max_ns, 0);
+	if (ret)
+		return -1;
+
 	return p2dq_init_impl();
 }
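
The setup in chaos_init follows the usual BPF timer sequence: look up the timer's map slot, bpf_timer_init() binding it to the map and a clockid, bpf_timer_set_callback(), then an initial bpf_timer_start() with a relative expiry of the maximum check interval. CLOCK_BOOTTIME (rather than CLOCK_MONOTONIC) presumably keeps the deadline checks advancing across system suspend.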

@@ -130,8 +251,8 @@ void BPF_STRUCT_OPS(chaos_dispatch, s32 cpu, struct task_struct *prev)
 	u64 now = bpf_ktime_get_ns();
 
 	int i = 0;
-	bpf_for_each(scx_dsq, p, get_current_cpu_delay_dsq(), 0) {
-		if (++i >= 8)
+	bpf_for_each(scx_dsq, p, get_cpu_delay_dsq(-1), 0) {
+		if (i++ >= MAX_ITERS_IN_DISPATCH)
 			break; // the verifier can't handle this loop, so limit it
 
 		p = bpf_task_from_pid(p->pid);
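
Both the dispatch path and the timer's walk bound their DSQ iteration at MAX_ITERS_IN_DISPATCH (8) to stay within what the verifier will accept; the magic 8 in chaos_dispatch is replaced by the named constant. When check_dsq_times() hits the bound it records the ninth task's dsq_vtime as the next trigger time rather than walking further, which is what feeds the clamped re-arm in the timer callback.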

scheds/rust/scx_chaos/src/lib.rs

Lines changed: 2 additions & 2 deletions
@@ -127,8 +127,8 @@ impl<'a> TryFrom<Builder<'a>> for Pin<Box<Scheduler>> {
            } => {
                open_skel.maps.rodata_data.random_delays_freq_frac32 =
                    (frequency * 2_f64.powf(32_f64)) as u32;
-               open_skel.maps.rodata_data.random_delays_min_ns = (min_us * 1000) as u32;
-               open_skel.maps.rodata_data.random_delays_max_ns = (max_us * 1000) as u32;
+               open_skel.maps.rodata_data.random_delays_min_ns = min_us * 1000;
+               open_skel.maps.rodata_data.random_delays_max_ns = max_us * 1000;
            }
        }
    }
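
Widening random_delays_{min,max}_ns from u32 to u64 on the BPF side lets the Rust side drop the `as u32` casts, and it removes a real truncation hazard: 2^32 ns is only about 4.3 s, so any configured delay beyond that would previously have wrapped when converted from microseconds to nanoseconds.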
