 #include <stdbool.h>

+const volatile u64 chaos_timer_check_queues_min_ns = 500000;
+const volatile u64 chaos_timer_check_queues_max_ns = 2000000;
+const volatile u64 chaos_timer_check_queues_slack_ns = 2500000;
+
 const volatile u32 random_delays_freq_frac32 = 1; /* for veristat */
-const volatile u32 random_delays_min_ns = 1; /* for veristat */
-const volatile u32 random_delays_max_ns = 2; /* for veristat */
+const volatile u64 random_delays_min_ns = 1; /* for veristat */
+const volatile u64 random_delays_max_ns = 2; /* for veristat */
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+
+#define MAX_ITERS_IN_DISPATCH 8
+
+enum chaos_timer_callbacks {
+	CHAOS_TIMER_CHECK_QUEUES,
+	CHAOS_MAX_TIMERS,
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CHAOS_MAX_TIMERS);
+	__type(key, int);
+	__type(value, struct timer_wrapper);
+} chaos_timers SEC(".maps");

 struct {
 	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
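For context, and not part of this patch: the new `const volatile` knobs live in the BPF object's read-only data, so a loader can override the defaults before the program is loaded. A minimal sketch of that pattern with a libbpf C skeleton, assuming a hypothetical skeleton name `chaos_bpf` (the scheduler's real loader is not shown in this diff):

	#include "chaos.bpf.skel.h"	/* hypothetical generated skeleton header */

	static int load_with_timer_knobs(void)
	{
		struct chaos_bpf *skel = chaos_bpf__open();
		if (!skel)
			return -1;

		/* Override rodata defaults before load; all values are in nanoseconds. */
		skel->rodata->chaos_timer_check_queues_min_ns = 500000;	/* 0.5 ms */
		skel->rodata->chaos_timer_check_queues_max_ns = 2000000;	/* 2 ms */
		skel->rodata->chaos_timer_check_queues_slack_ns = 2500000;	/* 2.5 ms */

		if (chaos_bpf__load(skel)) {
			chaos_bpf__destroy(skel);
			return -1;
		}
		/* ... attach the struct_ops and run ... */
		chaos_bpf__destroy(skel);
		return 0;
	}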
@@ -36,14 +57,16 @@ static __always_inline enum chaos_trait_kind choose_chaos()
 	return CHAOS_TRAIT_NONE;
 }

-static __always_inline u32 get_current_cpu_delay_dsq()
+static __always_inline u64 get_cpu_delay_dsq(int cpu_idx)
 {
+	if (cpu_idx >= 0)
+		return CHAOS_DSQ_BASE | cpu_idx;
+
 	// use current processor so enqueue runs here next time too
 	// TODO: this assumes CPU IDs are linear, and probably needs to be mapped
 	// into linear IDs with topology information passed from userspace
-	u32 cpu = bpf_get_smp_processor_id();
-
-	return CHAOS_DSQ_BASE | cpu;
+	cpu_idx = bpf_get_smp_processor_id();
+	return CHAOS_DSQ_BASE | cpu_idx;
 }

 __weak s32 enqueue_random_delay(struct task_struct *p __arg_trusted, u64 enq_flags,
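An illustration, not part of the patch: after this change one helper serves both the per-CPU timer path and the local enqueue/dispatch path. A non-negative index selects that CPU's delay DSQ (CHAOS_DSQ_BASE | cpu), while -1 resolves to whichever CPU the caller is running on.

	/* Sketch of the two call patterns used later in this diff. */
	static __always_inline void delay_dsq_examples(void)
	{
		u64 remote = get_cpu_delay_dsq(3);	/* timer callback: CPU 3's delay DSQ */
		u64 local = get_cpu_delay_dsq(-1);	/* enqueue/dispatch: current CPU's DSQ */

		(void)remote;
		(void)local;
	}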
@@ -56,7 +79,7 @@ __weak s32 enqueue_random_delay(struct task_struct *p __arg_trusted, u64 enq_flags,
 		vtime += rand64 % (random_delays_max_ns - random_delays_min_ns);
 	}

-	scx_bpf_dsq_insert_vtime(p, get_current_cpu_delay_dsq(), 0, vtime, enq_flags);
+	scx_bpf_dsq_insert_vtime(p, get_cpu_delay_dsq(-1), 0, vtime, enq_flags);

 	return true;
 }
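A worked note, not part of the patch: the modulo above keeps the random offset added to vtime strictly below the configured spread, so a delayed task's deadline never drifts past the maximum delay.

	/* Sketch: the offset added to vtime always lies in
	 * [0, random_delays_max_ns - random_delays_min_ns); with the veristat
	 * defaults above (min = 1, max = 2) the only possible offset is 0. */
	static __always_inline u64 bounded_delay_offset(u64 rand64)
	{
		return rand64 % (random_delays_max_ns - random_delays_min_ns);
	}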
@@ -81,11 +104,90 @@ __weak s32 enqueue_chaotic(struct task_struct *p __arg_trusted, u64 enq_flags,
 	return out;
 }

+enum dsq_check_result {
+	CHAOS_DSQ_CHECK_OKAY,
+	CHAOS_DSQ_CHECK_DISPATCH_NOW,
+	CHAOS_DSQ_CHECK_DISPATCH_SLOW,
+};
+
+/*
+ * Walk a CPU's delay dsq and kick it if the task should already have been
+ * scheduled. Use a slack time to avoid preempting for small differences. Return
+ * the next time a task in this DSQ might need kicking. The next time is
+ * obviously very racy and may return 0 if the DSQ will all be handled by the
+ * next dispatch, so should be clamped before being relied on.
+ */
+__weak u64 check_dsq_times(int cpu_idx)
+{
+	struct task_struct *p;
+	u64 next_trigger_time = 0;
+	u64 now = bpf_ktime_get_ns();
+	int i = 0;
+	bool has_kicked = false;
+
+	bpf_rcu_read_lock();
+	bpf_for_each(scx_dsq, p, get_cpu_delay_dsq(cpu_idx), 0) {
+		p = bpf_task_from_pid(p->pid);
+		if (!p)
+			break;
+
+		if (i++ >= MAX_ITERS_IN_DISPATCH) {
+			next_trigger_time = p->scx.dsq_vtime;
+			bpf_task_release(p);
+			break;
+		}
+
+		if (has_kicked) {
+			bpf_task_release(p);
+			continue;
+		}
+
+		if (p->scx.dsq_vtime < now) {
+			has_kicked = true;
+			scx_bpf_kick_cpu(cpu_idx, SCX_KICK_PREEMPT);
+		} else if (p->scx.dsq_vtime < now + chaos_timer_check_queues_slack_ns) {
+			has_kicked = true;
+			scx_bpf_kick_cpu(cpu_idx, SCX_KICK_IDLE);
+		}
+
+		bpf_task_release(p);
+	}
+	bpf_rcu_read_unlock();
+
+	return next_trigger_time;
+}
+
+static int chaos_timer_check_queues_callback(void *map, int key, struct timer_wrapper *timerw)
+{
+	u64 started_at = bpf_ktime_get_ns();
+	u64 next_trigger_time = 0;
+	u64 this_next_trigger_time;
+	int cpu_idx;
+
+	bpf_for(cpu_idx, 0, nr_cpus) {
+		this_next_trigger_time = check_dsq_times(cpu_idx);
+		next_trigger_time = MAX(next_trigger_time, this_next_trigger_time);
+	}
+
+	if (next_trigger_time == 0) {
+		bpf_timer_start(&timerw->timer, chaos_timer_check_queues_max_ns, 0);
+		return 0;
+	}
+
+	next_trigger_time = MAX(next_trigger_time, started_at + chaos_timer_check_queues_min_ns);
+	next_trigger_time = MIN(next_trigger_time, started_at + chaos_timer_check_queues_max_ns);
+
+	bpf_timer_start(&timerw->timer, next_trigger_time, BPF_F_TIMER_ABS);
+	return 0;
+}
+
+
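A worked illustration, not part of the patch: the two lines above clamp the rearm time into [started_at + min, started_at + max]. With the defaults declared at the top of the file (min = 0.5 ms, max = 2 ms), a DSQ deadline only 0.1 ms away is deferred to started_at + 0.5 ms, one 10 ms away is pulled in to started_at + 2 ms, and when no deadline was found at all the timer simply re-fires after the 2 ms maximum, so the check always runs again within that window.

	/* Sketch of the clamp applied above, using the same MIN/MAX helpers. */
	static __always_inline u64 clamp_next_fire(u64 started_at, u64 wanted_ns)
	{
		u64 t = MAX(wanted_ns, started_at + chaos_timer_check_queues_min_ns);

		return MIN(t, started_at + chaos_timer_check_queues_max_ns);
	}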
 s32 BPF_STRUCT_OPS_SLEEPABLE(chaos_init)
 {
+	struct timer_wrapper *timerw;
 	struct llc_ctx *llcx;
 	struct cpu_ctx *cpuc;
-	int i, ret;
+	int timer_id, ret, i;

 	bpf_for(i, 0, nr_cpus) {
 		if (!(cpuc = lookup_cpu_ctx(i)) ||
@@ -97,6 +199,25 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(chaos_init)
 			return ret;
 	}

+	timer_id = CHAOS_TIMER_CHECK_QUEUES;
+	timerw = bpf_map_lookup_elem(&chaos_timers, &timer_id);
+	if (!timerw)
+		return -1;
+
+	timerw->key = timer_id;
+
+	ret = bpf_timer_init(&timerw->timer, &chaos_timers, CLOCK_BOOTTIME);
+	if (ret)
+		return -1;
+
+	ret = bpf_timer_set_callback(&timerw->timer, &chaos_timer_check_queues_callback);
+	if (ret)
+		return -1;
+
+	ret = bpf_timer_start(&timerw->timer, chaos_timer_check_queues_max_ns, 0);
+	if (ret)
+		return -1;
+
 	return p2dq_init_impl();
 }

@@ -130,8 +251,8 @@ void BPF_STRUCT_OPS(chaos_dispatch, s32 cpu, struct task_struct *prev)
 	u64 now = bpf_ktime_get_ns();

 	int i = 0;
-	bpf_for_each(scx_dsq, p, get_current_cpu_delay_dsq(), 0) {
-		if (++i >= 8)
+	bpf_for_each(scx_dsq, p, get_cpu_delay_dsq(-1), 0) {
+		if (i++ >= MAX_ITERS_IN_DISPATCH)
 			break; // the verifier can't handle this loop, so limit it

 		p = bpf_task_from_pid(p->pid);