
Commit 9216477

LorenzoBianconi authored and borkmann committed
bpf: cpumap: Add the possibility to attach an eBPF program to cpumap
Introduce the capability to attach an eBPF program to cpumap entries.
The idea behind this feature is to make it possible to define on which
CPU the eBPF program runs if the underlying hw does not support RSS.
Currently supported verdicts are XDP_DROP and XDP_PASS.

This patch has been tested on Marvell ESPRESSObin using the
xdp_redirect_cpu sample available in the kernel tree to identify
possible performance regressions. Results show there are no observable
differences in packets per second:

$ ./xdp_redirect_cpu --progname xdp_cpu_map0 --dev eth0 --cpu 1
rx: 354.8 Kpps
rx: 356.0 Kpps
rx: 356.8 Kpps
rx: 356.3 Kpps
rx: 356.6 Kpps
rx: 356.6 Kpps
rx: 356.7 Kpps
rx: 355.8 Kpps
rx: 356.8 Kpps
rx: 356.8 Kpps

Co-developed-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: Lorenzo Bianconi <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Acked-by: Jesper Dangaard Brouer <[email protected]>
Link: https://lore.kernel.org/bpf/5c9febdf903d810b3415732e5cd98491d7d9067a.1594734381.git.lorenzo@kernel.org
1 parent 644bfe5 commit 9216477
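
For context, a minimal sketch of the kind of program this commit enables. The SEC() name and program layout here are illustrative assumptions (the xdp_redirect_cpu sample in the kernel tree is the canonical example, and libbpf section-name support landed separately); only XDP_PASS and XDP_DROP verdicts are honoured at this stage:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

/* Runs on the remote CPU that frames were redirected to via the
 * cpumap; must be loaded with expected_attach_type = BPF_XDP_CPUMAP.
 */
SEC("xdp_cpumap/filter")	/* section name is an assumption */
int xdp_filter_on_remote_cpu(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* Drop frames too short for an Ethernet header, pass
	 * everything else on to the network stack on this CPU.
	 */
	if (data + sizeof(struct ethhdr) > data_end)
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";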

File tree

7 files changed: 148 additions, 17 deletions

  include/linux/bpf.h
  include/net/xdp.h
  include/trace/events/xdp.h
  include/uapi/linux/bpf.h
  kernel/bpf/cpumap.c
  net/core/dev.c
  tools/include/uapi/linux/bpf.h


include/linux/bpf.h

Lines changed: 6 additions & 0 deletions
@@ -1272,6 +1272,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
+bool cpu_map_prog_allowed(struct bpf_map *map);
 
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1432,6 +1433,11 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 	return 0;
 }
 
+static inline bool cpu_map_prog_allowed(struct bpf_map *map)
+{
+	return false;
+}
+
 static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
 						      enum bpf_prog_type type)
 {

include/net/xdp.h

Lines changed: 5 additions & 0 deletions
@@ -98,6 +98,11 @@ struct xdp_frame {
 	struct net_device *dev_rx; /* used by cpumap */
 };
 
+struct xdp_cpumap_stats {
+	unsigned int pass;
+	unsigned int drop;
+};
+
 /* Clear kernel pointers in xdp_frame */
 static inline void xdp_scrub_frame(struct xdp_frame *frame)
 {

include/trace/events/xdp.h

Lines changed: 10 additions & 4 deletions
@@ -177,9 +177,9 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
 TRACE_EVENT(xdp_cpumap_kthread,
 
 	TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
-		 int sched),
+		 int sched, struct xdp_cpumap_stats *xdp_stats),
 
-	TP_ARGS(map_id, processed, drops, sched),
+	TP_ARGS(map_id, processed, drops, sched, xdp_stats),
 
 	TP_STRUCT__entry(
 		__field(int, map_id)
@@ -188,6 +188,8 @@ TRACE_EVENT(xdp_cpumap_kthread,
 		__field(unsigned int, drops)
 		__field(unsigned int, processed)
 		__field(int, sched)
+		__field(unsigned int, xdp_pass)
+		__field(unsigned int, xdp_drop)
 	),
 
 	TP_fast_assign(
@@ -197,16 +199,20 @@ TRACE_EVENT(xdp_cpumap_kthread,
 		__entry->drops		= drops;
 		__entry->processed	= processed;
 		__entry->sched		= sched;
+		__entry->xdp_pass	= xdp_stats->pass;
+		__entry->xdp_drop	= xdp_stats->drop;
 	),
 
 	TP_printk("kthread"
 		  " cpu=%d map_id=%d action=%s"
 		  " processed=%u drops=%u"
-		  " sched=%d",
+		  " sched=%d"
+		  " xdp_pass=%u xdp_drop=%u",
 		  __entry->cpu, __entry->map_id,
 		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
 		  __entry->processed, __entry->drops,
-		  __entry->sched)
+		  __entry->sched,
+		  __entry->xdp_pass, __entry->xdp_drop)
 );
 
 TRACE_EVENT(xdp_cpumap_enqueue,
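
With the extended format string above, the event-specific part of a line in the ftrace buffer would look roughly like this (the field values are hypothetical, shown only to illustrate the two new counters):

    kthread cpu=2 map_id=24 action=XDP_REDIRECT processed=8 drops=0 sched=0 xdp_pass=8 xdp_drop=0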

include/uapi/linux/bpf.h

Lines changed: 5 additions & 0 deletions
@@ -227,6 +227,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_GETSOCKNAME,
 	BPF_XDP_DEVMAP,
 	BPF_CGROUP_INET_SOCK_RELEASE,
+	BPF_XDP_CPUMAP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -3856,6 +3857,10 @@ struct bpf_devmap_val {
  */
 struct bpf_cpumap_val {
 	__u32 qsize;	/* queue size to remote target CPU */
+	union {
+		int   fd;	/* prog fd on map write */
+		__u32 id;	/* prog id on map read */
+	} bpf_prog;
 };
 
 enum sk_action {
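
For illustration, a minimal user-space sketch of how the new layout would be used — assuming libbpf, a cpumap created with value_size == sizeof(struct bpf_cpumap_val), and a prog_fd loaded with expected_attach_type = BPF_XDP_CPUMAP; the helper name and qsize value are illustrative, not part of this commit:

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Hypothetical helper: attach prog_fd to the cpumap slot for `cpu`. */
static int redirect_to_cpu(int cpumap_fd, int prog_fd, __u32 cpu)
{
	struct bpf_cpumap_val val = {
		.qsize = 192,           /* ptr_ring size on the remote CPU */
		.bpf_prog.fd = prog_fd, /* fd is consumed on map write ... */
	};

	/* ... and reported back as bpf_prog.id on map read */
	return bpf_map_update_elem(cpumap_fd, &cpu, &val, 0);
}

A map created with the old 4-byte (qsize-only) value keeps the legacy behaviour: cpu_map_prog_allowed() returns false for it and no program can be attached.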

kernel/bpf/cpumap.c

Lines changed: 108 additions & 13 deletions
@@ -63,6 +63,7 @@ struct bpf_cpu_map_entry {
 	struct task_struct *kthread;
 
 	struct bpf_cpumap_val value;
+	struct bpf_prog *prog;
 
 	atomic_t refcnt; /* Control when this struct can be free'ed */
 	struct rcu_head rcu;
@@ -82,6 +83,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
 
 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 {
+	u32 value_size = attr->value_size;
 	struct bpf_cpu_map *cmap;
 	int err = -ENOMEM;
 	u64 cost;
@@ -92,7 +94,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+	    (value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
+	     value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) ||
+	    attr->map_flags & ~BPF_F_NUMA_NODE)
 		return ERR_PTR(-EINVAL);
 
 	cmap = kzalloc(sizeof(*cmap), GFP_USER);
@@ -214,6 +218,8 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 {
 	if (atomic_dec_and_test(&rcpu->refcnt)) {
+		if (rcpu->prog)
+			bpf_prog_put(rcpu->prog);
 		/* The queue should be empty at this point */
 		__cpu_map_ring_cleanup(rcpu->queue);
 		ptr_ring_cleanup(rcpu->queue, NULL);
@@ -222,6 +228,62 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 	}
 }
 
+static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
+				    void **frames, int n,
+				    struct xdp_cpumap_stats *stats)
+{
+	struct xdp_rxq_info rxq;
+	struct xdp_buff xdp;
+	int i, nframes = 0;
+
+	if (!rcpu->prog)
+		return n;
+
+	rcu_read_lock();
+
+	xdp_set_return_frame_no_direct();
+	xdp.rxq = &rxq;
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		u32 act;
+		int err;
+
+		rxq.dev = xdpf->dev_rx;
+		rxq.mem = xdpf->mem;
+		/* TODO: report queue_index to xdp_rxq_info */
+
+		xdp_convert_frame_to_buff(xdpf, &xdp);
+
+		act = bpf_prog_run_xdp(rcpu->prog, &xdp);
+		switch (act) {
+		case XDP_PASS:
+			err = xdp_update_frame_from_buff(&xdp, xdpf);
+			if (err < 0) {
+				xdp_return_frame(xdpf);
+				stats->drop++;
+			} else {
+				frames[nframes++] = xdpf;
+				stats->pass++;
+			}
+			break;
+		default:
+			bpf_warn_invalid_xdp_action(act);
+			/* fallthrough */
+		case XDP_DROP:
+			xdp_return_frame(xdpf);
+			stats->drop++;
+			break;
+		}
+	}
+
+	xdp_clear_return_frame_no_direct();
+
+	rcu_read_unlock();
+
+	return nframes;
+}
+
 #define CPUMAP_BATCH 8
 
 static int cpu_map_kthread_run(void *data)
@@ -236,11 +298,12 @@ static int cpu_map_kthread_run(void *data)
 	 * kthread_stop signal until queue is empty.
 	 */
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
+		struct xdp_cpumap_stats stats = {}; /* zero stats */
+		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
 		unsigned int drops = 0, sched = 0;
 		void *frames[CPUMAP_BATCH];
 		void *skbs[CPUMAP_BATCH];
-		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
-		int i, n, m;
+		int i, n, m, nframes;
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -261,8 +324,8 @@ static int cpu_map_kthread_run(void *data)
 		 * kthread CPU pinned. Lockless access to ptr_ring
 		 * consume side valid as no-resize allowed of queue.
 		 */
-		n = __ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
-
+		n = __ptr_ring_consume_batched(rcpu->queue, frames,
+					       CPUMAP_BATCH);
 		for (i = 0; i < n; i++) {
 			void *f = frames[i];
 			struct page *page = virt_to_page(f);
@@ -274,15 +337,19 @@ static int cpu_map_kthread_run(void *data)
 			prefetchw(page);
 		}
 
-		m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
-		if (unlikely(m == 0)) {
-			for (i = 0; i < n; i++)
-				skbs[i] = NULL; /* effect: xdp_return_frame */
-			drops = n;
+		/* Support running another XDP prog on this CPU */
+		nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+		if (nframes) {
+			m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
+			if (unlikely(m == 0)) {
+				for (i = 0; i < nframes; i++)
+					skbs[i] = NULL; /* effect: xdp_return_frame */
+				drops += nframes;
+			}
 		}
 
 		local_bh_disable();
-		for (i = 0; i < n; i++) {
+		for (i = 0; i < nframes; i++) {
 			struct xdp_frame *xdpf = frames[i];
 			struct sk_buff *skb = skbs[i];
 			int ret;
@@ -299,7 +366,7 @@ static int cpu_map_kthread_run(void *data)
 			drops++;
 		}
 		/* Feedback loop via tracepoint */
-		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
+		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
 
 		local_bh_enable(); /* resched point, may call do_softirq() */
 	}
@@ -309,13 +376,38 @@ static int cpu_map_kthread_run(void *data)
 	return 0;
 }
 
+bool cpu_map_prog_allowed(struct bpf_map *map)
+{
+	return map->map_type == BPF_MAP_TYPE_CPUMAP &&
+	       map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
+}
+
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+{
+	struct bpf_prog *prog;
+
+	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	rcpu->value.bpf_prog.id = prog->aux->id;
+	rcpu->prog = prog;
+
+	return 0;
+}
+
 static struct bpf_cpu_map_entry *
 __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
 {
+	int numa, err, i, fd = value->bpf_prog.fd;
 	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct bpf_cpu_map_entry *rcpu;
 	struct xdp_bulk_queue *bq;
-	int numa, err, i;
 
 	/* Have map->numa_node, but choose node of redirect target CPU */
 	numa = cpu_to_node(cpu);
@@ -357,6 +449,9 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
 	get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
 	get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
 
+	if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+		goto free_ptr_ring;
+
 	/* Make sure kthread runs on a single CPU */
 	kthread_bind(rcpu->kthread, cpu);
 	wake_up_process(rcpu->kthread);
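
The value_size check in cpu_map_alloc() accepts exactly two sizes. A small user-space illustration of the arithmetic — offsetofend() is a kernel-internal macro, re-created here for the sketch, and the program assumes uapi headers that already contain struct bpf_cpumap_val:

#include <stdio.h>
#include <stddef.h>
#include <linux/bpf.h>

/* Kernel-internal helper, re-defined for this demonstration */
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

int main(void)
{
	/* 4 bytes: legacy qsize-only layout, no prog attach possible */
	printf("qsize only:    %zu\n",
	       offsetofend(struct bpf_cpumap_val, qsize));
	/* 8 bytes: qsize + bpf_prog union, prog attach allowed */
	printf("with bpf_prog: %zu\n",
	       offsetofend(struct bpf_cpumap_val, bpf_prog.fd));
	return 0;
}

Any other value_size makes cpu_map_alloc() return -EINVAL, and cpu_map_prog_allowed() keys off the same distinction to tell the two layouts apart.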

net/core/dev.c

Lines changed: 9 additions & 0 deletions
@@ -5448,6 +5448,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 		for (i = 0; i < new->aux->used_map_cnt; i++) {
 			if (dev_map_can_have_prog(new->aux->used_maps[i]))
 				return -EINVAL;
+			if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+				return -EINVAL;
 		}
 	}
 
@@ -8875,6 +8877,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 			return -EINVAL;
 		}
 
+		if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
+			NL_SET_ERR_MSG(extack,
+				       "BPF_XDP_CPUMAP programs can not be attached to a device");
+			bpf_prog_put(prog);
+			return -EINVAL;
+		}
+
 		/* prog->aux->id may be 0 for orphaned device-bound progs */
 		if (prog->aux->id && prog->aux->id == prog_id) {
 			bpf_prog_put(prog);

tools/include/uapi/linux/bpf.h

Lines changed: 5 additions & 0 deletions
@@ -227,6 +227,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_GETSOCKNAME,
 	BPF_XDP_DEVMAP,
 	BPF_CGROUP_INET_SOCK_RELEASE,
+	BPF_XDP_CPUMAP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -3856,6 +3857,10 @@ struct bpf_devmap_val {
  */
 struct bpf_cpumap_val {
 	__u32 qsize;	/* queue size to remote target CPU */
+	union {
+		int   fd;	/* prog fd on map write */
+		__u32 id;	/* prog id on map read */
+	} bpf_prog;
 };
 
 enum sk_action {
