author     Daniel Borkmann <daniel@iogearbox.net>  2018-08-09 15:50:45 -0400
committer  Daniel Borkmann <daniel@iogearbox.net>  2018-08-09 15:50:45 -0400
commit     9c95420117393ed5f76de373e3c6479c21e3e380 (patch)
tree       e758a1cbc47dcaff5e9c13ee2b336dd48994c881
parent     bf9bae0ea6ec7013ef37b19fbbf29b62a35474fb (diff)
parent     1bf9116d0866a649104a5dfa008c302ad54d1e02 (diff)
Merge branch 'bpf-fix-cpu-and-devmap-teardown'
Jesper Dangaard Brouer says:
====================
Removing entries from cpumap and devmap goes through a number of
synchronization steps to make sure no new xdp_frames can be enqueued.
But there is a small chance that xdp_frames remain which have not been
flushed/processed yet. Flushing these during teardown happens from RCU
context and not, as usual, under RX NAPI context.

The optimization introduced in commit 389ab7f01af9 ("xdp: introduce
xdp_return_frame_rx_napi") missed that the flush operation can also be
called from RCU context. Thus, we cannot always use the
xdp_return_frame_rx_napi call, which takes advantage of the protection
provided by XDP RX running under NAPI.

The samples/bpf xdp_redirect_cpu program has a --stress-mode, which is
adjusted to reproduce the issue more easily (verified by Red Hat QA).
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
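
The core of the fix, condensed below as an illustrative sketch (not the
complete functions; the real hunks follow): the bulk-flush helpers now
take an in_napi_ctx flag from their callers and only use the
NAPI-optimized return helper when they actually run under RX NAPI.

    static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
                                 struct xdp_bulk_queue *bq, bool in_napi_ctx)
    {
            /* ... bulk-produce frames into the remote CPU's ptr_ring ... */
            if (err) {
                    drops++;
                    if (likely(in_napi_ctx))
                            /* RX NAPI softirq: fast recycle path is safe */
                            xdp_return_frame_rx_napi(xdpf);
                    else
                            /* RCU/teardown context: use generic return path */
                            xdp_return_frame(xdpf);
            }
            /* ... */
    }

    /* NAPI-context callers (bq_enqueue, __cpu_map_flush) pass true;
     * the RCU teardown path (__cpu_map_entry_free) passes false.
     * devmap's bq_xmit_all() gets the same treatment.
     */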
-rw-r--r--  kernel/bpf/cpumap.c                 | 15
-rw-r--r--  kernel/bpf/devmap.c                 | 14
-rw-r--r--  samples/bpf/xdp_redirect_cpu_kern.c |  2
-rw-r--r--  samples/bpf/xdp_redirect_cpu_user.c |  4
4 files changed, 21 insertions, 14 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..46f5f29605d4 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-                             struct xdp_bulk_queue *bq);
+                             struct xdp_bulk_queue *bq, bool in_napi_ctx);
 
 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
                 struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
 
                 /* No concurrent bq_enqueue can run at this point */
-                bq_flush_to_queue(rcpu, bq);
+                bq_flush_to_queue(rcpu, bq, false);
         }
         free_percpu(rcpu->bulkq);
         /* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-                             struct xdp_bulk_queue *bq)
+                             struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
         unsigned int processed = 0, drops = 0;
         const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
                 err = __ptr_ring_produce(q, xdpf);
                 if (err) {
                         drops++;
-                        xdp_return_frame_rx_napi(xdpf);
+                        if (likely(in_napi_ctx))
+                                xdp_return_frame_rx_napi(xdpf);
+                        else
+                                xdp_return_frame(xdpf);
                 }
                 processed++;
         }
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
         struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
         if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-                bq_flush_to_queue(rcpu, bq);
+                bq_flush_to_queue(rcpu, bq, true);
 
         /* Notice, xdp_buff/page MUST be queued here, long enough for
          * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)
 
                 /* Flush all frames in bulkq to real queue */
                 bq = this_cpu_ptr(rcpu->bulkq);
-                bq_flush_to_queue(rcpu, bq);
+                bq_flush_to_queue(rcpu, bq, true);
 
                 /* If already running, costs spin_lock_irqsave + smb_mb */
                 wake_up_process(rcpu->kthread);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d361fc1e3bf3..750d45edae79 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 }
 
 static int bq_xmit_all(struct bpf_dtab_netdev *obj,
-                       struct xdp_bulk_queue *bq, u32 flags)
+                       struct xdp_bulk_queue *bq, u32 flags,
+                       bool in_napi_ctx)
 {
         struct net_device *dev = obj->dev;
         int sent = 0, drops = 0, err = 0;
@@ -254,7 +255,10 @@ error:
                 struct xdp_frame *xdpf = bq->q[i];
 
                 /* RX path under NAPI protection, can return frames faster */
-                xdp_return_frame_rx_napi(xdpf);
+                if (likely(in_napi_ctx))
+                        xdp_return_frame_rx_napi(xdpf);
+                else
+                        xdp_return_frame(xdpf);
                 drops++;
         }
         goto out;
@@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map)
                 __clear_bit(bit, bitmap);
 
                 bq = this_cpu_ptr(dev->bulkq);
-                bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+                bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
         }
 }
 
@@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
         struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
         if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-                bq_xmit_all(obj, bq, 0);
+                bq_xmit_all(obj, bq, 0, true);
 
         /* Ingress dev_rx will be the same for all xdp_frame's in
          * bulk_queue, because bq stored per-CPU and must be flushed
@@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
                         __clear_bit(dev->bit, bitmap);
 
                         bq = per_cpu_ptr(dev->bulkq, cpu);
-                        bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+                        bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
                 }
         }
 }
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 303e9e7161f3..4938dcbaecbf 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -14,7 +14,7 @@
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
 
-#define MAX_CPUS 12 /* WARNING - sync with _user.c */
+#define MAX_CPUS 64 /* WARNING - sync with _user.c */
 
 /* Special map type that can XDP_REDIRECT frames to another CPU */
 struct bpf_map_def SEC("maps") cpu_map = {
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index f6efaefd485b..4b4d78fffe30 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -19,7 +19,7 @@ static const char *__doc__ =
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#define MAX_CPUS 12 /* WARNING - sync with _kern.c */
+#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
 
 /* How many xdp_progs are defined in _kern.c */
 #define MAX_PROG 5
@@ -527,7 +527,7 @@ static void stress_cpumap(void)
          * procedure.
          */
         create_cpu_entry(1, 1024, 0, false);
-        create_cpu_entry(1, 128, 0, false);
+        create_cpu_entry(1, 8, 0, false);
         create_cpu_entry(1, 16000, 0, false);
 }
 