author		Daniel Borkmann <daniel@iogearbox.net>	2018-08-09 15:50:45 -0400
committer	Daniel Borkmann <daniel@iogearbox.net>	2018-08-09 15:50:45 -0400
commit		9c95420117393ed5f76de373e3c6479c21e3e380 (patch)
tree		e758a1cbc47dcaff5e9c13ee2b336dd48994c881
parent		bf9bae0ea6ec7013ef37b19fbbf29b62a35474fb (diff)
parent		1bf9116d0866a649104a5dfa008c302ad54d1e02 (diff)
Merge branch 'bpf-fix-cpu-and-devmap-teardown'
Jesper Dangaard Brouer says:

====================
Removing entries from cpumap and devmap goes through a number of
synchronization steps to make sure no new xdp_frames can be enqueued. But
there is a small chance that some xdp_frames remain which have not been
flushed/processed yet. Flushing these during teardown happens from RCU
context, not, as usual, from RX NAPI context.

The optimization introduced in commit 389ab7f01af9 ("xdp: introduce
xdp_return_frame_rx_napi") missed that the flush operation can also be
called from RCU context. Thus, we cannot always use the
xdp_return_frame_rx_napi call, which takes advantage of the protection
provided by XDP RX running under NAPI.

The samples/bpf xdp_redirect_cpu program has a --stress-mode, which is
adjusted to make the issue easier to reproduce (verified by Red Hat QA).
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
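In short, the fix threads an in_napi_ctx flag from the callers down to the bulk-flush routines, so the teardown path (running in RCU context) returns frames with xdp_return_frame() while the RX path keeps the faster xdp_return_frame_rx_napi(). Below is a minimal, self-contained userspace sketch of that dispatch pattern, not kernel code; the demo_* names are hypothetical stand-ins for the kernel helpers and for bq_flush_to_queue()/bq_xmit_all().

/* Userspace sketch of the in_napi_ctx dispatch pattern (illustrative only). */
#include <stdbool.h>
#include <stdio.h>

struct demo_frame { int id; };

/* Stand-in for xdp_return_frame_rx_napi(): only valid under NAPI protection. */
static void demo_return_frame_rx_napi(struct demo_frame *f)
{
        printf("frame %d returned via NAPI fast path\n", f->id);
}

/* Stand-in for xdp_return_frame(): safe from any context, e.g. RCU teardown. */
static void demo_return_frame(struct demo_frame *f)
{
        printf("frame %d returned via generic path\n", f->id);
}

/* Mirrors bq_flush_to_queue()/bq_xmit_all(): the caller states its context. */
static void demo_bq_flush(struct demo_frame **q, int n, bool in_napi_ctx)
{
        for (int i = 0; i < n; i++) {
                if (in_napi_ctx)
                        demo_return_frame_rx_napi(q[i]);
                else
                        demo_return_frame(q[i]);
        }
}

int main(void)
{
        struct demo_frame a = { 1 }, b = { 2 };
        struct demo_frame *q[] = { &a, &b };

        demo_bq_flush(q, 2, true);   /* RX NAPI path */
        demo_bq_flush(q, 2, false);  /* map-entry teardown path */
        return 0;
}

The design choice is the same as in the patch: rather than dropping the fast path entirely, each call site declares whether it runs under NAPI, and only the rare teardown path pays for the generic return.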
-rw-r--r--	kernel/bpf/cpumap.c			15
-rw-r--r--	kernel/bpf/devmap.c			14
-rw-r--r--	samples/bpf/xdp_redirect_cpu_kern.c	 2
-rw-r--r--	samples/bpf/xdp_redirect_cpu_user.c	 4
4 files changed, 21 insertions, 14 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..46f5f29605d4 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-                             struct xdp_bulk_queue *bq);
+                             struct xdp_bulk_queue *bq, bool in_napi_ctx);
 
 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
                 struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
 
                 /* No concurrent bq_enqueue can run at this point */
-                bq_flush_to_queue(rcpu, bq);
+                bq_flush_to_queue(rcpu, bq, false);
         }
         free_percpu(rcpu->bulkq);
         /* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-                             struct xdp_bulk_queue *bq)
+                             struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
         unsigned int processed = 0, drops = 0;
         const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
                 err = __ptr_ring_produce(q, xdpf);
                 if (err) {
                         drops++;
-                        xdp_return_frame_rx_napi(xdpf);
+                        if (likely(in_napi_ctx))
+                                xdp_return_frame_rx_napi(xdpf);
+                        else
+                                xdp_return_frame(xdpf);
                 }
                 processed++;
         }
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
         struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
         if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-                bq_flush_to_queue(rcpu, bq);
+                bq_flush_to_queue(rcpu, bq, true);
 
         /* Notice, xdp_buff/page MUST be queued here, long enough for
          * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)
 
                 /* Flush all frames in bulkq to real queue */
                 bq = this_cpu_ptr(rcpu->bulkq);
-                bq_flush_to_queue(rcpu, bq);
+                bq_flush_to_queue(rcpu, bq, true);
 
                 /* If already running, costs spin_lock_irqsave + smb_mb */
                 wake_up_process(rcpu->kthread);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d361fc1e3bf3..750d45edae79 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 }
 
 static int bq_xmit_all(struct bpf_dtab_netdev *obj,
-                       struct xdp_bulk_queue *bq, u32 flags)
+                       struct xdp_bulk_queue *bq, u32 flags,
+                       bool in_napi_ctx)
 {
         struct net_device *dev = obj->dev;
         int sent = 0, drops = 0, err = 0;
@@ -254,7 +255,10 @@ error:
                 struct xdp_frame *xdpf = bq->q[i];
 
                 /* RX path under NAPI protection, can return frames faster */
-                xdp_return_frame_rx_napi(xdpf);
+                if (likely(in_napi_ctx))
+                        xdp_return_frame_rx_napi(xdpf);
+                else
+                        xdp_return_frame(xdpf);
                 drops++;
         }
         goto out;
@@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map)
                 __clear_bit(bit, bitmap);
 
                 bq = this_cpu_ptr(dev->bulkq);
-                bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+                bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
         }
 }
 
@@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
         struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
         if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-                bq_xmit_all(obj, bq, 0);
+                bq_xmit_all(obj, bq, 0, true);
 
         /* Ingress dev_rx will be the same for all xdp_frame's in
          * bulk_queue, because bq stored per-CPU and must be flushed
@@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
                         __clear_bit(dev->bit, bitmap);
 
                         bq = per_cpu_ptr(dev->bulkq, cpu);
-                        bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+                        bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
                 }
         }
 }
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 303e9e7161f3..4938dcbaecbf 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -14,7 +14,7 @@
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
 
-#define MAX_CPUS 12 /* WARNING - sync with _user.c */
+#define MAX_CPUS 64 /* WARNING - sync with _user.c */
 
 /* Special map type that can XDP_REDIRECT frames to another CPU */
 struct bpf_map_def SEC("maps") cpu_map = {
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index f6efaefd485b..4b4d78fffe30 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -19,7 +19,7 @@ static const char *__doc__ =
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#define MAX_CPUS 12 /* WARNING - sync with _kern.c */
+#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
 
 /* How many xdp_progs are defined in _kern.c */
 #define MAX_PROG 5
@@ -527,7 +527,7 @@ static void stress_cpumap(void)
          * procedure.
          */
         create_cpu_entry(1, 1024, 0, false);
-        create_cpu_entry(1, 128, 0, false);
+        create_cpu_entry(1, 8, 0, false);
         create_cpu_entry(1, 16000, 0, false);
 }
 