author	Jesper Dangaard Brouer <brouer@redhat.com>	2018-08-08 17:00:34 -0400
committer	Daniel Borkmann <daniel@iogearbox.net>	2018-08-09 15:50:44 -0400
commit	ad0ab027fc6da08cbd34070d816ff3b7986c64ae (patch)
tree	36f75aa1790a438a29950b2ecd0dddf9baf0b107
parent	bf9bae0ea6ec7013ef37b19fbbf29b62a35474fb (diff)
xdp: fix bug in cpumap teardown code path
When removing a cpumap entry, a number of synchronization steps happen.
Eventually the teardown code __cpu_map_entry_free() is invoked via
call_rcu.

The teardown code __cpu_map_entry_free() flushes remaining xdp_frames
by invoking bq_flush_to_queue, which calls xdp_return_frame_rx_napi().
The issue is that the teardown code is not running in the RX NAPI code
path, and thus it is not allowed to invoke the NAPI variant of
xdp_return_frame.

This bug was found and triggered by using the --stress-mode option to
the samples/bpf program xdp_redirect_cpu. It is hard to trigger because
the ptr_ring has to be full (the cpumap bulk queue holds at most 8
packets) while a remote CPU is racing to empty the ptr_ring queue.

Fixes: 389ab7f01af9 ("xdp: introduce xdp_return_frame_rx_napi")
Tested-by: Jean-Tsung Hsiao <jhsiao@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
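For context, xdp_return_frame_rx_napi() is only safe to call from the RX
NAPI (softirq) path, while xdp_return_frame() is the generic variant that
is safe elsewhere. Below is a minimal sketch of the pattern the fix
applies, selecting the return helper based on the calling context; the
drop_frame() wrapper is a simplified illustration for this page, not a
function in the kernel source:

#include <net/xdp.h>	/* declares xdp_return_frame{,_rx_napi}() */

/* Illustrative helper (not in the kernel tree): free an xdp_frame with
 * the variant matching the calling context.  The NAPI variant assumes
 * RX softirq context; teardown paths such as call_rcu() callbacks must
 * use the plain xdp_return_frame().
 */
static void drop_frame(struct xdp_frame *xdpf, bool in_napi_ctx)
{
	if (in_napi_ctx)
		xdp_return_frame_rx_napi(xdpf);	/* RX NAPI / driver flush path */
	else
		xdp_return_frame(xdpf);		/* e.g. cpumap entry teardown */
}

The actual patch threads an equivalent in_napi_ctx flag through
bq_flush_to_queue(), as shown in the diff below.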
-rw-r--r--	kernel/bpf/cpumap.c	15
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..46f5f29605d4 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq);
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx);
 
 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
 		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
 
 		/* No concurrent bq_enqueue can run at this point */
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, false);
 	}
 	free_percpu(rcpu->bulkq);
 	/* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq)
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
 	unsigned int processed = 0, drops = 0;
 	const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame_rx_napi(xdpf);
+			if (likely(in_napi_ctx))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				xdp_return_frame(xdpf);
 		}
 		processed++;
 	}
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);
 
 	/* Notice, xdp_buff/page MUST be queued here, long enough for
 	 * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)
 
 	/* Flush all frames in bulkq to real queue */
 	bq = this_cpu_ptr(rcpu->bulkq);
-	bq_flush_to_queue(rcpu, bq);
+	bq_flush_to_queue(rcpu, bq, true);
 
 	/* If already running, costs spin_lock_irqsave + smb_mb */
 	wake_up_process(rcpu->kthread);