| author | Jesper Dangaard Brouer <brouer@redhat.com> | 2018-08-08 17:00:34 -0400 |
|---|---|---|
| committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-08-09 15:50:44 -0400 |
| commit | ad0ab027fc6da08cbd34070d816ff3b7986c64ae (patch) | |
| tree | 36f75aa1790a438a29950b2ecd0dddf9baf0b107 | |
| parent | bf9bae0ea6ec7013ef37b19fbbf29b62a35474fb (diff) | |
xdp: fix bug in cpumap teardown code path
When removing a cpumap entry, a number of synchronization steps happen.
Eventually the teardown code __cpu_map_entry_free() is invoked from/via
call_rcu.
The teardown code __cpu_map_entry_free() flushes remaining xdp_frames
by invoking bq_flush_to_queue(), which calls xdp_return_frame_rx_napi().
The issue is that the teardown code is not running in the RX NAPI code
path. Thus, it is not allowed to invoke the NAPI variant,
xdp_return_frame_rx_napi(), and must use plain xdp_return_frame() instead.
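
To illustrate the calling-context dispatch the fix applies, here is a
minimal, self-contained userspace sketch. The struct frame and the
return_frame_rx_napi()/return_frame() helpers are hypothetical stand-ins,
not the kernel's xdp_return_frame_rx_napi()/xdp_return_frame() API; only
the bool in_napi_ctx dispatch mirrors what the patch below does.

```c
/* Sketch of the pattern: the caller tells the flush/drop path whether it
 * runs in NAPI context, and only then is the NAPI-only fast path used.
 */
#include <stdbool.h>
#include <stdio.h>

struct frame { int id; };

/* Hypothetical stand-in for the NAPI-only return path. */
static void return_frame_rx_napi(struct frame *f)
{
	printf("frame %d returned via NAPI bulk-free path\n", f->id);
}

/* Hypothetical stand-in for the context-agnostic return path. */
static void return_frame(struct frame *f)
{
	printf("frame %d returned via regular path\n", f->id);
}

/* Caller reports its context; dropping a frame picks the matching helper. */
static void drop_frame(struct frame *f, bool in_napi_ctx)
{
	if (in_napi_ctx)
		return_frame_rx_napi(f);
	else
		return_frame(f);
}

int main(void)
{
	struct frame a = { .id = 1 }, b = { .id = 2 };

	drop_frame(&a, true);	/* e.g. flush from the RX NAPI enqueue path */
	drop_frame(&b, false);	/* e.g. flush from RCU teardown, outside NAPI */
	return 0;
}
```

In the patch this corresponds to bq_flush_to_queue() gaining a bool
in_napi_ctx parameter: the enqueue/flush callers on the RX path pass true,
while the RCU teardown path passes false.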
This bug was found and triggered by using the --stress-mode option of
the samples/bpf program xdp_redirect_cpu. It is hard to trigger because
the ptr_ring has to be full, the cpumap bulk queue holds at most 8
packets, and a remote CPU is racing to empty the ptr_ring queue.
Fixes: 389ab7f01af9 ("xdp: introduce xdp_return_frame_rx_napi")
Tested-by: Jean-Tsung Hsiao <jhsiao@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | kernel/bpf/cpumap.c | 15 |
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..46f5f29605d4 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq);
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx);
 
 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
 		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
 
 		/* No concurrent bq_enqueue can run at this point */
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, false);
 	}
 	free_percpu(rcpu->bulkq);
 	/* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq)
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
 	unsigned int processed = 0, drops = 0;
 	const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame_rx_napi(xdpf);
+			if (likely(in_napi_ctx))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				xdp_return_frame(xdpf);
 		}
 		processed++;
 	}
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);
 
 	/* Notice, xdp_buff/page MUST be queued here, long enough for
 	 * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)
 
 		/* Flush all frames in bulkq to real queue */
 		bq = this_cpu_ptr(rcpu->bulkq);
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);
 
 		/* If already running, costs spin_lock_irqsave + smb_mb */
 		wake_up_process(rcpu->kthread);