path: root/kernel
author	Peter Zijlstra <peterz@infradead.org>	2019-04-04 09:03:00 -0400
committer	Ingo Molnar <mingo@kernel.org>	2019-04-12 02:55:55 -0400
commit	1d54ad944074010609562da5c89e4f5df2f4e5db (patch)
tree	1840b09597a2d00ad131bbf9c94931f6123248a1 /kernel
parent	3966c3feca3fd10b2935caa0b4a08c7dd59469e5 (diff)
perf/core: Fix perf_event_disable_inatomic() race
Thomas-Mich Richter reported he triggered a WARN()ing from event_function_local()
on his s390. The problem boils down to:

	CPU-A				CPU-B

	perf_event_overflow()
	  perf_event_disable_inatomic()
	    @pending_disable = 1
	    irq_work_queue();

	sched-out
	  event_sched_out()
	    @pending_disable = 0

				sched-in
				perf_event_overflow()
				  perf_event_disable_inatomic()
				    @pending_disable = 1;
				    irq_work_queue(); // FAILS

	irq_work_run()
	  perf_pending_event()
	    if (@pending_disable)
	      perf_event_disable_local(); // WHOOPS

The problem exists in generic code, but s390 is particularly sensitive because
it doesn't implement arch_irq_work_raise(), nor does it call irq_work_run()
from its PMU interrupt handler (nor would that be sufficient in this case,
because s390 also generates perf_event_overflow() from pmu::stop). Add to that
the fact that s390 is a virtual architecture and (virtual) CPU-A can stall long
enough for the above race to happen, even if it would self-IPI.

Adding an irq_work_sync() to event_sched_in() would work for all hardware PMUs
that properly use irq_work_run(), but fails for software PMUs.

Instead, encode the CPU number in @pending_disable, such that we can tell which
CPU requested the disable. This then allows us to detect the above scenario and
even redirect the IPI to make up for the failed queue.

Reported-by: Thomas-Mich Richter <tmricht@linux.ibm.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hendrik Brueckner <brueckner@linux.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
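The essence of the fix, condensed from the hunks below (kernel-internal APIs, not a
standalone build; all names are the ones this patch introduces): @pending_disable stops
being a boolean and instead holds the number of the CPU that requested the disable, with
-1 meaning "nothing pending", so the irq_work handler can detect that the request came
from another CPU and re-queue itself there with irq_work_queue_on().

	/* Requester side (NMI/IRQ context): record which CPU wants the disable. */
	void perf_event_disable_inatomic(struct perf_event *event)
	{
		WRITE_ONCE(event->pending_disable, smp_processor_id());
		/* can fail if the work is already queued, see perf_pending_event_disable() */
		irq_work_queue(&event->pending);
	}

	/* irq_work side: only disable locally if we are the CPU that asked. */
	static void perf_pending_event_disable(struct perf_event *event)
	{
		int cpu = READ_ONCE(event->pending_disable);

		if (cpu < 0)			/* -1: nothing pending */
			return;

		if (cpu == smp_processor_id()) {	/* request came from this CPU */
			WRITE_ONCE(event->pending_disable, -1);
			perf_event_disable_local(event);
			return;
		}

		/* The event now runs elsewhere; redirect the irq_work to that CPU. */
		irq_work_queue_on(&event->pending, cpu);
	}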
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/events/core.c	52
-rw-r--r--	kernel/events/ring_buffer.c	4
2 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 72d06e302e99..534e01e7bc36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2009,8 +2009,8 @@ event_sched_out(struct perf_event *event,
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
 
-	if (event->pending_disable) {
-		event->pending_disable = 0;
+	if (READ_ONCE(event->pending_disable) >= 0) {
+		WRITE_ONCE(event->pending_disable, -1);
 		state = PERF_EVENT_STATE_OFF;
 	}
 	perf_event_set_state(event, state);
@@ -2198,7 +2198,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
 
 void perf_event_disable_inatomic(struct perf_event *event)
 {
-	event->pending_disable = 1;
+	WRITE_ONCE(event->pending_disable, smp_processor_id());
+	/* can fail, see perf_pending_event_disable() */
 	irq_work_queue(&event->pending);
 }
 
@@ -5810,10 +5811,45 @@ void perf_event_wakeup(struct perf_event *event)
 	}
 }
 
+static void perf_pending_event_disable(struct perf_event *event)
+{
+	int cpu = READ_ONCE(event->pending_disable);
+
+	if (cpu < 0)
+		return;
+
+	if (cpu == smp_processor_id()) {
+		WRITE_ONCE(event->pending_disable, -1);
+		perf_event_disable_local(event);
+		return;
+	}
+
+	/*
+	 *  CPU-A			CPU-B
+	 *
+	 *  perf_event_disable_inatomic()
+	 *    @pending_disable = CPU-A;
+	 *    irq_work_queue();
+	 *
+	 *  sched-out
+	 *    @pending_disable = -1;
+	 *
+	 *				sched-in
+	 *				perf_event_disable_inatomic()
+	 *				  @pending_disable = CPU-B;
+	 *				  irq_work_queue(); // FAILS
+	 *
+	 *  irq_work_run()
+	 *    perf_pending_event()
+	 *
+	 *  But the event runs on CPU-B and wants disabling there.
+	 */
+	irq_work_queue_on(&event->pending, cpu);
+}
+
 static void perf_pending_event(struct irq_work *entry)
 {
-	struct perf_event *event = container_of(entry,
-			struct perf_event, pending);
+	struct perf_event *event = container_of(entry, struct perf_event, pending);
 	int rctx;
 
 	rctx = perf_swevent_get_recursion_context();
@@ -5822,10 +5858,7 @@ static void perf_pending_event(struct irq_work *entry)
 	 * and we won't recurse 'further'.
 	 */
 
-	if (event->pending_disable) {
-		event->pending_disable = 0;
-		perf_event_disable_local(event);
-	}
+	perf_pending_event_disable(event);
 
 	if (event->pending_wakeup) {
 		event->pending_wakeup = 0;
@@ -10236,6 +10269,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 
 	init_waitqueue_head(&event->waitq);
+	event->pending_disable = -1;
 	init_irq_work(&event->pending, perf_pending_event);
 
 	mutex_init(&event->mmap_mutex);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a4047321d7d8..2545ac08cc77 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -392,7 +392,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	 * store that will be enabled on successful return
 	 */
 	if (!handle->size) { /* A, matches D */
-		event->pending_disable = 1;
+		event->pending_disable = smp_processor_id();
 		perf_output_wakeup(handle);
 		local_set(&rb->aux_nest, 0);
 		goto err_put;
@@ -480,7 +480,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 
 	if (wakeup) {
 		if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
-			handle->event->pending_disable = 1;
+			handle->event->pending_disable = smp_processor_id();
 		perf_output_wakeup(handle);
 	}
 