Diffstat (limited to 'kernel/events/ring_buffer.c')
-rw-r--r--	kernel/events/ring_buffer.c	64
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 674b35383491..ffb59a4ef4ff 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -38,7 +38,12 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
 	struct ring_buffer *rb = handle->rb;
 
 	preempt_disable();
-	local_inc(&rb->nest);
+
+	/*
+	 * Avoid an explicit LOAD/STORE such that architectures with memops
+	 * can use them.
+	 */
+	(*(volatile unsigned int *)&rb->nest)++;
 	handle->wakeup = local_read(&rb->wakeup);
 }
 
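The open-coded volatile increment above replaces local_inc() on rb->nest. A minimal stand-alone sketch of the pattern, using a hypothetical rb_like type rather than the kernel struct, shows the intent: the volatile access forces the update to go through memory, but unlike a separate READ_ONCE()/WRITE_ONCE() pair it still lets the compiler emit a single read-modify-write memory-operand instruction (e.g. incl on x86) where the architecture has one. The patch relies on nesting writers (IRQ/NMI on top of the current writer) leaving the count balanced, so no atomic operation is required.

struct rb_like {				/* hypothetical, not the kernel struct */
	unsigned int nest;
};

static void nest_inc(struct rb_like *rb)
{
	/*
	 * Force a real memory access (the compiler may not cache or elide
	 * it), while still allowing a single memop such as "incl (%rdi)".
	 */
	(*(volatile unsigned int *)&rb->nest)++;
}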
@@ -46,17 +51,35 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 {
 	struct ring_buffer *rb = handle->rb;
 	unsigned long head;
+	unsigned int nest;
+
+	/*
+	 * If this isn't the outermost nesting, we don't have to update
+	 * @rb->user_page->data_head.
+	 */
+	nest = READ_ONCE(rb->nest);
+	if (nest > 1) {
+		WRITE_ONCE(rb->nest, nest - 1);
+		goto out;
+	}
 
 again:
+	/*
+	 * In order to avoid publishing a head value that goes backwards,
+	 * we must ensure the load of @rb->head happens after we've
+	 * incremented @rb->nest.
+	 *
+	 * Otherwise we can observe a @rb->head value before one published
+	 * by an IRQ/NMI happening between the load and the increment.
+	 */
+	barrier();
 	head = local_read(&rb->head);
 
 	/*
-	 * IRQ/NMI can happen here, which means we can miss a head update.
+	 * IRQ/NMI can happen here and advance @rb->head, causing our
+	 * load above to be stale.
 	 */
 
-	if (!local_dec_and_test(&rb->nest))
-		goto out;
-
 	/*
 	 * Since the mmap() consumer (userspace) can run on a different CPU:
 	 *
@@ -84,14 +107,23 @@ again:
 	 * See perf_output_begin().
 	 */
 	smp_wmb(); /* B, matches C */
-	rb->user_page->data_head = head;
+	WRITE_ONCE(rb->user_page->data_head, head);
 
 	/*
-	 * Now check if we missed an update -- rely on previous implied
-	 * compiler barriers to force a re-read.
+	 * We must publish the head before decrementing the nest count,
+	 * otherwise an IRQ/NMI can publish a more recent head value and our
+	 * write will (temporarily) publish a stale value.
 	 */
+	barrier();
+	WRITE_ONCE(rb->nest, 0);
+
+	/*
+	 * Ensure we decrement @rb->nest before we validate the @rb->head.
+	 * Otherwise we cannot be sure we caught the 'last' nested update.
+	 */
+	barrier();
 	if (unlikely(head != local_read(&rb->head))) {
-		local_inc(&rb->nest);
+		WRITE_ONCE(rb->nest, 1);
 		goto again;
 	}
 
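Taken together, the two hunks above give the outermost path of perf_output_put_handle() a load/publish/clear/re-check shape. A compact user-space model of that sequence, assuming a hypothetical rb_model struct and the usual compiler-barrier definitions (a sketch of the ordering, not the kernel code):

#define barrier()		__asm__ __volatile__("" ::: "memory")
#define WRITE_ONCE(x, v)	(*(volatile __typeof__(x) *)&(x) = (v))
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))

struct rb_model {			/* hypothetical */
	unsigned long head;		/* private producer position */
	unsigned long data_head;	/* position published to the consumer */
	unsigned int nest;
};

static void publish_outermost(struct rb_model *rb)
{
	unsigned long head;

again:
	barrier();			/* load head only after the nest increment */
	head = READ_ONCE(rb->head);

	/* an IRQ/NMI writer may advance rb->head and publish it here */

	WRITE_ONCE(rb->data_head, head);	/* kernel also issues smp_wmb() before this */
	barrier();			/* publish head before dropping the nest count */
	WRITE_ONCE(rb->nest, 0);
	barrier();			/* drop nest before re-validating head */

	if (head != READ_ONCE(rb->head)) {	/* a nested writer slipped in: redo */
		WRITE_ONCE(rb->nest, 1);
		goto again;
	}
}

Without the middle barrier() the compiler could move the nest clear ahead of the data_head store; an IRQ landing between them could then publish a newer head that the delayed store would overwrite with a stale value, which is exactly the situation the new comment in the hunk warns about.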
@@ -330,6 +362,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	struct perf_event *output_event = event;
 	unsigned long aux_head, aux_tail;
 	struct ring_buffer *rb;
+	unsigned int nest;
 
 	if (output_event->parent)
 		output_event = output_event->parent;
@@ -360,13 +393,16 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	if (!refcount_inc_not_zero(&rb->aux_refcount))
 		goto err;
 
+	nest = READ_ONCE(rb->aux_nest);
 	/*
 	 * Nesting is not supported for AUX area, make sure nested
 	 * writers are caught early
 	 */
-	if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+	if (WARN_ON_ONCE(nest))
 		goto err_put;
 
+	WRITE_ONCE(rb->aux_nest, nest + 1);
+
 	aux_head = rb->aux_head;
 
 	handle->rb = rb;
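Because nesting is not supported for the AUX area, the atomic local_xchg() guard can be replaced with a plain read-check-write as above. A minimal sketch of that guard, using hypothetical helper and type names (aux_nest_enter()/aux_nest_exit() are illustrative, not kernel API):

#include <errno.h>

#define WRITE_ONCE(x, v)	(*(volatile __typeof__(x) *)&(x) = (v))
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))

struct aux_model {			/* hypothetical */
	unsigned int aux_nest;
};

/* Returns 0 when we became the sole writer, -EBUSY if one is already active. */
static int aux_nest_enter(struct aux_model *rb)
{
	unsigned int nest = READ_ONCE(rb->aux_nest);

	if (nest)			/* nested AUX writers are not supported */
		return -EBUSY;

	WRITE_ONCE(rb->aux_nest, nest + 1);
	return 0;
}

static void aux_nest_exit(struct aux_model *rb)
{
	WRITE_ONCE(rb->aux_nest, 0);
}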
@@ -394,7 +430,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 		if (!handle->size) { /* A, matches D */
 			event->pending_disable = smp_processor_id();
 			perf_output_wakeup(handle);
-			local_set(&rb->aux_nest, 0);
+			WRITE_ONCE(rb->aux_nest, 0);
 			goto err_put;
 		}
 	}
@@ -471,7 +507,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 		perf_event_aux_event(handle->event, aux_head, size,
 				     handle->aux_flags);
 
-	rb->user_page->aux_head = rb->aux_head;
+	WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
 	if (rb_need_aux_wakeup(rb))
 		wakeup = true;
 
@@ -483,7 +519,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 
 	handle->event = NULL;
 
-	local_set(&rb->aux_nest, 0);
+	WRITE_ONCE(rb->aux_nest, 0);
 	/* can't be last */
 	rb_free_aux(rb);
 	ring_buffer_put(rb);
@@ -503,7 +539,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
 
 	rb->aux_head += size;
 
-	rb->user_page->aux_head = rb->aux_head;
+	WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
 	if (rb_need_aux_wakeup(rb)) {
 		perf_output_wakeup(handle);
 		handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
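For context, the smp_wmb() labelled "B, matches C" in perf_output_put_handle() pairs with a read barrier on the user-space consumer side of the mmap()'ed buffer, as the A/B/C/D labels in the kernel comments describe. A hedged sketch of that consumer loop, with barrier choices shown for x86 only and a stand-in struct instead of struct perf_event_mmap_page:

#include <stdint.h>

#define barrier()	__asm__ __volatile__("" ::: "memory")
#define smp_rmb()	barrier()		/* x86: loads are not reordered with loads */
#define smp_mb()	__asm__ __volatile__("mfence" ::: "memory")

struct page_model {			/* stand-in for struct perf_event_mmap_page */
	volatile uint64_t data_head;
	volatile uint64_t data_tail;
};

static void consume(struct page_model *pg, void (*handle)(uint64_t from, uint64_t to))
{
	uint64_t head = pg->data_head;	/* C, matches B: see head before the data */
	smp_rmb();

	handle(pg->data_tail, head);	/* read records in [data_tail, head) */

	smp_mb();			/* D, matches A: finish reads before freeing space */
	pg->data_tail = head;
}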