Diffstat (limited to 'kernel/events/ring_buffer.c')
-rw-r--r--   kernel/events/ring_buffer.c   64
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 674b35383491..ffb59a4ef4ff 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -38,7 +38,12 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
         struct ring_buffer *rb = handle->rb;
 
         preempt_disable();
-        local_inc(&rb->nest);
+
+        /*
+         * Avoid an explicit LOAD/STORE such that architectures with memops
+         * can use them.
+         */
+        (*(volatile unsigned int *)&rb->nest)++;
         handle->wakeup = local_read(&rb->wakeup);
 }
 
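[Editor's note] The new (*(volatile unsigned int *)&rb->nest)++ form may let the compiler emit a single read-modify-write instruction with a memory operand (e.g. addl/incl on x86) instead of forcing a separate load and store, which is what the comment means by "architectures with memops". A minimal userspace sketch of the difference in access patterns; the struct rbuf name and helper functions below are illustrative only, not kernel code:

#include <stdio.h>

struct rbuf {                     /* illustrative stand-in, not the kernel's ring_buffer */
        unsigned int nest;
};

/* Volatile in-place increment: the compiler may emit one read-modify-write
 * instruction with a memory operand (e.g. "addl $1, (%rdi)" on x86). */
static void nest_inc_memop(struct rbuf *rb)
{
        (*(volatile unsigned int *)&rb->nest)++;
}

/* Explicit load/store pair: two separate volatile accesses, so the
 * compiler has to emit a distinct load and a distinct store. */
static void nest_inc_load_store(struct rbuf *rb)
{
        unsigned int v = *(volatile unsigned int *)&rb->nest;
        *(volatile unsigned int *)&rb->nest = v + 1;
}

int main(void)
{
        struct rbuf rb = { .nest = 0 };

        nest_inc_memop(&rb);
        nest_inc_load_store(&rb);
        printf("nest = %u\n", rb.nest);   /* prints "nest = 2" */
        return 0;
}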
@@ -46,17 +51,35 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 {
         struct ring_buffer *rb = handle->rb;
         unsigned long head;
+        unsigned int nest;
+
+        /*
+         * If this isn't the outermost nesting, we don't have to update
+         * @rb->user_page->data_head.
+         */
+        nest = READ_ONCE(rb->nest);
+        if (nest > 1) {
+                WRITE_ONCE(rb->nest, nest - 1);
+                goto out;
+        }
 
 again:
+        /*
+         * In order to avoid publishing a head value that goes backwards,
+         * we must ensure the load of @rb->head happens after we've
+         * incremented @rb->nest.
+         *
+         * Otherwise we can observe a @rb->head value before one published
+         * by an IRQ/NMI happening between the load and the increment.
+         */
+        barrier();
         head = local_read(&rb->head);
 
         /*
-         * IRQ/NMI can happen here, which means we can miss a head update.
+         * IRQ/NMI can happen here and advance @rb->head, causing our
+         * load above to be stale.
         */
 
-        if (!local_dec_and_test(&rb->nest))
-                goto out;
-
         /*
          * Since the mmap() consumer (userspace) can run on a different CPU:
          *
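[Editor's note] The hunk above relies on a compiler barrier to keep the load of @rb->head from being hoisted above the nest increment performed in perf_output_get_handle(). A hypothetical userspace reduction of that ordering requirement; barrier() and the variable names below are stand-ins, not the kernel definitions:

#include <stdio.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

static unsigned long head;      /* write position, advanced by writers */
static unsigned int  nest;      /* writer nesting depth on this "CPU"  */

/* Claim the buffer, then snapshot head for a later publish.  Without
 * barrier(), the compiler could move the (non-volatile) load of 'head'
 * above the volatile increment of 'nest', so a handler running between
 * the two could have published a newer head than the one returned. */
static unsigned long claim_and_snapshot(void)
{
        (*(volatile unsigned int *)&nest)++;    /* as in perf_output_get_handle() */
        barrier();                              /* the load below must stay after */
        return head;
}

int main(void)
{
        head = 64;
        printf("snapshot = %lu, nest = %u\n", claim_and_snapshot(), nest);
        return 0;
}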
@@ -84,14 +107,23 @@ again:
          * See perf_output_begin().
          */
         smp_wmb(); /* B, matches C */
-        rb->user_page->data_head = head;
+        WRITE_ONCE(rb->user_page->data_head, head);
 
         /*
-         * Now check if we missed an update -- rely on previous implied
-         * compiler barriers to force a re-read.
+         * We must publish the head before decrementing the nest count,
+         * otherwise an IRQ/NMI can publish a more recent head value and our
+         * write will (temporarily) publish a stale value.
         */
+        barrier();
+        WRITE_ONCE(rb->nest, 0);
+
+        /*
+         * Ensure we decrement @rb->nest before we validate the @rb->head.
+         * Otherwise we cannot be sure we caught the 'last' nested update.
+         */
+        barrier();
         if (unlikely(head != local_read(&rb->head))) {
-                local_inc(&rb->nest);
+                WRITE_ONCE(rb->nest, 1);
                 goto again;
         }
 
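[Editor's note] Taken together, the two hunks above give the outermost writer this shape: publish the head, drop the nest count, then re-validate the head and retry if a nested writer advanced it in the meantime, with compiler barriers enforcing the order of each step. A self-contained userspace sketch of that structure, under the assumption of a single CPU where signal handlers would play the role of IRQ/NMI writers; all names are illustrative, not kernel code:

#include <stdio.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

static unsigned long head;        /* private write position                 */
static unsigned long data_head;   /* published position a reader would see  */
static unsigned int  nest;        /* writer nesting depth                   */

/* Exit path of a writer; head is advanced elsewhere (not shown). */
static void put_handle(void)
{
        unsigned long h;

        if (nest > 1) {           /* nested writer: the outermost publishes */
                nest--;
                return;
        }
again:
        barrier();                /* read head only after nest was raised   */
        h = head;

        data_head = h;            /* publish the snapshot ...               */
        barrier();                /* ... strictly before dropping nest      */
        nest = 0;

        barrier();                /* drop nest before re-validating head    */
        if (h != head) {          /* a nested writer moved head meanwhile   */
                nest = 1;
                goto again;
        }
}

int main(void)
{
        nest = 1;                 /* as if the matching get_handle() ran    */
        head = 128;               /* as if 128 bytes had been written       */
        put_handle();
        printf("data_head = %lu, nest = %u\n", data_head, nest);
        return 0;
}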
@@ -330,6 +362,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
         struct perf_event *output_event = event;
         unsigned long aux_head, aux_tail;
         struct ring_buffer *rb;
+        unsigned int nest;
 
         if (output_event->parent)
                 output_event = output_event->parent;
@@ -360,13 +393,16 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
         if (!refcount_inc_not_zero(&rb->aux_refcount))
                 goto err;
 
+        nest = READ_ONCE(rb->aux_nest);
         /*
          * Nesting is not supported for AUX area, make sure nested
          * writers are caught early
          */
-        if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+        if (WARN_ON_ONCE(nest))
                 goto err_put;
 
+        WRITE_ONCE(rb->aux_nest, nest + 1);
+
         aux_head = rb->aux_head;
 
         handle->rb = rb;
@@ -394,7 +430,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
                 if (!handle->size) { /* A, matches D */
                         event->pending_disable = smp_processor_id();
                         perf_output_wakeup(handle);
-                        local_set(&rb->aux_nest, 0);
+                        WRITE_ONCE(rb->aux_nest, 0);
                         goto err_put;
                 }
         }
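[Editor's note] In the AUX path, @rb->aux_nest is only a guard: nested AUX writers are not supported, so the atomic local_xchg() was not needed for correctness, and a READ_ONCE()/WARN/WRITE_ONCE() sequence states the same check more plainly. A stripped-down userspace illustration of that begin/end protocol; the function names and the volatile casts (standing in for READ_ONCE/WRITE_ONCE) are hypothetical:

#include <stdio.h>

static unsigned int aux_nest;

/* Begin an AUX transaction.  Returns 0 on success, -1 if a writer is
 * already active, which the kernel flags with WARN_ON_ONCE(). */
static int aux_begin(void)
{
        unsigned int nest = *(volatile unsigned int *)&aux_nest;  /* READ_ONCE() stand-in  */

        if (nest)                         /* nested AUX writers are not supported */
                return -1;

        *(volatile unsigned int *)&aux_nest = nest + 1;           /* WRITE_ONCE() stand-in */
        return 0;
}

static void aux_end(void)
{
        *(volatile unsigned int *)&aux_nest = 0;                  /* WRITE_ONCE() stand-in */
}

int main(void)
{
        printf("first begin:  %d\n", aux_begin());   /* 0: no writer active  */
        printf("second begin: %d\n", aux_begin());   /* -1: caught as nested */
        aux_end();
        return 0;
}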
@@ -471,7 +507,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
                 perf_event_aux_event(handle->event, aux_head, size,
                                      handle->aux_flags);
 
-        rb->user_page->aux_head = rb->aux_head;
+        WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
         if (rb_need_aux_wakeup(rb))
                 wakeup = true;
 
@@ -483,7 +519,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 
         handle->event = NULL;
 
-        local_set(&rb->aux_nest, 0);
+        WRITE_ONCE(rb->aux_nest, 0);
         /* can't be last */
         rb_free_aux(rb);
         ring_buffer_put(rb);
@@ -503,7 +539,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
 
         rb->aux_head += size;
 
-        rb->user_page->aux_head = rb->aux_head;
+        WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
         if (rb_need_aux_wakeup(rb)) {
                 perf_output_wakeup(handle);
                 handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
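[Editor's note] The remaining hunks convert the plain stores to the shared, mmap()'d user page into WRITE_ONCE(), so the compiler is not free to tear, fuse, or replay a store that userspace may read concurrently; a consumer would pair it with a READ_ONCE()-style load of the same field. A small userspace sketch of that writer/reader pairing, with function names chosen purely for illustration:

#include <stdio.h>

/* Writer side: one untorn, word-sized store of the new position. */
static void publish_aux_head(unsigned long *shared_aux_head, unsigned long aux_head)
{
        *(volatile unsigned long *)shared_aux_head = aux_head;        /* WRITE_ONCE() stand-in */
}

/* Reader side: what a consumer of the shared page would do. */
static unsigned long read_aux_head(const unsigned long *shared_aux_head)
{
        return *(const volatile unsigned long *)shared_aux_head;      /* READ_ONCE() stand-in  */
}

int main(void)
{
        unsigned long shared_aux_head = 0;

        publish_aux_head(&shared_aux_head, 4096);
        printf("aux_head = %lu\n", read_aux_head(&shared_aux_head));
        return 0;
}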