Diffstat (limited to 'kernel/events/ring_buffer.c')
-rw-r--r--	kernel/events/ring_buffer.c	101
1 file changed, 38 insertions(+), 63 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 9c2ddfbf4525..e8b168af135b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -12,40 +12,10 @@
 #include <linux/perf_event.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/circ_buf.h>
 
 #include "internal.h"
 
-static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
-			      unsigned long offset, unsigned long head)
-{
-	unsigned long sz = perf_data_size(rb);
-	unsigned long mask = sz - 1;
-
-	/*
-	 * check if user-writable
-	 * overwrite : over-write its own tail
-	 * !overwrite: buffer possibly drops events.
-	 */
-	if (rb->overwrite)
-		return true;
-
-	/*
-	 * verify that payload is not bigger than buffer
-	 * otherwise masking logic may fail to detect
-	 * the "not enough space" condition
-	 */
-	if ((head - offset) > sz)
-		return false;
-
-	offset = (offset - tail) & mask;
-	head   = (head   - tail) & mask;
-
-	if ((int)(head - offset) < 0)
-		return false;
-
-	return true;
-}
-
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
 	atomic_set(&handle->rb->poll, POLL_IN);
@@ -115,8 +85,8 @@ again:
 	rb->user_page->data_head = head;
 
 	/*
-	 * Now check if we missed an update, rely on the (compiler)
-	 * barrier in atomic_dec_and_test() to re-read rb->head.
+	 * Now check if we missed an update -- rely on previous implied
+	 * compiler barriers to force a re-read.
 	 */
 	if (unlikely(head != local_read(&rb->head))) {
 		local_inc(&rb->nest);
@@ -135,8 +105,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
-	int have_lost;
-	struct perf_sample_data sample_data;
+	int have_lost, page_shift;
 	struct {
 		struct perf_event_header header;
 		u64			 id;
@@ -151,57 +120,63 @@ int perf_output_begin(struct perf_output_handle *handle,
 		event = event->parent;
 
 	rb = rcu_dereference(event->rb);
-	if (!rb)
+	if (unlikely(!rb))
 		goto out;
 
-	handle->rb = rb;
-	handle->event = event;
-
-	if (!rb->nr_pages)
+	if (unlikely(!rb->nr_pages))
 		goto out;
 
+	handle->rb = rb;
+	handle->event = event;
+
 	have_lost = local_read(&rb->lost);
-	if (have_lost) {
-		lost_event.header.size = sizeof(lost_event);
-		perf_event_header__init_id(&lost_event.header, &sample_data,
-					   event);
-		size += lost_event.header.size;
+	if (unlikely(have_lost)) {
+		size += sizeof(lost_event);
+		if (event->attr.sample_id_all)
+			size += event->id_header_size;
 	}
 
 	perf_output_get_handle(handle);
 
 	do {
-		/*
-		 * Userspace could choose to issue a mb() before updating the
-		 * tail pointer. So that all reads will be completed before the
-		 * write is issued.
-		 *
-		 * See perf_output_put_handle().
-		 */
 		tail = ACCESS_ONCE(rb->user_page->data_tail);
-		smp_mb();
 		offset = head = local_read(&rb->head);
-		head += size;
-		if (unlikely(!perf_output_space(rb, tail, offset, head)))
+		if (!rb->overwrite &&
+		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
 			goto fail;
+		head += size;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
 
-	if (head - local_read(&rb->wakeup) > rb->watermark)
+	/*
+	 * Separate the userpage->tail read from the data stores below.
+	 * Matches the MB userspace SHOULD issue after reading the data
+	 * and before storing the new tail position.
+	 *
+	 * See perf_output_put_handle().
+	 */
+	smp_mb();
+
+	if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
 		local_add(rb->watermark, &rb->wakeup);
 
-	handle->page = offset >> (PAGE_SHIFT + page_order(rb));
-	handle->page &= rb->nr_pages - 1;
-	handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
-	handle->addr = rb->data_pages[handle->page];
-	handle->addr += handle->size;
-	handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
+	page_shift = PAGE_SHIFT + page_order(rb);
 
-	if (have_lost) {
+	handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
+	offset &= (1UL << page_shift) - 1;
+	handle->addr = rb->data_pages[handle->page] + offset;
+	handle->size = (1UL << page_shift) - offset;
+
+	if (unlikely(have_lost)) {
+		struct perf_sample_data sample_data;
+
+		lost_event.header.size = sizeof(lost_event);
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
 		lost_event.id          = event->id;
 		lost_event.lost        = local_xchg(&rb->lost, 0);
 
+		perf_event_header__init_id(&lost_event.header,
+					   &sample_data, event);
 		perf_output_put(handle, lost_event);
 		perf_event__output_id_sample(event, handle, &sample_data);
 	}
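
The space check this patch adopts is easiest to see in isolation. Below is a minimal standalone userspace sketch, not kernel code: the CIRC_SPACE() definition is copied from include/linux/circ_buf.h, while the buffer size, head/tail positions, and record size are made-up illustration values, and the page/offset split assumes PAGE_SHIFT == 12 with an order-0 buffer.

#include <stdio.h>

/* From include/linux/circ_buf.h: free bytes in a power-of-two ring,
 * keeping one byte in reserve so a full buffer is distinguishable
 * from an empty one. head and tail may be free-running counters;
 * the mask folds them back into the ring. */
#define CIRC_SPACE(head, tail, size) \
	(((tail) - ((head) + 1)) & ((size) - 1))

int main(void)
{
	unsigned long size = 16384;	/* made up: four 4 KiB data pages */
	unsigned long tail = 100;	/* last position userspace freed */
	unsigned long head = 612;	/* kernel's free-running write head */
	unsigned long record = 64;	/* bytes we want to reserve */

	/* The !rb->overwrite check the patch performs inside the
	 * cmpxchg loop: fail the reservation if the record won't fit. */
	if (CIRC_SPACE(head, tail, size) < record) {
		puts("no space: the event would be counted as lost");
		return 1;
	}

	/* The page/offset split done after reservation, assuming
	 * PAGE_SHIFT == 12 and page_order(rb) == 0. */
	unsigned long page_shift = 12;
	unsigned long nr_pages = size >> page_shift;
	unsigned long page = (head >> page_shift) & (nr_pages - 1);
	unsigned long off  = head & ((1UL << page_shift) - 1);

	printf("space=%lu, record starts in page %lu at offset %lu\n",
	       CIRC_SPACE(head, tail, size), page, off);
	return 0;
}

Because the data area is a power of two in size, both the space check and the page split reduce to shift-and-mask arithmetic on free-running counters, which is what lets the patch delete the open-coded perf_output_space() entirely.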
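
On the ordering side: the smp_mb() the patch moves below the cmpxchg loop pairs with the barrier userspace should issue between reading records and publishing the new data_tail. The following C11-atomics sketch models only the consumer half of that protocol; struct user_page is a hypothetical stand-in for the two perf mmap control-page fields involved, not the real uapi layout.

#include <stdatomic.h>
#include <stdint.h>

/* Hypothetical mirror of the two perf user-page fields involved. */
struct user_page {
	_Atomic uint64_t data_head;	/* written by the kernel */
	_Atomic uint64_t data_tail;	/* written by userspace */
};

static void consume(struct user_page *up,
		    void (*drain)(uint64_t tail, uint64_t head))
{
	/* Acquire: observe all record data written before this head. */
	uint64_t head = atomic_load_explicit(&up->data_head,
					     memory_order_acquire);
	uint64_t tail = atomic_load_explicit(&up->data_tail,
					     memory_order_relaxed);

	drain(tail, head);	/* read the records in [tail, head) */

	/*
	 * Release: the reads above complete before the new tail becomes
	 * visible, so the producer cannot reuse the space early. This is
	 * the "MB userspace SHOULD issue" that the kernel-side smp_mb()
	 * in the patch pairs with.
	 */
	atomic_store_explicit(&up->data_tail, head, memory_order_release);
}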