diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 693 |
1 files changed, 529 insertions, 164 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd38c5cfd8a..960cbf44c84 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -4,21 +4,92 @@ | |||
4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> |
5 | */ | 5 | */ |
6 | #include <linux/ring_buffer.h> | 6 | #include <linux/ring_buffer.h> |
7 | #include <linux/trace_clock.h> | ||
8 | #include <linux/ftrace_irq.h> | ||
7 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
8 | #include <linux/debugfs.h> | 10 | #include <linux/debugfs.h> |
9 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
12 | #include <linux/hardirq.h> | ||
10 | #include <linux/module.h> | 13 | #include <linux/module.h> |
11 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
12 | #include <linux/mutex.h> | 15 | #include <linux/mutex.h> |
13 | #include <linux/sched.h> /* used for sched_clock() (for now) */ | ||
14 | #include <linux/init.h> | 16 | #include <linux/init.h> |
15 | #include <linux/hash.h> | 17 | #include <linux/hash.h> |
16 | #include <linux/list.h> | 18 | #include <linux/list.h> |
19 | #include <linux/cpu.h> | ||
17 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
18 | 21 | ||
19 | #include "trace.h" | 22 | #include "trace.h" |
20 | 23 | ||
21 | /* | 24 | /* |
25 | * The ring buffer is made up of a list of pages. A separate list of pages is | ||
26 | * allocated for each CPU. A writer may only write to a buffer that is | ||
27 | * associated with the CPU it is currently executing on. A reader may read | ||
28 | * from any per cpu buffer. | ||
29 | * | ||
30 | * The reader is special. For each per cpu buffer, the reader has its own | ||
31 | * reader page. When a reader has read the entire reader page, this reader | ||
32 | * page is swapped with another page in the ring buffer. | ||
33 | * | ||
34 | * Now, as long as the writer is off the reader page, the reader can do | ||
35 | * whatever it wants with that page. The writer will never write to that page | ||
36 | * again (as long as it is out of the ring buffer). | ||
37 | * | ||
38 | * Here's some silly ASCII art. | ||
39 | * | ||
40 | * +------+ | ||
41 | * |reader| RING BUFFER | ||
42 | * |page | | ||
43 | * +------+ +---+ +---+ +---+ | ||
44 | * | |-->| |-->| | | ||
45 | * +---+ +---+ +---+ | ||
46 | * ^ | | ||
47 | * | | | ||
48 | * +---------------+ | ||
49 | * | ||
50 | * | ||
51 | * +------+ | ||
52 | * |reader| RING BUFFER | ||
53 | * |page |------------------v | ||
54 | * +------+ +---+ +---+ +---+ | ||
55 | * | |-->| |-->| | | ||
56 | * +---+ +---+ +---+ | ||
57 | * ^ | | ||
58 | * | | | ||
59 | * +---------------+ | ||
60 | * | ||
61 | * | ||
62 | * +------+ | ||
63 | * |reader| RING BUFFER | ||
64 | * |page |------------------v | ||
65 | * +------+ +---+ +---+ +---+ | ||
66 | * ^ | |-->| |-->| | | ||
67 | * | +---+ +---+ +---+ | ||
68 | * | | | ||
69 | * | | | ||
70 | * +------------------------------+ | ||
71 | * | ||
72 | * | ||
73 | * +------+ | ||
74 | * |buffer| RING BUFFER | ||
75 | * |page |------------------v | ||
76 | * +------+ +---+ +---+ +---+ | ||
77 | * ^ | | | |-->| | | ||
78 | * | New +---+ +---+ +---+ | ||
79 | * | Reader------^ | | ||
80 | * | page | | ||
81 | * +------------------------------+ | ||
82 | * | ||
83 | * | ||
84 | * After we make this swap, the reader can hand this page off to the splice | ||
85 | * code and be done with it. It can even allocate a new page if it needs to | ||
86 | * and swap that into the ring buffer. | ||
87 | * | ||
88 | * We will be using cmpxchg soon to make all this lockless. | ||
89 | * | ||
90 | */ | ||
91 | |||
92 | /* | ||
22 | * A fast way to enable or disable all ring buffers is to | 93 | * A fast way to enable or disable all ring buffers is to |
23 | * call tracing_on or tracing_off. Turning off the ring buffers | 94 | * call tracing_on or tracing_off. Turning off the ring buffers |
24 | * prevents all ring buffers from being recorded to. | 95 | * prevents all ring buffers from being recorded to. |
@@ -57,7 +128,9 @@ enum { | |||
57 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, | 128 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, |
58 | }; | 129 | }; |
59 | 130 | ||
60 | static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; | 131 | static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; |
132 | |||
133 | #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) | ||
61 | 134 | ||
62 | /** | 135 | /** |
63 | * tracing_on - enable all tracing buffers | 136 | * tracing_on - enable all tracing buffers |
@@ -89,59 +162,92 @@ EXPORT_SYMBOL_GPL(tracing_off); | |||
89 | * tracing_off_permanent - permanently disable ring buffers | 162 | * tracing_off_permanent - permanently disable ring buffers |
90 | * | 163 | * |
91 | * This function, once called, will disable all ring buffers | 164 | * This function, once called, will disable all ring buffers |
92 | * permanenty. | 165 | * permanently. |
93 | */ | 166 | */ |
94 | void tracing_off_permanent(void) | 167 | void tracing_off_permanent(void) |
95 | { | 168 | { |
96 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); | 169 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); |
97 | } | 170 | } |
98 | 171 | ||
172 | /** | ||
173 | * tracing_is_on - show whether the ring buffers are enabled | ||
174 | */ | ||
175 | int tracing_is_on(void) | ||
176 | { | ||
177 | return ring_buffer_flags == RB_BUFFERS_ON; | ||
178 | } | ||
179 | EXPORT_SYMBOL_GPL(tracing_is_on); | ||
180 | |||
99 | #include "trace.h" | 181 | #include "trace.h" |
100 | 182 | ||
101 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 183 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
102 | #define DEBUG_SHIFT 0 | 184 | #define RB_ALIGNMENT 4U |
185 | #define RB_MAX_SMALL_DATA 28 | ||
186 | |||
187 | enum { | ||
188 | RB_LEN_TIME_EXTEND = 8, | ||
189 | RB_LEN_TIME_STAMP = 16, | ||
190 | }; | ||
103 | 191 | ||
104 | /* FIXME!!! */ | 192 | static inline int rb_null_event(struct ring_buffer_event *event) |
105 | u64 ring_buffer_time_stamp(int cpu) | ||
106 | { | 193 | { |
107 | u64 time; | 194 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; |
195 | } | ||
108 | 196 | ||
109 | preempt_disable_notrace(); | 197 | static inline int rb_discarded_event(struct ring_buffer_event *event) |
110 | /* shift to debug/test normalization and TIME_EXTENTS */ | 198 | { |
111 | time = sched_clock() << DEBUG_SHIFT; | 199 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta; |
112 | preempt_enable_no_resched_notrace(); | 200 | } |
113 | 201 | ||
114 | return time; | 202 | static void rb_event_set_padding(struct ring_buffer_event *event) |
203 | { | ||
204 | event->type = RINGBUF_TYPE_PADDING; | ||
205 | event->time_delta = 0; | ||
115 | } | 206 | } |
116 | EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); | ||
117 | 207 | ||
118 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) | 208 | /** |
209 | * ring_buffer_event_discard - discard an event in the ring buffer | ||
210 | * @buffer: the ring buffer | ||
211 | * @event: the event to discard | ||
212 | * | ||
213 | * Sometimes an event that is in the ring buffer needs to be ignored. | ||
214 | * This function lets the user discard an event in the ring buffer | ||
215 | * and then that event will not be read later. | ||
216 | * | ||
217 | * Note, it is up to the user to be careful with this, and protect | ||
218 | * against races. If the user discards an event that has been consumed | ||
219 | * it is possible that it could corrupt the ring buffer. | ||
220 | */ | ||
221 | void ring_buffer_event_discard(struct ring_buffer_event *event) | ||
119 | { | 222 | { |
120 | /* Just stupid testing the normalize function and deltas */ | 223 | event->type = RINGBUF_TYPE_PADDING; |
121 | *ts >>= DEBUG_SHIFT; | 224 | /* time delta must be non zero */ |
225 | if (!event->time_delta) | ||
226 | event->time_delta = 1; | ||
122 | } | 227 | } |
123 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | ||
124 | 228 | ||
125 | #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) | 229 | static unsigned |
126 | #define RB_ALIGNMENT_SHIFT 2 | 230 | rb_event_data_length(struct ring_buffer_event *event) |
127 | #define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT) | 231 | { |
128 | #define RB_MAX_SMALL_DATA 28 | 232 | unsigned length; |
129 | 233 | ||
130 | enum { | 234 | if (event->len) |
131 | RB_LEN_TIME_EXTEND = 8, | 235 | length = event->len * RB_ALIGNMENT; |
132 | RB_LEN_TIME_STAMP = 16, | 236 | else |
133 | }; | 237 | length = event->array[0]; |
238 | return length + RB_EVNT_HDR_SIZE; | ||
239 | } | ||
134 | 240 | ||
135 | /* inline for ring buffer fast paths */ | 241 | /* inline for ring buffer fast paths */ |
136 | static inline unsigned | 242 | static unsigned |
137 | rb_event_length(struct ring_buffer_event *event) | 243 | rb_event_length(struct ring_buffer_event *event) |
138 | { | 244 | { |
139 | unsigned length; | ||
140 | |||
141 | switch (event->type) { | 245 | switch (event->type) { |
142 | case RINGBUF_TYPE_PADDING: | 246 | case RINGBUF_TYPE_PADDING: |
143 | /* undefined */ | 247 | if (rb_null_event(event)) |
144 | return -1; | 248 | /* undefined */ |
249 | return -1; | ||
250 | return rb_event_data_length(event); | ||
145 | 251 | ||
146 | case RINGBUF_TYPE_TIME_EXTEND: | 252 | case RINGBUF_TYPE_TIME_EXTEND: |
147 | return RB_LEN_TIME_EXTEND; | 253 | return RB_LEN_TIME_EXTEND; |
@@ -150,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event) | |||
150 | return RB_LEN_TIME_STAMP; | 256 | return RB_LEN_TIME_STAMP; |
151 | 257 | ||
152 | case RINGBUF_TYPE_DATA: | 258 | case RINGBUF_TYPE_DATA: |
153 | if (event->len) | 259 | return rb_event_data_length(event); |
154 | length = event->len << RB_ALIGNMENT_SHIFT; | ||
155 | else | ||
156 | length = event->array[0]; | ||
157 | return length + RB_EVNT_HDR_SIZE; | ||
158 | default: | 260 | default: |
159 | BUG(); | 261 | BUG(); |
160 | } | 262 | } |
@@ -179,7 +281,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event) | |||
179 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); | 281 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); |
180 | 282 | ||
181 | /* inline for ring buffer fast paths */ | 283 | /* inline for ring buffer fast paths */ |
182 | static inline void * | 284 | static void * |
183 | rb_event_data(struct ring_buffer_event *event) | 285 | rb_event_data(struct ring_buffer_event *event) |
184 | { | 286 | { |
185 | BUG_ON(event->type != RINGBUF_TYPE_DATA); | 287 | BUG_ON(event->type != RINGBUF_TYPE_DATA); |
@@ -209,7 +311,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
209 | 311 | ||
210 | struct buffer_data_page { | 312 | struct buffer_data_page { |
211 | u64 time_stamp; /* page time stamp */ | 313 | u64 time_stamp; /* page time stamp */ |
212 | local_t commit; /* write commited index */ | 314 | local_t commit; /* write committed index */ |
213 | unsigned char data[]; /* data of buffer page */ | 315 | unsigned char data[]; /* data of buffer page */ |
214 | }; | 316 | }; |
215 | 317 | ||
@@ -225,14 +327,25 @@ static void rb_init_page(struct buffer_data_page *bpage) | |||
225 | local_set(&bpage->commit, 0); | 327 | local_set(&bpage->commit, 0); |
226 | } | 328 | } |
227 | 329 | ||
330 | /** | ||
331 | * ring_buffer_page_len - the size of data on the page. | ||
332 | * @page: The page to read | ||
333 | * | ||
334 | * Returns the amount of data on the page, including buffer page header. | ||
335 | */ | ||
336 | size_t ring_buffer_page_len(void *page) | ||
337 | { | ||
338 | return local_read(&((struct buffer_data_page *)page)->commit) | ||
339 | + BUF_PAGE_HDR_SIZE; | ||
340 | } | ||
341 | |||
228 | /* | 342 | /* |
229 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing | 343 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing |
230 | * this issue out. | 344 | * this issue out. |
231 | */ | 345 | */ |
232 | static inline void free_buffer_page(struct buffer_page *bpage) | 346 | static void free_buffer_page(struct buffer_page *bpage) |
233 | { | 347 | { |
234 | if (bpage->page) | 348 | free_page((unsigned long)bpage->page); |
235 | free_page((unsigned long)bpage->page); | ||
236 | kfree(bpage); | 349 | kfree(bpage); |
237 | } | 350 | } |
238 | 351 | ||
@@ -246,7 +359,7 @@ static inline int test_time_stamp(u64 delta) | |||
246 | return 0; | 359 | return 0; |
247 | } | 360 | } |
248 | 361 | ||
249 | #define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) | 362 | #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) |
250 | 363 | ||
251 | /* | 364 | /* |
252 | * head_page == tail_page && head == tail then buffer is empty. | 365 | * head_page == tail_page && head == tail then buffer is empty. |
@@ -260,7 +373,7 @@ struct ring_buffer_per_cpu { | |||
260 | struct list_head pages; | 373 | struct list_head pages; |
261 | struct buffer_page *head_page; /* read from head */ | 374 | struct buffer_page *head_page; /* read from head */ |
262 | struct buffer_page *tail_page; /* write to tail */ | 375 | struct buffer_page *tail_page; /* write to tail */ |
263 | struct buffer_page *commit_page; /* commited pages */ | 376 | struct buffer_page *commit_page; /* committed pages */ |
264 | struct buffer_page *reader_page; | 377 | struct buffer_page *reader_page; |
265 | unsigned long overrun; | 378 | unsigned long overrun; |
266 | unsigned long entries; | 379 | unsigned long entries; |
@@ -273,12 +386,17 @@ struct ring_buffer { | |||
273 | unsigned pages; | 386 | unsigned pages; |
274 | unsigned flags; | 387 | unsigned flags; |
275 | int cpus; | 388 | int cpus; |
276 | cpumask_var_t cpumask; | ||
277 | atomic_t record_disabled; | 389 | atomic_t record_disabled; |
390 | cpumask_var_t cpumask; | ||
278 | 391 | ||
279 | struct mutex mutex; | 392 | struct mutex mutex; |
280 | 393 | ||
281 | struct ring_buffer_per_cpu **buffers; | 394 | struct ring_buffer_per_cpu **buffers; |
395 | |||
396 | #ifdef CONFIG_HOTPLUG_CPU | ||
397 | struct notifier_block cpu_notify; | ||
398 | #endif | ||
399 | u64 (*clock)(void); | ||
282 | }; | 400 | }; |
283 | 401 | ||
284 | struct ring_buffer_iter { | 402 | struct ring_buffer_iter { |
@@ -299,11 +417,35 @@ struct ring_buffer_iter { | |||
299 | _____ret; \ | 417 | _____ret; \ |
300 | }) | 418 | }) |
301 | 419 | ||
420 | /* Raise this if you want to test the TIME_EXTEND events and normalization */ | ||
421 | #define DEBUG_SHIFT 0 | ||
422 | |||
423 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) | ||
424 | { | ||
425 | u64 time; | ||
426 | |||
427 | preempt_disable_notrace(); | ||
428 | /* shift to debug/test normalization and TIME_EXTEND events */ | ||
429 | time = buffer->clock() << DEBUG_SHIFT; | ||
430 | preempt_enable_no_resched_notrace(); | ||
431 | |||
432 | return time; | ||
433 | } | ||
434 | EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); | ||
435 | |||
436 | void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | ||
437 | int cpu, u64 *ts) | ||
438 | { | ||
439 | /* Just stupid testing the normalize function and deltas */ | ||
440 | *ts >>= DEBUG_SHIFT; | ||
441 | } | ||
442 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | ||
443 | |||
302 | /** | 444 | /** |
303 | * check_pages - integrity check of buffer pages | 445 | * check_pages - integrity check of buffer pages |
304 | * @cpu_buffer: CPU buffer with pages to test | 446 | * @cpu_buffer: CPU buffer with pages to test |
305 | * | 447 | * |
306 | * As a safty measure we check to make sure the data pages have not | 448 | * As a safety measure we check to make sure the data pages have not |
307 | * been corrupted. | 449 | * been corrupted. |
308 | */ | 450 | */ |
309 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 451 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) |
@@ -421,7 +563,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | |||
421 | struct list_head *head = &cpu_buffer->pages; | 563 | struct list_head *head = &cpu_buffer->pages; |
422 | struct buffer_page *bpage, *tmp; | 564 | struct buffer_page *bpage, *tmp; |
423 | 565 | ||
424 | list_del_init(&cpu_buffer->reader_page->list); | ||
425 | free_buffer_page(cpu_buffer->reader_page); | 566 | free_buffer_page(cpu_buffer->reader_page); |
426 | 567 | ||
427 | list_for_each_entry_safe(bpage, tmp, head, list) { | 568 | list_for_each_entry_safe(bpage, tmp, head, list) { |
@@ -437,6 +578,11 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | |||
437 | */ | 578 | */ |
438 | extern int ring_buffer_page_too_big(void); | 579 | extern int ring_buffer_page_too_big(void); |
439 | 580 | ||
581 | #ifdef CONFIG_HOTPLUG_CPU | ||
582 | static int rb_cpu_notify(struct notifier_block *self, | ||
583 | unsigned long action, void *hcpu); | ||
584 | #endif | ||
585 | |||
440 | /** | 586 | /** |
441 | * ring_buffer_alloc - allocate a new ring_buffer | 587 | * ring_buffer_alloc - allocate a new ring_buffer |
442 | * @size: the size in bytes per cpu that is needed. | 588 | * @size: the size in bytes per cpu that is needed. |
@@ -469,12 +615,23 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
469 | 615 | ||
470 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 616 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
471 | buffer->flags = flags; | 617 | buffer->flags = flags; |
618 | buffer->clock = trace_clock_local; | ||
472 | 619 | ||
473 | /* need at least two pages */ | 620 | /* need at least two pages */ |
474 | if (buffer->pages == 1) | 621 | if (buffer->pages == 1) |
475 | buffer->pages++; | 622 | buffer->pages++; |
476 | 623 | ||
624 | /* | ||
625 | * In case of non-hotplug cpu, if the ring-buffer is allocated | ||
626 | * in early initcall, it will not be notified of secondary cpus. | ||
627 | * In that case, we need to allocate for all possible cpus. | ||
628 | */ | ||
629 | #ifdef CONFIG_HOTPLUG_CPU | ||
630 | get_online_cpus(); | ||
631 | cpumask_copy(buffer->cpumask, cpu_online_mask); | ||
632 | #else | ||
477 | cpumask_copy(buffer->cpumask, cpu_possible_mask); | 633 | cpumask_copy(buffer->cpumask, cpu_possible_mask); |
634 | #endif | ||
478 | buffer->cpus = nr_cpu_ids; | 635 | buffer->cpus = nr_cpu_ids; |
479 | 636 | ||
480 | bsize = sizeof(void *) * nr_cpu_ids; | 637 | bsize = sizeof(void *) * nr_cpu_ids; |
@@ -490,6 +647,13 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
490 | goto fail_free_buffers; | 647 | goto fail_free_buffers; |
491 | } | 648 | } |
492 | 649 | ||
650 | #ifdef CONFIG_HOTPLUG_CPU | ||
651 | buffer->cpu_notify.notifier_call = rb_cpu_notify; | ||
652 | buffer->cpu_notify.priority = 0; | ||
653 | register_cpu_notifier(&buffer->cpu_notify); | ||
654 | #endif | ||
655 | |||
656 | put_online_cpus(); | ||
493 | mutex_init(&buffer->mutex); | 657 | mutex_init(&buffer->mutex); |
494 | 658 | ||
495 | return buffer; | 659 | return buffer; |
@@ -503,6 +667,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
503 | 667 | ||
504 | fail_free_cpumask: | 668 | fail_free_cpumask: |
505 | free_cpumask_var(buffer->cpumask); | 669 | free_cpumask_var(buffer->cpumask); |
670 | put_online_cpus(); | ||
506 | 671 | ||
507 | fail_free_buffer: | 672 | fail_free_buffer: |
508 | kfree(buffer); | 673 | kfree(buffer); |
@@ -519,15 +684,29 @@ ring_buffer_free(struct ring_buffer *buffer) | |||
519 | { | 684 | { |
520 | int cpu; | 685 | int cpu; |
521 | 686 | ||
687 | get_online_cpus(); | ||
688 | |||
689 | #ifdef CONFIG_HOTPLUG_CPU | ||
690 | unregister_cpu_notifier(&buffer->cpu_notify); | ||
691 | #endif | ||
692 | |||
522 | for_each_buffer_cpu(buffer, cpu) | 693 | for_each_buffer_cpu(buffer, cpu) |
523 | rb_free_cpu_buffer(buffer->buffers[cpu]); | 694 | rb_free_cpu_buffer(buffer->buffers[cpu]); |
524 | 695 | ||
696 | put_online_cpus(); | ||
697 | |||
525 | free_cpumask_var(buffer->cpumask); | 698 | free_cpumask_var(buffer->cpumask); |
526 | 699 | ||
527 | kfree(buffer); | 700 | kfree(buffer); |
528 | } | 701 | } |
529 | EXPORT_SYMBOL_GPL(ring_buffer_free); | 702 | EXPORT_SYMBOL_GPL(ring_buffer_free); |
530 | 703 | ||
704 | void ring_buffer_set_clock(struct ring_buffer *buffer, | ||
705 | u64 (*clock)(void)) | ||
706 | { | ||
707 | buffer->clock = clock; | ||
708 | } | ||
709 | |||
531 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); | 710 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); |
532 | 711 | ||
533 | static void | 712 | static void |
@@ -627,16 +806,15 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
627 | return size; | 806 | return size; |
628 | 807 | ||
629 | mutex_lock(&buffer->mutex); | 808 | mutex_lock(&buffer->mutex); |
809 | get_online_cpus(); | ||
630 | 810 | ||
631 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 811 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
632 | 812 | ||
633 | if (size < buffer_size) { | 813 | if (size < buffer_size) { |
634 | 814 | ||
635 | /* easy case, just free pages */ | 815 | /* easy case, just free pages */ |
636 | if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) { | 816 | if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) |
637 | mutex_unlock(&buffer->mutex); | 817 | goto out_fail; |
638 | return -1; | ||
639 | } | ||
640 | 818 | ||
641 | rm_pages = buffer->pages - nr_pages; | 819 | rm_pages = buffer->pages - nr_pages; |
642 | 820 | ||
@@ -655,10 +833,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
655 | * add these pages to the cpu_buffers. Otherwise we just free | 833 | * add these pages to the cpu_buffers. Otherwise we just free |
656 | * them all and return -ENOMEM; | 834 | * them all and return -ENOMEM; |
657 | */ | 835 | */ |
658 | if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) { | 836 | if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) |
659 | mutex_unlock(&buffer->mutex); | 837 | goto out_fail; |
660 | return -1; | ||
661 | } | ||
662 | 838 | ||
663 | new_pages = nr_pages - buffer->pages; | 839 | new_pages = nr_pages - buffer->pages; |
664 | 840 | ||
@@ -683,13 +859,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
683 | rb_insert_pages(cpu_buffer, &pages, new_pages); | 859 | rb_insert_pages(cpu_buffer, &pages, new_pages); |
684 | } | 860 | } |
685 | 861 | ||
686 | if (RB_WARN_ON(buffer, !list_empty(&pages))) { | 862 | if (RB_WARN_ON(buffer, !list_empty(&pages))) |
687 | mutex_unlock(&buffer->mutex); | 863 | goto out_fail; |
688 | return -1; | ||
689 | } | ||
690 | 864 | ||
691 | out: | 865 | out: |
692 | buffer->pages = nr_pages; | 866 | buffer->pages = nr_pages; |
867 | put_online_cpus(); | ||
693 | mutex_unlock(&buffer->mutex); | 868 | mutex_unlock(&buffer->mutex); |
694 | 869 | ||
695 | return size; | 870 | return size; |
@@ -699,15 +874,20 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
699 | list_del_init(&bpage->list); | 874 | list_del_init(&bpage->list); |
700 | free_buffer_page(bpage); | 875 | free_buffer_page(bpage); |
701 | } | 876 | } |
877 | put_online_cpus(); | ||
702 | mutex_unlock(&buffer->mutex); | 878 | mutex_unlock(&buffer->mutex); |
703 | return -ENOMEM; | 879 | return -ENOMEM; |
704 | } | ||
705 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | ||
706 | 880 | ||
707 | static inline int rb_null_event(struct ring_buffer_event *event) | 881 | /* |
708 | { | 882 | * Something went totally wrong, and we are too paranoid |
709 | return event->type == RINGBUF_TYPE_PADDING; | 883 | * to even clean up the mess. |
884 | */ | ||
885 | out_fail: | ||
886 | put_online_cpus(); | ||
887 | mutex_unlock(&buffer->mutex); | ||
888 | return -1; | ||
710 | } | 889 | } |
890 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | ||
711 | 891 | ||
712 | static inline void * | 892 | static inline void * |
713 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) | 893 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) |
@@ -811,7 +991,7 @@ rb_event_index(struct ring_buffer_event *event) | |||
811 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 991 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); |
812 | } | 992 | } |
813 | 993 | ||
814 | static inline int | 994 | static int |
815 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 995 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
816 | struct ring_buffer_event *event) | 996 | struct ring_buffer_event *event) |
817 | { | 997 | { |
@@ -825,7 +1005,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
825 | rb_commit_index(cpu_buffer) == index; | 1005 | rb_commit_index(cpu_buffer) == index; |
826 | } | 1006 | } |
827 | 1007 | ||
828 | static inline void | 1008 | static void |
829 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | 1009 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, |
830 | struct ring_buffer_event *event) | 1010 | struct ring_buffer_event *event) |
831 | { | 1011 | { |
@@ -850,7 +1030,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
850 | local_set(&cpu_buffer->commit_page->page->commit, index); | 1030 | local_set(&cpu_buffer->commit_page->page->commit, index); |
851 | } | 1031 | } |
852 | 1032 | ||
853 | static inline void | 1033 | static void |
854 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1034 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
855 | { | 1035 | { |
856 | /* | 1036 | /* |
@@ -896,7 +1076,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
896 | cpu_buffer->reader_page->read = 0; | 1076 | cpu_buffer->reader_page->read = 0; |
897 | } | 1077 | } |
898 | 1078 | ||
899 | static inline void rb_inc_iter(struct ring_buffer_iter *iter) | 1079 | static void rb_inc_iter(struct ring_buffer_iter *iter) |
900 | { | 1080 | { |
901 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1081 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
902 | 1082 | ||
@@ -926,7 +1106,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter) | |||
926 | * and with this, we can determine what to place into the | 1106 | * and with this, we can determine what to place into the |
927 | * data field. | 1107 | * data field. |
928 | */ | 1108 | */ |
929 | static inline void | 1109 | static void |
930 | rb_update_event(struct ring_buffer_event *event, | 1110 | rb_update_event(struct ring_buffer_event *event, |
931 | unsigned type, unsigned length) | 1111 | unsigned type, unsigned length) |
932 | { | 1112 | { |
@@ -938,15 +1118,11 @@ rb_update_event(struct ring_buffer_event *event, | |||
938 | break; | 1118 | break; |
939 | 1119 | ||
940 | case RINGBUF_TYPE_TIME_EXTEND: | 1120 | case RINGBUF_TYPE_TIME_EXTEND: |
941 | event->len = | 1121 | event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); |
942 | (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1)) | ||
943 | >> RB_ALIGNMENT_SHIFT; | ||
944 | break; | 1122 | break; |
945 | 1123 | ||
946 | case RINGBUF_TYPE_TIME_STAMP: | 1124 | case RINGBUF_TYPE_TIME_STAMP: |
947 | event->len = | 1125 | event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); |
948 | (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1)) | ||
949 | >> RB_ALIGNMENT_SHIFT; | ||
950 | break; | 1126 | break; |
951 | 1127 | ||
952 | case RINGBUF_TYPE_DATA: | 1128 | case RINGBUF_TYPE_DATA: |
@@ -955,16 +1131,14 @@ rb_update_event(struct ring_buffer_event *event, | |||
955 | event->len = 0; | 1131 | event->len = 0; |
956 | event->array[0] = length; | 1132 | event->array[0] = length; |
957 | } else | 1133 | } else |
958 | event->len = | 1134 | event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); |
959 | (length + (RB_ALIGNMENT-1)) | ||
960 | >> RB_ALIGNMENT_SHIFT; | ||
961 | break; | 1135 | break; |
962 | default: | 1136 | default: |
963 | BUG(); | 1137 | BUG(); |
964 | } | 1138 | } |
965 | } | 1139 | } |
966 | 1140 | ||
967 | static inline unsigned rb_calculate_event_length(unsigned length) | 1141 | static unsigned rb_calculate_event_length(unsigned length) |
968 | { | 1142 | { |
969 | struct ring_buffer_event event; /* Used only for sizeof array */ | 1143 | struct ring_buffer_event event; /* Used only for sizeof array */ |
970 | 1144 | ||
@@ -990,6 +1164,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
990 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1164 | struct ring_buffer *buffer = cpu_buffer->buffer; |
991 | struct ring_buffer_event *event; | 1165 | struct ring_buffer_event *event; |
992 | unsigned long flags; | 1166 | unsigned long flags; |
1167 | bool lock_taken = false; | ||
993 | 1168 | ||
994 | commit_page = cpu_buffer->commit_page; | 1169 | commit_page = cpu_buffer->commit_page; |
995 | /* we just need to protect against interrupts */ | 1170 | /* we just need to protect against interrupts */ |
@@ -1003,7 +1178,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1003 | struct buffer_page *next_page = tail_page; | 1178 | struct buffer_page *next_page = tail_page; |
1004 | 1179 | ||
1005 | local_irq_save(flags); | 1180 | local_irq_save(flags); |
1006 | __raw_spin_lock(&cpu_buffer->lock); | 1181 | /* |
1182 | * Since the write to the buffer is still not | ||
1183 | * fully lockless, we must be careful with NMIs. | ||
1184 | * The locks in the writers are taken when a write | ||
1185 | * crosses to a new page. The locks protect against | ||
1186 | * races with the readers (this will soon be fixed | ||
1187 | * with a lockless solution). | ||
1188 | * | ||
1189 | * Because we can not protect against NMIs, and we | ||
1190 | * want to keep traces reentrant, we need to manage | ||
1191 | * what happens when we are in an NMI. | ||
1192 | * | ||
1193 | * NMIs can happen after we take the lock. | ||
1194 | * If we are in an NMI, only take the lock | ||
1195 | * if it is not already taken. Otherwise | ||
1196 | * simply fail. | ||
1197 | */ | ||
1198 | if (unlikely(in_nmi())) { | ||
1199 | if (!__raw_spin_trylock(&cpu_buffer->lock)) | ||
1200 | goto out_reset; | ||
1201 | } else | ||
1202 | __raw_spin_lock(&cpu_buffer->lock); | ||
1203 | |||
1204 | lock_taken = true; | ||
1007 | 1205 | ||
1008 | rb_inc_page(cpu_buffer, &next_page); | 1206 | rb_inc_page(cpu_buffer, &next_page); |
1009 | 1207 | ||
@@ -1012,7 +1210,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1012 | 1210 | ||
1013 | /* we grabbed the lock before incrementing */ | 1211 | /* we grabbed the lock before incrementing */ |
1014 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) | 1212 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) |
1015 | goto out_unlock; | 1213 | goto out_reset; |
1016 | 1214 | ||
1017 | /* | 1215 | /* |
1018 | * If for some reason, we had an interrupt storm that made | 1216 | * If for some reason, we had an interrupt storm that made |
@@ -1021,12 +1219,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1021 | */ | 1219 | */ |
1022 | if (unlikely(next_page == commit_page)) { | 1220 | if (unlikely(next_page == commit_page)) { |
1023 | WARN_ON_ONCE(1); | 1221 | WARN_ON_ONCE(1); |
1024 | goto out_unlock; | 1222 | goto out_reset; |
1025 | } | 1223 | } |
1026 | 1224 | ||
1027 | if (next_page == head_page) { | 1225 | if (next_page == head_page) { |
1028 | if (!(buffer->flags & RB_FL_OVERWRITE)) | 1226 | if (!(buffer->flags & RB_FL_OVERWRITE)) |
1029 | goto out_unlock; | 1227 | goto out_reset; |
1030 | 1228 | ||
1031 | /* tail_page has not moved yet? */ | 1229 | /* tail_page has not moved yet? */ |
1032 | if (tail_page == cpu_buffer->tail_page) { | 1230 | if (tail_page == cpu_buffer->tail_page) { |
@@ -1050,7 +1248,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1050 | cpu_buffer->tail_page = next_page; | 1248 | cpu_buffer->tail_page = next_page; |
1051 | 1249 | ||
1052 | /* reread the time stamp */ | 1250 | /* reread the time stamp */ |
1053 | *ts = ring_buffer_time_stamp(cpu_buffer->cpu); | 1251 | *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); |
1054 | cpu_buffer->tail_page->page->time_stamp = *ts; | 1252 | cpu_buffer->tail_page->page->time_stamp = *ts; |
1055 | } | 1253 | } |
1056 | 1254 | ||
@@ -1060,7 +1258,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1060 | if (tail < BUF_PAGE_SIZE) { | 1258 | if (tail < BUF_PAGE_SIZE) { |
1061 | /* Mark the rest of the page with padding */ | 1259 | /* Mark the rest of the page with padding */ |
1062 | event = __rb_page_index(tail_page, tail); | 1260 | event = __rb_page_index(tail_page, tail); |
1063 | event->type = RINGBUF_TYPE_PADDING; | 1261 | rb_event_set_padding(event); |
1064 | } | 1262 | } |
1065 | 1263 | ||
1066 | if (tail <= BUF_PAGE_SIZE) | 1264 | if (tail <= BUF_PAGE_SIZE) |
@@ -1100,12 +1298,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1100 | 1298 | ||
1101 | return event; | 1299 | return event; |
1102 | 1300 | ||
1103 | out_unlock: | 1301 | out_reset: |
1104 | /* reset write */ | 1302 | /* reset write */ |
1105 | if (tail <= BUF_PAGE_SIZE) | 1303 | if (tail <= BUF_PAGE_SIZE) |
1106 | local_set(&tail_page->write, tail); | 1304 | local_set(&tail_page->write, tail); |
1107 | 1305 | ||
1108 | __raw_spin_unlock(&cpu_buffer->lock); | 1306 | if (likely(lock_taken)) |
1307 | __raw_spin_unlock(&cpu_buffer->lock); | ||
1109 | local_irq_restore(flags); | 1308 | local_irq_restore(flags); |
1110 | return NULL; | 1309 | return NULL; |
1111 | } | 1310 | } |
@@ -1192,7 +1391,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1192 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 1391 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
1193 | return NULL; | 1392 | return NULL; |
1194 | 1393 | ||
1195 | ts = ring_buffer_time_stamp(cpu_buffer->cpu); | 1394 | ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); |
1196 | 1395 | ||
1197 | /* | 1396 | /* |
1198 | * Only the first commit can update the timestamp. | 1397 | * Only the first commit can update the timestamp. |
@@ -1265,7 +1464,6 @@ static DEFINE_PER_CPU(int, rb_need_resched); | |||
1265 | * ring_buffer_lock_reserve - reserve a part of the buffer | 1464 | * ring_buffer_lock_reserve - reserve a part of the buffer |
1266 | * @buffer: the ring buffer to reserve from | 1465 | * @buffer: the ring buffer to reserve from |
1267 | * @length: the length of the data to reserve (excluding event header) | 1466 | * @length: the length of the data to reserve (excluding event header) |
1268 | * @flags: a pointer to save the interrupt flags | ||
1269 | * | 1467 | * |
1270 | * Returns a reseverd event on the ring buffer to copy directly to. | 1468 | * Returns a reseverd event on the ring buffer to copy directly to. |
1271 | * The user of this interface will need to get the body to write into | 1469 | * The user of this interface will need to get the body to write into |
@@ -1278,9 +1476,7 @@ static DEFINE_PER_CPU(int, rb_need_resched); | |||
1278 | * If NULL is returned, then nothing has been allocated or locked. | 1476 | * If NULL is returned, then nothing has been allocated or locked. |
1279 | */ | 1477 | */ |
1280 | struct ring_buffer_event * | 1478 | struct ring_buffer_event * |
1281 | ring_buffer_lock_reserve(struct ring_buffer *buffer, | 1479 | ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) |
1282 | unsigned long length, | ||
1283 | unsigned long *flags) | ||
1284 | { | 1480 | { |
1285 | struct ring_buffer_per_cpu *cpu_buffer; | 1481 | struct ring_buffer_per_cpu *cpu_buffer; |
1286 | struct ring_buffer_event *event; | 1482 | struct ring_buffer_event *event; |
@@ -1347,15 +1543,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1347 | * ring_buffer_unlock_commit - commit a reserved | 1543 | * ring_buffer_unlock_commit - commit a reserved |
1348 | * @buffer: The buffer to commit to | 1544 | * @buffer: The buffer to commit to |
1349 | * @event: The event pointer to commit. | 1545 | * @event: The event pointer to commit. |
1350 | * @flags: the interrupt flags received from ring_buffer_lock_reserve. | ||
1351 | * | 1546 | * |
1352 | * This commits the data to the ring buffer, and releases any locks held. | 1547 | * This commits the data to the ring buffer, and releases any locks held. |
1353 | * | 1548 | * |
1354 | * Must be paired with ring_buffer_lock_reserve. | 1549 | * Must be paired with ring_buffer_lock_reserve. |
1355 | */ | 1550 | */ |
1356 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, | 1551 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, |
1357 | struct ring_buffer_event *event, | 1552 | struct ring_buffer_event *event) |
1358 | unsigned long flags) | ||
1359 | { | 1553 | { |
1360 | struct ring_buffer_per_cpu *cpu_buffer; | 1554 | struct ring_buffer_per_cpu *cpu_buffer; |
1361 | int cpu = raw_smp_processor_id(); | 1555 | int cpu = raw_smp_processor_id(); |
@@ -1438,7 +1632,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
1438 | } | 1632 | } |
1439 | EXPORT_SYMBOL_GPL(ring_buffer_write); | 1633 | EXPORT_SYMBOL_GPL(ring_buffer_write); |
1440 | 1634 | ||
1441 | static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 1635 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) |
1442 | { | 1636 | { |
1443 | struct buffer_page *reader = cpu_buffer->reader_page; | 1637 | struct buffer_page *reader = cpu_buffer->reader_page; |
1444 | struct buffer_page *head = cpu_buffer->head_page; | 1638 | struct buffer_page *head = cpu_buffer->head_page; |
@@ -1528,12 +1722,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | |||
1528 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 1722 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
1529 | { | 1723 | { |
1530 | struct ring_buffer_per_cpu *cpu_buffer; | 1724 | struct ring_buffer_per_cpu *cpu_buffer; |
1725 | unsigned long ret; | ||
1531 | 1726 | ||
1532 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 1727 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
1533 | return 0; | 1728 | return 0; |
1534 | 1729 | ||
1535 | cpu_buffer = buffer->buffers[cpu]; | 1730 | cpu_buffer = buffer->buffers[cpu]; |
1536 | return cpu_buffer->entries; | 1731 | ret = cpu_buffer->entries; |
1732 | |||
1733 | return ret; | ||
1537 | } | 1734 | } |
1538 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 1735 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
1539 | 1736 | ||
@@ -1545,12 +1742,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | |||
1545 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | 1742 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) |
1546 | { | 1743 | { |
1547 | struct ring_buffer_per_cpu *cpu_buffer; | 1744 | struct ring_buffer_per_cpu *cpu_buffer; |
1745 | unsigned long ret; | ||
1548 | 1746 | ||
1549 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 1747 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
1550 | return 0; | 1748 | return 0; |
1551 | 1749 | ||
1552 | cpu_buffer = buffer->buffers[cpu]; | 1750 | cpu_buffer = buffer->buffers[cpu]; |
1553 | return cpu_buffer->overrun; | 1751 | ret = cpu_buffer->overrun; |
1752 | |||
1753 | return ret; | ||
1554 | } | 1754 | } |
1555 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 1755 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); |
1556 | 1756 | ||
@@ -1627,9 +1827,14 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
1627 | */ | 1827 | */ |
1628 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) | 1828 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) |
1629 | { | 1829 | { |
1630 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1830 | struct ring_buffer_per_cpu *cpu_buffer; |
1631 | unsigned long flags; | 1831 | unsigned long flags; |
1632 | 1832 | ||
1833 | if (!iter) | ||
1834 | return; | ||
1835 | |||
1836 | cpu_buffer = iter->cpu_buffer; | ||
1837 | |||
1633 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 1838 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
1634 | rb_iter_reset(iter); | 1839 | rb_iter_reset(iter); |
1635 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 1840 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
@@ -1803,7 +2008,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
1803 | 2008 | ||
1804 | event = rb_reader_event(cpu_buffer); | 2009 | event = rb_reader_event(cpu_buffer); |
1805 | 2010 | ||
1806 | if (event->type == RINGBUF_TYPE_DATA) | 2011 | if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) |
1807 | cpu_buffer->entries--; | 2012 | cpu_buffer->entries--; |
1808 | 2013 | ||
1809 | rb_update_read_stamp(cpu_buffer, event); | 2014 | rb_update_read_stamp(cpu_buffer, event); |
@@ -1864,9 +2069,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
1864 | struct buffer_page *reader; | 2069 | struct buffer_page *reader; |
1865 | int nr_loops = 0; | 2070 | int nr_loops = 0; |
1866 | 2071 | ||
1867 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
1868 | return NULL; | ||
1869 | |||
1870 | cpu_buffer = buffer->buffers[cpu]; | 2072 | cpu_buffer = buffer->buffers[cpu]; |
1871 | 2073 | ||
1872 | again: | 2074 | again: |
@@ -1889,9 +2091,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
1889 | 2091 | ||
1890 | switch (event->type) { | 2092 | switch (event->type) { |
1891 | case RINGBUF_TYPE_PADDING: | 2093 | case RINGBUF_TYPE_PADDING: |
1892 | RB_WARN_ON(cpu_buffer, 1); | 2094 | if (rb_null_event(event)) |
2095 | RB_WARN_ON(cpu_buffer, 1); | ||
2096 | /* | ||
2097 | * Because the writer could be discarding every | ||
2098 | * event it creates (which would probably be bad) | ||
2099 | * if we were to go back to "again" then we may never | ||
2100 | * catch up, and will trigger the warn on, or lock | ||
2101 | * the box. Return the padding, and we will release | ||
2102 | * the current locks, and try again. | ||
2103 | */ | ||
1893 | rb_advance_reader(cpu_buffer); | 2104 | rb_advance_reader(cpu_buffer); |
1894 | return NULL; | 2105 | return event; |
1895 | 2106 | ||
1896 | case RINGBUF_TYPE_TIME_EXTEND: | 2107 | case RINGBUF_TYPE_TIME_EXTEND: |
1897 | /* Internal data, OK to advance */ | 2108 | /* Internal data, OK to advance */ |
@@ -1906,7 +2117,8 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
1906 | case RINGBUF_TYPE_DATA: | 2117 | case RINGBUF_TYPE_DATA: |
1907 | if (ts) { | 2118 | if (ts) { |
1908 | *ts = cpu_buffer->read_stamp + event->time_delta; | 2119 | *ts = cpu_buffer->read_stamp + event->time_delta; |
1909 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); | 2120 | ring_buffer_normalize_time_stamp(buffer, |
2121 | cpu_buffer->cpu, ts); | ||
1910 | } | 2122 | } |
1911 | return event; | 2123 | return event; |
1912 | 2124 | ||
@@ -1951,8 +2163,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
1951 | 2163 | ||
1952 | switch (event->type) { | 2164 | switch (event->type) { |
1953 | case RINGBUF_TYPE_PADDING: | 2165 | case RINGBUF_TYPE_PADDING: |
1954 | rb_inc_iter(iter); | 2166 | if (rb_null_event(event)) { |
1955 | goto again; | 2167 | rb_inc_iter(iter); |
2168 | goto again; | ||
2169 | } | ||
2170 | rb_advance_iter(iter); | ||
2171 | return event; | ||
1956 | 2172 | ||
1957 | case RINGBUF_TYPE_TIME_EXTEND: | 2173 | case RINGBUF_TYPE_TIME_EXTEND: |
1958 | /* Internal data, OK to advance */ | 2174 | /* Internal data, OK to advance */ |
@@ -1967,7 +2183,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
1967 | case RINGBUF_TYPE_DATA: | 2183 | case RINGBUF_TYPE_DATA: |
1968 | if (ts) { | 2184 | if (ts) { |
1969 | *ts = iter->read_stamp + event->time_delta; | 2185 | *ts = iter->read_stamp + event->time_delta; |
1970 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); | 2186 | ring_buffer_normalize_time_stamp(buffer, |
2187 | cpu_buffer->cpu, ts); | ||
1971 | } | 2188 | } |
1972 | return event; | 2189 | return event; |
1973 | 2190 | ||
@@ -1995,10 +2212,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
1995 | struct ring_buffer_event *event; | 2212 | struct ring_buffer_event *event; |
1996 | unsigned long flags; | 2213 | unsigned long flags; |
1997 | 2214 | ||
2215 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
2216 | return NULL; | ||
2217 | |||
2218 | again: | ||
1998 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2219 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
1999 | event = rb_buffer_peek(buffer, cpu, ts); | 2220 | event = rb_buffer_peek(buffer, cpu, ts); |
2000 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2221 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2001 | 2222 | ||
2223 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2224 | cpu_relax(); | ||
2225 | goto again; | ||
2226 | } | ||
2227 | |||
2002 | return event; | 2228 | return event; |
2003 | } | 2229 | } |
2004 | 2230 | ||
@@ -2017,10 +2243,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2017 | struct ring_buffer_event *event; | 2243 | struct ring_buffer_event *event; |
2018 | unsigned long flags; | 2244 | unsigned long flags; |
2019 | 2245 | ||
2246 | again: | ||
2020 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2247 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2021 | event = rb_iter_peek(iter, ts); | 2248 | event = rb_iter_peek(iter, ts); |
2022 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2249 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2023 | 2250 | ||
2251 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2252 | cpu_relax(); | ||
2253 | goto again; | ||
2254 | } | ||
2255 | |||
2024 | return event; | 2256 | return event; |
2025 | } | 2257 | } |
2026 | 2258 | ||
@@ -2035,24 +2267,37 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2035 | struct ring_buffer_event * | 2267 | struct ring_buffer_event * |
2036 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | 2268 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) |
2037 | { | 2269 | { |
2038 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2270 | struct ring_buffer_per_cpu *cpu_buffer; |
2039 | struct ring_buffer_event *event; | 2271 | struct ring_buffer_event *event = NULL; |
2040 | unsigned long flags; | 2272 | unsigned long flags; |
2041 | 2273 | ||
2274 | again: | ||
2275 | /* might be called in atomic */ | ||
2276 | preempt_disable(); | ||
2277 | |||
2042 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2278 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2043 | return NULL; | 2279 | goto out; |
2044 | 2280 | ||
2281 | cpu_buffer = buffer->buffers[cpu]; | ||
2045 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2282 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2046 | 2283 | ||
2047 | event = rb_buffer_peek(buffer, cpu, ts); | 2284 | event = rb_buffer_peek(buffer, cpu, ts); |
2048 | if (!event) | 2285 | if (!event) |
2049 | goto out; | 2286 | goto out_unlock; |
2050 | 2287 | ||
2051 | rb_advance_reader(cpu_buffer); | 2288 | rb_advance_reader(cpu_buffer); |
2052 | 2289 | ||
2053 | out: | 2290 | out_unlock: |
2054 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2291 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2055 | 2292 | ||
2293 | out: | ||
2294 | preempt_enable(); | ||
2295 | |||
2296 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2297 | cpu_relax(); | ||
2298 | goto again; | ||
2299 | } | ||
2300 | |||
2056 | return event; | 2301 | return event; |
2057 | } | 2302 | } |
2058 | EXPORT_SYMBOL_GPL(ring_buffer_consume); | 2303 | EXPORT_SYMBOL_GPL(ring_buffer_consume); |
@@ -2131,6 +2376,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
2131 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 2376 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
2132 | unsigned long flags; | 2377 | unsigned long flags; |
2133 | 2378 | ||
2379 | again: | ||
2134 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2380 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2135 | event = rb_iter_peek(iter, ts); | 2381 | event = rb_iter_peek(iter, ts); |
2136 | if (!event) | 2382 | if (!event) |
@@ -2140,6 +2386,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
2140 | out: | 2386 | out: |
2141 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2387 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2142 | 2388 | ||
2389 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2390 | cpu_relax(); | ||
2391 | goto again; | ||
2392 | } | ||
2393 | |||
2143 | return event; | 2394 | return event; |
2144 | } | 2395 | } |
2145 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 2396 | EXPORT_SYMBOL_GPL(ring_buffer_read); |
@@ -2232,6 +2483,7 @@ int ring_buffer_empty(struct ring_buffer *buffer) | |||
2232 | if (!rb_per_cpu_empty(cpu_buffer)) | 2483 | if (!rb_per_cpu_empty(cpu_buffer)) |
2233 | return 0; | 2484 | return 0; |
2234 | } | 2485 | } |
2486 | |||
2235 | return 1; | 2487 | return 1; |
2236 | } | 2488 | } |
2237 | EXPORT_SYMBOL_GPL(ring_buffer_empty); | 2489 | EXPORT_SYMBOL_GPL(ring_buffer_empty); |
@@ -2244,12 +2496,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); | |||
2244 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 2496 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
2245 | { | 2497 | { |
2246 | struct ring_buffer_per_cpu *cpu_buffer; | 2498 | struct ring_buffer_per_cpu *cpu_buffer; |
2499 | int ret; | ||
2247 | 2500 | ||
2248 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2501 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2249 | return 1; | 2502 | return 1; |
2250 | 2503 | ||
2251 | cpu_buffer = buffer->buffers[cpu]; | 2504 | cpu_buffer = buffer->buffers[cpu]; |
2252 | return rb_per_cpu_empty(cpu_buffer); | 2505 | ret = rb_per_cpu_empty(cpu_buffer); |
2506 | |||
2507 | |||
2508 | return ret; | ||
2253 | } | 2509 | } |
2254 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); | 2510 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); |
2255 | 2511 | ||
@@ -2268,18 +2524,36 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
2268 | { | 2524 | { |
2269 | struct ring_buffer_per_cpu *cpu_buffer_a; | 2525 | struct ring_buffer_per_cpu *cpu_buffer_a; |
2270 | struct ring_buffer_per_cpu *cpu_buffer_b; | 2526 | struct ring_buffer_per_cpu *cpu_buffer_b; |
2527 | int ret = -EINVAL; | ||
2271 | 2528 | ||
2272 | if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || | 2529 | if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || |
2273 | !cpumask_test_cpu(cpu, buffer_b->cpumask)) | 2530 | !cpumask_test_cpu(cpu, buffer_b->cpumask)) |
2274 | return -EINVAL; | 2531 | goto out; |
2275 | 2532 | ||
2276 | /* At least make sure the two buffers are somewhat the same */ | 2533 | /* At least make sure the two buffers are somewhat the same */ |
2277 | if (buffer_a->pages != buffer_b->pages) | 2534 | if (buffer_a->pages != buffer_b->pages) |
2278 | return -EINVAL; | 2535 | goto out; |
2536 | |||
2537 | ret = -EAGAIN; | ||
2538 | |||
2539 | if (ring_buffer_flags != RB_BUFFERS_ON) | ||
2540 | goto out; | ||
2541 | |||
2542 | if (atomic_read(&buffer_a->record_disabled)) | ||
2543 | goto out; | ||
2544 | |||
2545 | if (atomic_read(&buffer_b->record_disabled)) | ||
2546 | goto out; | ||
2279 | 2547 | ||
2280 | cpu_buffer_a = buffer_a->buffers[cpu]; | 2548 | cpu_buffer_a = buffer_a->buffers[cpu]; |
2281 | cpu_buffer_b = buffer_b->buffers[cpu]; | 2549 | cpu_buffer_b = buffer_b->buffers[cpu]; |
2282 | 2550 | ||
2551 | if (atomic_read(&cpu_buffer_a->record_disabled)) | ||
2552 | goto out; | ||
2553 | |||
2554 | if (atomic_read(&cpu_buffer_b->record_disabled)) | ||
2555 | goto out; | ||
2556 | |||
2283 | /* | 2557 | /* |
2284 | * We can't do a synchronize_sched here because this | 2558 | * We can't do a synchronize_sched here because this |
2285 | * function can be called in atomic context. | 2559 | * function can be called in atomic context. |
@@ -2298,18 +2572,21 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
2298 | atomic_dec(&cpu_buffer_a->record_disabled); | 2572 | atomic_dec(&cpu_buffer_a->record_disabled); |
2299 | atomic_dec(&cpu_buffer_b->record_disabled); | 2573 | atomic_dec(&cpu_buffer_b->record_disabled); |
2300 | 2574 | ||
2301 | return 0; | 2575 | ret = 0; |
2576 | out: | ||
2577 | return ret; | ||
2302 | } | 2578 | } |
2303 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 2579 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
2304 | 2580 | ||
2305 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | 2581 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, |
2306 | struct buffer_data_page *bpage) | 2582 | struct buffer_data_page *bpage, |
2583 | unsigned int offset) | ||
2307 | { | 2584 | { |
2308 | struct ring_buffer_event *event; | 2585 | struct ring_buffer_event *event; |
2309 | unsigned long head; | 2586 | unsigned long head; |
2310 | 2587 | ||
2311 | __raw_spin_lock(&cpu_buffer->lock); | 2588 | __raw_spin_lock(&cpu_buffer->lock); |
2312 | for (head = 0; head < local_read(&bpage->commit); | 2589 | for (head = offset; head < local_read(&bpage->commit); |
2313 | head += rb_event_length(event)) { | 2590 | head += rb_event_length(event)) { |
2314 | 2591 | ||
2315 | event = __rb_data_page_index(bpage, head); | 2592 | event = __rb_data_page_index(bpage, head); |
@@ -2340,8 +2617,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | |||
2340 | */ | 2617 | */ |
2341 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | 2618 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) |
2342 | { | 2619 | { |
2343 | unsigned long addr; | ||
2344 | struct buffer_data_page *bpage; | 2620 | struct buffer_data_page *bpage; |
2621 | unsigned long addr; | ||
2345 | 2622 | ||
2346 | addr = __get_free_page(GFP_KERNEL); | 2623 | addr = __get_free_page(GFP_KERNEL); |
2347 | if (!addr) | 2624 | if (!addr) |
@@ -2349,6 +2626,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | |||
2349 | 2626 | ||
2350 | bpage = (void *)addr; | 2627 | bpage = (void *)addr; |
2351 | 2628 | ||
2629 | rb_init_page(bpage); | ||
2630 | |||
2352 | return bpage; | 2631 | return bpage; |
2353 | } | 2632 | } |
2354 | 2633 | ||
@@ -2368,6 +2647,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
2368 | * ring_buffer_read_page - extract a page from the ring buffer | 2647 | * ring_buffer_read_page - extract a page from the ring buffer |
2369 | * @buffer: buffer to extract from | 2648 | * @buffer: buffer to extract from |
2370 | * @data_page: the page to use allocated from ring_buffer_alloc_read_page | 2649 | * @data_page: the page to use allocated from ring_buffer_alloc_read_page |
2650 | * @len: amount to extract | ||
2371 | * @cpu: the cpu of the buffer to extract | 2651 | * @cpu: the cpu of the buffer to extract |
2372 | * @full: should the extraction only happen when the page is full. | 2652 | * @full: should the extraction only happen when the page is full. |
2373 | * | 2653 | * |
@@ -2377,12 +2657,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
2377 | * to swap with a page in the ring buffer. | 2657 | * to swap with a page in the ring buffer. |
2378 | * | 2658 | * |
2379 | * for example: | 2659 | * for example: |
2380 | * rpage = ring_buffer_alloc_page(buffer); | 2660 | * rpage = ring_buffer_alloc_read_page(buffer); |
2381 | * if (!rpage) | 2661 | * if (!rpage) |
2382 | * return error; | 2662 | * return error; |
2383 | * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); | 2663 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); |
2384 | * if (ret) | 2664 | * if (ret >= 0) |
2385 | * process_page(rpage); | 2665 | * process_page(rpage, ret); |
2386 | * | 2666 | * |
2387 | * When @full is set, the function will not return true unless | 2667 | * When @full is set, the function will not return true unless |
2388 | * the writer is off the reader page. | 2668 | * the writer is off the reader page. |
@@ -2393,72 +2673,118 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
2393 | * responsible for that. | 2673 | * responsible for that. |
2394 | * | 2674 | * |
2395 | * Returns: | 2675 | * Returns: |
2396 | * 1 if data has been transferred | 2676 | * >=0 if data has been transferred, returns the offset of consumed data. |
2397 | * 0 if no data has been transferred. | 2677 | * <0 if no data has been transferred. |
2398 | */ | 2678 | */ |
2399 | int ring_buffer_read_page(struct ring_buffer *buffer, | 2679 | int ring_buffer_read_page(struct ring_buffer *buffer, |
2400 | void **data_page, int cpu, int full) | 2680 | void **data_page, size_t len, int cpu, int full) |
2401 | { | 2681 | { |
2402 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2682 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
2403 | struct ring_buffer_event *event; | 2683 | struct ring_buffer_event *event; |
2404 | struct buffer_data_page *bpage; | 2684 | struct buffer_data_page *bpage; |
2685 | struct buffer_page *reader; | ||
2405 | unsigned long flags; | 2686 | unsigned long flags; |
2406 | int ret = 0; | 2687 | unsigned int commit; |
2688 | unsigned int read; | ||
2689 | u64 save_timestamp; | ||
2690 | int ret = -1; | ||
2691 | |||
2692 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
2693 | goto out; | ||
2694 | |||
2695 | /* | ||
2696 | * If len is not big enough to hold the page header, then | ||
2697 | * we can not copy anything. | ||
2698 | */ | ||
2699 | if (len <= BUF_PAGE_HDR_SIZE) | ||
2700 | goto out; | ||
2701 | |||
2702 | len -= BUF_PAGE_HDR_SIZE; | ||
2407 | 2703 | ||
2408 | if (!data_page) | 2704 | if (!data_page) |
2409 | return 0; | 2705 | goto out; |
2410 | 2706 | ||
2411 | bpage = *data_page; | 2707 | bpage = *data_page; |
2412 | if (!bpage) | 2708 | if (!bpage) |
2413 | return 0; | 2709 | goto out; |
2414 | 2710 | ||
2415 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2711 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2416 | 2712 | ||
2417 | /* | 2713 | reader = rb_get_reader_page(cpu_buffer); |
2418 | * rb_buffer_peek will get the next ring buffer if | 2714 | if (!reader) |
2419 | * the current reader page is empty. | 2715 | goto out_unlock; |
2420 | */ | 2716 | |
2421 | event = rb_buffer_peek(buffer, cpu, NULL); | 2717 | event = rb_reader_event(cpu_buffer); |
2422 | if (!event) | 2718 | |
2423 | goto out; | 2719 | read = reader->read; |
2720 | commit = rb_page_commit(reader); | ||
2424 | 2721 | ||
2425 | /* check for data */ | ||
2426 | if (!local_read(&cpu_buffer->reader_page->page->commit)) | ||
2427 | goto out; | ||
2428 | /* | 2722 | /* |
2429 | * If the writer is already off of the read page, then simply | 2723 | * If this page has been partially read or |
2430 | * switch the read page with the given page. Otherwise | 2724 | * if len is not big enough to read the rest of the page or |
2431 | * we need to copy the data from the reader to the writer. | 2725 | * a writer is still on the page, then |
2726 | * we must copy the data from the page to the buffer. | ||
2727 | * Otherwise, we can simply swap the page with the one passed in. | ||
2432 | */ | 2728 | */ |
2433 | if (cpu_buffer->reader_page == cpu_buffer->commit_page) { | 2729 | if (read || (len < (commit - read)) || |
2434 | unsigned int read = cpu_buffer->reader_page->read; | 2730 | cpu_buffer->reader_page == cpu_buffer->commit_page) { |
2731 | struct buffer_data_page *rpage = cpu_buffer->reader_page->page; | ||
2732 | unsigned int rpos = read; | ||
2733 | unsigned int pos = 0; | ||
2734 | unsigned int size; | ||
2435 | 2735 | ||
2436 | if (full) | 2736 | if (full) |
2437 | goto out; | 2737 | goto out_unlock; |
2438 | /* The writer is still on the reader page, we must copy */ | 2738 | |
2439 | bpage = cpu_buffer->reader_page->page; | 2739 | if (len > (commit - read)) |
2440 | memcpy(bpage->data, | 2740 | len = (commit - read); |
2441 | cpu_buffer->reader_page->page->data + read, | 2741 | |
2442 | local_read(&bpage->commit) - read); | 2742 | size = rb_event_length(event); |
2743 | |||
2744 | if (len < size) | ||
2745 | goto out_unlock; | ||
2443 | 2746 | ||
2444 | /* consume what was read */ | 2747 | /* save the current timestamp, since the user will need it */ |
2445 | cpu_buffer->reader_page += read; | 2748 | save_timestamp = cpu_buffer->read_stamp; |
2446 | 2749 | ||
2750 | /* Need to copy one event at a time */ | ||
2751 | do { | ||
2752 | memcpy(bpage->data + pos, rpage->data + rpos, size); | ||
2753 | |||
2754 | len -= size; | ||
2755 | |||
2756 | rb_advance_reader(cpu_buffer); | ||
2757 | rpos = reader->read; | ||
2758 | pos += size; | ||
2759 | |||
2760 | event = rb_reader_event(cpu_buffer); | ||
2761 | size = rb_event_length(event); | ||
2762 | } while (len > size); | ||
2763 | |||
2764 | /* update bpage */ | ||
2765 | local_set(&bpage->commit, pos); | ||
2766 | bpage->time_stamp = save_timestamp; | ||
2767 | |||
2768 | /* we copied everything to the beginning */ | ||
2769 | read = 0; | ||
2447 | } else { | 2770 | } else { |
2448 | /* swap the pages */ | 2771 | /* swap the pages */ |
2449 | rb_init_page(bpage); | 2772 | rb_init_page(bpage); |
2450 | bpage = cpu_buffer->reader_page->page; | 2773 | bpage = reader->page; |
2451 | cpu_buffer->reader_page->page = *data_page; | 2774 | reader->page = *data_page; |
2452 | cpu_buffer->reader_page->read = 0; | 2775 | local_set(&reader->write, 0); |
2776 | reader->read = 0; | ||
2453 | *data_page = bpage; | 2777 | *data_page = bpage; |
2778 | |||
2779 | /* update the entry counter */ | ||
2780 | rb_remove_entries(cpu_buffer, bpage, read); | ||
2454 | } | 2781 | } |
2455 | ret = 1; | 2782 | ret = read; |
2456 | 2783 | ||
2457 | /* update the entry counter */ | 2784 | out_unlock: |
2458 | rb_remove_entries(cpu_buffer, bpage); | ||
2459 | out: | ||
2460 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2785 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2461 | 2786 | ||
2787 | out: | ||
2462 | return ret; | 2788 | return ret; |
2463 | } | 2789 | } |
2464 | 2790 | ||
@@ -2466,7 +2792,7 @@ static ssize_t | |||
2466 | rb_simple_read(struct file *filp, char __user *ubuf, | 2792 | rb_simple_read(struct file *filp, char __user *ubuf, |
2467 | size_t cnt, loff_t *ppos) | 2793 | size_t cnt, loff_t *ppos) |
2468 | { | 2794 | { |
2469 | long *p = filp->private_data; | 2795 | unsigned long *p = filp->private_data; |
2470 | char buf[64]; | 2796 | char buf[64]; |
2471 | int r; | 2797 | int r; |
2472 | 2798 | ||
@@ -2482,9 +2808,9 @@ static ssize_t | |||
2482 | rb_simple_write(struct file *filp, const char __user *ubuf, | 2808 | rb_simple_write(struct file *filp, const char __user *ubuf, |
2483 | size_t cnt, loff_t *ppos) | 2809 | size_t cnt, loff_t *ppos) |
2484 | { | 2810 | { |
2485 | long *p = filp->private_data; | 2811 | unsigned long *p = filp->private_data; |
2486 | char buf[64]; | 2812 | char buf[64]; |
2487 | long val; | 2813 | unsigned long val; |
2488 | int ret; | 2814 | int ret; |
2489 | 2815 | ||
2490 | if (cnt >= sizeof(buf)) | 2816 | if (cnt >= sizeof(buf)) |
@@ -2509,7 +2835,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
2509 | return cnt; | 2835 | return cnt; |
2510 | } | 2836 | } |
2511 | 2837 | ||
2512 | static struct file_operations rb_simple_fops = { | 2838 | static const struct file_operations rb_simple_fops = { |
2513 | .open = tracing_open_generic, | 2839 | .open = tracing_open_generic, |
2514 | .read = rb_simple_read, | 2840 | .read = rb_simple_read, |
2515 | .write = rb_simple_write, | 2841 | .write = rb_simple_write, |
@@ -2532,3 +2858,42 @@ static __init int rb_init_debugfs(void) | |||
2532 | } | 2858 | } |
2533 | 2859 | ||
2534 | fs_initcall(rb_init_debugfs); | 2860 | fs_initcall(rb_init_debugfs); |
2861 | |||
2862 | #ifdef CONFIG_HOTPLUG_CPU | ||
2863 | static int rb_cpu_notify(struct notifier_block *self, | ||
2864 | unsigned long action, void *hcpu) | ||
2865 | { | ||
2866 | struct ring_buffer *buffer = | ||
2867 | container_of(self, struct ring_buffer, cpu_notify); | ||
2868 | long cpu = (long)hcpu; | ||
2869 | |||
2870 | switch (action) { | ||
2871 | case CPU_UP_PREPARE: | ||
2872 | case CPU_UP_PREPARE_FROZEN: | ||
2873 | if (cpu_isset(cpu, *buffer->cpumask)) | ||
2874 | return NOTIFY_OK; | ||
2875 | |||
2876 | buffer->buffers[cpu] = | ||
2877 | rb_allocate_cpu_buffer(buffer, cpu); | ||
2878 | if (!buffer->buffers[cpu]) { | ||
2879 | WARN(1, "failed to allocate ring buffer on CPU %ld\n", | ||
2880 | cpu); | ||
2881 | return NOTIFY_OK; | ||
2882 | } | ||
2883 | smp_wmb(); | ||
2884 | cpu_set(cpu, *buffer->cpumask); | ||
2885 | break; | ||
2886 | case CPU_DOWN_PREPARE: | ||
2887 | case CPU_DOWN_PREPARE_FROZEN: | ||
2888 | /* | ||
2889 | * Do nothing. | ||
2890 | * If we were to free the buffer, then the user would | ||
2891 | * lose any trace that was in the buffer. | ||
2892 | */ | ||
2893 | break; | ||
2894 | default: | ||
2895 | break; | ||
2896 | } | ||
2897 | return NOTIFY_OK; | ||
2898 | } | ||
2899 | #endif | ||