path: root/kernel/trace/ring_buffer.c
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  709
1 files changed, 503 insertions, 206 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 30d57dd01a85..1d601a7c4587 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
25 * Turning this switch on, makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
28 * There's three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
36 * will permantly disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
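For reference, the recording check used by later hunks is "ring_buffer_flags != RB_BUFFERS_ON", so once the DISABLED bit is set the test can never pass again and tracing_on() becomes a no-op. A small userspace model of that behaviour (assumption: a plain long and bitwise ops stand in for the kernel's atomic set_bit/clear_bit):

#include <stdio.h>

/* Model of the two-bit switch above; not kernel code. */
enum {
	RB_BUFFERS_ON_BIT	= 0,
	RB_BUFFERS_DISABLED_BIT	= 1,
};
enum {
	RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
};

static long ring_buffer_flags = RB_BUFFERS_ON;

/* Writers record only while the flags word is exactly RB_BUFFERS_ON;
 * once DISABLED is set this test can never pass again. */
static int rb_recording_allowed(void)
{
	return ring_buffer_flags == RB_BUFFERS_ON;
}

int main(void)
{
	printf("%d\n", rb_recording_allowed());		/* 1: on */
	ring_buffer_flags |= RB_BUFFERS_DISABLED;	/* tracing_off_permanent() */
	ring_buffer_flags |= RB_BUFFERS_ON;		/* tracing_on() has no effect now */
	printf("%d\n", rb_recording_allowed());		/* 0: permanently off */
	return 0;
}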
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34EXPORT_SYMBOL_GPL(tracing_on); 72EXPORT_SYMBOL_GPL(tracing_on);
35 73
@@ -43,10 +81,23 @@ EXPORT_SYMBOL_GPL(tracing_on);
43 */ 81 */
44void tracing_off(void) 82void tracing_off(void)
45{ 83{
46 ring_buffers_off = 1; 84 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
47} 85}
48EXPORT_SYMBOL_GPL(tracing_off); 86EXPORT_SYMBOL_GPL(tracing_off);
49 87
88/**
89 * tracing_off_permanent - permanently disable ring buffers
90 *
91 * This function, once called, will disable all ring buffers
92 * permanenty.
93 */
94void tracing_off_permanent(void)
95{
96 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
97}
98
99#include "trace.h"
100
50/* Up this if you want to test the TIME_EXTENTS and normalization */ 101/* Up this if you want to test the TIME_EXTENTS and normalization */
51#define DEBUG_SHIFT 0 102#define DEBUG_SHIFT 0
52 103
@@ -58,7 +109,7 @@ u64 ring_buffer_time_stamp(int cpu)
58 preempt_disable_notrace(); 109 preempt_disable_notrace();
59 /* shift to debug/test normalization and TIME_EXTENTS */ 110 /* shift to debug/test normalization and TIME_EXTENTS */
60 time = sched_clock() << DEBUG_SHIFT; 111 time = sched_clock() << DEBUG_SHIFT;
61 preempt_enable_notrace(); 112 preempt_enable_no_resched_notrace();
62 113
63 return time; 114 return time;
64} 115}
@@ -150,20 +201,24 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
150#define TS_MASK ((1ULL << TS_SHIFT) - 1) 201#define TS_MASK ((1ULL << TS_SHIFT) - 1)
151#define TS_DELTA_TEST (~TS_MASK) 202#define TS_DELTA_TEST (~TS_MASK)
152 203
153/* 204struct buffer_data_page {
154 * This hack stolen from mm/slob.c.
155 * We can store per page timing information in the page frame of the page.
156 * Thanks to Peter Zijlstra for suggesting this idea.
157 */
158struct buffer_page {
159 u64 time_stamp; /* page time stamp */ 205 u64 time_stamp; /* page time stamp */
160 local_t write; /* index for next write */
161 local_t commit; /* write commited index */ 206 local_t commit; /* write commited index */
207 unsigned char data[]; /* data of buffer page */
208};
209
210struct buffer_page {
211 local_t write; /* index for next write */
162 unsigned read; /* index for next read */ 212 unsigned read; /* index for next read */
163 struct list_head list; /* list of free pages */ 213 struct list_head list; /* list of free pages */
164 void *page; /* Actual data page */ 214 struct buffer_data_page *page; /* Actual data page */
165}; 215};
166 216
217static void rb_init_page(struct buffer_data_page *bpage)
218{
219 local_set(&bpage->commit, 0);
220}
221
167/* 222/*
168 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 223 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
169 * this issue out. 224 * this issue out.
@@ -185,7 +240,7 @@ static inline int test_time_stamp(u64 delta)
185 return 0; 240 return 0;
186} 241}
187 242
188#define BUF_PAGE_SIZE PAGE_SIZE 243#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
189 244
190/* 245/*
191 * head_page == tail_page && head == tail then buffer is empty. 246 * head_page == tail_page && head == tail then buffer is empty.
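Because the time stamp and commit index now live inside the data page itself, the usable payload per page shrinks by the size of that header, which is exactly what the new BUF_PAGE_SIZE expresses. A userspace sketch of the layout arithmetic (assumptions: 4 KB pages and local_t modeled as long; real sizes are architecture dependent):

#include <stdio.h>
#include <stdint.h>

struct buffer_data_page {
	uint64_t time_stamp;	/* page time stamp */
	long	 commit;	/* stands in for local_t */
	char	 data[];	/* event payload starts here */
};

#define MODEL_PAGE_SIZE	4096UL
#define BUF_PAGE_SIZE	(MODEL_PAGE_SIZE - sizeof(struct buffer_data_page))

int main(void)
{
	printf("header: %zu bytes, payload per page: %lu bytes\n",
	       sizeof(struct buffer_data_page),
	       (unsigned long)BUF_PAGE_SIZE);
	return 0;
}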
@@ -193,7 +248,8 @@ static inline int test_time_stamp(u64 delta)
193struct ring_buffer_per_cpu { 248struct ring_buffer_per_cpu {
194 int cpu; 249 int cpu;
195 struct ring_buffer *buffer; 250 struct ring_buffer *buffer;
196 spinlock_t lock; 251 spinlock_t reader_lock; /* serialize readers */
252 raw_spinlock_t lock;
197 struct lock_class_key lock_key; 253 struct lock_class_key lock_key;
198 struct list_head pages; 254 struct list_head pages;
199 struct buffer_page *head_page; /* read from head */ 255 struct buffer_page *head_page; /* read from head */
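The per-cpu buffer now carries two locks: the new reader_lock (an ordinary spinlock serializing readers) and the raw spinlock that still guards the page lists against the writer's overwrite path. A fragment showing the nesting order used later in this patch on the reader side (not standalone code; names as in the diff):

	/* reader_lock is taken outermost, the raw lock only around the
	 * actual reader/head page manipulation (see ring_buffer_read_start()
	 * and ring_buffer_reset_cpu() below) */
	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
	__raw_spin_lock(&cpu_buffer->lock);

	/* ... swap or reset reader/head pages ... */

	__raw_spin_unlock(&cpu_buffer->lock);
	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);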
@@ -208,7 +264,6 @@ struct ring_buffer_per_cpu {
208}; 264};
209 265
210struct ring_buffer { 266struct ring_buffer {
211 unsigned long size;
212 unsigned pages; 267 unsigned pages;
213 unsigned flags; 268 unsigned flags;
214 int cpus; 269 int cpus;
@@ -227,32 +282,16 @@ struct ring_buffer_iter {
227 u64 read_stamp; 282 u64 read_stamp;
228}; 283};
229 284
285/* buffer may be either ring_buffer or ring_buffer_per_cpu */
230#define RB_WARN_ON(buffer, cond) \ 286#define RB_WARN_ON(buffer, cond) \
231 do { \ 287 ({ \
232 if (unlikely(cond)) { \ 288 int _____ret = unlikely(cond); \
233 atomic_inc(&buffer->record_disabled); \ 289 if (_____ret) { \
234 WARN_ON(1); \
235 } \
236 } while (0)
237
238#define RB_WARN_ON_RET(buffer, cond) \
239 do { \
240 if (unlikely(cond)) { \
241 atomic_inc(&buffer->record_disabled); \
242 WARN_ON(1); \
243 return -1; \
244 } \
245 } while (0)
246
247#define RB_WARN_ON_ONCE(buffer, cond) \
248 do { \
249 static int once; \
250 if (unlikely(cond) && !once) { \
251 once++; \
252 atomic_inc(&buffer->record_disabled); \ 290 atomic_inc(&buffer->record_disabled); \
253 WARN_ON(1); \ 291 WARN_ON(1); \
254 } \ 292 } \
255 } while (0) 293 _____ret; \
294 })
256 295
257/** 296/**
258 * check_pages - integrity check of buffer pages 297 * check_pages - integrity check of buffer pages
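The three old macros collapse into one GNU statement expression that also evaluates to the condition, so callers can warn and bail out in a single if. A compilable userspace sketch of the same shape (the atomic_inc()/WARN_ON() side effects are stubbed out):

#include <stdio.h>

static int record_disabled;	/* stands in for buffer->record_disabled */

#define RB_WARN_ON(cond)					\
	({							\
		int _____ret = !!(cond);			\
		if (_____ret) {					\
			record_disabled++;			\
			fprintf(stderr, "ring buffer warning\n");	\
		}						\
		_____ret;					\
	})

static int check_pages_model(int list_corrupt)
{
	if (RB_WARN_ON(list_corrupt))	/* replaces the old RB_WARN_ON_RET */
		return -1;
	return 0;
}

int main(void)
{
	printf("%d %d\n", check_pages_model(0), check_pages_model(1));
	return 0;
}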
@@ -264,16 +303,20 @@ struct ring_buffer_iter {
264static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 303static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
265{ 304{
266 struct list_head *head = &cpu_buffer->pages; 305 struct list_head *head = &cpu_buffer->pages;
267 struct buffer_page *page, *tmp; 306 struct buffer_page *bpage, *tmp;
268 307
269 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 308 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
270 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 309 return -1;
310 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
311 return -1;
271 312
272 list_for_each_entry_safe(page, tmp, head, list) { 313 list_for_each_entry_safe(bpage, tmp, head, list) {
273 RB_WARN_ON_RET(cpu_buffer, 314 if (RB_WARN_ON(cpu_buffer,
274 page->list.next->prev != &page->list); 315 bpage->list.next->prev != &bpage->list))
275 RB_WARN_ON_RET(cpu_buffer, 316 return -1;
276 page->list.prev->next != &page->list); 317 if (RB_WARN_ON(cpu_buffer,
318 bpage->list.prev->next != &bpage->list))
319 return -1;
277 } 320 }
278 321
279 return 0; 322 return 0;
@@ -283,22 +326,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
283 unsigned nr_pages) 326 unsigned nr_pages)
284{ 327{
285 struct list_head *head = &cpu_buffer->pages; 328 struct list_head *head = &cpu_buffer->pages;
286 struct buffer_page *page, *tmp; 329 struct buffer_page *bpage, *tmp;
287 unsigned long addr; 330 unsigned long addr;
288 LIST_HEAD(pages); 331 LIST_HEAD(pages);
289 unsigned i; 332 unsigned i;
290 333
291 for (i = 0; i < nr_pages; i++) { 334 for (i = 0; i < nr_pages; i++) {
292 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 335 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
293 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 336 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
294 if (!page) 337 if (!bpage)
295 goto free_pages; 338 goto free_pages;
296 list_add(&page->list, &pages); 339 list_add(&bpage->list, &pages);
297 340
298 addr = __get_free_page(GFP_KERNEL); 341 addr = __get_free_page(GFP_KERNEL);
299 if (!addr) 342 if (!addr)
300 goto free_pages; 343 goto free_pages;
301 page->page = (void *)addr; 344 bpage->page = (void *)addr;
345 rb_init_page(bpage->page);
302 } 346 }
303 347
304 list_splice(&pages, head); 348 list_splice(&pages, head);
@@ -308,9 +352,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
308 return 0; 352 return 0;
309 353
310 free_pages: 354 free_pages:
311 list_for_each_entry_safe(page, tmp, &pages, list) { 355 list_for_each_entry_safe(bpage, tmp, &pages, list) {
312 list_del_init(&page->list); 356 list_del_init(&bpage->list);
313 free_buffer_page(page); 357 free_buffer_page(bpage);
314 } 358 }
315 return -ENOMEM; 359 return -ENOMEM;
316} 360}
@@ -319,7 +363,7 @@ static struct ring_buffer_per_cpu *
319rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 363rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
320{ 364{
321 struct ring_buffer_per_cpu *cpu_buffer; 365 struct ring_buffer_per_cpu *cpu_buffer;
322 struct buffer_page *page; 366 struct buffer_page *bpage;
323 unsigned long addr; 367 unsigned long addr;
324 int ret; 368 int ret;
325 369
@@ -330,19 +374,21 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
330 374
331 cpu_buffer->cpu = cpu; 375 cpu_buffer->cpu = cpu;
332 cpu_buffer->buffer = buffer; 376 cpu_buffer->buffer = buffer;
333 spin_lock_init(&cpu_buffer->lock); 377 spin_lock_init(&cpu_buffer->reader_lock);
378 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
334 INIT_LIST_HEAD(&cpu_buffer->pages); 379 INIT_LIST_HEAD(&cpu_buffer->pages);
335 380
336 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 381 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
337 GFP_KERNEL, cpu_to_node(cpu)); 382 GFP_KERNEL, cpu_to_node(cpu));
338 if (!page) 383 if (!bpage)
339 goto fail_free_buffer; 384 goto fail_free_buffer;
340 385
341 cpu_buffer->reader_page = page; 386 cpu_buffer->reader_page = bpage;
342 addr = __get_free_page(GFP_KERNEL); 387 addr = __get_free_page(GFP_KERNEL);
343 if (!addr) 388 if (!addr)
344 goto fail_free_reader; 389 goto fail_free_reader;
345 page->page = (void *)addr; 390 bpage->page = (void *)addr;
391 rb_init_page(bpage->page);
346 392
347 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 393 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
348 394
@@ -367,14 +413,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
367static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 413static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
368{ 414{
369 struct list_head *head = &cpu_buffer->pages; 415 struct list_head *head = &cpu_buffer->pages;
370 struct buffer_page *page, *tmp; 416 struct buffer_page *bpage, *tmp;
371 417
372 list_del_init(&cpu_buffer->reader_page->list); 418 list_del_init(&cpu_buffer->reader_page->list);
373 free_buffer_page(cpu_buffer->reader_page); 419 free_buffer_page(cpu_buffer->reader_page);
374 420
375 list_for_each_entry_safe(page, tmp, head, list) { 421 list_for_each_entry_safe(bpage, tmp, head, list) {
376 list_del_init(&page->list); 422 list_del_init(&bpage->list);
377 free_buffer_page(page); 423 free_buffer_page(bpage);
378 } 424 }
379 kfree(cpu_buffer); 425 kfree(cpu_buffer);
380} 426}
@@ -473,7 +519,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
473static void 519static void
474rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 520rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
475{ 521{
476 struct buffer_page *page; 522 struct buffer_page *bpage;
477 struct list_head *p; 523 struct list_head *p;
478 unsigned i; 524 unsigned i;
479 525
@@ -481,13 +527,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
481 synchronize_sched(); 527 synchronize_sched();
482 528
483 for (i = 0; i < nr_pages; i++) { 529 for (i = 0; i < nr_pages; i++) {
484 BUG_ON(list_empty(&cpu_buffer->pages)); 530 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
531 return;
485 p = cpu_buffer->pages.next; 532 p = cpu_buffer->pages.next;
486 page = list_entry(p, struct buffer_page, list); 533 bpage = list_entry(p, struct buffer_page, list);
487 list_del_init(&page->list); 534 list_del_init(&bpage->list);
488 free_buffer_page(page); 535 free_buffer_page(bpage);
489 } 536 }
490 BUG_ON(list_empty(&cpu_buffer->pages)); 537 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
538 return;
491 539
492 rb_reset_cpu(cpu_buffer); 540 rb_reset_cpu(cpu_buffer);
493 541
@@ -501,7 +549,7 @@ static void
501rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 549rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
502 struct list_head *pages, unsigned nr_pages) 550 struct list_head *pages, unsigned nr_pages)
503{ 551{
504 struct buffer_page *page; 552 struct buffer_page *bpage;
505 struct list_head *p; 553 struct list_head *p;
506 unsigned i; 554 unsigned i;
507 555
@@ -509,11 +557,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
509 synchronize_sched(); 557 synchronize_sched();
510 558
511 for (i = 0; i < nr_pages; i++) { 559 for (i = 0; i < nr_pages; i++) {
512 BUG_ON(list_empty(pages)); 560 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
561 return;
513 p = pages->next; 562 p = pages->next;
514 page = list_entry(p, struct buffer_page, list); 563 bpage = list_entry(p, struct buffer_page, list);
515 list_del_init(&page->list); 564 list_del_init(&bpage->list);
516 list_add_tail(&page->list, &cpu_buffer->pages); 565 list_add_tail(&bpage->list, &cpu_buffer->pages);
517 } 566 }
518 rb_reset_cpu(cpu_buffer); 567 rb_reset_cpu(cpu_buffer);
519 568
@@ -540,7 +589,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
540{ 589{
541 struct ring_buffer_per_cpu *cpu_buffer; 590 struct ring_buffer_per_cpu *cpu_buffer;
542 unsigned nr_pages, rm_pages, new_pages; 591 unsigned nr_pages, rm_pages, new_pages;
543 struct buffer_page *page, *tmp; 592 struct buffer_page *bpage, *tmp;
544 unsigned long buffer_size; 593 unsigned long buffer_size;
545 unsigned long addr; 594 unsigned long addr;
546 LIST_HEAD(pages); 595 LIST_HEAD(pages);
@@ -570,7 +619,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
570 if (size < buffer_size) { 619 if (size < buffer_size) {
571 620
572 /* easy case, just free pages */ 621 /* easy case, just free pages */
573 BUG_ON(nr_pages >= buffer->pages); 622 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
623 mutex_unlock(&buffer->mutex);
624 return -1;
625 }
574 626
575 rm_pages = buffer->pages - nr_pages; 627 rm_pages = buffer->pages - nr_pages;
576 628
@@ -589,21 +641,26 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
589 * add these pages to the cpu_buffers. Otherwise we just free 641 * add these pages to the cpu_buffers. Otherwise we just free
590 * them all and return -ENOMEM; 642 * them all and return -ENOMEM;
591 */ 643 */
592 BUG_ON(nr_pages <= buffer->pages); 644 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
645 mutex_unlock(&buffer->mutex);
646 return -1;
647 }
648
593 new_pages = nr_pages - buffer->pages; 649 new_pages = nr_pages - buffer->pages;
594 650
595 for_each_buffer_cpu(buffer, cpu) { 651 for_each_buffer_cpu(buffer, cpu) {
596 for (i = 0; i < new_pages; i++) { 652 for (i = 0; i < new_pages; i++) {
597 page = kzalloc_node(ALIGN(sizeof(*page), 653 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
598 cache_line_size()), 654 cache_line_size()),
599 GFP_KERNEL, cpu_to_node(cpu)); 655 GFP_KERNEL, cpu_to_node(cpu));
600 if (!page) 656 if (!bpage)
601 goto free_pages; 657 goto free_pages;
602 list_add(&page->list, &pages); 658 list_add(&bpage->list, &pages);
603 addr = __get_free_page(GFP_KERNEL); 659 addr = __get_free_page(GFP_KERNEL);
604 if (!addr) 660 if (!addr)
605 goto free_pages; 661 goto free_pages;
606 page->page = (void *)addr; 662 bpage->page = (void *)addr;
663 rb_init_page(bpage->page);
607 } 664 }
608 } 665 }
609 666
@@ -612,7 +669,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
612 rb_insert_pages(cpu_buffer, &pages, new_pages); 669 rb_insert_pages(cpu_buffer, &pages, new_pages);
613 } 670 }
614 671
615 BUG_ON(!list_empty(&pages)); 672 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
673 mutex_unlock(&buffer->mutex);
674 return -1;
675 }
616 676
617 out: 677 out:
618 buffer->pages = nr_pages; 678 buffer->pages = nr_pages;
@@ -621,9 +681,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
621 return size; 681 return size;
622 682
623 free_pages: 683 free_pages:
624 list_for_each_entry_safe(page, tmp, &pages, list) { 684 list_for_each_entry_safe(bpage, tmp, &pages, list) {
625 list_del_init(&page->list); 685 list_del_init(&bpage->list);
626 free_buffer_page(page); 686 free_buffer_page(bpage);
627 } 687 }
628 mutex_unlock(&buffer->mutex); 688 mutex_unlock(&buffer->mutex);
629 return -ENOMEM; 689 return -ENOMEM;
@@ -635,9 +695,15 @@ static inline int rb_null_event(struct ring_buffer_event *event)
635 return event->type == RINGBUF_TYPE_PADDING; 695 return event->type == RINGBUF_TYPE_PADDING;
636} 696}
637 697
638static inline void *__rb_page_index(struct buffer_page *page, unsigned index) 698static inline void *
699__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
700{
701 return bpage->data + index;
702}
703
704static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
639{ 705{
640 return page->page + index; 706 return bpage->page->data + index;
641} 707}
642 708
643static inline struct ring_buffer_event * 709static inline struct ring_buffer_event *
@@ -667,7 +733,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
667 733
668static inline unsigned rb_page_commit(struct buffer_page *bpage) 734static inline unsigned rb_page_commit(struct buffer_page *bpage)
669{ 735{
670 return local_read(&bpage->commit); 736 return local_read(&bpage->page->commit);
671} 737}
672 738
673/* Size is determined by what has been commited */ 739/* Size is determined by what has been commited */
@@ -702,7 +768,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
702 head += rb_event_length(event)) { 768 head += rb_event_length(event)) {
703 769
704 event = __rb_page_index(cpu_buffer->head_page, head); 770 event = __rb_page_index(cpu_buffer->head_page, head);
705 BUG_ON(rb_null_event(event)); 771 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
772 return;
706 /* Only count data entries */ 773 /* Only count data entries */
707 if (event->type != RINGBUF_TYPE_DATA) 774 if (event->type != RINGBUF_TYPE_DATA)
708 continue; 775 continue;
@@ -712,14 +779,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
712} 779}
713 780
714static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 781static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
715 struct buffer_page **page) 782 struct buffer_page **bpage)
716{ 783{
717 struct list_head *p = (*page)->list.next; 784 struct list_head *p = (*bpage)->list.next;
718 785
719 if (p == &cpu_buffer->pages) 786 if (p == &cpu_buffer->pages)
720 p = p->next; 787 p = p->next;
721 788
722 *page = list_entry(p, struct buffer_page, list); 789 *bpage = list_entry(p, struct buffer_page, list);
723} 790}
724 791
725static inline unsigned 792static inline unsigned
@@ -755,16 +822,18 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
755 addr &= PAGE_MASK; 822 addr &= PAGE_MASK;
756 823
757 while (cpu_buffer->commit_page->page != (void *)addr) { 824 while (cpu_buffer->commit_page->page != (void *)addr) {
758 RB_WARN_ON(cpu_buffer, 825 if (RB_WARN_ON(cpu_buffer,
759 cpu_buffer->commit_page == cpu_buffer->tail_page); 826 cpu_buffer->commit_page == cpu_buffer->tail_page))
760 cpu_buffer->commit_page->commit = 827 return;
828 cpu_buffer->commit_page->page->commit =
761 cpu_buffer->commit_page->write; 829 cpu_buffer->commit_page->write;
762 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 830 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
763 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 831 cpu_buffer->write_stamp =
832 cpu_buffer->commit_page->page->time_stamp;
764 } 833 }
765 834
766 /* Now set the commit to the event's index */ 835 /* Now set the commit to the event's index */
767 local_set(&cpu_buffer->commit_page->commit, index); 836 local_set(&cpu_buffer->commit_page->page->commit, index);
768} 837}
769 838
770static inline void 839static inline void
@@ -778,25 +847,38 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
778 * back to us). This allows us to do a simple loop to 847 * back to us). This allows us to do a simple loop to
779 * assign the commit to the tail. 848 * assign the commit to the tail.
780 */ 849 */
850 again:
781 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 851 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
782 cpu_buffer->commit_page->commit = 852 cpu_buffer->commit_page->page->commit =
783 cpu_buffer->commit_page->write; 853 cpu_buffer->commit_page->write;
784 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 854 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
785 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 855 cpu_buffer->write_stamp =
856 cpu_buffer->commit_page->page->time_stamp;
786 /* add barrier to keep gcc from optimizing too much */ 857 /* add barrier to keep gcc from optimizing too much */
787 barrier(); 858 barrier();
788 } 859 }
789 while (rb_commit_index(cpu_buffer) != 860 while (rb_commit_index(cpu_buffer) !=
790 rb_page_write(cpu_buffer->commit_page)) { 861 rb_page_write(cpu_buffer->commit_page)) {
791 cpu_buffer->commit_page->commit = 862 cpu_buffer->commit_page->page->commit =
792 cpu_buffer->commit_page->write; 863 cpu_buffer->commit_page->write;
793 barrier(); 864 barrier();
794 } 865 }
866
867 /* again, keep gcc from optimizing */
868 barrier();
869
870 /*
871 * If an interrupt came in just after the first while loop
872 * and pushed the tail page forward, we will be left with
873 * a dangling commit that will never go forward.
874 */
875 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
876 goto again;
795} 877}
796 878
797static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 879static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
798{ 880{
799 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; 881 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
800 cpu_buffer->reader_page->read = 0; 882 cpu_buffer->reader_page->read = 0;
801} 883}
802 884
@@ -815,7 +897,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
815 else 897 else
816 rb_inc_page(cpu_buffer, &iter->head_page); 898 rb_inc_page(cpu_buffer, &iter->head_page);
817 899
818 iter->read_stamp = iter->head_page->time_stamp; 900 iter->read_stamp = iter->head_page->page->time_stamp;
819 iter->head = 0; 901 iter->head = 0;
820} 902}
821 903
@@ -889,12 +971,15 @@ static struct ring_buffer_event *
889__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 971__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
890 unsigned type, unsigned long length, u64 *ts) 972 unsigned type, unsigned long length, u64 *ts)
891{ 973{
892 struct buffer_page *tail_page, *head_page, *reader_page; 974 struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
893 unsigned long tail, write; 975 unsigned long tail, write;
894 struct ring_buffer *buffer = cpu_buffer->buffer; 976 struct ring_buffer *buffer = cpu_buffer->buffer;
895 struct ring_buffer_event *event; 977 struct ring_buffer_event *event;
896 unsigned long flags; 978 unsigned long flags;
897 979
980 commit_page = cpu_buffer->commit_page;
981 /* we just need to protect against interrupts */
982 barrier();
898 tail_page = cpu_buffer->tail_page; 983 tail_page = cpu_buffer->tail_page;
899 write = local_add_return(length, &tail_page->write); 984 write = local_add_return(length, &tail_page->write);
900 tail = write - length; 985 tail = write - length;
@@ -903,7 +988,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
903 if (write > BUF_PAGE_SIZE) { 988 if (write > BUF_PAGE_SIZE) {
904 struct buffer_page *next_page = tail_page; 989 struct buffer_page *next_page = tail_page;
905 990
906 spin_lock_irqsave(&cpu_buffer->lock, flags); 991 local_irq_save(flags);
992 __raw_spin_lock(&cpu_buffer->lock);
907 993
908 rb_inc_page(cpu_buffer, &next_page); 994 rb_inc_page(cpu_buffer, &next_page);
909 995
@@ -911,14 +997,15 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
911 reader_page = cpu_buffer->reader_page; 997 reader_page = cpu_buffer->reader_page;
912 998
913 /* we grabbed the lock before incrementing */ 999 /* we grabbed the lock before incrementing */
914 RB_WARN_ON(cpu_buffer, next_page == reader_page); 1000 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1001 goto out_unlock;
915 1002
916 /* 1003 /*
917 * If for some reason, we had an interrupt storm that made 1004 * If for some reason, we had an interrupt storm that made
918 * it all the way around the buffer, bail, and warn 1005 * it all the way around the buffer, bail, and warn
919 * about it. 1006 * about it.
920 */ 1007 */
921 if (unlikely(next_page == cpu_buffer->commit_page)) { 1008 if (unlikely(next_page == commit_page)) {
922 WARN_ON_ONCE(1); 1009 WARN_ON_ONCE(1);
923 goto out_unlock; 1010 goto out_unlock;
924 } 1011 }
@@ -949,12 +1036,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
949 */ 1036 */
950 if (tail_page == cpu_buffer->tail_page) { 1037 if (tail_page == cpu_buffer->tail_page) {
951 local_set(&next_page->write, 0); 1038 local_set(&next_page->write, 0);
952 local_set(&next_page->commit, 0); 1039 local_set(&next_page->page->commit, 0);
953 cpu_buffer->tail_page = next_page; 1040 cpu_buffer->tail_page = next_page;
954 1041
955 /* reread the time stamp */ 1042 /* reread the time stamp */
956 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1043 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
957 cpu_buffer->tail_page->time_stamp = *ts; 1044 cpu_buffer->tail_page->page->time_stamp = *ts;
958 } 1045 }
959 1046
960 /* 1047 /*
@@ -979,7 +1066,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
979 rb_set_commit_to_write(cpu_buffer); 1066 rb_set_commit_to_write(cpu_buffer);
980 } 1067 }
981 1068
982 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1069 __raw_spin_unlock(&cpu_buffer->lock);
1070 local_irq_restore(flags);
983 1071
984 /* fail and let the caller try again */ 1072 /* fail and let the caller try again */
985 return ERR_PTR(-EAGAIN); 1073 return ERR_PTR(-EAGAIN);
@@ -987,7 +1075,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
987 1075
988 /* We reserved something on the buffer */ 1076 /* We reserved something on the buffer */
989 1077
990 BUG_ON(write > BUF_PAGE_SIZE); 1078 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1079 return NULL;
991 1080
992 event = __rb_page_index(tail_page, tail); 1081 event = __rb_page_index(tail_page, tail);
993 rb_update_event(event, type, length); 1082 rb_update_event(event, type, length);
@@ -997,12 +1086,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
997 * this page's time stamp. 1086 * this page's time stamp.
998 */ 1087 */
999 if (!tail && rb_is_commit(cpu_buffer, event)) 1088 if (!tail && rb_is_commit(cpu_buffer, event))
1000 cpu_buffer->commit_page->time_stamp = *ts; 1089 cpu_buffer->commit_page->page->time_stamp = *ts;
1001 1090
1002 return event; 1091 return event;
1003 1092
1004 out_unlock: 1093 out_unlock:
1005 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1094 __raw_spin_unlock(&cpu_buffer->lock);
1095 local_irq_restore(flags);
1006 return NULL; 1096 return NULL;
1007} 1097}
1008 1098
@@ -1047,7 +1137,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1047 event->time_delta = *delta & TS_MASK; 1137 event->time_delta = *delta & TS_MASK;
1048 event->array[0] = *delta >> TS_SHIFT; 1138 event->array[0] = *delta >> TS_SHIFT;
1049 } else { 1139 } else {
1050 cpu_buffer->commit_page->time_stamp = *ts; 1140 cpu_buffer->commit_page->page->time_stamp = *ts;
1051 event->time_delta = 0; 1141 event->time_delta = 0;
1052 event->array[0] = 0; 1142 event->array[0] = 0;
1053 } 1143 }
@@ -1085,10 +1175,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1085 * storm or we have something buggy. 1175 * storm or we have something buggy.
1086 * Bail! 1176 * Bail!
1087 */ 1177 */
1088 if (unlikely(++nr_loops > 1000)) { 1178 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1089 RB_WARN_ON(cpu_buffer, 1);
1090 return NULL; 1179 return NULL;
1091 }
1092 1180
1093 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1181 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1094 1182
@@ -1184,15 +1272,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1184 struct ring_buffer_event *event; 1272 struct ring_buffer_event *event;
1185 int cpu, resched; 1273 int cpu, resched;
1186 1274
1187 if (ring_buffers_off) 1275 if (ring_buffer_flags != RB_BUFFERS_ON)
1188 return NULL; 1276 return NULL;
1189 1277
1190 if (atomic_read(&buffer->record_disabled)) 1278 if (atomic_read(&buffer->record_disabled))
1191 return NULL; 1279 return NULL;
1192 1280
1193 /* If we are tracing schedule, we don't want to recurse */ 1281 /* If we are tracing schedule, we don't want to recurse */
1194 resched = need_resched(); 1282 resched = ftrace_preempt_disable();
1195 preempt_disable_notrace();
1196 1283
1197 cpu = raw_smp_processor_id(); 1284 cpu = raw_smp_processor_id();
1198 1285
@@ -1223,10 +1310,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1223 return event; 1310 return event;
1224 1311
1225 out: 1312 out:
1226 if (resched) 1313 ftrace_preempt_enable(resched);
1227 preempt_enable_no_resched_notrace();
1228 else
1229 preempt_enable_notrace();
1230 return NULL; 1314 return NULL;
1231} 1315}
1232EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 1316EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
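ftrace_preempt_disable()/ftrace_preempt_enable() come from trace.h and simply package the open-coded sequence deleted above; a fragment restating that removed pattern for reference (the helpers' exact bodies live in trace.h and may differ in detail):

	/* remember whether a reschedule was already pending, disable
	 * preemption without tracing it, and pick the matching re-enable
	 * on the way out so the tracer never forces an extra schedule */
	int resched = need_resched();
	preempt_disable_notrace();

	/* ... reserve and write the event ... */

	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();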
@@ -1269,12 +1353,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1269 /* 1353 /*
1270 * Only the last preempt count needs to restore preemption. 1354 * Only the last preempt count needs to restore preemption.
1271 */ 1355 */
1272 if (preempt_count() == 1) { 1356 if (preempt_count() == 1)
1273 if (per_cpu(rb_need_resched, cpu)) 1357 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1274 preempt_enable_no_resched_notrace(); 1358 else
1275 else
1276 preempt_enable_notrace();
1277 } else
1278 preempt_enable_no_resched_notrace(); 1359 preempt_enable_no_resched_notrace();
1279 1360
1280 return 0; 1361 return 0;
@@ -1305,14 +1386,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1305 int ret = -EBUSY; 1386 int ret = -EBUSY;
1306 int cpu, resched; 1387 int cpu, resched;
1307 1388
1308 if (ring_buffers_off) 1389 if (ring_buffer_flags != RB_BUFFERS_ON)
1309 return -EBUSY; 1390 return -EBUSY;
1310 1391
1311 if (atomic_read(&buffer->record_disabled)) 1392 if (atomic_read(&buffer->record_disabled))
1312 return -EBUSY; 1393 return -EBUSY;
1313 1394
1314 resched = need_resched(); 1395 resched = ftrace_preempt_disable();
1315 preempt_disable_notrace();
1316 1396
1317 cpu = raw_smp_processor_id(); 1397 cpu = raw_smp_processor_id();
1318 1398
@@ -1338,10 +1418,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1338 1418
1339 ret = 0; 1419 ret = 0;
1340 out: 1420 out:
1341 if (resched) 1421 ftrace_preempt_enable(resched);
1342 preempt_enable_no_resched_notrace();
1343 else
1344 preempt_enable_notrace();
1345 1422
1346 return ret; 1423 return ret;
1347} 1424}
@@ -1509,14 +1586,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1509} 1586}
1510EXPORT_SYMBOL_GPL(ring_buffer_overruns); 1587EXPORT_SYMBOL_GPL(ring_buffer_overruns);
1511 1588
1512/** 1589static void rb_iter_reset(struct ring_buffer_iter *iter)
1513 * ring_buffer_iter_reset - reset an iterator
1514 * @iter: The iterator to reset
1515 *
1516 * Resets the iterator, so that it will start from the beginning
1517 * again.
1518 */
1519void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1520{ 1590{
1521 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1591 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1522 1592
@@ -1531,7 +1601,24 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1531 if (iter->head) 1601 if (iter->head)
1532 iter->read_stamp = cpu_buffer->read_stamp; 1602 iter->read_stamp = cpu_buffer->read_stamp;
1533 else 1603 else
1534 iter->read_stamp = iter->head_page->time_stamp; 1604 iter->read_stamp = iter->head_page->page->time_stamp;
1605}
1606
1607/**
1608 * ring_buffer_iter_reset - reset an iterator
1609 * @iter: The iterator to reset
1610 *
1611 * Resets the iterator, so that it will start from the beginning
1612 * again.
1613 */
1614void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1615{
1616 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1617 unsigned long flags;
1618
1619 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1620 rb_iter_reset(iter);
1621 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1535} 1622}
1536EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); 1623EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
1537 1624
@@ -1619,7 +1706,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 unsigned long flags; 1706 unsigned long flags;
1620 int nr_loops = 0; 1707 int nr_loops = 0;
1621 1708
1622 spin_lock_irqsave(&cpu_buffer->lock, flags); 1709 local_irq_save(flags);
1710 __raw_spin_lock(&cpu_buffer->lock);
1623 1711
1624 again: 1712 again:
1625 /* 1713 /*
@@ -1628,8 +1716,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1628 * a case where we will loop three times. There should be no 1716 * a case where we will loop three times. There should be no
1629 * reason to loop four times (that I know of). 1717 * reason to loop four times (that I know of).
1630 */ 1718 */
1631 if (unlikely(++nr_loops > 3)) { 1719 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1632 RB_WARN_ON(cpu_buffer, 1);
1633 reader = NULL; 1720 reader = NULL;
1634 goto out; 1721 goto out;
1635 } 1722 }
@@ -1641,8 +1728,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1641 goto out; 1728 goto out;
1642 1729
1643 /* Never should we have an index greater than the size */ 1730 /* Never should we have an index greater than the size */
1644 RB_WARN_ON(cpu_buffer, 1731 if (RB_WARN_ON(cpu_buffer,
1645 cpu_buffer->reader_page->read > rb_page_size(reader)); 1732 cpu_buffer->reader_page->read > rb_page_size(reader)))
1733 goto out;
1646 1734
1647 /* check if we caught up to the tail */ 1735 /* check if we caught up to the tail */
1648 reader = NULL; 1736 reader = NULL;
@@ -1659,7 +1747,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 cpu_buffer->reader_page->list.prev = reader->list.prev; 1747 cpu_buffer->reader_page->list.prev = reader->list.prev;
1660 1748
1661 local_set(&cpu_buffer->reader_page->write, 0); 1749 local_set(&cpu_buffer->reader_page->write, 0);
1662 local_set(&cpu_buffer->reader_page->commit, 0); 1750 local_set(&cpu_buffer->reader_page->page->commit, 0);
1663 1751
1664 /* Make the reader page now replace the head */ 1752 /* Make the reader page now replace the head */
1665 reader->list.prev->next = &cpu_buffer->reader_page->list; 1753 reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1681,7 +1769,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1681 goto again; 1769 goto again;
1682 1770
1683 out: 1771 out:
1684 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1772 __raw_spin_unlock(&cpu_buffer->lock);
1773 local_irq_restore(flags);
1685 1774
1686 return reader; 1775 return reader;
1687} 1776}
@@ -1695,7 +1784,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1695 reader = rb_get_reader_page(cpu_buffer); 1784 reader = rb_get_reader_page(cpu_buffer);
1696 1785
1697 /* This function should not be called when buffer is empty */ 1786 /* This function should not be called when buffer is empty */
1698 BUG_ON(!reader); 1787 if (RB_WARN_ON(cpu_buffer, !reader))
1788 return;
1699 1789
1700 event = rb_reader_event(cpu_buffer); 1790 event = rb_reader_event(cpu_buffer);
1701 1791
@@ -1722,7 +1812,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1722 * Check if we are at the end of the buffer. 1812 * Check if we are at the end of the buffer.
1723 */ 1813 */
1724 if (iter->head >= rb_page_size(iter->head_page)) { 1814 if (iter->head >= rb_page_size(iter->head_page)) {
1725 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1815 if (RB_WARN_ON(buffer,
1816 iter->head_page == cpu_buffer->commit_page))
1817 return;
1726 rb_inc_iter(iter); 1818 rb_inc_iter(iter);
1727 return; 1819 return;
1728 } 1820 }
@@ -1735,8 +1827,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1735 * This should not be called to advance the header if we are 1827 * This should not be called to advance the header if we are
1736 * at the tail of the buffer. 1828 * at the tail of the buffer.
1737 */ 1829 */
1738 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1830 if (RB_WARN_ON(cpu_buffer,
1739 (iter->head + length > rb_commit_index(cpu_buffer))); 1831 (iter->head_page == cpu_buffer->commit_page) &&
1832 (iter->head + length > rb_commit_index(cpu_buffer))))
1833 return;
1740 1834
1741 rb_update_iter_read_stamp(iter, event); 1835 rb_update_iter_read_stamp(iter, event);
1742 1836
@@ -1748,17 +1842,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1748 rb_advance_iter(iter); 1842 rb_advance_iter(iter);
1749} 1843}
1750 1844
1751/** 1845static struct ring_buffer_event *
1752 * ring_buffer_peek - peek at the next event to be read 1846rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1753 * @buffer: The ring buffer to read
1754 * @cpu: The cpu to peak at
1755 * @ts: The timestamp counter of this event.
1756 *
1757 * This will return the event that will be read next, but does
1758 * not consume the data.
1759 */
1760struct ring_buffer_event *
1761ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1762{ 1847{
1763 struct ring_buffer_per_cpu *cpu_buffer; 1848 struct ring_buffer_per_cpu *cpu_buffer;
1764 struct ring_buffer_event *event; 1849 struct ring_buffer_event *event;
@@ -1779,10 +1864,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1779 * can have. Nesting 10 deep of interrupts is clearly 1864 * can have. Nesting 10 deep of interrupts is clearly
1780 * an anomaly. 1865 * an anomaly.
1781 */ 1866 */
1782 if (unlikely(++nr_loops > 10)) { 1867 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1783 RB_WARN_ON(cpu_buffer, 1);
1784 return NULL; 1868 return NULL;
1785 }
1786 1869
1787 reader = rb_get_reader_page(cpu_buffer); 1870 reader = rb_get_reader_page(cpu_buffer);
1788 if (!reader) 1871 if (!reader)
@@ -1821,16 +1904,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1821} 1904}
1822EXPORT_SYMBOL_GPL(ring_buffer_peek); 1905EXPORT_SYMBOL_GPL(ring_buffer_peek);
1823 1906
1824/** 1907static struct ring_buffer_event *
1825 * ring_buffer_iter_peek - peek at the next event to be read 1908rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1826 * @iter: The ring buffer iterator
1827 * @ts: The timestamp counter of this event.
1828 *
1829 * This will return the event that will be read next, but does
1830 * not increment the iterator.
1831 */
1832struct ring_buffer_event *
1833ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1834{ 1909{
1835 struct ring_buffer *buffer; 1910 struct ring_buffer *buffer;
1836 struct ring_buffer_per_cpu *cpu_buffer; 1911 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1852,10 +1927,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1852 * can have. Nesting 10 deep of interrupts is clearly 1927 * can have. Nesting 10 deep of interrupts is clearly
1853 * an anomaly. 1928 * an anomaly.
1854 */ 1929 */
1855 if (unlikely(++nr_loops > 10)) { 1930 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1856 RB_WARN_ON(cpu_buffer, 1);
1857 return NULL; 1931 return NULL;
1858 }
1859 1932
1860 if (rb_per_cpu_empty(cpu_buffer)) 1933 if (rb_per_cpu_empty(cpu_buffer))
1861 return NULL; 1934 return NULL;
@@ -1893,6 +1966,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1893EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); 1966EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
1894 1967
1895/** 1968/**
1969 * ring_buffer_peek - peek at the next event to be read
1970 * @buffer: The ring buffer to read
1971 * @cpu: The cpu to peak at
1972 * @ts: The timestamp counter of this event.
1973 *
1974 * This will return the event that will be read next, but does
1975 * not consume the data.
1976 */
1977struct ring_buffer_event *
1978ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1979{
1980 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1981 struct ring_buffer_event *event;
1982 unsigned long flags;
1983
1984 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1985 event = rb_buffer_peek(buffer, cpu, ts);
1986 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1987
1988 return event;
1989}
1990
1991/**
1992 * ring_buffer_iter_peek - peek at the next event to be read
1993 * @iter: The ring buffer iterator
1994 * @ts: The timestamp counter of this event.
1995 *
1996 * This will return the event that will be read next, but does
1997 * not increment the iterator.
1998 */
1999struct ring_buffer_event *
2000ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2001{
2002 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2003 struct ring_buffer_event *event;
2004 unsigned long flags;
2005
2006 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2007 event = rb_iter_peek(iter, ts);
2008 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2009
2010 return event;
2011}
2012
2013/**
1896 * ring_buffer_consume - return an event and consume it 2014 * ring_buffer_consume - return an event and consume it
1897 * @buffer: The ring buffer to get the next event from 2015 * @buffer: The ring buffer to get the next event from
1898 * 2016 *
@@ -1903,19 +2021,24 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
1903struct ring_buffer_event * 2021struct ring_buffer_event *
1904ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 2022ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1905{ 2023{
1906 struct ring_buffer_per_cpu *cpu_buffer; 2024 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1907 struct ring_buffer_event *event; 2025 struct ring_buffer_event *event;
2026 unsigned long flags;
1908 2027
1909 if (!cpu_isset(cpu, buffer->cpumask)) 2028 if (!cpu_isset(cpu, buffer->cpumask))
1910 return NULL; 2029 return NULL;
1911 2030
1912 event = ring_buffer_peek(buffer, cpu, ts); 2031 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2032
2033 event = rb_buffer_peek(buffer, cpu, ts);
1913 if (!event) 2034 if (!event)
1914 return NULL; 2035 goto out;
1915 2036
1916 cpu_buffer = buffer->buffers[cpu];
1917 rb_advance_reader(cpu_buffer); 2037 rb_advance_reader(cpu_buffer);
1918 2038
2039 out:
2040 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2041
1919 return event; 2042 return event;
1920} 2043}
1921EXPORT_SYMBOL_GPL(ring_buffer_consume); 2044EXPORT_SYMBOL_GPL(ring_buffer_consume);
@@ -1953,9 +2076,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1953 atomic_inc(&cpu_buffer->record_disabled); 2076 atomic_inc(&cpu_buffer->record_disabled);
1954 synchronize_sched(); 2077 synchronize_sched();
1955 2078
1956 spin_lock_irqsave(&cpu_buffer->lock, flags); 2079 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1957 ring_buffer_iter_reset(iter); 2080 __raw_spin_lock(&cpu_buffer->lock);
1958 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2081 rb_iter_reset(iter);
2082 __raw_spin_unlock(&cpu_buffer->lock);
2083 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1959 2084
1960 return iter; 2085 return iter;
1961} 2086}
@@ -1989,12 +2114,17 @@ struct ring_buffer_event *
1989ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2114ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1990{ 2115{
1991 struct ring_buffer_event *event; 2116 struct ring_buffer_event *event;
2117 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2118 unsigned long flags;
1992 2119
1993 event = ring_buffer_iter_peek(iter, ts); 2120 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2121 event = rb_iter_peek(iter, ts);
1994 if (!event) 2122 if (!event)
1995 return NULL; 2123 goto out;
1996 2124
1997 rb_advance_iter(iter); 2125 rb_advance_iter(iter);
2126 out:
2127 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1998 2128
1999 return event; 2129 return event;
2000} 2130}
@@ -2016,7 +2146,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2016 cpu_buffer->head_page 2146 cpu_buffer->head_page
2017 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2147 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
2018 local_set(&cpu_buffer->head_page->write, 0); 2148 local_set(&cpu_buffer->head_page->write, 0);
2019 local_set(&cpu_buffer->head_page->commit, 0); 2149 local_set(&cpu_buffer->head_page->page->commit, 0);
2020 2150
2021 cpu_buffer->head_page->read = 0; 2151 cpu_buffer->head_page->read = 0;
2022 2152
@@ -2025,7 +2155,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2025 2155
2026 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2156 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
2027 local_set(&cpu_buffer->reader_page->write, 0); 2157 local_set(&cpu_buffer->reader_page->write, 0);
2028 local_set(&cpu_buffer->reader_page->commit, 0); 2158 local_set(&cpu_buffer->reader_page->page->commit, 0);
2029 cpu_buffer->reader_page->read = 0; 2159 cpu_buffer->reader_page->read = 0;
2030 2160
2031 cpu_buffer->overrun = 0; 2161 cpu_buffer->overrun = 0;
@@ -2045,11 +2175,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2045 if (!cpu_isset(cpu, buffer->cpumask)) 2175 if (!cpu_isset(cpu, buffer->cpumask))
2046 return; 2176 return;
2047 2177
2048 spin_lock_irqsave(&cpu_buffer->lock, flags); 2178 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2179
2180 __raw_spin_lock(&cpu_buffer->lock);
2049 2181
2050 rb_reset_cpu(cpu_buffer); 2182 rb_reset_cpu(cpu_buffer);
2051 2183
2052 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2184 __raw_spin_unlock(&cpu_buffer->lock);
2185
2186 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2053} 2187}
2054EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 2188EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
2055 2189
@@ -2123,8 +2257,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2123 return -EINVAL; 2257 return -EINVAL;
2124 2258
2125 /* At least make sure the two buffers are somewhat the same */ 2259 /* At least make sure the two buffers are somewhat the same */
2126 if (buffer_a->size != buffer_b->size || 2260 if (buffer_a->pages != buffer_b->pages)
2127 buffer_a->pages != buffer_b->pages)
2128 return -EINVAL; 2261 return -EINVAL;
2129 2262
2130 cpu_buffer_a = buffer_a->buffers[cpu]; 2263 cpu_buffer_a = buffer_a->buffers[cpu];
@@ -2152,16 +2285,178 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2152} 2285}
2153EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2286EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
2154 2287
2288static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2289 struct buffer_data_page *bpage)
2290{
2291 struct ring_buffer_event *event;
2292 unsigned long head;
2293
2294 __raw_spin_lock(&cpu_buffer->lock);
2295 for (head = 0; head < local_read(&bpage->commit);
2296 head += rb_event_length(event)) {
2297
2298 event = __rb_data_page_index(bpage, head);
2299 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2300 return;
2301 /* Only count data entries */
2302 if (event->type != RINGBUF_TYPE_DATA)
2303 continue;
2304 cpu_buffer->entries--;
2305 }
2306 __raw_spin_unlock(&cpu_buffer->lock);
2307}
2308
2309/**
2310 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2311 * @buffer: the buffer to allocate for.
2312 *
2313 * This function is used in conjunction with ring_buffer_read_page.
2314 * When reading a full page from the ring buffer, these functions
2315 * can be used to speed up the process. The calling function should
2316 * allocate a few pages first with this function. Then when it
2317 * needs to get pages from the ring buffer, it passes the result
2318 * of this function into ring_buffer_read_page, which will swap
2319 * the page that was allocated, with the read page of the buffer.
2320 *
2321 * Returns:
2322 * The page allocated, or NULL on error.
2323 */
2324void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2325{
2326 unsigned long addr;
2327 struct buffer_data_page *bpage;
2328
2329 addr = __get_free_page(GFP_KERNEL);
2330 if (!addr)
2331 return NULL;
2332
2333 bpage = (void *)addr;
2334
2335 return bpage;
2336}
2337
2338/**
2339 * ring_buffer_free_read_page - free an allocated read page
2340 * @buffer: the buffer the page was allocate for
2341 * @data: the page to free
2342 *
2343 * Free a page allocated from ring_buffer_alloc_read_page.
2344 */
2345void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2346{
2347 free_page((unsigned long)data);
2348}
2349
2350/**
2351 * ring_buffer_read_page - extract a page from the ring buffer
2352 * @buffer: buffer to extract from
2353 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2354 * @cpu: the cpu of the buffer to extract
2355 * @full: should the extraction only happen when the page is full.
2356 *
2357 * This function will pull out a page from the ring buffer and consume it.
2358 * @data_page must be the address of the variable that was returned
2359 * from ring_buffer_alloc_read_page. This is because the page might be used
2360 * to swap with a page in the ring buffer.
2361 *
2362 * for example:
2363 * rpage = ring_buffer_alloc_page(buffer);
2364 * if (!rpage)
2365 * return error;
2366 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2367 * if (ret)
2368 * process_page(rpage);
2369 *
2370 * When @full is set, the function will not return true unless
2371 * the writer is off the reader page.
2372 *
2373 * Note: it is up to the calling functions to handle sleeps and wakeups.
2374 * The ring buffer can be used anywhere in the kernel and can not
2375 * blindly call wake_up. The layer that uses the ring buffer must be
2376 * responsible for that.
2377 *
2378 * Returns:
2379 * 1 if data has been transferred
2380 * 0 if no data has been transferred.
2381 */
2382int ring_buffer_read_page(struct ring_buffer *buffer,
2383 void **data_page, int cpu, int full)
2384{
2385 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2386 struct ring_buffer_event *event;
2387 struct buffer_data_page *bpage;
2388 unsigned long flags;
2389 int ret = 0;
2390
2391 if (!data_page)
2392 return 0;
2393
2394 bpage = *data_page;
2395 if (!bpage)
2396 return 0;
2397
2398 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2399
2400 /*
2401 * rb_buffer_peek will get the next ring buffer if
2402 * the current reader page is empty.
2403 */
2404 event = rb_buffer_peek(buffer, cpu, NULL);
2405 if (!event)
2406 goto out;
2407
2408 /* check for data */
2409 if (!local_read(&cpu_buffer->reader_page->page->commit))
2410 goto out;
2411 /*
2412 * If the writer is already off of the read page, then simply
2413 * switch the read page with the given page. Otherwise
2414 * we need to copy the data from the reader to the writer.
2415 */
2416 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2417 unsigned int read = cpu_buffer->reader_page->read;
2418
2419 if (full)
2420 goto out;
2421 /* The writer is still on the reader page, we must copy */
2422 bpage = cpu_buffer->reader_page->page;
2423 memcpy(bpage->data,
2424 cpu_buffer->reader_page->page->data + read,
2425 local_read(&bpage->commit) - read);
2426
2427 /* consume what was read */
2428 cpu_buffer->reader_page += read;
2429
2430 } else {
2431 /* swap the pages */
2432 rb_init_page(bpage);
2433 bpage = cpu_buffer->reader_page->page;
2434 cpu_buffer->reader_page->page = *data_page;
2435 cpu_buffer->reader_page->read = 0;
2436 *data_page = bpage;
2437 }
2438 ret = 1;
2439
2440 /* update the entry counter */
2441 rb_remove_entries(cpu_buffer, bpage);
2442 out:
2443 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2444
2445 return ret;
2446}
2447
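A consumer-side sketch of the new page-level API, following the kerneldoc above (kernel context assumed; process_page() is the hypothetical callback from that example, and error handling is trimmed):

	/* drain one CPU's events a page at a time */
	void *rpage = ring_buffer_alloc_read_page(buffer);

	if (rpage) {
		/* full == 0: accept a partially filled reader page too */
		while (ring_buffer_read_page(buffer, &rpage, cpu, 0)) {
			/* rpage now holds a buffer_data_page worth of events;
			 * it may be the very page swapped out of the ring */
			process_page(rpage);
		}
		ring_buffer_free_read_page(buffer, rpage);
	}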
2155static ssize_t 2448static ssize_t
2156rb_simple_read(struct file *filp, char __user *ubuf, 2449rb_simple_read(struct file *filp, char __user *ubuf,
2157 size_t cnt, loff_t *ppos) 2450 size_t cnt, loff_t *ppos)
2158{ 2451{
2159 int *p = filp->private_data; 2452 long *p = filp->private_data;
2160 char buf[64]; 2453 char buf[64];
2161 int r; 2454 int r;
2162 2455
2163 /* !ring_buffers_off == tracing_on */ 2456 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2164 r = sprintf(buf, "%d\n", !*p); 2457 r = sprintf(buf, "permanently disabled\n");
2458 else
2459 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2165 2460
2166 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2461 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2167} 2462}
@@ -2170,7 +2465,7 @@ static ssize_t
2170rb_simple_write(struct file *filp, const char __user *ubuf, 2465rb_simple_write(struct file *filp, const char __user *ubuf,
2171 size_t cnt, loff_t *ppos) 2466 size_t cnt, loff_t *ppos)
2172{ 2467{
2173 int *p = filp->private_data; 2468 long *p = filp->private_data;
2174 char buf[64]; 2469 char buf[64];
2175 long val; 2470 long val;
2176 int ret; 2471 int ret;
@@ -2187,8 +2482,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2187 if (ret < 0) 2482 if (ret < 0)
2188 return ret; 2483 return ret;
2189 2484
2190 /* !ring_buffers_off == tracing_on */ 2485 if (val)
2191 *p = !val; 2486 set_bit(RB_BUFFERS_ON_BIT, p);
2487 else
2488 clear_bit(RB_BUFFERS_ON_BIT, p);
2192 2489
2193 (*ppos)++; 2490 (*ppos)++;
2194 2491
@@ -2210,7 +2507,7 @@ static __init int rb_init_debugfs(void)
2210 d_tracer = tracing_init_dentry(); 2507 d_tracer = tracing_init_dentry();
2211 2508
2212 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2509 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2213 &ring_buffers_off, &rb_simple_fops); 2510 &ring_buffer_flags, &rb_simple_fops);
2214 if (!entry) 2511 if (!entry)
2215 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2512 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2216 2513
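With this change the debugfs 'tracing_on' file reports three states instead of two. A userspace model of the read-side decision (assumption: same bit layout as the flags above):

#include <stdio.h>

#define RB_BUFFERS_ON_BIT	0
#define RB_BUFFERS_DISABLED_BIT	1

static void show_tracing_on(long flags)
{
	if (flags & (1L << RB_BUFFERS_DISABLED_BIT))
		printf("permanently disabled\n");
	else
		printf("%d\n", !!(flags & (1L << RB_BUFFERS_ON_BIT)));
}

int main(void)
{
	show_tracing_on(1);	/* tracing on  -> "1" */
	show_tracing_on(0);	/* tracing off -> "0" */
	show_tracing_on(3);	/* after tracing_off_permanent() -> "permanently disabled" */
	return 0;
}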