Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  684
1 file changed, 484 insertions(+), 200 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 668bbb5ef2bd..7f69cfeaadf7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
25 * Turning this switch on makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,9 +80,22 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
46} 84}
47 85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
90 * permanently.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
95}
96
97#include "trace.h"
98
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 99/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 100#define DEBUG_SHIFT 0
50 101
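The write path now has to clear all three gates described in the comment block introduced above. A minimal sketch of that combined check, using the names this patch introduces (the helper rb_recording_allowed() itself is hypothetical; the individual tests mirror the ones in ring_buffer_lock_reserve() and ring_buffer_write() further down):

	/* Hypothetical helper, for illustration only: the three recording gates. */
	static int rb_recording_allowed(struct ring_buffer *buffer,
					struct ring_buffer_per_cpu *cpu_buffer)
	{
		/* 1) global flag: must be "on" and not permanently disabled */
		if (ring_buffer_flags != RB_BUFFERS_ON)
			return 0;
		/* 2) the ring buffer itself must be enabled for recording */
		if (atomic_read(&buffer->record_disabled))
			return 0;
		/* 3) the per-cpu buffer must be enabled for recording */
		if (atomic_read(&cpu_buffer->record_disabled))
			return 0;
		return 1;
	}

Note that the first test also covers tracing_off_permanent(): once RB_BUFFERS_DISABLED is set, ring_buffer_flags can never equal RB_BUFFERS_ON again.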
@@ -144,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
144#define TS_MASK ((1ULL << TS_SHIFT) - 1) 195#define TS_MASK ((1ULL << TS_SHIFT) - 1)
145#define TS_DELTA_TEST (~TS_MASK) 196#define TS_DELTA_TEST (~TS_MASK)
146 197
147/* 198struct buffer_data_page {
148 * This hack stolen from mm/slob.c.
149 * We can store per page timing information in the page frame of the page.
150 * Thanks to Peter Zijlstra for suggesting this idea.
151 */
152struct buffer_page {
153 u64 time_stamp; /* page time stamp */ 199 u64 time_stamp; /* page time stamp */
154 local_t write; /* index for next write */
155 local_t commit; /* write committed index */ 200 local_t commit; /* write committed index */
201 unsigned char data[]; /* data of buffer page */
202};
203
204struct buffer_page {
205 local_t write; /* index for next write */
156 unsigned read; /* index for next read */ 206 unsigned read; /* index for next read */
157 struct list_head list; /* list of free pages */ 207 struct list_head list; /* list of free pages */
158 void *page; /* Actual data page */ 208 struct buffer_data_page *page; /* Actual data page */
159}; 209};
160 210
211static void rb_init_page(struct buffer_data_page *bpage)
212{
213 local_set(&bpage->commit, 0);
214}
215
161/* 216/*
162 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 217 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
163 * this issue out. 218 * this issue out.
@@ -179,7 +234,7 @@ static inline int test_time_stamp(u64 delta)
179 return 0; 234 return 0;
180} 235}
181 236
182#define BUF_PAGE_SIZE PAGE_SIZE 237#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
183 238
184/* 239/*
185 * head_page == tail_page && head == tail then buffer is empty. 240 * head_page == tail_page && head == tail then buffer is empty.
@@ -187,7 +242,8 @@ static inline int test_time_stamp(u64 delta)
187struct ring_buffer_per_cpu { 242struct ring_buffer_per_cpu {
188 int cpu; 243 int cpu;
189 struct ring_buffer *buffer; 244 struct ring_buffer *buffer;
190 spinlock_t lock; 245 spinlock_t reader_lock; /* serialize readers */
246 raw_spinlock_t lock;
191 struct lock_class_key lock_key; 247 struct lock_class_key lock_key;
192 struct list_head pages; 248 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */ 249 struct buffer_page *head_page; /* read from head */
@@ -221,32 +277,16 @@ struct ring_buffer_iter {
221 u64 read_stamp; 277 u64 read_stamp;
222}; 278};
223 279
280/* buffer may be either ring_buffer or ring_buffer_per_cpu */
224#define RB_WARN_ON(buffer, cond) \ 281#define RB_WARN_ON(buffer, cond) \
225 do { \ 282 ({ \
226 if (unlikely(cond)) { \ 283 int _____ret = unlikely(cond); \
227 atomic_inc(&buffer->record_disabled); \ 284 if (_____ret) { \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \ 285 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \ 286 WARN_ON(1); \
248 } \ 287 } \
249 } while (0) 288 _____ret; \
289 })
250 290
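Because RB_WARN_ON() is now a statement expression that evaluates to the (unlikely-annotated) condition, callers can branch on it directly, which is why the _RET and _ONCE variants can be removed; converted call sites in this patch read, for example:

	if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
		return -1;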
251/** 291/**
252 * check_pages - integrity check of buffer pages 292 * check_pages - integrity check of buffer pages
@@ -258,16 +298,20 @@ struct ring_buffer_iter {
258static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 298static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
259{ 299{
260 struct list_head *head = &cpu_buffer->pages; 300 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp; 301 struct buffer_page *bpage, *tmp;
262 302
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 303 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 304 return -1;
305 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
306 return -1;
265 307
266 list_for_each_entry_safe(page, tmp, head, list) { 308 list_for_each_entry_safe(bpage, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer, 309 if (RB_WARN_ON(cpu_buffer,
268 page->list.next->prev != &page->list); 310 bpage->list.next->prev != &bpage->list))
269 RB_WARN_ON_RET(cpu_buffer, 311 return -1;
270 page->list.prev->next != &page->list); 312 if (RB_WARN_ON(cpu_buffer,
313 bpage->list.prev->next != &bpage->list))
314 return -1;
271 } 315 }
272 316
273 return 0; 317 return 0;
@@ -277,22 +321,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
277 unsigned nr_pages) 321 unsigned nr_pages)
278{ 322{
279 struct list_head *head = &cpu_buffer->pages; 323 struct list_head *head = &cpu_buffer->pages;
280 struct buffer_page *page, *tmp; 324 struct buffer_page *bpage, *tmp;
281 unsigned long addr; 325 unsigned long addr;
282 LIST_HEAD(pages); 326 LIST_HEAD(pages);
283 unsigned i; 327 unsigned i;
284 328
285 for (i = 0; i < nr_pages; i++) { 329 for (i = 0; i < nr_pages; i++) {
286 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 330 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
287 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 331 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
288 if (!page) 332 if (!bpage)
289 goto free_pages; 333 goto free_pages;
290 list_add(&page->list, &pages); 334 list_add(&bpage->list, &pages);
291 335
292 addr = __get_free_page(GFP_KERNEL); 336 addr = __get_free_page(GFP_KERNEL);
293 if (!addr) 337 if (!addr)
294 goto free_pages; 338 goto free_pages;
295 page->page = (void *)addr; 339 bpage->page = (void *)addr;
340 rb_init_page(bpage->page);
296 } 341 }
297 342
298 list_splice(&pages, head); 343 list_splice(&pages, head);
@@ -302,9 +347,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
302 return 0; 347 return 0;
303 348
304 free_pages: 349 free_pages:
305 list_for_each_entry_safe(page, tmp, &pages, list) { 350 list_for_each_entry_safe(bpage, tmp, &pages, list) {
306 list_del_init(&page->list); 351 list_del_init(&bpage->list);
307 free_buffer_page(page); 352 free_buffer_page(bpage);
308 } 353 }
309 return -ENOMEM; 354 return -ENOMEM;
310} 355}
@@ -313,7 +358,7 @@ static struct ring_buffer_per_cpu *
313rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 358rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
314{ 359{
315 struct ring_buffer_per_cpu *cpu_buffer; 360 struct ring_buffer_per_cpu *cpu_buffer;
316 struct buffer_page *page; 361 struct buffer_page *bpage;
317 unsigned long addr; 362 unsigned long addr;
318 int ret; 363 int ret;
319 364
@@ -324,19 +369,21 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
324 369
325 cpu_buffer->cpu = cpu; 370 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer; 371 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock); 372 spin_lock_init(&cpu_buffer->reader_lock);
373 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
328 INIT_LIST_HEAD(&cpu_buffer->pages); 374 INIT_LIST_HEAD(&cpu_buffer->pages);
329 375
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 376 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
331 GFP_KERNEL, cpu_to_node(cpu)); 377 GFP_KERNEL, cpu_to_node(cpu));
332 if (!page) 378 if (!bpage)
333 goto fail_free_buffer; 379 goto fail_free_buffer;
334 380
335 cpu_buffer->reader_page = page; 381 cpu_buffer->reader_page = bpage;
336 addr = __get_free_page(GFP_KERNEL); 382 addr = __get_free_page(GFP_KERNEL);
337 if (!addr) 383 if (!addr)
338 goto fail_free_reader; 384 goto fail_free_reader;
339 page->page = (void *)addr; 385 bpage->page = (void *)addr;
386 rb_init_page(bpage->page);
340 387
341 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 388 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
342 389
@@ -361,14 +408,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
361static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 408static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
362{ 409{
363 struct list_head *head = &cpu_buffer->pages; 410 struct list_head *head = &cpu_buffer->pages;
364 struct buffer_page *page, *tmp; 411 struct buffer_page *bpage, *tmp;
365 412
366 list_del_init(&cpu_buffer->reader_page->list); 413 list_del_init(&cpu_buffer->reader_page->list);
367 free_buffer_page(cpu_buffer->reader_page); 414 free_buffer_page(cpu_buffer->reader_page);
368 415
369 list_for_each_entry_safe(page, tmp, head, list) { 416 list_for_each_entry_safe(bpage, tmp, head, list) {
370 list_del_init(&page->list); 417 list_del_init(&bpage->list);
371 free_buffer_page(page); 418 free_buffer_page(bpage);
372 } 419 }
373 kfree(cpu_buffer); 420 kfree(cpu_buffer);
374} 421}
@@ -465,7 +512,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
465static void 512static void
466rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 513rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
467{ 514{
468 struct buffer_page *page; 515 struct buffer_page *bpage;
469 struct list_head *p; 516 struct list_head *p;
470 unsigned i; 517 unsigned i;
471 518
@@ -473,13 +520,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
473 synchronize_sched(); 520 synchronize_sched();
474 521
475 for (i = 0; i < nr_pages; i++) { 522 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages)); 523 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
524 return;
477 p = cpu_buffer->pages.next; 525 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list); 526 bpage = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list); 527 list_del_init(&bpage->list);
480 free_buffer_page(page); 528 free_buffer_page(bpage);
481 } 529 }
482 BUG_ON(list_empty(&cpu_buffer->pages)); 530 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
531 return;
483 532
484 rb_reset_cpu(cpu_buffer); 533 rb_reset_cpu(cpu_buffer);
485 534
@@ -493,7 +542,7 @@ static void
493rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 542rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
494 struct list_head *pages, unsigned nr_pages) 543 struct list_head *pages, unsigned nr_pages)
495{ 544{
496 struct buffer_page *page; 545 struct buffer_page *bpage;
497 struct list_head *p; 546 struct list_head *p;
498 unsigned i; 547 unsigned i;
499 548
@@ -501,11 +550,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
501 synchronize_sched(); 550 synchronize_sched();
502 551
503 for (i = 0; i < nr_pages; i++) { 552 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages)); 553 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
554 return;
505 p = pages->next; 555 p = pages->next;
506 page = list_entry(p, struct buffer_page, list); 556 bpage = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list); 557 list_del_init(&bpage->list);
508 list_add_tail(&page->list, &cpu_buffer->pages); 558 list_add_tail(&bpage->list, &cpu_buffer->pages);
509 } 559 }
510 rb_reset_cpu(cpu_buffer); 560 rb_reset_cpu(cpu_buffer);
511 561
@@ -532,7 +582,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
532{ 582{
533 struct ring_buffer_per_cpu *cpu_buffer; 583 struct ring_buffer_per_cpu *cpu_buffer;
534 unsigned nr_pages, rm_pages, new_pages; 584 unsigned nr_pages, rm_pages, new_pages;
535 struct buffer_page *page, *tmp; 585 struct buffer_page *bpage, *tmp;
536 unsigned long buffer_size; 586 unsigned long buffer_size;
537 unsigned long addr; 587 unsigned long addr;
538 LIST_HEAD(pages); 588 LIST_HEAD(pages);
@@ -562,7 +612,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
562 if (size < buffer_size) { 612 if (size < buffer_size) {
563 613
564 /* easy case, just free pages */ 614 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages); 615 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
616 mutex_unlock(&buffer->mutex);
617 return -1;
618 }
566 619
567 rm_pages = buffer->pages - nr_pages; 620 rm_pages = buffer->pages - nr_pages;
568 621
@@ -581,21 +634,26 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
581 * add these pages to the cpu_buffers. Otherwise we just free 634 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM; 635 * them all and return -ENOMEM;
583 */ 636 */
584 BUG_ON(nr_pages <= buffer->pages); 637 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
638 mutex_unlock(&buffer->mutex);
639 return -1;
640 }
641
585 new_pages = nr_pages - buffer->pages; 642 new_pages = nr_pages - buffer->pages;
586 643
587 for_each_buffer_cpu(buffer, cpu) { 644 for_each_buffer_cpu(buffer, cpu) {
588 for (i = 0; i < new_pages; i++) { 645 for (i = 0; i < new_pages; i++) {
589 page = kzalloc_node(ALIGN(sizeof(*page), 646 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
590 cache_line_size()), 647 cache_line_size()),
591 GFP_KERNEL, cpu_to_node(cpu)); 648 GFP_KERNEL, cpu_to_node(cpu));
592 if (!page) 649 if (!bpage)
593 goto free_pages; 650 goto free_pages;
594 list_add(&page->list, &pages); 651 list_add(&bpage->list, &pages);
595 addr = __get_free_page(GFP_KERNEL); 652 addr = __get_free_page(GFP_KERNEL);
596 if (!addr) 653 if (!addr)
597 goto free_pages; 654 goto free_pages;
598 page->page = (void *)addr; 655 bpage->page = (void *)addr;
656 rb_init_page(bpage->page);
599 } 657 }
600 } 658 }
601 659
@@ -604,7 +662,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
604 rb_insert_pages(cpu_buffer, &pages, new_pages); 662 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 } 663 }
606 664
607 BUG_ON(!list_empty(&pages)); 665 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
666 mutex_unlock(&buffer->mutex);
667 return -1;
668 }
608 669
609 out: 670 out:
610 buffer->pages = nr_pages; 671 buffer->pages = nr_pages;
@@ -613,9 +674,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
613 return size; 674 return size;
614 675
615 free_pages: 676 free_pages:
616 list_for_each_entry_safe(page, tmp, &pages, list) { 677 list_for_each_entry_safe(bpage, tmp, &pages, list) {
617 list_del_init(&page->list); 678 list_del_init(&bpage->list);
618 free_buffer_page(page); 679 free_buffer_page(bpage);
619 } 680 }
620 mutex_unlock(&buffer->mutex); 681 mutex_unlock(&buffer->mutex);
621 return -ENOMEM; 682 return -ENOMEM;
@@ -626,9 +687,15 @@ static inline int rb_null_event(struct ring_buffer_event *event)
626 return event->type == RINGBUF_TYPE_PADDING; 687 return event->type == RINGBUF_TYPE_PADDING;
627} 688}
628 689
629static inline void *__rb_page_index(struct buffer_page *page, unsigned index) 690static inline void *
691__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
692{
693 return bpage->data + index;
694}
695
696static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
630{ 697{
631 return page->page + index; 698 return bpage->page->data + index;
632} 699}
633 700
634static inline struct ring_buffer_event * 701static inline struct ring_buffer_event *
@@ -658,7 +725,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
658 725
659static inline unsigned rb_page_commit(struct buffer_page *bpage) 726static inline unsigned rb_page_commit(struct buffer_page *bpage)
660{ 727{
661 return local_read(&bpage->commit); 728 return local_read(&bpage->page->commit);
662} 729}
663 730
664/* Size is determined by what has been committed */ 731
@@ -693,7 +760,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 head += rb_event_length(event)) { 760 head += rb_event_length(event)) {
694 761
695 event = __rb_page_index(cpu_buffer->head_page, head); 762 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event)); 763 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
764 return;
697 /* Only count data entries */ 765 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA) 766 if (event->type != RINGBUF_TYPE_DATA)
699 continue; 767 continue;
@@ -703,14 +771,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
703} 771}
704 772
705static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 773static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
706 struct buffer_page **page) 774 struct buffer_page **bpage)
707{ 775{
708 struct list_head *p = (*page)->list.next; 776 struct list_head *p = (*bpage)->list.next;
709 777
710 if (p == &cpu_buffer->pages) 778 if (p == &cpu_buffer->pages)
711 p = p->next; 779 p = p->next;
712 780
713 *page = list_entry(p, struct buffer_page, list); 781 *bpage = list_entry(p, struct buffer_page, list);
714} 782}
715 783
716static inline unsigned 784static inline unsigned
@@ -746,16 +814,18 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746 addr &= PAGE_MASK; 814 addr &= PAGE_MASK;
747 815
748 while (cpu_buffer->commit_page->page != (void *)addr) { 816 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer, 817 if (RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page); 818 cpu_buffer->commit_page == cpu_buffer->tail_page))
751 cpu_buffer->commit_page->commit = 819 return;
820 cpu_buffer->commit_page->page->commit =
752 cpu_buffer->commit_page->write; 821 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 822 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
754 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 823 cpu_buffer->write_stamp =
824 cpu_buffer->commit_page->page->time_stamp;
755 } 825 }
756 826
757 /* Now set the commit to the event's index */ 827 /* Now set the commit to the event's index */
758 local_set(&cpu_buffer->commit_page->commit, index); 828 local_set(&cpu_buffer->commit_page->page->commit, index);
759} 829}
760 830
761static inline void 831static inline void
@@ -770,16 +840,17 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
770 * assign the commit to the tail. 840 * assign the commit to the tail.
771 */ 841 */
772 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 842 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
773 cpu_buffer->commit_page->commit = 843 cpu_buffer->commit_page->page->commit =
774 cpu_buffer->commit_page->write; 844 cpu_buffer->commit_page->write;
775 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 845 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
776 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 846 cpu_buffer->write_stamp =
847 cpu_buffer->commit_page->page->time_stamp;
777 /* add barrier to keep gcc from optimizing too much */ 848 /* add barrier to keep gcc from optimizing too much */
778 barrier(); 849 barrier();
779 } 850 }
780 while (rb_commit_index(cpu_buffer) != 851 while (rb_commit_index(cpu_buffer) !=
781 rb_page_write(cpu_buffer->commit_page)) { 852 rb_page_write(cpu_buffer->commit_page)) {
782 cpu_buffer->commit_page->commit = 853 cpu_buffer->commit_page->page->commit =
783 cpu_buffer->commit_page->write; 854 cpu_buffer->commit_page->write;
784 barrier(); 855 barrier();
785 } 856 }
@@ -787,7 +858,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
787 858
788static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 859static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
789{ 860{
790 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; 861 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
791 cpu_buffer->reader_page->read = 0; 862 cpu_buffer->reader_page->read = 0;
792} 863}
793 864
@@ -806,7 +877,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
806 else 877 else
807 rb_inc_page(cpu_buffer, &iter->head_page); 878 rb_inc_page(cpu_buffer, &iter->head_page);
808 879
809 iter->read_stamp = iter->head_page->time_stamp; 880 iter->read_stamp = iter->head_page->page->time_stamp;
810 iter->head = 0; 881 iter->head = 0;
811} 882}
812 883
@@ -894,7 +965,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
894 if (write > BUF_PAGE_SIZE) { 965 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page; 966 struct buffer_page *next_page = tail_page;
896 967
897 spin_lock_irqsave(&cpu_buffer->lock, flags); 968 local_irq_save(flags);
969 __raw_spin_lock(&cpu_buffer->lock);
898 970
899 rb_inc_page(cpu_buffer, &next_page); 971 rb_inc_page(cpu_buffer, &next_page);
900 972
@@ -902,7 +974,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
902 reader_page = cpu_buffer->reader_page; 974 reader_page = cpu_buffer->reader_page;
903 975
904 /* we grabbed the lock before incrementing */ 976 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page); 977 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
978 goto out_unlock;
906 979
907 /* 980 /*
908 * If for some reason, we had an interrupt storm that made 981 * If for some reason, we had an interrupt storm that made
@@ -940,12 +1013,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
940 */ 1013 */
941 if (tail_page == cpu_buffer->tail_page) { 1014 if (tail_page == cpu_buffer->tail_page) {
942 local_set(&next_page->write, 0); 1015 local_set(&next_page->write, 0);
943 local_set(&next_page->commit, 0); 1016 local_set(&next_page->page->commit, 0);
944 cpu_buffer->tail_page = next_page; 1017 cpu_buffer->tail_page = next_page;
945 1018
946 /* reread the time stamp */ 1019 /* reread the time stamp */
947 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1020 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
948 cpu_buffer->tail_page->time_stamp = *ts; 1021 cpu_buffer->tail_page->page->time_stamp = *ts;
949 } 1022 }
950 1023
951 /* 1024 /*
@@ -970,7 +1043,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
970 rb_set_commit_to_write(cpu_buffer); 1043 rb_set_commit_to_write(cpu_buffer);
971 } 1044 }
972 1045
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1046 __raw_spin_unlock(&cpu_buffer->lock);
1047 local_irq_restore(flags);
974 1048
975 /* fail and let the caller try again */ 1049 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN); 1050 return ERR_PTR(-EAGAIN);
@@ -978,7 +1052,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
978 1052
979 /* We reserved something on the buffer */ 1053 /* We reserved something on the buffer */
980 1054
981 BUG_ON(write > BUF_PAGE_SIZE); 1055 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1056 return NULL;
982 1057
983 event = __rb_page_index(tail_page, tail); 1058 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length); 1059 rb_update_event(event, type, length);
@@ -988,12 +1063,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
988 * this page's time stamp. 1063 * this page's time stamp.
989 */ 1064 */
990 if (!tail && rb_is_commit(cpu_buffer, event)) 1065 if (!tail && rb_is_commit(cpu_buffer, event))
991 cpu_buffer->commit_page->time_stamp = *ts; 1066 cpu_buffer->commit_page->page->time_stamp = *ts;
992 1067
993 return event; 1068 return event;
994 1069
995 out_unlock: 1070 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1071 __raw_spin_unlock(&cpu_buffer->lock);
1072 local_irq_restore(flags);
997 return NULL; 1073 return NULL;
998} 1074}
999 1075
@@ -1038,7 +1114,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1038 event->time_delta = *delta & TS_MASK; 1114 event->time_delta = *delta & TS_MASK;
1039 event->array[0] = *delta >> TS_SHIFT; 1115 event->array[0] = *delta >> TS_SHIFT;
1040 } else { 1116 } else {
1041 cpu_buffer->commit_page->time_stamp = *ts; 1117 cpu_buffer->commit_page->page->time_stamp = *ts;
1042 event->time_delta = 0; 1118 event->time_delta = 0;
1043 event->array[0] = 0; 1119 event->array[0] = 0;
1044 } 1120 }
@@ -1076,10 +1152,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1076 * storm or we have something buggy. 1152 * storm or we have something buggy.
1077 * Bail! 1153 * Bail!
1078 */ 1154 */
1079 if (unlikely(++nr_loops > 1000)) { 1155 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL; 1156 return NULL;
1082 }
1083 1157
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1158 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085 1159
@@ -1175,15 +1249,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1175 struct ring_buffer_event *event; 1249 struct ring_buffer_event *event;
1176 int cpu, resched; 1250 int cpu, resched;
1177 1251
1178 if (ring_buffers_off) 1252 if (ring_buffer_flags != RB_BUFFERS_ON)
1179 return NULL; 1253 return NULL;
1180 1254
1181 if (atomic_read(&buffer->record_disabled)) 1255 if (atomic_read(&buffer->record_disabled))
1182 return NULL; 1256 return NULL;
1183 1257
1184 /* If we are tracing schedule, we don't want to recurse */ 1258 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched(); 1259 resched = ftrace_preempt_disable();
1186 preempt_disable_notrace();
1187 1260
1188 cpu = raw_smp_processor_id(); 1261 cpu = raw_smp_processor_id();
1189 1262
@@ -1214,10 +1287,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1214 return event; 1287 return event;
1215 1288
1216 out: 1289 out:
1217 if (resched) 1290 ftrace_preempt_enable(resched);
1218 preempt_enable_no_resched_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL; 1291 return NULL;
1222} 1292}
1223 1293
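ftrace_preempt_disable()/ftrace_preempt_enable() come from trace.h and package the open-coded sequence being removed here: remember whether a reschedule was pending before preemption is disabled, so that re-enabling does not schedule from inside the tracer. Based on the code they replace, they behave roughly like this (a sketch, not the authoritative definition):

	static inline int ftrace_preempt_disable(void)
	{
		int resched = need_resched();

		preempt_disable_notrace();
		return resched;
	}

	static inline void ftrace_preempt_enable(int resched)
	{
		if (resched)
			preempt_enable_no_resched_notrace();
		else
			preempt_enable_notrace();
	}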
@@ -1259,12 +1329,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1259 /* 1329 /*
1260 * Only the last preempt count needs to restore preemption. 1330 * Only the last preempt count needs to restore preemption.
1261 */ 1331 */
1262 if (preempt_count() == 1) { 1332 if (preempt_count() == 1)
1263 if (per_cpu(rb_need_resched, cpu)) 1333 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1264 preempt_enable_no_resched_notrace(); 1334 else
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace(); 1335 preempt_enable_no_resched_notrace();
1269 1336
1270 return 0; 1337 return 0;
@@ -1294,14 +1361,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1294 int ret = -EBUSY; 1361 int ret = -EBUSY;
1295 int cpu, resched; 1362 int cpu, resched;
1296 1363
1297 if (ring_buffers_off) 1364 if (ring_buffer_flags != RB_BUFFERS_ON)
1298 return -EBUSY; 1365 return -EBUSY;
1299 1366
1300 if (atomic_read(&buffer->record_disabled)) 1367 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY; 1368 return -EBUSY;
1302 1369
1303 resched = need_resched(); 1370 resched = ftrace_preempt_disable();
1304 preempt_disable_notrace();
1305 1371
1306 cpu = raw_smp_processor_id(); 1372 cpu = raw_smp_processor_id();
1307 1373
@@ -1327,10 +1393,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1327 1393
1328 ret = 0; 1394 ret = 0;
1329 out: 1395 out:
1330 if (resched) 1396 ftrace_preempt_enable(resched);
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334 1397
1335 return ret; 1398 return ret;
1336} 1399}
@@ -1489,14 +1552,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1489 return overruns; 1552 return overruns;
1490} 1553}
1491 1554
1492/** 1555static void rb_iter_reset(struct ring_buffer_iter *iter)
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{ 1556{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1557 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502 1558
@@ -1511,7 +1567,24 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1511 if (iter->head) 1567 if (iter->head)
1512 iter->read_stamp = cpu_buffer->read_stamp; 1568 iter->read_stamp = cpu_buffer->read_stamp;
1513 else 1569 else
1514 iter->read_stamp = iter->head_page->time_stamp; 1570 iter->read_stamp = iter->head_page->page->time_stamp;
1571}
1572
1573/**
1574 * ring_buffer_iter_reset - reset an iterator
1575 * @iter: The iterator to reset
1576 *
1577 * Resets the iterator, so that it will start from the beginning
1578 * again.
1579 */
1580void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1581{
1582 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1583 unsigned long flags;
1584
1585 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1586 rb_iter_reset(iter);
1587 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1515} 1588}
1516 1589
1517/** 1590/**
@@ -1597,7 +1670,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1597 unsigned long flags; 1670 unsigned long flags;
1598 int nr_loops = 0; 1671 int nr_loops = 0;
1599 1672
1600 spin_lock_irqsave(&cpu_buffer->lock, flags); 1673 local_irq_save(flags);
1674 __raw_spin_lock(&cpu_buffer->lock);
1601 1675
1602 again: 1676 again:
1603 /* 1677 /*
@@ -1606,8 +1680,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 * a case where we will loop three times. There should be no 1680 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of). 1681 * reason to loop four times (that I know of).
1608 */ 1682 */
1609 if (unlikely(++nr_loops > 3)) { 1683 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL; 1684 reader = NULL;
1612 goto out; 1685 goto out;
1613 } 1686 }
@@ -1619,8 +1692,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 goto out; 1692 goto out;
1620 1693
1621 /* Never should we have an index greater than the size */ 1694 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer, 1695 if (RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader)); 1696 cpu_buffer->reader_page->read > rb_page_size(reader)))
1697 goto out;
1624 1698
1625 /* check if we caught up to the tail */ 1699 /* check if we caught up to the tail */
1626 reader = NULL; 1700 reader = NULL;
@@ -1637,7 +1711,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1637 cpu_buffer->reader_page->list.prev = reader->list.prev; 1711 cpu_buffer->reader_page->list.prev = reader->list.prev;
1638 1712
1639 local_set(&cpu_buffer->reader_page->write, 0); 1713 local_set(&cpu_buffer->reader_page->write, 0);
1640 local_set(&cpu_buffer->reader_page->commit, 0); 1714 local_set(&cpu_buffer->reader_page->page->commit, 0);
1641 1715
1642 /* Make the reader page now replace the head */ 1716 /* Make the reader page now replace the head */
1643 reader->list.prev->next = &cpu_buffer->reader_page->list; 1717 reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1659,7 +1733,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 goto again; 1733 goto again;
1660 1734
1661 out: 1735 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1736 __raw_spin_unlock(&cpu_buffer->lock);
1737 local_irq_restore(flags);
1663 1738
1664 return reader; 1739 return reader;
1665} 1740}
@@ -1673,7 +1748,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1673 reader = rb_get_reader_page(cpu_buffer); 1748 reader = rb_get_reader_page(cpu_buffer);
1674 1749
1675 /* This function should not be called when buffer is empty */ 1750 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader); 1751 if (RB_WARN_ON(cpu_buffer, !reader))
1752 return;
1677 1753
1678 event = rb_reader_event(cpu_buffer); 1754 event = rb_reader_event(cpu_buffer);
1679 1755
@@ -1700,7 +1776,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * Check if we are at the end of the buffer. 1776 * Check if we are at the end of the buffer.
1701 */ 1777 */
1702 if (iter->head >= rb_page_size(iter->head_page)) { 1778 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1779 if (RB_WARN_ON(buffer,
1780 iter->head_page == cpu_buffer->commit_page))
1781 return;
1704 rb_inc_iter(iter); 1782 rb_inc_iter(iter);
1705 return; 1783 return;
1706 } 1784 }
@@ -1713,8 +1791,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 * This should not be called to advance the header if we are 1791 * This should not be called to advance the header if we are
1714 * at the tail of the buffer. 1792 * at the tail of the buffer.
1715 */ 1793 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1794 if (RB_WARN_ON(cpu_buffer,
1717 (iter->head + length > rb_commit_index(cpu_buffer))); 1795 (iter->head_page == cpu_buffer->commit_page) &&
1796 (iter->head + length > rb_commit_index(cpu_buffer))))
1797 return;
1718 1798
1719 rb_update_iter_read_stamp(iter, event); 1799 rb_update_iter_read_stamp(iter, event);
1720 1800
@@ -1726,17 +1806,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1726 rb_advance_iter(iter); 1806 rb_advance_iter(iter);
1727} 1807}
1728 1808
1729/** 1809static struct ring_buffer_event *
1730 * ring_buffer_peek - peek at the next event to be read 1810rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1731 * @buffer: The ring buffer to read
1732 * @cpu: The cpu to peak at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{ 1811{
1741 struct ring_buffer_per_cpu *cpu_buffer; 1812 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event; 1813 struct ring_buffer_event *event;
@@ -1757,10 +1828,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1757 * can have. Nesting 10 deep of interrupts is clearly 1828 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly. 1829 * an anomaly.
1759 */ 1830 */
1760 if (unlikely(++nr_loops > 10)) { 1831 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL; 1832 return NULL;
1763 }
1764 1833
1765 reader = rb_get_reader_page(cpu_buffer); 1834 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader) 1835 if (!reader)
@@ -1798,16 +1867,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1798 return NULL; 1867 return NULL;
1799} 1868}
1800 1869
1801/** 1870static struct ring_buffer_event *
1802 * ring_buffer_iter_peek - peek at the next event to be read 1871rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{ 1872{
1812 struct ring_buffer *buffer; 1873 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer; 1874 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1890,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1829 * can have. Nesting 10 deep of interrupts is clearly 1890 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly. 1891 * an anomaly.
1831 */ 1892 */
1832 if (unlikely(++nr_loops > 10)) { 1893 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL; 1894 return NULL;
1835 }
1836 1895
1837 if (rb_per_cpu_empty(cpu_buffer)) 1896 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL; 1897 return NULL;
@@ -1869,6 +1928,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1869} 1928}
1870 1929
1871/** 1930/**
1931 * ring_buffer_peek - peek at the next event to be read
1932 * @buffer: The ring buffer to read
1933 * @cpu: The cpu to peek at
1934 * @ts: The timestamp counter of this event.
1935 *
1936 * This will return the event that will be read next, but does
1937 * not consume the data.
1938 */
1939struct ring_buffer_event *
1940ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1941{
1942 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1943 struct ring_buffer_event *event;
1944 unsigned long flags;
1945
1946 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1947 event = rb_buffer_peek(buffer, cpu, ts);
1948 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1949
1950 return event;
1951}
1952
1953/**
1954 * ring_buffer_iter_peek - peek at the next event to be read
1955 * @iter: The ring buffer iterator
1956 * @ts: The timestamp counter of this event.
1957 *
1958 * This will return the event that will be read next, but does
1959 * not increment the iterator.
1960 */
1961struct ring_buffer_event *
1962ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1963{
1964 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1965 struct ring_buffer_event *event;
1966 unsigned long flags;
1967
1968 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1969 event = rb_iter_peek(iter, ts);
1970 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971
1972 return event;
1973}
1974
1975/**
1872 * ring_buffer_consume - return an event and consume it 1976 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from 1977 * @buffer: The ring buffer to get the next event from
1874 * 1978 *
@@ -1879,19 +1983,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1879struct ring_buffer_event * 1983struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1984ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{ 1985{
1882 struct ring_buffer_per_cpu *cpu_buffer; 1986 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1883 struct ring_buffer_event *event; 1987 struct ring_buffer_event *event;
1988 unsigned long flags;
1884 1989
1885 if (!cpu_isset(cpu, buffer->cpumask)) 1990 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL; 1991 return NULL;
1887 1992
1888 event = ring_buffer_peek(buffer, cpu, ts); 1993 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1994
1995 event = rb_buffer_peek(buffer, cpu, ts);
1889 if (!event) 1996 if (!event)
1890 return NULL; 1997 goto out;
1891 1998
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer); 1999 rb_advance_reader(cpu_buffer);
1894 2000
2001 out:
2002 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2003
1895 return event; 2004 return event;
1896} 2005}
1897 2006
@@ -1928,9 +2037,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1928 atomic_inc(&cpu_buffer->record_disabled); 2037 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched(); 2038 synchronize_sched();
1930 2039
1931 spin_lock_irqsave(&cpu_buffer->lock, flags); 2040 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 ring_buffer_iter_reset(iter); 2041 __raw_spin_lock(&cpu_buffer->lock);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2042 rb_iter_reset(iter);
2043 __raw_spin_unlock(&cpu_buffer->lock);
2044 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934 2045
1935 return iter; 2046 return iter;
1936} 2047}
@@ -1962,12 +2073,17 @@ struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2073ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{ 2074{
1964 struct ring_buffer_event *event; 2075 struct ring_buffer_event *event;
2076 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2077 unsigned long flags;
1965 2078
1966 event = ring_buffer_iter_peek(iter, ts); 2079 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2080 event = rb_iter_peek(iter, ts);
1967 if (!event) 2081 if (!event)
1968 return NULL; 2082 goto out;
1969 2083
1970 rb_advance_iter(iter); 2084 rb_advance_iter(iter);
2085 out:
2086 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971 2087
1972 return event; 2088 return event;
1973} 2089}
@@ -1987,7 +2103,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1987 cpu_buffer->head_page 2103 cpu_buffer->head_page
1988 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2104 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1989 local_set(&cpu_buffer->head_page->write, 0); 2105 local_set(&cpu_buffer->head_page->write, 0);
1990 local_set(&cpu_buffer->head_page->commit, 0); 2106 local_set(&cpu_buffer->head_page->page->commit, 0);
1991 2107
1992 cpu_buffer->head_page->read = 0; 2108 cpu_buffer->head_page->read = 0;
1993 2109
@@ -1996,7 +2112,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1996 2112
1997 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2113 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1998 local_set(&cpu_buffer->reader_page->write, 0); 2114 local_set(&cpu_buffer->reader_page->write, 0);
1999 local_set(&cpu_buffer->reader_page->commit, 0); 2115 local_set(&cpu_buffer->reader_page->page->commit, 0);
2000 cpu_buffer->reader_page->read = 0; 2116 cpu_buffer->reader_page->read = 0;
2001 2117
2002 cpu_buffer->overrun = 0; 2118 cpu_buffer->overrun = 0;
@@ -2016,11 +2132,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2016 if (!cpu_isset(cpu, buffer->cpumask)) 2132 if (!cpu_isset(cpu, buffer->cpumask))
2017 return; 2133 return;
2018 2134
2019 spin_lock_irqsave(&cpu_buffer->lock, flags); 2135 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2136
2137 __raw_spin_lock(&cpu_buffer->lock);
2020 2138
2021 rb_reset_cpu(cpu_buffer); 2139 rb_reset_cpu(cpu_buffer);
2022 2140
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2141 __raw_spin_unlock(&cpu_buffer->lock);
2142
2143 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2024} 2144}
2025 2145
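Two locks now protect a per-cpu buffer: the new reader_lock serializes readers, while the raw cpu_buffer->lock (taken with __raw_spin_lock, presumably so the tracer's own locking does not recurse into tracing) protects the page lists against the writer. Where both are needed, as in ring_buffer_read_start() and ring_buffer_reset_cpu() above, the nesting is (sketch, taken from those call sites):

	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
	__raw_spin_lock(&cpu_buffer->lock);
	/* ... manipulate head/reader pages ... */
	__raw_spin_unlock(&cpu_buffer->lock);
	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);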
2026/** 2146/**
@@ -2118,16 +2238,178 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2118 return 0; 2238 return 0;
2119} 2239}
2120 2240
2241static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2242 struct buffer_data_page *bpage)
2243{
2244 struct ring_buffer_event *event;
2245 unsigned long head;
2246
2247 __raw_spin_lock(&cpu_buffer->lock);
2248 for (head = 0; head < local_read(&bpage->commit);
2249 head += rb_event_length(event)) {
2250
2251 event = __rb_data_page_index(bpage, head);
2252 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2253 return;
2254 /* Only count data entries */
2255 if (event->type != RINGBUF_TYPE_DATA)
2256 continue;
2257 cpu_buffer->entries--;
2258 }
2259 __raw_spin_unlock(&cpu_buffer->lock);
2260}
2261
2262/**
2263 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2264 * @buffer: the buffer to allocate for.
2265 *
2266 * This function is used in conjunction with ring_buffer_read_page.
2267 * When reading a full page from the ring buffer, these functions
2268 * can be used to speed up the process. The calling function should
2269 * allocate a few pages first with this function. Then when it
2270 * needs to get pages from the ring buffer, it passes the result
2271 * of this function into ring_buffer_read_page, which will swap
2272 * the page that was allocated, with the read page of the buffer.
2273 *
2274 * Returns:
2275 * The page allocated, or NULL on error.
2276 */
2277void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2278{
2279 unsigned long addr;
2280 struct buffer_data_page *bpage;
2281
2282 addr = __get_free_page(GFP_KERNEL);
2283 if (!addr)
2284 return NULL;
2285
2286 bpage = (void *)addr;
2287
2288 return bpage;
2289}
2290
2291/**
2292 * ring_buffer_free_read_page - free an allocated read page
2293 * @buffer: the buffer the page was allocated for
2294 * @data: the page to free
2295 *
2296 * Free a page allocated from ring_buffer_alloc_read_page.
2297 */
2298void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2299{
2300 free_page((unsigned long)data);
2301}
2302
2303/**
2304 * ring_buffer_read_page - extract a page from the ring buffer
2305 * @buffer: buffer to extract from
2306 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2307 * @cpu: the cpu of the buffer to extract
2308 * @full: should the extraction only happen when the page is full.
2309 *
2310 * This function will pull out a page from the ring buffer and consume it.
2311 * @data_page must be the address of the variable that was returned
2312 * from ring_buffer_alloc_read_page. This is because the page might be used
2313 * to swap with a page in the ring buffer.
2314 *
2315 * for example:
2316 * rpage = ring_buffer_alloc_read_page(buffer);
2317 * if (!rpage)
2318 * return error;
2319 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2320 * if (ret)
2321 * process_page(rpage);
2322 *
2323 * When @full is set, the function will not return true unless
2324 * the writer is off the reader page.
2325 *
2326 * Note: it is up to the calling functions to handle sleeps and wakeups.
2327 * The ring buffer can be used anywhere in the kernel and can not
2328 * blindly call wake_up. The layer that uses the ring buffer must be
2329 * responsible for that.
2330 *
2331 * Returns:
2332 * 1 if data has been transferred
2333 * 0 if no data has been transferred.
2334 */
2335int ring_buffer_read_page(struct ring_buffer *buffer,
2336 void **data_page, int cpu, int full)
2337{
2338 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2339 struct ring_buffer_event *event;
2340 struct buffer_data_page *bpage;
2341 unsigned long flags;
2342 int ret = 0;
2343
2344 if (!data_page)
2345 return 0;
2346
2347 bpage = *data_page;
2348 if (!bpage)
2349 return 0;
2350
2351 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2352
2353 /*
2354 * rb_buffer_peek will get the next ring buffer if
2355 * the current reader page is empty.
2356 */
2357 event = rb_buffer_peek(buffer, cpu, NULL);
2358 if (!event)
2359 goto out;
2360
2361 /* check for data */
2362 if (!local_read(&cpu_buffer->reader_page->page->commit))
2363 goto out;
2364 /*
2365 * If the writer is already off of the read page, then simply
2366 * switch the read page with the given page. Otherwise
2367 * we need to copy the data from the reader to the writer.
2368 */
2369 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2370 unsigned int read = cpu_buffer->reader_page->read;
2371
2372 if (full)
2373 goto out;
2374 /* The writer is still on the reader page, we must copy */
2375 bpage = cpu_buffer->reader_page->page;
2376 memcpy(bpage->data,
2377 cpu_buffer->reader_page->page->data + read,
2378 local_read(&bpage->commit) - read);
2379
2380 /* consume what was read */
2381 cpu_buffer->reader_page->read += read;
2382
2383 } else {
2384 /* swap the pages */
2385 rb_init_page(bpage);
2386 bpage = cpu_buffer->reader_page->page;
2387 cpu_buffer->reader_page->page = *data_page;
2388 cpu_buffer->reader_page->read = 0;
2389 *data_page = bpage;
2390 }
2391 ret = 1;
2392
2393 /* update the entry counter */
2394 rb_remove_entries(cpu_buffer, bpage);
2395 out:
2396 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2397
2398 return ret;
2399}
2400
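Putting the new read-page API together, the calling sequence the kernel-doc above describes looks roughly like this (a sketch; process_page() stands in for whatever the caller does with the data, and error handling is trimmed):

	void *rpage = ring_buffer_alloc_read_page(buffer);

	if (!rpage)
		return -ENOMEM;
	/* pass &rpage: on success the buffer may hand back a different page */
	if (ring_buffer_read_page(buffer, &rpage, cpu, 0))
		process_page(rpage);
	ring_buffer_free_read_page(buffer, rpage);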
2121static ssize_t 2401static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf, 2402rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos) 2403 size_t cnt, loff_t *ppos)
2124{ 2404{
2125 int *p = filp->private_data; 2405 long *p = filp->private_data;
2126 char buf[64]; 2406 char buf[64];
2127 int r; 2407 int r;
2128 2408
2129 /* !ring_buffers_off == tracing_on */ 2409 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2130 r = sprintf(buf, "%d\n", !*p); 2410 r = sprintf(buf, "permanently disabled\n");
2411 else
2412 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2131 2413
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2414 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133} 2415}
@@ -2136,7 +2418,7 @@ static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf, 2418rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos) 2419 size_t cnt, loff_t *ppos)
2138{ 2420{
2139 int *p = filp->private_data; 2421 long *p = filp->private_data;
2140 char buf[64]; 2422 char buf[64];
2141 long val; 2423 long val;
2142 int ret; 2424 int ret;
@@ -2153,8 +2435,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2153 if (ret < 0) 2435 if (ret < 0)
2154 return ret; 2436 return ret;
2155 2437
2156 /* !ring_buffers_off == tracing_on */ 2438 if (val)
2157 *p = !val; 2439 set_bit(RB_BUFFERS_ON_BIT, p);
2440 else
2441 clear_bit(RB_BUFFERS_ON_BIT, p);
2158 2442
2159 (*ppos)++; 2443 (*ppos)++;
2160 2444
@@ -2176,7 +2460,7 @@ static __init int rb_init_debugfs(void)
2176 d_tracer = tracing_init_dentry(); 2460 d_tracer = tracing_init_dentry();
2177 2461
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2462 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops); 2463 &ring_buffer_flags, &rb_simple_fops);
2180 if (!entry) 2464 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2465 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182 2466