Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c | 709
1 file changed, 503 insertions(+), 206 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 668bbb5ef2bd..76f34c0ef29c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
 25 * Turning this switch on makes it OK to write to the
 26 * ring buffer, if the ring buffer itself is enabled.
27 *
 28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
 36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
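The three layers spelled out in the comment above can be summarized in a small sketch (illustration only; rb_record_is_allowed() is a hypothetical helper, not part of this patch):

/* Hypothetical helper illustrating the three gating layers. */
static int rb_record_is_allowed(struct ring_buffer *buffer,
                                struct ring_buffer_per_cpu *cpu_buffer)
{
        /* 1) global flag: ON bit set, DISABLED bit clear */
        if (ring_buffer_flags != RB_BUFFERS_ON)
                return 0;
        /* 2) the ring buffer itself allows recording */
        if (atomic_read(&buffer->record_disabled))
                return 0;
        /* 3) the per cpu buffer allows recording */
        if (atomic_read(&cpu_buffer->record_disabled))
                return 0;
        return 1;
}

Because RB_BUFFERS_DISABLED is a separate bit, the single test ring_buffer_flags != RB_BUFFERS_ON covers both tracing_off() (ON bit cleared) and the tracing_off_permanent() call added below (DISABLED bit set); it is the same test ring_buffer_lock_reserve() and ring_buffer_write() use further down in this patch.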
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,9 +80,22 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
46} 84}
47 85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
 90 * permanently.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
95}
96
97#include "trace.h"
98
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 99/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 100#define DEBUG_SHIFT 0
50 101
@@ -56,7 +107,7 @@ u64 ring_buffer_time_stamp(int cpu)
56 preempt_disable_notrace(); 107 preempt_disable_notrace();
57 /* shift to debug/test normalization and TIME_EXTENTS */ 108 /* shift to debug/test normalization and TIME_EXTENTS */
58 time = sched_clock() << DEBUG_SHIFT; 109 time = sched_clock() << DEBUG_SHIFT;
59 preempt_enable_notrace(); 110 preempt_enable_no_resched_notrace();
60 111
61 return time; 112 return time;
62} 113}
@@ -144,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
144#define TS_MASK ((1ULL << TS_SHIFT) - 1) 195#define TS_MASK ((1ULL << TS_SHIFT) - 1)
145#define TS_DELTA_TEST (~TS_MASK) 196#define TS_DELTA_TEST (~TS_MASK)
146 197
147/* 198struct buffer_data_page {
148 * This hack stolen from mm/slob.c.
149 * We can store per page timing information in the page frame of the page.
150 * Thanks to Peter Zijlstra for suggesting this idea.
151 */
152struct buffer_page {
153 u64 time_stamp; /* page time stamp */ 199 u64 time_stamp; /* page time stamp */
154 local_t write; /* index for next write */
 155 local_t commit; /* write committed index */ 200 local_t commit; /* write committed index */
201 unsigned char data[]; /* data of buffer page */
202};
203
204struct buffer_page {
205 local_t write; /* index for next write */
156 unsigned read; /* index for next read */ 206 unsigned read; /* index for next read */
157 struct list_head list; /* list of free pages */ 207 struct list_head list; /* list of free pages */
158 void *page; /* Actual data page */ 208 struct buffer_data_page *page; /* Actual data page */
159}; 209};
160 210
211static void rb_init_page(struct buffer_data_page *bpage)
212{
213 local_set(&bpage->commit, 0);
214}
215
161/* 216/*
162 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 217 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
163 * this issue out. 218 * this issue out.
@@ -179,7 +234,7 @@ static inline int test_time_stamp(u64 delta)
179 return 0; 234 return 0;
180} 235}
181 236
182#define BUF_PAGE_SIZE PAGE_SIZE 237#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
183 238
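Splitting the old struct buffer_page into a bookkeeping object plus a struct buffer_data_page that lives at the start of the data page itself means event data is now reached through two hops, and the usable payload per page shrinks to the BUF_PAGE_SIZE defined above. A minimal sketch (rb_event_at() is hypothetical; the real accessors __rb_page_index() and __rb_data_page_index() appear later in this patch):

/* Illustration only: reaching an event at byte offset 'index'. */
static inline struct ring_buffer_event *
rb_event_at(struct buffer_page *bpage, unsigned index)
{
        /* bpage is the bookkeeping struct; bpage->page is the data page */
        return (struct ring_buffer_event *)&bpage->page->data[index];
}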
184/* 239/*
185 * head_page == tail_page && head == tail then buffer is empty. 240 * head_page == tail_page && head == tail then buffer is empty.
@@ -187,7 +242,8 @@ static inline int test_time_stamp(u64 delta)
187struct ring_buffer_per_cpu { 242struct ring_buffer_per_cpu {
188 int cpu; 243 int cpu;
189 struct ring_buffer *buffer; 244 struct ring_buffer *buffer;
190 spinlock_t lock; 245 spinlock_t reader_lock; /* serialize readers */
246 raw_spinlock_t lock;
191 struct lock_class_key lock_key; 247 struct lock_class_key lock_key;
192 struct list_head pages; 248 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */ 249 struct buffer_page *head_page; /* read from head */
@@ -202,7 +258,6 @@ struct ring_buffer_per_cpu {
202}; 258};
203 259
204struct ring_buffer { 260struct ring_buffer {
205 unsigned long size;
206 unsigned pages; 261 unsigned pages;
207 unsigned flags; 262 unsigned flags;
208 int cpus; 263 int cpus;
@@ -221,32 +276,16 @@ struct ring_buffer_iter {
221 u64 read_stamp; 276 u64 read_stamp;
222}; 277};
223 278
279/* buffer may be either ring_buffer or ring_buffer_per_cpu */
224#define RB_WARN_ON(buffer, cond) \ 280#define RB_WARN_ON(buffer, cond) \
225 do { \ 281 ({ \
226 if (unlikely(cond)) { \ 282 int _____ret = unlikely(cond); \
227 atomic_inc(&buffer->record_disabled); \ 283 if (_____ret) { \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \ 284 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \ 285 WARN_ON(1); \
248 } \ 286 } \
249 } while (0) 287 _____ret; \
288 })
250 289
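The rewritten RB_WARN_ON() is a GNU statement expression that evaluates to the (unlikely) condition, so the RB_WARN_ON_RET and RB_WARN_ON_ONCE variants go away and callers decide for themselves how to bail out. A sketch of the resulting calling convention (the function below is illustrative, not from the patch):

/* Illustrative caller of the new RB_WARN_ON(). */
static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
                         struct list_head *head)
{
        /*
         * On failure: recording on this buffer is disabled, WARN_ON(1)
         * fires, and the expression evaluates to nonzero.
         */
        if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
                return -1;

        return 0;
}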
251/** 290/**
252 * check_pages - integrity check of buffer pages 291 * check_pages - integrity check of buffer pages
@@ -258,16 +297,20 @@ struct ring_buffer_iter {
258static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 297static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
259{ 298{
260 struct list_head *head = &cpu_buffer->pages; 299 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp; 300 struct buffer_page *bpage, *tmp;
262 301
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 302 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 303 return -1;
304 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
305 return -1;
265 306
266 list_for_each_entry_safe(page, tmp, head, list) { 307 list_for_each_entry_safe(bpage, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer, 308 if (RB_WARN_ON(cpu_buffer,
268 page->list.next->prev != &page->list); 309 bpage->list.next->prev != &bpage->list))
269 RB_WARN_ON_RET(cpu_buffer, 310 return -1;
270 page->list.prev->next != &page->list); 311 if (RB_WARN_ON(cpu_buffer,
312 bpage->list.prev->next != &bpage->list))
313 return -1;
271 } 314 }
272 315
273 return 0; 316 return 0;
@@ -277,22 +320,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
277 unsigned nr_pages) 320 unsigned nr_pages)
278{ 321{
279 struct list_head *head = &cpu_buffer->pages; 322 struct list_head *head = &cpu_buffer->pages;
280 struct buffer_page *page, *tmp; 323 struct buffer_page *bpage, *tmp;
281 unsigned long addr; 324 unsigned long addr;
282 LIST_HEAD(pages); 325 LIST_HEAD(pages);
283 unsigned i; 326 unsigned i;
284 327
285 for (i = 0; i < nr_pages; i++) { 328 for (i = 0; i < nr_pages; i++) {
286 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 329 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
287 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 330 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
288 if (!page) 331 if (!bpage)
289 goto free_pages; 332 goto free_pages;
290 list_add(&page->list, &pages); 333 list_add(&bpage->list, &pages);
291 334
292 addr = __get_free_page(GFP_KERNEL); 335 addr = __get_free_page(GFP_KERNEL);
293 if (!addr) 336 if (!addr)
294 goto free_pages; 337 goto free_pages;
295 page->page = (void *)addr; 338 bpage->page = (void *)addr;
339 rb_init_page(bpage->page);
296 } 340 }
297 341
298 list_splice(&pages, head); 342 list_splice(&pages, head);
@@ -302,9 +346,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
302 return 0; 346 return 0;
303 347
304 free_pages: 348 free_pages:
305 list_for_each_entry_safe(page, tmp, &pages, list) { 349 list_for_each_entry_safe(bpage, tmp, &pages, list) {
306 list_del_init(&page->list); 350 list_del_init(&bpage->list);
307 free_buffer_page(page); 351 free_buffer_page(bpage);
308 } 352 }
309 return -ENOMEM; 353 return -ENOMEM;
310} 354}
@@ -313,7 +357,7 @@ static struct ring_buffer_per_cpu *
313rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 357rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
314{ 358{
315 struct ring_buffer_per_cpu *cpu_buffer; 359 struct ring_buffer_per_cpu *cpu_buffer;
316 struct buffer_page *page; 360 struct buffer_page *bpage;
317 unsigned long addr; 361 unsigned long addr;
318 int ret; 362 int ret;
319 363
@@ -324,19 +368,21 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
324 368
325 cpu_buffer->cpu = cpu; 369 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer; 370 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock); 371 spin_lock_init(&cpu_buffer->reader_lock);
372 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
328 INIT_LIST_HEAD(&cpu_buffer->pages); 373 INIT_LIST_HEAD(&cpu_buffer->pages);
329 374
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 375 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
331 GFP_KERNEL, cpu_to_node(cpu)); 376 GFP_KERNEL, cpu_to_node(cpu));
332 if (!page) 377 if (!bpage)
333 goto fail_free_buffer; 378 goto fail_free_buffer;
334 379
335 cpu_buffer->reader_page = page; 380 cpu_buffer->reader_page = bpage;
336 addr = __get_free_page(GFP_KERNEL); 381 addr = __get_free_page(GFP_KERNEL);
337 if (!addr) 382 if (!addr)
338 goto fail_free_reader; 383 goto fail_free_reader;
339 page->page = (void *)addr; 384 bpage->page = (void *)addr;
385 rb_init_page(bpage->page);
340 386
341 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 387 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
342 388
@@ -361,14 +407,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
361static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 407static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
362{ 408{
363 struct list_head *head = &cpu_buffer->pages; 409 struct list_head *head = &cpu_buffer->pages;
364 struct buffer_page *page, *tmp; 410 struct buffer_page *bpage, *tmp;
365 411
366 list_del_init(&cpu_buffer->reader_page->list); 412 list_del_init(&cpu_buffer->reader_page->list);
367 free_buffer_page(cpu_buffer->reader_page); 413 free_buffer_page(cpu_buffer->reader_page);
368 414
369 list_for_each_entry_safe(page, tmp, head, list) { 415 list_for_each_entry_safe(bpage, tmp, head, list) {
370 list_del_init(&page->list); 416 list_del_init(&bpage->list);
371 free_buffer_page(page); 417 free_buffer_page(bpage);
372 } 418 }
373 kfree(cpu_buffer); 419 kfree(cpu_buffer);
374} 420}
@@ -465,7 +511,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
465static void 511static void
466rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 512rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
467{ 513{
468 struct buffer_page *page; 514 struct buffer_page *bpage;
469 struct list_head *p; 515 struct list_head *p;
470 unsigned i; 516 unsigned i;
471 517
@@ -473,13 +519,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
473 synchronize_sched(); 519 synchronize_sched();
474 520
475 for (i = 0; i < nr_pages; i++) { 521 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages)); 522 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
523 return;
477 p = cpu_buffer->pages.next; 524 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list); 525 bpage = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list); 526 list_del_init(&bpage->list);
480 free_buffer_page(page); 527 free_buffer_page(bpage);
481 } 528 }
482 BUG_ON(list_empty(&cpu_buffer->pages)); 529 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
530 return;
483 531
484 rb_reset_cpu(cpu_buffer); 532 rb_reset_cpu(cpu_buffer);
485 533
@@ -493,7 +541,7 @@ static void
493rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 541rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
494 struct list_head *pages, unsigned nr_pages) 542 struct list_head *pages, unsigned nr_pages)
495{ 543{
496 struct buffer_page *page; 544 struct buffer_page *bpage;
497 struct list_head *p; 545 struct list_head *p;
498 unsigned i; 546 unsigned i;
499 547
@@ -501,11 +549,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
501 synchronize_sched(); 549 synchronize_sched();
502 550
503 for (i = 0; i < nr_pages; i++) { 551 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages)); 552 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
553 return;
505 p = pages->next; 554 p = pages->next;
506 page = list_entry(p, struct buffer_page, list); 555 bpage = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list); 556 list_del_init(&bpage->list);
508 list_add_tail(&page->list, &cpu_buffer->pages); 557 list_add_tail(&bpage->list, &cpu_buffer->pages);
509 } 558 }
510 rb_reset_cpu(cpu_buffer); 559 rb_reset_cpu(cpu_buffer);
511 560
@@ -532,7 +581,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
532{ 581{
533 struct ring_buffer_per_cpu *cpu_buffer; 582 struct ring_buffer_per_cpu *cpu_buffer;
534 unsigned nr_pages, rm_pages, new_pages; 583 unsigned nr_pages, rm_pages, new_pages;
535 struct buffer_page *page, *tmp; 584 struct buffer_page *bpage, *tmp;
536 unsigned long buffer_size; 585 unsigned long buffer_size;
537 unsigned long addr; 586 unsigned long addr;
538 LIST_HEAD(pages); 587 LIST_HEAD(pages);
@@ -562,7 +611,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
562 if (size < buffer_size) { 611 if (size < buffer_size) {
563 612
564 /* easy case, just free pages */ 613 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages); 614 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
615 mutex_unlock(&buffer->mutex);
616 return -1;
617 }
566 618
567 rm_pages = buffer->pages - nr_pages; 619 rm_pages = buffer->pages - nr_pages;
568 620
@@ -581,21 +633,26 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
581 * add these pages to the cpu_buffers. Otherwise we just free 633 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM; 634 * them all and return -ENOMEM;
583 */ 635 */
584 BUG_ON(nr_pages <= buffer->pages); 636 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
637 mutex_unlock(&buffer->mutex);
638 return -1;
639 }
640
585 new_pages = nr_pages - buffer->pages; 641 new_pages = nr_pages - buffer->pages;
586 642
587 for_each_buffer_cpu(buffer, cpu) { 643 for_each_buffer_cpu(buffer, cpu) {
588 for (i = 0; i < new_pages; i++) { 644 for (i = 0; i < new_pages; i++) {
589 page = kzalloc_node(ALIGN(sizeof(*page), 645 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
590 cache_line_size()), 646 cache_line_size()),
591 GFP_KERNEL, cpu_to_node(cpu)); 647 GFP_KERNEL, cpu_to_node(cpu));
592 if (!page) 648 if (!bpage)
593 goto free_pages; 649 goto free_pages;
594 list_add(&page->list, &pages); 650 list_add(&bpage->list, &pages);
595 addr = __get_free_page(GFP_KERNEL); 651 addr = __get_free_page(GFP_KERNEL);
596 if (!addr) 652 if (!addr)
597 goto free_pages; 653 goto free_pages;
598 page->page = (void *)addr; 654 bpage->page = (void *)addr;
655 rb_init_page(bpage->page);
599 } 656 }
600 } 657 }
601 658
@@ -604,7 +661,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
604 rb_insert_pages(cpu_buffer, &pages, new_pages); 661 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 } 662 }
606 663
607 BUG_ON(!list_empty(&pages)); 664 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
665 mutex_unlock(&buffer->mutex);
666 return -1;
667 }
608 668
609 out: 669 out:
610 buffer->pages = nr_pages; 670 buffer->pages = nr_pages;
@@ -613,9 +673,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
613 return size; 673 return size;
614 674
615 free_pages: 675 free_pages:
616 list_for_each_entry_safe(page, tmp, &pages, list) { 676 list_for_each_entry_safe(bpage, tmp, &pages, list) {
617 list_del_init(&page->list); 677 list_del_init(&bpage->list);
618 free_buffer_page(page); 678 free_buffer_page(bpage);
619 } 679 }
620 mutex_unlock(&buffer->mutex); 680 mutex_unlock(&buffer->mutex);
621 return -ENOMEM; 681 return -ENOMEM;
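With the BUG_ON()s replaced by RB_WARN_ON() plus an unlock-and-return, a failed resize now reaches the caller as an error code instead of taking the machine down. A hypothetical caller (names are illustrative):

/* Hypothetical caller of ring_buffer_resize() after this change. */
static int set_buffer_size(struct ring_buffer *buffer, unsigned long size)
{
        int ret;

        ret = ring_buffer_resize(buffer, size);
        if (ret < 0)
                return ret;     /* -ENOMEM, or -1 if a consistency check fired */

        return 0;
}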
@@ -626,9 +686,15 @@ static inline int rb_null_event(struct ring_buffer_event *event)
626 return event->type == RINGBUF_TYPE_PADDING; 686 return event->type == RINGBUF_TYPE_PADDING;
627} 687}
628 688
629static inline void *__rb_page_index(struct buffer_page *page, unsigned index) 689static inline void *
690__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
691{
692 return bpage->data + index;
693}
694
695static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
630{ 696{
631 return page->page + index; 697 return bpage->page->data + index;
632} 698}
633 699
634static inline struct ring_buffer_event * 700static inline struct ring_buffer_event *
@@ -658,7 +724,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
658 724
659static inline unsigned rb_page_commit(struct buffer_page *bpage) 725static inline unsigned rb_page_commit(struct buffer_page *bpage)
660{ 726{
661 return local_read(&bpage->commit); 727 return local_read(&bpage->page->commit);
662} 728}
663 729
 664 /* Size is determined by what has been committed */ 730
@@ -693,7 +759,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 head += rb_event_length(event)) { 759 head += rb_event_length(event)) {
694 760
695 event = __rb_page_index(cpu_buffer->head_page, head); 761 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event)); 762 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
763 return;
697 /* Only count data entries */ 764 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA) 765 if (event->type != RINGBUF_TYPE_DATA)
699 continue; 766 continue;
@@ -703,14 +770,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
703} 770}
704 771
705static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 772static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
706 struct buffer_page **page) 773 struct buffer_page **bpage)
707{ 774{
708 struct list_head *p = (*page)->list.next; 775 struct list_head *p = (*bpage)->list.next;
709 776
710 if (p == &cpu_buffer->pages) 777 if (p == &cpu_buffer->pages)
711 p = p->next; 778 p = p->next;
712 779
713 *page = list_entry(p, struct buffer_page, list); 780 *bpage = list_entry(p, struct buffer_page, list);
714} 781}
715 782
716static inline unsigned 783static inline unsigned
@@ -746,16 +813,18 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746 addr &= PAGE_MASK; 813 addr &= PAGE_MASK;
747 814
748 while (cpu_buffer->commit_page->page != (void *)addr) { 815 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer, 816 if (RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page); 817 cpu_buffer->commit_page == cpu_buffer->tail_page))
751 cpu_buffer->commit_page->commit = 818 return;
819 cpu_buffer->commit_page->page->commit =
752 cpu_buffer->commit_page->write; 820 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 821 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
754 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 822 cpu_buffer->write_stamp =
823 cpu_buffer->commit_page->page->time_stamp;
755 } 824 }
756 825
757 /* Now set the commit to the event's index */ 826 /* Now set the commit to the event's index */
758 local_set(&cpu_buffer->commit_page->commit, index); 827 local_set(&cpu_buffer->commit_page->page->commit, index);
759} 828}
760 829
761static inline void 830static inline void
@@ -769,25 +838,38 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
769 * back to us). This allows us to do a simple loop to 838 * back to us). This allows us to do a simple loop to
770 * assign the commit to the tail. 839 * assign the commit to the tail.
771 */ 840 */
841 again:
772 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 842 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
773 cpu_buffer->commit_page->commit = 843 cpu_buffer->commit_page->page->commit =
774 cpu_buffer->commit_page->write; 844 cpu_buffer->commit_page->write;
775 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 845 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
776 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 846 cpu_buffer->write_stamp =
847 cpu_buffer->commit_page->page->time_stamp;
777 /* add barrier to keep gcc from optimizing too much */ 848 /* add barrier to keep gcc from optimizing too much */
778 barrier(); 849 barrier();
779 } 850 }
780 while (rb_commit_index(cpu_buffer) != 851 while (rb_commit_index(cpu_buffer) !=
781 rb_page_write(cpu_buffer->commit_page)) { 852 rb_page_write(cpu_buffer->commit_page)) {
782 cpu_buffer->commit_page->commit = 853 cpu_buffer->commit_page->page->commit =
783 cpu_buffer->commit_page->write; 854 cpu_buffer->commit_page->write;
784 barrier(); 855 barrier();
785 } 856 }
857
858 /* again, keep gcc from optimizing */
859 barrier();
860
861 /*
862 * If an interrupt came in just after the first while loop
863 * and pushed the tail page forward, we will be left with
864 * a dangling commit that will never go forward.
865 */
866 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
867 goto again;
786} 868}
787 869
788static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 870static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
789{ 871{
790 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; 872 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
791 cpu_buffer->reader_page->read = 0; 873 cpu_buffer->reader_page->read = 0;
792} 874}
793 875
@@ -806,7 +888,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
806 else 888 else
807 rb_inc_page(cpu_buffer, &iter->head_page); 889 rb_inc_page(cpu_buffer, &iter->head_page);
808 890
809 iter->read_stamp = iter->head_page->time_stamp; 891 iter->read_stamp = iter->head_page->page->time_stamp;
810 iter->head = 0; 892 iter->head = 0;
811} 893}
812 894
@@ -880,12 +962,15 @@ static struct ring_buffer_event *
880__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 962__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
881 unsigned type, unsigned long length, u64 *ts) 963 unsigned type, unsigned long length, u64 *ts)
882{ 964{
883 struct buffer_page *tail_page, *head_page, *reader_page; 965 struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
884 unsigned long tail, write; 966 unsigned long tail, write;
885 struct ring_buffer *buffer = cpu_buffer->buffer; 967 struct ring_buffer *buffer = cpu_buffer->buffer;
886 struct ring_buffer_event *event; 968 struct ring_buffer_event *event;
887 unsigned long flags; 969 unsigned long flags;
888 970
971 commit_page = cpu_buffer->commit_page;
972 /* we just need to protect against interrupts */
973 barrier();
889 tail_page = cpu_buffer->tail_page; 974 tail_page = cpu_buffer->tail_page;
890 write = local_add_return(length, &tail_page->write); 975 write = local_add_return(length, &tail_page->write);
891 tail = write - length; 976 tail = write - length;
@@ -894,7 +979,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
894 if (write > BUF_PAGE_SIZE) { 979 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page; 980 struct buffer_page *next_page = tail_page;
896 981
897 spin_lock_irqsave(&cpu_buffer->lock, flags); 982 local_irq_save(flags);
983 __raw_spin_lock(&cpu_buffer->lock);
898 984
899 rb_inc_page(cpu_buffer, &next_page); 985 rb_inc_page(cpu_buffer, &next_page);
900 986
@@ -902,14 +988,15 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
902 reader_page = cpu_buffer->reader_page; 988 reader_page = cpu_buffer->reader_page;
903 989
904 /* we grabbed the lock before incrementing */ 990 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page); 991 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
992 goto out_unlock;
906 993
907 /* 994 /*
908 * If for some reason, we had an interrupt storm that made 995 * If for some reason, we had an interrupt storm that made
909 * it all the way around the buffer, bail, and warn 996 * it all the way around the buffer, bail, and warn
910 * about it. 997 * about it.
911 */ 998 */
912 if (unlikely(next_page == cpu_buffer->commit_page)) { 999 if (unlikely(next_page == commit_page)) {
913 WARN_ON_ONCE(1); 1000 WARN_ON_ONCE(1);
914 goto out_unlock; 1001 goto out_unlock;
915 } 1002 }
@@ -940,12 +1027,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
940 */ 1027 */
941 if (tail_page == cpu_buffer->tail_page) { 1028 if (tail_page == cpu_buffer->tail_page) {
942 local_set(&next_page->write, 0); 1029 local_set(&next_page->write, 0);
943 local_set(&next_page->commit, 0); 1030 local_set(&next_page->page->commit, 0);
944 cpu_buffer->tail_page = next_page; 1031 cpu_buffer->tail_page = next_page;
945 1032
946 /* reread the time stamp */ 1033 /* reread the time stamp */
947 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1034 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
948 cpu_buffer->tail_page->time_stamp = *ts; 1035 cpu_buffer->tail_page->page->time_stamp = *ts;
949 } 1036 }
950 1037
951 /* 1038 /*
@@ -970,7 +1057,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
970 rb_set_commit_to_write(cpu_buffer); 1057 rb_set_commit_to_write(cpu_buffer);
971 } 1058 }
972 1059
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1060 __raw_spin_unlock(&cpu_buffer->lock);
1061 local_irq_restore(flags);
974 1062
975 /* fail and let the caller try again */ 1063 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN); 1064 return ERR_PTR(-EAGAIN);
@@ -978,7 +1066,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
978 1066
979 /* We reserved something on the buffer */ 1067 /* We reserved something on the buffer */
980 1068
981 BUG_ON(write > BUF_PAGE_SIZE); 1069 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1070 return NULL;
982 1071
983 event = __rb_page_index(tail_page, tail); 1072 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length); 1073 rb_update_event(event, type, length);
@@ -988,12 +1077,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
988 * this page's time stamp. 1077 * this page's time stamp.
989 */ 1078 */
990 if (!tail && rb_is_commit(cpu_buffer, event)) 1079 if (!tail && rb_is_commit(cpu_buffer, event))
991 cpu_buffer->commit_page->time_stamp = *ts; 1080 cpu_buffer->commit_page->page->time_stamp = *ts;
992 1081
993 return event; 1082 return event;
994 1083
995 out_unlock: 1084 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1085 __raw_spin_unlock(&cpu_buffer->lock);
1086 local_irq_restore(flags);
997 return NULL; 1087 return NULL;
998} 1088}
999 1089
@@ -1038,7 +1128,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1038 event->time_delta = *delta & TS_MASK; 1128 event->time_delta = *delta & TS_MASK;
1039 event->array[0] = *delta >> TS_SHIFT; 1129 event->array[0] = *delta >> TS_SHIFT;
1040 } else { 1130 } else {
1041 cpu_buffer->commit_page->time_stamp = *ts; 1131 cpu_buffer->commit_page->page->time_stamp = *ts;
1042 event->time_delta = 0; 1132 event->time_delta = 0;
1043 event->array[0] = 0; 1133 event->array[0] = 0;
1044 } 1134 }
@@ -1076,10 +1166,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1076 * storm or we have something buggy. 1166 * storm or we have something buggy.
1077 * Bail! 1167 * Bail!
1078 */ 1168 */
1079 if (unlikely(++nr_loops > 1000)) { 1169 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL; 1170 return NULL;
1082 }
1083 1171
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1172 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085 1173
@@ -1175,15 +1263,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1175 struct ring_buffer_event *event; 1263 struct ring_buffer_event *event;
1176 int cpu, resched; 1264 int cpu, resched;
1177 1265
1178 if (ring_buffers_off) 1266 if (ring_buffer_flags != RB_BUFFERS_ON)
1179 return NULL; 1267 return NULL;
1180 1268
1181 if (atomic_read(&buffer->record_disabled)) 1269 if (atomic_read(&buffer->record_disabled))
1182 return NULL; 1270 return NULL;
1183 1271
1184 /* If we are tracing schedule, we don't want to recurse */ 1272 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched(); 1273 resched = ftrace_preempt_disable();
1186 preempt_disable_notrace();
1187 1274
1188 cpu = raw_smp_processor_id(); 1275 cpu = raw_smp_processor_id();
1189 1276
@@ -1214,10 +1301,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1214 return event; 1301 return event;
1215 1302
1216 out: 1303 out:
1217 if (resched) 1304 ftrace_preempt_enable(resched);
1218 preempt_enable_no_resched_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL; 1305 return NULL;
1222} 1306}
1223 1307
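The ftrace_preempt_disable()/ftrace_preempt_enable() pair used here simply factors out the open-coded sequence being deleted above; a rough equivalent of the helpers, for reference (sketch, mirroring the removed code):

/* Rough equivalent of the helpers, mirroring the code removed above. */
static inline int ftrace_preempt_disable(void)
{
        int resched = need_resched();

        preempt_disable_notrace();
        return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
        if (resched)
                preempt_enable_no_resched_notrace();
        else
                preempt_enable_notrace();
}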
@@ -1259,12 +1343,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1259 /* 1343 /*
1260 * Only the last preempt count needs to restore preemption. 1344 * Only the last preempt count needs to restore preemption.
1261 */ 1345 */
1262 if (preempt_count() == 1) { 1346 if (preempt_count() == 1)
1263 if (per_cpu(rb_need_resched, cpu)) 1347 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1264 preempt_enable_no_resched_notrace(); 1348 else
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace(); 1349 preempt_enable_no_resched_notrace();
1269 1350
1270 return 0; 1351 return 0;
@@ -1294,14 +1375,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1294 int ret = -EBUSY; 1375 int ret = -EBUSY;
1295 int cpu, resched; 1376 int cpu, resched;
1296 1377
1297 if (ring_buffers_off) 1378 if (ring_buffer_flags != RB_BUFFERS_ON)
1298 return -EBUSY; 1379 return -EBUSY;
1299 1380
1300 if (atomic_read(&buffer->record_disabled)) 1381 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY; 1382 return -EBUSY;
1302 1383
1303 resched = need_resched(); 1384 resched = ftrace_preempt_disable();
1304 preempt_disable_notrace();
1305 1385
1306 cpu = raw_smp_processor_id(); 1386 cpu = raw_smp_processor_id();
1307 1387
@@ -1327,10 +1407,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1327 1407
1328 ret = 0; 1408 ret = 0;
1329 out: 1409 out:
1330 if (resched) 1410 ftrace_preempt_enable(resched);
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334 1411
1335 return ret; 1412 return ret;
1336} 1413}
@@ -1489,14 +1566,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1489 return overruns; 1566 return overruns;
1490} 1567}
1491 1568
1492/** 1569static void rb_iter_reset(struct ring_buffer_iter *iter)
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{ 1570{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1571 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502 1572
@@ -1511,7 +1581,24 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1511 if (iter->head) 1581 if (iter->head)
1512 iter->read_stamp = cpu_buffer->read_stamp; 1582 iter->read_stamp = cpu_buffer->read_stamp;
1513 else 1583 else
1514 iter->read_stamp = iter->head_page->time_stamp; 1584 iter->read_stamp = iter->head_page->page->time_stamp;
1585}
1586
1587/**
1588 * ring_buffer_iter_reset - reset an iterator
1589 * @iter: The iterator to reset
1590 *
1591 * Resets the iterator, so that it will start from the beginning
1592 * again.
1593 */
1594void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1595{
1596 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1597 unsigned long flags;
1598
1599 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1600 rb_iter_reset(iter);
1601 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1515} 1602}
1516 1603
1517/** 1604/**
@@ -1597,7 +1684,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1597 unsigned long flags; 1684 unsigned long flags;
1598 int nr_loops = 0; 1685 int nr_loops = 0;
1599 1686
1600 spin_lock_irqsave(&cpu_buffer->lock, flags); 1687 local_irq_save(flags);
1688 __raw_spin_lock(&cpu_buffer->lock);
1601 1689
1602 again: 1690 again:
1603 /* 1691 /*
@@ -1606,8 +1694,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 * a case where we will loop three times. There should be no 1694 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of). 1695 * reason to loop four times (that I know of).
1608 */ 1696 */
1609 if (unlikely(++nr_loops > 3)) { 1697 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL; 1698 reader = NULL;
1612 goto out; 1699 goto out;
1613 } 1700 }
@@ -1619,8 +1706,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 goto out; 1706 goto out;
1620 1707
1621 /* Never should we have an index greater than the size */ 1708 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer, 1709 if (RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader)); 1710 cpu_buffer->reader_page->read > rb_page_size(reader)))
1711 goto out;
1624 1712
1625 /* check if we caught up to the tail */ 1713 /* check if we caught up to the tail */
1626 reader = NULL; 1714 reader = NULL;
@@ -1637,7 +1725,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1637 cpu_buffer->reader_page->list.prev = reader->list.prev; 1725 cpu_buffer->reader_page->list.prev = reader->list.prev;
1638 1726
1639 local_set(&cpu_buffer->reader_page->write, 0); 1727 local_set(&cpu_buffer->reader_page->write, 0);
1640 local_set(&cpu_buffer->reader_page->commit, 0); 1728 local_set(&cpu_buffer->reader_page->page->commit, 0);
1641 1729
1642 /* Make the reader page now replace the head */ 1730 /* Make the reader page now replace the head */
1643 reader->list.prev->next = &cpu_buffer->reader_page->list; 1731 reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1659,7 +1747,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 goto again; 1747 goto again;
1660 1748
1661 out: 1749 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1750 __raw_spin_unlock(&cpu_buffer->lock);
1751 local_irq_restore(flags);
1663 1752
1664 return reader; 1753 return reader;
1665} 1754}
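At this point both halves of the new locking scheme are visible: readers serialize on the new reader_lock, while the page-juggling critical sections (the writer overflow path and the reader-page swap above) take the raw cpu_buffer->lock with interrupts disabled by hand. In outline (function names are illustrative only):

/* Illustration of the two lock levels introduced in this patch. */
static void reader_side(struct ring_buffer_per_cpu *cpu_buffer)
{
        unsigned long flags;

        spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
        /* peek at / advance the reader page */
        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}

static void page_swap_side(struct ring_buffer_per_cpu *cpu_buffer)
{
        unsigned long flags;

        local_irq_save(flags);
        __raw_spin_lock(&cpu_buffer->lock);
        /* advance the tail page or swap in the reader page */
        __raw_spin_unlock(&cpu_buffer->lock);
        local_irq_restore(flags);
}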
@@ -1673,7 +1762,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1673 reader = rb_get_reader_page(cpu_buffer); 1762 reader = rb_get_reader_page(cpu_buffer);
1674 1763
1675 /* This function should not be called when buffer is empty */ 1764 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader); 1765 if (RB_WARN_ON(cpu_buffer, !reader))
1766 return;
1677 1767
1678 event = rb_reader_event(cpu_buffer); 1768 event = rb_reader_event(cpu_buffer);
1679 1769
@@ -1700,7 +1790,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * Check if we are at the end of the buffer. 1790 * Check if we are at the end of the buffer.
1701 */ 1791 */
1702 if (iter->head >= rb_page_size(iter->head_page)) { 1792 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1793 if (RB_WARN_ON(buffer,
1794 iter->head_page == cpu_buffer->commit_page))
1795 return;
1704 rb_inc_iter(iter); 1796 rb_inc_iter(iter);
1705 return; 1797 return;
1706 } 1798 }
@@ -1713,8 +1805,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 * This should not be called to advance the header if we are 1805 * This should not be called to advance the header if we are
1714 * at the tail of the buffer. 1806 * at the tail of the buffer.
1715 */ 1807 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1808 if (RB_WARN_ON(cpu_buffer,
1717 (iter->head + length > rb_commit_index(cpu_buffer))); 1809 (iter->head_page == cpu_buffer->commit_page) &&
1810 (iter->head + length > rb_commit_index(cpu_buffer))))
1811 return;
1718 1812
1719 rb_update_iter_read_stamp(iter, event); 1813 rb_update_iter_read_stamp(iter, event);
1720 1814
@@ -1726,17 +1820,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1726 rb_advance_iter(iter); 1820 rb_advance_iter(iter);
1727} 1821}
1728 1822
1729/** 1823static struct ring_buffer_event *
1730 * ring_buffer_peek - peek at the next event to be read 1824rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1731 * @buffer: The ring buffer to read
1732 * @cpu: The cpu to peak at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{ 1825{
1741 struct ring_buffer_per_cpu *cpu_buffer; 1826 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event; 1827 struct ring_buffer_event *event;
@@ -1757,10 +1842,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1757 * can have. Nesting 10 deep of interrupts is clearly 1842 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly. 1843 * an anomaly.
1759 */ 1844 */
1760 if (unlikely(++nr_loops > 10)) { 1845 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL; 1846 return NULL;
1763 }
1764 1847
1765 reader = rb_get_reader_page(cpu_buffer); 1848 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader) 1849 if (!reader)
@@ -1798,16 +1881,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1798 return NULL; 1881 return NULL;
1799} 1882}
1800 1883
1801/** 1884static struct ring_buffer_event *
1802 * ring_buffer_iter_peek - peek at the next event to be read 1885rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{ 1886{
1812 struct ring_buffer *buffer; 1887 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer; 1888 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1904,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1829 * can have. Nesting 10 deep of interrupts is clearly 1904 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly. 1905 * an anomaly.
1831 */ 1906 */
1832 if (unlikely(++nr_loops > 10)) { 1907 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL; 1908 return NULL;
1835 }
1836 1909
1837 if (rb_per_cpu_empty(cpu_buffer)) 1910 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL; 1911 return NULL;
@@ -1869,6 +1942,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1869} 1942}
1870 1943
1871/** 1944/**
1945 * ring_buffer_peek - peek at the next event to be read
1946 * @buffer: The ring buffer to read
 1947 * @cpu: The cpu to peek at
1948 * @ts: The timestamp counter of this event.
1949 *
1950 * This will return the event that will be read next, but does
1951 * not consume the data.
1952 */
1953struct ring_buffer_event *
1954ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1955{
1956 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1957 struct ring_buffer_event *event;
1958 unsigned long flags;
1959
1960 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1961 event = rb_buffer_peek(buffer, cpu, ts);
1962 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1963
1964 return event;
1965}
1966
1967/**
1968 * ring_buffer_iter_peek - peek at the next event to be read
1969 * @iter: The ring buffer iterator
1970 * @ts: The timestamp counter of this event.
1971 *
1972 * This will return the event that will be read next, but does
1973 * not increment the iterator.
1974 */
1975struct ring_buffer_event *
1976ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1977{
1978 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1979 struct ring_buffer_event *event;
1980 unsigned long flags;
1981
1982 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1983 event = rb_iter_peek(iter, ts);
1984 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1985
1986 return event;
1987}
1988
1989/**
1872 * ring_buffer_consume - return an event and consume it 1990 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from 1991 * @buffer: The ring buffer to get the next event from
1874 * 1992 *
@@ -1879,19 +1997,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1879struct ring_buffer_event * 1997struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1998ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{ 1999{
1882 struct ring_buffer_per_cpu *cpu_buffer; 2000 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1883 struct ring_buffer_event *event; 2001 struct ring_buffer_event *event;
2002 unsigned long flags;
1884 2003
1885 if (!cpu_isset(cpu, buffer->cpumask)) 2004 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL; 2005 return NULL;
1887 2006
1888 event = ring_buffer_peek(buffer, cpu, ts); 2007 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2008
2009 event = rb_buffer_peek(buffer, cpu, ts);
1889 if (!event) 2010 if (!event)
1890 return NULL; 2011 goto out;
1891 2012
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer); 2013 rb_advance_reader(cpu_buffer);
1894 2014
2015 out:
2016 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2017
1895 return event; 2018 return event;
1896} 2019}
1897 2020
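Since ring_buffer_consume() now takes the per-cpu reader_lock itself, a consumer no longer needs to provide its own serialization against other readers. A hypothetical reader loop (handle_event() is illustrative):

/* Hypothetical consumer; each call serializes on reader_lock internally. */
static void drain_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL)
                handle_event(ring_buffer_event_data(event), ts);
}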
@@ -1928,9 +2051,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1928 atomic_inc(&cpu_buffer->record_disabled); 2051 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched(); 2052 synchronize_sched();
1930 2053
1931 spin_lock_irqsave(&cpu_buffer->lock, flags); 2054 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 ring_buffer_iter_reset(iter); 2055 __raw_spin_lock(&cpu_buffer->lock);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2056 rb_iter_reset(iter);
2057 __raw_spin_unlock(&cpu_buffer->lock);
2058 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934 2059
1935 return iter; 2060 return iter;
1936} 2061}
@@ -1962,12 +2087,17 @@ struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2087ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{ 2088{
1964 struct ring_buffer_event *event; 2089 struct ring_buffer_event *event;
2090 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2091 unsigned long flags;
1965 2092
1966 event = ring_buffer_iter_peek(iter, ts); 2093 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2094 event = rb_iter_peek(iter, ts);
1967 if (!event) 2095 if (!event)
1968 return NULL; 2096 goto out;
1969 2097
1970 rb_advance_iter(iter); 2098 rb_advance_iter(iter);
2099 out:
2100 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971 2101
1972 return event; 2102 return event;
1973} 2103}
@@ -1987,7 +2117,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1987 cpu_buffer->head_page 2117 cpu_buffer->head_page
1988 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2118 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1989 local_set(&cpu_buffer->head_page->write, 0); 2119 local_set(&cpu_buffer->head_page->write, 0);
1990 local_set(&cpu_buffer->head_page->commit, 0); 2120 local_set(&cpu_buffer->head_page->page->commit, 0);
1991 2121
1992 cpu_buffer->head_page->read = 0; 2122 cpu_buffer->head_page->read = 0;
1993 2123
@@ -1996,7 +2126,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1996 2126
1997 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2127 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1998 local_set(&cpu_buffer->reader_page->write, 0); 2128 local_set(&cpu_buffer->reader_page->write, 0);
1999 local_set(&cpu_buffer->reader_page->commit, 0); 2129 local_set(&cpu_buffer->reader_page->page->commit, 0);
2000 cpu_buffer->reader_page->read = 0; 2130 cpu_buffer->reader_page->read = 0;
2001 2131
2002 cpu_buffer->overrun = 0; 2132 cpu_buffer->overrun = 0;
@@ -2016,11 +2146,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2016 if (!cpu_isset(cpu, buffer->cpumask)) 2146 if (!cpu_isset(cpu, buffer->cpumask))
2017 return; 2147 return;
2018 2148
2019 spin_lock_irqsave(&cpu_buffer->lock, flags); 2149 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2150
2151 __raw_spin_lock(&cpu_buffer->lock);
2020 2152
2021 rb_reset_cpu(cpu_buffer); 2153 rb_reset_cpu(cpu_buffer);
2022 2154
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2155 __raw_spin_unlock(&cpu_buffer->lock);
2156
2157 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2024} 2158}
2025 2159
2026/** 2160/**
@@ -2090,8 +2224,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2090 return -EINVAL; 2224 return -EINVAL;
2091 2225
2092 /* At least make sure the two buffers are somewhat the same */ 2226 /* At least make sure the two buffers are somewhat the same */
2093 if (buffer_a->size != buffer_b->size || 2227 if (buffer_a->pages != buffer_b->pages)
2094 buffer_a->pages != buffer_b->pages)
2095 return -EINVAL; 2228 return -EINVAL;
2096 2229
2097 cpu_buffer_a = buffer_a->buffers[cpu]; 2230 cpu_buffer_a = buffer_a->buffers[cpu];
@@ -2118,16 +2251,178 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2118 return 0; 2251 return 0;
2119} 2252}
2120 2253
2254static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2255 struct buffer_data_page *bpage)
2256{
2257 struct ring_buffer_event *event;
2258 unsigned long head;
2259
2260 __raw_spin_lock(&cpu_buffer->lock);
2261 for (head = 0; head < local_read(&bpage->commit);
2262 head += rb_event_length(event)) {
2263
2264 event = __rb_data_page_index(bpage, head);
2265 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2266 return;
2267 /* Only count data entries */
2268 if (event->type != RINGBUF_TYPE_DATA)
2269 continue;
2270 cpu_buffer->entries--;
2271 }
2272 __raw_spin_unlock(&cpu_buffer->lock);
2273}
2274
2275/**
2276 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2277 * @buffer: the buffer to allocate for.
2278 *
2279 * This function is used in conjunction with ring_buffer_read_page.
2280 * When reading a full page from the ring buffer, these functions
2281 * can be used to speed up the process. The calling function should
2282 * allocate a few pages first with this function. Then when it
2283 * needs to get pages from the ring buffer, it passes the result
2284 * of this function into ring_buffer_read_page, which will swap
2285 * the page that was allocated, with the read page of the buffer.
2286 *
2287 * Returns:
2288 * The page allocated, or NULL on error.
2289 */
2290void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2291{
2292 unsigned long addr;
2293 struct buffer_data_page *bpage;
2294
2295 addr = __get_free_page(GFP_KERNEL);
2296 if (!addr)
2297 return NULL;
2298
2299 bpage = (void *)addr;
2300
2301 return bpage;
2302}
2303
2304/**
2305 * ring_buffer_free_read_page - free an allocated read page
 2306 * @buffer: the buffer the page was allocated for
2307 * @data: the page to free
2308 *
2309 * Free a page allocated from ring_buffer_alloc_read_page.
2310 */
2311void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2312{
2313 free_page((unsigned long)data);
2314}
2315
2316/**
2317 * ring_buffer_read_page - extract a page from the ring buffer
2318 * @buffer: buffer to extract from
2319 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2320 * @cpu: the cpu of the buffer to extract
2321 * @full: should the extraction only happen when the page is full.
2322 *
2323 * This function will pull out a page from the ring buffer and consume it.
2324 * @data_page must be the address of the variable that was returned
2325 * from ring_buffer_alloc_read_page. This is because the page might be used
2326 * to swap with a page in the ring buffer.
2327 *
2328 * for example:
 2329 * rpage = ring_buffer_alloc_read_page(buffer);
2330 * if (!rpage)
2331 * return error;
2332 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2333 * if (ret)
2334 * process_page(rpage);
2335 *
2336 * When @full is set, the function will not return true unless
2337 * the writer is off the reader page.
2338 *
2339 * Note: it is up to the calling functions to handle sleeps and wakeups.
2340 * The ring buffer can be used anywhere in the kernel and can not
2341 * blindly call wake_up. The layer that uses the ring buffer must be
2342 * responsible for that.
2343 *
2344 * Returns:
2345 * 1 if data has been transferred
2346 * 0 if no data has been transferred.
2347 */
2348int ring_buffer_read_page(struct ring_buffer *buffer,
2349 void **data_page, int cpu, int full)
2350{
2351 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2352 struct ring_buffer_event *event;
2353 struct buffer_data_page *bpage;
2354 unsigned long flags;
2355 int ret = 0;
2356
2357 if (!data_page)
2358 return 0;
2359
2360 bpage = *data_page;
2361 if (!bpage)
2362 return 0;
2363
2364 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2365
2366 /*
2367 * rb_buffer_peek will get the next ring buffer if
2368 * the current reader page is empty.
2369 */
2370 event = rb_buffer_peek(buffer, cpu, NULL);
2371 if (!event)
2372 goto out;
2373
2374 /* check for data */
2375 if (!local_read(&cpu_buffer->reader_page->page->commit))
2376 goto out;
2377 /*
2378 * If the writer is already off of the read page, then simply
2379 * switch the read page with the given page. Otherwise
2380 * we need to copy the data from the reader to the writer.
2381 */
2382 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2383 unsigned int read = cpu_buffer->reader_page->read;
2384
2385 if (full)
2386 goto out;
2387 /* The writer is still on the reader page, we must copy */
2388 bpage = cpu_buffer->reader_page->page;
2389 memcpy(bpage->data,
2390 cpu_buffer->reader_page->page->data + read,
2391 local_read(&bpage->commit) - read);
2392
2393 /* consume what was read */
2394 cpu_buffer->reader_page += read;
2395
2396 } else {
2397 /* swap the pages */
2398 rb_init_page(bpage);
2399 bpage = cpu_buffer->reader_page->page;
2400 cpu_buffer->reader_page->page = *data_page;
2401 cpu_buffer->reader_page->read = 0;
2402 *data_page = bpage;
2403 }
2404 ret = 1;
2405
2406 /* update the entry counter */
2407 rb_remove_entries(cpu_buffer, bpage);
2408 out:
2409 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2410
2411 return ret;
2412}
2413
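Spelling out the calling sequence from the kernel-doc above a little more completely (hypothetical caller; process_page() is illustrative):

/* Hypothetical user of the page-level read interface added above. */
static int read_one_page(struct ring_buffer *buffer, int cpu)
{
        void *rpage;
        int ret;

        rpage = ring_buffer_alloc_read_page(buffer);
        if (!rpage)
                return -ENOMEM;

        /* full == 0: accept a partially filled reader page as well */
        ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
        if (ret)
                process_page(rpage);    /* rpage may have been swapped */

        ring_buffer_free_read_page(buffer, rpage);
        return ret;     /* 1 if a page worth of data was transferred */
}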
2121static ssize_t 2414static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf, 2415rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos) 2416 size_t cnt, loff_t *ppos)
2124{ 2417{
2125 int *p = filp->private_data; 2418 long *p = filp->private_data;
2126 char buf[64]; 2419 char buf[64];
2127 int r; 2420 int r;
2128 2421
2129 /* !ring_buffers_off == tracing_on */ 2422 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2130 r = sprintf(buf, "%d\n", !*p); 2423 r = sprintf(buf, "permanently disabled\n");
2424 else
2425 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2131 2426
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2427 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133} 2428}
@@ -2136,7 +2431,7 @@ static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf, 2431rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos) 2432 size_t cnt, loff_t *ppos)
2138{ 2433{
2139 int *p = filp->private_data; 2434 long *p = filp->private_data;
2140 char buf[64]; 2435 char buf[64];
2141 long val; 2436 long val;
2142 int ret; 2437 int ret;
@@ -2153,8 +2448,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2153 if (ret < 0) 2448 if (ret < 0)
2154 return ret; 2449 return ret;
2155 2450
2156 /* !ring_buffers_off == tracing_on */ 2451 if (val)
2157 *p = !val; 2452 set_bit(RB_BUFFERS_ON_BIT, p);
2453 else
2454 clear_bit(RB_BUFFERS_ON_BIT, p);
2158 2455
2159 (*ppos)++; 2456 (*ppos)++;
2160 2457
@@ -2176,7 +2473,7 @@ static __init int rb_init_debugfs(void)
2176 d_tracer = tracing_init_dentry(); 2473 d_tracer = tracing_init_dentry();
2177 2474
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2475 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops); 2476 &ring_buffer_flags, &rb_simple_fops);
2180 if (!entry) 2477 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2478 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182 2479