Diffstat (limited to 'kernel/trace/ring_buffer.c'):
-rw-r--r--  kernel/trace/ring_buffer.c  | 774
1 file changed, 586 insertions(+), 188 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2f76193c3489..7f69cfeaadf7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,100 @@
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18 18
19#include "trace.h"
20
21/*
22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
25 * Turning this switch on, makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
 28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
 36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
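/*
 * Illustrative sketch, not part of the patch: per the table above, recording
 * is allowed only when the flags word is exactly RB_BUFFERS_ON (ON bit set,
 * DISABLED bit clear).  The hypothetical helper below mirrors the check this
 * patch adds to ring_buffer_lock_reserve() and ring_buffer_write() further
 * down.
 */
static inline int rb_recording_allowed(void)
{
        /* 0 0: off, 1 0: on, X 1: permanently disabled */
        return ring_buffer_flags == RB_BUFFERS_ON;
}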
61
62/**
63 * tracing_on - enable all tracing buffers
64 *
65 * This function enables all tracing buffers that may have been
66 * disabled with tracing_off.
67 */
68void tracing_on(void)
69{
70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
71}
72
73/**
74 * tracing_off - turn off all tracing buffers
75 *
76 * This function stops all tracing buffers from recording data.
77 * It does not disable any overhead the tracers themselves may
78 * be causing. This function simply causes all recording to
79 * the ring buffers to fail.
80 */
81void tracing_off(void)
82{
83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
84}
85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
 90 * permanently.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
95}
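/*
 * Usage sketch (hypothetical caller, not part of the patch): a debugging hook
 * can stop all ring buffers when it detects a problem, so the trace leading
 * up to the event is preserved; tracing_on() re-enables recording later,
 * while tracing_off_permanent() is reserved for unrecoverable anomalies.
 */
static void my_report_anomaly(void)            /* hypothetical */
{
        tracing_off();          /* freeze the contents of every ring buffer */
        /* ... dump state, WARN, etc. ... */
        /* tracing_on();   re-enable once the report is captured */
}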
96
97#include "trace.h"
98
19/* Up this if you want to test the TIME_EXTENTS and normalization */ 99/* Up this if you want to test the TIME_EXTENTS and normalization */
20#define DEBUG_SHIFT 0 100#define DEBUG_SHIFT 0
21 101
22/* FIXME!!! */ 102/* FIXME!!! */
23u64 ring_buffer_time_stamp(int cpu) 103u64 ring_buffer_time_stamp(int cpu)
24{ 104{
105 u64 time;
106
107 preempt_disable_notrace();
25 /* shift to debug/test normalization and TIME_EXTENTS */ 108 /* shift to debug/test normalization and TIME_EXTENTS */
26 return sched_clock() << DEBUG_SHIFT; 109 time = sched_clock() << DEBUG_SHIFT;
110 preempt_enable_notrace();
111
112 return time;
27} 113}
28 114
29void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 115void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -109,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
109#define TS_MASK ((1ULL << TS_SHIFT) - 1) 195#define TS_MASK ((1ULL << TS_SHIFT) - 1)
110#define TS_DELTA_TEST (~TS_MASK) 196#define TS_DELTA_TEST (~TS_MASK)
111 197
112/* 198struct buffer_data_page {
113 * This hack stolen from mm/slob.c.
114 * We can store per page timing information in the page frame of the page.
115 * Thanks to Peter Zijlstra for suggesting this idea.
116 */
117struct buffer_page {
118 u64 time_stamp; /* page time stamp */ 199 u64 time_stamp; /* page time stamp */
119 local_t write; /* index for next write */
 120 local_t commit; /* write committed index */ 200 local_t commit; /* write committed index */
201 unsigned char data[]; /* data of buffer page */
202};
203
204struct buffer_page {
205 local_t write; /* index for next write */
121 unsigned read; /* index for next read */ 206 unsigned read; /* index for next read */
122 struct list_head list; /* list of free pages */ 207 struct list_head list; /* list of free pages */
123 void *page; /* Actual data page */ 208 struct buffer_data_page *page; /* Actual data page */
124}; 209};
125 210
211static void rb_init_page(struct buffer_data_page *bpage)
212{
213 local_set(&bpage->commit, 0);
214}
215
126/* 216/*
127 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 217 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
128 * this issue out. 218 * this issue out.
@@ -144,7 +234,7 @@ static inline int test_time_stamp(u64 delta)
144 return 0; 234 return 0;
145} 235}
146 236
147#define BUF_PAGE_SIZE PAGE_SIZE 237#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
148 238
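/*
 * Layout sketch (illustrative, not part of the patch): each raw page now
 * starts with the struct buffer_data_page header (time_stamp + commit) and
 * only the remaining BUF_PAGE_SIZE bytes hold events.  The hypothetical
 * helper below shows how a freshly allocated page is wired up, mirroring
 * what rb_allocate_pages() does below.
 */
static struct buffer_data_page *rb_setup_data_page(unsigned long addr)
{
        struct buffer_data_page *dpage = (struct buffer_data_page *)addr;

        rb_init_page(dpage);    /* commit index starts at zero */
        /* events are written into dpage->data[0 .. BUF_PAGE_SIZE - 1] */
        return dpage;
}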
149/* 239/*
150 * head_page == tail_page && head == tail then buffer is empty. 240 * head_page == tail_page && head == tail then buffer is empty.
@@ -152,7 +242,8 @@ static inline int test_time_stamp(u64 delta)
152struct ring_buffer_per_cpu { 242struct ring_buffer_per_cpu {
153 int cpu; 243 int cpu;
154 struct ring_buffer *buffer; 244 struct ring_buffer *buffer;
155 spinlock_t lock; 245 spinlock_t reader_lock; /* serialize readers */
246 raw_spinlock_t lock;
156 struct lock_class_key lock_key; 247 struct lock_class_key lock_key;
157 struct list_head pages; 248 struct list_head pages;
158 struct buffer_page *head_page; /* read from head */ 249 struct buffer_page *head_page; /* read from head */
@@ -186,32 +277,16 @@ struct ring_buffer_iter {
186 u64 read_stamp; 277 u64 read_stamp;
187}; 278};
188 279
280/* buffer may be either ring_buffer or ring_buffer_per_cpu */
189#define RB_WARN_ON(buffer, cond) \ 281#define RB_WARN_ON(buffer, cond) \
190 do { \ 282 ({ \
191 if (unlikely(cond)) { \ 283 int _____ret = unlikely(cond); \
192 atomic_inc(&buffer->record_disabled); \ 284 if (_____ret) { \
193 WARN_ON(1); \
194 } \
195 } while (0)
196
197#define RB_WARN_ON_RET(buffer, cond) \
198 do { \
199 if (unlikely(cond)) { \
200 atomic_inc(&buffer->record_disabled); \
201 WARN_ON(1); \
202 return -1; \
203 } \
204 } while (0)
205
206#define RB_WARN_ON_ONCE(buffer, cond) \
207 do { \
208 static int once; \
209 if (unlikely(cond) && !once) { \
210 once++; \
211 atomic_inc(&buffer->record_disabled); \ 285 atomic_inc(&buffer->record_disabled); \
212 WARN_ON(1); \ 286 WARN_ON(1); \
213 } \ 287 } \
214 } while (0) 288 _____ret; \
289 })
215 290
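/*
 * Usage sketch (illustrative): RB_WARN_ON is now a statement expression that
 * evaluates to the tested condition, so a caller can warn, disable recording
 * and bail out in a single if(); this replaces the old RB_WARN_ON_RET and
 * RB_WARN_ON_ONCE variants.  Hypothetical example:
 */
static int rb_check_head_links(struct ring_buffer_per_cpu *cpu_buffer,
                               struct list_head *head)
{
        if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
                return -1;      /* recording already disabled by the macro */
        return 0;
}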
216/** 291/**
217 * check_pages - integrity check of buffer pages 292 * check_pages - integrity check of buffer pages
@@ -223,16 +298,20 @@ struct ring_buffer_iter {
223static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 298static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
224{ 299{
225 struct list_head *head = &cpu_buffer->pages; 300 struct list_head *head = &cpu_buffer->pages;
226 struct buffer_page *page, *tmp; 301 struct buffer_page *bpage, *tmp;
227 302
228 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 303 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
229 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 304 return -1;
305 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
306 return -1;
230 307
231 list_for_each_entry_safe(page, tmp, head, list) { 308 list_for_each_entry_safe(bpage, tmp, head, list) {
232 RB_WARN_ON_RET(cpu_buffer, 309 if (RB_WARN_ON(cpu_buffer,
233 page->list.next->prev != &page->list); 310 bpage->list.next->prev != &bpage->list))
234 RB_WARN_ON_RET(cpu_buffer, 311 return -1;
235 page->list.prev->next != &page->list); 312 if (RB_WARN_ON(cpu_buffer,
313 bpage->list.prev->next != &bpage->list))
314 return -1;
236 } 315 }
237 316
238 return 0; 317 return 0;
@@ -242,22 +321,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
242 unsigned nr_pages) 321 unsigned nr_pages)
243{ 322{
244 struct list_head *head = &cpu_buffer->pages; 323 struct list_head *head = &cpu_buffer->pages;
245 struct buffer_page *page, *tmp; 324 struct buffer_page *bpage, *tmp;
246 unsigned long addr; 325 unsigned long addr;
247 LIST_HEAD(pages); 326 LIST_HEAD(pages);
248 unsigned i; 327 unsigned i;
249 328
250 for (i = 0; i < nr_pages; i++) { 329 for (i = 0; i < nr_pages; i++) {
251 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 330 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
252 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 331 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
253 if (!page) 332 if (!bpage)
254 goto free_pages; 333 goto free_pages;
255 list_add(&page->list, &pages); 334 list_add(&bpage->list, &pages);
256 335
257 addr = __get_free_page(GFP_KERNEL); 336 addr = __get_free_page(GFP_KERNEL);
258 if (!addr) 337 if (!addr)
259 goto free_pages; 338 goto free_pages;
260 page->page = (void *)addr; 339 bpage->page = (void *)addr;
340 rb_init_page(bpage->page);
261 } 341 }
262 342
263 list_splice(&pages, head); 343 list_splice(&pages, head);
@@ -267,9 +347,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
267 return 0; 347 return 0;
268 348
269 free_pages: 349 free_pages:
270 list_for_each_entry_safe(page, tmp, &pages, list) { 350 list_for_each_entry_safe(bpage, tmp, &pages, list) {
271 list_del_init(&page->list); 351 list_del_init(&bpage->list);
272 free_buffer_page(page); 352 free_buffer_page(bpage);
273 } 353 }
274 return -ENOMEM; 354 return -ENOMEM;
275} 355}
@@ -278,7 +358,7 @@ static struct ring_buffer_per_cpu *
278rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 358rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
279{ 359{
280 struct ring_buffer_per_cpu *cpu_buffer; 360 struct ring_buffer_per_cpu *cpu_buffer;
281 struct buffer_page *page; 361 struct buffer_page *bpage;
282 unsigned long addr; 362 unsigned long addr;
283 int ret; 363 int ret;
284 364
@@ -289,19 +369,21 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
289 369
290 cpu_buffer->cpu = cpu; 370 cpu_buffer->cpu = cpu;
291 cpu_buffer->buffer = buffer; 371 cpu_buffer->buffer = buffer;
292 spin_lock_init(&cpu_buffer->lock); 372 spin_lock_init(&cpu_buffer->reader_lock);
373 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
293 INIT_LIST_HEAD(&cpu_buffer->pages); 374 INIT_LIST_HEAD(&cpu_buffer->pages);
294 375
295 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 376 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
296 GFP_KERNEL, cpu_to_node(cpu)); 377 GFP_KERNEL, cpu_to_node(cpu));
297 if (!page) 378 if (!bpage)
298 goto fail_free_buffer; 379 goto fail_free_buffer;
299 380
300 cpu_buffer->reader_page = page; 381 cpu_buffer->reader_page = bpage;
301 addr = __get_free_page(GFP_KERNEL); 382 addr = __get_free_page(GFP_KERNEL);
302 if (!addr) 383 if (!addr)
303 goto fail_free_reader; 384 goto fail_free_reader;
304 page->page = (void *)addr; 385 bpage->page = (void *)addr;
386 rb_init_page(bpage->page);
305 387
306 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 388 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
307 389
@@ -326,14 +408,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
326static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 408static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
327{ 409{
328 struct list_head *head = &cpu_buffer->pages; 410 struct list_head *head = &cpu_buffer->pages;
329 struct buffer_page *page, *tmp; 411 struct buffer_page *bpage, *tmp;
330 412
331 list_del_init(&cpu_buffer->reader_page->list); 413 list_del_init(&cpu_buffer->reader_page->list);
332 free_buffer_page(cpu_buffer->reader_page); 414 free_buffer_page(cpu_buffer->reader_page);
333 415
334 list_for_each_entry_safe(page, tmp, head, list) { 416 list_for_each_entry_safe(bpage, tmp, head, list) {
335 list_del_init(&page->list); 417 list_del_init(&bpage->list);
336 free_buffer_page(page); 418 free_buffer_page(bpage);
337 } 419 }
338 kfree(cpu_buffer); 420 kfree(cpu_buffer);
339} 421}
@@ -430,7 +512,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
430static void 512static void
431rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 513rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
432{ 514{
433 struct buffer_page *page; 515 struct buffer_page *bpage;
434 struct list_head *p; 516 struct list_head *p;
435 unsigned i; 517 unsigned i;
436 518
@@ -438,13 +520,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
438 synchronize_sched(); 520 synchronize_sched();
439 521
440 for (i = 0; i < nr_pages; i++) { 522 for (i = 0; i < nr_pages; i++) {
441 BUG_ON(list_empty(&cpu_buffer->pages)); 523 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
524 return;
442 p = cpu_buffer->pages.next; 525 p = cpu_buffer->pages.next;
443 page = list_entry(p, struct buffer_page, list); 526 bpage = list_entry(p, struct buffer_page, list);
444 list_del_init(&page->list); 527 list_del_init(&bpage->list);
445 free_buffer_page(page); 528 free_buffer_page(bpage);
446 } 529 }
447 BUG_ON(list_empty(&cpu_buffer->pages)); 530 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
531 return;
448 532
449 rb_reset_cpu(cpu_buffer); 533 rb_reset_cpu(cpu_buffer);
450 534
@@ -458,7 +542,7 @@ static void
458rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 542rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
459 struct list_head *pages, unsigned nr_pages) 543 struct list_head *pages, unsigned nr_pages)
460{ 544{
461 struct buffer_page *page; 545 struct buffer_page *bpage;
462 struct list_head *p; 546 struct list_head *p;
463 unsigned i; 547 unsigned i;
464 548
@@ -466,11 +550,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
466 synchronize_sched(); 550 synchronize_sched();
467 551
468 for (i = 0; i < nr_pages; i++) { 552 for (i = 0; i < nr_pages; i++) {
469 BUG_ON(list_empty(pages)); 553 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
554 return;
470 p = pages->next; 555 p = pages->next;
471 page = list_entry(p, struct buffer_page, list); 556 bpage = list_entry(p, struct buffer_page, list);
472 list_del_init(&page->list); 557 list_del_init(&bpage->list);
473 list_add_tail(&page->list, &cpu_buffer->pages); 558 list_add_tail(&bpage->list, &cpu_buffer->pages);
474 } 559 }
475 rb_reset_cpu(cpu_buffer); 560 rb_reset_cpu(cpu_buffer);
476 561
@@ -497,12 +582,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
497{ 582{
498 struct ring_buffer_per_cpu *cpu_buffer; 583 struct ring_buffer_per_cpu *cpu_buffer;
499 unsigned nr_pages, rm_pages, new_pages; 584 unsigned nr_pages, rm_pages, new_pages;
500 struct buffer_page *page, *tmp; 585 struct buffer_page *bpage, *tmp;
501 unsigned long buffer_size; 586 unsigned long buffer_size;
502 unsigned long addr; 587 unsigned long addr;
503 LIST_HEAD(pages); 588 LIST_HEAD(pages);
504 int i, cpu; 589 int i, cpu;
505 590
591 /*
592 * Always succeed at resizing a non-existent buffer:
593 */
594 if (!buffer)
595 return size;
596
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 597 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE; 598 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE; 599 buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -521,7 +612,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
521 if (size < buffer_size) { 612 if (size < buffer_size) {
522 613
523 /* easy case, just free pages */ 614 /* easy case, just free pages */
524 BUG_ON(nr_pages >= buffer->pages); 615 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
616 mutex_unlock(&buffer->mutex);
617 return -1;
618 }
525 619
526 rm_pages = buffer->pages - nr_pages; 620 rm_pages = buffer->pages - nr_pages;
527 621
@@ -540,21 +634,26 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
540 * add these pages to the cpu_buffers. Otherwise we just free 634 * add these pages to the cpu_buffers. Otherwise we just free
541 * them all and return -ENOMEM; 635 * them all and return -ENOMEM;
542 */ 636 */
543 BUG_ON(nr_pages <= buffer->pages); 637 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
638 mutex_unlock(&buffer->mutex);
639 return -1;
640 }
641
544 new_pages = nr_pages - buffer->pages; 642 new_pages = nr_pages - buffer->pages;
545 643
546 for_each_buffer_cpu(buffer, cpu) { 644 for_each_buffer_cpu(buffer, cpu) {
547 for (i = 0; i < new_pages; i++) { 645 for (i = 0; i < new_pages; i++) {
548 page = kzalloc_node(ALIGN(sizeof(*page), 646 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
549 cache_line_size()), 647 cache_line_size()),
550 GFP_KERNEL, cpu_to_node(cpu)); 648 GFP_KERNEL, cpu_to_node(cpu));
551 if (!page) 649 if (!bpage)
552 goto free_pages; 650 goto free_pages;
553 list_add(&page->list, &pages); 651 list_add(&bpage->list, &pages);
554 addr = __get_free_page(GFP_KERNEL); 652 addr = __get_free_page(GFP_KERNEL);
555 if (!addr) 653 if (!addr)
556 goto free_pages; 654 goto free_pages;
557 page->page = (void *)addr; 655 bpage->page = (void *)addr;
656 rb_init_page(bpage->page);
558 } 657 }
559 } 658 }
560 659
@@ -563,7 +662,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
563 rb_insert_pages(cpu_buffer, &pages, new_pages); 662 rb_insert_pages(cpu_buffer, &pages, new_pages);
564 } 663 }
565 664
566 BUG_ON(!list_empty(&pages)); 665 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
666 mutex_unlock(&buffer->mutex);
667 return -1;
668 }
567 669
568 out: 670 out:
569 buffer->pages = nr_pages; 671 buffer->pages = nr_pages;
@@ -572,10 +674,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
572 return size; 674 return size;
573 675
574 free_pages: 676 free_pages:
575 list_for_each_entry_safe(page, tmp, &pages, list) { 677 list_for_each_entry_safe(bpage, tmp, &pages, list) {
576 list_del_init(&page->list); 678 list_del_init(&bpage->list);
577 free_buffer_page(page); 679 free_buffer_page(bpage);
578 } 680 }
681 mutex_unlock(&buffer->mutex);
579 return -ENOMEM; 682 return -ENOMEM;
580} 683}
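/*
 * Behaviour sketch (illustrative): because of the early "non-existent buffer"
 * check above, callers may resize unconditionally; a NULL buffer simply
 * reports success with the requested size.  Hypothetical wrapper:
 */
static int my_set_buffer_size(struct ring_buffer *buffer, unsigned long size)
{
        /* returns the rounded-up size on success, a negative value on failure */
        return ring_buffer_resize(buffer, size) < 0 ? -ENOMEM : 0;
}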
581 684
@@ -584,9 +687,15 @@ static inline int rb_null_event(struct ring_buffer_event *event)
584 return event->type == RINGBUF_TYPE_PADDING; 687 return event->type == RINGBUF_TYPE_PADDING;
585} 688}
586 689
587static inline void *__rb_page_index(struct buffer_page *page, unsigned index) 690static inline void *
691__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
588{ 692{
589 return page->page + index; 693 return bpage->data + index;
694}
695
696static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
697{
698 return bpage->page->data + index;
590} 699}
591 700
592static inline struct ring_buffer_event * 701static inline struct ring_buffer_event *
@@ -616,7 +725,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
616 725
617static inline unsigned rb_page_commit(struct buffer_page *bpage) 726static inline unsigned rb_page_commit(struct buffer_page *bpage)
618{ 727{
619 return local_read(&bpage->commit); 728 return local_read(&bpage->page->commit);
620} 729}
621 730
 622/* Size is determined by what has been committed */ 731/* Size is determined by what has been committed */
@@ -651,7 +760,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
651 head += rb_event_length(event)) { 760 head += rb_event_length(event)) {
652 761
653 event = __rb_page_index(cpu_buffer->head_page, head); 762 event = __rb_page_index(cpu_buffer->head_page, head);
654 BUG_ON(rb_null_event(event)); 763 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
764 return;
655 /* Only count data entries */ 765 /* Only count data entries */
656 if (event->type != RINGBUF_TYPE_DATA) 766 if (event->type != RINGBUF_TYPE_DATA)
657 continue; 767 continue;
@@ -661,14 +771,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
661} 771}
662 772
663static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 773static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
664 struct buffer_page **page) 774 struct buffer_page **bpage)
665{ 775{
666 struct list_head *p = (*page)->list.next; 776 struct list_head *p = (*bpage)->list.next;
667 777
668 if (p == &cpu_buffer->pages) 778 if (p == &cpu_buffer->pages)
669 p = p->next; 779 p = p->next;
670 780
671 *page = list_entry(p, struct buffer_page, list); 781 *bpage = list_entry(p, struct buffer_page, list);
672} 782}
673 783
674static inline unsigned 784static inline unsigned
@@ -704,16 +814,18 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
704 addr &= PAGE_MASK; 814 addr &= PAGE_MASK;
705 815
706 while (cpu_buffer->commit_page->page != (void *)addr) { 816 while (cpu_buffer->commit_page->page != (void *)addr) {
707 RB_WARN_ON(cpu_buffer, 817 if (RB_WARN_ON(cpu_buffer,
708 cpu_buffer->commit_page == cpu_buffer->tail_page); 818 cpu_buffer->commit_page == cpu_buffer->tail_page))
709 cpu_buffer->commit_page->commit = 819 return;
820 cpu_buffer->commit_page->page->commit =
710 cpu_buffer->commit_page->write; 821 cpu_buffer->commit_page->write;
711 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 822 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
712 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 823 cpu_buffer->write_stamp =
824 cpu_buffer->commit_page->page->time_stamp;
713 } 825 }
714 826
715 /* Now set the commit to the event's index */ 827 /* Now set the commit to the event's index */
716 local_set(&cpu_buffer->commit_page->commit, index); 828 local_set(&cpu_buffer->commit_page->page->commit, index);
717} 829}
718 830
719static inline void 831static inline void
@@ -728,16 +840,17 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
728 * assign the commit to the tail. 840 * assign the commit to the tail.
729 */ 841 */
730 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 842 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
731 cpu_buffer->commit_page->commit = 843 cpu_buffer->commit_page->page->commit =
732 cpu_buffer->commit_page->write; 844 cpu_buffer->commit_page->write;
733 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 845 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
734 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 846 cpu_buffer->write_stamp =
847 cpu_buffer->commit_page->page->time_stamp;
735 /* add barrier to keep gcc from optimizing too much */ 848 /* add barrier to keep gcc from optimizing too much */
736 barrier(); 849 barrier();
737 } 850 }
738 while (rb_commit_index(cpu_buffer) != 851 while (rb_commit_index(cpu_buffer) !=
739 rb_page_write(cpu_buffer->commit_page)) { 852 rb_page_write(cpu_buffer->commit_page)) {
740 cpu_buffer->commit_page->commit = 853 cpu_buffer->commit_page->page->commit =
741 cpu_buffer->commit_page->write; 854 cpu_buffer->commit_page->write;
742 barrier(); 855 barrier();
743 } 856 }
@@ -745,7 +858,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
745 858
746static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 859static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
747{ 860{
748 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; 861 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
749 cpu_buffer->reader_page->read = 0; 862 cpu_buffer->reader_page->read = 0;
750} 863}
751 864
@@ -764,7 +877,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
764 else 877 else
765 rb_inc_page(cpu_buffer, &iter->head_page); 878 rb_inc_page(cpu_buffer, &iter->head_page);
766 879
767 iter->read_stamp = iter->head_page->time_stamp; 880 iter->read_stamp = iter->head_page->page->time_stamp;
768 iter->head = 0; 881 iter->head = 0;
769} 882}
770 883
@@ -852,7 +965,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
852 if (write > BUF_PAGE_SIZE) { 965 if (write > BUF_PAGE_SIZE) {
853 struct buffer_page *next_page = tail_page; 966 struct buffer_page *next_page = tail_page;
854 967
855 spin_lock_irqsave(&cpu_buffer->lock, flags); 968 local_irq_save(flags);
969 __raw_spin_lock(&cpu_buffer->lock);
856 970
857 rb_inc_page(cpu_buffer, &next_page); 971 rb_inc_page(cpu_buffer, &next_page);
858 972
@@ -860,7 +974,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
860 reader_page = cpu_buffer->reader_page; 974 reader_page = cpu_buffer->reader_page;
861 975
862 /* we grabbed the lock before incrementing */ 976 /* we grabbed the lock before incrementing */
863 RB_WARN_ON(cpu_buffer, next_page == reader_page); 977 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
978 goto out_unlock;
864 979
865 /* 980 /*
866 * If for some reason, we had an interrupt storm that made 981 * If for some reason, we had an interrupt storm that made
@@ -898,12 +1013,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
898 */ 1013 */
899 if (tail_page == cpu_buffer->tail_page) { 1014 if (tail_page == cpu_buffer->tail_page) {
900 local_set(&next_page->write, 0); 1015 local_set(&next_page->write, 0);
901 local_set(&next_page->commit, 0); 1016 local_set(&next_page->page->commit, 0);
902 cpu_buffer->tail_page = next_page; 1017 cpu_buffer->tail_page = next_page;
903 1018
904 /* reread the time stamp */ 1019 /* reread the time stamp */
905 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1020 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
906 cpu_buffer->tail_page->time_stamp = *ts; 1021 cpu_buffer->tail_page->page->time_stamp = *ts;
907 } 1022 }
908 1023
909 /* 1024 /*
@@ -928,7 +1043,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
928 rb_set_commit_to_write(cpu_buffer); 1043 rb_set_commit_to_write(cpu_buffer);
929 } 1044 }
930 1045
931 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1046 __raw_spin_unlock(&cpu_buffer->lock);
1047 local_irq_restore(flags);
932 1048
933 /* fail and let the caller try again */ 1049 /* fail and let the caller try again */
934 return ERR_PTR(-EAGAIN); 1050 return ERR_PTR(-EAGAIN);
@@ -936,7 +1052,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
936 1052
937 /* We reserved something on the buffer */ 1053 /* We reserved something on the buffer */
938 1054
939 BUG_ON(write > BUF_PAGE_SIZE); 1055 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1056 return NULL;
940 1057
941 event = __rb_page_index(tail_page, tail); 1058 event = __rb_page_index(tail_page, tail);
942 rb_update_event(event, type, length); 1059 rb_update_event(event, type, length);
@@ -946,12 +1063,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
946 * this page's time stamp. 1063 * this page's time stamp.
947 */ 1064 */
948 if (!tail && rb_is_commit(cpu_buffer, event)) 1065 if (!tail && rb_is_commit(cpu_buffer, event))
949 cpu_buffer->commit_page->time_stamp = *ts; 1066 cpu_buffer->commit_page->page->time_stamp = *ts;
950 1067
951 return event; 1068 return event;
952 1069
953 out_unlock: 1070 out_unlock:
954 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1071 __raw_spin_unlock(&cpu_buffer->lock);
1072 local_irq_restore(flags);
955 return NULL; 1073 return NULL;
956} 1074}
957 1075
@@ -996,7 +1114,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
996 event->time_delta = *delta & TS_MASK; 1114 event->time_delta = *delta & TS_MASK;
997 event->array[0] = *delta >> TS_SHIFT; 1115 event->array[0] = *delta >> TS_SHIFT;
998 } else { 1116 } else {
999 cpu_buffer->commit_page->time_stamp = *ts; 1117 cpu_buffer->commit_page->page->time_stamp = *ts;
1000 event->time_delta = 0; 1118 event->time_delta = 0;
1001 event->array[0] = 0; 1119 event->array[0] = 0;
1002 } 1120 }
@@ -1034,10 +1152,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1034 * storm or we have something buggy. 1152 * storm or we have something buggy.
1035 * Bail! 1153 * Bail!
1036 */ 1154 */
1037 if (unlikely(++nr_loops > 1000)) { 1155 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1038 RB_WARN_ON(cpu_buffer, 1);
1039 return NULL; 1156 return NULL;
1040 }
1041 1157
1042 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1158 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1043 1159
@@ -1133,12 +1249,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1133 struct ring_buffer_event *event; 1249 struct ring_buffer_event *event;
1134 int cpu, resched; 1250 int cpu, resched;
1135 1251
1252 if (ring_buffer_flags != RB_BUFFERS_ON)
1253 return NULL;
1254
1136 if (atomic_read(&buffer->record_disabled)) 1255 if (atomic_read(&buffer->record_disabled))
1137 return NULL; 1256 return NULL;
1138 1257
1139 /* If we are tracing schedule, we don't want to recurse */ 1258 /* If we are tracing schedule, we don't want to recurse */
1140 resched = need_resched(); 1259 resched = ftrace_preempt_disable();
1141 preempt_disable_notrace();
1142 1260
1143 cpu = raw_smp_processor_id(); 1261 cpu = raw_smp_processor_id();
1144 1262
@@ -1169,10 +1287,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1169 return event; 1287 return event;
1170 1288
1171 out: 1289 out:
1172 if (resched) 1290 ftrace_preempt_enable(resched);
1173 preempt_enable_notrace();
1174 else
1175 preempt_enable_notrace();
1176 return NULL; 1291 return NULL;
1177} 1292}
1178 1293
@@ -1214,12 +1329,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1214 /* 1329 /*
1215 * Only the last preempt count needs to restore preemption. 1330 * Only the last preempt count needs to restore preemption.
1216 */ 1331 */
1217 if (preempt_count() == 1) { 1332 if (preempt_count() == 1)
1218 if (per_cpu(rb_need_resched, cpu)) 1333 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1219 preempt_enable_no_resched_notrace(); 1334 else
1220 else
1221 preempt_enable_notrace();
1222 } else
1223 preempt_enable_no_resched_notrace(); 1335 preempt_enable_no_resched_notrace();
1224 1336
1225 return 0; 1337 return 0;
@@ -1249,11 +1361,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1249 int ret = -EBUSY; 1361 int ret = -EBUSY;
1250 int cpu, resched; 1362 int cpu, resched;
1251 1363
1364 if (ring_buffer_flags != RB_BUFFERS_ON)
1365 return -EBUSY;
1366
1252 if (atomic_read(&buffer->record_disabled)) 1367 if (atomic_read(&buffer->record_disabled))
1253 return -EBUSY; 1368 return -EBUSY;
1254 1369
1255 resched = need_resched(); 1370 resched = ftrace_preempt_disable();
1256 preempt_disable_notrace();
1257 1371
1258 cpu = raw_smp_processor_id(); 1372 cpu = raw_smp_processor_id();
1259 1373
@@ -1279,10 +1393,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1279 1393
1280 ret = 0; 1394 ret = 0;
1281 out: 1395 out:
1282 if (resched) 1396 ftrace_preempt_enable(resched);
1283 preempt_enable_no_resched_notrace();
1284 else
1285 preempt_enable_notrace();
1286 1397
1287 return ret; 1398 return ret;
1288} 1399}
@@ -1441,14 +1552,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1441 return overruns; 1552 return overruns;
1442} 1553}
1443 1554
1444/** 1555static void rb_iter_reset(struct ring_buffer_iter *iter)
1445 * ring_buffer_iter_reset - reset an iterator
1446 * @iter: The iterator to reset
1447 *
1448 * Resets the iterator, so that it will start from the beginning
1449 * again.
1450 */
1451void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1452{ 1556{
1453 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1557 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1454 1558
@@ -1463,7 +1567,24 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1463 if (iter->head) 1567 if (iter->head)
1464 iter->read_stamp = cpu_buffer->read_stamp; 1568 iter->read_stamp = cpu_buffer->read_stamp;
1465 else 1569 else
1466 iter->read_stamp = iter->head_page->time_stamp; 1570 iter->read_stamp = iter->head_page->page->time_stamp;
1571}
1572
1573/**
1574 * ring_buffer_iter_reset - reset an iterator
1575 * @iter: The iterator to reset
1576 *
1577 * Resets the iterator, so that it will start from the beginning
1578 * again.
1579 */
1580void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1581{
1582 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1583 unsigned long flags;
1584
1585 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1586 rb_iter_reset(iter);
1587 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1467} 1588}
1468 1589
1469/** 1590/**
@@ -1549,7 +1670,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1549 unsigned long flags; 1670 unsigned long flags;
1550 int nr_loops = 0; 1671 int nr_loops = 0;
1551 1672
1552 spin_lock_irqsave(&cpu_buffer->lock, flags); 1673 local_irq_save(flags);
1674 __raw_spin_lock(&cpu_buffer->lock);
1553 1675
1554 again: 1676 again:
1555 /* 1677 /*
@@ -1558,8 +1680,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1558 * a case where we will loop three times. There should be no 1680 * a case where we will loop three times. There should be no
1559 * reason to loop four times (that I know of). 1681 * reason to loop four times (that I know of).
1560 */ 1682 */
1561 if (unlikely(++nr_loops > 3)) { 1683 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1562 RB_WARN_ON(cpu_buffer, 1);
1563 reader = NULL; 1684 reader = NULL;
1564 goto out; 1685 goto out;
1565 } 1686 }
@@ -1571,8 +1692,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1571 goto out; 1692 goto out;
1572 1693
1573 /* Never should we have an index greater than the size */ 1694 /* Never should we have an index greater than the size */
1574 RB_WARN_ON(cpu_buffer, 1695 if (RB_WARN_ON(cpu_buffer,
1575 cpu_buffer->reader_page->read > rb_page_size(reader)); 1696 cpu_buffer->reader_page->read > rb_page_size(reader)))
1697 goto out;
1576 1698
1577 /* check if we caught up to the tail */ 1699 /* check if we caught up to the tail */
1578 reader = NULL; 1700 reader = NULL;
@@ -1589,7 +1711,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1589 cpu_buffer->reader_page->list.prev = reader->list.prev; 1711 cpu_buffer->reader_page->list.prev = reader->list.prev;
1590 1712
1591 local_set(&cpu_buffer->reader_page->write, 0); 1713 local_set(&cpu_buffer->reader_page->write, 0);
1592 local_set(&cpu_buffer->reader_page->commit, 0); 1714 local_set(&cpu_buffer->reader_page->page->commit, 0);
1593 1715
1594 /* Make the reader page now replace the head */ 1716 /* Make the reader page now replace the head */
1595 reader->list.prev->next = &cpu_buffer->reader_page->list; 1717 reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1611,7 +1733,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1611 goto again; 1733 goto again;
1612 1734
1613 out: 1735 out:
1614 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1736 __raw_spin_unlock(&cpu_buffer->lock);
1737 local_irq_restore(flags);
1615 1738
1616 return reader; 1739 return reader;
1617} 1740}
@@ -1625,7 +1748,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1625 reader = rb_get_reader_page(cpu_buffer); 1748 reader = rb_get_reader_page(cpu_buffer);
1626 1749
1627 /* This function should not be called when buffer is empty */ 1750 /* This function should not be called when buffer is empty */
1628 BUG_ON(!reader); 1751 if (RB_WARN_ON(cpu_buffer, !reader))
1752 return;
1629 1753
1630 event = rb_reader_event(cpu_buffer); 1754 event = rb_reader_event(cpu_buffer);
1631 1755
@@ -1652,7 +1776,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1652 * Check if we are at the end of the buffer. 1776 * Check if we are at the end of the buffer.
1653 */ 1777 */
1654 if (iter->head >= rb_page_size(iter->head_page)) { 1778 if (iter->head >= rb_page_size(iter->head_page)) {
1655 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1779 if (RB_WARN_ON(buffer,
1780 iter->head_page == cpu_buffer->commit_page))
1781 return;
1656 rb_inc_iter(iter); 1782 rb_inc_iter(iter);
1657 return; 1783 return;
1658 } 1784 }
@@ -1665,8 +1791,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1665 * This should not be called to advance the header if we are 1791 * This should not be called to advance the header if we are
1666 * at the tail of the buffer. 1792 * at the tail of the buffer.
1667 */ 1793 */
1668 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1794 if (RB_WARN_ON(cpu_buffer,
1669 (iter->head + length > rb_commit_index(cpu_buffer))); 1795 (iter->head_page == cpu_buffer->commit_page) &&
1796 (iter->head + length > rb_commit_index(cpu_buffer))))
1797 return;
1670 1798
1671 rb_update_iter_read_stamp(iter, event); 1799 rb_update_iter_read_stamp(iter, event);
1672 1800
@@ -1678,17 +1806,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1678 rb_advance_iter(iter); 1806 rb_advance_iter(iter);
1679} 1807}
1680 1808
1681/** 1809static struct ring_buffer_event *
1682 * ring_buffer_peek - peek at the next event to be read 1810rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1683 * @buffer: The ring buffer to read
 1684 * @cpu: The cpu to peek at
1685 * @ts: The timestamp counter of this event.
1686 *
1687 * This will return the event that will be read next, but does
1688 * not consume the data.
1689 */
1690struct ring_buffer_event *
1691ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1692{ 1811{
1693 struct ring_buffer_per_cpu *cpu_buffer; 1812 struct ring_buffer_per_cpu *cpu_buffer;
1694 struct ring_buffer_event *event; 1813 struct ring_buffer_event *event;
@@ -1709,10 +1828,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1709 * can have. Nesting 10 deep of interrupts is clearly 1828 * can have. Nesting 10 deep of interrupts is clearly
1710 * an anomaly. 1829 * an anomaly.
1711 */ 1830 */
1712 if (unlikely(++nr_loops > 10)) { 1831 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1713 RB_WARN_ON(cpu_buffer, 1);
1714 return NULL; 1832 return NULL;
1715 }
1716 1833
1717 reader = rb_get_reader_page(cpu_buffer); 1834 reader = rb_get_reader_page(cpu_buffer);
1718 if (!reader) 1835 if (!reader)
@@ -1750,16 +1867,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1750 return NULL; 1867 return NULL;
1751} 1868}
1752 1869
1753/** 1870static struct ring_buffer_event *
1754 * ring_buffer_iter_peek - peek at the next event to be read 1871rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1755 * @iter: The ring buffer iterator
1756 * @ts: The timestamp counter of this event.
1757 *
1758 * This will return the event that will be read next, but does
1759 * not increment the iterator.
1760 */
1761struct ring_buffer_event *
1762ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1763{ 1872{
1764 struct ring_buffer *buffer; 1873 struct ring_buffer *buffer;
1765 struct ring_buffer_per_cpu *cpu_buffer; 1874 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1781,10 +1890,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1781 * can have. Nesting 10 deep of interrupts is clearly 1890 * can have. Nesting 10 deep of interrupts is clearly
1782 * an anomaly. 1891 * an anomaly.
1783 */ 1892 */
1784 if (unlikely(++nr_loops > 10)) { 1893 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1785 RB_WARN_ON(cpu_buffer, 1);
1786 return NULL; 1894 return NULL;
1787 }
1788 1895
1789 if (rb_per_cpu_empty(cpu_buffer)) 1896 if (rb_per_cpu_empty(cpu_buffer))
1790 return NULL; 1897 return NULL;
@@ -1821,6 +1928,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1821} 1928}
1822 1929
1823/** 1930/**
1931 * ring_buffer_peek - peek at the next event to be read
1932 * @buffer: The ring buffer to read
 1933 * @cpu: The cpu to peek at
1934 * @ts: The timestamp counter of this event.
1935 *
1936 * This will return the event that will be read next, but does
1937 * not consume the data.
1938 */
1939struct ring_buffer_event *
1940ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1941{
1942 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1943 struct ring_buffer_event *event;
1944 unsigned long flags;
1945
1946 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1947 event = rb_buffer_peek(buffer, cpu, ts);
1948 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1949
1950 return event;
1951}
1952
1953/**
1954 * ring_buffer_iter_peek - peek at the next event to be read
1955 * @iter: The ring buffer iterator
1956 * @ts: The timestamp counter of this event.
1957 *
1958 * This will return the event that will be read next, but does
1959 * not increment the iterator.
1960 */
1961struct ring_buffer_event *
1962ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1963{
1964 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1965 struct ring_buffer_event *event;
1966 unsigned long flags;
1967
1968 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1969 event = rb_iter_peek(iter, ts);
1970 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971
1972 return event;
1973}
1974
1975/**
1824 * ring_buffer_consume - return an event and consume it 1976 * ring_buffer_consume - return an event and consume it
1825 * @buffer: The ring buffer to get the next event from 1977 * @buffer: The ring buffer to get the next event from
1826 * 1978 *
@@ -1831,19 +1983,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1831struct ring_buffer_event * 1983struct ring_buffer_event *
1832ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1984ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1833{ 1985{
1834 struct ring_buffer_per_cpu *cpu_buffer; 1986 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1835 struct ring_buffer_event *event; 1987 struct ring_buffer_event *event;
1988 unsigned long flags;
1836 1989
1837 if (!cpu_isset(cpu, buffer->cpumask)) 1990 if (!cpu_isset(cpu, buffer->cpumask))
1838 return NULL; 1991 return NULL;
1839 1992
1840 event = ring_buffer_peek(buffer, cpu, ts); 1993 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1994
1995 event = rb_buffer_peek(buffer, cpu, ts);
1841 if (!event) 1996 if (!event)
1842 return NULL; 1997 goto out;
1843 1998
1844 cpu_buffer = buffer->buffers[cpu];
1845 rb_advance_reader(cpu_buffer); 1999 rb_advance_reader(cpu_buffer);
1846 2000
2001 out:
2002 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2003
1847 return event; 2004 return event;
1848} 2005}
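/*
 * Consumer sketch (illustrative, hypothetical names): drain one cpu an event
 * at a time.  The reader_lock taken inside ring_buffer_consume() now
 * serializes this against other readers of the same cpu buffer.
 */
static void my_drain_events(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL) {
                void *data = ring_buffer_event_data(event);
                /* ... handle 'data', stamped at 'ts' ... */
                (void)data;
        }
}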
1849 2006
@@ -1880,9 +2037,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1880 atomic_inc(&cpu_buffer->record_disabled); 2037 atomic_inc(&cpu_buffer->record_disabled);
1881 synchronize_sched(); 2038 synchronize_sched();
1882 2039
1883 spin_lock_irqsave(&cpu_buffer->lock, flags); 2040 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1884 ring_buffer_iter_reset(iter); 2041 __raw_spin_lock(&cpu_buffer->lock);
1885 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2042 rb_iter_reset(iter);
2043 __raw_spin_unlock(&cpu_buffer->lock);
2044 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1886 2045
1887 return iter; 2046 return iter;
1888} 2047}
@@ -1914,12 +2073,17 @@ struct ring_buffer_event *
1914ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2073ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1915{ 2074{
1916 struct ring_buffer_event *event; 2075 struct ring_buffer_event *event;
2076 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2077 unsigned long flags;
1917 2078
1918 event = ring_buffer_iter_peek(iter, ts); 2079 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2080 event = rb_iter_peek(iter, ts);
1919 if (!event) 2081 if (!event)
1920 return NULL; 2082 goto out;
1921 2083
1922 rb_advance_iter(iter); 2084 rb_advance_iter(iter);
2085 out:
2086 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1923 2087
1924 return event; 2088 return event;
1925} 2089}
@@ -1939,7 +2103,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1939 cpu_buffer->head_page 2103 cpu_buffer->head_page
1940 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2104 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1941 local_set(&cpu_buffer->head_page->write, 0); 2105 local_set(&cpu_buffer->head_page->write, 0);
1942 local_set(&cpu_buffer->head_page->commit, 0); 2106 local_set(&cpu_buffer->head_page->page->commit, 0);
1943 2107
1944 cpu_buffer->head_page->read = 0; 2108 cpu_buffer->head_page->read = 0;
1945 2109
@@ -1948,7 +2112,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1948 2112
1949 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2113 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1950 local_set(&cpu_buffer->reader_page->write, 0); 2114 local_set(&cpu_buffer->reader_page->write, 0);
1951 local_set(&cpu_buffer->reader_page->commit, 0); 2115 local_set(&cpu_buffer->reader_page->page->commit, 0);
1952 cpu_buffer->reader_page->read = 0; 2116 cpu_buffer->reader_page->read = 0;
1953 2117
1954 cpu_buffer->overrun = 0; 2118 cpu_buffer->overrun = 0;
@@ -1968,11 +2132,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
1968 if (!cpu_isset(cpu, buffer->cpumask)) 2132 if (!cpu_isset(cpu, buffer->cpumask))
1969 return; 2133 return;
1970 2134
1971 spin_lock_irqsave(&cpu_buffer->lock, flags); 2135 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2136
2137 __raw_spin_lock(&cpu_buffer->lock);
1972 2138
1973 rb_reset_cpu(cpu_buffer); 2139 rb_reset_cpu(cpu_buffer);
1974 2140
1975 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2141 __raw_spin_unlock(&cpu_buffer->lock);
2142
2143 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1976} 2144}
1977 2145
1978/** 2146/**
@@ -2070,3 +2238,233 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2070 return 0; 2238 return 0;
2071} 2239}
2072 2240
2241static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2242 struct buffer_data_page *bpage)
2243{
2244 struct ring_buffer_event *event;
2245 unsigned long head;
2246
2247 __raw_spin_lock(&cpu_buffer->lock);
2248 for (head = 0; head < local_read(&bpage->commit);
2249 head += rb_event_length(event)) {
2250
2251 event = __rb_data_page_index(bpage, head);
2252 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2253 return;
2254 /* Only count data entries */
2255 if (event->type != RINGBUF_TYPE_DATA)
2256 continue;
2257 cpu_buffer->entries--;
2258 }
2259 __raw_spin_unlock(&cpu_buffer->lock);
2260}
2261
2262/**
2263 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2264 * @buffer: the buffer to allocate for.
2265 *
2266 * This function is used in conjunction with ring_buffer_read_page.
2267 * When reading a full page from the ring buffer, these functions
2268 * can be used to speed up the process. The calling function should
2269 * allocate a few pages first with this function. Then when it
2270 * needs to get pages from the ring buffer, it passes the result
2271 * of this function into ring_buffer_read_page, which will swap
2272 * the page that was allocated, with the read page of the buffer.
2273 *
2274 * Returns:
2275 * The page allocated, or NULL on error.
2276 */
2277void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2278{
2279 unsigned long addr;
2280 struct buffer_data_page *bpage;
2281
2282 addr = __get_free_page(GFP_KERNEL);
2283 if (!addr)
2284 return NULL;
2285
2286 bpage = (void *)addr;
2287
2288 return bpage;
2289}
2290
2291/**
2292 * ring_buffer_free_read_page - free an allocated read page
 2293 * @buffer: the buffer the page was allocated for
2294 * @data: the page to free
2295 *
2296 * Free a page allocated from ring_buffer_alloc_read_page.
2297 */
2298void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2299{
2300 free_page((unsigned long)data);
2301}
2302
2303/**
2304 * ring_buffer_read_page - extract a page from the ring buffer
2305 * @buffer: buffer to extract from
2306 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2307 * @cpu: the cpu of the buffer to extract
2308 * @full: should the extraction only happen when the page is full.
2309 *
2310 * This function will pull out a page from the ring buffer and consume it.
2311 * @data_page must be the address of the variable that was returned
2312 * from ring_buffer_alloc_read_page. This is because the page might be used
2313 * to swap with a page in the ring buffer.
2314 *
2315 * for example:
 2316 * rpage = ring_buffer_alloc_read_page(buffer);
2317 * if (!rpage)
2318 * return error;
2319 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2320 * if (ret)
2321 * process_page(rpage);
2322 *
2323 * When @full is set, the function will not return true unless
2324 * the writer is off the reader page.
2325 *
2326 * Note: it is up to the calling functions to handle sleeps and wakeups.
2327 * The ring buffer can be used anywhere in the kernel and can not
2328 * blindly call wake_up. The layer that uses the ring buffer must be
2329 * responsible for that.
2330 *
2331 * Returns:
2332 * 1 if data has been transferred
2333 * 0 if no data has been transferred.
2334 */
2335int ring_buffer_read_page(struct ring_buffer *buffer,
2336 void **data_page, int cpu, int full)
2337{
2338 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2339 struct ring_buffer_event *event;
2340 struct buffer_data_page *bpage;
2341 unsigned long flags;
2342 int ret = 0;
2343
2344 if (!data_page)
2345 return 0;
2346
2347 bpage = *data_page;
2348 if (!bpage)
2349 return 0;
2350
2351 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2352
2353 /*
 2354 * rb_buffer_peek will get the next ring buffer page if
2355 * the current reader page is empty.
2356 */
2357 event = rb_buffer_peek(buffer, cpu, NULL);
2358 if (!event)
2359 goto out;
2360
2361 /* check for data */
2362 if (!local_read(&cpu_buffer->reader_page->page->commit))
2363 goto out;
2364 /*
2365 * If the writer is already off of the read page, then simply
2366 * switch the read page with the given page. Otherwise
2367 * we need to copy the data from the reader to the writer.
2368 */
2369 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2370 unsigned int read = cpu_buffer->reader_page->read;
2371
2372 if (full)
2373 goto out;
2374 /* The writer is still on the reader page, we must copy */
2375 bpage = cpu_buffer->reader_page->page;
2376 memcpy(bpage->data,
2377 cpu_buffer->reader_page->page->data + read,
2378 local_read(&bpage->commit) - read);
2379
2380 /* consume what was read */
2381 cpu_buffer->reader_page += read;
2382
2383 } else {
2384 /* swap the pages */
2385 rb_init_page(bpage);
2386 bpage = cpu_buffer->reader_page->page;
2387 cpu_buffer->reader_page->page = *data_page;
2388 cpu_buffer->reader_page->read = 0;
2389 *data_page = bpage;
2390 }
2391 ret = 1;
2392
2393 /* update the entry counter */
2394 rb_remove_entries(cpu_buffer, bpage);
2395 out:
2396 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2397
2398 return ret;
2399}
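/*
 * Consumer sketch (illustrative, hypothetical names, error handling elided):
 * pull whole pages out of one cpu's buffer with the new page-read API until
 * it runs dry, then free the spare page.
 */
static void my_drain_pages(struct ring_buffer *buffer, int cpu)
{
        void *rpage = ring_buffer_alloc_read_page(buffer);

        if (!rpage)
                return;
        /* full == 0: accept partially filled reader pages as well */
        while (ring_buffer_read_page(buffer, &rpage, cpu, 0))
                ;       /* process the events now sitting in rpage */
        ring_buffer_free_read_page(buffer, rpage);
}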
2400
2401static ssize_t
2402rb_simple_read(struct file *filp, char __user *ubuf,
2403 size_t cnt, loff_t *ppos)
2404{
2405 long *p = filp->private_data;
2406 char buf[64];
2407 int r;
2408
2409 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2410 r = sprintf(buf, "permanently disabled\n");
2411 else
2412 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2413
2414 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2415}
2416
2417static ssize_t
2418rb_simple_write(struct file *filp, const char __user *ubuf,
2419 size_t cnt, loff_t *ppos)
2420{
2421 long *p = filp->private_data;
2422 char buf[64];
2423 long val;
2424 int ret;
2425
2426 if (cnt >= sizeof(buf))
2427 return -EINVAL;
2428
2429 if (copy_from_user(&buf, ubuf, cnt))
2430 return -EFAULT;
2431
2432 buf[cnt] = 0;
2433
2434 ret = strict_strtoul(buf, 10, &val);
2435 if (ret < 0)
2436 return ret;
2437
2438 if (val)
2439 set_bit(RB_BUFFERS_ON_BIT, p);
2440 else
2441 clear_bit(RB_BUFFERS_ON_BIT, p);
2442
2443 (*ppos)++;
2444
2445 return cnt;
2446}
2447
2448static struct file_operations rb_simple_fops = {
2449 .open = tracing_open_generic,
2450 .read = rb_simple_read,
2451 .write = rb_simple_write,
2452};
2453
2454
2455static __init int rb_init_debugfs(void)
2456{
2457 struct dentry *d_tracer;
2458 struct dentry *entry;
2459
2460 d_tracer = tracing_init_dentry();
2461
2462 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2463 &ring_buffer_flags, &rb_simple_fops);
2464 if (!entry)
2465 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2466
2467 return 0;
2468}
2469
2470fs_initcall(rb_init_debugfs);
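/*
 * Usage note (illustrative): with debugfs mounted, the file created above can
 * be driven from user space, e.g. "echo 0 > <debugfs>/tracing_on" to clear
 * RB_BUFFERS_ON_BIT and stop recording, "echo 1" to set it again, and
 * "cat <debugfs>/tracing_on" to read the state back (it reports
 * "permanently disabled" once tracing_off_permanent() has run).
 */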