aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r--kernel/trace/trace.c932
1 files changed, 234 insertions, 698 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6ada059832a6..ef80793858b8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -33,25 +33,22 @@
33#include <linux/writeback.h> 33#include <linux/writeback.h>
34 34
35#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
36 37
37#include "trace.h" 38#include "trace.h"
38 39
40#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
41
39unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 42unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
40unsigned long __read_mostly tracing_thresh; 43unsigned long __read_mostly tracing_thresh;
41 44
42static unsigned long __read_mostly tracing_nr_buffers;
43static cpumask_t __read_mostly tracing_buffer_mask; 45static cpumask_t __read_mostly tracing_buffer_mask;
44 46
45#define for_each_tracing_cpu(cpu) \ 47#define for_each_tracing_cpu(cpu) \
46 for_each_cpu_mask(cpu, tracing_buffer_mask) 48 for_each_cpu_mask(cpu, tracing_buffer_mask)
47 49
48static int trace_alloc_page(void);
49static int trace_free_page(void);
50
51static int tracing_disabled = 1; 50static int tracing_disabled = 1;
52 51
53static unsigned long tracing_pages_allocated;
54
55long 52long
56ns2usecs(cycle_t nsec) 53ns2usecs(cycle_t nsec)
57{ 54{
@@ -62,7 +59,9 @@ ns2usecs(cycle_t nsec)
62 59
63cycle_t ftrace_now(int cpu) 60cycle_t ftrace_now(int cpu)
64{ 61{
65 return cpu_clock(cpu); 62 u64 ts = ring_buffer_time_stamp(cpu);
63 ring_buffer_normalize_time_stamp(cpu, &ts);
64 return ts;
66} 65}
67 66
68/* 67/*
@@ -102,18 +101,18 @@ static int tracer_enabled = 1;
102int ftrace_function_enabled; 101int ftrace_function_enabled;
103 102
104/* 103/*
105 * trace_nr_entries is the number of entries that is allocated 104 * trace_buf_size is the size in bytes that is allocated
106 * for a buffer. Note, the number of entries is always rounded 105 * for a buffer. Note, the number of bytes is always rounded
107 * to ENTRIES_PER_PAGE. 106 * to page size.
108 * 107 *
109 * This number is purposely set to a low number of 16384. 108 * This number is purposely set to a low number of 16384.
110 * If the dump on oops happens, it will be much appreciated 109 * If the dump on oops happens, it will be much appreciated
111 * to not have to wait for all that output. Anyway this can be 110 * to not have to wait for all that output. Anyway this can be
112 * boot time and run time configurable. 111 * boot time and run time configurable.
113 */ 112 */
114#define TRACE_ENTRIES_DEFAULT 16384UL 113#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
115 114
116static unsigned long trace_nr_entries = TRACE_ENTRIES_DEFAULT; 115static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
117 116
118/* trace_types holds a linked list of available tracers. */ 117/* trace_types holds a linked list of available tracers. */
119static struct tracer *trace_types __read_mostly; 118static struct tracer *trace_types __read_mostly;
@@ -158,23 +157,21 @@ void trace_wake_up(void)
158 wake_up(&trace_wait); 157 wake_up(&trace_wait);
159} 158}
160 159
161#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) 160static int __init set_buf_size(char *str)
162
163static int __init set_nr_entries(char *str)
164{ 161{
165 unsigned long nr_entries; 162 unsigned long buf_size;
166 int ret; 163 int ret;
167 164
168 if (!str) 165 if (!str)
169 return 0; 166 return 0;
170 ret = strict_strtoul(str, 0, &nr_entries); 167 ret = strict_strtoul(str, 0, &buf_size);
171 /* nr_entries can not be zero */ 168 /* nr_entries can not be zero */
172 if (ret < 0 || nr_entries == 0) 169 if (ret < 0 || buf_size == 0)
173 return 0; 170 return 0;
174 trace_nr_entries = nr_entries; 171 trace_buf_size = buf_size;
175 return 1; 172 return 1;
176} 173}
177__setup("trace_entries=", set_nr_entries); 174__setup("trace_buf_size=", set_buf_size);
178 175
179unsigned long nsecs_to_usecs(unsigned long nsecs) 176unsigned long nsecs_to_usecs(unsigned long nsecs)
180{ 177{
@@ -243,54 +240,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
243 tracing_record_cmdline(current); 240 tracing_record_cmdline(current);
244} 241}
245 242
246#define CHECK_COND(cond) \
247 if (unlikely(cond)) { \
248 tracing_disabled = 1; \
249 WARN_ON(1); \
250 return -1; \
251 }
252
253/**
254 * check_pages - integrity check of trace buffers
255 *
256 * As a safety measure we check to make sure the data pages have not
257 * been corrupted.
258 */
259int check_pages(struct trace_array_cpu *data)
260{
261 struct page *page, *tmp;
262
263 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
264 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
265
266 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
267 CHECK_COND(page->lru.next->prev != &page->lru);
268 CHECK_COND(page->lru.prev->next != &page->lru);
269 }
270
271 return 0;
272}
273
274/**
275 * head_page - page address of the first page in per_cpu buffer.
276 *
277 * head_page returns the page address of the first page in
278 * a per_cpu buffer. This also performs various consistency
279 * checks to make sure the buffer has not been corrupted.
280 */
281void *head_page(struct trace_array_cpu *data)
282{
283 struct page *page;
284
285 if (list_empty(&data->trace_pages))
286 return NULL;
287
288 page = list_entry(data->trace_pages.next, struct page, lru);
289 BUG_ON(&page->lru == &data->trace_pages);
290
291 return page_address(page);
292}
293
294/** 243/**
295 * trace_seq_printf - sequence printing of trace information 244 * trace_seq_printf - sequence printing of trace information
296 * @s: trace sequence descriptor 245 * @s: trace sequence descriptor
@@ -437,34 +386,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
437 trace_seq_reset(s); 386 trace_seq_reset(s);
438} 387}
439 388
440/*
441 * flip the trace buffers between two trace descriptors.
442 * This usually is the buffers between the global_trace and
443 * the max_tr to record a snapshot of a current trace.
444 *
445 * The ftrace_max_lock must be held.
446 */
447static void
448flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
449{
450 struct list_head flip_pages;
451
452 INIT_LIST_HEAD(&flip_pages);
453
454 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
455 sizeof(struct trace_array_cpu) -
456 offsetof(struct trace_array_cpu, trace_head_idx));
457
458 check_pages(tr1);
459 check_pages(tr2);
460 list_splice_init(&tr1->trace_pages, &flip_pages);
461 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
462 list_splice_init(&flip_pages, &tr2->trace_pages);
463 BUG_ON(!list_empty(&flip_pages));
464 check_pages(tr1);
465 check_pages(tr2);
466}
467
468/** 389/**
469 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 390 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
470 * @tr: tracer 391 * @tr: tracer
@@ -477,17 +398,15 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
477void 398void
478update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 399update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
479{ 400{
480 struct trace_array_cpu *data; 401 struct ring_buffer *buf = tr->buffer;
481 int i;
482 402
483 WARN_ON_ONCE(!irqs_disabled()); 403 WARN_ON_ONCE(!irqs_disabled());
484 __raw_spin_lock(&ftrace_max_lock); 404 __raw_spin_lock(&ftrace_max_lock);
485 /* clear out all the previous traces */ 405
486 for_each_tracing_cpu(i) { 406 tr->buffer = max_tr.buffer;
487 data = tr->data[i]; 407 max_tr.buffer = buf;
488 flip_trace(max_tr.data[i], data); 408
489 tracing_reset(data); 409 ring_buffer_reset(tr->buffer);
490 }
491 410
492 __update_max_tr(tr, tsk, cpu); 411 __update_max_tr(tr, tsk, cpu);
493 __raw_spin_unlock(&ftrace_max_lock); 412 __raw_spin_unlock(&ftrace_max_lock);
@@ -504,16 +423,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
504void 423void
505update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 424update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
506{ 425{
507 struct trace_array_cpu *data = tr->data[cpu]; 426 int ret;
508 int i;
509 427
510 WARN_ON_ONCE(!irqs_disabled()); 428 WARN_ON_ONCE(!irqs_disabled());
511 __raw_spin_lock(&ftrace_max_lock); 429 __raw_spin_lock(&ftrace_max_lock);
512 for_each_tracing_cpu(i)
513 tracing_reset(max_tr.data[i]);
514 430
515 flip_trace(max_tr.data[cpu], data); 431 ring_buffer_reset(max_tr.buffer);
516 tracing_reset(data); 432 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
433
434 WARN_ON_ONCE(ret);
517 435
518 __update_max_tr(tr, tsk, cpu); 436 __update_max_tr(tr, tsk, cpu);
519 __raw_spin_unlock(&ftrace_max_lock); 437 __raw_spin_unlock(&ftrace_max_lock);
@@ -550,7 +468,6 @@ int register_tracer(struct tracer *type)
550#ifdef CONFIG_FTRACE_STARTUP_TEST 468#ifdef CONFIG_FTRACE_STARTUP_TEST
551 if (type->selftest) { 469 if (type->selftest) {
552 struct tracer *saved_tracer = current_trace; 470 struct tracer *saved_tracer = current_trace;
553 struct trace_array_cpu *data;
554 struct trace_array *tr = &global_trace; 471 struct trace_array *tr = &global_trace;
555 int saved_ctrl = tr->ctrl; 472 int saved_ctrl = tr->ctrl;
556 int i; 473 int i;
@@ -562,10 +479,7 @@ int register_tracer(struct tracer *type)
562 * If we fail, we do not register this tracer. 479 * If we fail, we do not register this tracer.
563 */ 480 */
564 for_each_tracing_cpu(i) { 481 for_each_tracing_cpu(i) {
565 data = tr->data[i]; 482 tracing_reset(tr, i);
566 if (!head_page(data))
567 continue;
568 tracing_reset(data);
569 } 483 }
570 current_trace = type; 484 current_trace = type;
571 tr->ctrl = 0; 485 tr->ctrl = 0;
@@ -581,10 +495,7 @@ int register_tracer(struct tracer *type)
581 } 495 }
582 /* Only reset on passing, to avoid touching corrupted buffers */ 496 /* Only reset on passing, to avoid touching corrupted buffers */
583 for_each_tracing_cpu(i) { 497 for_each_tracing_cpu(i) {
584 data = tr->data[i]; 498 tracing_reset(tr, i);
585 if (!head_page(data))
586 continue;
587 tracing_reset(data);
588 } 499 }
589 printk(KERN_CONT "PASSED\n"); 500 printk(KERN_CONT "PASSED\n");
590 } 501 }
@@ -630,13 +541,9 @@ void unregister_tracer(struct tracer *type)
630 mutex_unlock(&trace_types_lock); 541 mutex_unlock(&trace_types_lock);
631} 542}
632 543
633void tracing_reset(struct trace_array_cpu *data) 544void tracing_reset(struct trace_array *tr, int cpu)
634{ 545{
635 data->trace_idx = 0; 546 ring_buffer_reset_cpu(tr->buffer, cpu);
636 data->overrun = 0;
637 data->trace_head = data->trace_tail = head_page(data);
638 data->trace_head_idx = 0;
639 data->trace_tail_idx = 0;
640} 547}
641 548
642#define SAVED_CMDLINES 128 549#define SAVED_CMDLINES 128
@@ -722,70 +629,6 @@ void tracing_record_cmdline(struct task_struct *tsk)
722 trace_save_cmdline(tsk); 629 trace_save_cmdline(tsk);
723} 630}
724 631
725static inline struct list_head *
726trace_next_list(struct trace_array_cpu *data, struct list_head *next)
727{
728 /*
729 * Roundrobin - but skip the head (which is not a real page):
730 */
731 next = next->next;
732 if (unlikely(next == &data->trace_pages))
733 next = next->next;
734 BUG_ON(next == &data->trace_pages);
735
736 return next;
737}
738
739static inline void *
740trace_next_page(struct trace_array_cpu *data, void *addr)
741{
742 struct list_head *next;
743 struct page *page;
744
745 page = virt_to_page(addr);
746
747 next = trace_next_list(data, &page->lru);
748 page = list_entry(next, struct page, lru);
749
750 return page_address(page);
751}
752
753struct trace_entry *
754tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
755{
756 unsigned long idx, idx_next;
757 struct trace_entry *entry;
758
759 data->trace_idx++;
760 idx = data->trace_head_idx;
761 idx_next = idx + 1;
762
763 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
764
765 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
766
767 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
768 data->trace_head = trace_next_page(data, data->trace_head);
769 idx_next = 0;
770 }
771
772 if (data->trace_head == data->trace_tail &&
773 idx_next == data->trace_tail_idx) {
774 /* overrun */
775 data->overrun++;
776 data->trace_tail_idx++;
777 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
778 data->trace_tail =
779 trace_next_page(data, data->trace_tail);
780 data->trace_tail_idx = 0;
781 }
782 }
783
784 data->trace_head_idx = idx_next;
785
786 return entry;
787}
788
789void 632void
790tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) 633tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
791{ 634{
@@ -796,7 +639,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
796 639
797 entry->field.preempt_count = pc & 0xff; 640 entry->field.preempt_count = pc & 0xff;
798 entry->field.pid = (tsk) ? tsk->pid : 0; 641 entry->field.pid = (tsk) ? tsk->pid : 0;
799 entry->field.t = ftrace_now(raw_smp_processor_id());
800 entry->field.flags = 642 entry->field.flags =
801 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 643 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
802 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 644 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
@@ -808,18 +650,20 @@ void
808trace_function(struct trace_array *tr, struct trace_array_cpu *data, 650trace_function(struct trace_array *tr, struct trace_array_cpu *data,
809 unsigned long ip, unsigned long parent_ip, unsigned long flags) 651 unsigned long ip, unsigned long parent_ip, unsigned long flags)
810{ 652{
653 struct ring_buffer_event *event;
811 struct trace_entry *entry; 654 struct trace_entry *entry;
812 unsigned long irq_flags; 655 unsigned long irq_flags;
813 656
814 raw_local_irq_save(irq_flags); 657 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
815 __raw_spin_lock(&data->lock); 658 &irq_flags);
816 entry = tracing_get_trace_entry(tr, data); 659 if (!event)
660 return;
661 entry = ring_buffer_event_data(event);
817 tracing_generic_entry_update(entry, flags); 662 tracing_generic_entry_update(entry, flags);
818 entry->type = TRACE_FN; 663 entry->type = TRACE_FN;
819 entry->field.fn.ip = ip; 664 entry->field.fn.ip = ip;
820 entry->field.fn.parent_ip = parent_ip; 665 entry->field.fn.parent_ip = parent_ip;
821 __raw_spin_unlock(&data->lock); 666 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
822 raw_local_irq_restore(irq_flags);
823} 667}
824 668
825void 669void
@@ -835,13 +679,19 @@ void __trace_stack(struct trace_array *tr,
835 unsigned long flags, 679 unsigned long flags,
836 int skip) 680 int skip)
837{ 681{
682 struct ring_buffer_event *event;
838 struct trace_entry *entry; 683 struct trace_entry *entry;
839 struct stack_trace trace; 684 struct stack_trace trace;
685 unsigned long irq_flags;
840 686
841 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 687 if (!(trace_flags & TRACE_ITER_STACKTRACE))
842 return; 688 return;
843 689
844 entry = tracing_get_trace_entry(tr, data); 690 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
691 &irq_flags);
692 if (!event)
693 return;
694 entry = ring_buffer_event_data(event);
845 tracing_generic_entry_update(entry, flags); 695 tracing_generic_entry_update(entry, flags);
846 entry->type = TRACE_STACK; 696 entry->type = TRACE_STACK;
847 697
@@ -853,28 +703,31 @@ void __trace_stack(struct trace_array *tr,
853 trace.entries = entry->field.stack.caller; 703 trace.entries = entry->field.stack.caller;
854 704
855 save_stack_trace(&trace); 705 save_stack_trace(&trace);
706 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
856} 707}
857 708
858void 709void
859__trace_special(void *__tr, void *__data, 710__trace_special(void *__tr, void *__data,
860 unsigned long arg1, unsigned long arg2, unsigned long arg3) 711 unsigned long arg1, unsigned long arg2, unsigned long arg3)
861{ 712{
713 struct ring_buffer_event *event;
862 struct trace_array_cpu *data = __data; 714 struct trace_array_cpu *data = __data;
863 struct trace_array *tr = __tr; 715 struct trace_array *tr = __tr;
864 struct trace_entry *entry; 716 struct trace_entry *entry;
865 unsigned long irq_flags; 717 unsigned long irq_flags;
866 718
867 raw_local_irq_save(irq_flags); 719 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
868 __raw_spin_lock(&data->lock); 720 &irq_flags);
869 entry = tracing_get_trace_entry(tr, data); 721 if (!event)
722 return;
723 entry = ring_buffer_event_data(event);
870 tracing_generic_entry_update(entry, 0); 724 tracing_generic_entry_update(entry, 0);
871 entry->type = TRACE_SPECIAL; 725 entry->type = TRACE_SPECIAL;
872 entry->field.special.arg1 = arg1; 726 entry->field.special.arg1 = arg1;
873 entry->field.special.arg2 = arg2; 727 entry->field.special.arg2 = arg2;
874 entry->field.special.arg3 = arg3; 728 entry->field.special.arg3 = arg3;
729 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
875 __trace_stack(tr, data, irq_flags, 4); 730 __trace_stack(tr, data, irq_flags, 4);
876 __raw_spin_unlock(&data->lock);
877 raw_local_irq_restore(irq_flags);
878 731
879 trace_wake_up(); 732 trace_wake_up();
880} 733}
@@ -886,12 +739,15 @@ tracing_sched_switch_trace(struct trace_array *tr,
886 struct task_struct *next, 739 struct task_struct *next,
887 unsigned long flags) 740 unsigned long flags)
888{ 741{
742 struct ring_buffer_event *event;
889 struct trace_entry *entry; 743 struct trace_entry *entry;
890 unsigned long irq_flags; 744 unsigned long irq_flags;
891 745
892 raw_local_irq_save(irq_flags); 746 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
893 __raw_spin_lock(&data->lock); 747 &irq_flags);
894 entry = tracing_get_trace_entry(tr, data); 748 if (!event)
749 return;
750 entry = ring_buffer_event_data(event);
895 tracing_generic_entry_update(entry, flags); 751 tracing_generic_entry_update(entry, flags);
896 entry->type = TRACE_CTX; 752 entry->type = TRACE_CTX;
897 entry->field.ctx.prev_pid = prev->pid; 753 entry->field.ctx.prev_pid = prev->pid;
@@ -901,9 +757,8 @@ tracing_sched_switch_trace(struct trace_array *tr,
901 entry->field.ctx.next_prio = next->prio; 757 entry->field.ctx.next_prio = next->prio;
902 entry->field.ctx.next_state = next->state; 758 entry->field.ctx.next_state = next->state;
903 entry->field.ctx.next_cpu = task_cpu(next); 759 entry->field.ctx.next_cpu = task_cpu(next);
760 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
904 __trace_stack(tr, data, flags, 5); 761 __trace_stack(tr, data, flags, 5);
905 __raw_spin_unlock(&data->lock);
906 raw_local_irq_restore(irq_flags);
907} 762}
908 763
909void 764void
@@ -913,12 +768,15 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
913 struct task_struct *curr, 768 struct task_struct *curr,
914 unsigned long flags) 769 unsigned long flags)
915{ 770{
771 struct ring_buffer_event *event;
916 struct trace_entry *entry; 772 struct trace_entry *entry;
917 unsigned long irq_flags; 773 unsigned long irq_flags;
918 774
919 raw_local_irq_save(irq_flags); 775 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
920 __raw_spin_lock(&data->lock); 776 &irq_flags);
921 entry = tracing_get_trace_entry(tr, data); 777 if (!event)
778 return;
779 entry = ring_buffer_event_data(event);
922 tracing_generic_entry_update(entry, flags); 780 tracing_generic_entry_update(entry, flags);
923 entry->type = TRACE_WAKE; 781 entry->type = TRACE_WAKE;
924 entry->field.ctx.prev_pid = curr->pid; 782 entry->field.ctx.prev_pid = curr->pid;
@@ -928,9 +786,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
928 entry->field.ctx.next_prio = wakee->prio; 786 entry->field.ctx.next_prio = wakee->prio;
929 entry->field.ctx.next_state = wakee->state; 787 entry->field.ctx.next_state = wakee->state;
930 entry->field.ctx.next_cpu = task_cpu(wakee); 788 entry->field.ctx.next_cpu = task_cpu(wakee);
789 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
931 __trace_stack(tr, data, flags, 6); 790 __trace_stack(tr, data, flags, 6);
932 __raw_spin_unlock(&data->lock);
933 raw_local_irq_restore(irq_flags);
934 791
935 trace_wake_up(); 792 trace_wake_up();
936} 793}
@@ -1011,183 +868,77 @@ enum trace_file_type {
1011 TRACE_FILE_LAT_FMT = 1, 868 TRACE_FILE_LAT_FMT = 1,
1012}; 869};
1013 870
1014/* Return the current entry. */
1015static struct trace_entry *
1016trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1017 struct trace_iterator *iter, int cpu)
1018{
1019 struct page *page;
1020 struct trace_entry *array;
1021
1022 if (iter->next_idx[cpu] >= tr->entries ||
1023 iter->next_idx[cpu] >= data->trace_idx ||
1024 (data->trace_head == data->trace_tail &&
1025 data->trace_head_idx == data->trace_tail_idx))
1026 return NULL;
1027
1028 if (!iter->next_page[cpu]) {
1029 /* Initialize the iterator for this cpu trace buffer */
1030 WARN_ON(!data->trace_tail);
1031 page = virt_to_page(data->trace_tail);
1032 iter->next_page[cpu] = &page->lru;
1033 iter->next_page_idx[cpu] = data->trace_tail_idx;
1034 }
1035
1036 page = list_entry(iter->next_page[cpu], struct page, lru);
1037 BUG_ON(&data->trace_pages == &page->lru);
1038
1039 array = page_address(page);
1040
1041 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1042 return &array[iter->next_page_idx[cpu]];
1043}
1044
1045/* Increment the index counter of an iterator by one */
1046static void __trace_iterator_increment(struct trace_iterator *iter, int cpu)
1047{
1048 iter->next_idx[cpu]++;
1049 iter->next_page_idx[cpu]++;
1050
1051 if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) {
1052 struct trace_array_cpu *data = iter->tr->data[cpu];
1053
1054 iter->next_page_idx[cpu] = 0;
1055 iter->next_page[cpu] =
1056 trace_next_list(data, iter->next_page[cpu]);
1057 }
1058}
1059
1060static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 871static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
1061{ 872{
1062 iter->idx++; 873 iter->idx++;
1063 __trace_iterator_increment(iter, cpu); 874 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1064} 875}
1065 876
1066static struct trace_entry * 877static struct trace_entry *
1067trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data, 878peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1068 struct trace_iterator *iter, int cpu)
1069{ 879{
1070 struct list_head *next_page; 880 struct ring_buffer_event *event;
1071 struct trace_entry *ent; 881 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1072 int idx, next_idx, next_page_idx;
1073
1074 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1075
1076 if (likely(!ent || ent->type != TRACE_CONT))
1077 return ent;
1078
1079 /* save the iterator details */
1080 idx = iter->idx;
1081 next_idx = iter->next_idx[cpu];
1082 next_page_idx = iter->next_page_idx[cpu];
1083 next_page = iter->next_page[cpu];
1084
1085 /* find a real entry */
1086 do {
1087 __trace_iterator_increment(iter, cpu);
1088 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1089 } while (ent && ent->type != TRACE_CONT);
1090
1091 /* reset the iterator */
1092 iter->idx = idx;
1093 iter->next_idx[cpu] = next_idx;
1094 iter->next_page_idx[cpu] = next_page_idx;
1095 iter->next_page[cpu] = next_page;
1096 882
1097 return ent; 883 event = ring_buffer_iter_peek(buf_iter, ts);
884 return event ? ring_buffer_event_data(event) : NULL;
1098} 885}
1099
1100static struct trace_entry * 886static struct trace_entry *
1101__find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc) 887__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1102{ 888{
1103 struct trace_array *tr = iter->tr; 889 struct ring_buffer *buffer = iter->tr->buffer;
1104 struct trace_entry *ent, *next = NULL; 890 struct trace_entry *ent, *next = NULL;
891 u64 next_ts = 0, ts;
1105 int next_cpu = -1; 892 int next_cpu = -1;
1106 int cpu; 893 int cpu;
1107 894
1108 for_each_tracing_cpu(cpu) { 895 for_each_tracing_cpu(cpu) {
1109 if (!head_page(tr->data[cpu]))
1110 continue;
1111 896
1112 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); 897 if (ring_buffer_empty_cpu(buffer, cpu))
898 continue;
1113 899
1114 if (ent && ent->type == TRACE_CONT) { 900 ent = peek_next_entry(iter, cpu, &ts);
1115 struct trace_array_cpu *data = tr->data[cpu];
1116
1117 if (!inc)
1118 ent = trace_entry_next(tr, data, iter, cpu);
1119 else {
1120 while (ent && ent->type == TRACE_CONT) {
1121 __trace_iterator_increment(iter, cpu);
1122 ent = trace_entry_idx(tr, tr->data[cpu],
1123 iter, cpu);
1124 }
1125 }
1126 }
1127 901
1128 /* 902 /*
1129 * Pick the entry with the smallest timestamp: 903 * Pick the entry with the smallest timestamp:
1130 */ 904 */
1131 if (ent && (!next || ent->field.t < next->field.t)) { 905 if (ent && (!next || ts < next_ts)) {
1132 next = ent; 906 next = ent;
1133 next_cpu = cpu; 907 next_cpu = cpu;
908 next_ts = ts;
1134 } 909 }
1135 } 910 }
1136 911
1137 if (ent_cpu) 912 if (ent_cpu)
1138 *ent_cpu = next_cpu; 913 *ent_cpu = next_cpu;
1139 914
915 if (ent_ts)
916 *ent_ts = next_ts;
917
1140 return next; 918 return next;
1141} 919}
1142 920
1143/* Find the next real entry, without updating the iterator itself */ 921/* Find the next real entry, without updating the iterator itself */
1144static struct trace_entry * 922static struct trace_entry *
1145find_next_entry(struct trace_iterator *iter, int *ent_cpu) 923find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1146{ 924{
1147 return __find_next_entry(iter, ent_cpu, 0); 925 return __find_next_entry(iter, ent_cpu, ent_ts);
1148} 926}
1149 927
1150/* Find the next real entry, and increment the iterator to the next entry */ 928/* Find the next real entry, and increment the iterator to the next entry */
1151static void *find_next_entry_inc(struct trace_iterator *iter) 929static void *find_next_entry_inc(struct trace_iterator *iter)
1152{ 930{
1153 struct trace_entry *next; 931 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1154 int next_cpu = -1;
1155
1156 next = __find_next_entry(iter, &next_cpu, 1);
1157
1158 iter->prev_ent = iter->ent;
1159 iter->prev_cpu = iter->cpu;
1160 932
1161 iter->ent = next; 933 if (iter->ent)
1162 iter->cpu = next_cpu;
1163
1164 if (next)
1165 trace_iterator_increment(iter, iter->cpu); 934 trace_iterator_increment(iter, iter->cpu);
1166 935
1167 return next ? iter : NULL; 936 return iter->ent ? iter : NULL;
1168} 937}
1169 938
1170static void trace_consume(struct trace_iterator *iter) 939static void trace_consume(struct trace_iterator *iter)
1171{ 940{
1172 struct trace_array_cpu *data = iter->tr->data[iter->cpu]; 941 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1173 struct trace_entry *ent;
1174
1175 again:
1176 data->trace_tail_idx++;
1177 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1178 data->trace_tail = trace_next_page(data, data->trace_tail);
1179 data->trace_tail_idx = 0;
1180 }
1181
1182 /* Check if we empty it, then reset the index */
1183 if (data->trace_head == data->trace_tail &&
1184 data->trace_head_idx == data->trace_tail_idx)
1185 data->trace_idx = 0;
1186
1187 ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu],
1188 iter, iter->cpu);
1189 if (ent && ent->type == TRACE_CONT)
1190 goto again;
1191} 942}
1192 943
1193static void *s_next(struct seq_file *m, void *v, loff_t *pos) 944static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1220,7 +971,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1220 struct trace_iterator *iter = m->private; 971 struct trace_iterator *iter = m->private;
1221 void *p = NULL; 972 void *p = NULL;
1222 loff_t l = 0; 973 loff_t l = 0;
1223 int i; 974 int cpu;
1224 975
1225 mutex_lock(&trace_types_lock); 976 mutex_lock(&trace_types_lock);
1226 977
@@ -1239,12 +990,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1239 iter->ent = NULL; 990 iter->ent = NULL;
1240 iter->cpu = 0; 991 iter->cpu = 0;
1241 iter->idx = -1; 992 iter->idx = -1;
1242 iter->prev_ent = NULL;
1243 iter->prev_cpu = -1;
1244 993
1245 for_each_tracing_cpu(i) { 994 for_each_tracing_cpu(cpu) {
1246 iter->next_idx[i] = 0; 995 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1247 iter->next_page[i] = NULL;
1248 } 996 }
1249 997
1250 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 998 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
@@ -1365,23 +1113,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1365 struct trace_array *tr = iter->tr; 1113 struct trace_array *tr = iter->tr;
1366 struct trace_array_cpu *data = tr->data[tr->cpu]; 1114 struct trace_array_cpu *data = tr->data[tr->cpu];
1367 struct tracer *type = current_trace; 1115 struct tracer *type = current_trace;
1368 unsigned long total = 0; 1116 unsigned long total;
1369 unsigned long entries = 0; 1117 unsigned long entries;
1370 int cpu;
1371 const char *name = "preemption"; 1118 const char *name = "preemption";
1372 1119
1373 if (type) 1120 if (type)
1374 name = type->name; 1121 name = type->name;
1375 1122
1376 for_each_tracing_cpu(cpu) { 1123 entries = ring_buffer_entries(iter->tr->buffer);
1377 if (head_page(tr->data[cpu])) { 1124 total = entries +
1378 total += tr->data[cpu]->trace_idx; 1125 ring_buffer_overruns(iter->tr->buffer);
1379 if (tr->data[cpu]->trace_idx > tr->entries)
1380 entries += tr->entries;
1381 else
1382 entries += tr->data[cpu]->trace_idx;
1383 }
1384 }
1385 1126
1386 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1127 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1387 name, UTS_RELEASE); 1128 name, UTS_RELEASE);
@@ -1468,7 +1209,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1468unsigned long preempt_mark_thresh = 100; 1209unsigned long preempt_mark_thresh = 100;
1469 1210
1470static void 1211static void
1471lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, 1212lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1472 unsigned long rel_usecs) 1213 unsigned long rel_usecs)
1473{ 1214{
1474 trace_seq_printf(s, " %4lldus", abs_usecs); 1215 trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1488,12 +1229,10 @@ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1488 */ 1229 */
1489void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) 1230void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1490{ 1231{
1491 struct trace_array *tr = iter->tr;
1492 struct trace_array_cpu *data = tr->data[iter->cpu];
1493 struct trace_entry *ent; 1232 struct trace_entry *ent;
1494 bool ok = true; 1233 bool ok = true;
1495 1234
1496 ent = trace_entry_idx(tr, data, iter, iter->cpu); 1235 ent = peek_next_entry(iter, iter->cpu, NULL);
1497 if (!ent || ent->type != TRACE_CONT) { 1236 if (!ent || ent->type != TRACE_CONT) {
1498 trace_seq_putc(s, '\n'); 1237 trace_seq_putc(s, '\n');
1499 return; 1238 return;
@@ -1502,8 +1241,8 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1502 do { 1241 do {
1503 if (ok) 1242 if (ok)
1504 ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0); 1243 ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
1505 __trace_iterator_increment(iter, iter->cpu); 1244 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1506 ent = trace_entry_idx(tr, data, iter, iter->cpu); 1245 ent = peek_next_entry(iter, iter->cpu, NULL);
1507 } while (ent && ent->type == TRACE_CONT); 1246 } while (ent && ent->type == TRACE_CONT);
1508 1247
1509 if (!ok) 1248 if (!ok)
@@ -1515,25 +1254,26 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1515{ 1254{
1516 struct trace_seq *s = &iter->seq; 1255 struct trace_seq *s = &iter->seq;
1517 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1256 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1518 struct trace_entry *next_entry = find_next_entry(iter, NULL); 1257 struct trace_entry *next_entry;
1519 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); 1258 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1520 struct trace_entry *entry = iter->ent; 1259 struct trace_entry *entry = iter->ent;
1521 struct trace_field *field = &entry->field; 1260 struct trace_field *field = &entry->field;
1522 unsigned long abs_usecs; 1261 unsigned long abs_usecs;
1523 unsigned long rel_usecs; 1262 unsigned long rel_usecs;
1263 u64 next_ts;
1524 char *comm; 1264 char *comm;
1525 int S, T; 1265 int S, T;
1526 int i; 1266 int i;
1527 unsigned state; 1267 unsigned state;
1528 1268
1529 if (!next_entry)
1530 next_entry = entry;
1531
1532 if (entry->type == TRACE_CONT) 1269 if (entry->type == TRACE_CONT)
1533 return 1; 1270 return 1;
1534 1271
1535 rel_usecs = ns2usecs(next_entry->field.t - entry->field.t); 1272 next_entry = find_next_entry(iter, NULL, &next_ts);
1536 abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start); 1273 if (!next_entry)
1274 next_ts = iter->ts;
1275 rel_usecs = ns2usecs(next_ts - iter->ts);
1276 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1537 1277
1538 if (verbose) { 1278 if (verbose) {
1539 comm = trace_find_cmdline(field->pid); 1279 comm = trace_find_cmdline(field->pid);
@@ -1542,7 +1282,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1542 comm, 1282 comm,
1543 field->pid, cpu, field->flags, 1283 field->pid, cpu, field->flags,
1544 field->preempt_count, trace_idx, 1284 field->preempt_count, trace_idx,
1545 ns2usecs(field->t), 1285 ns2usecs(iter->ts),
1546 abs_usecs/1000, 1286 abs_usecs/1000,
1547 abs_usecs % 1000, rel_usecs/1000, 1287 abs_usecs % 1000, rel_usecs/1000,
1548 rel_usecs % 1000); 1288 rel_usecs % 1000);
@@ -1627,7 +1367,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
1627 1367
1628 comm = trace_find_cmdline(iter->ent->field.pid); 1368 comm = trace_find_cmdline(iter->ent->field.pid);
1629 1369
1630 t = ns2usecs(field->t); 1370 t = ns2usecs(iter->ts);
1631 usec_rem = do_div(t, 1000000ULL); 1371 usec_rem = do_div(t, 1000000ULL);
1632 secs = (unsigned long)t; 1372 secs = (unsigned long)t;
1633 1373
@@ -1732,7 +1472,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
1732 field = &entry->field; 1472 field = &entry->field;
1733 1473
1734 ret = trace_seq_printf(s, "%d %d %llu ", 1474 ret = trace_seq_printf(s, "%d %d %llu ",
1735 field->pid, iter->cpu, field->t); 1475 field->pid, iter->cpu, iter->ts);
1736 if (!ret) 1476 if (!ret)
1737 return 0; 1477 return 0;
1738 1478
@@ -1811,7 +1551,7 @@ static int print_hex_fmt(struct trace_iterator *iter)
1811 1551
1812 SEQ_PUT_HEX_FIELD_RET(s, field->pid); 1552 SEQ_PUT_HEX_FIELD_RET(s, field->pid);
1813 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); 1553 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1814 SEQ_PUT_HEX_FIELD_RET(s, field->t); 1554 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1815 1555
1816 switch (entry->type) { 1556 switch (entry->type) {
1817 case TRACE_FN: 1557 case TRACE_FN:
@@ -1861,7 +1601,7 @@ static int print_bin_fmt(struct trace_iterator *iter)
1861 1601
1862 SEQ_PUT_FIELD_RET(s, field->pid); 1602 SEQ_PUT_FIELD_RET(s, field->pid);
1863 SEQ_PUT_FIELD_RET(s, field->cpu); 1603 SEQ_PUT_FIELD_RET(s, field->cpu);
1864 SEQ_PUT_FIELD_RET(s, field->t); 1604 SEQ_PUT_FIELD_RET(s, iter->ts);
1865 1605
1866 switch (entry->type) { 1606 switch (entry->type) {
1867 case TRACE_FN: 1607 case TRACE_FN:
@@ -1888,15 +1628,10 @@ static int print_bin_fmt(struct trace_iterator *iter)
1888 1628
1889static int trace_empty(struct trace_iterator *iter) 1629static int trace_empty(struct trace_iterator *iter)
1890{ 1630{
1891 struct trace_array_cpu *data;
1892 int cpu; 1631 int cpu;
1893 1632
1894 for_each_tracing_cpu(cpu) { 1633 for_each_tracing_cpu(cpu) {
1895 data = iter->tr->data[cpu]; 1634 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1896
1897 if (head_page(data) && data->trace_idx &&
1898 (data->trace_tail != data->trace_head ||
1899 data->trace_tail_idx != data->trace_head_idx))
1900 return 0; 1635 return 0;
1901 } 1636 }
1902 return 1; 1637 return 1;
@@ -1961,6 +1696,8 @@ static struct trace_iterator *
1961__tracing_open(struct inode *inode, struct file *file, int *ret) 1696__tracing_open(struct inode *inode, struct file *file, int *ret)
1962{ 1697{
1963 struct trace_iterator *iter; 1698 struct trace_iterator *iter;
1699 struct seq_file *m;
1700 int cpu;
1964 1701
1965 if (tracing_disabled) { 1702 if (tracing_disabled) {
1966 *ret = -ENODEV; 1703 *ret = -ENODEV;
@@ -1981,28 +1718,43 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1981 iter->trace = current_trace; 1718 iter->trace = current_trace;
1982 iter->pos = -1; 1719 iter->pos = -1;
1983 1720
1721 for_each_tracing_cpu(cpu) {
1722 iter->buffer_iter[cpu] =
1723 ring_buffer_read_start(iter->tr->buffer, cpu);
1724 if (!iter->buffer_iter[cpu])
1725 goto fail_buffer;
1726 }
1727
1984 /* TODO stop tracer */ 1728 /* TODO stop tracer */
1985 *ret = seq_open(file, &tracer_seq_ops); 1729 *ret = seq_open(file, &tracer_seq_ops);
1986 if (!*ret) { 1730 if (*ret)
1987 struct seq_file *m = file->private_data; 1731 goto fail_buffer;
1988 m->private = iter;
1989 1732
1990 /* stop the trace while dumping */ 1733 m = file->private_data;
1991 if (iter->tr->ctrl) { 1734 m->private = iter;
1992 tracer_enabled = 0;
1993 ftrace_function_enabled = 0;
1994 }
1995 1735
1996 if (iter->trace && iter->trace->open) 1736 /* stop the trace while dumping */
1997 iter->trace->open(iter); 1737 if (iter->tr->ctrl) {
1998 } else { 1738 tracer_enabled = 0;
1999 kfree(iter); 1739 ftrace_function_enabled = 0;
2000 iter = NULL;
2001 } 1740 }
1741
1742 if (iter->trace && iter->trace->open)
1743 iter->trace->open(iter);
1744
2002 mutex_unlock(&trace_types_lock); 1745 mutex_unlock(&trace_types_lock);
2003 1746
2004 out: 1747 out:
2005 return iter; 1748 return iter;
1749
1750 fail_buffer:
1751 for_each_tracing_cpu(cpu) {
1752 if (iter->buffer_iter[cpu])
1753 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1754 }
1755 mutex_unlock(&trace_types_lock);
1756
1757 return ERR_PTR(-ENOMEM);
2006} 1758}
2007 1759
2008int tracing_open_generic(struct inode *inode, struct file *filp) 1760int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2018,8 +1770,14 @@ int tracing_release(struct inode *inode, struct file *file)
2018{ 1770{
2019 struct seq_file *m = (struct seq_file *)file->private_data; 1771 struct seq_file *m = (struct seq_file *)file->private_data;
2020 struct trace_iterator *iter = m->private; 1772 struct trace_iterator *iter = m->private;
1773 int cpu;
2021 1774
2022 mutex_lock(&trace_types_lock); 1775 mutex_lock(&trace_types_lock);
1776 for_each_tracing_cpu(cpu) {
1777 if (iter->buffer_iter[cpu])
1778 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1779 }
1780
2023 if (iter->trace && iter->trace->close) 1781 if (iter->trace && iter->trace->close)
2024 iter->trace->close(iter); 1782 iter->trace->close(iter);
2025 1783
@@ -2526,6 +2284,7 @@ static atomic_t tracing_reader;
2526static int tracing_open_pipe(struct inode *inode, struct file *filp) 2284static int tracing_open_pipe(struct inode *inode, struct file *filp)
2527{ 2285{
2528 struct trace_iterator *iter; 2286 struct trace_iterator *iter;
2287 int cpu;
2529 2288
2530 if (tracing_disabled) 2289 if (tracing_disabled)
2531 return -ENODEV; 2290 return -ENODEV;
@@ -2546,17 +2305,38 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2546 iter->trace = current_trace; 2305 iter->trace = current_trace;
2547 filp->private_data = iter; 2306 filp->private_data = iter;
2548 2307
2308 for_each_tracing_cpu(cpu) {
2309 iter->buffer_iter[cpu] =
2310 ring_buffer_read_start(iter->tr->buffer, cpu);
2311 if (!iter->buffer_iter[cpu])
2312 goto fail_buffer;
2313 }
2314
2549 if (iter->trace->pipe_open) 2315 if (iter->trace->pipe_open)
2550 iter->trace->pipe_open(iter); 2316 iter->trace->pipe_open(iter);
2551 mutex_unlock(&trace_types_lock); 2317 mutex_unlock(&trace_types_lock);
2552 2318
2553 return 0; 2319 return 0;
2320
2321 fail_buffer:
2322 for_each_tracing_cpu(cpu) {
2323 if (iter->buffer_iter[cpu])
2324 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2325 }
2326 mutex_unlock(&trace_types_lock);
2327
2328 return -ENOMEM;
2554} 2329}
2555 2330
2556static int tracing_release_pipe(struct inode *inode, struct file *file) 2331static int tracing_release_pipe(struct inode *inode, struct file *file)
2557{ 2332{
2558 struct trace_iterator *iter = file->private_data; 2333 struct trace_iterator *iter = file->private_data;
2334 int cpu;
2559 2335
2336 for_each_tracing_cpu(cpu) {
2337 if (iter->buffer_iter[cpu])
2338 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2339 }
2560 kfree(iter); 2340 kfree(iter);
2561 atomic_dec(&tracing_reader); 2341 atomic_dec(&tracing_reader);
2562 2342
@@ -2592,13 +2372,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2592 size_t cnt, loff_t *ppos) 2372 size_t cnt, loff_t *ppos)
2593{ 2373{
2594 struct trace_iterator *iter = filp->private_data; 2374 struct trace_iterator *iter = filp->private_data;
2595 struct trace_array_cpu *data;
2596 static cpumask_t mask;
2597 unsigned long flags; 2375 unsigned long flags;
2598#ifdef CONFIG_FTRACE 2376#ifdef CONFIG_FTRACE
2599 int ftrace_save; 2377 int ftrace_save;
2600#endif 2378#endif
2601 int cpu;
2602 ssize_t sret; 2379 ssize_t sret;
2603 2380
2604 /* return any leftover data */ 2381 /* return any leftover data */
@@ -2687,32 +2464,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2687 * and then release the locks again. 2464 * and then release the locks again.
2688 */ 2465 */
2689 2466
2690 cpus_clear(mask); 2467 local_irq_disable();
2691 local_irq_save(flags);
2692#ifdef CONFIG_FTRACE 2468#ifdef CONFIG_FTRACE
2693 ftrace_save = ftrace_enabled; 2469 ftrace_save = ftrace_enabled;
2694 ftrace_enabled = 0; 2470 ftrace_enabled = 0;
2695#endif 2471#endif
2696 smp_wmb(); 2472 smp_wmb();
2697 for_each_tracing_cpu(cpu) { 2473 ring_buffer_lock(iter->tr->buffer, &flags);
2698 data = iter->tr->data[cpu];
2699
2700 if (!head_page(data) || !data->trace_idx)
2701 continue;
2702
2703 atomic_inc(&data->disabled);
2704 cpu_set(cpu, mask);
2705 }
2706
2707 for_each_cpu_mask(cpu, mask) {
2708 data = iter->tr->data[cpu];
2709 __raw_spin_lock(&data->lock);
2710
2711 if (data->overrun > iter->last_overrun[cpu])
2712 iter->overrun[cpu] +=
2713 data->overrun - iter->last_overrun[cpu];
2714 iter->last_overrun[cpu] = data->overrun;
2715 }
2716 2474
2717 while (find_next_entry_inc(iter) != NULL) { 2475 while (find_next_entry_inc(iter) != NULL) {
2718 int ret; 2476 int ret;
@@ -2731,19 +2489,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2731 break; 2489 break;
2732 } 2490 }
2733 2491
2734 for_each_cpu_mask(cpu, mask) { 2492 ring_buffer_unlock(iter->tr->buffer, flags);
2735 data = iter->tr->data[cpu];
2736 __raw_spin_unlock(&data->lock);
2737 }
2738
2739 for_each_cpu_mask(cpu, mask) {
2740 data = iter->tr->data[cpu];
2741 atomic_dec(&data->disabled);
2742 }
2743#ifdef CONFIG_FTRACE 2493#ifdef CONFIG_FTRACE
2744 ftrace_enabled = ftrace_save; 2494 ftrace_enabled = ftrace_save;
2745#endif 2495#endif
2746 local_irq_restore(flags); 2496 local_irq_enable();
2747 2497
2748 /* Now copy what we have to the user */ 2498 /* Now copy what we have to the user */
2749 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2499 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -2776,7 +2526,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2776{ 2526{
2777 unsigned long val; 2527 unsigned long val;
2778 char buf[64]; 2528 char buf[64];
2779 int i, ret; 2529 int ret;
2780 struct trace_array *tr = filp->private_data; 2530 struct trace_array *tr = filp->private_data;
2781 2531
2782 if (cnt >= sizeof(buf)) 2532 if (cnt >= sizeof(buf))
@@ -2804,52 +2554,31 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2804 goto out; 2554 goto out;
2805 } 2555 }
2806 2556
2807 if (val > global_trace.entries) { 2557 if (val != global_trace.entries) {
2808 long pages_requested; 2558 ret = ring_buffer_resize(global_trace.buffer, val);
2809 unsigned long freeable_pages; 2559 if (ret < 0) {
2810 2560 cnt = ret;
2811 /* make sure we have enough memory before mapping */
2812 pages_requested =
2813 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2814
2815 /* account for each buffer (and max_tr) */
2816 pages_requested *= tracing_nr_buffers * 2;
2817
2818 /* Check for overflow */
2819 if (pages_requested < 0) {
2820 cnt = -ENOMEM;
2821 goto out;
2822 }
2823
2824 freeable_pages = determine_dirtyable_memory();
2825
2826 /* we only allow to request 1/4 of useable memory */
2827 if (pages_requested >
2828 ((freeable_pages + tracing_pages_allocated) / 4)) {
2829 cnt = -ENOMEM;
2830 goto out; 2561 goto out;
2831 } 2562 }
2832 2563
2833 while (global_trace.entries < val) { 2564 ret = ring_buffer_resize(max_tr.buffer, val);
2834 if (trace_alloc_page()) { 2565 if (ret < 0) {
2835 cnt = -ENOMEM; 2566 int r;
2836 goto out; 2567 cnt = ret;
2568 r = ring_buffer_resize(global_trace.buffer,
2569 global_trace.entries);
2570 if (r < 0) {
2571 /* AARGH! We are left with different
2572 * size max buffer!!!! */
2573 WARN_ON(1);
2574 tracing_disabled = 1;
2837 } 2575 }
2838 /* double check that we don't go over the known pages */ 2576 goto out;
2839 if (tracing_pages_allocated > pages_requested)
2840 break;
2841 } 2577 }
2842 2578
2843 } else { 2579 global_trace.entries = val;
2844 /* include the number of entries in val (inc of page entries) */
2845 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2846 trace_free_page();
2847 } 2580 }
2848 2581
2849 /* check integrity */
2850 for_each_tracing_cpu(i)
2851 check_pages(global_trace.data[i]);
2852
2853 filp->f_pos += cnt; 2582 filp->f_pos += cnt;
2854 2583
2855 /* If check pages failed, return ENOMEM */ 2584 /* If check pages failed, return ENOMEM */
@@ -3086,10 +2815,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3086 static DEFINE_SPINLOCK(trace_buf_lock); 2815 static DEFINE_SPINLOCK(trace_buf_lock);
3087 static char trace_buf[TRACE_BUF_SIZE]; 2816 static char trace_buf[TRACE_BUF_SIZE];
3088 2817
2818 struct ring_buffer_event *event;
3089 struct trace_array *tr = &global_trace; 2819 struct trace_array *tr = &global_trace;
3090 struct trace_array_cpu *data; 2820 struct trace_array_cpu *data;
3091 struct trace_entry *entry; 2821 struct trace_entry *entry;
3092 unsigned long flags; 2822 unsigned long flags, irq_flags;
3093 long disabled; 2823 long disabled;
3094 int cpu, len = 0, write, written = 0; 2824 int cpu, len = 0, write, written = 0;
3095 2825
@@ -3110,8 +2840,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3110 len = min(len, TRACE_BUF_SIZE-1); 2840 len = min(len, TRACE_BUF_SIZE-1);
3111 trace_buf[len] = 0; 2841 trace_buf[len] = 0;
3112 2842
3113 __raw_spin_lock(&data->lock); 2843 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
3114 entry = tracing_get_trace_entry(tr, data); 2844 &irq_flags);
2845 if (!event)
2846 goto out_unlock;
2847 entry = ring_buffer_event_data(event);
3115 tracing_generic_entry_update(entry, flags); 2848 tracing_generic_entry_update(entry, flags);
3116 entry->type = TRACE_PRINT; 2849 entry->type = TRACE_PRINT;
3117 entry->field.print.ip = ip; 2850 entry->field.print.ip = ip;
@@ -3121,21 +2854,27 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3121 memcpy(&entry->field.print.buf, trace_buf, write); 2854 memcpy(&entry->field.print.buf, trace_buf, write);
3122 entry->field.print.buf[write] = 0; 2855 entry->field.print.buf[write] = 0;
3123 written = write; 2856 written = write;
2857 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3124 2858
3125 if (written != len) 2859 if (written != len)
3126 entry->field.flags |= TRACE_FLAG_CONT; 2860 entry->field.flags |= TRACE_FLAG_CONT;
3127 2861
3128 while (written != len) { 2862 while (written != len) {
3129 entry = tracing_get_trace_entry(tr, data); 2863 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
2864 &irq_flags);
2865 if (!event)
2866 goto out_unlock;
2867 entry = ring_buffer_event_data(event);
3130 2868
3131 entry->type = TRACE_CONT; 2869 entry->type = TRACE_CONT;
3132 write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1)); 2870 write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
3133 memcpy(&entry->cont.buf, trace_buf+written, write); 2871 memcpy(&entry->cont.buf, trace_buf+written, write);
3134 entry->cont.buf[write] = 0; 2872 entry->cont.buf[write] = 0;
3135 written += write; 2873 written += write;
2874 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3136 } 2875 }
3137 __raw_spin_unlock(&data->lock);
3138 2876
2877 out_unlock:
3139 spin_unlock(&trace_buf_lock); 2878 spin_unlock(&trace_buf_lock);
3140 2879
3141 out: 2880 out:
@@ -3227,12 +2966,10 @@ void ftrace_dump(void)
3227 static DEFINE_SPINLOCK(ftrace_dump_lock); 2966 static DEFINE_SPINLOCK(ftrace_dump_lock);
3228 /* use static because iter can be a bit big for the stack */ 2967 /* use static because iter can be a bit big for the stack */
3229 static struct trace_iterator iter; 2968 static struct trace_iterator iter;
3230 struct trace_array_cpu *data;
3231 static cpumask_t mask; 2969 static cpumask_t mask;
3232 static int dump_ran; 2970 static int dump_ran;
3233 unsigned long flags; 2971 unsigned long flags, irq_flags;
3234 int cnt = 0; 2972 int cnt = 0;
3235 int cpu;
3236 2973
3237 /* only one dump */ 2974 /* only one dump */
3238 spin_lock_irqsave(&ftrace_dump_lock, flags); 2975 spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3258,25 +2995,7 @@ void ftrace_dump(void)
3258 2995
3259 cpus_clear(mask); 2996 cpus_clear(mask);
3260 2997
3261 for_each_tracing_cpu(cpu) { 2998 ring_buffer_lock(iter.tr->buffer, &irq_flags);
3262 data = iter.tr->data[cpu];
3263
3264 if (!head_page(data) || !data->trace_idx)
3265 continue;
3266
3267 atomic_inc(&data->disabled);
3268 cpu_set(cpu, mask);
3269 }
3270
3271 for_each_cpu_mask(cpu, mask) {
3272 data = iter.tr->data[cpu];
3273 __raw_spin_lock(&data->lock);
3274
3275 if (data->overrun > iter.last_overrun[cpu])
3276 iter.overrun[cpu] +=
3277 data->overrun - iter.last_overrun[cpu];
3278 iter.last_overrun[cpu] = data->overrun;
3279 }
3280 2999
3281 while (!trace_empty(&iter)) { 3000 while (!trace_empty(&iter)) {
3282 3001
@@ -3305,205 +3024,47 @@ void ftrace_dump(void)
3305 else 3024 else
3306 printk(KERN_TRACE "---------------------------------\n"); 3025 printk(KERN_TRACE "---------------------------------\n");
3307 3026
3308 for_each_cpu_mask(cpu, mask) { 3027 ring_buffer_unlock(iter.tr->buffer, irq_flags);
3309 data = iter.tr->data[cpu];
3310 __raw_spin_unlock(&data->lock);
3311 }
3312
3313 for_each_cpu_mask(cpu, mask) {
3314 data = iter.tr->data[cpu];
3315 atomic_dec(&data->disabled);
3316 }
3317
3318 3028
3319 out: 3029 out:
3320 spin_unlock_irqrestore(&ftrace_dump_lock, flags); 3030 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3321} 3031}
3322 3032
3323static int trace_alloc_page(void) 3033__init static int tracer_alloc_buffers(void)
3324{ 3034{
3325 struct trace_array_cpu *data; 3035 struct trace_array_cpu *data;
3326 struct page *page, *tmp;
3327 LIST_HEAD(pages);
3328 void *array;
3329 unsigned pages_allocated = 0;
3330 int i; 3036 int i;
3331 3037
3332 /* first allocate a page for each CPU */ 3038 /* TODO: make the number of buffers hot pluggable with CPUS */
3333 for_each_tracing_cpu(i) { 3039 tracing_buffer_mask = cpu_possible_map;
3334 array = (void *)__get_free_page(GFP_KERNEL);
3335 if (array == NULL) {
3336 printk(KERN_ERR "tracer: failed to allocate page"
3337 "for trace buffer!\n");
3338 goto free_pages;
3339 }
3340
3341 pages_allocated++;
3342 page = virt_to_page(array);
3343 list_add(&page->lru, &pages);
3344 3040
3345/* Only allocate if we are actually using the max trace */ 3041 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3346#ifdef CONFIG_TRACER_MAX_TRACE 3042 TRACE_BUFFER_FLAGS);
3347 array = (void *)__get_free_page(GFP_KERNEL); 3043 if (!global_trace.buffer) {
3348 if (array == NULL) { 3044 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3349 printk(KERN_ERR "tracer: failed to allocate page" 3045 WARN_ON(1);
3350 "for trace buffer!\n"); 3046 return 0;
3351 goto free_pages;
3352 }
3353 pages_allocated++;
3354 page = virt_to_page(array);
3355 list_add(&page->lru, &pages);
3356#endif
3357 } 3047 }
3358 3048 global_trace.entries = ring_buffer_size(global_trace.buffer);
3359 /* Now that we successfully allocate a page per CPU, add them */
3360 for_each_tracing_cpu(i) {
3361 data = global_trace.data[i];
3362 page = list_entry(pages.next, struct page, lru);
3363 list_del_init(&page->lru);
3364 list_add_tail(&page->lru, &data->trace_pages);
3365 ClearPageLRU(page);
3366 3049
3367#ifdef CONFIG_TRACER_MAX_TRACE 3050#ifdef CONFIG_TRACER_MAX_TRACE
3368 data = max_tr.data[i]; 3051 max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3369 page = list_entry(pages.next, struct page, lru); 3052 TRACE_BUFFER_FLAGS);
3370 list_del_init(&page->lru); 3053 if (!max_tr.buffer) {
3371 list_add_tail(&page->lru, &data->trace_pages); 3054 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3372 SetPageLRU(page); 3055 WARN_ON(1);
3373#endif 3056 ring_buffer_free(global_trace.buffer);
3374 } 3057 return 0;
3375 tracing_pages_allocated += pages_allocated;
3376 global_trace.entries += ENTRIES_PER_PAGE;
3377
3378 return 0;
3379
3380 free_pages:
3381 list_for_each_entry_safe(page, tmp, &pages, lru) {
3382 list_del_init(&page->lru);
3383 __free_page(page);
3384 } 3058 }
3385 return -ENOMEM; 3059 max_tr.entries = ring_buffer_size(max_tr.buffer);
3386} 3060 WARN_ON(max_tr.entries != global_trace.entries);
3387
3388static int trace_free_page(void)
3389{
3390 struct trace_array_cpu *data;
3391 struct page *page;
3392 struct list_head *p;
3393 int i;
3394 int ret = 0;
3395
3396 /* free one page from each buffer */
3397 for_each_tracing_cpu(i) {
3398 data = global_trace.data[i];
3399 p = data->trace_pages.next;
3400 if (p == &data->trace_pages) {
3401 /* should never happen */
3402 WARN_ON(1);
3403 tracing_disabled = 1;
3404 ret = -1;
3405 break;
3406 }
3407 page = list_entry(p, struct page, lru);
3408 ClearPageLRU(page);
3409 list_del(&page->lru);
3410 tracing_pages_allocated--;
3411 tracing_pages_allocated--;
3412 __free_page(page);
3413
3414 tracing_reset(data);
3415
3416#ifdef CONFIG_TRACER_MAX_TRACE
3417 data = max_tr.data[i];
3418 p = data->trace_pages.next;
3419 if (p == &data->trace_pages) {
3420 /* should never happen */
3421 WARN_ON(1);
3422 tracing_disabled = 1;
3423 ret = -1;
3424 break;
3425 }
3426 page = list_entry(p, struct page, lru);
3427 ClearPageLRU(page);
3428 list_del(&page->lru);
3429 __free_page(page);
3430
3431 tracing_reset(data);
3432#endif 3061#endif
3433 }
3434 global_trace.entries -= ENTRIES_PER_PAGE;
3435
3436 return ret;
3437}
3438
3439__init static int tracer_alloc_buffers(void)
3440{
3441 struct trace_array_cpu *data;
3442 void *array;
3443 struct page *page;
3444 int pages = 0;
3445 int ret = -ENOMEM;
3446 int i;
3447
3448 /* TODO: make the number of buffers hot pluggable with CPUS */
3449 tracing_nr_buffers = num_possible_cpus();
3450 tracing_buffer_mask = cpu_possible_map;
3451 3062
3452 /* Allocate the first page for all buffers */ 3063 /* Allocate the first page for all buffers */
3453 for_each_tracing_cpu(i) { 3064 for_each_tracing_cpu(i) {
3454 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 3065 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3455 max_tr.data[i] = &per_cpu(max_data, i); 3066 max_tr.data[i] = &per_cpu(max_data, i);
3456
3457 array = (void *)__get_free_page(GFP_KERNEL);
3458 if (array == NULL) {
3459 printk(KERN_ERR "tracer: failed to allocate page"
3460 "for trace buffer!\n");
3461 goto free_buffers;
3462 }
3463
3464 /* set the array to the list */
3465 INIT_LIST_HEAD(&data->trace_pages);
3466 page = virt_to_page(array);
3467 list_add(&page->lru, &data->trace_pages);
3468 /* use the LRU flag to differentiate the two buffers */
3469 ClearPageLRU(page);
3470
3471 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3472 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3473
3474/* Only allocate if we are actually using the max trace */
3475#ifdef CONFIG_TRACER_MAX_TRACE
3476 array = (void *)__get_free_page(GFP_KERNEL);
3477 if (array == NULL) {
3478 printk(KERN_ERR "tracer: failed to allocate page"
3479 "for trace buffer!\n");
3480 goto free_buffers;
3481 }
3482
3483 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3484 page = virt_to_page(array);
3485 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3486 SetPageLRU(page);
3487#endif
3488 }
3489
3490 /*
3491 * Since we allocate by orders of pages, we may be able to
3492 * round up a bit.
3493 */
3494 global_trace.entries = ENTRIES_PER_PAGE;
3495 pages++;
3496
3497 while (global_trace.entries < trace_nr_entries) {
3498 if (trace_alloc_page())
3499 break;
3500 pages++;
3501 } 3067 }
3502 max_tr.entries = global_trace.entries;
3503
3504 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
3505 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
3506 pr_info(" actual entries %ld\n", global_trace.entries);
3507 3068
3508 trace_init_cmdlines(); 3069 trace_init_cmdlines();
3509 3070
@@ -3519,38 +3080,13 @@ __init static int tracer_alloc_buffers(void)
3519 /* All seems OK, enable tracing */ 3080 /* All seems OK, enable tracing */
3520 global_trace.ctrl = tracer_enabled; 3081 global_trace.ctrl = tracer_enabled;
3521 tracing_disabled = 0; 3082 tracing_disabled = 0;
3083
3522 atomic_notifier_chain_register(&panic_notifier_list, 3084 atomic_notifier_chain_register(&panic_notifier_list,
3523 &trace_panic_notifier); 3085 &trace_panic_notifier);
3524 3086
3525 register_die_notifier(&trace_die_notifier); 3087 register_die_notifier(&trace_die_notifier);
3526 3088
3527 return 0; 3089 return 0;
3528
3529 free_buffers:
3530 for (i-- ; i >= 0; i--) {
3531 struct page *page, *tmp;
3532 struct trace_array_cpu *data = global_trace.data[i];
3533
3534 if (data) {
3535 list_for_each_entry_safe(page, tmp,
3536 &data->trace_pages, lru) {
3537 list_del_init(&page->lru);
3538 __free_page(page);
3539 }
3540 }
3541
3542#ifdef CONFIG_TRACER_MAX_TRACE
3543 data = max_tr.data[i];
3544 if (data) {
3545 list_for_each_entry_safe(page, tmp,
3546 &data->trace_pages, lru) {
3547 list_del_init(&page->lru);
3548 __free_page(page);
3549 }
3550 }
3551#endif
3552 }
3553 return ret;
3554} 3090}
3555early_initcall(tracer_alloc_buffers); 3091early_initcall(tracer_alloc_buffers);
3556fs_initcall(tracer_init_debugfs); 3092fs_initcall(tracer_init_debugfs);