aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig135
-rw-r--r--kernel/trace/Makefile24
-rw-r--r--kernel/trace/ftrace.c1727
-rw-r--r--kernel/trace/trace.c3157
-rw-r--r--kernel/trace/trace.h339
-rw-r--r--kernel/trace/trace_functions.c81
-rw-r--r--kernel/trace/trace_irqsoff.c490
-rw-r--r--kernel/trace/trace_mmiotrace.c295
-rw-r--r--kernel/trace/trace_sched_switch.c286
-rw-r--r--kernel/trace/trace_sched_wakeup.c453
-rw-r--r--kernel/trace/trace_selftest.c563
-rw-r--r--kernel/trace/trace_selftest_dynamic.c7
-rw-r--r--kernel/trace/trace_sysprof.c365
13 files changed, 7922 insertions, 0 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
new file mode 100644
index 000000000000..263e9e6bbd60
--- /dev/null
+++ b/kernel/trace/Kconfig
@@ -0,0 +1,135 @@
1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
3#
4config HAVE_FTRACE
5 bool
6
7config HAVE_DYNAMIC_FTRACE
8 bool
9
10config TRACER_MAX_TRACE
11 bool
12
13config TRACING
14 bool
15 select DEBUG_FS
16 select STACKTRACE
17
18config FTRACE
19 bool "Kernel Function Tracer"
20 depends on HAVE_FTRACE
21 select FRAME_POINTER
22 select TRACING
23 select CONTEXT_SWITCH_TRACER
24 help
25 Enable the kernel to trace every kernel function. This is done
26 by using a compiler feature to insert a small, 5-byte No-Operation
27 instruction to the beginning of every kernel function, which NOP
28 sequence is then dynamically patched into a tracer call when
29 tracing is enabled by the administrator. If it's runtime disabled
30 (the bootup default), then the overhead of the instructions is very
31 small and not measurable even in micro-benchmarks.
32
33config IRQSOFF_TRACER
34 bool "Interrupts-off Latency Tracer"
35 default n
36 depends on TRACE_IRQFLAGS_SUPPORT
37 depends on GENERIC_TIME
38 depends on HAVE_FTRACE
39 select TRACE_IRQFLAGS
40 select TRACING
41 select TRACER_MAX_TRACE
42 help
43 This option measures the time spent in irqs-off critical
44 sections, with microsecond accuracy.
45
46 The default measurement method is a maximum search, which is
47 disabled by default and can be runtime (re-)started
48 via:
49
50 echo 0 > /debugfs/tracing/tracing_max_latency
51
52 (Note that kernel size and overhead increase with this option
53 enabled. This option and the preempt-off timing option can be
54 used together or separately.)
55
56config PREEMPT_TRACER
57 bool "Preemption-off Latency Tracer"
58 default n
59 depends on GENERIC_TIME
60 depends on PREEMPT
61 depends on HAVE_FTRACE
62 select TRACING
63 select TRACER_MAX_TRACE
64 help
65 This option measures the time spent in preemption off critical
66 sections, with microsecond accuracy.
67
68 The default measurement method is a maximum search, which is
69 disabled by default and can be runtime (re-)started
70 via:
71
72 echo 0 > /debugfs/tracing/tracing_max_latency
73
74 (Note that kernel size and overhead increase with this option
75 enabled. This option and the irqs-off timing option can be
76 used together or separately.)
77
78config SYSPROF_TRACER
79 bool "Sysprof Tracer"
80 depends on X86
81 select TRACING
82 help
83 This tracer provides the trace needed by the 'Sysprof' userspace
84 tool.
85
86config SCHED_TRACER
87 bool "Scheduling Latency Tracer"
88 depends on HAVE_FTRACE
89 select TRACING
90 select CONTEXT_SWITCH_TRACER
91 select TRACER_MAX_TRACE
92 help
93 This tracer tracks the latency of the highest priority task
94 to be scheduled in, starting from the point it has woken up.
95
96config CONTEXT_SWITCH_TRACER
97 bool "Trace process context switches"
98 depends on HAVE_FTRACE
99 select TRACING
100 select MARKERS
101 help
102 This tracer gets called from the context switch and records
103 all switching of tasks.
104
105config DYNAMIC_FTRACE
106 bool "enable/disable ftrace tracepoints dynamically"
107 depends on FTRACE
108 depends on HAVE_DYNAMIC_FTRACE
109 default y
110 help
111 This option will modify all the calls to ftrace dynamically
112 (will patch them out of the binary image and replace them
113 with a No-Op instruction) as they are called. A table is
114 created to dynamically enable them again.
115
116 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
117 has native performance as long as no tracing is active.
118
119 The changes to the code are done by a kernel thread that
120 wakes up once a second and checks to see if any ftrace calls
121 were made. If so, it runs stop_machine (stops all CPUS)
122 and modifies the code to jump over the call to ftrace.
123
124config FTRACE_SELFTEST
125 bool
126
127config FTRACE_STARTUP_TEST
128 bool "Perform a startup test on ftrace"
129 depends on TRACING
130 select FTRACE_SELFTEST
131 help
132 This option performs a series of startup tests on ftrace. On bootup
133 a series of tests are made to verify that the tracer is
134 functioning properly. It will do tests on all the configured
135 tracers of ftrace.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
new file mode 100644
index 000000000000..71d17de17288
--- /dev/null
+++ b/kernel/trace/Makefile
@@ -0,0 +1,24 @@
1
2# Do not instrument the tracer itself:
3
4ifdef CONFIG_FTRACE
5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7
8# selftest needs instrumentation
9CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o
11endif
12
13obj-$(CONFIG_FTRACE) += libftrace.o
14
15obj-$(CONFIG_TRACING) += trace.o
16obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
17obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
18obj-$(CONFIG_FTRACE) += trace_functions.o
19obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
22obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
23
24libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
new file mode 100644
index 000000000000..f6e3af31b403
--- /dev/null
+++ b/kernel/trace/ftrace.c
@@ -0,0 +1,1727 @@
1/*
2 * Infrastructure for profiling code inserted by 'gcc -pg'.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally ported from the -rt patch by:
8 * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code in the latency_tracer, that is:
11 *
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 William Lee Irwin III
14 */
15
16#include <linux/stop_machine.h>
17#include <linux/clocksource.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/debugfs.h>
21#include <linux/hardirq.h>
22#include <linux/kthread.h>
23#include <linux/uaccess.h>
24#include <linux/kprobes.h>
25#include <linux/ftrace.h>
26#include <linux/sysctl.h>
27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h>
30
31#include <asm/ftrace.h>
32
33#include "trace.h"
34
35/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled;
38
39/*
40 * ftrace_disabled is set when an anomaly is discovered.
41 * ftrace_disabled is much stronger than ftrace_enabled.
42 */
43static int ftrace_disabled __read_mostly;
44
45static DEFINE_SPINLOCK(ftrace_lock);
46static DEFINE_MUTEX(ftrace_sysctl_lock);
47
48static struct ftrace_ops ftrace_list_end __read_mostly =
49{
50 .func = ftrace_stub,
51};
52
53static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
54ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
55
56static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
57{
58 struct ftrace_ops *op = ftrace_list;
59
60 /* in case someone actually ports this to alpha! */
61 read_barrier_depends();
62
63 while (op != &ftrace_list_end) {
64 /* silly alpha */
65 read_barrier_depends();
66 op->func(ip, parent_ip);
67 op = op->next;
68 };
69}
70
71/**
72 * clear_ftrace_function - reset the ftrace function
73 *
74 * This NULLs the ftrace function and in essence stops
75 * tracing. There may be lag
76 */
77void clear_ftrace_function(void)
78{
79 ftrace_trace_function = ftrace_stub;
80}
81
82static int __register_ftrace_function(struct ftrace_ops *ops)
83{
84 /* Should never be called by interrupts */
85 spin_lock(&ftrace_lock);
86
87 ops->next = ftrace_list;
88 /*
89 * We are entering ops into the ftrace_list but another
90 * CPU might be walking that list. We need to make sure
91 * the ops->next pointer is valid before another CPU sees
92 * the ops pointer included into the ftrace_list.
93 */
94 smp_wmb();
95 ftrace_list = ops;
96
97 if (ftrace_enabled) {
98 /*
99 * For one func, simply call it directly.
100 * For more than one func, call the chain.
101 */
102 if (ops->next == &ftrace_list_end)
103 ftrace_trace_function = ops->func;
104 else
105 ftrace_trace_function = ftrace_list_func;
106 }
107
108 spin_unlock(&ftrace_lock);
109
110 return 0;
111}
112
113static int __unregister_ftrace_function(struct ftrace_ops *ops)
114{
115 struct ftrace_ops **p;
116 int ret = 0;
117
118 spin_lock(&ftrace_lock);
119
120 /*
121 * If we are removing the last function, then simply point
122 * to the ftrace_stub.
123 */
124 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
125 ftrace_trace_function = ftrace_stub;
126 ftrace_list = &ftrace_list_end;
127 goto out;
128 }
129
130 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
131 if (*p == ops)
132 break;
133
134 if (*p != ops) {
135 ret = -1;
136 goto out;
137 }
138
139 *p = (*p)->next;
140
141 if (ftrace_enabled) {
142 /* If we only have one func left, then call that directly */
143 if (ftrace_list == &ftrace_list_end ||
144 ftrace_list->next == &ftrace_list_end)
145 ftrace_trace_function = ftrace_list->func;
146 }
147
148 out:
149 spin_unlock(&ftrace_lock);
150
151 return ret;
152}
153
154#ifdef CONFIG_DYNAMIC_FTRACE
155
156static struct task_struct *ftraced_task;
157
/* Command bits interpreted by __ftrace_modify_code() */
enum {
	/* convert pending records, patch calls in, set tracing_on */
	FTRACE_ENABLE_CALLS		= 0x01,
	/* patch calls out, clear tracing_on */
	FTRACE_DISABLE_CALLS		= 0x02,
	/* hand ftrace_trace_function to ftrace_update_ftrace_func() */
	FTRACE_UPDATE_TRACE_FUNC	= 0x04,
	/* point mcount at ftrace_record_ip */
	FTRACE_ENABLE_MCOUNT		= 0x08,
	/* point mcount at ftrace_stub */
	FTRACE_DISABLE_MCOUNT		= 0x10,
};
165
166static int ftrace_filtered;
167static int tracing_on;
168static int frozen_record_count;
169
170static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
171
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
173
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock);
177
178struct ftrace_page {
179 struct ftrace_page *next;
180 unsigned long index;
181 struct dyn_ftrace records[];
182};
183
184#define ENTRIES_PER_PAGE \
185 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
186
187/* estimate from running different kernels */
188#define NR_TO_INIT 10000
189
190static struct ftrace_page *ftrace_pages_start;
191static struct ftrace_page *ftrace_pages;
192
193static int ftraced_trigger;
194static int ftraced_suspend;
195static int ftraced_stop;
196
197static int ftrace_record_suspend;
198
199static struct dyn_ftrace *ftrace_free_records;
200
201
#ifdef CONFIG_KPROBES
/* Set FTRACE_FL_FROZEN on @rec, counting it only on the first transition. */
static inline void freeze_record(struct dyn_ftrace *rec)
{
	if (rec->flags & FTRACE_FL_FROZEN)
		return;

	rec->flags |= FTRACE_FL_FROZEN;
	frozen_record_count++;
}

/* Clear FTRACE_FL_FROZEN on @rec, uncounting it only if it was set. */
static inline void unfreeze_record(struct dyn_ftrace *rec)
{
	if (!(rec->flags & FTRACE_FL_FROZEN))
		return;

	rec->flags &= ~FTRACE_FL_FROZEN;
	frozen_record_count--;
}

/* Non-zero when @rec currently carries FTRACE_FL_FROZEN. */
static inline int record_frozen(struct dyn_ftrace *rec)
{
	return rec->flags & FTRACE_FL_FROZEN;
}
#else
/* Without kprobes nothing is ever frozen: all three evaluate to 0. */
# define freeze_record(rec)			({ 0; })
# define unfreeze_record(rec)			({ 0; })
# define record_frozen(rec)			({ 0; })
#endif /* CONFIG_KPROBES */
228
229int skip_trace(unsigned long ip)
230{
231 unsigned long fl;
232 struct dyn_ftrace *rec;
233 struct hlist_node *t;
234 struct hlist_head *head;
235
236 if (frozen_record_count == 0)
237 return 0;
238
239 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
240 hlist_for_each_entry_rcu(rec, t, head, node) {
241 if (rec->ip == ip) {
242 if (record_frozen(rec)) {
243 if (rec->flags & FTRACE_FL_FAILED)
244 return 1;
245
246 if (!(rec->flags & FTRACE_FL_CONVERTED))
247 return 1;
248
249 if (!tracing_on || !ftrace_enabled)
250 return 1;
251
252 if (ftrace_filtered) {
253 fl = rec->flags & (FTRACE_FL_FILTER |
254 FTRACE_FL_NOTRACE);
255 if (!fl || (fl & FTRACE_FL_NOTRACE))
256 return 1;
257 }
258 }
259 break;
260 }
261 }
262
263 return 0;
264}
265
266static inline int
267ftrace_ip_in_hash(unsigned long ip, unsigned long key)
268{
269 struct dyn_ftrace *p;
270 struct hlist_node *t;
271 int found = 0;
272
273 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
274 if (p->ip == ip) {
275 found = 1;
276 break;
277 }
278 }
279
280 return found;
281}
282
283static inline void
284ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
285{
286 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
287}
288
289/* called from kstop_machine */
290static inline void ftrace_del_hash(struct dyn_ftrace *node)
291{
292 hlist_del(&node->node);
293}
294
295static void ftrace_free_rec(struct dyn_ftrace *rec)
296{
297 /* no locking, only called from kstop_machine */
298
299 rec->ip = (unsigned long)ftrace_free_records;
300 ftrace_free_records = rec;
301 rec->flags |= FTRACE_FL_FREE;
302}
303
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
305{
306 struct dyn_ftrace *rec;
307
308 /* First check for freed records */
309 if (ftrace_free_records) {
310 rec = ftrace_free_records;
311
312 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
313 WARN_ON_ONCE(1);
314 ftrace_free_records = NULL;
315 ftrace_disabled = 1;
316 ftrace_enabled = 0;
317 return NULL;
318 }
319
320 ftrace_free_records = (void *)rec->ip;
321 memset(rec, 0, sizeof(*rec));
322 return rec;
323 }
324
325 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
326 if (!ftrace_pages->next)
327 return NULL;
328 ftrace_pages = ftrace_pages->next;
329 }
330
331 return &ftrace_pages->records[ftrace_pages->index++];
332}
333
334static void
335ftrace_record_ip(unsigned long ip)
336{
337 struct dyn_ftrace *node;
338 unsigned long flags;
339 unsigned long key;
340 int resched;
341 int atomic;
342 int cpu;
343
344 if (!ftrace_enabled || ftrace_disabled)
345 return;
346
347 resched = need_resched();
348 preempt_disable_notrace();
349
350 /*
351 * We simply need to protect against recursion.
352 * Use the the raw version of smp_processor_id and not
353 * __get_cpu_var which can call debug hooks that can
354 * cause a recursive crash here.
355 */
356 cpu = raw_smp_processor_id();
357 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
358 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
359 goto out;
360
361 if (unlikely(ftrace_record_suspend))
362 goto out;
363
364 key = hash_long(ip, FTRACE_HASHBITS);
365
366 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
367
368 if (ftrace_ip_in_hash(ip, key))
369 goto out;
370
371 atomic = irqs_disabled();
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374
375 /* This ip may have hit the hash before the lock */
376 if (ftrace_ip_in_hash(ip, key))
377 goto out_unlock;
378
379 node = ftrace_alloc_dyn_node(ip);
380 if (!node)
381 goto out_unlock;
382
383 node->ip = ip;
384
385 ftrace_add_hash(node, key);
386
387 ftraced_trigger = 1;
388
389 out_unlock:
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
391 out:
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393
394 /* prevent recursion with scheduler */
395 if (resched)
396 preempt_enable_no_resched_notrace();
397 else
398 preempt_enable_notrace();
399}
400
401#define FTRACE_ADDR ((long)(ftrace_caller))
402
/*
 * __ftrace_replace_code - decide whether the call site of @rec has to be
 * switched and, if so, patch it.
 * @rec:    record describing the call site (rec->ip)
 * @old:    code expected at the site now; may be replaced below
 * @new:    code to write; may be replaced below
 * @enable: non-zero to turn tracing calls on, zero to turn them off
 *
 * Whichever of @old/@new the caller did not supply is computed here as
 * the call to FTRACE_ADDR via ftrace_call_replace(). FTRACE_FL_ENABLED
 * in rec->flags is updated before the site is patched.
 *
 * Returns 0 when the record needs no change, otherwise the result of
 * ftrace_modify_code().
 */
403static int
404__ftrace_replace_code(struct dyn_ftrace *rec,
405 unsigned char *old, unsigned char *new, int enable)
406{
407 unsigned long ip, fl;
408
409 ip = rec->ip;
410
411 if (ftrace_filtered && enable) {
412 /*
413 * If filtering is on:
414 *
415 * If this record is set to be filtered and
416 * is enabled then do nothing.
417 *
418 * If this record is set to be filtered and
419 * it is not enabled, enable it.
420 *
421 * If this record is not set to be filtered
422 * and it is not enabled do nothing.
423 *
424 * If this record is set not to trace then
425 * do nothing.
426 *
427 * If this record is set not to trace and
428 * it is enabled then disable it.
429 *
430 * If this record is not set to be filtered and
431 * it is enabled, disable it.
432 */
433
434 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
435 FTRACE_FL_ENABLED);
436
437 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
438 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
439 !fl || (fl == FTRACE_FL_NOTRACE))
440 return 0;
441
442 /*
443 * If it is enabled disable it,
444 * otherwise enable it!
445 */
446 if (fl & FTRACE_FL_ENABLED) {
447 /* swap new and old */
448 new = old;
449 old = ftrace_call_replace(ip, FTRACE_ADDR);
450 rec->flags &= ~FTRACE_FL_ENABLED;
451 } else {
452 new = ftrace_call_replace(ip, FTRACE_ADDR);
453 rec->flags |= FTRACE_FL_ENABLED;
454 }
455 } else {
456
457 if (enable) {
458 /*
459 * If this record is set not to trace and is
460 * not enabled, do nothing.
461 */
462 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
463 if (fl == FTRACE_FL_NOTRACE)
464 return 0;
465
466 new = ftrace_call_replace(ip, FTRACE_ADDR);
467 } else
468 old = ftrace_call_replace(ip, FTRACE_ADDR);
469
/* nothing to patch when the record is already in the requested state */
470 if (enable) {
471 if (rec->flags & FTRACE_FL_ENABLED)
472 return 0;
473 rec->flags |= FTRACE_FL_ENABLED;
474 } else {
475 if (!(rec->flags & FTRACE_FL_ENABLED))
476 return 0;
477 rec->flags &= ~FTRACE_FL_ENABLED;
478 }
479 }
480
481 return ftrace_modify_code(ip, old, new);
482}
483
484static void ftrace_replace_code(int enable)
485{
486 int i, failed;
487 unsigned char *new = NULL, *old = NULL;
488 struct dyn_ftrace *rec;
489 struct ftrace_page *pg;
490
491 if (enable)
492 old = ftrace_nop_replace();
493 else
494 new = ftrace_nop_replace();
495
496 for (pg = ftrace_pages_start; pg; pg = pg->next) {
497 for (i = 0; i < pg->index; i++) {
498 rec = &pg->records[i];
499
500 /* don't modify code that has already faulted */
501 if (rec->flags & FTRACE_FL_FAILED)
502 continue;
503
504 /* ignore updates to this record's mcount site */
505 if (get_kprobe((void *)rec->ip)) {
506 freeze_record(rec);
507 continue;
508 } else {
509 unfreeze_record(rec);
510 }
511
512 failed = __ftrace_replace_code(rec, old, new, enable);
513 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
514 rec->flags |= FTRACE_FL_FAILED;
515 if ((system_state == SYSTEM_BOOTING) ||
516 !core_kernel_text(rec->ip)) {
517 ftrace_del_hash(rec);
518 ftrace_free_rec(rec);
519 }
520 }
521 }
522 }
523}
524
525static void ftrace_shutdown_replenish(void)
526{
527 if (ftrace_pages->next)
528 return;
529
530 /* allocate another page */
531 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
532}
533
534static int
535ftrace_code_disable(struct dyn_ftrace *rec)
536{
537 unsigned long ip;
538 unsigned char *nop, *call;
539 int failed;
540
541 ip = rec->ip;
542
543 nop = ftrace_nop_replace();
544 call = ftrace_call_replace(ip, MCOUNT_ADDR);
545
546 failed = ftrace_modify_code(ip, call, nop);
547 if (failed) {
548 rec->flags |= FTRACE_FL_FAILED;
549 return 0;
550 }
551 return 1;
552}
553
554static int __ftrace_update_code(void *ignore);
555
556static int __ftrace_modify_code(void *data)
557{
558 unsigned long addr;
559 int *command = data;
560
561 if (*command & FTRACE_ENABLE_CALLS) {
562 /*
563 * Update any recorded ips now that we have the
564 * machine stopped
565 */
566 __ftrace_update_code(NULL);
567 ftrace_replace_code(1);
568 tracing_on = 1;
569 } else if (*command & FTRACE_DISABLE_CALLS) {
570 ftrace_replace_code(0);
571 tracing_on = 0;
572 }
573
574 if (*command & FTRACE_UPDATE_TRACE_FUNC)
575 ftrace_update_ftrace_func(ftrace_trace_function);
576
577 if (*command & FTRACE_ENABLE_MCOUNT) {
578 addr = (unsigned long)ftrace_record_ip;
579 ftrace_mcount_set(&addr);
580 } else if (*command & FTRACE_DISABLE_MCOUNT) {
581 addr = (unsigned long)ftrace_stub;
582 ftrace_mcount_set(&addr);
583 }
584
585 return 0;
586}
587
588static void ftrace_run_update_code(int command)
589{
590 stop_machine(__ftrace_modify_code, &command, NULL);
591}
592
593void ftrace_disable_daemon(void)
594{
595 /* Stop the daemon from calling kstop_machine */
596 mutex_lock(&ftraced_lock);
597 ftraced_stop = 1;
598 mutex_unlock(&ftraced_lock);
599
600 ftrace_force_update();
601}
602
603void ftrace_enable_daemon(void)
604{
605 mutex_lock(&ftraced_lock);
606 ftraced_stop = 0;
607 mutex_unlock(&ftraced_lock);
608
609 ftrace_force_update();
610}
611
612static ftrace_func_t saved_ftrace_func;
613
614static void ftrace_startup(void)
615{
616 int command = 0;
617
618 if (unlikely(ftrace_disabled))
619 return;
620
621 mutex_lock(&ftraced_lock);
622 ftraced_suspend++;
623 if (ftraced_suspend == 1)
624 command |= FTRACE_ENABLE_CALLS;
625
626 if (saved_ftrace_func != ftrace_trace_function) {
627 saved_ftrace_func = ftrace_trace_function;
628 command |= FTRACE_UPDATE_TRACE_FUNC;
629 }
630
631 if (!command || !ftrace_enabled)
632 goto out;
633
634 ftrace_run_update_code(command);
635 out:
636 mutex_unlock(&ftraced_lock);
637}
638
639static void ftrace_shutdown(void)
640{
641 int command = 0;
642
643 if (unlikely(ftrace_disabled))
644 return;
645
646 mutex_lock(&ftraced_lock);
647 ftraced_suspend--;
648 if (!ftraced_suspend)
649 command |= FTRACE_DISABLE_CALLS;
650
651 if (saved_ftrace_func != ftrace_trace_function) {
652 saved_ftrace_func = ftrace_trace_function;
653 command |= FTRACE_UPDATE_TRACE_FUNC;
654 }
655
656 if (!command || !ftrace_enabled)
657 goto out;
658
659 ftrace_run_update_code(command);
660 out:
661 mutex_unlock(&ftraced_lock);
662}
663
664static void ftrace_startup_sysctl(void)
665{
666 int command = FTRACE_ENABLE_MCOUNT;
667
668 if (unlikely(ftrace_disabled))
669 return;
670
671 mutex_lock(&ftraced_lock);
672 /* Force update next time */
673 saved_ftrace_func = NULL;
674 /* ftraced_suspend is true if we want ftrace running */
675 if (ftraced_suspend)
676 command |= FTRACE_ENABLE_CALLS;
677
678 ftrace_run_update_code(command);
679 mutex_unlock(&ftraced_lock);
680}
681
682static void ftrace_shutdown_sysctl(void)
683{
684 int command = FTRACE_DISABLE_MCOUNT;
685
686 if (unlikely(ftrace_disabled))
687 return;
688
689 mutex_lock(&ftraced_lock);
690 /* ftraced_suspend is true if ftrace is running */
691 if (ftraced_suspend)
692 command |= FTRACE_DISABLE_CALLS;
693
694 ftrace_run_update_code(command);
695 mutex_unlock(&ftraced_lock);
696}
697
698static cycle_t ftrace_update_time;
699static unsigned long ftrace_update_cnt;
700unsigned long ftrace_update_tot_cnt;
701
/*
 * __ftrace_update_code - convert every not-yet-converted record.
 * @ignore: unused
 *
 * Walks each bucket of ftrace_hash and patches the mcount call of each
 * unconverted record to a nop through ftrace_code_disable(). Recording
 * is suspended and ftrace_enabled is cleared for the duration, then both
 * are restored. Also refreshes ftrace_update_cnt, ftrace_update_tot_cnt
 * and ftrace_update_time, and clears ftraced_trigger.
 *
 * Always returns 0.
 */
702static int __ftrace_update_code(void *ignore)
703{
704 int i, save_ftrace_enabled;
705 cycle_t start, stop;
706 struct dyn_ftrace *p;
707 struct hlist_node *t, *n;
708 struct hlist_head *head, temp_list;
709
710 /* Don't be recording funcs now */
711 ftrace_record_suspend++;
712 save_ftrace_enabled = ftrace_enabled;
713 ftrace_enabled = 0;
714
715 start = ftrace_now(raw_smp_processor_id());
716 ftrace_update_cnt = 0;
717
718 /* No locks needed, the machine is stopped! */
719 for (i = 0; i < FTRACE_HASHSIZE; i++) {
720 INIT_HLIST_HEAD(&temp_list);
721 head = &ftrace_hash[i];
722
723 /* all CPUS are stopped, we are safe to modify code */
724 hlist_for_each_entry_safe(p, t, n, head, node) {
725 /* Skip over failed records which have not been
726 * freed. */
727 if (p->flags & FTRACE_FL_FAILED)
728 continue;
729
730 /* Unconverted records are always at the head of the
731 * hash bucket. Once we encounter a converted record,
732 * simply skip over to the next bucket. Saves ftraced
733 * some processor cycles (ftrace does its bid for
734 * global warming :-p ). */
735 if (p->flags & (FTRACE_FL_CONVERTED))
736 break;
737
738 /* Ignore updates to this record's mcount site.
739 * Reintroduce this record at the head of this
740 * bucket to attempt to "convert" it again if
741 * the kprobe on it is unregistered before the
742 * next run. */
743 if (get_kprobe((void *)p->ip)) {
744 ftrace_del_hash(p);
745 INIT_HLIST_NODE(&p->node);
746 hlist_add_head(&p->node, &temp_list);
747 freeze_record(p);
748 continue;
749 } else {
750 unfreeze_record(p);
751 }
752
753 /* convert record (i.e, patch mcount-call with NOP) */
754 if (ftrace_code_disable(p)) {
755 p->flags |= FTRACE_FL_CONVERTED;
756 ftrace_update_cnt++;
757 } else {
758 if ((system_state == SYSTEM_BOOTING) ||
759 !core_kernel_text(p->ip)) {
760 ftrace_del_hash(p);
761 ftrace_free_rec(p);
762 }
763 }
764 }
765
/* move the records parked on temp_list back to the head of this bucket */
766 hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
767 hlist_del(&p->node);
768 INIT_HLIST_NODE(&p->node);
769 hlist_add_head(&p->node, head);
770 }
771 }
772
773 stop = ftrace_now(raw_smp_processor_id());
774 ftrace_update_time = stop - start;
775 ftrace_update_tot_cnt += ftrace_update_cnt;
776 ftraced_trigger = 0;
777
778 ftrace_enabled = save_ftrace_enabled;
779 ftrace_record_suspend--;
780
781 return 0;
782}
783
784static int ftrace_update_code(void)
785{
786 if (unlikely(ftrace_disabled) ||
787 !ftrace_enabled || !ftraced_trigger)
788 return 0;
789
790 stop_machine(__ftrace_update_code, NULL, NULL);
791
792 return 1;
793}
794
795static int ftraced(void *ignore)
796{
797 unsigned long usecs;
798
799 while (!kthread_should_stop()) {
800
801 set_current_state(TASK_INTERRUPTIBLE);
802
803 /* check once a second */
804 schedule_timeout(HZ);
805
806 if (unlikely(ftrace_disabled))
807 continue;
808
809 mutex_lock(&ftrace_sysctl_lock);
810 mutex_lock(&ftraced_lock);
811 if (!ftraced_suspend && !ftraced_stop &&
812 ftrace_update_code()) {
813 usecs = nsecs_to_usecs(ftrace_update_time);
814 if (ftrace_update_tot_cnt > 100000) {
815 ftrace_update_tot_cnt = 0;
816 pr_info("hm, dftrace overflow: %lu change%s"
817 " (%lu total) in %lu usec%s\n",
818 ftrace_update_cnt,
819 ftrace_update_cnt != 1 ? "s" : "",
820 ftrace_update_tot_cnt,
821 usecs, usecs != 1 ? "s" : "");
822 ftrace_disabled = 1;
823 WARN_ON_ONCE(1);
824 }
825 }
826 mutex_unlock(&ftraced_lock);
827 mutex_unlock(&ftrace_sysctl_lock);
828
829 ftrace_shutdown_replenish();
830 }
831 __set_current_state(TASK_RUNNING);
832 return 0;
833}
834
835static int __init ftrace_dyn_table_alloc(void)
836{
837 struct ftrace_page *pg;
838 int cnt;
839 int i;
840
841 /* allocate a few pages */
842 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
843 if (!ftrace_pages_start)
844 return -1;
845
846 /*
847 * Allocate a few more pages.
848 *
849 * TODO: have some parser search vmlinux before
850 * final linking to find all calls to ftrace.
851 * Then we can:
852 * a) know how many pages to allocate.
853 * and/or
854 * b) set up the table then.
855 *
856 * The dynamic code is still necessary for
857 * modules.
858 */
859
860 pg = ftrace_pages = ftrace_pages_start;
861
862 cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
863
864 for (i = 0; i < cnt; i++) {
865 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
866
867 /* If we fail, we'll try later anyway */
868 if (!pg->next)
869 break;
870
871 pg = pg->next;
872 }
873
874 return 0;
875}
876
/* Bits for ftrace_iterator.flags */
enum {
	FTRACE_ITER_FILTER	= 0x1,	/* show only FTRACE_FL_FILTER records */
	FTRACE_ITER_CONT	= 0x2,
	FTRACE_ITER_NOTRACE	= 0x4,	/* show only FTRACE_FL_NOTRACE records */
	FTRACE_ITER_FAILURES	= 0x8,	/* show only failed, non-free records */
};
883
884#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
885
886struct ftrace_iterator {
887 loff_t pos;
888 struct ftrace_page *pg;
889 unsigned idx;
890 unsigned flags;
891 unsigned char buffer[FTRACE_BUFF_MAX+1];
892 unsigned buffer_idx;
893 unsigned filtered;
894};
895
896static void *
897t_next(struct seq_file *m, void *v, loff_t *pos)
898{
899 struct ftrace_iterator *iter = m->private;
900 struct dyn_ftrace *rec = NULL;
901
902 (*pos)++;
903
904 retry:
905 if (iter->idx >= iter->pg->index) {
906 if (iter->pg->next) {
907 iter->pg = iter->pg->next;
908 iter->idx = 0;
909 goto retry;
910 }
911 } else {
912 rec = &iter->pg->records[iter->idx++];
913 if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
914 (rec->flags & FTRACE_FL_FAILED)) ||
915
916 ((iter->flags & FTRACE_ITER_FAILURES) &&
917 (!(rec->flags & FTRACE_FL_FAILED) ||
918 (rec->flags & FTRACE_FL_FREE))) ||
919
920 ((iter->flags & FTRACE_ITER_FILTER) &&
921 !(rec->flags & FTRACE_FL_FILTER)) ||
922
923 ((iter->flags & FTRACE_ITER_NOTRACE) &&
924 !(rec->flags & FTRACE_FL_NOTRACE))) {
925 rec = NULL;
926 goto retry;
927 }
928 }
929
930 iter->pos = *pos;
931
932 return rec;
933}
934
935static void *t_start(struct seq_file *m, loff_t *pos)
936{
937 struct ftrace_iterator *iter = m->private;
938 void *p = NULL;
939 loff_t l = -1;
940
941 if (*pos != iter->pos) {
942 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
943 ;
944 } else {
945 l = *pos;
946 p = t_next(m, p, &l);
947 }
948
949 return p;
950}
951
/* seq_file ->stop: nothing to release. */
static void t_stop(struct seq_file *m, void *p)
{
	return;
}
955
956static int t_show(struct seq_file *m, void *v)
957{
958 struct dyn_ftrace *rec = v;
959 char str[KSYM_SYMBOL_LEN];
960
961 if (!rec)
962 return 0;
963
964 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
965
966 seq_printf(m, "%s\n", str);
967
968 return 0;
969}
970
971static struct seq_operations show_ftrace_seq_ops = {
972 .start = t_start,
973 .next = t_next,
974 .stop = t_stop,
975 .show = t_show,
976};
977
978static int
979ftrace_avail_open(struct inode *inode, struct file *file)
980{
981 struct ftrace_iterator *iter;
982 int ret;
983
984 if (unlikely(ftrace_disabled))
985 return -ENODEV;
986
987 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
988 if (!iter)
989 return -ENOMEM;
990
991 iter->pg = ftrace_pages_start;
992 iter->pos = -1;
993
994 ret = seq_open(file, &show_ftrace_seq_ops);
995 if (!ret) {
996 struct seq_file *m = file->private_data;
997
998 m->private = iter;
999 } else {
1000 kfree(iter);
1001 }
1002
1003 return ret;
1004}
1005
1006int ftrace_avail_release(struct inode *inode, struct file *file)
1007{
1008 struct seq_file *m = (struct seq_file *)file->private_data;
1009 struct ftrace_iterator *iter = m->private;
1010
1011 seq_release(inode, file);
1012 kfree(iter);
1013
1014 return 0;
1015}
1016
1017static int
1018ftrace_failures_open(struct inode *inode, struct file *file)
1019{
1020 int ret;
1021 struct seq_file *m;
1022 struct ftrace_iterator *iter;
1023
1024 ret = ftrace_avail_open(inode, file);
1025 if (!ret) {
1026 m = (struct seq_file *)file->private_data;
1027 iter = (struct ftrace_iterator *)m->private;
1028 iter->flags = FTRACE_ITER_FAILURES;
1029 }
1030
1031 return ret;
1032}
1033
1034
1035static void ftrace_filter_reset(int enable)
1036{
1037 struct ftrace_page *pg;
1038 struct dyn_ftrace *rec;
1039 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1040 unsigned i;
1041
1042 /* keep kstop machine from running */
1043 preempt_disable();
1044 if (enable)
1045 ftrace_filtered = 0;
1046 pg = ftrace_pages_start;
1047 while (pg) {
1048 for (i = 0; i < pg->index; i++) {
1049 rec = &pg->records[i];
1050 if (rec->flags & FTRACE_FL_FAILED)
1051 continue;
1052 rec->flags &= ~type;
1053 }
1054 pg = pg->next;
1055 }
1056 preempt_enable();
1057}
1058
1059static int
1060ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1061{
1062 struct ftrace_iterator *iter;
1063 int ret = 0;
1064
1065 if (unlikely(ftrace_disabled))
1066 return -ENODEV;
1067
1068 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1069 if (!iter)
1070 return -ENOMEM;
1071
1072 mutex_lock(&ftrace_regex_lock);
1073 if ((file->f_mode & FMODE_WRITE) &&
1074 !(file->f_flags & O_APPEND))
1075 ftrace_filter_reset(enable);
1076
1077 if (file->f_mode & FMODE_READ) {
1078 iter->pg = ftrace_pages_start;
1079 iter->pos = -1;
1080 iter->flags = enable ? FTRACE_ITER_FILTER :
1081 FTRACE_ITER_NOTRACE;
1082
1083 ret = seq_open(file, &show_ftrace_seq_ops);
1084 if (!ret) {
1085 struct seq_file *m = file->private_data;
1086 m->private = iter;
1087 } else
1088 kfree(iter);
1089 } else
1090 file->private_data = iter;
1091 mutex_unlock(&ftrace_regex_lock);
1092
1093 return ret;
1094}
1095
/* Open handler of ftrace_filter_fops: regex open in "enable" mode. */
static int
ftrace_filter_open(struct inode *inode, struct file *file)
{
	const int enable = 1;

	return ftrace_regex_open(inode, file, enable);
}
1101
/* Open handler of ftrace_notrace_fops: regex open with enable cleared. */
static int
ftrace_notrace_open(struct inode *inode, struct file *file)
{
	const int enable = 0;

	return ftrace_regex_open(inode, file, enable);
}
1107
1108static ssize_t
1109ftrace_regex_read(struct file *file, char __user *ubuf,
1110 size_t cnt, loff_t *ppos)
1111{
1112 if (file->f_mode & FMODE_READ)
1113 return seq_read(file, ubuf, cnt, ppos);
1114 else
1115 return -EPERM;
1116}
1117
1118static loff_t
1119ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1120{
1121 loff_t ret;
1122
1123 if (file->f_mode & FMODE_READ)
1124 ret = seq_lseek(file, offset, origin);
1125 else
1126 file->f_pos = ret = 1;
1127
1128 return ret;
1129}
1130
/* Kinds of pattern ftrace_match() distinguishes. */
enum {
	MATCH_FULL		= 0,	/* no '*' in the pattern */
	MATCH_FRONT_ONLY	= 1,	/* '*' after the text */
	MATCH_MIDDLE_ONLY	= 2,	/* '*' before and after the text */
	MATCH_END_ONLY		= 3,	/* '*' before the text */
};
1137
1138static void
1139ftrace_match(unsigned char *buff, int len, int enable)
1140{
1141 char str[KSYM_SYMBOL_LEN];
1142 char *search = NULL;
1143 struct ftrace_page *pg;
1144 struct dyn_ftrace *rec;
1145 int type = MATCH_FULL;
1146 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1147 unsigned i, match = 0, search_len = 0;
1148
1149 for (i = 0; i < len; i++) {
1150 if (buff[i] == '*') {
1151 if (!i) {
1152 search = buff + i + 1;
1153 type = MATCH_END_ONLY;
1154 search_len = len - (i + 1);
1155 } else {
1156 if (type == MATCH_END_ONLY) {
1157 type = MATCH_MIDDLE_ONLY;
1158 } else {
1159 match = i;
1160 type = MATCH_FRONT_ONLY;
1161 }
1162 buff[i] = 0;
1163 break;
1164 }
1165 }
1166 }
1167
1168 /* keep kstop machine from running */
1169 preempt_disable();
1170 if (enable)
1171 ftrace_filtered = 1;
1172 pg = ftrace_pages_start;
1173 while (pg) {
1174 for (i = 0; i < pg->index; i++) {
1175 int matched = 0;
1176 char *ptr;
1177
1178 rec = &pg->records[i];
1179 if (rec->flags & FTRACE_FL_FAILED)
1180 continue;
1181 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1182 switch (type) {
1183 case MATCH_FULL:
1184 if (strcmp(str, buff) == 0)
1185 matched = 1;
1186 break;
1187 case MATCH_FRONT_ONLY:
1188 if (memcmp(str, buff, match) == 0)
1189 matched = 1;
1190 break;
1191 case MATCH_MIDDLE_ONLY:
1192 if (strstr(str, search))
1193 matched = 1;
1194 break;
1195 case MATCH_END_ONLY:
1196 ptr = strstr(str, search);
1197 if (ptr && (ptr[search_len] == 0))
1198 matched = 1;
1199 break;
1200 }
1201 if (matched)
1202 rec->flags |= flag;
1203 }
1204 pg = pg->next;
1205 }
1206 preempt_enable();
1207}
1208
1209static ssize_t
1210ftrace_regex_write(struct file *file, const char __user *ubuf,
1211 size_t cnt, loff_t *ppos, int enable)
1212{
1213 struct ftrace_iterator *iter;
1214 char ch;
1215 size_t read = 0;
1216 ssize_t ret;
1217
1218 if (!cnt || cnt < 0)
1219 return 0;
1220
1221 mutex_lock(&ftrace_regex_lock);
1222
1223 if (file->f_mode & FMODE_READ) {
1224 struct seq_file *m = file->private_data;
1225 iter = m->private;
1226 } else
1227 iter = file->private_data;
1228
1229 if (!*ppos) {
1230 iter->flags &= ~FTRACE_ITER_CONT;
1231 iter->buffer_idx = 0;
1232 }
1233
1234 ret = get_user(ch, ubuf++);
1235 if (ret)
1236 goto out;
1237 read++;
1238 cnt--;
1239
1240 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
1241 /* skip white space */
1242 while (cnt && isspace(ch)) {
1243 ret = get_user(ch, ubuf++);
1244 if (ret)
1245 goto out;
1246 read++;
1247 cnt--;
1248 }
1249
1250 if (isspace(ch)) {
1251 file->f_pos += read;
1252 ret = read;
1253 goto out;
1254 }
1255
1256 iter->buffer_idx = 0;
1257 }
1258
1259 while (cnt && !isspace(ch)) {
1260 if (iter->buffer_idx < FTRACE_BUFF_MAX)
1261 iter->buffer[iter->buffer_idx++] = ch;
1262 else {
1263 ret = -EINVAL;
1264 goto out;
1265 }
1266 ret = get_user(ch, ubuf++);
1267 if (ret)
1268 goto out;
1269 read++;
1270 cnt--;
1271 }
1272
1273 if (isspace(ch)) {
1274 iter->filtered++;
1275 iter->buffer[iter->buffer_idx] = 0;
1276 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1277 iter->buffer_idx = 0;
1278 } else
1279 iter->flags |= FTRACE_ITER_CONT;
1280
1281
1282 file->f_pos += read;
1283
1284 ret = read;
1285 out:
1286 mutex_unlock(&ftrace_regex_lock);
1287
1288 return ret;
1289}
1290
1291static ssize_t
1292ftrace_filter_write(struct file *file, const char __user *ubuf,
1293 size_t cnt, loff_t *ppos)
1294{
1295 return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
1296}
1297
1298static ssize_t
1299ftrace_notrace_write(struct file *file, const char __user *ubuf,
1300 size_t cnt, loff_t *ppos)
1301{
1302 return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
1303}
1304
1305static void
1306ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1307{
1308 if (unlikely(ftrace_disabled))
1309 return;
1310
1311 mutex_lock(&ftrace_regex_lock);
1312 if (reset)
1313 ftrace_filter_reset(enable);
1314 if (buf)
1315 ftrace_match(buf, len, enable);
1316 mutex_unlock(&ftrace_regex_lock);
1317}
1318
/**
 * ftrace_set_filter - select functions to trace
 * @buf - the string that holds the function filter text.
 * @len - the length of the string.
 * @reset - non zero to reset all filters before applying this filter.
 *
 * Filters denote which functions should be enabled when tracing is enabled.
 * With a NULL @buf and @reset set, the filter is only cleared, so all
 * functions will be enabled for tracing.
 */
void ftrace_set_filter(unsigned char *buf, int len, int reset)
{
	const int enable = 1;

	ftrace_set_regex(buf, len, reset, enable);
}
1332
/**
 * ftrace_set_notrace - select functions that must not be traced
 * @buf - the string that holds the function notrace text.
 * @len - the length of the string.
 * @reset - non zero to reset all filters before applying this filter.
 *
 * Notrace filters denote which functions should not be enabled when tracing
 * is enabled.  With a NULL @buf and @reset set, the notrace selection is
 * only cleared, so all functions will be enabled for tracing.
 */
void ftrace_set_notrace(unsigned char *buf, int len, int reset)
{
	const int enable = 0;

	ftrace_set_regex(buf, len, reset, enable);
}
1347
1348static int
1349ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1350{
1351 struct seq_file *m = (struct seq_file *)file->private_data;
1352 struct ftrace_iterator *iter;
1353
1354 mutex_lock(&ftrace_regex_lock);
1355 if (file->f_mode & FMODE_READ) {
1356 iter = m->private;
1357
1358 seq_release(inode, file);
1359 } else
1360 iter = file->private_data;
1361
1362 if (iter->buffer_idx) {
1363 iter->filtered++;
1364 iter->buffer[iter->buffer_idx] = 0;
1365 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1366 }
1367
1368 mutex_lock(&ftrace_sysctl_lock);
1369 mutex_lock(&ftraced_lock);
1370 if (iter->filtered && ftraced_suspend && ftrace_enabled)
1371 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1372 mutex_unlock(&ftraced_lock);
1373 mutex_unlock(&ftrace_sysctl_lock);
1374
1375 kfree(iter);
1376 mutex_unlock(&ftrace_regex_lock);
1377 return 0;
1378}
1379
/* Release handler of ftrace_filter_fops: regex release in "enable" mode. */
static int
ftrace_filter_release(struct inode *inode, struct file *file)
{
	const int enable = 1;

	return ftrace_regex_release(inode, file, enable);
}
1385
/* Release handler of ftrace_notrace_fops: regex release with enable cleared. */
static int
ftrace_notrace_release(struct inode *inode, struct file *file)
{
	const int enable = 0;

	return ftrace_regex_release(inode, file, enable);
}
1391
1392static ssize_t
1393ftraced_read(struct file *filp, char __user *ubuf,
1394 size_t cnt, loff_t *ppos)
1395{
1396 /* don't worry about races */
1397 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1398 int r = strlen(buf);
1399
1400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1401}
1402
1403static ssize_t
1404ftraced_write(struct file *filp, const char __user *ubuf,
1405 size_t cnt, loff_t *ppos)
1406{
1407 char buf[64];
1408 long val;
1409 int ret;
1410
1411 if (cnt >= sizeof(buf))
1412 return -EINVAL;
1413
1414 if (copy_from_user(&buf, ubuf, cnt))
1415 return -EFAULT;
1416
1417 if (strncmp(buf, "enable", 6) == 0)
1418 val = 1;
1419 else if (strncmp(buf, "disable", 7) == 0)
1420 val = 0;
1421 else {
1422 buf[cnt] = 0;
1423
1424 ret = strict_strtoul(buf, 10, &val);
1425 if (ret < 0)
1426 return ret;
1427
1428 val = !!val;
1429 }
1430
1431 if (val)
1432 ftrace_enable_daemon();
1433 else
1434 ftrace_disable_daemon();
1435
1436 filp->f_pos += cnt;
1437
1438 return cnt;
1439}
1440
1441static struct file_operations ftrace_avail_fops = {
1442 .open = ftrace_avail_open,
1443 .read = seq_read,
1444 .llseek = seq_lseek,
1445 .release = ftrace_avail_release,
1446};
1447
1448static struct file_operations ftrace_failures_fops = {
1449 .open = ftrace_failures_open,
1450 .read = seq_read,
1451 .llseek = seq_lseek,
1452 .release = ftrace_avail_release,
1453};
1454
1455static struct file_operations ftrace_filter_fops = {
1456 .open = ftrace_filter_open,
1457 .read = ftrace_regex_read,
1458 .write = ftrace_filter_write,
1459 .llseek = ftrace_regex_lseek,
1460 .release = ftrace_filter_release,
1461};
1462
1463static struct file_operations ftrace_notrace_fops = {
1464 .open = ftrace_notrace_open,
1465 .read = ftrace_regex_read,
1466 .write = ftrace_notrace_write,
1467 .llseek = ftrace_regex_lseek,
1468 .release = ftrace_notrace_release,
1469};
1470
1471static struct file_operations ftraced_fops = {
1472 .open = tracing_open_generic,
1473 .read = ftraced_read,
1474 .write = ftraced_write,
1475};
1476
1477/**
1478 * ftrace_force_update - force an update to all recording ftrace functions
1479 */
1480int ftrace_force_update(void)
1481{
1482 int ret = 0;
1483
1484 if (unlikely(ftrace_disabled))
1485 return -ENODEV;
1486
1487 mutex_lock(&ftrace_sysctl_lock);
1488 mutex_lock(&ftraced_lock);
1489
1490 /*
1491 * If ftraced_trigger is not set, then there is nothing
1492 * to update.
1493 */
1494 if (ftraced_trigger && !ftrace_update_code())
1495 ret = -EBUSY;
1496
1497 mutex_unlock(&ftraced_lock);
1498 mutex_unlock(&ftrace_sysctl_lock);
1499
1500 return ret;
1501}
1502
1503static void ftrace_force_shutdown(void)
1504{
1505 struct task_struct *task;
1506 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1507
1508 mutex_lock(&ftraced_lock);
1509 task = ftraced_task;
1510 ftraced_task = NULL;
1511 ftraced_suspend = -1;
1512 ftrace_run_update_code(command);
1513 mutex_unlock(&ftraced_lock);
1514
1515 if (task)
1516 kthread_stop(task);
1517}
1518
1519static __init int ftrace_init_debugfs(void)
1520{
1521 struct dentry *d_tracer;
1522 struct dentry *entry;
1523
1524 d_tracer = tracing_init_dentry();
1525
1526 entry = debugfs_create_file("available_filter_functions", 0444,
1527 d_tracer, NULL, &ftrace_avail_fops);
1528 if (!entry)
1529 pr_warning("Could not create debugfs "
1530 "'available_filter_functions' entry\n");
1531
1532 entry = debugfs_create_file("failures", 0444,
1533 d_tracer, NULL, &ftrace_failures_fops);
1534 if (!entry)
1535 pr_warning("Could not create debugfs 'failures' entry\n");
1536
1537 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
1538 NULL, &ftrace_filter_fops);
1539 if (!entry)
1540 pr_warning("Could not create debugfs "
1541 "'set_ftrace_filter' entry\n");
1542
1543 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
1544 NULL, &ftrace_notrace_fops);
1545 if (!entry)
1546 pr_warning("Could not create debugfs "
1547 "'set_ftrace_notrace' entry\n");
1548
1549 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1550 NULL, &ftraced_fops);
1551 if (!entry)
1552 pr_warning("Could not create debugfs "
1553 "'ftraced_enabled' entry\n");
1554 return 0;
1555}
1556
1557fs_initcall(ftrace_init_debugfs);
1558
1559static int __init ftrace_dynamic_init(void)
1560{
1561 struct task_struct *p;
1562 unsigned long addr;
1563 int ret;
1564
1565 addr = (unsigned long)ftrace_record_ip;
1566
1567 stop_machine(ftrace_dyn_arch_init, &addr, NULL);
1568
1569 /* ftrace_dyn_arch_init places the return code in addr */
1570 if (addr) {
1571 ret = (int)addr;
1572 goto failed;
1573 }
1574
1575 ret = ftrace_dyn_table_alloc();
1576 if (ret)
1577 goto failed;
1578
1579 p = kthread_run(ftraced, NULL, "ftraced");
1580 if (IS_ERR(p)) {
1581 ret = -1;
1582 goto failed;
1583 }
1584
1585 last_ftrace_enabled = ftrace_enabled = 1;
1586 ftraced_task = p;
1587
1588 return 0;
1589
1590 failed:
1591 ftrace_disabled = 1;
1592 return ret;
1593}
1594
1595core_initcall(ftrace_dynamic_init);
1596#else
/* Without CONFIG_DYNAMIC_FTRACE these hooks expand to empty statements. */
#define ftrace_startup()		do { } while (0)
#define ftrace_shutdown()		do { } while (0)
#define ftrace_startup_sysctl()		do { } while (0)
#define ftrace_shutdown_sysctl()	do { } while (0)
#define ftrace_force_shutdown()		do { } while (0)
1602#endif /* CONFIG_DYNAMIC_FTRACE */
1603
1604/**
1605 * ftrace_kill_atomic - kill ftrace from critical sections
1606 *
1607 * This function should be used by panic code. It stops ftrace
1608 * but in a not so nice way. If you need to simply kill ftrace
1609 * from a non-atomic section, use ftrace_kill.
1610 */
1611void ftrace_kill_atomic(void)
1612{
1613 ftrace_disabled = 1;
1614 ftrace_enabled = 0;
1615#ifdef CONFIG_DYNAMIC_FTRACE
1616 ftraced_suspend = -1;
1617#endif
1618 clear_ftrace_function();
1619}
1620
1621/**
1622 * ftrace_kill - totally shutdown ftrace
1623 *
1624 * This is a safety measure. If something was detected that seems
1625 * wrong, calling this function will keep ftrace from doing
1626 * any more modifications, and updates.
1627 * used when something went wrong.
1628 */
1629void ftrace_kill(void)
1630{
1631 mutex_lock(&ftrace_sysctl_lock);
1632 ftrace_disabled = 1;
1633 ftrace_enabled = 0;
1634
1635 clear_ftrace_function();
1636 mutex_unlock(&ftrace_sysctl_lock);
1637
1638 /* Try to totally disable ftrace */
1639 ftrace_force_shutdown();
1640}
1641
1642/**
1643 * register_ftrace_function - register a function for profiling
1644 * @ops - ops structure that holds the function for profiling.
1645 *
1646 * Register a function to be called by all functions in the
1647 * kernel.
1648 *
1649 * Note: @ops->func and all the functions it calls must be labeled
1650 * with "notrace", otherwise it will go into a
1651 * recursive loop.
1652 */
1653int register_ftrace_function(struct ftrace_ops *ops)
1654{
1655 int ret;
1656
1657 if (unlikely(ftrace_disabled))
1658 return -1;
1659
1660 mutex_lock(&ftrace_sysctl_lock);
1661 ret = __register_ftrace_function(ops);
1662 ftrace_startup();
1663 mutex_unlock(&ftrace_sysctl_lock);
1664
1665 return ret;
1666}
1667
1668/**
1669 * unregister_ftrace_function - unresgister a function for profiling.
1670 * @ops - ops structure that holds the function to unregister
1671 *
1672 * Unregister a function that was added to be called by ftrace profiling.
1673 */
1674int unregister_ftrace_function(struct ftrace_ops *ops)
1675{
1676 int ret;
1677
1678 mutex_lock(&ftrace_sysctl_lock);
1679 ret = __unregister_ftrace_function(ops);
1680 ftrace_shutdown();
1681 mutex_unlock(&ftrace_sysctl_lock);
1682
1683 return ret;
1684}
1685
1686int
1687ftrace_enable_sysctl(struct ctl_table *table, int write,
1688 struct file *file, void __user *buffer, size_t *lenp,
1689 loff_t *ppos)
1690{
1691 int ret;
1692
1693 if (unlikely(ftrace_disabled))
1694 return -ENODEV;
1695
1696 mutex_lock(&ftrace_sysctl_lock);
1697
1698 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1699
1700 if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
1701 goto out;
1702
1703 last_ftrace_enabled = ftrace_enabled;
1704
1705 if (ftrace_enabled) {
1706
1707 ftrace_startup_sysctl();
1708
1709 /* we are starting ftrace again */
1710 if (ftrace_list != &ftrace_list_end) {
1711 if (ftrace_list->next == &ftrace_list_end)
1712 ftrace_trace_function = ftrace_list->func;
1713 else
1714 ftrace_trace_function = ftrace_list_func;
1715 }
1716
1717 } else {
1718 /* stopping ftrace calls (just send to ftrace_stub) */
1719 ftrace_trace_function = ftrace_stub;
1720
1721 ftrace_shutdown_sysctl();
1722 }
1723
1724 out:
1725 mutex_unlock(&ftrace_sysctl_lock);
1726 return ret;
1727}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
new file mode 100644
index 000000000000..8f3fb3db61c3
--- /dev/null
+++ b/kernel/trace/trace.c
@@ -0,0 +1,3157 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III
13 */
14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h>
16#include <linux/seq_file.h>
17#include <linux/debugfs.h>
18#include <linux/pagemap.h>
19#include <linux/hardirq.h>
20#include <linux/linkage.h>
21#include <linux/uaccess.h>
22#include <linux/ftrace.h>
23#include <linux/module.h>
24#include <linux/percpu.h>
25#include <linux/ctype.h>
26#include <linux/init.h>
27#include <linux/poll.h>
28#include <linux/gfp.h>
29#include <linux/fs.h>
30#include <linux/kprobes.h>
31#include <linux/writeback.h>
32
33#include <linux/stacktrace.h>
34
35#include "trace.h"
36
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh;
39
40static unsigned long __read_mostly tracing_nr_buffers;
41static cpumask_t __read_mostly tracing_buffer_mask;
42
43#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask)
45
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1;
50
51static unsigned long tracing_pages_allocated;
52
53long
54ns2usecs(cycle_t nsec)
55{
56 nsec += 500;
57 do_div(nsec, 1000);
58 return nsec;
59}
60
61cycle_t ftrace_now(int cpu)
62{
63 return cpu_clock(cpu);
64}
65
66/*
67 * The global_trace is the descriptor that holds the tracing
68 * buffers for the live tracing. For each CPU, it contains
69 * a link list of pages that will store trace entries. The
70 * page descriptor of the pages in the memory is used to hold
71 * the link list by linking the lru item in the page descriptor
72 * to each of the pages in the buffer per CPU.
73 *
74 * For each active CPU there is a data field that holds the
75 * pages for the buffer for that CPU. Each CPU has the same number
76 * of pages allocated for its buffer.
77 */
78static struct trace_array global_trace;
79
80static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
81
82/*
83 * The max_tr is used to snapshot the global_trace when a maximum
84 * latency is reached. Some tracers will use this to store a maximum
85 * trace while it continues examining live traces.
86 *
87 * The buffers for the max_tr are set up the same as the global_trace.
88 * When a snapshot is taken, the link list of the max_tr is swapped
89 * with the link list of the global_trace and the buffers are reset for
90 * the global_trace so the tracing can continue.
91 */
92static struct trace_array max_tr;
93
94static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
95
96/* tracer_enabled is used to toggle activation of a tracer */
97static int tracer_enabled = 1;
98
99/* function tracing enabled */
100int ftrace_function_enabled;
101
102/*
103 * trace_nr_entries is the number of entries that is allocated
104 * for a buffer. Note, the number of entries is always rounded
105 * to ENTRIES_PER_PAGE.
106 */
107static unsigned long trace_nr_entries = 65536UL;
108
109/* trace_types holds a link list of available tracers. */
110static struct tracer *trace_types __read_mostly;
111
112/* current_trace points to the tracer that is currently active */
113static struct tracer *current_trace __read_mostly;
114
115/*
116 * max_tracer_type_len is used to simplify the allocating of
117 * buffers to read userspace tracer names. We keep track of
118 * the longest tracer name registered.
119 */
120static int max_tracer_type_len;
121
122/*
123 * trace_types_lock is used to protect the trace_types list.
124 * This lock is also used to keep user access serialized.
125 * Accesses from userspace will grab this lock while userspace
126 * activities happen inside the kernel.
127 */
128static DEFINE_MUTEX(trace_types_lock);
129
130/* trace_wait is a waitqueue for tasks blocked on trace_poll */
131static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
132
133/* trace_flags holds iter_ctrl options */
134unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
135
136static notrace void no_trace_init(struct trace_array *tr)
137{
138 int cpu;
139
140 ftrace_function_enabled = 0;
141 if(tr->ctrl)
142 for_each_online_cpu(cpu)
143 tracing_reset(tr->data[cpu]);
144 tracer_enabled = 0;
145}
146
147/* dummy trace to disable tracing */
148static struct tracer no_tracer __read_mostly = {
149 .name = "none",
150 .init = no_trace_init
151};
152
153
154/**
155 * trace_wake_up - wake up tasks waiting for trace input
156 *
157 * Simply wakes up any task that is blocked on the trace_wait
158 * queue. These is used with trace_poll for tasks polling the trace.
159 */
160void trace_wake_up(void)
161{
162 /*
163 * The runqueue_is_locked() can fail, but this is the best we
164 * have for now:
165 */
166 if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
167 wake_up(&trace_wait);
168}
169
170#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
171
172static int __init set_nr_entries(char *str)
173{
174 unsigned long nr_entries;
175 int ret;
176
177 if (!str)
178 return 0;
179 ret = strict_strtoul(str, 0, &nr_entries);
180 /* nr_entries can not be zero */
181 if (ret < 0 || nr_entries == 0)
182 return 0;
183 trace_nr_entries = nr_entries;
184 return 1;
185}
186__setup("trace_entries=", set_nr_entries);
187
/* Convert nanoseconds to whole microseconds, truncating the remainder. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
192
/*
 * trace_flag_type is an enumeration that holds different
 * states when a trace occurs. These are:
 *  IRQS_OFF		- interrupts were disabled
 *  NEED_RESCHED	- reschedule is requested
 *  HARDIRQ		- inside an interrupt handler
 *  SOFTIRQ		- inside a softirq handler
 */
enum trace_flag_type {
	TRACE_FLAG_IRQS_OFF		= 1 << 0,
	TRACE_FLAG_NEED_RESCHED		= 1 << 1,
	TRACE_FLAG_HARDIRQ		= 1 << 2,
	TRACE_FLAG_SOFTIRQ		= 1 << 3,
};
207
208/*
209 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
210 * control the output of kernel symbols.
211 */
212#define TRACE_ITER_SYM_MASK \
213 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
214
/* These must match the bit positions in trace_iterator_flags */
216static const char *trace_options[] = {
217 "print-parent",
218 "sym-offset",
219 "sym-addr",
220 "verbose",
221 "raw",
222 "hex",
223 "bin",
224 "block",
225 "stacktrace",
226 "sched-tree",
227 NULL
228};
229
230/*
231 * ftrace_max_lock is used to protect the swapping of buffers
232 * when taking a max snapshot. The buffers themselves are
233 * protected by per_cpu spinlocks. But the action of the swap
234 * needs its own lock.
235 *
236 * This is defined as a raw_spinlock_t in order to help
237 * with performance when lockdep debugging is enabled.
238 */
239static raw_spinlock_t ftrace_max_lock =
240 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
241
242/*
243 * Copy the new maximum trace into the separate maximum-trace
244 * structure. (this way the maximum trace is permanently saved,
245 * for later retrieval via /debugfs/tracing/latency_trace)
246 */
247static void
248__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
249{
250 struct trace_array_cpu *data = tr->data[cpu];
251
252 max_tr.cpu = cpu;
253 max_tr.time_start = data->preempt_timestamp;
254
255 data = max_tr.data[cpu];
256 data->saved_latency = tracing_max_latency;
257
258 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
259 data->pid = tsk->pid;
260 data->uid = tsk->uid;
261 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
262 data->policy = tsk->policy;
263 data->rt_priority = tsk->rt_priority;
264
265 /* record this tasks comm */
266 tracing_record_cmdline(current);
267}
268
/*
 * CHECK_COND - bail out of the calling function when @cond holds
 *
 * On failure tracing is disabled, a warning is emitted and the *calling*
 * function returns -1.  Wrapped in do { } while (0) so that the macro
 * behaves like a single statement, including inside if/else.
 */
#define CHECK_COND(cond)			\
	do {					\
		if (unlikely(cond)) {		\
			tracing_disabled = 1;	\
			WARN_ON(1);		\
			return -1;		\
		}				\
	} while (0)
275
276/**
277 * check_pages - integrity check of trace buffers
278 *
279 * As a safty measure we check to make sure the data pages have not
280 * been corrupted.
281 */
282int check_pages(struct trace_array_cpu *data)
283{
284 struct page *page, *tmp;
285
286 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
287 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
288
289 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
290 CHECK_COND(page->lru.next->prev != &page->lru);
291 CHECK_COND(page->lru.prev->next != &page->lru);
292 }
293
294 return 0;
295}
296
297/**
298 * head_page - page address of the first page in per_cpu buffer.
299 *
300 * head_page returns the page address of the first page in
301 * a per_cpu buffer. This also preforms various consistency
302 * checks to make sure the buffer has not been corrupted.
303 */
304void *head_page(struct trace_array_cpu *data)
305{
306 struct page *page;
307
308 if (list_empty(&data->trace_pages))
309 return NULL;
310
311 page = list_entry(data->trace_pages.next, struct page, lru);
312 BUG_ON(&page->lru == &data->trace_pages);
313
314 return page_address(page);
315}
316
317/**
318 * trace_seq_printf - sequence printing of trace information
319 * @s: trace sequence descriptor
320 * @fmt: printf format string
321 *
322 * The tracer may use either sequence operations or its own
323 * copy to user routines. To simplify formating of a trace
324 * trace_seq_printf is used to store strings into a special
325 * buffer (@s). Then the output may be either used by
326 * the sequencer or pulled into another buffer.
327 */
328int
329trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
330{
331 int len = (PAGE_SIZE - 1) - s->len;
332 va_list ap;
333 int ret;
334
335 if (!len)
336 return 0;
337
338 va_start(ap, fmt);
339 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
340 va_end(ap);
341
342 /* If we can't write it all, don't bother writing anything */
343 if (ret >= len)
344 return 0;
345
346 s->len += ret;
347
348 return len;
349}
350
351/**
352 * trace_seq_puts - trace sequence printing of simple string
353 * @s: trace sequence descriptor
354 * @str: simple string to record
355 *
356 * The tracer may use either the sequence operations or its own
357 * copy to user routines. This function records a simple string
358 * into a special buffer (@s) for later retrieval by a sequencer
359 * or other mechanism.
360 */
361static int
362trace_seq_puts(struct trace_seq *s, const char *str)
363{
364 int len = strlen(str);
365
366 if (len > ((PAGE_SIZE - 1) - s->len))
367 return 0;
368
369 memcpy(s->buffer + s->len, str, len);
370 s->len += len;
371
372 return len;
373}
374
375static int
376trace_seq_putc(struct trace_seq *s, unsigned char c)
377{
378 if (s->len >= (PAGE_SIZE - 1))
379 return 0;
380
381 s->buffer[s->len++] = c;
382
383 return 1;
384}
385
386static int
387trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
388{
389 if (len > ((PAGE_SIZE - 1) - s->len))
390 return 0;
391
392 memcpy(s->buffer + s->len, mem, len);
393 s->len += len;
394
395 return len;
396}
397
398#define HEX_CHARS 17
399static const char hex2asc[] = "0123456789abcdef";
400
401static int
402trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
403{
404 unsigned char hex[HEX_CHARS];
405 unsigned char *data = mem;
406 unsigned char byte;
407 int i, j;
408
409 BUG_ON(len >= HEX_CHARS);
410
411#ifdef __BIG_ENDIAN
412 for (i = 0, j = 0; i < len; i++) {
413#else
414 for (i = len-1, j = 0; i >= 0; i--) {
415#endif
416 byte = data[i];
417
418 hex[j++] = hex2asc[byte & 0x0f];
419 hex[j++] = hex2asc[byte >> 4];
420 }
421 hex[j++] = ' ';
422
423 return trace_seq_putmem(s, hex, j);
424}
425
426static void
427trace_seq_reset(struct trace_seq *s)
428{
429 s->len = 0;
430 s->readpos = 0;
431}
432
433ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
434{
435 int len;
436 int ret;
437
438 if (s->len <= s->readpos)
439 return -EBUSY;
440
441 len = s->len - s->readpos;
442 if (cnt > len)
443 cnt = len;
444 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
445 if (ret)
446 return -EFAULT;
447
448 s->readpos += len;
449 return cnt;
450}
451
452static void
453trace_print_seq(struct seq_file *m, struct trace_seq *s)
454{
455 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
456
457 s->buffer[len] = 0;
458 seq_puts(m, s->buffer);
459
460 trace_seq_reset(s);
461}
462
463/*
464 * flip the trace buffers between two trace descriptors.
465 * This usually is the buffers between the global_trace and
466 * the max_tr to record a snapshot of a current trace.
467 *
468 * The ftrace_max_lock must be held.
469 */
470static void
471flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
472{
473 struct list_head flip_pages;
474
475 INIT_LIST_HEAD(&flip_pages);
476
477 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
478 sizeof(struct trace_array_cpu) -
479 offsetof(struct trace_array_cpu, trace_head_idx));
480
481 check_pages(tr1);
482 check_pages(tr2);
483 list_splice_init(&tr1->trace_pages, &flip_pages);
484 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
485 list_splice_init(&flip_pages, &tr2->trace_pages);
486 BUG_ON(!list_empty(&flip_pages));
487 check_pages(tr1);
488 check_pages(tr2);
489}
490
491/**
492 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
493 * @tr: tracer
494 * @tsk: the task with the latency
495 * @cpu: The cpu that initiated the trace.
496 *
497 * Flip the buffers between the @tr and the max_tr and record information
498 * about which task was the cause of this latency.
499 */
500void
501update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
502{
503 struct trace_array_cpu *data;
504 int i;
505
506 WARN_ON_ONCE(!irqs_disabled());
507 __raw_spin_lock(&ftrace_max_lock);
508 /* clear out all the previous traces */
509 for_each_tracing_cpu(i) {
510 data = tr->data[i];
511 flip_trace(max_tr.data[i], data);
512 tracing_reset(data);
513 }
514
515 __update_max_tr(tr, tsk, cpu);
516 __raw_spin_unlock(&ftrace_max_lock);
517}
518
519/**
520 * update_max_tr_single - only copy one trace over, and reset the rest
521 * @tr - tracer
522 * @tsk - task with the latency
523 * @cpu - the cpu of the buffer to copy.
524 *
525 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
526 */
527void
528update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
529{
530 struct trace_array_cpu *data = tr->data[cpu];
531 int i;
532
533 WARN_ON_ONCE(!irqs_disabled());
534 __raw_spin_lock(&ftrace_max_lock);
535 for_each_tracing_cpu(i)
536 tracing_reset(max_tr.data[i]);
537
538 flip_trace(max_tr.data[cpu], data);
539 tracing_reset(data);
540
541 __update_max_tr(tr, tsk, cpu);
542 __raw_spin_unlock(&ftrace_max_lock);
543}
544
545/**
546 * register_tracer - register a tracer with the ftrace system.
547 * @type - the plugin for the tracer
548 *
549 * Register a new plugin tracer.
550 */
551int register_tracer(struct tracer *type)
552{
553 struct tracer *t;
554 int len;
555 int ret = 0;
556
557 if (!type->name) {
558 pr_info("Tracer must have a name\n");
559 return -1;
560 }
561
562 mutex_lock(&trace_types_lock);
563 for (t = trace_types; t; t = t->next) {
564 if (strcmp(type->name, t->name) == 0) {
565 /* already found */
566 pr_info("Trace %s already registered\n",
567 type->name);
568 ret = -1;
569 goto out;
570 }
571 }
572
573#ifdef CONFIG_FTRACE_STARTUP_TEST
574 if (type->selftest) {
575 struct tracer *saved_tracer = current_trace;
576 struct trace_array_cpu *data;
577 struct trace_array *tr = &global_trace;
578 int saved_ctrl = tr->ctrl;
579 int i;
580 /*
581 * Run a selftest on this tracer.
582 * Here we reset the trace buffer, and set the current
583 * tracer to be this tracer. The tracer can then run some
584 * internal tracing to verify that everything is in order.
585 * If we fail, we do not register this tracer.
586 */
587 for_each_tracing_cpu(i) {
588 data = tr->data[i];
589 if (!head_page(data))
590 continue;
591 tracing_reset(data);
592 }
593 current_trace = type;
594 tr->ctrl = 0;
595 /* the test is responsible for initializing and enabling */
596 pr_info("Testing tracer %s: ", type->name);
597 ret = type->selftest(type, tr);
598 /* the test is responsible for resetting too */
599 current_trace = saved_tracer;
600 tr->ctrl = saved_ctrl;
601 if (ret) {
602 printk(KERN_CONT "FAILED!\n");
603 goto out;
604 }
605 /* Only reset on passing, to avoid touching corrupted buffers */
606 for_each_tracing_cpu(i) {
607 data = tr->data[i];
608 if (!head_page(data))
609 continue;
610 tracing_reset(data);
611 }
612 printk(KERN_CONT "PASSED\n");
613 }
614#endif
615
616 type->next = trace_types;
617 trace_types = type;
618 len = strlen(type->name);
619 if (len > max_tracer_type_len)
620 max_tracer_type_len = len;
621
622 out:
623 mutex_unlock(&trace_types_lock);
624
625 return ret;
626}
627
628void unregister_tracer(struct tracer *type)
629{
630 struct tracer **t;
631 int len;
632
633 mutex_lock(&trace_types_lock);
634 for (t = &trace_types; *t; t = &(*t)->next) {
635 if (*t == type)
636 goto found;
637 }
638 pr_info("Trace %s not registered\n", type->name);
639 goto out;
640
641 found:
642 *t = (*t)->next;
643 if (strlen(type->name) != max_tracer_type_len)
644 goto out;
645
646 max_tracer_type_len = 0;
647 for (t = &trace_types; *t; t = &(*t)->next) {
648 len = strlen((*t)->name);
649 if (len > max_tracer_type_len)
650 max_tracer_type_len = len;
651 }
652 out:
653 mutex_unlock(&trace_types_lock);
654}
655
656void tracing_reset(struct trace_array_cpu *data)
657{
658 data->trace_idx = 0;
659 data->overrun = 0;
660 data->trace_head = data->trace_tail = head_page(data);
661 data->trace_head_idx = 0;
662 data->trace_tail_idx = 0;
663}
664
665#define SAVED_CMDLINES 128
666static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
667static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
668static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
669static int cmdline_idx;
670static DEFINE_SPINLOCK(trace_cmdline_lock);
671
672/* temporary disable recording */
673atomic_t trace_record_cmdline_disabled __read_mostly;
674
675static void trace_init_cmdlines(void)
676{
677 memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
678 memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
679 cmdline_idx = 0;
680}
681
682void trace_stop_cmdline_recording(void);
683
684static void trace_save_cmdline(struct task_struct *tsk)
685{
686 unsigned map;
687 unsigned idx;
688
689 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
690 return;
691
692 /*
693 * It's not the end of the world if we don't get
694 * the lock, but we also don't want to spin
695 * nor do we want to disable interrupts,
696 * so if we miss here, then better luck next time.
697 */
698 if (!spin_trylock(&trace_cmdline_lock))
699 return;
700
701 idx = map_pid_to_cmdline[tsk->pid];
702 if (idx >= SAVED_CMDLINES) {
703 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
704
705 map = map_cmdline_to_pid[idx];
706 if (map <= PID_MAX_DEFAULT)
707 map_pid_to_cmdline[map] = (unsigned)-1;
708
709 map_pid_to_cmdline[tsk->pid] = idx;
710
711 cmdline_idx = idx;
712 }
713
714 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
715
716 spin_unlock(&trace_cmdline_lock);
717}
718
719static char *trace_find_cmdline(int pid)
720{
721 char *cmdline = "<...>";
722 unsigned map;
723
724 if (!pid)
725 return "<idle>";
726
727 if (pid > PID_MAX_DEFAULT)
728 goto out;
729
730 map = map_pid_to_cmdline[pid];
731 if (map >= SAVED_CMDLINES)
732 goto out;
733
734 cmdline = saved_cmdlines[map];
735
736 out:
737 return cmdline;
738}
739
740void tracing_record_cmdline(struct task_struct *tsk)
741{
742 if (atomic_read(&trace_record_cmdline_disabled))
743 return;
744
745 trace_save_cmdline(tsk);
746}
747
748static inline struct list_head *
749trace_next_list(struct trace_array_cpu *data, struct list_head *next)
750{
751 /*
752 * Roundrobin - but skip the head (which is not a real page):
753 */
754 next = next->next;
755 if (unlikely(next == &data->trace_pages))
756 next = next->next;
757 BUG_ON(next == &data->trace_pages);
758
759 return next;
760}
761
762static inline void *
763trace_next_page(struct trace_array_cpu *data, void *addr)
764{
765 struct list_head *next;
766 struct page *page;
767
768 page = virt_to_page(addr);
769
770 next = trace_next_list(data, &page->lru);
771 page = list_entry(next, struct page, lru);
772
773 return page_address(page);
774}
775
776static inline struct trace_entry *
777tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
778{
779 unsigned long idx, idx_next;
780 struct trace_entry *entry;
781
782 data->trace_idx++;
783 idx = data->trace_head_idx;
784 idx_next = idx + 1;
785
786 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
787
788 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
789
790 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
791 data->trace_head = trace_next_page(data, data->trace_head);
792 idx_next = 0;
793 }
794
795 if (data->trace_head == data->trace_tail &&
796 idx_next == data->trace_tail_idx) {
797 /* overrun */
798 data->overrun++;
799 data->trace_tail_idx++;
800 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
801 data->trace_tail =
802 trace_next_page(data, data->trace_tail);
803 data->trace_tail_idx = 0;
804 }
805 }
806
807 data->trace_head_idx = idx_next;
808
809 return entry;
810}
811
812static inline void
813tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
814{
815 struct task_struct *tsk = current;
816 unsigned long pc;
817
818 pc = preempt_count();
819
820 entry->preempt_count = pc & 0xff;
821 entry->pid = (tsk) ? tsk->pid : 0;
822 entry->t = ftrace_now(raw_smp_processor_id());
823 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
824 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
825 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
826 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
827}
828
829void
830trace_function(struct trace_array *tr, struct trace_array_cpu *data,
831 unsigned long ip, unsigned long parent_ip, unsigned long flags)
832{
833 struct trace_entry *entry;
834 unsigned long irq_flags;
835
836 raw_local_irq_save(irq_flags);
837 __raw_spin_lock(&data->lock);
838 entry = tracing_get_trace_entry(tr, data);
839 tracing_generic_entry_update(entry, flags);
840 entry->type = TRACE_FN;
841 entry->fn.ip = ip;
842 entry->fn.parent_ip = parent_ip;
843 __raw_spin_unlock(&data->lock);
844 raw_local_irq_restore(irq_flags);
845}
846
847void
848ftrace(struct trace_array *tr, struct trace_array_cpu *data,
849 unsigned long ip, unsigned long parent_ip, unsigned long flags)
850{
851 if (likely(!atomic_read(&data->disabled)))
852 trace_function(tr, data, ip, parent_ip, flags);
853}
854
855#ifdef CONFIG_MMIOTRACE
856void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
857 struct mmiotrace_rw *rw)
858{
859 struct trace_entry *entry;
860 unsigned long irq_flags;
861
862 raw_local_irq_save(irq_flags);
863 __raw_spin_lock(&data->lock);
864
865 entry = tracing_get_trace_entry(tr, data);
866 tracing_generic_entry_update(entry, 0);
867 entry->type = TRACE_MMIO_RW;
868 entry->mmiorw = *rw;
869
870 __raw_spin_unlock(&data->lock);
871 raw_local_irq_restore(irq_flags);
872
873 trace_wake_up();
874}
875
876void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
877 struct mmiotrace_map *map)
878{
879 struct trace_entry *entry;
880 unsigned long irq_flags;
881
882 raw_local_irq_save(irq_flags);
883 __raw_spin_lock(&data->lock);
884
885 entry = tracing_get_trace_entry(tr, data);
886 tracing_generic_entry_update(entry, 0);
887 entry->type = TRACE_MMIO_MAP;
888 entry->mmiomap = *map;
889
890 __raw_spin_unlock(&data->lock);
891 raw_local_irq_restore(irq_flags);
892
893 trace_wake_up();
894}
895#endif
896
897void __trace_stack(struct trace_array *tr,
898 struct trace_array_cpu *data,
899 unsigned long flags,
900 int skip)
901{
902 struct trace_entry *entry;
903 struct stack_trace trace;
904
905 if (!(trace_flags & TRACE_ITER_STACKTRACE))
906 return;
907
908 entry = tracing_get_trace_entry(tr, data);
909 tracing_generic_entry_update(entry, flags);
910 entry->type = TRACE_STACK;
911
912 memset(&entry->stack, 0, sizeof(entry->stack));
913
914 trace.nr_entries = 0;
915 trace.max_entries = FTRACE_STACK_ENTRIES;
916 trace.skip = skip;
917 trace.entries = entry->stack.caller;
918
919 save_stack_trace(&trace);
920}
921
922void
923__trace_special(void *__tr, void *__data,
924 unsigned long arg1, unsigned long arg2, unsigned long arg3)
925{
926 struct trace_array_cpu *data = __data;
927 struct trace_array *tr = __tr;
928 struct trace_entry *entry;
929 unsigned long irq_flags;
930
931 raw_local_irq_save(irq_flags);
932 __raw_spin_lock(&data->lock);
933 entry = tracing_get_trace_entry(tr, data);
934 tracing_generic_entry_update(entry, 0);
935 entry->type = TRACE_SPECIAL;
936 entry->special.arg1 = arg1;
937 entry->special.arg2 = arg2;
938 entry->special.arg3 = arg3;
939 __trace_stack(tr, data, irq_flags, 4);
940 __raw_spin_unlock(&data->lock);
941 raw_local_irq_restore(irq_flags);
942
943 trace_wake_up();
944}
945
946void
947tracing_sched_switch_trace(struct trace_array *tr,
948 struct trace_array_cpu *data,
949 struct task_struct *prev,
950 struct task_struct *next,
951 unsigned long flags)
952{
953 struct trace_entry *entry;
954 unsigned long irq_flags;
955
956 raw_local_irq_save(irq_flags);
957 __raw_spin_lock(&data->lock);
958 entry = tracing_get_trace_entry(tr, data);
959 tracing_generic_entry_update(entry, flags);
960 entry->type = TRACE_CTX;
961 entry->ctx.prev_pid = prev->pid;
962 entry->ctx.prev_prio = prev->prio;
963 entry->ctx.prev_state = prev->state;
964 entry->ctx.next_pid = next->pid;
965 entry->ctx.next_prio = next->prio;
966 entry->ctx.next_state = next->state;
967 __trace_stack(tr, data, flags, 5);
968 __raw_spin_unlock(&data->lock);
969 raw_local_irq_restore(irq_flags);
970}
971
972void
973tracing_sched_wakeup_trace(struct trace_array *tr,
974 struct trace_array_cpu *data,
975 struct task_struct *wakee,
976 struct task_struct *curr,
977 unsigned long flags)
978{
979 struct trace_entry *entry;
980 unsigned long irq_flags;
981
982 raw_local_irq_save(irq_flags);
983 __raw_spin_lock(&data->lock);
984 entry = tracing_get_trace_entry(tr, data);
985 tracing_generic_entry_update(entry, flags);
986 entry->type = TRACE_WAKE;
987 entry->ctx.prev_pid = curr->pid;
988 entry->ctx.prev_prio = curr->prio;
989 entry->ctx.prev_state = curr->state;
990 entry->ctx.next_pid = wakee->pid;
991 entry->ctx.next_prio = wakee->prio;
992 entry->ctx.next_state = wakee->state;
993 __trace_stack(tr, data, flags, 6);
994 __raw_spin_unlock(&data->lock);
995 raw_local_irq_restore(irq_flags);
996
997 trace_wake_up();
998}
999
1000void
1001ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1002{
1003 struct trace_array *tr = &global_trace;
1004 struct trace_array_cpu *data;
1005 unsigned long flags;
1006 long disabled;
1007 int cpu;
1008
1009 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
1010 return;
1011
1012 local_irq_save(flags);
1013 cpu = raw_smp_processor_id();
1014 data = tr->data[cpu];
1015 disabled = atomic_inc_return(&data->disabled);
1016
1017 if (likely(disabled == 1))
1018 __trace_special(tr, data, arg1, arg2, arg3);
1019
1020 atomic_dec(&data->disabled);
1021 local_irq_restore(flags);
1022}
1023
1024#ifdef CONFIG_FTRACE
1025static void
1026function_trace_call(unsigned long ip, unsigned long parent_ip)
1027{
1028 struct trace_array *tr = &global_trace;
1029 struct trace_array_cpu *data;
1030 unsigned long flags;
1031 long disabled;
1032 int cpu;
1033
1034 if (unlikely(!ftrace_function_enabled))
1035 return;
1036
1037 if (skip_trace(ip))
1038 return;
1039
1040 local_irq_save(flags);
1041 cpu = raw_smp_processor_id();
1042 data = tr->data[cpu];
1043 disabled = atomic_inc_return(&data->disabled);
1044
1045 if (likely(disabled == 1))
1046 trace_function(tr, data, ip, parent_ip, flags);
1047
1048 atomic_dec(&data->disabled);
1049 local_irq_restore(flags);
1050}
1051
1052static struct ftrace_ops trace_ops __read_mostly =
1053{
1054 .func = function_trace_call,
1055};
1056
1057void tracing_start_function_trace(void)
1058{
1059 ftrace_function_enabled = 0;
1060 register_ftrace_function(&trace_ops);
1061 if (tracer_enabled)
1062 ftrace_function_enabled = 1;
1063}
1064
1065void tracing_stop_function_trace(void)
1066{
1067 ftrace_function_enabled = 0;
1068 unregister_ftrace_function(&trace_ops);
1069}
1070#endif
1071
/* Flag bits kept in trace_iterator.iter_flags. */
enum trace_file_type {
	/* the file is to be printed in latency format */
	TRACE_FILE_LAT_FMT	= 1,
};
1075
1076static struct trace_entry *
1077trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1078 struct trace_iterator *iter, int cpu)
1079{
1080 struct page *page;
1081 struct trace_entry *array;
1082
1083 if (iter->next_idx[cpu] >= tr->entries ||
1084 iter->next_idx[cpu] >= data->trace_idx ||
1085 (data->trace_head == data->trace_tail &&
1086 data->trace_head_idx == data->trace_tail_idx))
1087 return NULL;
1088
1089 if (!iter->next_page[cpu]) {
1090 /* Initialize the iterator for this cpu trace buffer */
1091 WARN_ON(!data->trace_tail);
1092 page = virt_to_page(data->trace_tail);
1093 iter->next_page[cpu] = &page->lru;
1094 iter->next_page_idx[cpu] = data->trace_tail_idx;
1095 }
1096
1097 page = list_entry(iter->next_page[cpu], struct page, lru);
1098 BUG_ON(&data->trace_pages == &page->lru);
1099
1100 array = page_address(page);
1101
1102 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1103 return &array[iter->next_page_idx[cpu]];
1104}
1105
1106static struct trace_entry *
1107find_next_entry(struct trace_iterator *iter, int *ent_cpu)
1108{
1109 struct trace_array *tr = iter->tr;
1110 struct trace_entry *ent, *next = NULL;
1111 int next_cpu = -1;
1112 int cpu;
1113
1114 for_each_tracing_cpu(cpu) {
1115 if (!head_page(tr->data[cpu]))
1116 continue;
1117 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1118 /*
1119 * Pick the entry with the smallest timestamp:
1120 */
1121 if (ent && (!next || ent->t < next->t)) {
1122 next = ent;
1123 next_cpu = cpu;
1124 }
1125 }
1126
1127 if (ent_cpu)
1128 *ent_cpu = next_cpu;
1129
1130 return next;
1131}
1132
1133static void trace_iterator_increment(struct trace_iterator *iter)
1134{
1135 iter->idx++;
1136 iter->next_idx[iter->cpu]++;
1137 iter->next_page_idx[iter->cpu]++;
1138
1139 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1140 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1141
1142 iter->next_page_idx[iter->cpu] = 0;
1143 iter->next_page[iter->cpu] =
1144 trace_next_list(data, iter->next_page[iter->cpu]);
1145 }
1146}
1147
1148static void trace_consume(struct trace_iterator *iter)
1149{
1150 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1151
1152 data->trace_tail_idx++;
1153 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1154 data->trace_tail = trace_next_page(data, data->trace_tail);
1155 data->trace_tail_idx = 0;
1156 }
1157
1158 /* Check if we empty it, then reset the index */
1159 if (data->trace_head == data->trace_tail &&
1160 data->trace_head_idx == data->trace_tail_idx)
1161 data->trace_idx = 0;
1162}
1163
1164static void *find_next_entry_inc(struct trace_iterator *iter)
1165{
1166 struct trace_entry *next;
1167 int next_cpu = -1;
1168
1169 next = find_next_entry(iter, &next_cpu);
1170
1171 iter->prev_ent = iter->ent;
1172 iter->prev_cpu = iter->cpu;
1173
1174 iter->ent = next;
1175 iter->cpu = next_cpu;
1176
1177 if (next)
1178 trace_iterator_increment(iter);
1179
1180 return next ? iter : NULL;
1181}
1182
1183static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1184{
1185 struct trace_iterator *iter = m->private;
1186 int i = (int)*pos;
1187 void *ent;
1188
1189 (*pos)++;
1190
1191 /* can't go backwards */
1192 if (iter->idx > i)
1193 return NULL;
1194
1195 if (iter->idx < 0)
1196 ent = find_next_entry_inc(iter);
1197 else
1198 ent = iter;
1199
1200 while (ent && iter->idx < i)
1201 ent = find_next_entry_inc(iter);
1202
1203 iter->pos = *pos;
1204
1205 return ent;
1206}
1207
1208static void *s_start(struct seq_file *m, loff_t *pos)
1209{
1210 struct trace_iterator *iter = m->private;
1211 void *p = NULL;
1212 loff_t l = 0;
1213 int i;
1214
1215 mutex_lock(&trace_types_lock);
1216
1217 if (!current_trace || current_trace != iter->trace) {
1218 mutex_unlock(&trace_types_lock);
1219 return NULL;
1220 }
1221
1222 atomic_inc(&trace_record_cmdline_disabled);
1223
1224 /* let the tracer grab locks here if needed */
1225 if (current_trace->start)
1226 current_trace->start(iter);
1227
1228 if (*pos != iter->pos) {
1229 iter->ent = NULL;
1230 iter->cpu = 0;
1231 iter->idx = -1;
1232 iter->prev_ent = NULL;
1233 iter->prev_cpu = -1;
1234
1235 for_each_tracing_cpu(i) {
1236 iter->next_idx[i] = 0;
1237 iter->next_page[i] = NULL;
1238 }
1239
1240 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1241 ;
1242
1243 } else {
1244 l = *pos - 1;
1245 p = s_next(m, p, &l);
1246 }
1247
1248 return p;
1249}
1250
1251static void s_stop(struct seq_file *m, void *p)
1252{
1253 struct trace_iterator *iter = m->private;
1254
1255 atomic_dec(&trace_record_cmdline_disabled);
1256
1257 /* let the tracer release locks here if needed */
1258 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1259 iter->trace->stop(iter);
1260
1261 mutex_unlock(&trace_types_lock);
1262}
1263
1264#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1265
/*
 * Return non-zero if @addr is the kretprobe trampoline.  Always 0 when
 * kretprobes are not configured.
 */
static inline int kretprobed(unsigned long addr)
{
#ifdef CONFIG_KRETPROBES
	return addr == (unsigned long)kretprobe_trampoline;
#else
	return 0;
#endif /* CONFIG_KRETPROBES */
}
1277
/*
 * Print the symbol name for @address into @s, formatted with @fmt
 * (which takes one string argument).  Without CONFIG_KALLSYMS nothing
 * is printed and 1 is returned.
 */
static int
seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char sym[KSYM_SYMBOL_LEN];

	kallsyms_lookup(address, NULL, NULL, NULL, sym);

	return trace_seq_printf(s, fmt, sym);
#else
	return 1;
#endif
}
1290
/*
 * Print the sprint_symbol() rendering of @address into @s, formatted
 * with @fmt (which takes one string argument).  Without CONFIG_KALLSYMS
 * nothing is printed and 1 is returned.
 */
static int
seq_print_sym_offset(struct trace_seq *s, const char *fmt,
		     unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char sym[KSYM_SYMBOL_LEN];

	sprint_symbol(sym, address);

	return trace_seq_printf(s, fmt, sym);
#else
	return 1;
#endif
}
1303
1304#ifndef CONFIG_64BIT
1305# define IP_FMT "%08lx"
1306#else
1307# define IP_FMT "%016lx"
1308#endif
1309
1310static int
1311seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1312{
1313 int ret;
1314
1315 if (!ip)
1316 return trace_seq_printf(s, "0");
1317
1318 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1319 ret = seq_print_sym_offset(s, "%s", ip);
1320 else
1321 ret = seq_print_sym_short(s, "%s", ip);
1322
1323 if (!ret)
1324 return 0;
1325
1326 if (sym_flags & TRACE_ITER_SYM_ADDR)
1327 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1328 return ret;
1329}
1330
1331static void print_lat_help_header(struct seq_file *m)
1332{
1333 seq_puts(m, "# _------=> CPU# \n");
1334 seq_puts(m, "# / _-----=> irqs-off \n");
1335 seq_puts(m, "# | / _----=> need-resched \n");
1336 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1337 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1338 seq_puts(m, "# |||| / \n");
1339 seq_puts(m, "# ||||| delay \n");
1340 seq_puts(m, "# cmd pid ||||| time | caller \n");
1341 seq_puts(m, "# \\ / ||||| \\ | / \n");
1342}
1343
1344static void print_func_help_header(struct seq_file *m)
1345{
1346 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1347 seq_puts(m, "# | | | | |\n");
1348}
1349
1350
1351static void
1352print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1353{
1354 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1355 struct trace_array *tr = iter->tr;
1356 struct trace_array_cpu *data = tr->data[tr->cpu];
1357 struct tracer *type = current_trace;
1358 unsigned long total = 0;
1359 unsigned long entries = 0;
1360 int cpu;
1361 const char *name = "preemption";
1362
1363 if (type)
1364 name = type->name;
1365
1366 for_each_tracing_cpu(cpu) {
1367 if (head_page(tr->data[cpu])) {
1368 total += tr->data[cpu]->trace_idx;
1369 if (tr->data[cpu]->trace_idx > tr->entries)
1370 entries += tr->entries;
1371 else
1372 entries += tr->data[cpu]->trace_idx;
1373 }
1374 }
1375
1376 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1377 name, UTS_RELEASE);
1378 seq_puts(m, "-----------------------------------"
1379 "---------------------------------\n");
1380 seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1381 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1382 nsecs_to_usecs(data->saved_latency),
1383 entries,
1384 total,
1385 tr->cpu,
1386#if defined(CONFIG_PREEMPT_NONE)
1387 "server",
1388#elif defined(CONFIG_PREEMPT_VOLUNTARY)
1389 "desktop",
1390#elif defined(CONFIG_PREEMPT)
1391 "preempt",
1392#else
1393 "unknown",
1394#endif
1395 /* These are reserved for later use */
1396 0, 0, 0, 0);
1397#ifdef CONFIG_SMP
1398 seq_printf(m, " #P:%d)\n", num_online_cpus());
1399#else
1400 seq_puts(m, ")\n");
1401#endif
1402 seq_puts(m, " -----------------\n");
1403 seq_printf(m, " | task: %.16s-%d "
1404 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1405 data->comm, data->pid, data->uid, data->nice,
1406 data->policy, data->rt_priority);
1407 seq_puts(m, " -----------------\n");
1408
1409 if (data->critical_start) {
1410 seq_puts(m, " => started at: ");
1411 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1412 trace_print_seq(m, &iter->seq);
1413 seq_puts(m, "\n => ended at: ");
1414 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1415 trace_print_seq(m, &iter->seq);
1416 seq_puts(m, "\n");
1417 }
1418
1419 seq_puts(m, "\n");
1420}
1421
1422static void
1423lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1424{
1425 int hardirq, softirq;
1426 char *comm;
1427
1428 comm = trace_find_cmdline(entry->pid);
1429
1430 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1431 trace_seq_printf(s, "%d", cpu);
1432 trace_seq_printf(s, "%c%c",
1433 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1434 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1435
1436 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1437 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1438 if (hardirq && softirq) {
1439 trace_seq_putc(s, 'H');
1440 } else {
1441 if (hardirq) {
1442 trace_seq_putc(s, 'h');
1443 } else {
1444 if (softirq)
1445 trace_seq_putc(s, 's');
1446 else
1447 trace_seq_putc(s, '.');
1448 }
1449 }
1450
1451 if (entry->preempt_count)
1452 trace_seq_printf(s, "%x", entry->preempt_count);
1453 else
1454 trace_seq_puts(s, ".");
1455}
1456
1457unsigned long preempt_mark_thresh = 100;
1458
1459static void
1460lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1461 unsigned long rel_usecs)
1462{
1463 trace_seq_printf(s, " %4lldus", abs_usecs);
1464 if (rel_usecs > preempt_mark_thresh)
1465 trace_seq_puts(s, "!: ");
1466 else if (rel_usecs > 1)
1467 trace_seq_puts(s, "+: ");
1468 else
1469 trace_seq_puts(s, " : ");
1470}
1471
1472static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1473
1474static int
1475print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1476{
1477 struct trace_seq *s = &iter->seq;
1478 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1479 struct trace_entry *next_entry = find_next_entry(iter, NULL);
1480 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1481 struct trace_entry *entry = iter->ent;
1482 unsigned long abs_usecs;
1483 unsigned long rel_usecs;
1484 char *comm;
1485 int S, T;
1486 int i;
1487 unsigned state;
1488
1489 if (!next_entry)
1490 next_entry = entry;
1491 rel_usecs = ns2usecs(next_entry->t - entry->t);
1492 abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1493
1494 if (verbose) {
1495 comm = trace_find_cmdline(entry->pid);
1496 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1497 " %ld.%03ldms (+%ld.%03ldms): ",
1498 comm,
1499 entry->pid, cpu, entry->flags,
1500 entry->preempt_count, trace_idx,
1501 ns2usecs(entry->t),
1502 abs_usecs/1000,
1503 abs_usecs % 1000, rel_usecs/1000,
1504 rel_usecs % 1000);
1505 } else {
1506 lat_print_generic(s, entry, cpu);
1507 lat_print_timestamp(s, abs_usecs, rel_usecs);
1508 }
1509 switch (entry->type) {
1510 case TRACE_FN:
1511 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1512 trace_seq_puts(s, " (");
1513 if (kretprobed(entry->fn.parent_ip))
1514 trace_seq_puts(s, KRETPROBE_MSG);
1515 else
1516 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1517 trace_seq_puts(s, ")\n");
1518 break;
1519 case TRACE_CTX:
1520 case TRACE_WAKE:
1521 T = entry->ctx.next_state < sizeof(state_to_char) ?
1522 state_to_char[entry->ctx.next_state] : 'X';
1523
1524 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1525 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1526 comm = trace_find_cmdline(entry->ctx.next_pid);
1527 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1528 entry->ctx.prev_pid,
1529 entry->ctx.prev_prio,
1530 S, entry->type == TRACE_CTX ? "==>" : " +",
1531 entry->ctx.next_pid,
1532 entry->ctx.next_prio,
1533 T, comm);
1534 break;
1535 case TRACE_SPECIAL:
1536 trace_seq_printf(s, "# %ld %ld %ld\n",
1537 entry->special.arg1,
1538 entry->special.arg2,
1539 entry->special.arg3);
1540 break;
1541 case TRACE_STACK:
1542 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1543 if (i)
1544 trace_seq_puts(s, " <= ");
1545 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1546 }
1547 trace_seq_puts(s, "\n");
1548 break;
1549 default:
1550 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1551 }
1552 return 1;
1553}
1554
1555static int print_trace_fmt(struct trace_iterator *iter)
1556{
1557 struct trace_seq *s = &iter->seq;
1558 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1559 struct trace_entry *entry;
1560 unsigned long usec_rem;
1561 unsigned long long t;
1562 unsigned long secs;
1563 char *comm;
1564 int ret;
1565 int S, T;
1566 int i;
1567
1568 entry = iter->ent;
1569
1570 comm = trace_find_cmdline(iter->ent->pid);
1571
1572 t = ns2usecs(entry->t);
1573 usec_rem = do_div(t, 1000000ULL);
1574 secs = (unsigned long)t;
1575
1576 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1577 if (!ret)
1578 return 0;
1579 ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1580 if (!ret)
1581 return 0;
1582 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1583 if (!ret)
1584 return 0;
1585
1586 switch (entry->type) {
1587 case TRACE_FN:
1588 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1589 if (!ret)
1590 return 0;
1591 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1592 entry->fn.parent_ip) {
1593 ret = trace_seq_printf(s, " <-");
1594 if (!ret)
1595 return 0;
1596 if (kretprobed(entry->fn.parent_ip))
1597 ret = trace_seq_puts(s, KRETPROBE_MSG);
1598 else
1599 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1600 sym_flags);
1601 if (!ret)
1602 return 0;
1603 }
1604 ret = trace_seq_printf(s, "\n");
1605 if (!ret)
1606 return 0;
1607 break;
1608 case TRACE_CTX:
1609 case TRACE_WAKE:
1610 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1611 state_to_char[entry->ctx.prev_state] : 'X';
1612 T = entry->ctx.next_state < sizeof(state_to_char) ?
1613 state_to_char[entry->ctx.next_state] : 'X';
1614 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1615 entry->ctx.prev_pid,
1616 entry->ctx.prev_prio,
1617 S,
1618 entry->type == TRACE_CTX ? "==>" : " +",
1619 entry->ctx.next_pid,
1620 entry->ctx.next_prio,
1621 T);
1622 if (!ret)
1623 return 0;
1624 break;
1625 case TRACE_SPECIAL:
1626 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1627 entry->special.arg1,
1628 entry->special.arg2,
1629 entry->special.arg3);
1630 if (!ret)
1631 return 0;
1632 break;
1633 case TRACE_STACK:
1634 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1635 if (i) {
1636 ret = trace_seq_puts(s, " <= ");
1637 if (!ret)
1638 return 0;
1639 }
1640 ret = seq_print_ip_sym(s, entry->stack.caller[i],
1641 sym_flags);
1642 if (!ret)
1643 return 0;
1644 }
1645 ret = trace_seq_puts(s, "\n");
1646 if (!ret)
1647 return 0;
1648 break;
1649 }
1650 return 1;
1651}
1652
1653static int print_raw_fmt(struct trace_iterator *iter)
1654{
1655 struct trace_seq *s = &iter->seq;
1656 struct trace_entry *entry;
1657 int ret;
1658 int S, T;
1659
1660 entry = iter->ent;
1661
1662 ret = trace_seq_printf(s, "%d %d %llu ",
1663 entry->pid, iter->cpu, entry->t);
1664 if (!ret)
1665 return 0;
1666
1667 switch (entry->type) {
1668 case TRACE_FN:
1669 ret = trace_seq_printf(s, "%x %x\n",
1670 entry->fn.ip, entry->fn.parent_ip);
1671 if (!ret)
1672 return 0;
1673 break;
1674 case TRACE_CTX:
1675 case TRACE_WAKE:
1676 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1677 state_to_char[entry->ctx.prev_state] : 'X';
1678 T = entry->ctx.next_state < sizeof(state_to_char) ?
1679 state_to_char[entry->ctx.next_state] : 'X';
1680 if (entry->type == TRACE_WAKE)
1681 S = '+';
1682 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1683 entry->ctx.prev_pid,
1684 entry->ctx.prev_prio,
1685 S,
1686 entry->ctx.next_pid,
1687 entry->ctx.next_prio,
1688 T);
1689 if (!ret)
1690 return 0;
1691 break;
1692 case TRACE_SPECIAL:
1693 case TRACE_STACK:
1694 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1695 entry->special.arg1,
1696 entry->special.arg2,
1697 entry->special.arg3);
1698 if (!ret)
1699 return 0;
1700 break;
1701 }
1702 return 1;
1703}
1704
1705#define SEQ_PUT_FIELD_RET(s, x) \
1706do { \
1707 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
1708 return 0; \
1709} while (0)
1710
1711#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1712do { \
1713 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1714 return 0; \
1715} while (0)
1716
1717static int print_hex_fmt(struct trace_iterator *iter)
1718{
1719 struct trace_seq *s = &iter->seq;
1720 unsigned char newline = '\n';
1721 struct trace_entry *entry;
1722 int S, T;
1723
1724 entry = iter->ent;
1725
1726 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1727 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1728 SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1729
1730 switch (entry->type) {
1731 case TRACE_FN:
1732 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1733 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1734 break;
1735 case TRACE_CTX:
1736 case TRACE_WAKE:
1737 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1738 state_to_char[entry->ctx.prev_state] : 'X';
1739 T = entry->ctx.next_state < sizeof(state_to_char) ?
1740 state_to_char[entry->ctx.next_state] : 'X';
1741 if (entry->type == TRACE_WAKE)
1742 S = '+';
1743 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1744 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1745 SEQ_PUT_HEX_FIELD_RET(s, S);
1746 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1747 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1748 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1749 SEQ_PUT_HEX_FIELD_RET(s, T);
1750 break;
1751 case TRACE_SPECIAL:
1752 case TRACE_STACK:
1753 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1754 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1755 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1756 break;
1757 }
1758 SEQ_PUT_FIELD_RET(s, newline);
1759
1760 return 1;
1761}
1762
1763static int print_bin_fmt(struct trace_iterator *iter)
1764{
1765 struct trace_seq *s = &iter->seq;
1766 struct trace_entry *entry;
1767
1768 entry = iter->ent;
1769
1770 SEQ_PUT_FIELD_RET(s, entry->pid);
1771 SEQ_PUT_FIELD_RET(s, entry->cpu);
1772 SEQ_PUT_FIELD_RET(s, entry->t);
1773
1774 switch (entry->type) {
1775 case TRACE_FN:
1776 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1777 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1778 break;
1779 case TRACE_CTX:
1780 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1781 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1782 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1783 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1784 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1785 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1786 break;
1787 case TRACE_SPECIAL:
1788 case TRACE_STACK:
1789 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1790 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1791 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1792 break;
1793 }
1794 return 1;
1795}
1796
1797static int trace_empty(struct trace_iterator *iter)
1798{
1799 struct trace_array_cpu *data;
1800 int cpu;
1801
1802 for_each_tracing_cpu(cpu) {
1803 data = iter->tr->data[cpu];
1804
1805 if (head_page(data) && data->trace_idx &&
1806 (data->trace_tail != data->trace_head ||
1807 data->trace_tail_idx != data->trace_head_idx))
1808 return 0;
1809 }
1810 return 1;
1811}
1812
1813static int print_trace_line(struct trace_iterator *iter)
1814{
1815 if (iter->trace && iter->trace->print_line)
1816 return iter->trace->print_line(iter);
1817
1818 if (trace_flags & TRACE_ITER_BIN)
1819 return print_bin_fmt(iter);
1820
1821 if (trace_flags & TRACE_ITER_HEX)
1822 return print_hex_fmt(iter);
1823
1824 if (trace_flags & TRACE_ITER_RAW)
1825 return print_raw_fmt(iter);
1826
1827 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1828 return print_lat_fmt(iter, iter->idx, iter->cpu);
1829
1830 return print_trace_fmt(iter);
1831}
1832
1833static int s_show(struct seq_file *m, void *v)
1834{
1835 struct trace_iterator *iter = v;
1836
1837 if (iter->ent == NULL) {
1838 if (iter->tr) {
1839 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1840 seq_puts(m, "#\n");
1841 }
1842 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1843 /* print nothing if the buffers are empty */
1844 if (trace_empty(iter))
1845 return 0;
1846 print_trace_header(m, iter);
1847 if (!(trace_flags & TRACE_ITER_VERBOSE))
1848 print_lat_help_header(m);
1849 } else {
1850 if (!(trace_flags & TRACE_ITER_VERBOSE))
1851 print_func_help_header(m);
1852 }
1853 } else {
1854 print_trace_line(iter);
1855 trace_print_seq(m, &iter->seq);
1856 }
1857
1858 return 0;
1859}
1860
1861static struct seq_operations tracer_seq_ops = {
1862 .start = s_start,
1863 .next = s_next,
1864 .stop = s_stop,
1865 .show = s_show,
1866};
1867
1868static struct trace_iterator *
1869__tracing_open(struct inode *inode, struct file *file, int *ret)
1870{
1871 struct trace_iterator *iter;
1872
1873 if (tracing_disabled) {
1874 *ret = -ENODEV;
1875 return NULL;
1876 }
1877
1878 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1879 if (!iter) {
1880 *ret = -ENOMEM;
1881 goto out;
1882 }
1883
1884 mutex_lock(&trace_types_lock);
1885 if (current_trace && current_trace->print_max)
1886 iter->tr = &max_tr;
1887 else
1888 iter->tr = inode->i_private;
1889 iter->trace = current_trace;
1890 iter->pos = -1;
1891
1892 /* TODO stop tracer */
1893 *ret = seq_open(file, &tracer_seq_ops);
1894 if (!*ret) {
1895 struct seq_file *m = file->private_data;
1896 m->private = iter;
1897
1898 /* stop the trace while dumping */
1899 if (iter->tr->ctrl) {
1900 tracer_enabled = 0;
1901 ftrace_function_enabled = 0;
1902 }
1903
1904 if (iter->trace && iter->trace->open)
1905 iter->trace->open(iter);
1906 } else {
1907 kfree(iter);
1908 iter = NULL;
1909 }
1910 mutex_unlock(&trace_types_lock);
1911
1912 out:
1913 return iter;
1914}
1915
1916int tracing_open_generic(struct inode *inode, struct file *filp)
1917{
1918 if (tracing_disabled)
1919 return -ENODEV;
1920
1921 filp->private_data = inode->i_private;
1922 return 0;
1923}
1924
1925int tracing_release(struct inode *inode, struct file *file)
1926{
1927 struct seq_file *m = (struct seq_file *)file->private_data;
1928 struct trace_iterator *iter = m->private;
1929
1930 mutex_lock(&trace_types_lock);
1931 if (iter->trace && iter->trace->close)
1932 iter->trace->close(iter);
1933
1934 /* reenable tracing if it was previously enabled */
1935 if (iter->tr->ctrl) {
1936 tracer_enabled = 1;
1937 /*
1938 * It is safe to enable function tracing even if it
1939 * isn't used
1940 */
1941 ftrace_function_enabled = 1;
1942 }
1943 mutex_unlock(&trace_types_lock);
1944
1945 seq_release(inode, file);
1946 kfree(iter);
1947 return 0;
1948}
1949
/* ->open() wrapper: only the status from __tracing_open() is needed. */
static int tracing_open(struct inode *inode, struct file *file)
{
	int status;

	__tracing_open(inode, file, &status);
	return status;
}
1958
1959static int tracing_lt_open(struct inode *inode, struct file *file)
1960{
1961 struct trace_iterator *iter;
1962 int ret;
1963
1964 iter = __tracing_open(inode, file, &ret);
1965
1966 if (!ret)
1967 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1968
1969 return ret;
1970}
1971
1972
1973static void *
1974t_next(struct seq_file *m, void *v, loff_t *pos)
1975{
1976 struct tracer *t = m->private;
1977
1978 (*pos)++;
1979
1980 if (t)
1981 t = t->next;
1982
1983 m->private = t;
1984
1985 return t;
1986}
1987
1988static void *t_start(struct seq_file *m, loff_t *pos)
1989{
1990 struct tracer *t = m->private;
1991 loff_t l = 0;
1992
1993 mutex_lock(&trace_types_lock);
1994 for (; t && l < *pos; t = t_next(m, t, &l))
1995 ;
1996
1997 return t;
1998}
1999
2000static void t_stop(struct seq_file *m, void *p)
2001{
2002 mutex_unlock(&trace_types_lock);
2003}
2004
2005static int t_show(struct seq_file *m, void *v)
2006{
2007 struct tracer *t = v;
2008
2009 if (!t)
2010 return 0;
2011
2012 seq_printf(m, "%s", t->name);
2013 if (t->next)
2014 seq_putc(m, ' ');
2015 else
2016 seq_putc(m, '\n');
2017
2018 return 0;
2019}
2020
2021static struct seq_operations show_traces_seq_ops = {
2022 .start = t_start,
2023 .next = t_next,
2024 .stop = t_stop,
2025 .show = t_show,
2026};
2027
2028static int show_traces_open(struct inode *inode, struct file *file)
2029{
2030 int ret;
2031
2032 if (tracing_disabled)
2033 return -ENODEV;
2034
2035 ret = seq_open(file, &show_traces_seq_ops);
2036 if (!ret) {
2037 struct seq_file *m = file->private_data;
2038 m->private = trace_types;
2039 }
2040
2041 return ret;
2042}
2043
2044static struct file_operations tracing_fops = {
2045 .open = tracing_open,
2046 .read = seq_read,
2047 .llseek = seq_lseek,
2048 .release = tracing_release,
2049};
2050
2051static struct file_operations tracing_lt_fops = {
2052 .open = tracing_lt_open,
2053 .read = seq_read,
2054 .llseek = seq_lseek,
2055 .release = tracing_release,
2056};
2057
2058static struct file_operations show_traces_fops = {
2059 .open = show_traces_open,
2060 .read = seq_read,
2061 .release = seq_release,
2062};
2063
2064/*
2065 * Only trace on a CPU if the bitmask is set:
2066 */
2067static cpumask_t tracing_cpumask = CPU_MASK_ALL;
2068
2069/*
2070 * When tracing/tracing_cpu_mask is modified then this holds
2071 * the new bitmask we are about to install:
2072 */
2073static cpumask_t tracing_cpumask_new;
2074
2075/*
2076 * The tracer itself will not take this lock, but still we want
2077 * to provide a consistent cpumask to user-space:
2078 */
2079static DEFINE_MUTEX(tracing_cpumask_update_lock);
2080
2081/*
2082 * Temporary storage for the character representation of the
2083 * CPU bitmask (and one more byte for the newline):
2084 */
2085static char mask_str[NR_CPUS + 1];
2086
2087static ssize_t
2088tracing_cpumask_read(struct file *filp, char __user *ubuf,
2089 size_t count, loff_t *ppos)
2090{
2091 int len;
2092
2093 mutex_lock(&tracing_cpumask_update_lock);
2094
2095 len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2096 if (count - len < 2) {
2097 count = -EINVAL;
2098 goto out_err;
2099 }
2100 len += sprintf(mask_str + len, "\n");
2101 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2102
2103out_err:
2104 mutex_unlock(&tracing_cpumask_update_lock);
2105
2106 return count;
2107}
2108
2109static ssize_t
2110tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2111 size_t count, loff_t *ppos)
2112{
2113 int err, cpu;
2114
2115 mutex_lock(&tracing_cpumask_update_lock);
2116 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2117 if (err)
2118 goto err_unlock;
2119
2120 raw_local_irq_disable();
2121 __raw_spin_lock(&ftrace_max_lock);
2122 for_each_tracing_cpu(cpu) {
2123 /*
2124 * Increase/decrease the disabled counter if we are
2125 * about to flip a bit in the cpumask:
2126 */
2127 if (cpu_isset(cpu, tracing_cpumask) &&
2128 !cpu_isset(cpu, tracing_cpumask_new)) {
2129 atomic_inc(&global_trace.data[cpu]->disabled);
2130 }
2131 if (!cpu_isset(cpu, tracing_cpumask) &&
2132 cpu_isset(cpu, tracing_cpumask_new)) {
2133 atomic_dec(&global_trace.data[cpu]->disabled);
2134 }
2135 }
2136 __raw_spin_unlock(&ftrace_max_lock);
2137 raw_local_irq_enable();
2138
2139 tracing_cpumask = tracing_cpumask_new;
2140
2141 mutex_unlock(&tracing_cpumask_update_lock);
2142
2143 return count;
2144
2145err_unlock:
2146 mutex_unlock(&tracing_cpumask_update_lock);
2147
2148 return err;
2149}
2150
2151static struct file_operations tracing_cpumask_fops = {
2152 .open = tracing_open_generic,
2153 .read = tracing_cpumask_read,
2154 .write = tracing_cpumask_write,
2155};
2156
2157static ssize_t
2158tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2159 size_t cnt, loff_t *ppos)
2160{
2161 char *buf;
2162 int r = 0;
2163 int len = 0;
2164 int i;
2165
2166 /* calulate max size */
2167 for (i = 0; trace_options[i]; i++) {
2168 len += strlen(trace_options[i]);
2169 len += 3; /* "no" and space */
2170 }
2171
2172 /* +2 for \n and \0 */
2173 buf = kmalloc(len + 2, GFP_KERNEL);
2174 if (!buf)
2175 return -ENOMEM;
2176
2177 for (i = 0; trace_options[i]; i++) {
2178 if (trace_flags & (1 << i))
2179 r += sprintf(buf + r, "%s ", trace_options[i]);
2180 else
2181 r += sprintf(buf + r, "no%s ", trace_options[i]);
2182 }
2183
2184 r += sprintf(buf + r, "\n");
2185 WARN_ON(r >= len + 2);
2186
2187 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2188
2189 kfree(buf);
2190
2191 return r;
2192}
2193
2194static ssize_t
2195tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2196 size_t cnt, loff_t *ppos)
2197{
2198 char buf[64];
2199 char *cmp = buf;
2200 int neg = 0;
2201 int i;
2202
2203 if (cnt >= sizeof(buf))
2204 return -EINVAL;
2205
2206 if (copy_from_user(&buf, ubuf, cnt))
2207 return -EFAULT;
2208
2209 buf[cnt] = 0;
2210
2211 if (strncmp(buf, "no", 2) == 0) {
2212 neg = 1;
2213 cmp += 2;
2214 }
2215
2216 for (i = 0; trace_options[i]; i++) {
2217 int len = strlen(trace_options[i]);
2218
2219 if (strncmp(cmp, trace_options[i], len) == 0) {
2220 if (neg)
2221 trace_flags &= ~(1 << i);
2222 else
2223 trace_flags |= (1 << i);
2224 break;
2225 }
2226 }
2227 /*
2228 * If no option could be set, return an error:
2229 */
2230 if (!trace_options[i])
2231 return -EINVAL;
2232
2233 filp->f_pos += cnt;
2234
2235 return cnt;
2236}
2237
2238static struct file_operations tracing_iter_fops = {
2239 .open = tracing_open_generic,
2240 .read = tracing_iter_ctrl_read,
2241 .write = tracing_iter_ctrl_write,
2242};
2243
/*
 * Text returned by tracing_readme_read(): a short usage walk-through.
 * Every command line carries the "# " shell prompt.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# mkdir /debug\n"
	"# mount -t debugfs nodev /debug\n\n"
	"# cat /debug/tracing/available_tracers\n"
	"wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
	"# cat /debug/tracing/current_tracer\n"
	"none\n"
	"# echo sched_switch > /debug/tracing/current_tracer\n"
	"# cat /debug/tracing/current_tracer\n"
	"sched_switch\n"
	"# cat /debug/tracing/iter_ctrl\n"
	"noprint-parent nosym-offset nosym-addr noverbose\n"
	"# echo print-parent > /debug/tracing/iter_ctrl\n"
	"# echo 1 > /debug/tracing/tracing_enabled\n"
	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
	"# echo 0 > /debug/tracing/tracing_enabled\n"
;
2262
2263static ssize_t
2264tracing_readme_read(struct file *filp, char __user *ubuf,
2265 size_t cnt, loff_t *ppos)
2266{
2267 return simple_read_from_buffer(ubuf, cnt, ppos,
2268 readme_msg, strlen(readme_msg));
2269}
2270
2271static struct file_operations tracing_readme_fops = {
2272 .open = tracing_open_generic,
2273 .read = tracing_readme_read,
2274};
2275
2276static ssize_t
2277tracing_ctrl_read(struct file *filp, char __user *ubuf,
2278 size_t cnt, loff_t *ppos)
2279{
2280 struct trace_array *tr = filp->private_data;
2281 char buf[64];
2282 int r;
2283
2284 r = sprintf(buf, "%ld\n", tr->ctrl);
2285 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2286}
2287
2288static ssize_t
2289tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2290 size_t cnt, loff_t *ppos)
2291{
2292 struct trace_array *tr = filp->private_data;
2293 char buf[64];
2294 long val;
2295 int ret;
2296
2297 if (cnt >= sizeof(buf))
2298 return -EINVAL;
2299
2300 if (copy_from_user(&buf, ubuf, cnt))
2301 return -EFAULT;
2302
2303 buf[cnt] = 0;
2304
2305 ret = strict_strtoul(buf, 10, &val);
2306 if (ret < 0)
2307 return ret;
2308
2309 val = !!val;
2310
2311 mutex_lock(&trace_types_lock);
2312 if (tr->ctrl ^ val) {
2313 if (val)
2314 tracer_enabled = 1;
2315 else
2316 tracer_enabled = 0;
2317
2318 tr->ctrl = val;
2319
2320 if (current_trace && current_trace->ctrl_update)
2321 current_trace->ctrl_update(tr);
2322 }
2323 mutex_unlock(&trace_types_lock);
2324
2325 filp->f_pos += cnt;
2326
2327 return cnt;
2328}
2329
2330static ssize_t
2331tracing_set_trace_read(struct file *filp, char __user *ubuf,
2332 size_t cnt, loff_t *ppos)
2333{
2334 char buf[max_tracer_type_len+2];
2335 int r;
2336
2337 mutex_lock(&trace_types_lock);
2338 if (current_trace)
2339 r = sprintf(buf, "%s\n", current_trace->name);
2340 else
2341 r = sprintf(buf, "\n");
2342 mutex_unlock(&trace_types_lock);
2343
2344 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2345}
2346
2347static ssize_t
2348tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2349 size_t cnt, loff_t *ppos)
2350{
2351 struct trace_array *tr = &global_trace;
2352 struct tracer *t;
2353 char buf[max_tracer_type_len+1];
2354 int i;
2355
2356 if (cnt > max_tracer_type_len)
2357 cnt = max_tracer_type_len;
2358
2359 if (copy_from_user(&buf, ubuf, cnt))
2360 return -EFAULT;
2361
2362 buf[cnt] = 0;
2363
2364 /* strip ending whitespace. */
2365 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2366 buf[i] = 0;
2367
2368 mutex_lock(&trace_types_lock);
2369 for (t = trace_types; t; t = t->next) {
2370 if (strcmp(t->name, buf) == 0)
2371 break;
2372 }
2373 if (!t || t == current_trace)
2374 goto out;
2375
2376 if (current_trace && current_trace->reset)
2377 current_trace->reset(tr);
2378
2379 current_trace = t;
2380 if (t->init)
2381 t->init(tr);
2382
2383 out:
2384 mutex_unlock(&trace_types_lock);
2385
2386 filp->f_pos += cnt;
2387
2388 return cnt;
2389}
2390
2391static ssize_t
2392tracing_max_lat_read(struct file *filp, char __user *ubuf,
2393 size_t cnt, loff_t *ppos)
2394{
2395 unsigned long *ptr = filp->private_data;
2396 char buf[64];
2397 int r;
2398
2399 r = snprintf(buf, sizeof(buf), "%ld\n",
2400 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2401 if (r > sizeof(buf))
2402 r = sizeof(buf);
2403 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2404}
2405
2406static ssize_t
2407tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2408 size_t cnt, loff_t *ppos)
2409{
2410 long *ptr = filp->private_data;
2411 char buf[64];
2412 long val;
2413 int ret;
2414
2415 if (cnt >= sizeof(buf))
2416 return -EINVAL;
2417
2418 if (copy_from_user(&buf, ubuf, cnt))
2419 return -EFAULT;
2420
2421 buf[cnt] = 0;
2422
2423 ret = strict_strtoul(buf, 10, &val);
2424 if (ret < 0)
2425 return ret;
2426
2427 *ptr = val * 1000;
2428
2429 return cnt;
2430}
2431
2432static atomic_t tracing_reader;
2433
2434static int tracing_open_pipe(struct inode *inode, struct file *filp)
2435{
2436 struct trace_iterator *iter;
2437
2438 if (tracing_disabled)
2439 return -ENODEV;
2440
2441 /* We only allow for reader of the pipe */
2442 if (atomic_inc_return(&tracing_reader) != 1) {
2443 atomic_dec(&tracing_reader);
2444 return -EBUSY;
2445 }
2446
2447 /* create a buffer to store the information to pass to userspace */
2448 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2449 if (!iter)
2450 return -ENOMEM;
2451
2452 mutex_lock(&trace_types_lock);
2453 iter->tr = &global_trace;
2454 iter->trace = current_trace;
2455 filp->private_data = iter;
2456
2457 if (iter->trace->pipe_open)
2458 iter->trace->pipe_open(iter);
2459 mutex_unlock(&trace_types_lock);
2460
2461 return 0;
2462}
2463
2464static int tracing_release_pipe(struct inode *inode, struct file *file)
2465{
2466 struct trace_iterator *iter = file->private_data;
2467
2468 kfree(iter);
2469 atomic_dec(&tracing_reader);
2470
2471 return 0;
2472}
2473
2474static unsigned int
2475tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2476{
2477 struct trace_iterator *iter = filp->private_data;
2478
2479 if (trace_flags & TRACE_ITER_BLOCK) {
2480 /*
2481 * Always select as readable when in blocking mode
2482 */
2483 return POLLIN | POLLRDNORM;
2484 } else {
2485 if (!trace_empty(iter))
2486 return POLLIN | POLLRDNORM;
2487 poll_wait(filp, &trace_wait, poll_table);
2488 if (!trace_empty(iter))
2489 return POLLIN | POLLRDNORM;
2490
2491 return 0;
2492 }
2493}
2494
2495/*
2496 * Consumer reader.
2497 */
2498static ssize_t
2499tracing_read_pipe(struct file *filp, char __user *ubuf,
2500 size_t cnt, loff_t *ppos)
2501{
2502 struct trace_iterator *iter = filp->private_data;
2503 struct trace_array_cpu *data;
2504 static cpumask_t mask;
2505 unsigned long flags;
2506#ifdef CONFIG_FTRACE
2507 int ftrace_save;
2508#endif
2509 int cpu;
2510 ssize_t sret;
2511
2512 /* return any leftover data */
2513 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2514 if (sret != -EBUSY)
2515 return sret;
2516 sret = 0;
2517
2518 trace_seq_reset(&iter->seq);
2519
2520 mutex_lock(&trace_types_lock);
2521 if (iter->trace->read) {
2522 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2523 if (sret)
2524 goto out;
2525 }
2526
2527 while (trace_empty(iter)) {
2528
2529 if ((filp->f_flags & O_NONBLOCK)) {
2530 sret = -EAGAIN;
2531 goto out;
2532 }
2533
2534 /*
2535 * This is a make-shift waitqueue. The reason we don't use
2536 * an actual wait queue is because:
2537 * 1) we only ever have one waiter
2538 * 2) the tracing, traces all functions, we don't want
2539 * the overhead of calling wake_up and friends
2540 * (and tracing them too)
2541 * Anyway, this is really very primitive wakeup.
2542 */
2543 set_current_state(TASK_INTERRUPTIBLE);
2544 iter->tr->waiter = current;
2545
2546 mutex_unlock(&trace_types_lock);
2547
2548 /* sleep for 100 msecs, and try again. */
2549 schedule_timeout(HZ/10);
2550
2551 mutex_lock(&trace_types_lock);
2552
2553 iter->tr->waiter = NULL;
2554
2555 if (signal_pending(current)) {
2556 sret = -EINTR;
2557 goto out;
2558 }
2559
2560 if (iter->trace != current_trace)
2561 goto out;
2562
2563 /*
2564 * We block until we read something and tracing is disabled.
2565 * We still block if tracing is disabled, but we have never
2566 * read anything. This allows a user to cat this file, and
2567 * then enable tracing. But after we have read something,
2568 * we give an EOF when tracing is again disabled.
2569 *
2570 * iter->pos will be 0 if we haven't read anything.
2571 */
2572 if (!tracer_enabled && iter->pos)
2573 break;
2574
2575 continue;
2576 }
2577
2578 /* stop when tracing is finished */
2579 if (trace_empty(iter))
2580 goto out;
2581
2582 if (cnt >= PAGE_SIZE)
2583 cnt = PAGE_SIZE - 1;
2584
2585 /* reset all but tr, trace, and overruns */
2586 memset(&iter->seq, 0,
2587 sizeof(struct trace_iterator) -
2588 offsetof(struct trace_iterator, seq));
2589 iter->pos = -1;
2590
2591 /*
2592 * We need to stop all tracing on all CPUS to read the
2593 * the next buffer. This is a bit expensive, but is
2594 * not done often. We fill all what we can read,
2595 * and then release the locks again.
2596 */
2597
2598 cpus_clear(mask);
2599 local_irq_save(flags);
2600#ifdef CONFIG_FTRACE
2601 ftrace_save = ftrace_enabled;
2602 ftrace_enabled = 0;
2603#endif
2604 smp_wmb();
2605 for_each_tracing_cpu(cpu) {
2606 data = iter->tr->data[cpu];
2607
2608 if (!head_page(data) || !data->trace_idx)
2609 continue;
2610
2611 atomic_inc(&data->disabled);
2612 cpu_set(cpu, mask);
2613 }
2614
2615 for_each_cpu_mask(cpu, mask) {
2616 data = iter->tr->data[cpu];
2617 __raw_spin_lock(&data->lock);
2618
2619 if (data->overrun > iter->last_overrun[cpu])
2620 iter->overrun[cpu] +=
2621 data->overrun - iter->last_overrun[cpu];
2622 iter->last_overrun[cpu] = data->overrun;
2623 }
2624
2625 while (find_next_entry_inc(iter) != NULL) {
2626 int ret;
2627 int len = iter->seq.len;
2628
2629 ret = print_trace_line(iter);
2630 if (!ret) {
2631 /* don't print partial lines */
2632 iter->seq.len = len;
2633 break;
2634 }
2635
2636 trace_consume(iter);
2637
2638 if (iter->seq.len >= cnt)
2639 break;
2640 }
2641
2642 for_each_cpu_mask(cpu, mask) {
2643 data = iter->tr->data[cpu];
2644 __raw_spin_unlock(&data->lock);
2645 }
2646
2647 for_each_cpu_mask(cpu, mask) {
2648 data = iter->tr->data[cpu];
2649 atomic_dec(&data->disabled);
2650 }
2651#ifdef CONFIG_FTRACE
2652 ftrace_enabled = ftrace_save;
2653#endif
2654 local_irq_restore(flags);
2655
2656 /* Now copy what we have to the user */
2657 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2658 if (iter->seq.readpos >= iter->seq.len)
2659 trace_seq_reset(&iter->seq);
2660 if (sret == -EBUSY)
2661 sret = 0;
2662
2663out:
2664 mutex_unlock(&trace_types_lock);
2665
2666 return sret;
2667}
2668
2669static ssize_t
2670tracing_entries_read(struct file *filp, char __user *ubuf,
2671 size_t cnt, loff_t *ppos)
2672{
2673 struct trace_array *tr = filp->private_data;
2674 char buf[64];
2675 int r;
2676
2677 r = sprintf(buf, "%lu\n", tr->entries);
2678 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2679}
2680
2681static ssize_t
2682tracing_entries_write(struct file *filp, const char __user *ubuf,
2683 size_t cnt, loff_t *ppos)
2684{
2685 unsigned long val;
2686 char buf[64];
2687 int i, ret;
2688
2689 if (cnt >= sizeof(buf))
2690 return -EINVAL;
2691
2692 if (copy_from_user(&buf, ubuf, cnt))
2693 return -EFAULT;
2694
2695 buf[cnt] = 0;
2696
2697 ret = strict_strtoul(buf, 10, &val);
2698 if (ret < 0)
2699 return ret;
2700
2701 /* must have at least 1 entry */
2702 if (!val)
2703 return -EINVAL;
2704
2705 mutex_lock(&trace_types_lock);
2706
2707 if (current_trace != &no_tracer) {
2708 cnt = -EBUSY;
2709 pr_info("ftrace: set current_tracer to none"
2710 " before modifying buffer size\n");
2711 goto out;
2712 }
2713
2714 if (val > global_trace.entries) {
2715 long pages_requested;
2716 unsigned long freeable_pages;
2717
2718 /* make sure we have enough memory before mapping */
2719 pages_requested =
2720 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2721
2722 /* account for each buffer (and max_tr) */
2723 pages_requested *= tracing_nr_buffers * 2;
2724
2725 /* Check for overflow */
2726 if (pages_requested < 0) {
2727 cnt = -ENOMEM;
2728 goto out;
2729 }
2730
2731 freeable_pages = determine_dirtyable_memory();
2732
2733 /* we only allow to request 1/4 of useable memory */
2734 if (pages_requested >
2735 ((freeable_pages + tracing_pages_allocated) / 4)) {
2736 cnt = -ENOMEM;
2737 goto out;
2738 }
2739
2740 while (global_trace.entries < val) {
2741 if (trace_alloc_page()) {
2742 cnt = -ENOMEM;
2743 goto out;
2744 }
2745 /* double check that we don't go over the known pages */
2746 if (tracing_pages_allocated > pages_requested)
2747 break;
2748 }
2749
2750 } else {
2751 /* include the number of entries in val (inc of page entries) */
2752 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2753 trace_free_page();
2754 }
2755
2756 /* check integrity */
2757 for_each_tracing_cpu(i)
2758 check_pages(global_trace.data[i]);
2759
2760 filp->f_pos += cnt;
2761
2762 /* If check pages failed, return ENOMEM */
2763 if (tracing_disabled)
2764 cnt = -ENOMEM;
2765 out:
2766 max_tr.entries = global_trace.entries;
2767 mutex_unlock(&trace_types_lock);
2768
2769 return cnt;
2770}
2771
2772static struct file_operations tracing_max_lat_fops = {
2773 .open = tracing_open_generic,
2774 .read = tracing_max_lat_read,
2775 .write = tracing_max_lat_write,
2776};
2777
2778static struct file_operations tracing_ctrl_fops = {
2779 .open = tracing_open_generic,
2780 .read = tracing_ctrl_read,
2781 .write = tracing_ctrl_write,
2782};
2783
2784static struct file_operations set_tracer_fops = {
2785 .open = tracing_open_generic,
2786 .read = tracing_set_trace_read,
2787 .write = tracing_set_trace_write,
2788};
2789
2790static struct file_operations tracing_pipe_fops = {
2791 .open = tracing_open_pipe,
2792 .poll = tracing_poll_pipe,
2793 .read = tracing_read_pipe,
2794 .release = tracing_release_pipe,
2795};
2796
2797static struct file_operations tracing_entries_fops = {
2798 .open = tracing_open_generic,
2799 .read = tracing_entries_read,
2800 .write = tracing_entries_write,
2801};
2802
2803#ifdef CONFIG_DYNAMIC_FTRACE
2804
2805static ssize_t
2806tracing_read_long(struct file *filp, char __user *ubuf,
2807 size_t cnt, loff_t *ppos)
2808{
2809 unsigned long *p = filp->private_data;
2810 char buf[64];
2811 int r;
2812
2813 r = sprintf(buf, "%ld\n", *p);
2814
2815 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2816}
2817
2818static struct file_operations tracing_read_long_fops = {
2819 .open = tracing_open_generic,
2820 .read = tracing_read_long,
2821};
2822#endif
2823
2824static struct dentry *d_tracer;
2825
2826struct dentry *tracing_init_dentry(void)
2827{
2828 static int once;
2829
2830 if (d_tracer)
2831 return d_tracer;
2832
2833 d_tracer = debugfs_create_dir("tracing", NULL);
2834
2835 if (!d_tracer && !once) {
2836 once = 1;
2837 pr_warning("Could not create debugfs directory 'tracing'\n");
2838 return NULL;
2839 }
2840
2841 return d_tracer;
2842}
2843
2844#ifdef CONFIG_FTRACE_SELFTEST
2845/* Let selftest have access to static functions in this file */
2846#include "trace_selftest.c"
2847#endif
2848
2849static __init void tracer_init_debugfs(void)
2850{
2851 struct dentry *d_tracer;
2852 struct dentry *entry;
2853
2854 d_tracer = tracing_init_dentry();
2855
2856 entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2857 &global_trace, &tracing_ctrl_fops);
2858 if (!entry)
2859 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2860
2861 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2862 NULL, &tracing_iter_fops);
2863 if (!entry)
2864 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2865
2866 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2867 NULL, &tracing_cpumask_fops);
2868 if (!entry)
2869 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2870
2871 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2872 &global_trace, &tracing_lt_fops);
2873 if (!entry)
2874 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2875
2876 entry = debugfs_create_file("trace", 0444, d_tracer,
2877 &global_trace, &tracing_fops);
2878 if (!entry)
2879 pr_warning("Could not create debugfs 'trace' entry\n");
2880
2881 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2882 &global_trace, &show_traces_fops);
2883 if (!entry)
2884 pr_warning("Could not create debugfs 'trace' entry\n");
2885
2886 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2887 &global_trace, &set_tracer_fops);
2888 if (!entry)
2889 pr_warning("Could not create debugfs 'trace' entry\n");
2890
2891 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2892 &tracing_max_latency,
2893 &tracing_max_lat_fops);
2894 if (!entry)
2895 pr_warning("Could not create debugfs "
2896 "'tracing_max_latency' entry\n");
2897
2898 entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2899 &tracing_thresh, &tracing_max_lat_fops);
2900 if (!entry)
2901 pr_warning("Could not create debugfs "
2902 "'tracing_threash' entry\n");
2903 entry = debugfs_create_file("README", 0644, d_tracer,
2904 NULL, &tracing_readme_fops);
2905 if (!entry)
2906 pr_warning("Could not create debugfs 'README' entry\n");
2907
2908 entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2909 NULL, &tracing_pipe_fops);
2910 if (!entry)
2911 pr_warning("Could not create debugfs "
2912 "'tracing_threash' entry\n");
2913
2914 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2915 &global_trace, &tracing_entries_fops);
2916 if (!entry)
2917 pr_warning("Could not create debugfs "
2918 "'tracing_threash' entry\n");
2919
2920#ifdef CONFIG_DYNAMIC_FTRACE
2921 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2922 &ftrace_update_tot_cnt,
2923 &tracing_read_long_fops);
2924 if (!entry)
2925 pr_warning("Could not create debugfs "
2926 "'dyn_ftrace_total_info' entry\n");
2927#endif
2928#ifdef CONFIG_SYSPROF_TRACER
2929 init_tracer_sysprof_debugfs(d_tracer);
2930#endif
2931}
2932
2933static int trace_alloc_page(void)
2934{
2935 struct trace_array_cpu *data;
2936 struct page *page, *tmp;
2937 LIST_HEAD(pages);
2938 void *array;
2939 unsigned pages_allocated = 0;
2940 int i;
2941
2942 /* first allocate a page for each CPU */
2943 for_each_tracing_cpu(i) {
2944 array = (void *)__get_free_page(GFP_KERNEL);
2945 if (array == NULL) {
2946 printk(KERN_ERR "tracer: failed to allocate page"
2947 "for trace buffer!\n");
2948 goto free_pages;
2949 }
2950
2951 pages_allocated++;
2952 page = virt_to_page(array);
2953 list_add(&page->lru, &pages);
2954
2955/* Only allocate if we are actually using the max trace */
2956#ifdef CONFIG_TRACER_MAX_TRACE
2957 array = (void *)__get_free_page(GFP_KERNEL);
2958 if (array == NULL) {
2959 printk(KERN_ERR "tracer: failed to allocate page"
2960 "for trace buffer!\n");
2961 goto free_pages;
2962 }
2963 pages_allocated++;
2964 page = virt_to_page(array);
2965 list_add(&page->lru, &pages);
2966#endif
2967 }
2968
2969 /* Now that we successfully allocate a page per CPU, add them */
2970 for_each_tracing_cpu(i) {
2971 data = global_trace.data[i];
2972 page = list_entry(pages.next, struct page, lru);
2973 list_del_init(&page->lru);
2974 list_add_tail(&page->lru, &data->trace_pages);
2975 ClearPageLRU(page);
2976
2977#ifdef CONFIG_TRACER_MAX_TRACE
2978 data = max_tr.data[i];
2979 page = list_entry(pages.next, struct page, lru);
2980 list_del_init(&page->lru);
2981 list_add_tail(&page->lru, &data->trace_pages);
2982 SetPageLRU(page);
2983#endif
2984 }
2985 tracing_pages_allocated += pages_allocated;
2986 global_trace.entries += ENTRIES_PER_PAGE;
2987
2988 return 0;
2989
2990 free_pages:
2991 list_for_each_entry_safe(page, tmp, &pages, lru) {
2992 list_del_init(&page->lru);
2993 __free_page(page);
2994 }
2995 return -ENOMEM;
2996}
2997
2998static int trace_free_page(void)
2999{
3000 struct trace_array_cpu *data;
3001 struct page *page;
3002 struct list_head *p;
3003 int i;
3004 int ret = 0;
3005
3006 /* free one page from each buffer */
3007 for_each_tracing_cpu(i) {
3008 data = global_trace.data[i];
3009 p = data->trace_pages.next;
3010 if (p == &data->trace_pages) {
3011 /* should never happen */
3012 WARN_ON(1);
3013 tracing_disabled = 1;
3014 ret = -1;
3015 break;
3016 }
3017 page = list_entry(p, struct page, lru);
3018 ClearPageLRU(page);
3019 list_del(&page->lru);
3020 tracing_pages_allocated--;
3021 tracing_pages_allocated--;
3022 __free_page(page);
3023
3024 tracing_reset(data);
3025
3026#ifdef CONFIG_TRACER_MAX_TRACE
3027 data = max_tr.data[i];
3028 p = data->trace_pages.next;
3029 if (p == &data->trace_pages) {
3030 /* should never happen */
3031 WARN_ON(1);
3032 tracing_disabled = 1;
3033 ret = -1;
3034 break;
3035 }
3036 page = list_entry(p, struct page, lru);
3037 ClearPageLRU(page);
3038 list_del(&page->lru);
3039 __free_page(page);
3040
3041 tracing_reset(data);
3042#endif
3043 }
3044 global_trace.entries -= ENTRIES_PER_PAGE;
3045
3046 return ret;
3047}
3048
3049__init static int tracer_alloc_buffers(void)
3050{
3051 struct trace_array_cpu *data;
3052 void *array;
3053 struct page *page;
3054 int pages = 0;
3055 int ret = -ENOMEM;
3056 int i;
3057
3058 /* TODO: make the number of buffers hot pluggable with CPUS */
3059 tracing_nr_buffers = num_possible_cpus();
3060 tracing_buffer_mask = cpu_possible_map;
3061
3062 /* Allocate the first page for all buffers */
3063 for_each_tracing_cpu(i) {
3064 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3065 max_tr.data[i] = &per_cpu(max_data, i);
3066
3067 array = (void *)__get_free_page(GFP_KERNEL);
3068 if (array == NULL) {
3069 printk(KERN_ERR "tracer: failed to allocate page"
3070 "for trace buffer!\n");
3071 goto free_buffers;
3072 }
3073
3074 /* set the array to the list */
3075 INIT_LIST_HEAD(&data->trace_pages);
3076 page = virt_to_page(array);
3077 list_add(&page->lru, &data->trace_pages);
3078 /* use the LRU flag to differentiate the two buffers */
3079 ClearPageLRU(page);
3080
3081 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3082 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3083
3084/* Only allocate if we are actually using the max trace */
3085#ifdef CONFIG_TRACER_MAX_TRACE
3086 array = (void *)__get_free_page(GFP_KERNEL);
3087 if (array == NULL) {
3088 printk(KERN_ERR "tracer: failed to allocate page"
3089 "for trace buffer!\n");
3090 goto free_buffers;
3091 }
3092
3093 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3094 page = virt_to_page(array);
3095 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3096 SetPageLRU(page);
3097#endif
3098 }
3099
3100 /*
3101 * Since we allocate by orders of pages, we may be able to
3102 * round up a bit.
3103 */
3104 global_trace.entries = ENTRIES_PER_PAGE;
3105 pages++;
3106
3107 while (global_trace.entries < trace_nr_entries) {
3108 if (trace_alloc_page())
3109 break;
3110 pages++;
3111 }
3112 max_tr.entries = global_trace.entries;
3113
3114 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
3115 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
3116 pr_info(" actual entries %ld\n", global_trace.entries);
3117
3118 tracer_init_debugfs();
3119
3120 trace_init_cmdlines();
3121
3122 register_tracer(&no_tracer);
3123 current_trace = &no_tracer;
3124
3125 /* All seems OK, enable tracing */
3126 global_trace.ctrl = tracer_enabled;
3127 tracing_disabled = 0;
3128
3129 return 0;
3130
3131 free_buffers:
3132 for (i-- ; i >= 0; i--) {
3133 struct page *page, *tmp;
3134 struct trace_array_cpu *data = global_trace.data[i];
3135
3136 if (data) {
3137 list_for_each_entry_safe(page, tmp,
3138 &data->trace_pages, lru) {
3139 list_del_init(&page->lru);
3140 __free_page(page);
3141 }
3142 }
3143
3144#ifdef CONFIG_TRACER_MAX_TRACE
3145 data = max_tr.data[i];
3146 if (data) {
3147 list_for_each_entry_safe(page, tmp,
3148 &data->trace_pages, lru) {
3149 list_del_init(&page->lru);
3150 __free_page(page);
3151 }
3152 }
3153#endif
3154 }
3155 return ret;
3156}
3157fs_initcall(tracer_alloc_buffers);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
new file mode 100644
index 000000000000..f69f86788c2b
--- /dev/null
+++ b/kernel/trace/trace.h
@@ -0,0 +1,339 @@
1#ifndef _LINUX_KERNEL_TRACE_H
2#define _LINUX_KERNEL_TRACE_H
3
4#include <linux/fs.h>
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/clocksource.h>
8#include <linux/mmiotrace.h>
9
/*
 * Record kinds stored in trace_entry.type.  The first/last members
 * are range markers rather than record kinds of their own.
 */
enum trace_type {
	__TRACE_FIRST_TYPE = 0,

	TRACE_FN,		/* struct ftrace_entry */
	TRACE_CTX,		/* struct ctx_switch_entry */
	TRACE_WAKE,
	TRACE_STACK,		/* struct stack_entry */
	TRACE_SPECIAL,		/* struct special_entry */
	TRACE_MMIO_RW,		/* struct mmiotrace_rw */
	TRACE_MMIO_MAP,		/* struct mmiotrace_map */

	__TRACE_LAST_TYPE
};
23
/*
 * Function trace entry - function address and parent function address:
 */
struct ftrace_entry {
	unsigned long		ip;		/* the function itself */
	unsigned long		parent_ip;	/* the parent function */
};
31
/*
 * Context switch trace entry - which task (and prio) we switched from/to:
 */
struct ctx_switch_entry {
	/* task being switched out */
	unsigned int		prev_pid;
	unsigned char		prev_prio;
	unsigned char		prev_state;
	/* task being switched in */
	unsigned int		next_pid;
	unsigned char		next_prio;
	unsigned char		next_state;
};
43
/*
 * Special (free-form) trace entry: three caller-defined words.
 */
struct special_entry {
	unsigned long		arg1;
	unsigned long		arg2;
	unsigned long		arg3;
};
52
/*
 * Stack-trace entry: a fixed number of caller addresses.
 */

#define FTRACE_STACK_ENTRIES	8

struct stack_entry {
	unsigned long		caller[FTRACE_STACK_ENTRIES];
};
62
63/*
64 * The trace entry - the most basic unit of tracing. This is what
65 * is printed in the end as a single line in the trace output, such as:
66 *
67 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
68 */
69struct trace_entry {
70 char type;
71 char cpu;
72 char flags;
73 char preempt_count;
74 int pid;
75 cycle_t t;
76 union {
77 struct ftrace_entry fn;
78 struct ctx_switch_entry ctx;
79 struct special_entry special;
80 struct stack_entry stack;
81 struct mmiotrace_rw mmiorw;
82 struct mmiotrace_map mmiomap;
83 };
84};
85
86#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
87
88/*
89 * The CPU trace array - it consists of thousands of trace entries
90 * plus some other descriptor data: (for example which task started
91 * the trace, etc.)
92 */
93struct trace_array_cpu {
94 struct list_head trace_pages;
95 atomic_t disabled;
96 raw_spinlock_t lock;
97 struct lock_class_key lock_key;
98
99 /* these fields get copied into max-trace: */
100 unsigned trace_head_idx;
101 unsigned trace_tail_idx;
102 void *trace_head; /* producer */
103 void *trace_tail; /* consumer */
104 unsigned long trace_idx;
105 unsigned long overrun;
106 unsigned long saved_latency;
107 unsigned long critical_start;
108 unsigned long critical_end;
109 unsigned long critical_sequence;
110 unsigned long nice;
111 unsigned long policy;
112 unsigned long rt_priority;
113 cycle_t preempt_timestamp;
114 pid_t pid;
115 uid_t uid;
116 char comm[TASK_COMM_LEN];
117};
118
119struct trace_iterator;
120
121/*
122 * The trace array - an array of per-CPU trace arrays. This is the
123 * highest level data structure that individual tracers deal with.
124 * They have on/off state as well:
125 */
126struct trace_array {
127 unsigned long entries;
128 long ctrl;
129 int cpu;
130 cycle_t time_start;
131 struct task_struct *waiter;
132 struct trace_array_cpu *data[NR_CPUS];
133};
134
135/*
136 * A specific tracer, represented by methods that operate on a trace array:
137 */
138struct tracer {
139 const char *name;
140 void (*init)(struct trace_array *tr);
141 void (*reset)(struct trace_array *tr);
142 void (*open)(struct trace_iterator *iter);
143 void (*pipe_open)(struct trace_iterator *iter);
144 void (*close)(struct trace_iterator *iter);
145 void (*start)(struct trace_iterator *iter);
146 void (*stop)(struct trace_iterator *iter);
147 ssize_t (*read)(struct trace_iterator *iter,
148 struct file *filp, char __user *ubuf,
149 size_t cnt, loff_t *ppos);
150 void (*ctrl_update)(struct trace_array *tr);
151#ifdef CONFIG_FTRACE_STARTUP_TEST
152 int (*selftest)(struct tracer *trace,
153 struct trace_array *tr);
154#endif
155 int (*print_line)(struct trace_iterator *iter);
156 struct tracer *next;
157 int print_max;
158};
159
160struct trace_seq {
161 unsigned char buffer[PAGE_SIZE];
162 unsigned int len;
163 unsigned int readpos;
164};
165
166/*
167 * Trace iterator - used by printout routines who present trace
168 * results to users and which routines might sleep, etc:
169 */
170struct trace_iterator {
171 struct trace_array *tr;
172 struct tracer *trace;
173 void *private;
174 long last_overrun[NR_CPUS];
175 long overrun[NR_CPUS];
176
177 /* The below is zeroed out in pipe_read */
178 struct trace_seq seq;
179 struct trace_entry *ent;
180 int cpu;
181
182 struct trace_entry *prev_ent;
183 int prev_cpu;
184
185 unsigned long iter_flags;
186 loff_t pos;
187 unsigned long next_idx[NR_CPUS];
188 struct list_head *next_page[NR_CPUS];
189 unsigned next_page_idx[NR_CPUS];
190 long idx;
191};
192
/* Core helpers shared by the individual tracers. */
void tracing_reset(struct trace_array_cpu *data);
int tracing_open_generic(struct inode *inode, struct file *filp);
struct dentry *tracing_init_dentry(void);
void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
197
/* Entry recording functions: each takes the trace array and per-CPU data. */
void ftrace(struct trace_array *tr, struct trace_array_cpu *data,
	    unsigned long ip, unsigned long parent_ip,
	    unsigned long flags);
void tracing_sched_switch_trace(struct trace_array *tr,
				struct trace_array_cpu *data,
				struct task_struct *prev,
				struct task_struct *next,
				unsigned long flags);
void tracing_record_cmdline(struct task_struct *tsk);

void tracing_sched_wakeup_trace(struct trace_array *tr,
				struct trace_array_cpu *data,
				struct task_struct *wakee,
				struct task_struct *cur,
				unsigned long flags);
void trace_special(struct trace_array *tr, struct trace_array_cpu *data,
		   unsigned long arg1, unsigned long arg2,
		   unsigned long arg3);
void trace_function(struct trace_array *tr, struct trace_array_cpu *data,
		    unsigned long ip, unsigned long parent_ip,
		    unsigned long flags);
225
226void tracing_start_cmdline_record(void);
227void tracing_stop_cmdline_record(void);
228int register_tracer(struct tracer *type);
229void unregister_tracer(struct tracer *type);
230
231extern unsigned long nsecs_to_usecs(unsigned long nsecs);
232
233extern unsigned long tracing_max_latency;
234extern unsigned long tracing_thresh;
235
236void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
237void update_max_tr_single(struct trace_array *tr,
238 struct task_struct *tsk, int cpu);
239
240extern cycle_t ftrace_now(int cpu);
241
/* Function tracing control; compiles away without CONFIG_FTRACE. */
#ifdef CONFIG_FTRACE
void tracing_start_function_trace(void);
void tracing_stop_function_trace(void);
#else /* !CONFIG_FTRACE */
# define tracing_start_function_trace()		do { } while (0)
# define tracing_stop_function_trace()		do { } while (0)
#endif /* CONFIG_FTRACE */
249
250#ifdef CONFIG_CONTEXT_SWITCH_TRACER
/* Signature of a context-switch hook. */
typedef void (*tracer_switch_func_t)(void *private, void *__rq,
				     struct task_struct *prev,
				     struct task_struct *next);

/* A hook, its private cookie, and a link to the next entry. */
struct tracer_switch_ops {
	tracer_switch_func_t		func;
	void				*private;
	struct tracer_switch_ops	*next;
};
262
263#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
264
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;

/* Function used as the target of the dynamic ftrace self-test. */
#define DYN_FTRACE_TEST_NAME	trace_selftest_dynamic_test_func
extern int DYN_FTRACE_TEST_NAME(void);
#endif /* CONFIG_DYNAMIC_FTRACE */
270
#ifdef CONFIG_MMIOTRACE
/* Record an MMIO access / mapping event into the given per-CPU data. */
extern void __trace_mmiotrace_rw(struct trace_array *tr,
				 struct trace_array_cpu *data,
				 struct mmiotrace_rw *rw);
extern void __trace_mmiotrace_map(struct trace_array *tr,
				  struct trace_array_cpu *data,
				  struct mmiotrace_map *map);
#endif /* CONFIG_MMIOTRACE */
279
/*
 * Startup self-tests, one per tracer; each is only declared when the
 * tracer it exercises is configured in.
 */
#ifdef CONFIG_FTRACE_STARTUP_TEST

#ifdef CONFIG_FTRACE
extern int trace_selftest_startup_function(struct tracer *trace,
					   struct trace_array *tr);
#endif /* CONFIG_FTRACE */

#ifdef CONFIG_IRQSOFF_TRACER
extern int trace_selftest_startup_irqsoff(struct tracer *trace,
					  struct trace_array *tr);
#endif /* CONFIG_IRQSOFF_TRACER */

#ifdef CONFIG_PREEMPT_TRACER
extern int trace_selftest_startup_preemptoff(struct tracer *trace,
					     struct trace_array *tr);
#endif /* CONFIG_PREEMPT_TRACER */

#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
						 struct trace_array *tr);
#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */

#ifdef CONFIG_SCHED_TRACER
extern int trace_selftest_startup_wakeup(struct tracer *trace,
					 struct trace_array *tr);
#endif /* CONFIG_SCHED_TRACER */

#ifdef CONFIG_CONTEXT_SWITCH_TRACER
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
					       struct trace_array *tr);
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */

#ifdef CONFIG_SYSPROF_TRACER
extern int trace_selftest_startup_sysprof(struct tracer *trace,
					  struct trace_array *tr);
#endif /* CONFIG_SYSPROF_TRACER */

#endif /* CONFIG_FTRACE_STARTUP_TEST */
310
311extern void *head_page(struct trace_array_cpu *data);
312extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
313extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
314 size_t cnt);
315extern long ns2usecs(cycle_t nsec);
316
317extern unsigned long trace_flags;
318
/*
 * trace_iterator_flags is an enumeration that defines bit
 * positions into trace_flags that controls the output.
 *
 * NOTE: These bits must match the trace_options array in
 *       trace.c.
 */
enum trace_iterator_flags {
	TRACE_ITER_PRINT_PARENT		= 1 << 0,
	TRACE_ITER_SYM_OFFSET		= 1 << 1,
	TRACE_ITER_SYM_ADDR		= 1 << 2,
	TRACE_ITER_VERBOSE		= 1 << 3,
	TRACE_ITER_RAW			= 1 << 4,
	TRACE_ITER_HEX			= 1 << 5,
	TRACE_ITER_BIN			= 1 << 6,
	TRACE_ITER_BLOCK		= 1 << 7,
	TRACE_ITER_STACKTRACE		= 1 << 8,
	TRACE_ITER_SCHED_TREE		= 1 << 9,
};
338
339#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
new file mode 100644
index 000000000000..312144897970
--- /dev/null
+++ b/kernel/trace/trace_functions.c
@@ -0,0 +1,81 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/debugfs.h>
13#include <linux/uaccess.h>
14#include <linux/ftrace.h>
15#include <linux/fs.h>
16
17#include "trace.h"
18
19static void function_reset(struct trace_array *tr)
20{
21 int cpu;
22
23 tr->time_start = ftrace_now(tr->cpu);
24
25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]);
27}
28
29static void start_function_trace(struct trace_array *tr)
30{
31 tr->cpu = get_cpu();
32 function_reset(tr);
33 put_cpu();
34
35 tracing_start_cmdline_record();
36 tracing_start_function_trace();
37}
38
/* Undo start_function_trace(), in the reverse order. */
static void stop_function_trace(struct trace_array *tr)
{
	tracing_stop_function_trace();
	tracing_stop_cmdline_record();
}
44
45static void function_trace_init(struct trace_array *tr)
46{
47 if (tr->ctrl)
48 start_function_trace(tr);
49}
50
51static void function_trace_reset(struct trace_array *tr)
52{
53 if (tr->ctrl)
54 stop_function_trace(tr);
55}
56
57static void function_trace_ctrl_update(struct trace_array *tr)
58{
59 if (tr->ctrl)
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63}
64
65static struct tracer function_trace __read_mostly =
66{
67 .name = "ftrace",
68 .init = function_trace_init,
69 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update,
71#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function,
73#endif
74};
75
76static __init int init_function_trace(void)
77{
78 return register_tracer(&function_trace);
79}
80
81device_initcall(init_function_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 000000000000..ece6cfb649fa
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,490 @@
1/*
2 * trace irqs off critical timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * From code in the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/kallsyms.h>
13#include <linux/debugfs.h>
14#include <linux/uaccess.h>
15#include <linux/module.h>
16#include <linux/ftrace.h>
17#include <linux/fs.h>
18
19#include "trace.h"
20
21static struct trace_array *irqsoff_trace __read_mostly;
22static int tracer_enabled __read_mostly;
23
24static DEFINE_PER_CPU(int, tracing_cpu);
25
26static DEFINE_SPINLOCK(max_trace_lock);
27
28enum {
29 TRACER_IRQS_OFF = (1 << 1),
30 TRACER_PREEMPT_OFF = (1 << 2),
31};
32
33static int trace_type __read_mostly;
34
35#ifdef CONFIG_PREEMPT_TRACER
36static inline int
37preempt_trace(void)
38{
39 return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
40}
41#else
42# define preempt_trace() (0)
43#endif
44
45#ifdef CONFIG_IRQSOFF_TRACER
46static inline int
47irq_trace(void)
48{
49 return ((trace_type & TRACER_IRQS_OFF) &&
50 irqs_disabled());
51}
52#else
53# define irq_trace() (0)
54#endif
55
56/*
57 * Sequence count - we record it when starting a measurement and
58 * skip the latency if the sequence has changed - some other section
59 * did a maximum and could disturb our measurement with serial console
60 * printouts, etc. Truly coinciding maximum latencies should be rare
61 * and what happens together happens separately as well, so this doesn't
62 * decrease the validity of the maximum found:
63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence;
65
66#ifdef CONFIG_FTRACE
67/*
68 * irqsoff uses its own tracer function to keep the overhead down:
69 */
70static void
71irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
72{
73 struct trace_array *tr = irqsoff_trace;
74 struct trace_array_cpu *data;
75 unsigned long flags;
76 long disabled;
77 int cpu;
78
79 /*
80 * Does not matter if we preempt. We test the flags
81 * afterward, to see if irqs are disabled or not.
82 * If we preempt and get a false positive, the flags
83 * test will fail.
84 */
85 cpu = raw_smp_processor_id();
86 if (likely(!per_cpu(tracing_cpu, cpu)))
87 return;
88
89 local_save_flags(flags);
90 /* slight chance to get a false positive on tracing_cpu */
91 if (!irqs_disabled_flags(flags))
92 return;
93
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags);
99
100 atomic_dec(&data->disabled);
101}
102
103static struct ftrace_ops trace_ops __read_mostly =
104{
105 .func = irqsoff_tracer_call,
106};
107#endif /* CONFIG_FTRACE */
108
109/*
110 * Should this new latency be reported/recorded?
111 */
112static int report_latency(cycle_t delta)
113{
114 if (tracing_thresh) {
115 if (delta < tracing_thresh)
116 return 0;
117 } else {
118 if (delta <= tracing_max_latency)
119 return 0;
120 }
121 return 1;
122}
123
124static void
125check_critical_timing(struct trace_array *tr,
126 struct trace_array_cpu *data,
127 unsigned long parent_ip,
128 int cpu)
129{
130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta;
132 unsigned long flags;
133
134 /*
135 * usecs conversion is slow so we try to delay the conversion
136 * as long as possible:
137 */
138 T0 = data->preempt_timestamp;
139 T1 = ftrace_now(cpu);
140 delta = T1-T0;
141
142 local_save_flags(flags);
143
144 if (!report_latency(delta))
145 goto out;
146
147 spin_lock_irqsave(&max_trace_lock, flags);
148
149 /* check if we are still the max latency */
150 if (!report_latency(delta))
151 goto out_unlock;
152
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
154
155 latency = nsecs_to_usecs(delta);
156
157 if (data->critical_sequence != max_sequence)
158 goto out_unlock;
159
160 tracing_max_latency = delta;
161 t0 = nsecs_to_usecs(T0);
162 t1 = nsecs_to_usecs(T1);
163
164 data->critical_end = parent_ip;
165
166 update_max_tr_single(tr, current, cpu);
167
168 max_sequence++;
169
170out_unlock:
171 spin_unlock_irqrestore(&max_trace_lock, flags);
172
173out:
174 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
178}
179
180static inline void
181start_critical_timing(unsigned long ip, unsigned long parent_ip)
182{
183 int cpu;
184 struct trace_array *tr = irqsoff_trace;
185 struct trace_array_cpu *data;
186 unsigned long flags;
187
188 if (likely(!tracer_enabled))
189 return;
190
191 cpu = raw_smp_processor_id();
192
193 if (per_cpu(tracing_cpu, cpu))
194 return;
195
196 data = tr->data[cpu];
197
198 if (unlikely(!data) || atomic_read(&data->disabled))
199 return;
200
201 atomic_inc(&data->disabled);
202
203 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data);
207
208 local_save_flags(flags);
209
210 trace_function(tr, data, ip, parent_ip, flags);
211
212 per_cpu(tracing_cpu, cpu) = 1;
213
214 atomic_dec(&data->disabled);
215}
216
217static inline void
218stop_critical_timing(unsigned long ip, unsigned long parent_ip)
219{
220 int cpu;
221 struct trace_array *tr = irqsoff_trace;
222 struct trace_array_cpu *data;
223 unsigned long flags;
224
225 cpu = raw_smp_processor_id();
226 /* Always clear the tracing cpu on stopping the trace */
227 if (unlikely(per_cpu(tracing_cpu, cpu)))
228 per_cpu(tracing_cpu, cpu) = 0;
229 else
230 return;
231
232 if (!tracer_enabled)
233 return;
234
235 data = tr->data[cpu];
236
237 if (unlikely(!data) || unlikely(!head_page(data)) ||
238 !data->critical_start || atomic_read(&data->disabled))
239 return;
240
241 atomic_inc(&data->disabled);
242
243 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags);
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0;
247 atomic_dec(&data->disabled);
248}
249
250/* start and stop critical timings used to for stoppage (in idle) */
251void start_critical_timings(void)
252{
253 if (preempt_trace() || irq_trace())
254 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
255}
256EXPORT_SYMBOL_GPL(start_critical_timings);
257
258void stop_critical_timings(void)
259{
260 if (preempt_trace() || irq_trace())
261 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
262}
263EXPORT_SYMBOL_GPL(stop_critical_timings);
264
265#ifdef CONFIG_IRQSOFF_TRACER
266#ifdef CONFIG_PROVE_LOCKING
/*
 * Hardirqs are being enabled: end the timed section, unless the
 * preempt-off condition is still keeping it open.
 */
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
	if (preempt_trace() || !irq_trace())
		return;

	stop_critical_timing(a0, a1);
}
272
/*
 * Hardirqs were disabled: start a timed section, unless the
 * preempt-off condition already holds.
 */
void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
	if (preempt_trace() || !irq_trace())
		return;

	start_critical_timing(a0, a1);
}
278
279#else /* !CONFIG_PROVE_LOCKING */
280
/*
 * Stubs: intentionally empty in the !CONFIG_PROVE_LOCKING build.
 */

void early_boot_irqs_off(void)
{
	/* nothing to do */
}

void early_boot_irqs_on(void)
{
	/* nothing to do */
}

void trace_softirqs_on(unsigned long ip)
{
	/* nothing to do */
}

void trace_softirqs_off(unsigned long ip)
{
	/* nothing to do */
}

inline void print_irqtrace_events(struct task_struct *curr)
{
	/* nothing to do */
}
304
305/*
306 * We are only interested in hardirq on/off events:
307 */
308void trace_hardirqs_on(void)
309{
310 if (!preempt_trace() && irq_trace())
311 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
312}
313EXPORT_SYMBOL(trace_hardirqs_on);
314
315void trace_hardirqs_off(void)
316{
317 if (!preempt_trace() && irq_trace())
318 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
319}
320EXPORT_SYMBOL(trace_hardirqs_off);
321
322void trace_hardirqs_on_caller(unsigned long caller_addr)
323{
324 if (!preempt_trace() && irq_trace())
325 stop_critical_timing(CALLER_ADDR0, caller_addr);
326}
327EXPORT_SYMBOL(trace_hardirqs_on_caller);
328
329void trace_hardirqs_off_caller(unsigned long caller_addr)
330{
331 if (!preempt_trace() && irq_trace())
332 start_critical_timing(CALLER_ADDR0, caller_addr);
333}
334EXPORT_SYMBOL(trace_hardirqs_off_caller);
335
336#endif /* CONFIG_PROVE_LOCKING */
337#endif /* CONFIG_IRQSOFF_TRACER */
338
339#ifdef CONFIG_PREEMPT_TRACER
/* Preempt-on hook: end the timed section while preempt_trace() holds. */
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
	if (!preempt_trace())
		return;

	stop_critical_timing(a0, a1);
}
345
/* Preempt-off hook: start a timed section while preempt_trace() holds. */
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
	if (!preempt_trace())
		return;

	start_critical_timing(a0, a1);
}
351#endif /* CONFIG_PREEMPT_TRACER */
352
353static void start_irqsoff_tracer(struct trace_array *tr)
354{
355 register_ftrace_function(&trace_ops);
356 tracer_enabled = 1;
357}
358
359static void stop_irqsoff_tracer(struct trace_array *tr)
360{
361 tracer_enabled = 0;
362 unregister_ftrace_function(&trace_ops);
363}
364
365static void __irqsoff_tracer_init(struct trace_array *tr)
366{
367 irqsoff_trace = tr;
368 /* make sure that the tracer is visible */
369 smp_wmb();
370
371 if (tr->ctrl)
372 start_irqsoff_tracer(tr);
373}
374
375static void irqsoff_tracer_reset(struct trace_array *tr)
376{
377 if (tr->ctrl)
378 stop_irqsoff_tracer(tr);
379}
380
381static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
382{
383 if (tr->ctrl)
384 start_irqsoff_tracer(tr);
385 else
386 stop_irqsoff_tracer(tr);
387}
388
389static void irqsoff_tracer_open(struct trace_iterator *iter)
390{
391 /* stop the trace while dumping */
392 if (iter->tr->ctrl)
393 stop_irqsoff_tracer(iter->tr);
394}
395
396static void irqsoff_tracer_close(struct trace_iterator *iter)
397{
398 if (iter->tr->ctrl)
399 start_irqsoff_tracer(iter->tr);
400}
401
402#ifdef CONFIG_IRQSOFF_TRACER
403static void irqsoff_tracer_init(struct trace_array *tr)
404{
405 trace_type = TRACER_IRQS_OFF;
406
407 __irqsoff_tracer_init(tr);
408}
409static struct tracer irqsoff_tracer __read_mostly =
410{
411 .name = "irqsoff",
412 .init = irqsoff_tracer_init,
413 .reset = irqsoff_tracer_reset,
414 .open = irqsoff_tracer_open,
415 .close = irqsoff_tracer_close,
416 .ctrl_update = irqsoff_tracer_ctrl_update,
417 .print_max = 1,
418#ifdef CONFIG_FTRACE_SELFTEST
419 .selftest = trace_selftest_startup_irqsoff,
420#endif
421};
422# define register_irqsoff(trace) register_tracer(&trace)
423#else
424# define register_irqsoff(trace) do { } while (0)
425#endif
426
427#ifdef CONFIG_PREEMPT_TRACER
428static void preemptoff_tracer_init(struct trace_array *tr)
429{
430 trace_type = TRACER_PREEMPT_OFF;
431
432 __irqsoff_tracer_init(tr);
433}
434
435static struct tracer preemptoff_tracer __read_mostly =
436{
437 .name = "preemptoff",
438 .init = preemptoff_tracer_init,
439 .reset = irqsoff_tracer_reset,
440 .open = irqsoff_tracer_open,
441 .close = irqsoff_tracer_close,
442 .ctrl_update = irqsoff_tracer_ctrl_update,
443 .print_max = 1,
444#ifdef CONFIG_FTRACE_SELFTEST
445 .selftest = trace_selftest_startup_preemptoff,
446#endif
447};
448# define register_preemptoff(trace) register_tracer(&trace)
449#else
450# define register_preemptoff(trace) do { } while (0)
451#endif
452
453#if defined(CONFIG_IRQSOFF_TRACER) && \
454 defined(CONFIG_PREEMPT_TRACER)
455
456static void preemptirqsoff_tracer_init(struct trace_array *tr)
457{
458 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
459
460 __irqsoff_tracer_init(tr);
461}
462
463static struct tracer preemptirqsoff_tracer __read_mostly =
464{
465 .name = "preemptirqsoff",
466 .init = preemptirqsoff_tracer_init,
467 .reset = irqsoff_tracer_reset,
468 .open = irqsoff_tracer_open,
469 .close = irqsoff_tracer_close,
470 .ctrl_update = irqsoff_tracer_ctrl_update,
471 .print_max = 1,
472#ifdef CONFIG_FTRACE_SELFTEST
473 .selftest = trace_selftest_startup_preemptirqsoff,
474#endif
475};
476
477# define register_preemptirqsoff(trace) register_tracer(&trace)
478#else
479# define register_preemptirqsoff(trace) do { } while (0)
480#endif
481
482__init static int init_irqsoff_tracer(void)
483{
484 register_irqsoff(irqsoff_tracer);
485 register_preemptoff(preemptoff_tracer);
486 register_preemptirqsoff(preemptirqsoff_tracer);
487
488 return 0;
489}
490device_initcall(init_irqsoff_tracer);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
new file mode 100644
index 000000000000..b13dc19dcbb4
--- /dev/null
+++ b/kernel/trace/trace_mmiotrace.c
@@ -0,0 +1,295 @@
1/*
2 * Memory mapped I/O tracing
3 *
4 * Copyright (C) 2008 Pekka Paalanen <pq@iki.fi>
5 */
6
7#define DEBUG 1
8
9#include <linux/kernel.h>
10#include <linux/mmiotrace.h>
11#include <linux/pci.h>
12
13#include "trace.h"
14
15struct header_iter {
16 struct pci_dev *dev;
17};
18
19static struct trace_array *mmio_trace_array;
20static bool overrun_detected;
21
22static void mmio_reset_data(struct trace_array *tr)
23{
24 int cpu;
25
26 overrun_detected = false;
27 tr->time_start = ftrace_now(tr->cpu);
28
29 for_each_online_cpu(cpu)
30 tracing_reset(tr->data[cpu]);
31}
32
33static void mmio_trace_init(struct trace_array *tr)
34{
35 pr_debug("in %s\n", __func__);
36 mmio_trace_array = tr;
37 if (tr->ctrl) {
38 mmio_reset_data(tr);
39 enable_mmiotrace();
40 }
41}
42
43static void mmio_trace_reset(struct trace_array *tr)
44{
45 pr_debug("in %s\n", __func__);
46 if (tr->ctrl)
47 disable_mmiotrace();
48 mmio_reset_data(tr);
49 mmio_trace_array = NULL;
50}
51
52static void mmio_trace_ctrl_update(struct trace_array *tr)
53{
54 pr_debug("in %s\n", __func__);
55 if (tr->ctrl) {
56 mmio_reset_data(tr);
57 enable_mmiotrace();
58 } else {
59 disable_mmiotrace();
60 }
61}
62
63static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
64{
65 int ret = 0;
66 int i;
67 resource_size_t start, end;
68 const struct pci_driver *drv = pci_dev_driver(dev);
69
70 /* XXX: incomplete checks for trace_seq_printf() return value */
71 ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
72 dev->bus->number, dev->devfn,
73 dev->vendor, dev->device, dev->irq);
74 /*
75 * XXX: is pci_resource_to_user() appropriate, since we are
76 * supposed to interpret the __ioremap() phys_addr argument based on
77 * these printed values?
78 */
79 for (i = 0; i < 7; i++) {
80 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
81 ret += trace_seq_printf(s, " %llx",
82 (unsigned long long)(start |
83 (dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
84 }
85 for (i = 0; i < 7; i++) {
86 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
87 ret += trace_seq_printf(s, " %llx",
88 dev->resource[i].start < dev->resource[i].end ?
89 (unsigned long long)(end - start) + 1 : 0);
90 }
91 if (drv)
92 ret += trace_seq_printf(s, " %s\n", drv->name);
93 else
94 ret += trace_seq_printf(s, " \n");
95 return ret;
96}
97
98static void destroy_header_iter(struct header_iter *hiter)
99{
100 if (!hiter)
101 return;
102 pci_dev_put(hiter->dev);
103 kfree(hiter);
104}
105
106static void mmio_pipe_open(struct trace_iterator *iter)
107{
108 struct header_iter *hiter;
109 struct trace_seq *s = &iter->seq;
110
111 trace_seq_printf(s, "VERSION 20070824\n");
112
113 hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
114 if (!hiter)
115 return;
116
117 hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
118 iter->private = hiter;
119}
120
121/* XXX: This is not called when the pipe is closed! */
122static void mmio_close(struct trace_iterator *iter)
123{
124 struct header_iter *hiter = iter->private;
125 destroy_header_iter(hiter);
126 iter->private = NULL;
127}
128
129static unsigned long count_overruns(struct trace_iterator *iter)
130{
131 int cpu;
132 unsigned long cnt = 0;
133 for_each_online_cpu(cpu) {
134 cnt += iter->overrun[cpu];
135 iter->overrun[cpu] = 0;
136 }
137 return cnt;
138}
139
140static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp,
141 char __user *ubuf, size_t cnt, loff_t *ppos)
142{
143 ssize_t ret;
144 struct header_iter *hiter = iter->private;
145 struct trace_seq *s = &iter->seq;
146 unsigned long n;
147
148 n = count_overruns(iter);
149 if (n) {
150 /* XXX: This is later than where events were lost. */
151 trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
152 if (!overrun_detected)
153 pr_warning("mmiotrace has lost events.\n");
154 overrun_detected = true;
155 goto print_out;
156 }
157
158 if (!hiter)
159 return 0;
160
161 mmio_print_pcidev(s, hiter->dev);
162 hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev);
163
164 if (!hiter->dev) {
165 destroy_header_iter(hiter);
166 iter->private = NULL;
167 }
168
169print_out:
170 ret = trace_seq_to_user(s, ubuf, cnt);
171 return (ret == -EBUSY) ? 0 : ret;
172}
173
174static int mmio_print_rw(struct trace_iterator *iter)
175{
176 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw;
178 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t);
180 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t;
182 int ret = 1;
183
184 switch (entry->mmiorw.opcode) {
185 case MMIO_READ:
186 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
188 rw->width, secs, usec_rem, rw->map_id,
189 (unsigned long long)rw->phys,
190 rw->value, rw->pc, 0);
191 break;
192 case MMIO_WRITE:
193 ret = trace_seq_printf(s,
194 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
195 rw->width, secs, usec_rem, rw->map_id,
196 (unsigned long long)rw->phys,
197 rw->value, rw->pc, 0);
198 break;
199 case MMIO_UNKNOWN_OP:
200 ret = trace_seq_printf(s,
201 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n",
202 secs, usec_rem, rw->map_id,
203 (unsigned long long)rw->phys,
204 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
205 (rw->value >> 0) & 0xff, rw->pc, 0);
206 break;
207 default:
208 ret = trace_seq_printf(s, "rw what?\n");
209 break;
210 }
211 if (ret)
212 return 1;
213 return 0;
214}
215
216static int mmio_print_map(struct trace_iterator *iter)
217{
218 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap;
220 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t);
222 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t;
224 int ret = 1;
225
226 switch (entry->mmiorw.opcode) {
227 case MMIO_PROBE:
228 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
230 secs, usec_rem, m->map_id,
231 (unsigned long long)m->phys, m->virt, m->len,
232 0UL, 0);
233 break;
234 case MMIO_UNPROBE:
235 ret = trace_seq_printf(s,
236 "UNMAP %lu.%06lu %d 0x%lx %d\n",
237 secs, usec_rem, m->map_id, 0UL, 0);
238 break;
239 default:
240 ret = trace_seq_printf(s, "map what?\n");
241 break;
242 }
243 if (ret)
244 return 1;
245 return 0;
246}
247
248/* return 0 to abort printing without consuming current entry in pipe mode */
249static int mmio_print_line(struct trace_iterator *iter)
250{
251 switch (iter->ent->type) {
252 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter);
256 default:
257 return 1; /* ignore unknown entries */
258 }
259}
260
261static struct tracer mmio_tracer __read_mostly =
262{
263 .name = "mmiotrace",
264 .init = mmio_trace_init,
265 .reset = mmio_trace_reset,
266 .pipe_open = mmio_pipe_open,
267 .close = mmio_close,
268 .read = mmio_read,
269 .ctrl_update = mmio_trace_ctrl_update,
270 .print_line = mmio_print_line,
271};
272
273__init static int init_mmio_trace(void)
274{
275 return register_tracer(&mmio_tracer);
276}
277device_initcall(init_mmio_trace);
278
279void mmio_trace_rw(struct mmiotrace_rw *rw)
280{
281 struct trace_array *tr = mmio_trace_array;
282 struct trace_array_cpu *data = tr->data[smp_processor_id()];
283 __trace_mmiotrace_rw(tr, data, rw);
284}
285
286void mmio_trace_mapping(struct mmiotrace_map *map)
287{
288 struct trace_array *tr = mmio_trace_array;
289 struct trace_array_cpu *data;
290
291 preempt_disable();
292 data = tr->data[smp_processor_id()];
293 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable();
295}
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 000000000000..cb817a209aa0
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,286 @@
1/*
2 * trace context switch
3 *
4 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7#include <linux/module.h>
8#include <linux/fs.h>
9#include <linux/debugfs.h>
10#include <linux/kallsyms.h>
11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h>
14
15#include "trace.h"
16
17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref;
20
21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev,
23 struct task_struct *next)
24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data;
28 unsigned long flags;
29 long disabled;
30 int cpu;
31
32 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next);
34
35 if (!tracer_enabled)
36 return;
37
38 local_irq_save(flags);
39 cpu = raw_smp_processor_id();
40 data = tr->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42
43 if (likely(disabled == 1))
44 tracing_sched_switch_trace(tr, data, prev, next, flags);
45
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags);
48}
49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
78 task_struct *curr)
79{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data;
83 unsigned long flags;
84 long disabled;
85 int cpu;
86
87 if (!tracer_enabled)
88 return;
89
90 tracing_record_cmdline(curr);
91
92 local_irq_save(flags);
93 cpu = raw_smp_processor_id();
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
99
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags);
102}
103
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr)
130{
131 int cpu;
132
133 tr->time_start = ftrace_now(tr->cpu);
134
135 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]);
137}
138
139static int tracing_sched_register(void)
140{
141 int ret;
142
143 ret = marker_probe_register("kernel_sched_wakeup",
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker"
149 " probe to kernel_sched_wakeup\n");
150 return ret;
151 }
152
153 ret = marker_probe_register("kernel_sched_wakeup_new",
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker"
159 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe;
161 }
162
163 ret = marker_probe_register("kernel_sched_schedule",
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) {
169 pr_info("sched trace: Couldn't add marker"
170 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new;
172 }
173
174 return ret;
175fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new",
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup",
181 wake_up_callback,
182 &ctx_trace);
183 return ret;
184}
185
186static void tracing_sched_unregister(void)
187{
188 marker_probe_unregister("kernel_sched_schedule",
189 sched_switch_callback,
190 &ctx_trace);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197}
198
199static void tracing_start_sched_switch(void)
200{
201 long ref;
202
203 ref = atomic_inc_return(&sched_ref);
204 if (ref == 1)
205 tracing_sched_register();
206}
207
208static void tracing_stop_sched_switch(void)
209{
210 long ref;
211
212 ref = atomic_dec_and_test(&sched_ref);
213 if (ref)
214 tracing_sched_unregister();
215}
216
/*
 * Public entry point to start command-line recording; it is implemented
 * by taking a reference on the scheduler probes.
 */
void tracing_start_cmdline_record(void)
{
	tracing_start_sched_switch();
}
221
/*
 * Public entry point to stop command-line recording; it is implemented
 * by dropping a reference on the scheduler probes.
 */
void tracing_stop_cmdline_record(void)
{
	tracing_stop_sched_switch();
}
226
227static void start_sched_trace(struct trace_array *tr)
228{
229 sched_switch_reset(tr);
230 tracing_start_cmdline_record();
231 tracer_enabled = 1;
232}
233
234static void stop_sched_trace(struct trace_array *tr)
235{
236 tracer_enabled = 0;
237 tracing_stop_cmdline_record();
238}
239
240static void sched_switch_trace_init(struct trace_array *tr)
241{
242 ctx_trace = tr;
243
244 if (tr->ctrl)
245 start_sched_trace(tr);
246}
247
248static void sched_switch_trace_reset(struct trace_array *tr)
249{
250 if (tr->ctrl)
251 stop_sched_trace(tr);
252}
253
254static void sched_switch_trace_ctrl_update(struct trace_array *tr)
255{
256 /* When starting a new trace, reset the buffers */
257 if (tr->ctrl)
258 start_sched_trace(tr);
259 else
260 stop_sched_trace(tr);
261}
262
263static struct tracer sched_switch_trace __read_mostly =
264{
265 .name = "sched_switch",
266 .init = sched_switch_trace_init,
267 .reset = sched_switch_trace_reset,
268 .ctrl_update = sched_switch_trace_ctrl_update,
269#ifdef CONFIG_FTRACE_SELFTEST
270 .selftest = trace_selftest_startup_sched_switch,
271#endif
272};
273
274__init static int init_sched_switch_trace(void)
275{
276 int ret = 0;
277
278 if (atomic_read(&sched_ref))
279 ret = tracing_sched_register();
280 if (ret) {
281 pr_info("error registering scheduler trace\n");
282 return ret;
283 }
284 return register_tracer(&sched_switch_trace);
285}
286device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
new file mode 100644
index 000000000000..e303ccb62cdf
--- /dev/null
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -0,0 +1,453 @@
1/*
2 * trace task wakeup timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/debugfs.h>
15#include <linux/kallsyms.h>
16#include <linux/uaccess.h>
17#include <linux/ftrace.h>
18#include <linux/marker.h>
19
20#include "trace.h"
21
22static struct trace_array *wakeup_trace;
23static int __read_mostly tracer_enabled;
24
25static struct task_struct *wakeup_task;
26static int wakeup_cpu;
27static unsigned wakeup_prio = -1;
28
29static raw_spinlock_t wakeup_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
31
32static void __wakeup_reset(struct trace_array *tr);
33
34#ifdef CONFIG_FTRACE
35/*
36 * irqsoff uses its own tracer function to keep the overhead down:
37 */
38static void
39wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
40{
41 struct trace_array *tr = wakeup_trace;
42 struct trace_array_cpu *data;
43 unsigned long flags;
44 long disabled;
45 int resched;
46 int cpu;
47
48 if (likely(!wakeup_task))
49 return;
50
51 resched = need_resched();
52 preempt_disable_notrace();
53
54 cpu = raw_smp_processor_id();
55 data = tr->data[cpu];
56 disabled = atomic_inc_return(&data->disabled);
57 if (unlikely(disabled != 1))
58 goto out;
59
60 local_irq_save(flags);
61 __raw_spin_lock(&wakeup_lock);
62
63 if (unlikely(!wakeup_task))
64 goto unlock;
65
66 /*
67 * The task can't disappear because it needs to
68 * wake up first, and we have the wakeup_lock.
69 */
70 if (task_cpu(wakeup_task) != cpu)
71 goto unlock;
72
73 trace_function(tr, data, ip, parent_ip, flags);
74
75 unlock:
76 __raw_spin_unlock(&wakeup_lock);
77 local_irq_restore(flags);
78
79 out:
80 atomic_dec(&data->disabled);
81
82 /*
83 * To prevent recursion from the scheduler, if the
84 * resched flag was set before we entered, then
85 * don't reschedule.
86 */
87 if (resched)
88 preempt_enable_no_resched_notrace();
89 else
90 preempt_enable_notrace();
91}
92
93static struct ftrace_ops trace_ops __read_mostly =
94{
95 .func = wakeup_tracer_call,
96};
97#endif /* CONFIG_FTRACE */
98
99/*
100 * Should this new latency be reported/recorded?
101 */
102static int report_latency(cycle_t delta)
103{
104 if (tracing_thresh) {
105 if (delta < tracing_thresh)
106 return 0;
107 } else {
108 if (delta <= tracing_max_latency)
109 return 0;
110 }
111 return 1;
112}
113
114static void notrace
115wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
116 struct task_struct *next)
117{
118 unsigned long latency = 0, t0 = 0, t1 = 0;
119 struct trace_array **ptr = private;
120 struct trace_array *tr = *ptr;
121 struct trace_array_cpu *data;
122 cycle_t T0, T1, delta;
123 unsigned long flags;
124 long disabled;
125 int cpu;
126
127 if (unlikely(!tracer_enabled))
128 return;
129
130 /*
131 * When we start a new trace, we set wakeup_task to NULL
132 * and then set tracer_enabled = 1. We want to make sure
133 * that another CPU does not see the tracer_enabled = 1
134 * and the wakeup_task with an older task, that might
135 * actually be the same as next.
136 */
137 smp_rmb();
138
139 if (next != wakeup_task)
140 return;
141
142 /* The task we are waiting for is waking up */
143 data = tr->data[wakeup_cpu];
144
145 /* disable local data, not wakeup_cpu data */
146 cpu = raw_smp_processor_id();
147 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
148 if (likely(disabled != 1))
149 goto out;
150
151 local_irq_save(flags);
152 __raw_spin_lock(&wakeup_lock);
153
154 /* We could race with grabbing wakeup_lock */
155 if (unlikely(!tracer_enabled || next != wakeup_task))
156 goto out_unlock;
157
158 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
159
160 /*
161 * usecs conversion is slow so we try to delay the conversion
162 * as long as possible:
163 */
164 T0 = data->preempt_timestamp;
165 T1 = ftrace_now(cpu);
166 delta = T1-T0;
167
168 if (!report_latency(delta))
169 goto out_unlock;
170
171 latency = nsecs_to_usecs(delta);
172
173 tracing_max_latency = delta;
174 t0 = nsecs_to_usecs(T0);
175 t1 = nsecs_to_usecs(T1);
176
177 update_max_tr(tr, wakeup_task, wakeup_cpu);
178
179out_unlock:
180 __wakeup_reset(tr);
181 __raw_spin_unlock(&wakeup_lock);
182 local_irq_restore(flags);
183out:
184 atomic_dec(&tr->data[cpu]->disabled);
185}
186
187static notrace void
188sched_switch_callback(void *probe_data, void *call_data,
189 const char *format, va_list *args)
190{
191 struct task_struct *prev;
192 struct task_struct *next;
193 struct rq *__rq;
194
195 /* skip prev_pid %d next_pid %d prev_state %ld */
196 (void)va_arg(*args, int);
197 (void)va_arg(*args, int);
198 (void)va_arg(*args, long);
199 __rq = va_arg(*args, typeof(__rq));
200 prev = va_arg(*args, typeof(prev));
201 next = va_arg(*args, typeof(next));
202
203 tracing_record_cmdline(prev);
204
205 /*
206 * If tracer_switch_func only points to the local
207 * switch func, it still needs the ptr passed to it.
208 */
209 wakeup_sched_switch(probe_data, __rq, prev, next);
210}
211
212static void __wakeup_reset(struct trace_array *tr)
213{
214 struct trace_array_cpu *data;
215 int cpu;
216
217 for_each_possible_cpu(cpu) {
218 data = tr->data[cpu];
219 tracing_reset(data);
220 }
221
222 wakeup_cpu = -1;
223 wakeup_prio = -1;
224
225 if (wakeup_task)
226 put_task_struct(wakeup_task);
227
228 wakeup_task = NULL;
229}
230
231static void wakeup_reset(struct trace_array *tr)
232{
233 unsigned long flags;
234
235 local_irq_save(flags);
236 __raw_spin_lock(&wakeup_lock);
237 __wakeup_reset(tr);
238 __raw_spin_unlock(&wakeup_lock);
239 local_irq_restore(flags);
240}
241
242static void
243wakeup_check_start(struct trace_array *tr, struct task_struct *p,
244 struct task_struct *curr)
245{
246 int cpu = smp_processor_id();
247 unsigned long flags;
248 long disabled;
249
250 if (likely(!rt_task(p)) ||
251 p->prio >= wakeup_prio ||
252 p->prio >= curr->prio)
253 return;
254
255 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
256 if (unlikely(disabled != 1))
257 goto out;
258
259 /* interrupts should be off from try_to_wake_up */
260 __raw_spin_lock(&wakeup_lock);
261
262 /* check for races. */
263 if (!tracer_enabled || p->prio >= wakeup_prio)
264 goto out_locked;
265
266 /* reset the trace */
267 __wakeup_reset(tr);
268
269 wakeup_cpu = task_cpu(p);
270 wakeup_prio = p->prio;
271
272 wakeup_task = p;
273 get_task_struct(wakeup_task);
274
275 local_save_flags(flags);
276
277 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
278 trace_function(tr, tr->data[wakeup_cpu],
279 CALLER_ADDR1, CALLER_ADDR2, flags);
280
281out_locked:
282 __raw_spin_unlock(&wakeup_lock);
283out:
284 atomic_dec(&tr->data[cpu]->disabled);
285}
286
287static notrace void
288wake_up_callback(void *probe_data, void *call_data,
289 const char *format, va_list *args)
290{
291 struct trace_array **ptr = probe_data;
292 struct trace_array *tr = *ptr;
293 struct task_struct *curr;
294 struct task_struct *task;
295 struct rq *__rq;
296
297 if (likely(!tracer_enabled))
298 return;
299
300 /* Skip pid %d state %ld */
301 (void)va_arg(*args, int);
302 (void)va_arg(*args, long);
303 /* now get the meat: "rq %p task %p rq->curr %p" */
304 __rq = va_arg(*args, typeof(__rq));
305 task = va_arg(*args, typeof(task));
306 curr = va_arg(*args, typeof(curr));
307
308 tracing_record_cmdline(task);
309 tracing_record_cmdline(curr);
310
311 wakeup_check_start(tr, task, curr);
312}
313
314static void start_wakeup_tracer(struct trace_array *tr)
315{
316 int ret;
317
318 ret = marker_probe_register("kernel_sched_wakeup",
319 "pid %d state %ld ## rq %p task %p rq->curr %p",
320 wake_up_callback,
321 &wakeup_trace);
322 if (ret) {
323 pr_info("wakeup trace: Couldn't add marker"
324 " probe to kernel_sched_wakeup\n");
325 return;
326 }
327
328 ret = marker_probe_register("kernel_sched_wakeup_new",
329 "pid %d state %ld ## rq %p task %p rq->curr %p",
330 wake_up_callback,
331 &wakeup_trace);
332 if (ret) {
333 pr_info("wakeup trace: Couldn't add marker"
334 " probe to kernel_sched_wakeup_new\n");
335 goto fail_deprobe;
336 }
337
338 ret = marker_probe_register("kernel_sched_schedule",
339 "prev_pid %d next_pid %d prev_state %ld "
340 "## rq %p prev %p next %p",
341 sched_switch_callback,
342 &wakeup_trace);
343 if (ret) {
344 pr_info("sched trace: Couldn't add marker"
345 " probe to kernel_sched_schedule\n");
346 goto fail_deprobe_wake_new;
347 }
348
349 wakeup_reset(tr);
350
351 /*
352 * Don't let the tracer_enabled = 1 show up before
353 * the wakeup_task is reset. This may be overkill since
354 * wakeup_reset does a spin_unlock after setting the
355 * wakeup_task to NULL, but I want to be safe.
356 * This is a slow path anyway.
357 */
358 smp_wmb();
359
360 register_ftrace_function(&trace_ops);
361
362 tracer_enabled = 1;
363
364 return;
365fail_deprobe_wake_new:
366 marker_probe_unregister("kernel_sched_wakeup_new",
367 wake_up_callback,
368 &wakeup_trace);
369fail_deprobe:
370 marker_probe_unregister("kernel_sched_wakeup",
371 wake_up_callback,
372 &wakeup_trace);
373}
374
375static void stop_wakeup_tracer(struct trace_array *tr)
376{
377 tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops);
379 marker_probe_unregister("kernel_sched_schedule",
380 sched_switch_callback,
381 &wakeup_trace);
382 marker_probe_unregister("kernel_sched_wakeup_new",
383 wake_up_callback,
384 &wakeup_trace);
385 marker_probe_unregister("kernel_sched_wakeup",
386 wake_up_callback,
387 &wakeup_trace);
388}
389
390static void wakeup_tracer_init(struct trace_array *tr)
391{
392 wakeup_trace = tr;
393
394 if (tr->ctrl)
395 start_wakeup_tracer(tr);
396}
397
398static void wakeup_tracer_reset(struct trace_array *tr)
399{
400 if (tr->ctrl) {
401 stop_wakeup_tracer(tr);
402 /* make sure we put back any tasks we are tracing */
403 wakeup_reset(tr);
404 }
405}
406
407static void wakeup_tracer_ctrl_update(struct trace_array *tr)
408{
409 if (tr->ctrl)
410 start_wakeup_tracer(tr);
411 else
412 stop_wakeup_tracer(tr);
413}
414
415static void wakeup_tracer_open(struct trace_iterator *iter)
416{
417 /* stop the trace while dumping */
418 if (iter->tr->ctrl)
419 stop_wakeup_tracer(iter->tr);
420}
421
422static void wakeup_tracer_close(struct trace_iterator *iter)
423{
424 /* forget about any processes we were recording */
425 if (iter->tr->ctrl)
426 start_wakeup_tracer(iter->tr);
427}
428
429static struct tracer wakeup_tracer __read_mostly =
430{
431 .name = "wakeup",
432 .init = wakeup_tracer_init,
433 .reset = wakeup_tracer_reset,
434 .open = wakeup_tracer_open,
435 .close = wakeup_tracer_close,
436 .ctrl_update = wakeup_tracer_ctrl_update,
437 .print_max = 1,
438#ifdef CONFIG_FTRACE_SELFTEST
439 .selftest = trace_selftest_startup_wakeup,
440#endif
441};
442
443__init static int init_wakeup_tracer(void)
444{
445 int ret;
446
447 ret = register_tracer(&wakeup_tracer);
448 if (ret)
449 return ret;
450
451 return 0;
452}
453device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
new file mode 100644
index 000000000000..0911b7e073bf
--- /dev/null
+++ b/kernel/trace/trace_selftest.c
@@ -0,0 +1,563 @@
1/* Include in trace.c */
2
3#include <linux/kthread.h>
4#include <linux/delay.h>
5
6static inline int trace_valid_entry(struct trace_entry *entry)
7{
8 switch (entry->type) {
9 case TRACE_FN:
10 case TRACE_CTX:
11 case TRACE_WAKE:
12 case TRACE_STACK:
13 case TRACE_SPECIAL:
14 return 1;
15 }
16 return 0;
17}
18
19static int
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{
22 struct trace_entry *entries;
23 struct page *page;
24 int idx = 0;
25 int i;
26
27 BUG_ON(list_empty(&data->trace_pages));
28 page = list_entry(data->trace_pages.next, struct page, lru);
29 entries = page_address(page);
30
31 check_pages(data);
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type);
46 goto failed;
47 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 }
70
71 return 0;
72
73 failed:
74 /* disable tracing */
75 tracing_disabled = 1;
76 printk(KERN_CONT ".. corrupted trace buffer .. ");
77 return -1;
78}
79
80/*
81 * Test the trace buffer to see if all the elements
82 * are still sane.
83 */
84static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85{
86 unsigned long flags, cnt = 0;
87 int cpu, ret = 0;
88
89 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95
96 cnt += tr->data[cpu]->trace_idx;
97
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
99 if (ret)
100 break;
101 }
102 __raw_spin_unlock(&ftrace_max_lock);
103 raw_local_irq_restore(flags);
104
105 if (count)
106 *count = cnt;
107
108 return ret;
109}
110
111#ifdef CONFIG_FTRACE
112
113#ifdef CONFIG_DYNAMIC_FTRACE
114
115#define __STR(x) #x
116#define STR(x) __STR(x)
117
118/* Test dynamic code modification and ftrace filters */
119int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr,
121 int (*func)(void))
122{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled;
127 char *func_name;
128
129 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n");
131 pr_info("Testing dynamic ftrace: ");
132
133 /* enable tracing, and record the filter function */
134 ftrace_enabled = 1;
135 tracer_enabled = 1;
136
137 /* passed in by parameter to fool gcc from optimizing */
138 func();
139
140 /* update the records */
141 ret = ftrace_force_update();
142 if (ret) {
143 printk(KERN_CONT ".. ftraced failed .. ");
144 return ret;
145 }
146
147 /*
148 * Some archs *cough*PowerPC*cough* add charachters to the
149 * start of the function names. We simply put a '*' to
150 * accomodate them.
151 */
152 func_name = "*" STR(DYN_FTRACE_TEST_NAME);
153
154 /* filter only on our function */
155 ftrace_set_filter(func_name, strlen(func_name), 1);
156
157 /* enable tracing */
158 tr->ctrl = 1;
159 trace->init(tr);
160 /* Sleep for a 1/10 of a second */
161 msleep(100);
162
163 /* we should have nothing in the buffer */
164 ret = trace_test_buffer(tr, &count);
165 if (ret)
166 goto out;
167
168 if (count) {
169 ret = -1;
170 printk(KERN_CONT ".. filter did not filter .. ");
171 goto out;
172 }
173
174 /* call our function again */
175 func();
176
177 /* sleep again */
178 msleep(100);
179
180 /* stop the tracing. */
181 tr->ctrl = 0;
182 trace->ctrl_update(tr);
183 ftrace_enabled = 0;
184
185 /* check the trace buffer */
186 ret = trace_test_buffer(tr, &count);
187 trace->reset(tr);
188
189 /* we should only have one item */
190 if (!ret && count != 1) {
191 printk(KERN_CONT ".. filter failed count=%ld ..", count);
192 ret = -1;
193 goto out;
194 }
195 out:
196 ftrace_enabled = save_ftrace_enabled;
197 tracer_enabled = save_tracer_enabled;
198
199 /* Enable tracing on all functions again */
200 ftrace_set_filter(NULL, 0, 1);
201
202 return ret;
203}
204#else
205# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
206#endif /* CONFIG_DYNAMIC_FTRACE */
207/*
208 * Simple verification test of ftrace function tracer.
209 * Enable ftrace, sleep 1/10 second, and then read the trace
210 * buffer to see if all is in order.
211 */
212int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled;
219
220 /* make sure msleep has been recorded */
221 msleep(1);
222
223 /* force the recorded functions to be traced */
224 ret = ftrace_force_update();
225 if (ret) {
226 printk(KERN_CONT ".. ftraced failed .. ");
227 return ret;
228 }
229
230 /* start the tracing */
231 ftrace_enabled = 1;
232 tracer_enabled = 1;
233
234 tr->ctrl = 1;
235 trace->init(tr);
236 /* Sleep for a 1/10 of a second */
237 msleep(100);
238 /* stop the tracing. */
239 tr->ctrl = 0;
240 trace->ctrl_update(tr);
241 ftrace_enabled = 0;
242
243 /* check the trace buffer */
244 ret = trace_test_buffer(tr, &count);
245 trace->reset(tr);
246
247 if (!ret && !count) {
248 printk(KERN_CONT ".. no entries found ..");
249 ret = -1;
250 goto out;
251 }
252
253 ret = trace_selftest_startup_dynamic_tracing(trace, tr,
254 DYN_FTRACE_TEST_NAME);
255
256 out:
257 ftrace_enabled = save_ftrace_enabled;
258 tracer_enabled = save_tracer_enabled;
259
260 /* kill ftrace totally if we failed */
261 if (ret)
262 ftrace_kill();
263
264 return ret;
265}
266#endif /* CONFIG_FTRACE */
267
268#ifdef CONFIG_IRQSOFF_TRACER
269int
270trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
271{
272 unsigned long save_max = tracing_max_latency;
273 unsigned long count;
274 int ret;
275
276 /* start the tracing */
277 tr->ctrl = 1;
278 trace->init(tr);
279 /* reset the max latency */
280 tracing_max_latency = 0;
281 /* disable interrupts for a bit */
282 local_irq_disable();
283 udelay(100);
284 local_irq_enable();
285 /* stop the tracing. */
286 tr->ctrl = 0;
287 trace->ctrl_update(tr);
288 /* check both trace buffers */
289 ret = trace_test_buffer(tr, NULL);
290 if (!ret)
291 ret = trace_test_buffer(&max_tr, &count);
292 trace->reset(tr);
293
294 if (!ret && !count) {
295 printk(KERN_CONT ".. no entries found ..");
296 ret = -1;
297 }
298
299 tracing_max_latency = save_max;
300
301 return ret;
302}
303#endif /* CONFIG_IRQSOFF_TRACER */
304
305#ifdef CONFIG_PREEMPT_TRACER
306int
307trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
308{
309 unsigned long save_max = tracing_max_latency;
310 unsigned long count;
311 int ret;
312
313 /* start the tracing */
314 tr->ctrl = 1;
315 trace->init(tr);
316 /* reset the max latency */
317 tracing_max_latency = 0;
318 /* disable preemption for a bit */
319 preempt_disable();
320 udelay(100);
321 preempt_enable();
322 /* stop the tracing. */
323 tr->ctrl = 0;
324 trace->ctrl_update(tr);
325 /* check both trace buffers */
326 ret = trace_test_buffer(tr, NULL);
327 if (!ret)
328 ret = trace_test_buffer(&max_tr, &count);
329 trace->reset(tr);
330
331 if (!ret && !count) {
332 printk(KERN_CONT ".. no entries found ..");
333 ret = -1;
334 }
335
336 tracing_max_latency = save_max;
337
338 return ret;
339}
340#endif /* CONFIG_PREEMPT_TRACER */
341
342#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
343int
344trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
345{
346 unsigned long save_max = tracing_max_latency;
347 unsigned long count;
348 int ret;
349
350 /* start the tracing */
351 tr->ctrl = 1;
352 trace->init(tr);
353
354 /* reset the max latency */
355 tracing_max_latency = 0;
356
357 /* disable preemption and interrupts for a bit */
358 preempt_disable();
359 local_irq_disable();
360 udelay(100);
361 preempt_enable();
362 /* reverse the order of preempt vs irqs */
363 local_irq_enable();
364
365 /* stop the tracing. */
366 tr->ctrl = 0;
367 trace->ctrl_update(tr);
368 /* check both trace buffers */
369 ret = trace_test_buffer(tr, NULL);
370 if (ret)
371 goto out;
372
373 ret = trace_test_buffer(&max_tr, &count);
374 if (ret)
375 goto out;
376
377 if (!ret && !count) {
378 printk(KERN_CONT ".. no entries found ..");
379 ret = -1;
380 goto out;
381 }
382
383 /* do the test by disabling interrupts first this time */
384 tracing_max_latency = 0;
385 tr->ctrl = 1;
386 trace->ctrl_update(tr);
387 preempt_disable();
388 local_irq_disable();
389 udelay(100);
390 preempt_enable();
391 /* reverse the order of preempt vs irqs */
392 local_irq_enable();
393
394 /* stop the tracing. */
395 tr->ctrl = 0;
396 trace->ctrl_update(tr);
397 /* check both trace buffers */
398 ret = trace_test_buffer(tr, NULL);
399 if (ret)
400 goto out;
401
402 ret = trace_test_buffer(&max_tr, &count);
403
404 if (!ret && !count) {
405 printk(KERN_CONT ".. no entries found ..");
406 ret = -1;
407 goto out;
408 }
409
410 out:
411 trace->reset(tr);
412 tracing_max_latency = save_max;
413
414 return ret;
415}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417
418#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data)
420{
421 /* Make this a RT thread, doesn't need to be too high */
422 struct sched_param param = { .sched_priority = 5 };
423 struct completion *x = data;
424
425 sched_setscheduler(current, SCHED_FIFO, &param);
426
427 /* Make it know we have a new prio */
428 complete(x);
429
430 /* now go to sleep and let the test wake us up */
431 set_current_state(TASK_INTERRUPTIBLE);
432 schedule();
433
434 /* we are awake, now wait to disappear */
435 while (!kthread_should_stop()) {
436 /*
437 * This is an RT task, do short sleeps to let
438 * others run.
439 */
440 msleep(100);
441 }
442
443 return 0;
444}
445
446int
447trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448{
449 unsigned long save_max = tracing_max_latency;
450 struct task_struct *p;
451 struct completion isrt;
452 unsigned long count;
453 int ret;
454
455 init_completion(&isrt);
456
457 /* create a high prio thread */
458 p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
459 if (IS_ERR(p)) {
460 printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
461 return -1;
462 }
463
464 /* make sure the thread is running at an RT prio */
465 wait_for_completion(&isrt);
466
467 /* start the tracing */
468 tr->ctrl = 1;
469 trace->init(tr);
470 /* reset the max latency */
471 tracing_max_latency = 0;
472
473 /* sleep to let the RT thread sleep too */
474 msleep(100);
475
476 /*
477 * Yes this is slightly racy. It is possible that for some
478 * strange reason that the RT thread we created, did not
479 * call schedule for 100ms after doing the completion,
480 * and we do a wakeup on a task that already is awake.
481 * But that is extremely unlikely, and the worst thing that
482 * happens in such a case, is that we disable tracing.
483 * Honestly, if this race does happen something is horrible
484 * wrong with the system.
485 */
486
487 wake_up_process(p);
488
489 /* stop the tracing. */
490 tr->ctrl = 0;
491 trace->ctrl_update(tr);
492 /* check both trace buffers */
493 ret = trace_test_buffer(tr, NULL);
494 if (!ret)
495 ret = trace_test_buffer(&max_tr, &count);
496
497
498 trace->reset(tr);
499
500 tracing_max_latency = save_max;
501
502 /* kill the thread */
503 kthread_stop(p);
504
505 if (!ret && !count) {
506 printk(KERN_CONT ".. no entries found ..");
507 ret = -1;
508 }
509
510 return ret;
511}
512#endif /* CONFIG_SCHED_TRACER */
513
514#ifdef CONFIG_CONTEXT_SWITCH_TRACER
515int
516trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
517{
518 unsigned long count;
519 int ret;
520
521 /* start the tracing */
522 tr->ctrl = 1;
523 trace->init(tr);
524 /* Sleep for a 1/10 of a second */
525 msleep(100);
526 /* stop the tracing. */
527 tr->ctrl = 0;
528 trace->ctrl_update(tr);
529 /* check the trace buffer */
530 ret = trace_test_buffer(tr, &count);
531 trace->reset(tr);
532
533 if (!ret && !count) {
534 printk(KERN_CONT ".. no entries found ..");
535 ret = -1;
536 }
537
538 return ret;
539}
540#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
541
542#ifdef CONFIG_SYSPROF_TRACER
543int
544trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
545{
546 unsigned long count;
547 int ret;
548
549 /* start the tracing */
550 tr->ctrl = 1;
551 trace->init(tr);
552 /* Sleep for a 1/10 of a second */
553 msleep(100);
554 /* stop the tracing. */
555 tr->ctrl = 0;
556 trace->ctrl_update(tr);
557 /* check the trace buffer */
558 ret = trace_test_buffer(tr, &count);
559 trace->reset(tr);
560
561 return ret;
562}
563#endif /* CONFIG_SYSPROF_TRACER */
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
new file mode 100644
index 000000000000..54dd77cce5bf
--- /dev/null
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -0,0 +1,7 @@
1#include "trace.h"
2
/*
 * Deliberately trivial function that takes no arguments and always
 * returns 0.
 * NOTE(review): DYN_FTRACE_TEST_NAME is presumably a macro supplied by
 * trace.h that expands to the real symbol name - confirm there.
 */
3int DYN_FTRACE_TEST_NAME(void)
4{
5 /* used to call mcount */
6 return 0;
7}
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
new file mode 100644
index 000000000000..ce2d723c10e1
--- /dev/null
+++ b/kernel/trace/trace_sysprof.c
@@ -0,0 +1,365 @@
1/*
2 * trace stack traces
3 *
4 * Copyright (C) 2004-2008, Soeren Sandmann
5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/kallsyms.h>
9#include <linux/debugfs.h>
10#include <linux/hrtimer.h>
11#include <linux/uaccess.h>
12#include <linux/ftrace.h>
13#include <linux/module.h>
14#include <linux/irq.h>
15#include <linux/fs.h>
16
17#include <asm/stacktrace.h>
18
19#include "trace.h"
20
/*
 * Trace array that samples are recorded into; assigned by
 * stack_trace_init() and read by timer_notify().
 */
21static struct trace_array *sysprof_trace;
/*
 * Set to 1 by start_stack_trace() and to 0 by stop_stack_trace(), in both
 * cases while sample_timer_lock is held.
 */
22static int __read_mostly tracer_enabled;
23
24/*
25 * 1 msec sample interval by default:
26 */
/* Kept in nanoseconds: sysprof_sample_write() stores usecs * 1000 here. */
27static unsigned long sample_period = 1000000;
/* Upper bound on the entries recorded for one sample (see timer_notify()). */
28static const unsigned int sample_max_depth = 512;
29
/*
 * Held while the per-CPU timers are started or stopped and while
 * sample_period is rewritten.
 */
30static DEFINE_MUTEX(sample_timer_lock);
31/*
32 * Per CPU hrtimers that do the profiling:
33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35
/*
 * One frame record as copied from user memory by copy_stack_frame().
 * timer_notify() records return_address and then continues the walk at
 * next_fp.
 * NOTE(review): the field order presumably mirrors the saved frame
 * pointer / return address pair on the user stack - confirm per
 * architecture before changing the layout.
 */
36struct stack_frame {
37 const void __user *next_fp;
38 unsigned long return_address;
39};
40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
42{
43 int ret;
44
45 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
46 return 0;
47
48 ret = 1;
49 pagefault_disable();
50 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
51 ret = 0;
52 pagefault_enable();
53
54 return ret;
55}
56
/*
 * Context handed to the backtrace_ops callbacks through dump_trace():
 * where to record addresses (tr, data) and how many entries have been
 * recorded so far (pos).
 */
57struct backtrace_info {
58 struct trace_array_cpu *data;
59 struct trace_array *tr;
60 int pos;
61};
62
/*
 * stacktrace_ops .warning_symbol callback: intentionally a no-op.
 */
static void backtrace_warning_symbol(void *data, char *msg,
				     unsigned long symbol)
{
	/* Warnings are not recorded in the trace */
}
68
/*
 * stacktrace_ops .warning callback: intentionally a no-op.
 */
static void
backtrace_warning(void *data, char *msg)
{
	/* Warnings are not recorded in the trace */
}
73
/*
 * stacktrace_ops .stack callback: always returns -1.
 * NOTE(review): -1 is presumably what makes the stack walker skip the
 * named stack - confirm against dump_trace().
 */
static int
backtrace_stack(void *data, char *name)
{
	/* IRQ stacks are not walked for now */
	return -1;
}
79
80static void backtrace_address(void *data, unsigned long addr, int reliable)
81{
82 struct backtrace_info *info = data;
83
84 if (info->pos < sample_max_depth && reliable) {
85 __trace_special(info->tr, info->data, 1, addr, 0);
86
87 info->pos++;
88 }
89}
90
91const static struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack,
95 .address = backtrace_address,
96};
97
98static int
99trace_kernel(struct pt_regs *regs, struct trace_array *tr,
100 struct trace_array_cpu *data)
101{
102 struct backtrace_info info;
103 unsigned long bp;
104 char *stack;
105
106 info.tr = tr;
107 info.data = data;
108 info.pos = 1;
109
110 __trace_special(info.tr, info.data, 1, regs->ip, 0);
111
112 stack = ((char *)regs + sizeof(struct pt_regs));
113#ifdef CONFIG_FRAME_POINTER
114 bp = regs->bp;
115#else
116 bp = 0;
117#endif
118
119 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
120
121 return info.pos;
122}
123
124static void timer_notify(struct pt_regs *regs, int cpu)
125{
126 struct trace_array_cpu *data;
127 struct stack_frame frame;
128 struct trace_array *tr;
129 const void __user *fp;
130 int is_user;
131 int i;
132
133 if (!regs)
134 return;
135
136 tr = sysprof_trace;
137 data = tr->data[cpu];
138 is_user = user_mode(regs);
139
140 if (!current || current->pid == 0)
141 return;
142
143 if (is_user && current->state != TASK_RUNNING)
144 return;
145
146 __trace_special(tr, data, 0, 0, current->pid);
147
148 if (!is_user)
149 i = trace_kernel(regs, tr, data);
150 else
151 i = 0;
152
153 /*
154 * Trace user stack if we are not a kernel thread
155 */
156 if (current->mm && i < sample_max_depth) {
157 regs = (struct pt_regs *)current->thread.sp0 - 1;
158
159 fp = (void __user *)regs->bp;
160
161 __trace_special(tr, data, 2, regs->ip, 0);
162
163 while (i < sample_max_depth) {
164 frame.next_fp = NULL;
165 frame.return_address = 0;
166 if (!copy_stack_frame(fp, &frame))
167 break;
168 if ((unsigned long)fp < regs->sp)
169 break;
170
171 __trace_special(tr, data, 2, frame.return_address,
172 (unsigned long)fp);
173 fp = frame.next_fp;
174
175 i++;
176 }
177
178 }
179
180 /*
181 * Special trace entry if we overflow the max depth:
182 */
183 if (i == sample_max_depth)
184 __trace_special(tr, data, -1, -1, -1);
185
186 __trace_special(tr, data, 3, current->pid, i);
187}
188
189static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
190{
191 /* trace here */
192 timer_notify(get_irq_regs(), smp_processor_id());
193
194 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
195
196 return HRTIMER_RESTART;
197}
198
199static void start_stack_timer(int cpu)
200{
201 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn;
205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
206
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
208}
209
210static void start_stack_timers(void)
211{
212 cpumask_t saved_mask = current->cpus_allowed;
213 int cpu;
214
215 for_each_online_cpu(cpu) {
216 cpumask_of_cpu_ptr(new_mask, cpu);
217
218 set_cpus_allowed_ptr(current, new_mask);
219 start_stack_timer(cpu);
220 }
221 set_cpus_allowed_ptr(current, &saved_mask);
222}
223
224static void stop_stack_timer(int cpu)
225{
226 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
227
228 hrtimer_cancel(hrtimer);
229}
230
231static void stop_stack_timers(void)
232{
233 int cpu;
234
235 for_each_online_cpu(cpu)
236 stop_stack_timer(cpu);
237}
238
239static void stack_reset(struct trace_array *tr)
240{
241 int cpu;
242
243 tr->time_start = ftrace_now(tr->cpu);
244
245 for_each_online_cpu(cpu)
246 tracing_reset(tr->data[cpu]);
247}
248
/*
 * Begin sampling into @tr: reset its buffers, start the per-CPU timers
 * and mark the tracer enabled, all under sample_timer_lock.
 */
249static void start_stack_trace(struct trace_array *tr)
250{
251 mutex_lock(&sample_timer_lock);
	/* buffers are reset before the first timer can fire */
252 stack_reset(tr);
253 start_stack_timers();
254 tracer_enabled = 1;
255 mutex_unlock(&sample_timer_lock);
256}
257
/*
 * Stop sampling: cancel the per-CPU timers and mark the tracer disabled,
 * both under sample_timer_lock. @tr is not used.
 */
258static void stop_stack_trace(struct trace_array *tr)
259{
260 mutex_lock(&sample_timer_lock);
261 stop_stack_timers();
262 tracer_enabled = 0;
263 mutex_unlock(&sample_timer_lock);
264}
265
266static void stack_trace_init(struct trace_array *tr)
267{
268 sysprof_trace = tr;
269
270 if (tr->ctrl)
271 start_stack_trace(tr);
272}
273
274static void stack_trace_reset(struct trace_array *tr)
275{
276 if (tr->ctrl)
277 stop_stack_trace(tr);
278}
279
280static void stack_trace_ctrl_update(struct trace_array *tr)
281{
282 /* When starting a new trace, reset the buffers */
283 if (tr->ctrl)
284 start_stack_trace(tr);
285 else
286 stop_stack_trace(tr);
287}
288
/*
 * Descriptor of the "sysprof" tracer, registered by init_stack_trace().
 * The selftest hook is only wired up when CONFIG_FTRACE_SELFTEST is set.
 */
289static struct tracer stack_trace __read_mostly =
290{
291 .name = "sysprof",
292 .init = stack_trace_init,
293 .reset = stack_trace_reset,
294 .ctrl_update = stack_trace_ctrl_update,
295#ifdef CONFIG_FTRACE_SELFTEST
296 .selftest = trace_selftest_startup_sysprof,
297#endif
298};
299
300__init static int init_stack_trace(void)
301{
302 return register_tracer(&stack_trace);
303}
304device_initcall(init_stack_trace);
305
/*
 * Size of the on-stack text buffers used by the sample-period read and
 * write handlers.
 * NOTE(review): presumably sized for a 64-bit decimal value plus newline
 * and terminating NUL - confirm before shrinking.
 */
306#define MAX_LONG_DIGITS 22
307
308static ssize_t
309sysprof_sample_read(struct file *filp, char __user *ubuf,
310 size_t cnt, loff_t *ppos)
311{
312 char buf[MAX_LONG_DIGITS];
313 int r;
314
315 r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
316
317 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
318}
319
320static ssize_t
321sysprof_sample_write(struct file *filp, const char __user *ubuf,
322 size_t cnt, loff_t *ppos)
323{
324 char buf[MAX_LONG_DIGITS];
325 unsigned long val;
326
327 if (cnt > MAX_LONG_DIGITS-1)
328 cnt = MAX_LONG_DIGITS-1;
329
330 if (copy_from_user(&buf, ubuf, cnt))
331 return -EFAULT;
332
333 buf[cnt] = 0;
334
335 val = simple_strtoul(buf, NULL, 10);
336 /*
337 * Enforce a minimum sample period of 100 usecs:
338 */
339 if (val < 100)
340 val = 100;
341
342 mutex_lock(&sample_timer_lock);
343 stop_stack_timers();
344 sample_period = val * 1000;
345 start_stack_timers();
346 mutex_unlock(&sample_timer_lock);
347
348 return cnt;
349}
350
/*
 * File operations of the "sysprof_sample_period" debugfs file created by
 * init_tracer_sysprof_debugfs().
 */
351static struct file_operations sysprof_sample_fops = {
352 .read = sysprof_sample_read,
353 .write = sysprof_sample_write,
354};
355
356void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
357{
358 struct dentry *entry;
359
360 entry = debugfs_create_file("sysprof_sample_period", 0644,
361 d_tracer, NULL, &sysprof_sample_fops);
362 if (entry)
363 return;
364 pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n");
365}