-rw-r--r--Documentation/ABI/testing/debugfs-kmemtrace71
-rw-r--r--Documentation/ftrace.txt74
-rw-r--r--Documentation/kernel-parameters.txt10
-rw-r--r--Documentation/sysrq.txt2
-rw-r--r--Documentation/vm/kmemtrace.txt126
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/alpha/include/asm/hardirq.h13
-rw-r--r--arch/avr32/include/asm/hardirq.h11
-rw-r--r--arch/ia64/Kconfig3
-rw-r--r--arch/ia64/include/asm/ftrace.h28
-rw-r--r--arch/ia64/include/asm/hardirq.h10
-rw-r--r--arch/ia64/kernel/Makefile5
-rw-r--r--arch/ia64/kernel/entry.S100
-rw-r--r--arch/ia64/kernel/ftrace.c206
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c6
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/dumpstack.c6
-rw-r--r--arch/x86/kernel/ftrace.c49
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--block/Kconfig16
-rw-r--r--block/Makefile1
-rw-r--r--drivers/char/sysrq.c2
-rw-r--r--drivers/oprofile/cpu_buffer.c5
-rw-r--r--fs/partitions/check.c4
-rw-r--r--include/linux/blktrace_api.h5
-rw-r--r--include/linux/ftrace.h99
-rw-r--r--include/linux/ftrace_irq.h2
-rw-r--r--include/linux/hardirq.h73
-rw-r--r--include/linux/ring_buffer.h22
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/slab_def.h68
-rw-r--r--include/linux/slob_def.h9
-rw-r--r--include/linux/slub_def.h53
-rw-r--r--include/trace/kmemtrace.h75
-rw-r--r--include/trace/power.h34
-rw-r--r--include/trace/workqueue.h25
-rw-r--r--init/main.c2
-rw-r--r--kernel/extable.c4
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/relay.c4
-rw-r--r--kernel/sched.c8
-rw-r--r--kernel/softirq.c13
-rw-r--r--kernel/trace/Kconfig72
-rw-r--r--kernel/trace/Makefile5
-rw-r--r--kernel/trace/blktrace.c (renamed from block/blktrace.c)718
-rw-r--r--kernel/trace/ftrace.c953
-rw-r--r--kernel/trace/kmemtrace.c339
-rw-r--r--kernel/trace/ring_buffer.c168
-rw-r--r--kernel/trace/trace.c1480
-rw-r--r--kernel/trace/trace.h105
-rw-r--r--kernel/trace/trace_boot.c36
-rw-r--r--kernel/trace/trace_branch.c279
-rw-r--r--kernel/trace/trace_functions.c365
-rw-r--r--kernel/trace/trace_functions_graph.c378
-rw-r--r--kernel/trace/trace_hw_branches.c179
-rw-r--r--kernel/trace/trace_irqsoff.c44
-rw-r--r--kernel/trace/trace_mmiotrace.c41
-rw-r--r--kernel/trace/trace_nop.c5
-rw-r--r--kernel/trace/trace_output.c919
-rw-r--r--kernel/trace/trace_output.h62
-rw-r--r--kernel/trace/trace_power.c188
-rw-r--r--kernel/trace/trace_sched_switch.c12
-rw-r--r--kernel/trace/trace_sched_wakeup.c85
-rw-r--r--kernel/trace/trace_selftest.c69
-rw-r--r--kernel/trace/trace_stat.c319
-rw-r--r--kernel/trace/trace_stat.h31
-rw-r--r--kernel/trace/trace_sysprof.c19
-rw-r--r--kernel/trace/trace_workqueue.c281
-rw-r--r--kernel/workqueue.c16
-rw-r--r--mm/slab.c71
-rw-r--r--mm/slob.c37
-rw-r--r--mm/slub.c83
-rw-r--r--scripts/Makefile.build13
-rwxr-xr-xscripts/recordmcount.pl37
76 files changed, 6522 insertions, 2154 deletions
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 000000000000..5e6a92a02d85
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
1What: /sys/kernel/debug/kmemtrace/
2Date: July 2008
3Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
4Description:
5
6In kmemtrace-enabled kernels, the following files are created:
7
8/sys/kernel/debug/kmemtrace/
9 cpu<n> (0400) Per-CPU tracing data, see below. (binary)
10 total_overruns (0400) Total number of bytes which were dropped from
11 cpu<n> files because of full buffer condition,
12 non-binary. (text)
13 abi_version (0400) Kernel's kmemtrace ABI version. (text)
14
15Each per-CPU file should be read according to the relay interface. That is,
16the reader should set affinity to that specific CPU and, as currently done by
17the userspace application (though there are other methods), use poll() with
18an infinite timeout before every read(). Otherwise, erroneous data may be
19read. The binary data has the following _core_ format:
20
21 Event ID (1 byte) Unsigned integer, one of:
22 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
23 1 - represents a freeing of previously allocated memory
24 (KMEMTRACE_EVENT_FREE)
25 Type ID (1 byte) Unsigned integer, one of:
26 0 - this is a kmalloc() / kfree()
27 1 - this is a kmem_cache_alloc() / kmem_cache_free()
28 2 - this is a __get_free_pages() et al.
29 Event size (2 bytes) Unsigned integer representing the
30 size of this event. Used to extend
31 kmemtrace. Discard the bytes you
32 don't know about.
33 Sequence number (4 bytes) Signed integer used to reorder data
34 logged on SMP machines. Wraparound
35 must be taken into account, although
36 it is unlikely.
37 Caller address (8 bytes) Return address to the caller.
38 Pointer to mem (8 bytes) Pointer to target memory area. Can be
39 NULL, but not all such calls might be
40 recorded.
41
42In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
43
44 Requested bytes (8 bytes) Total number of requested bytes,
45 unsigned, must not be zero.
46 Allocated bytes (8 bytes) Total number of actually allocated
47 bytes, unsigned, must not be lower
48 than requested bytes.
49 Requested flags (4 bytes) GFP flags supplied by the caller.
50 Target CPU (4 bytes) Signed integer, valid for event id 1.
51 If equal to -1, target CPU is the same
52 as origin CPU, but the reverse might
53 not be true.
54
55The data is made available in the native endianness of the machine.
56
57Other event ids and type ids may be defined and added. Other fields may be
58added by increasing event size, but see below for details.
59Every modification to the ABI, including new id definitions, is followed
60by bumping the ABI version by one.
61
62Adding new data to the packet (features) is done at the end of the mandatory
63data:
64 Feature size (2 bytes)
65 Feature ID (1 byte)
66 Feature data (Feature size - 3 bytes)
67
68
69Users:
70 kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
71
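Read back to back, the core record described above is 24 bytes, followed by another 24 bytes for allocation events. A userspace decoder might lay it out as below; the struct and field names are illustrative only (they are not taken from the kernel or kmemtrace-user sources), and the dense, unpadded layout is an assumption consistent with the sizes listed above.

#include <stdint.h>

/* Illustrative decoder-side view of the core kmemtrace record (assumed
   densely packed, machine endianness as stated above). */
struct kmemtrace_core_record {
	uint8_t  event_id;    /* 0 = ALLOC, 1 = FREE */
	uint8_t  type_id;     /* 0 = kmalloc/kfree, 1 = kmem_cache_*, 2 = pages */
	uint16_t event_size;  /* total event size; skip unknown trailing bytes */
	int32_t  seq;         /* sequence number, may wrap */
	uint64_t call_site;   /* return address of the caller */
	uint64_t ptr;         /* pointer to the memory area, may be 0 */
} __attribute__((packed));

/* Follows the core record when event_id == 0 (KMEMTRACE_EVENT_ALLOC). */
struct kmemtrace_alloc_record {
	uint64_t bytes_req;   /* requested bytes, non-zero */
	uint64_t bytes_alloc; /* allocated bytes, >= bytes_req */
	uint32_t gfp_flags;   /* GFP flags supplied by the caller */
	int32_t  cpu;         /* target CPU; -1 = same as origin CPU */
} __attribute__((packed));

A decoder then reads whole records from each cpu<n> file and advances by event_size bytes per record, which also skips any trailing feature data it does not understand.
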
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 803b1318b13d..758fb42a1b68 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -165,6 +165,8 @@ Here is the list of current tracers that may be configured.
165 nop - This is not a tracer. To remove all tracers from tracing 165 nop - This is not a tracer. To remove all tracers from tracing
166 simply echo "nop" into current_tracer. 166 simply echo "nop" into current_tracer.
167 167
168 hw-branch-tracer - traces branches on all CPUs in a circular buffer.
169
168 170
169Examples of using the tracer 171Examples of using the tracer
170---------------------------- 172----------------------------
@@ -1152,6 +1154,78 @@ int main (int argc, char **argv)
1152 return 0; 1154 return 0;
1153} 1155}
1154 1156
1157
1158hw-branch-tracer (x86 only)
1159---------------------------
1160
1161This tracer uses the x86 last branch tracing hardware feature to
1162collect a branch trace on all CPUs with relatively low overhead.
1163
1164The tracer uses a fixed-size circular buffer per CPU and only
1165traces ring 0 branches. The trace file dumps that buffer in the
1166following format:
1167
1168# tracer: hw-branch-tracer
1169#
1170# CPU# TO <- FROM
1171 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6
1172 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a
1173 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf
1174 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf
1175 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a
1176 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf
1177
1178
1179On a kernel oops, the tracer may be used to dump the trace for the
1180oopsing CPU into the system log. To enable this, ftrace_dump_on_oops
1181must be set. To set ftrace_dump_on_oops, one can either use the sysctl
1182utility or set it via the proc filesystem interface.
1183
1184 sysctl kernel.ftrace_dump_on_oops=1
1185
1186or
1187
1188 echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
1189
1190
1191Here's an example of such a dump after a null pointer dereference in a
1192kernel module:
1193
1194[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
1195[57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops]
1196[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0
1197[57848.106019] Oops: 0002 [#1] SMP
1198[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus
1199[57848.106019] Dumping ftrace buffer:
1200[57848.106019] ---------------------------------
1201[...]
1202[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24
1203[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165
1204[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165
1205[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165
1206[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165
1207[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops]
1208[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30
1209[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b
1210[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31
1211[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1
1212[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30
1213[...]
1214[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2
1215[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881
1216[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881
1217[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96
1218[...]
1219[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3
1220[57848.106019] ---------------------------------
1221[57848.106019] CPU 0
1222[57848.106019] Modules linked in: oops
1223[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23
1224[57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops]
1225[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246
1226[...]
1227
1228
1155dynamic ftrace 1229dynamic ftrace
1156-------------- 1230--------------
1157 1231
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b182626739ea..fc22e9223427 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ parameter is applicable:
49 ISAPNP ISA PnP code is enabled. 49 ISAPNP ISA PnP code is enabled.
50 ISDN Appropriate ISDN support is enabled. 50 ISDN Appropriate ISDN support is enabled.
51 JOY Appropriate joystick support is enabled. 51 JOY Appropriate joystick support is enabled.
52 KMEMTRACE kmemtrace is enabled.
52 LIBATA Libata driver is enabled 53 LIBATA Libata driver is enabled
53 LP Printer support is enabled. 54 LP Printer support is enabled.
54 LOOP Loopback device support is enabled. 55 LOOP Loopback device support is enabled.
@@ -1045,6 +1046,15 @@ and is between 256 and 4096 characters. It is defined in the file
1045 use the HighMem zone if it exists, and the Normal 1046 use the HighMem zone if it exists, and the Normal
1046 zone if it does not. 1047 zone if it does not.
1047 1048
1049 kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
1050 Controls whether kmemtrace is enabled
1051 at boot-time.
1052
1053 kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
1054 subbufs kmemtrace's relay channel has. Set this
1055 higher than default (KMEMTRACE_N_SUBBUFS in code) if
1056 you experience buffer overruns.
1057
1048 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter 1058 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
1049 is similar to kernelcore except it specifies the 1059 is similar to kernelcore except it specifies the
1050 amount of memory used for migratable allocations. 1060 amount of memory used for migratable allocations.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 9e592c718afb..535aeb936dbc 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -113,6 +113,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
113 113
114'x' - Used by xmon interface on ppc/powerpc platforms. 114'x' - Used by xmon interface on ppc/powerpc platforms.
115 115
116'z' - Dump the ftrace buffer
117
116'0'-'9' - Sets the console log level, controlling which kernel messages 118'0'-'9' - Sets the console log level, controlling which kernel messages
117 will be printed to your console. ('0', for example would make 119 will be printed to your console. ('0', for example would make
118 it so that only emergency messages like PANICs or OOPSes would 120 it so that only emergency messages like PANICs or OOPSes would
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644
index 000000000000..a956d9b7f943
--- /dev/null
+++ b/Documentation/vm/kmemtrace.txt
@@ -0,0 +1,126 @@
1 kmemtrace - Kernel Memory Tracer
2
3 by Eduard - Gabriel Munteanu
4 <eduard.munteanu@linux360.ro>
5
6I. Introduction
7===============
8
9kmemtrace helps kernel developers figure out two things:
101) how different allocators (SLAB, SLUB etc.) perform
112) how kernel code allocates memory and how much
12
13To do this, we trace every allocation and export information to userspace
14through the relay interface. We export things such as the number of requested
15bytes, the number of bytes actually allocated (i.e. including internal
16fragmentation), whether this is a slab allocation or a plain kmalloc() and so
17on.
18
19The actual analysis is performed by a userspace tool (see section III for
20details on where to get it from). It logs the data exported by the kernel,
21processes it and (as of writing this) can provide the following information:
22- the total amount of memory allocated and fragmentation per call-site
23- the amount of memory allocated and fragmentation per allocation
24- total memory allocated and fragmentation in the collected dataset
25- number of cross-CPU allocations and frees (makes sense in NUMA environments)
26
27Moreover, it can potentially find inconsistent and erroneous behavior in
28kernel code, such as using slab free functions on kmalloc'ed memory or
29allocating less memory than requested (but not truly failed allocations).
30
31kmemtrace also makes provisions for tracing on one arch and analysing the
32data on another.
33
34II. Design and goals
35====================
36
37kmemtrace was designed to handle rather large amounts of data. Thus, it uses
38the relay interface to export whatever is logged to userspace, which then
39stores it. Analysis and reporting are done asynchronously, that is, after the
40data is collected and stored. By design, it allows one to log and analyse
41on different machines and different arches.
42
43As of writing this, the ABI is not considered stable, though it might not
44change much. However, no guarantees are made about compatibility yet. When
45deemed stable, the ABI should still allow easy extension while maintaining
46backward compatibility. This is described further in Documentation/ABI.
47
48Summary of design goals:
49 - allow logging and analysis to be done across different machines
50 - be fast and anticipate usage in high-load environments (*)
51 - be reasonably extensible
52 - make it possible for GNU/Linux distributions to have kmemtrace
53 included in their repositories
54
55(*) - one of the reasons Pekka Enberg's original userspace data analysis
56 tool's code was rewritten from Perl to C (although this is more than a
57 simple conversion)
58
59
60III. Quick usage guide
61======================
62
631) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
64CONFIG_KMEMTRACE).
65
662) Get the userspace tool and build it:
67$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository
68$ cd kmemtrace-user/
69$ ./autogen.sh
70$ ./configure
71$ make
72
733) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
74'single' runlevel (so that relay buffers don't fill up easily), and run
75kmemtrace:
76# '$' does not mean user, but root here.
77$ mount -t debugfs none /sys/kernel/debug
78$ mount -t proc none /proc
79$ cd path/to/kmemtrace-user/
80$ ./kmemtraced
81Wait a bit, then stop it with CTRL+C.
82$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
83 # overrun, should
84 # be zero.
85$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
86 check its correctness]
87$ ./kmemtrace-report
88
89Now you should have a nice and short summary of how the allocator performs.
90
91IV. FAQ and known issues
92========================
93
94Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
95this? Should I worry?
96A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
97large the number is. You can fix it by supplying a higher
98'kmemtrace.subbufs=N' kernel parameter.
99---
100
101Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
102A: This is a bug and should be reported. It can occur for a variety of
103reasons:
104 - possible bugs in relay code
105 - possible misuse of relay by kmemtrace
106 - timestamps being collected out of order
107Or you may fix it yourself and send us a patch.
108---
109
110Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
111A: This is a known issue and I'm working on it. These might be true errors
112in kernel code, which may have inconsistent behavior (e.g. allocating memory
113with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
114out this behavior may work with SLAB, but may fail with other allocators.
115
116It may also be due to lack of tracing in some unusual allocator functions.
117
118We don't want bug reports regarding this issue yet.
119---
120
121V. See also
122===========
123
124Documentation/kernel-parameters.txt
125Documentation/ABI/testing/debugfs-kmemtrace
126
diff --git a/MAINTAINERS b/MAINTAINERS
index db65b4e6d132..b7e4afc4bc44 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2616,6 +2616,12 @@ M: jason.wessel@windriver.com
2616L: kgdb-bugreport@lists.sourceforge.net 2616L: kgdb-bugreport@lists.sourceforge.net
2617S: Maintained 2617S: Maintained
2618 2618
2619KMEMTRACE
2620P: Eduard - Gabriel Munteanu
2621M: eduard.munteanu@linux360.ro
2622L: linux-kernel@vger.kernel.org
2623S: Maintained
2624
2619KPROBES 2625KPROBES
2620P: Ananth N Mavinakayanahalli 2626P: Ananth N Mavinakayanahalli
2621M: ananth@in.ibm.com 2627M: ananth@in.ibm.com
diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h
index d953e234daa8..88971460fa6c 100644
--- a/arch/alpha/include/asm/hardirq.h
+++ b/arch/alpha/include/asm/hardirq.h
@@ -14,17 +14,4 @@ typedef struct {
14 14
15void ack_bad_irq(unsigned int irq); 15void ack_bad_irq(unsigned int irq);
16 16
17#define HARDIRQ_BITS 12
18
19/*
20 * The hardirq mask has to be large enough to have
21 * space for potentially nestable IRQ sources in the system
22 * to nest on a single CPU. On Alpha, interrupts are masked at the CPU
23 * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode)
24 * so we really only have 8 nestable IRQs, but allow some overhead
25 */
26#if (1 << HARDIRQ_BITS) < 16
27#error HARDIRQ_BITS is too low!
28#endif
29
30#endif /* _ALPHA_HARDIRQ_H */ 17#endif /* _ALPHA_HARDIRQ_H */
diff --git a/arch/avr32/include/asm/hardirq.h b/arch/avr32/include/asm/hardirq.h
index 267354356f60..015bc75ea798 100644
--- a/arch/avr32/include/asm/hardirq.h
+++ b/arch/avr32/include/asm/hardirq.h
@@ -20,15 +20,4 @@ void ack_bad_irq(unsigned int irq);
20 20
21#endif /* __ASSEMBLY__ */ 21#endif /* __ASSEMBLY__ */
22 22
23#define HARDIRQ_BITS 12
24
25/*
26 * The hardirq mask has to be large enough to have
27 * space for potentially all IRQ sources in the system
28 * nesting on a single CPU:
29 */
30#if (1 << HARDIRQ_BITS) < NR_IRQS
31# error HARDIRQ_BITS is too low!
32#endif
33
34#endif /* __ASM_AVR32_HARDIRQ_H */ 23#endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6183aeccecf1..8b6a8a554afa 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -22,6 +22,9 @@ config IA64
22 select HAVE_OPROFILE 22 select HAVE_OPROFILE
23 select HAVE_KPROBES 23 select HAVE_KPROBES
24 select HAVE_KRETPROBES 24 select HAVE_KRETPROBES
25 select HAVE_FTRACE_MCOUNT_RECORD
26 select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
27 select HAVE_FUNCTION_TRACER
25 select HAVE_DMA_ATTRS 28 select HAVE_DMA_ATTRS
26 select HAVE_KVM 29 select HAVE_KVM
27 select HAVE_ARCH_TRACEHOOK 30 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644
index 000000000000..d20db3c2a656
--- /dev/null
+++ b/arch/ia64/include/asm/ftrace.h
@@ -0,0 +1,28 @@
1#ifndef _ASM_IA64_FTRACE_H
2#define _ASM_IA64_FTRACE_H
3
4#ifdef CONFIG_FUNCTION_TRACER
5#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */
6
7#ifndef __ASSEMBLY__
8extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
9#define mcount _mcount
10
11#include <asm/kprobes.h>
12/* In IA64, MCOUNT_ADDR is set at link time, so it's not a constant at compile time */
13#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
14#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
15
16static inline unsigned long ftrace_call_adjust(unsigned long addr)
17{
18 /* second bundle, insn 2 */
19 return addr - 0x12;
20}
21
22struct dyn_arch_ftrace {
23};
24#endif
25
26#endif /* CONFIG_FUNCTION_TRACER */
27
28#endif /* _ASM_IA64_FTRACE_H */
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index 140e495b8e0e..d514cd9edb49 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -20,16 +20,6 @@
20 20
21#define local_softirq_pending() (local_cpu_data->softirq_pending) 21#define local_softirq_pending() (local_cpu_data->softirq_pending)
22 22
23#define HARDIRQ_BITS 14
24
25/*
26 * The hardirq mask has to be large enough to have space for potentially all IRQ sources
27 * in the system nesting on a single CPU:
28 */
29#if (1 << HARDIRQ_BITS) < NR_IRQS
30# error HARDIRQ_BITS is too low!
31#endif
32
33extern void __iomem *ipi_base_addr; 23extern void __iomem *ipi_base_addr;
34 24
35void ack_bad_irq(unsigned int irq); 25void ack_bad_irq(unsigned int irq);
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c381ea954892..ab6e7ec0bba3 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,6 +2,10 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5ifdef CONFIG_DYNAMIC_FTRACE
6CFLAGS_REMOVE_ftrace.o = -pg
7endif
8
5extra-y := head.o init_task.o vmlinux.lds 9extra-y := head.o init_task.o vmlinux.lds
6 10
7obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ 11obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
28obj-$(CONFIG_CPU_FREQ) += cpufreq/ 32obj-$(CONFIG_CPU_FREQ) += cpufreq/
29obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o 33obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
30obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o 34obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
35obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
31obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 36obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
32obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 37obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
33obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o 38obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2c1175..7e3382b06d56 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -47,6 +47,7 @@
47#include <asm/processor.h> 47#include <asm/processor.h>
48#include <asm/thread_info.h> 48#include <asm/thread_info.h>
49#include <asm/unistd.h> 49#include <asm/unistd.h>
50#include <asm/ftrace.h>
50 51
51#include "minstate.h" 52#include "minstate.h"
52 53
@@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running)
1404 br.ret.sptk.many rp 1405 br.ret.sptk.many rp
1405END(unw_init_running) 1406END(unw_init_running)
1406 1407
1408#ifdef CONFIG_FUNCTION_TRACER
1409#ifdef CONFIG_DYNAMIC_FTRACE
1410GLOBAL_ENTRY(_mcount)
1411 br ftrace_stub
1412END(_mcount)
1413
1414.here:
1415 br.ret.sptk.many b0
1416
1417GLOBAL_ENTRY(ftrace_caller)
1418 alloc out0 = ar.pfs, 8, 0, 4, 0
1419 mov out3 = r0
1420 ;;
1421 mov out2 = b0
1422 add r3 = 0x20, r3
1423 mov out1 = r1;
1424 br.call.sptk.many b0 = ftrace_patch_gp
1425 //this might be called from module, so we must patch gp
1426ftrace_patch_gp:
1427 movl gp=__gp
1428 mov b0 = r3
1429 ;;
1430.global ftrace_call;
1431ftrace_call:
1432{
1433 .mlx
1434 nop.m 0x0
1435 movl r3 = .here;;
1436}
1437 alloc loc0 = ar.pfs, 4, 4, 2, 0
1438 ;;
1439 mov loc1 = b0
1440 mov out0 = b0
1441 mov loc2 = r8
1442 mov loc3 = r15
1443 ;;
1444 adds out0 = -MCOUNT_INSN_SIZE, out0
1445 mov out1 = in2
1446 mov b6 = r3
1447
1448 br.call.sptk.many b0 = b6
1449 ;;
1450 mov ar.pfs = loc0
1451 mov b0 = loc1
1452 mov r8 = loc2
1453 mov r15 = loc3
1454 br ftrace_stub
1455 ;;
1456END(ftrace_caller)
1457
1458#else
1459GLOBAL_ENTRY(_mcount)
1460 movl r2 = ftrace_stub
1461 movl r3 = ftrace_trace_function;;
1462 ld8 r3 = [r3];;
1463 ld8 r3 = [r3];;
1464 cmp.eq p7,p0 = r2, r3
1465(p7) br.sptk.many ftrace_stub
1466 ;;
1467
1468 alloc loc0 = ar.pfs, 4, 4, 2, 0
1469 ;;
1470 mov loc1 = b0
1471 mov out0 = b0
1472 mov loc2 = r8
1473 mov loc3 = r15
1474 ;;
1475 adds out0 = -MCOUNT_INSN_SIZE, out0
1476 mov out1 = in2
1477 mov b6 = r3
1478
1479 br.call.sptk.many b0 = b6
1480 ;;
1481 mov ar.pfs = loc0
1482 mov b0 = loc1
1483 mov r8 = loc2
1484 mov r15 = loc3
1485 br ftrace_stub
1486 ;;
1487END(_mcount)
1488#endif
1489
1490GLOBAL_ENTRY(ftrace_stub)
1491 mov r3 = b0
1492 movl r2 = _mcount_ret_helper
1493 ;;
1494 mov b6 = r2
1495 mov b7 = r3
1496 br.ret.sptk.many b6
1497
1498_mcount_ret_helper:
1499 mov b0 = r42
1500 mov r1 = r41
1501 mov ar.pfs = r40
1502 br b7
1503END(ftrace_stub)
1504
1505#endif /* CONFIG_FUNCTION_TRACER */
1506
1407 .rodata 1507 .rodata
1408 .align 8 1508 .align 8
1409 .globl sys_call_table 1509 .globl sys_call_table
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 000000000000..7fc8c961b1f7
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,206 @@
1/*
2 * Dynamic function tracing support.
3 *
4 * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
5 *
6 * For licencing details, see COPYING.
7 *
8 * Defines low-level handling of mcount calls when the kernel
9 * is compiled with the -pg flag. When using dynamic ftrace, the
10 * mcount call-sites get patched lazily with NOP till they are
11 * enabled. All code mutation routines here take effect atomically.
12 */
13
14#include <linux/uaccess.h>
15#include <linux/ftrace.h>
16
17#include <asm/cacheflush.h>
18#include <asm/patch.h>
19
20/* On IA64, compiling with -pg adds the two bundles below at the start of each function */
21static unsigned char __attribute__((aligned(8)))
22ftrace_orig_code[MCOUNT_INSN_SIZE] = {
23 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
24 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
25 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */
26 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
27 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
28 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */
29};
30
31struct ftrace_orig_insn {
32 u64 dummy1, dummy2, dummy3;
33 u64 dummy4:64-41+13;
34 u64 imm20:20;
35 u64 dummy5:3;
36 u64 sign:1;
37 u64 dummy6:4;
38};
39
40/* the mcount call site is converted to the code below when it is turned into a nop */
41static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
42 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
43 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
44 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
45 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
46 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
47 0x00, 0x00, 0x04, 0x00
48};
49
50static unsigned char *ftrace_nop_replace(void)
51{
52 return ftrace_nop_code;
53}
54
55/*
56 * the mcount call site is converted to the code below when tracing is enabled
57 * Note: only the last instruction differs from the nop sequence above
58 */
59static unsigned char __attribute__((aligned(8)))
60ftrace_call_code[MCOUNT_INSN_SIZE] = {
61 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
62 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
63 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
64 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
65 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
66 0xf8, 0xff, 0xff, 0xc8
67};
68
69struct ftrace_call_insn {
70 u64 dummy1, dummy2;
71 u64 dummy3:48;
72 u64 imm39_l:16;
73 u64 imm39_h:23;
74 u64 dummy4:13;
75 u64 imm20:20;
76 u64 dummy5:3;
77 u64 i:1;
78 u64 dummy6:4;
79};
80
81static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
82{
83 struct ftrace_call_insn *code = (void *)ftrace_call_code;
84 unsigned long offset = addr - (ip + 0x10);
85
86 code->imm39_l = offset >> 24;
87 code->imm39_h = offset >> 40;
88 code->imm20 = offset >> 4;
89 code->i = offset >> 63;
90 return ftrace_call_code;
91}
92
93static int
94ftrace_modify_code(unsigned long ip, unsigned char *old_code,
95 unsigned char *new_code, int do_check)
96{
97 unsigned char replaced[MCOUNT_INSN_SIZE];
98
99 /*
100 * Note: Due to modules and __init, code can
101 * disappear and change, we need to protect against faulting
102 * as well as code changing. We do this by using the
103 * probe_kernel_* functions.
104 *
105 * No real locking needed, this code is run through
106 * kstop_machine, or before SMP starts.
107 */
108
109 if (!do_check)
110 goto skip_check;
111
112 /* read the text we want to modify */
113 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
114 return -EFAULT;
115
116 /* Make sure it is what we expect it to be */
117 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
118 return -EINVAL;
119
120skip_check:
121 /* replace the text with the new text */
122 if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
123 return -EPERM;
124 flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
125
126 return 0;
127}
128
129static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
130{
131 unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
132 unsigned long ip = rec->ip;
133
134 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
135 return -EFAULT;
136 if (rec->flags & FTRACE_FL_CONVERTED) {
137 struct ftrace_call_insn *call_insn, *tmp_call;
138
139 call_insn = (void *)ftrace_call_code;
140 tmp_call = (void *)replaced;
141 call_insn->imm39_l = tmp_call->imm39_l;
142 call_insn->imm39_h = tmp_call->imm39_h;
143 call_insn->imm20 = tmp_call->imm20;
144 call_insn->i = tmp_call->i;
145 if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
146 return -EINVAL;
147 return 0;
148 } else {
149 struct ftrace_orig_insn *call_insn, *tmp_call;
150
151 call_insn = (void *)ftrace_orig_code;
152 tmp_call = (void *)replaced;
153 call_insn->sign = tmp_call->sign;
154 call_insn->imm20 = tmp_call->imm20;
155 if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
156 return -EINVAL;
157 return 0;
158 }
159}
160
161int ftrace_make_nop(struct module *mod,
162 struct dyn_ftrace *rec, unsigned long addr)
163{
164 int ret;
165 char *new;
166
167 ret = ftrace_make_nop_check(rec, addr);
168 if (ret)
169 return ret;
170 new = ftrace_nop_replace();
171 return ftrace_modify_code(rec->ip, NULL, new, 0);
172}
173
174int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
175{
176 unsigned long ip = rec->ip;
177 unsigned char *old, *new;
178
179 old = ftrace_nop_replace();
180 new = ftrace_call_replace(ip, addr);
181 return ftrace_modify_code(ip, old, new, 1);
182}
183
184/* on IA64, _mcount can't call ftrace_stub directly; only a jump is ok */
185int ftrace_update_ftrace_func(ftrace_func_t func)
186{
187 unsigned long ip;
188 unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
189
190 if (func == ftrace_stub)
191 return 0;
192 ip = ((struct fnptr *)func)->ip;
193
194 ia64_patch_imm64(addr + 2, ip);
195
196 flush_icache_range(addr, addr + 16);
197 return 0;
198}
199
200/* run from kstop_machine */
201int __init ftrace_dyn_arch_init(void *data)
202{
203 *(unsigned long *)data = 0;
204
205 return 0;
206}
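The bit-shuffling in ftrace_call_replace() above splits the (bundle-aligned) displacement to the branch target across the brl instruction's immediate fields: bits 23..4 go into imm20, bits 39..24 into imm39_l, bits 62..40 into imm39_h, and bit 63 into i. The helper below is not part of the patch; it just reverses that packing as a sanity check on the encoding.

/* Illustrative only: reassemble the displacement encoded by
 * ftrace_call_replace(); the low four bits are zero because both the
 * call site and the target are 16-byte bundle aligned. */
static unsigned long ftrace_call_offset(const struct ftrace_call_insn *code)
{
	unsigned long offset;

	offset  = (unsigned long)code->imm20   << 4;   /* bits 23..4  */
	offset |= (unsigned long)code->imm39_l << 24;  /* bits 39..24 */
	offset |= (unsigned long)code->imm39_h << 40;  /* bits 62..40 */
	offset |= (unsigned long)code->i       << 63;  /* bit 63      */

	return offset;             /* equals addr - (ip + 0x10) */
}
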
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 6da1f20d7372..2d311864e359 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys);
112#endif 112#endif
113extern char ia64_ivt[]; 113extern char ia64_ivt[];
114EXPORT_SYMBOL(ia64_ivt); 114EXPORT_SYMBOL(ia64_ivt);
115
116#include <asm/ftrace.h>
117#ifdef CONFIG_FUNCTION_TRACER
118/* mcount is defined in assembly */
119EXPORT_SYMBOL(_mcount);
120#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9c39095b33fc..8fc9a847cf48 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
34 select HAVE_FUNCTION_TRACER 34 select HAVE_FUNCTION_TRACER
35 select HAVE_FUNCTION_GRAPH_TRACER 35 select HAVE_FUNCTION_GRAPH_TRACER
36 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 36 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
37 select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
37 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 38 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
38 select HAVE_ARCH_KGDB if !X86_VOYAGER 39 select HAVE_ARCH_KGDB if !X86_VOYAGER
39 select HAVE_ARCH_TRACEHOOK 40 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c3..c5d737cdb365 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h> 36#include <trace/power.h>
37 37
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -70,6 +70,8 @@ struct acpi_cpufreq_data {
70 70
71static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); 71static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
72 72
73DEFINE_TRACE(power_mark);
74
73/* acpi_perf_data is a pointer to percpu data. */ 75/* acpi_perf_data is a pointer to percpu data. */
74static struct acpi_processor_performance *acpi_perf_data; 76static struct acpi_processor_performance *acpi_perf_data;
75 77
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6b1f6f6f8661..077c9ea655fc 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -14,6 +14,7 @@
14#include <linux/bug.h> 14#include <linux/bug.h>
15#include <linux/nmi.h> 15#include <linux/nmi.h>
16#include <linux/sysfs.h> 16#include <linux/sysfs.h>
17#include <linux/ftrace.h>
17 18
18#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
19 20
@@ -195,6 +196,11 @@ unsigned __kprobes long oops_begin(void)
195 int cpu; 196 int cpu;
196 unsigned long flags; 197 unsigned long flags;
197 198
199 /* notify the hw-branch tracer so it may disable tracing and
200 add the last trace to the trace buffer -
201 the earlier this happens, the more useful the trace. */
202 trace_hw_branch_oops();
203
198 oops_enter(); 204 oops_enter();
199 205
200 /* racy, but better than risking deadlock. */ 206 /* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 231bdd3c5b1c..2f9c0c8cb4c7 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -82,7 +82,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
82 * are the same as what exists. 82 * are the same as what exists.
83 */ 83 */
84 84
85static atomic_t in_nmi = ATOMIC_INIT(0); 85static atomic_t nmi_running = ATOMIC_INIT(0);
86static int mod_code_status; /* holds return value of text write */ 86static int mod_code_status; /* holds return value of text write */
87static int mod_code_write; /* set when NMI should do the write */ 87static int mod_code_write; /* set when NMI should do the write */
88static void *mod_code_ip; /* holds the IP to write to */ 88static void *mod_code_ip; /* holds the IP to write to */
@@ -115,8 +115,8 @@ static void ftrace_mod_code(void)
115 115
116void ftrace_nmi_enter(void) 116void ftrace_nmi_enter(void)
117{ 117{
118 atomic_inc(&in_nmi); 118 atomic_inc(&nmi_running);
119 /* Must have in_nmi seen before reading write flag */ 119 /* Must have nmi_running seen before reading write flag */
120 smp_mb(); 120 smp_mb();
121 if (mod_code_write) { 121 if (mod_code_write) {
122 ftrace_mod_code(); 122 ftrace_mod_code();
@@ -126,22 +126,21 @@ void ftrace_nmi_enter(void)
126 126
127void ftrace_nmi_exit(void) 127void ftrace_nmi_exit(void)
128{ 128{
129 /* Finish all executions before clearing in_nmi */ 129 /* Finish all executions before clearing nmi_running */
130 smp_wmb(); 130 smp_wmb();
131 atomic_dec(&in_nmi); 131 atomic_dec(&nmi_running);
132} 132}
133 133
134static void wait_for_nmi(void) 134static void wait_for_nmi(void)
135{ 135{
136 int waited = 0; 136 if (!atomic_read(&nmi_running))
137 return;
137 138
138 while (atomic_read(&in_nmi)) { 139 do {
139 waited = 1;
140 cpu_relax(); 140 cpu_relax();
141 } 141 } while (atomic_read(&nmi_running));
142 142
143 if (waited) 143 nmi_wait_count++;
144 nmi_wait_count++;
145} 144}
146 145
147static int 146static int
@@ -368,25 +367,6 @@ int ftrace_disable_ftrace_graph_caller(void)
368 return ftrace_mod_jmp(ip, old_offset, new_offset); 367 return ftrace_mod_jmp(ip, old_offset, new_offset);
369} 368}
370 369
371#else /* CONFIG_DYNAMIC_FTRACE */
372
373/*
374 * These functions are picked from those used on
375 * this page for dynamic ftrace. They have been
376 * simplified to ignore all traces in NMI context.
377 */
378static atomic_t in_nmi;
379
380void ftrace_nmi_enter(void)
381{
382 atomic_inc(&in_nmi);
383}
384
385void ftrace_nmi_exit(void)
386{
387 atomic_dec(&in_nmi);
388}
389
390#endif /* !CONFIG_DYNAMIC_FTRACE */ 370#endif /* !CONFIG_DYNAMIC_FTRACE */
391 371
392/* Add a function return address to the trace stack on thread info.*/ 372/* Add a function return address to the trace stack on thread info.*/
@@ -476,7 +456,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
476 &return_to_handler; 456 &return_to_handler;
477 457
478 /* Nmi's are currently unsupported */ 458 /* Nmi's are currently unsupported */
479 if (unlikely(atomic_read(&in_nmi))) 459 if (unlikely(in_nmi()))
480 return; 460 return;
481 461
482 if (unlikely(atomic_read(&current->tracing_graph_pause))) 462 if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -512,13 +492,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
512 return; 492 return;
513 } 493 }
514 494
515 if (unlikely(!__kernel_text_address(old))) {
516 ftrace_graph_stop();
517 *parent = old;
518 WARN_ON(1);
519 return;
520 }
521
522 calltime = cpu_clock(raw_smp_processor_id()); 495 calltime = cpu_clock(raw_smp_processor_id());
523 496
524 if (push_return_trace(old, calltime, 497 if (push_return_trace(old, calltime,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d12f7e37f8c..23b328edc2b3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,7 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pm.h> 9#include <linux/pm.h>
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/ftrace.h> 11#include <trace/power.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/apic.h> 13#include <asm/apic.h>
14 14
@@ -19,6 +19,9 @@ EXPORT_SYMBOL(idle_nomwait);
19 19
20struct kmem_cache *task_xstate_cachep; 20struct kmem_cache *task_xstate_cachep;
21 21
22DEFINE_TRACE(power_start);
23DEFINE_TRACE(power_end);
24
22int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 25int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
23{ 26{
24 *dst = *src; 27 *dst = *src;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee..c7da3683f4c5 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -55,7 +55,8 @@ config KVM_AMD
55 55
56config KVM_TRACE 56config KVM_TRACE
57 bool "KVM trace support" 57 bool "KVM trace support"
58 depends on KVM && MARKERS && SYSFS 58 depends on KVM && SYSFS
59 select MARKERS
59 select RELAY 60 select RELAY
60 select DEBUG_FS 61 select DEBUG_FS
61 default n 62 default n
diff --git a/block/Kconfig b/block/Kconfig
index 0cbb3b88b59a..e7d12782bcfb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -44,22 +44,6 @@ config LBD
44 44
45 If unsure, say N. 45 If unsure, say N.
46 46
47config BLK_DEV_IO_TRACE
48 bool "Support for tracing block io actions"
49 depends on SYSFS
50 select RELAY
51 select DEBUG_FS
52 select TRACEPOINTS
53 help
54 Say Y here if you want to be able to trace the block layer actions
55 on a given queue. Tracing allows you to see any traffic happening
56 on a block device queue. For more information (and the userspace
57 support tools needed), fetch the blktrace tools from:
58
59 git://git.kernel.dk/blktrace.git
60
61 If unsure, say N.
62
63config BLK_DEV_BSG 47config BLK_DEV_BSG
64 bool "Block layer SG support v4 (EXPERIMENTAL)" 48 bool "Block layer SG support v4 (EXPERIMENTAL)"
65 depends on EXPERIMENTAL 49 depends on EXPERIMENTAL
diff --git a/block/Makefile b/block/Makefile
index bfe73049f939..e9fa4dd690f2 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -13,6 +13,5 @@ obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
13obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o 13obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
14obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o 14obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
15 15
16obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
17obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 16obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
18obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o 17obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 33a9351c896d..30659ce9bcf4 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -283,7 +283,7 @@ static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
283} 283}
284static struct sysrq_key_op sysrq_ftrace_dump_op = { 284static struct sysrq_key_op sysrq_ftrace_dump_op = {
285 .handler = sysrq_ftrace_dump, 285 .handler = sysrq_ftrace_dump,
286 .help_msg = "dumpZ-ftrace-buffer", 286 .help_msg = "dump-ftrace-buffer(Z)",
287 .action_msg = "Dump ftrace buffer", 287 .action_msg = "Dump ftrace buffer",
288 .enable_mask = SYSRQ_ENABLE_DUMP, 288 .enable_mask = SYSRQ_ENABLE_DUMP,
289}; 289};
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index e76d715e4342..f0e99d4c066b 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -161,7 +161,7 @@ struct op_sample
161{ 161{
162 entry->event = ring_buffer_lock_reserve 162 entry->event = ring_buffer_lock_reserve
163 (op_ring_buffer_write, sizeof(struct op_sample) + 163 (op_ring_buffer_write, sizeof(struct op_sample) +
164 size * sizeof(entry->sample->data[0]), &entry->irq_flags); 164 size * sizeof(entry->sample->data[0]));
165 if (entry->event) 165 if (entry->event)
166 entry->sample = ring_buffer_event_data(entry->event); 166 entry->sample = ring_buffer_event_data(entry->event);
167 else 167 else
@@ -178,8 +178,7 @@ struct op_sample
178 178
179int op_cpu_buffer_write_commit(struct op_entry *entry) 179int op_cpu_buffer_write_commit(struct op_entry *entry)
180{ 180{
181 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, 181 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event);
182 entry->irq_flags);
183} 182}
184 183
185struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) 184struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f4..8a17f7edcc74 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -19,6 +19,7 @@
19#include <linux/kmod.h> 19#include <linux/kmod.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/genhd.h> 21#include <linux/genhd.h>
22#include <linux/blktrace_api.h>
22 23
23#include "check.h" 24#include "check.h"
24 25
@@ -294,6 +295,9 @@ static struct attribute_group part_attr_group = {
294 295
295static struct attribute_group *part_attr_groups[] = { 296static struct attribute_group *part_attr_groups[] = {
296 &part_attr_group, 297 &part_attr_group,
298#ifdef CONFIG_BLK_DEV_IO_TRACE
299 &blk_trace_attr_group,
300#endif
297 NULL 301 NULL
298}; 302};
299 303
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 25379cba2370..ed12e8fd8cf7 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -143,6 +143,9 @@ struct blk_user_trace_setup {
143 143
144#ifdef __KERNEL__ 144#ifdef __KERNEL__
145#if defined(CONFIG_BLK_DEV_IO_TRACE) 145#if defined(CONFIG_BLK_DEV_IO_TRACE)
146
147#include <linux/sysfs.h>
148
146struct blk_trace { 149struct blk_trace {
147 int trace_state; 150 int trace_state;
148 struct rchan *rchan; 151 struct rchan *rchan;
@@ -193,6 +196,8 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
193extern int blk_trace_startstop(struct request_queue *q, int start); 196extern int blk_trace_startstop(struct request_queue *q, int start);
194extern int blk_trace_remove(struct request_queue *q); 197extern int blk_trace_remove(struct request_queue *q);
195 198
199extern struct attribute_group blk_trace_attr_group;
200
196#else /* !CONFIG_BLK_DEV_IO_TRACE */ 201#else /* !CONFIG_BLK_DEV_IO_TRACE */
197#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 202#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
198#define blk_trace_shutdown(q) do { } while (0) 203#define blk_trace_shutdown(q) do { } while (0)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 677432b9cb7e..63281228ce3e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -95,10 +95,41 @@ stack_trace_sysctl(struct ctl_table *table, int write,
95 loff_t *ppos); 95 loff_t *ppos);
96#endif 96#endif
97 97
98struct ftrace_func_command {
99 struct list_head list;
100 char *name;
101 int (*func)(char *func, char *cmd,
102 char *params, int enable);
103};
104
98#ifdef CONFIG_DYNAMIC_FTRACE 105#ifdef CONFIG_DYNAMIC_FTRACE
99/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ 106/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */
100#include <asm/ftrace.h> 107#include <asm/ftrace.h>
101 108
109struct seq_file;
110
111struct ftrace_hook_ops {
112 void (*func)(unsigned long ip,
113 unsigned long parent_ip,
114 void **data);
115 int (*callback)(unsigned long ip, void **data);
116 void (*free)(void **data);
117 int (*print)(struct seq_file *m,
118 unsigned long ip,
119 struct ftrace_hook_ops *ops,
120 void *data);
121};
122
123extern int
124register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops,
125 void *data);
126extern void
127unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops,
128 void *data);
129extern void
130unregister_ftrace_function_hook_func(char *glob, struct ftrace_hook_ops *ops);
131extern void unregister_ftrace_function_hook_all(char *glob);
132
102enum { 133enum {
103 FTRACE_FL_FREE = (1 << 0), 134 FTRACE_FL_FREE = (1 << 0),
104 FTRACE_FL_FAILED = (1 << 1), 135 FTRACE_FL_FAILED = (1 << 1),
@@ -119,6 +150,9 @@ struct dyn_ftrace {
119int ftrace_force_update(void); 150int ftrace_force_update(void);
120void ftrace_set_filter(unsigned char *buf, int len, int reset); 151void ftrace_set_filter(unsigned char *buf, int len, int reset);
121 152
153int register_ftrace_command(struct ftrace_func_command *cmd);
154int unregister_ftrace_command(struct ftrace_func_command *cmd);
155
122/* defined in arch */ 156/* defined in arch */
123extern int ftrace_ip_converted(unsigned long ip); 157extern int ftrace_ip_converted(unsigned long ip);
124extern int ftrace_dyn_arch_init(void *data); 158extern int ftrace_dyn_arch_init(void *data);
@@ -126,6 +160,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
126extern void ftrace_caller(void); 160extern void ftrace_caller(void);
127extern void ftrace_call(void); 161extern void ftrace_call(void);
128extern void mcount_call(void); 162extern void mcount_call(void);
163
164#ifndef FTRACE_ADDR
165#define FTRACE_ADDR ((unsigned long)ftrace_caller)
166#endif
129#ifdef CONFIG_FUNCTION_GRAPH_TRACER 167#ifdef CONFIG_FUNCTION_GRAPH_TRACER
130extern void ftrace_graph_caller(void); 168extern void ftrace_graph_caller(void);
131extern int ftrace_enable_ftrace_graph_caller(void); 169extern int ftrace_enable_ftrace_graph_caller(void);
@@ -136,7 +174,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
136#endif 174#endif
137 175
138/** 176/**
139 * ftrace_make_nop - convert code into top 177 * ftrace_make_nop - convert code into nop
140 * @mod: module structure if called by module load initialization 178 * @mod: module structure if called by module load initialization
141 * @rec: the mcount call site record 179 * @rec: the mcount call site record
142 * @addr: the address that the call site should be calling 180 * @addr: the address that the call site should be calling
@@ -198,6 +236,14 @@ extern void ftrace_enable_daemon(void);
198# define ftrace_disable_daemon() do { } while (0) 236# define ftrace_disable_daemon() do { } while (0)
199# define ftrace_enable_daemon() do { } while (0) 237# define ftrace_enable_daemon() do { } while (0)
200static inline void ftrace_release(void *start, unsigned long size) { } 238static inline void ftrace_release(void *start, unsigned long size) { }
239static inline int register_ftrace_command(struct ftrace_func_command *cmd)
240{
241 return -EINVAL;
242}
243static inline int unregister_ftrace_command(char *cmd_name)
244{
245 return -EINVAL;
246}
201#endif /* CONFIG_DYNAMIC_FTRACE */ 247#endif /* CONFIG_DYNAMIC_FTRACE */
202 248
203/* totally disable ftrace - can not re-enable after this */ 249/* totally disable ftrace - can not re-enable after this */
@@ -298,6 +344,9 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
298extern int 344extern int
299__ftrace_printk(unsigned long ip, const char *fmt, ...) 345__ftrace_printk(unsigned long ip, const char *fmt, ...)
300 __attribute__ ((format (printf, 2, 3))); 346 __attribute__ ((format (printf, 2, 3)));
347# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap)
348extern int
349__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
301extern void ftrace_dump(void); 350extern void ftrace_dump(void);
302#else 351#else
303static inline void 352static inline void
@@ -313,6 +362,11 @@ ftrace_printk(const char *fmt, ...)
313{ 362{
314 return 0; 363 return 0;
315} 364}
365static inline int
366ftrace_vprintk(const char *fmt, va_list ap)
367{
368 return 0;
369}
316static inline void ftrace_dump(void) { } 370static inline void ftrace_dump(void) { }
317#endif 371#endif
318 372
@@ -327,36 +381,6 @@ ftrace_init_module(struct module *mod,
327 unsigned long *start, unsigned long *end) { } 381 unsigned long *start, unsigned long *end) { }
328#endif 382#endif
329 383
330enum {
331 POWER_NONE = 0,
332 POWER_CSTATE = 1,
333 POWER_PSTATE = 2,
334};
335
336struct power_trace {
337#ifdef CONFIG_POWER_TRACER
338 ktime_t stamp;
339 ktime_t end;
340 int type;
341 int state;
342#endif
343};
344
345#ifdef CONFIG_POWER_TRACER
346extern void trace_power_start(struct power_trace *it, unsigned int type,
347 unsigned int state);
348extern void trace_power_mark(struct power_trace *it, unsigned int type,
349 unsigned int state);
350extern void trace_power_end(struct power_trace *it);
351#else
352static inline void trace_power_start(struct power_trace *it, unsigned int type,
353 unsigned int state) { }
354static inline void trace_power_mark(struct power_trace *it, unsigned int type,
355 unsigned int state) { }
356static inline void trace_power_end(struct power_trace *it) { }
357#endif
358
359
360/* 384/*
361 * Structure that defines an entry function trace. 385 * Structure that defines an entry function trace.
362 */ 386 */
@@ -492,4 +516,17 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
492 516
493#endif /* CONFIG_TRACING */ 517#endif /* CONFIG_TRACING */
494 518
519
520#ifdef CONFIG_HW_BRANCH_TRACER
521
522void trace_hw_branch(u64 from, u64 to);
523void trace_hw_branch_oops(void);
524
525#else /* CONFIG_HW_BRANCH_TRACER */
526
527static inline void trace_hw_branch(u64 from, u64 to) {}
528static inline void trace_hw_branch_oops(void) {}
529
530#endif /* CONFIG_HW_BRANCH_TRACER */
531
495#endif /* _LINUX_FTRACE_H */ 532#endif /* _LINUX_FTRACE_H */
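The ftrace_func_command and ftrace_hook_ops interfaces declared in this hunk are implemented in kernel/trace/ftrace.c (see the diffstat above), which is not shown here. The fragment below is only a sketch of how a module might attach a hook using those declarations; the glob, the counter name and the assumption that ->func fires for every matched call site are illustrative, not taken from the patch.

#include <linux/ftrace.h>
#include <linux/module.h>

static unsigned long my_hook_hits;	/* hypothetical per-module counter */

static void my_hook_func(unsigned long ip, unsigned long parent_ip,
			 void **data)
{
	my_hook_hits++;			/* count how often a matched site ran */
}

static struct ftrace_hook_ops my_hook_ops = {
	.func = my_hook_func,		/* callback/free/print left unset */
};

static int __init my_hook_init(void)
{
	int ret;

	/* attach the hook to all functions matching the glob */
	ret = register_ftrace_function_hook("schedule*", &my_hook_ops, NULL);
	return ret < 0 ? ret : 0;
}

static void __exit my_hook_exit(void)
{
	unregister_ftrace_function_hook("schedule*", &my_hook_ops, NULL);
}

module_init(my_hook_init);
module_exit(my_hook_exit);
MODULE_LICENSE("GPL");
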
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index 366a054d0b05..dca7bf8cffe2 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -2,7 +2,7 @@
2#define _LINUX_FTRACE_IRQ_H 2#define _LINUX_FTRACE_IRQ_H
3 3
4 4
5#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) 5#ifdef CONFIG_FTRACE_NMI_ENTER
6extern void ftrace_nmi_enter(void); 6extern void ftrace_nmi_enter(void);
7extern void ftrace_nmi_exit(void); 7extern void ftrace_nmi_exit(void);
8#else 8#else
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f83288347dda..faa1cf848bcd 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -15,55 +15,61 @@
15 * - bits 0-7 are the preemption count (max preemption depth: 256) 15 * - bits 0-7 are the preemption count (max preemption depth: 256)
16 * - bits 8-15 are the softirq count (max # of softirqs: 256) 16 * - bits 8-15 are the softirq count (max # of softirqs: 256)
17 * 17 *
18 * The hardirq count can be overridden per architecture, the default is: 18 * The hardirq count can in theory reach the same as NR_IRQS.
19 * In reality, the number of nested IRQS is limited to the stack
20 * size as well. For archs with over 1000 IRQS it is not practical
21 * to expect that they will all nest. We give a max of 10 bits for
22 * hardirq nesting. An arch may choose to give less than 10 bits.
23 * m68k expects it to be 8.
19 * 24 *
20 * - bits 16-27 are the hardirq count (max # of hardirqs: 4096) 25 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
21 * - ( bit 28 is the PREEMPT_ACTIVE flag. ) 26 * - bit 26 is the NMI_MASK
27 * - bit 28 is the PREEMPT_ACTIVE flag
22 * 28 *
23 * PREEMPT_MASK: 0x000000ff 29 * PREEMPT_MASK: 0x000000ff
24 * SOFTIRQ_MASK: 0x0000ff00 30 * SOFTIRQ_MASK: 0x0000ff00
25 * HARDIRQ_MASK: 0x0fff0000 31 * HARDIRQ_MASK: 0x03ff0000
32 * NMI_MASK: 0x04000000
26 */ 33 */
27#define PREEMPT_BITS 8 34#define PREEMPT_BITS 8
28#define SOFTIRQ_BITS 8 35#define SOFTIRQ_BITS 8
36#define NMI_BITS 1
29 37
30#ifndef HARDIRQ_BITS 38#define MAX_HARDIRQ_BITS 10
31#define HARDIRQ_BITS 12
32 39
33#ifndef MAX_HARDIRQS_PER_CPU 40#ifndef HARDIRQ_BITS
34#define MAX_HARDIRQS_PER_CPU NR_IRQS 41# define HARDIRQ_BITS MAX_HARDIRQ_BITS
35#endif 42#endif
36 43
37/* 44#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
38 * The hardirq mask has to be large enough to have space for potentially 45#error HARDIRQ_BITS too high!
39 * all IRQ sources in the system nesting on a single CPU.
40 */
41#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU
42# error HARDIRQ_BITS is too low!
43#endif
44#endif 46#endif
45 47
46#define PREEMPT_SHIFT 0 48#define PREEMPT_SHIFT 0
47#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) 49#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
48#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) 50#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
51#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
49 52
50#define __IRQ_MASK(x) ((1UL << (x))-1) 53#define __IRQ_MASK(x) ((1UL << (x))-1)
51 54
52#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) 55#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
53#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) 56#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
54#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) 57#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
58#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
55 59
56#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) 60#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
57#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) 61#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
58#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) 62#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
63#define NMI_OFFSET (1UL << NMI_SHIFT)
59 64
60#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) 65#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
61#error PREEMPT_ACTIVE is too low! 66#error PREEMPT_ACTIVE is too low!
62#endif 67#endif
63 68
64#define hardirq_count() (preempt_count() & HARDIRQ_MASK) 69#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
65#define softirq_count() (preempt_count() & SOFTIRQ_MASK) 70#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
66#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) 71#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
72 | NMI_MASK))
67 73
68/* 74/*
69 * Are we doing bottom half or hardware interrupt processing? 75 * Are we doing bottom half or hardware interrupt processing?
@@ -73,6 +79,11 @@
73#define in_softirq() (softirq_count()) 79#define in_softirq() (softirq_count())
74#define in_interrupt() (irq_count()) 80#define in_interrupt() (irq_count())
75 81
82/*
83 * Are we in NMI context?
84 */
85#define in_nmi() (preempt_count() & NMI_MASK)
86
76#if defined(CONFIG_PREEMPT) 87#if defined(CONFIG_PREEMPT)
77# define PREEMPT_INATOMIC_BASE kernel_locked() 88# define PREEMPT_INATOMIC_BASE kernel_locked()
78# define PREEMPT_CHECK_OFFSET 1 89# define PREEMPT_CHECK_OFFSET 1
@@ -164,20 +175,24 @@ extern void irq_enter(void);
164 */ 175 */
165extern void irq_exit(void); 176extern void irq_exit(void);
166 177
167#define nmi_enter() \ 178#define nmi_enter() \
168 do { \ 179 do { \
169 ftrace_nmi_enter(); \ 180 ftrace_nmi_enter(); \
170 lockdep_off(); \ 181 BUG_ON(in_nmi()); \
171 rcu_nmi_enter(); \ 182 add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
172 __irq_enter(); \ 183 lockdep_off(); \
184 rcu_nmi_enter(); \
185 trace_hardirq_enter(); \
173 } while (0) 186 } while (0)
174 187
175#define nmi_exit() \ 188#define nmi_exit() \
176 do { \ 189 do { \
177 __irq_exit(); \ 190 trace_hardirq_exit(); \
178 rcu_nmi_exit(); \ 191 rcu_nmi_exit(); \
179 lockdep_on(); \ 192 lockdep_on(); \
180 ftrace_nmi_exit(); \ 193 BUG_ON(!in_nmi()); \
194 sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
195 ftrace_nmi_exit(); \
181 } while (0) 196 } while (0)
182 197
183#endif /* LINUX_HARDIRQ_H */ 198#endif /* LINUX_HARDIRQ_H */
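
As an illustration of the reworked preempt_count() layout, here is a minimal sketch that decomposes the count with the masks and shifts above (not part of the patch; assumes only the macros defined in this header and the usual kernel includes):

static inline void show_preempt_count(unsigned long count)
{
	/* Isolate each field with its mask, then shift it down. */
	pr_debug("preempt=%lu softirq=%lu hardirq=%lu nmi=%lu\n",
		 (count & PREEMPT_MASK) >> PREEMPT_SHIFT,
		 (count & SOFTIRQ_MASK) >> SOFTIRQ_SHIFT,
		 (count & HARDIRQ_MASK) >> HARDIRQ_SHIFT,
		 (count & NMI_MASK) >> NMI_SHIFT);
}

Note that nmi_enter() now adds NMI_OFFSET + HARDIRQ_OFFSET, so both in_nmi() and in_interrupt() are true inside an NMI handler.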
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b3b359660082..f5e793d69bd3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -8,7 +8,7 @@ struct ring_buffer;
8struct ring_buffer_iter; 8struct ring_buffer_iter;
9 9
10/* 10/*
11 * Don't reference this struct directly, use functions below. 11 * Don't refer to this struct directly, use functions below.
12 */ 12 */
13struct ring_buffer_event { 13struct ring_buffer_event {
14 u32 type:2, len:3, time_delta:27; 14 u32 type:2, len:3, time_delta:27;
@@ -74,13 +74,10 @@ void ring_buffer_free(struct ring_buffer *buffer);
74 74
75int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); 75int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
76 76
77struct ring_buffer_event * 77struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer,
78ring_buffer_lock_reserve(struct ring_buffer *buffer, 78 unsigned long length);
79 unsigned long length,
80 unsigned long *flags);
81int ring_buffer_unlock_commit(struct ring_buffer *buffer, 79int ring_buffer_unlock_commit(struct ring_buffer *buffer,
82 struct ring_buffer_event *event, 80 struct ring_buffer_event *event);
83 unsigned long flags);
84int ring_buffer_write(struct ring_buffer *buffer, 81int ring_buffer_write(struct ring_buffer *buffer,
85 unsigned long length, void *data); 82 unsigned long length, void *data);
86 83
@@ -124,9 +121,20 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
124u64 ring_buffer_time_stamp(int cpu); 121u64 ring_buffer_time_stamp(int cpu);
125void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); 122void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
126 123
124/*
125 * The below functions are fine to use outside the tracing facility.
126 */
127#ifdef CONFIG_RING_BUFFER
127void tracing_on(void); 128void tracing_on(void);
128void tracing_off(void); 129void tracing_off(void);
129void tracing_off_permanent(void); 130void tracing_off_permanent(void);
131int tracing_is_on(void);
132#else
133static inline void tracing_on(void) { }
134static inline void tracing_off(void) { }
135static inline void tracing_off_permanent(void) { }
136static inline int tracing_is_on(void) { return 0; }
137#endif
130 138
131void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); 139void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
132void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); 140void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
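
A minimal sketch of the updated reserve/commit sequence, which no longer passes irq flags around; write_sample() and struct sample_event are hypothetical, only the ring_buffer_* calls come from this header:

struct sample_event {
	u32 value;
};

static int write_sample(struct ring_buffer *buffer, u32 value)
{
	struct ring_buffer_event *event;
	struct sample_event *entry;

	/* NULL means the event could not be reserved. */
	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return -EBUSY;

	entry = ring_buffer_event_data(event);
	entry->value = value;

	/* The commit makes the event visible to readers. */
	return ring_buffer_unlock_commit(buffer, event);
}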
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8981e52c714f..426666dd8203 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_uninterruptible(void);
137extern unsigned long nr_active(void); 137extern unsigned long nr_active(void);
138extern unsigned long nr_iowait(void); 138extern unsigned long nr_iowait(void);
139 139
140extern unsigned long get_parent_ip(unsigned long addr);
141
140struct seq_file; 142struct seq_file;
141struct cfs_rq; 143struct cfs_rq;
142struct task_group; 144struct task_group;
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 6ca6a7b66d75..f4523651fa42 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,6 +14,7 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <trace/kmemtrace.h>
17 18
18/* Size description struct for general caches. */ 19/* Size description struct for general caches. */
19struct cache_sizes { 20struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
28void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 29void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
29void *__kmalloc(size_t size, gfp_t flags); 30void *__kmalloc(size_t size, gfp_t flags);
30 31
31static inline void *kmalloc(size_t size, gfp_t flags) 32#ifdef CONFIG_KMEMTRACE
33extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
34extern size_t slab_buffer_size(struct kmem_cache *cachep);
35#else
36static __always_inline void *
37kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
32{ 38{
39 return kmem_cache_alloc(cachep, flags);
40}
41static inline size_t slab_buffer_size(struct kmem_cache *cachep)
42{
43 return 0;
44}
45#endif
46
47static __always_inline void *kmalloc(size_t size, gfp_t flags)
48{
49 struct kmem_cache *cachep;
50 void *ret;
51
33 if (__builtin_constant_p(size)) { 52 if (__builtin_constant_p(size)) {
34 int i = 0; 53 int i = 0;
35 54
@@ -47,10 +66,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
47found: 66found:
48#ifdef CONFIG_ZONE_DMA 67#ifdef CONFIG_ZONE_DMA
49 if (flags & GFP_DMA) 68 if (flags & GFP_DMA)
50 return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep, 69 cachep = malloc_sizes[i].cs_dmacachep;
51 flags); 70 else
52#endif 71#endif
53 return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags); 72 cachep = malloc_sizes[i].cs_cachep;
73
74 ret = kmem_cache_alloc_notrace(cachep, flags);
75
76 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
77 size, slab_buffer_size(cachep), flags);
78
79 return ret;
54 } 80 }
55 return __kmalloc(size, flags); 81 return __kmalloc(size, flags);
56} 82}
@@ -59,8 +85,25 @@ found:
59extern void *__kmalloc_node(size_t size, gfp_t flags, int node); 85extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
60extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 86extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
61 87
62static inline void *kmalloc_node(size_t size, gfp_t flags, int node) 88#ifdef CONFIG_KMEMTRACE
89extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
90 gfp_t flags,
91 int nodeid);
92#else
93static __always_inline void *
94kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
95 gfp_t flags,
96 int nodeid)
97{
98 return kmem_cache_alloc_node(cachep, flags, nodeid);
99}
100#endif
101
102static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
63{ 103{
104 struct kmem_cache *cachep;
105 void *ret;
106
64 if (__builtin_constant_p(size)) { 107 if (__builtin_constant_p(size)) {
65 int i = 0; 108 int i = 0;
66 109
@@ -78,11 +121,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
78found: 121found:
79#ifdef CONFIG_ZONE_DMA 122#ifdef CONFIG_ZONE_DMA
80 if (flags & GFP_DMA) 123 if (flags & GFP_DMA)
81 return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep, 124 cachep = malloc_sizes[i].cs_dmacachep;
82 flags, node); 125 else
83#endif 126#endif
84 return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep, 127 cachep = malloc_sizes[i].cs_cachep;
85 flags, node); 128
129 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
130
131 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
132 ret, size, slab_buffer_size(cachep),
133 flags, node);
134
135 return ret;
86 } 136 }
87 return __kmalloc_node(size, flags, node); 137 return __kmalloc_node(size, flags, node);
88} 138}
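
For a constant-size call site such as kmalloc(128, GFP_KERNEL) with CONFIG_KMEMTRACE enabled, the inlined fast path above reduces to roughly the following (illustrative only; the cache index is picked at compile time, and malloc_sizes[0] merely stands in for whichever cache matches the requested size):

static void *example_kmalloc_128(void)
{
	struct kmem_cache *cachep = malloc_sizes[0].cs_cachep;
	void *ret = kmem_cache_alloc_notrace(cachep, GFP_KERNEL);

	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
			     128, slab_buffer_size(cachep), GFP_KERNEL);
	return ret;
}

With CONFIG_KMEMTRACE disabled, kmem_cache_alloc_notrace() falls back to kmem_cache_alloc() and the kmemtrace_mark_alloc() call compiles away.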
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 59a3fa476ab9..0ec00b39d006 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -3,14 +3,15 @@
3 3
4void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 4void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
5 5
6static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 6static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
7 gfp_t flags)
7{ 8{
8 return kmem_cache_alloc_node(cachep, flags, -1); 9 return kmem_cache_alloc_node(cachep, flags, -1);
9} 10}
10 11
11void *__kmalloc_node(size_t size, gfp_t flags, int node); 12void *__kmalloc_node(size_t size, gfp_t flags, int node);
12 13
13static inline void *kmalloc_node(size_t size, gfp_t flags, int node) 14static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
14{ 15{
15 return __kmalloc_node(size, flags, node); 16 return __kmalloc_node(size, flags, node);
16} 17}
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
23 * kmalloc is the normal method of allocating memory 24 * kmalloc is the normal method of allocating memory
24 * in the kernel. 25 * in the kernel.
25 */ 26 */
26static inline void *kmalloc(size_t size, gfp_t flags) 27static __always_inline void *kmalloc(size_t size, gfp_t flags)
27{ 28{
28 return __kmalloc_node(size, flags, -1); 29 return __kmalloc_node(size, flags, -1);
29} 30}
30 31
31static inline void *__kmalloc(size_t size, gfp_t flags) 32static __always_inline void *__kmalloc(size_t size, gfp_t flags)
32{ 33{
33 return kmalloc(size, flags); 34 return kmalloc(size, flags);
34} 35}
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aacd..6b657f7dcb2b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <trace/kmemtrace.h>
13 14
14enum stat_item { 15enum stat_item {
15 ALLOC_FASTPATH, /* Allocation from cpu slab */ 16 ALLOC_FASTPATH, /* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
204void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 205void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
205void *__kmalloc(size_t size, gfp_t flags); 206void *__kmalloc(size_t size, gfp_t flags);
206 207
208#ifdef CONFIG_KMEMTRACE
209extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
210#else
211static __always_inline void *
212kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
213{
214 return kmem_cache_alloc(s, gfpflags);
215}
216#endif
217
207static __always_inline void *kmalloc_large(size_t size, gfp_t flags) 218static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
208{ 219{
209 return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size)); 220 unsigned int order = get_order(size);
221 void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
222
223 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
224 size, PAGE_SIZE << order, flags);
225
226 return ret;
210} 227}
211 228
212static __always_inline void *kmalloc(size_t size, gfp_t flags) 229static __always_inline void *kmalloc(size_t size, gfp_t flags)
213{ 230{
231 void *ret;
232
214 if (__builtin_constant_p(size)) { 233 if (__builtin_constant_p(size)) {
215 if (size > PAGE_SIZE) 234 if (size > PAGE_SIZE)
216 return kmalloc_large(size, flags); 235 return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
221 if (!s) 240 if (!s)
222 return ZERO_SIZE_PTR; 241 return ZERO_SIZE_PTR;
223 242
224 return kmem_cache_alloc(s, flags); 243 ret = kmem_cache_alloc_notrace(s, flags);
244
245 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
246 _THIS_IP_, ret,
247 size, s->size, flags);
248
249 return ret;
225 } 250 }
226 } 251 }
227 return __kmalloc(size, flags); 252 return __kmalloc(size, flags);
@@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
231void *__kmalloc_node(size_t size, gfp_t flags, int node); 256void *__kmalloc_node(size_t size, gfp_t flags, int node);
232void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 257void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
233 258
259#ifdef CONFIG_KMEMTRACE
260extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
261 gfp_t gfpflags,
262 int node);
263#else
264static __always_inline void *
265kmem_cache_alloc_node_notrace(struct kmem_cache *s,
266 gfp_t gfpflags,
267 int node)
268{
269 return kmem_cache_alloc_node(s, gfpflags, node);
270}
271#endif
272
234static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) 273static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
235{ 274{
275 void *ret;
276
236 if (__builtin_constant_p(size) && 277 if (__builtin_constant_p(size) &&
237 size <= PAGE_SIZE && !(flags & SLUB_DMA)) { 278 size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
238 struct kmem_cache *s = kmalloc_slab(size); 279 struct kmem_cache *s = kmalloc_slab(size);
@@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
240 if (!s) 281 if (!s)
241 return ZERO_SIZE_PTR; 282 return ZERO_SIZE_PTR;
242 283
243 return kmem_cache_alloc_node(s, flags, node); 284 ret = kmem_cache_alloc_node_notrace(s, flags, node);
285
286 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
287 _THIS_IP_, ret,
288 size, s->size, flags, node);
289
290 return ret;
244 } 291 }
245 return __kmalloc_node(size, flags, node); 292 return __kmalloc_node(size, flags, node);
246} 293}
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
new file mode 100644
index 000000000000..ad8b7857855a
--- /dev/null
+++ b/include/trace/kmemtrace.h
@@ -0,0 +1,75 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <linux/types.h>
13#include <linux/marker.h>
14
15enum kmemtrace_type_id {
16 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
17 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
18 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
19};
20
21#ifdef CONFIG_KMEMTRACE
22
23extern void kmemtrace_init(void);
24
25extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
26 unsigned long call_site,
27 const void *ptr,
28 size_t bytes_req,
29 size_t bytes_alloc,
30 gfp_t gfp_flags,
31 int node);
32
33extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
34 unsigned long call_site,
35 const void *ptr);
36
37#else /* CONFIG_KMEMTRACE */
38
39static inline void kmemtrace_init(void)
40{
41}
42
43static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
44 unsigned long call_site,
45 const void *ptr,
46 size_t bytes_req,
47 size_t bytes_alloc,
48 gfp_t gfp_flags,
49 int node)
50{
51}
52
53static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
54 unsigned long call_site,
55 const void *ptr)
56{
57}
58
59#endif /* CONFIG_KMEMTRACE */
60
61static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
62 unsigned long call_site,
63 const void *ptr,
64 size_t bytes_req,
65 size_t bytes_alloc,
66 gfp_t gfp_flags)
67{
68 kmemtrace_mark_alloc_node(type_id, call_site, ptr,
69 bytes_req, bytes_alloc, gfp_flags, -1);
70}
71
72#endif /* __KERNEL__ */
73
74#endif /* _LINUX_KMEMTRACE_H */
75
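
A minimal sketch of how an allocation/free pair is reported through this API; my_alloc_hook() and my_free_hook() are hypothetical wrappers (the real callers are the slab inlines patched above), and bytes_req is passed as bytes_alloc because the wrapper does not know the real slab object size:

static void *my_alloc_hook(size_t size, gfp_t flags)
{
	void *ret = kmalloc(size, flags);

	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
			     size, size, flags);
	return ret;
}

static void my_free_hook(const void *ptr)
{
	kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ptr);
	kfree(ptr);
}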
diff --git a/include/trace/power.h b/include/trace/power.h
new file mode 100644
index 000000000000..2c733e58e89c
--- /dev/null
+++ b/include/trace/power.h
@@ -0,0 +1,34 @@
1#ifndef _TRACE_POWER_H
2#define _TRACE_POWER_H
3
4#include <linux/ktime.h>
5#include <linux/tracepoint.h>
6
7enum {
8 POWER_NONE = 0,
9 POWER_CSTATE = 1,
10 POWER_PSTATE = 2,
11};
12
13struct power_trace {
14#ifdef CONFIG_POWER_TRACER
15 ktime_t stamp;
16 ktime_t end;
17 int type;
18 int state;
19#endif
20};
21
22DECLARE_TRACE(power_start,
23 TPPROTO(struct power_trace *it, unsigned int type, unsigned int state),
24 TPARGS(it, type, state));
25
26DECLARE_TRACE(power_mark,
27 TPPROTO(struct power_trace *it, unsigned int type, unsigned int state),
28 TPARGS(it, type, state));
29
30DECLARE_TRACE(power_end,
31 TPPROTO(struct power_trace *it),
32 TPARGS(it));
33
34#endif /* _TRACE_POWER_H */
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
new file mode 100644
index 000000000000..867829df4571
--- /dev/null
+++ b/include/trace/workqueue.h
@@ -0,0 +1,25 @@
1#ifndef __TRACE_WORKQUEUE_H
2#define __TRACE_WORKQUEUE_H
3
4#include <linux/tracepoint.h>
5#include <linux/workqueue.h>
6#include <linux/sched.h>
7
8DECLARE_TRACE(workqueue_insertion,
9 TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
10 TPARGS(wq_thread, work));
11
12DECLARE_TRACE(workqueue_execution,
13 TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
14 TPARGS(wq_thread, work));
15
16/* Trace the creation of one workqueue thread on a cpu */
17DECLARE_TRACE(workqueue_creation,
18 TPPROTO(struct task_struct *wq_thread, int cpu),
19 TPARGS(wq_thread, cpu));
20
21DECLARE_TRACE(workqueue_destruction,
22 TPPROTO(struct task_struct *wq_thread),
23 TPARGS(wq_thread));
24
25#endif /* __TRACE_WORKQUEUE_H */
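
Both this header and trace/power.h above use the plain DECLARE_TRACE() tracepoint API, so a tracer attaches by registering a probe whose prototype matches the TPPROTO. A minimal sketch (probe_workqueue_execution and my_wq_probe_init are hypothetical; the DEFINE_TRACE side is expected to live with the hooks in kernel/workqueue.c):

static void probe_workqueue_execution(struct task_struct *wq_thread,
				      struct work_struct *work)
{
	/* Runs every time a workqueue thread executes a work item. */
	pr_debug("%s executes work %p\n", wq_thread->comm, work);
}

static int __init my_wq_probe_init(void)
{
	return register_trace_workqueue_execution(probe_workqueue_execution);
}

The probe is detached again with unregister_trace_workqueue_execution() when the tracer is torn down.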
diff --git a/init/main.c b/init/main.c
index 844209453c02..db7974ff7a0a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
70#include <asm/setup.h> 70#include <asm/setup.h>
71#include <asm/sections.h> 71#include <asm/sections.h>
72#include <asm/cacheflush.h> 72#include <asm/cacheflush.h>
73#include <trace/kmemtrace.h>
73 74
74#ifdef CONFIG_X86_LOCAL_APIC 75#ifdef CONFIG_X86_LOCAL_APIC
75#include <asm/smp.h> 76#include <asm/smp.h>
@@ -641,6 +642,7 @@ asmlinkage void __init start_kernel(void)
641 enable_debug_pagealloc(); 642 enable_debug_pagealloc();
642 cpu_hotplug_init(); 643 cpu_hotplug_init();
643 kmem_cache_init(); 644 kmem_cache_init();
645 kmemtrace_init();
644 debug_objects_mem_init(); 646 debug_objects_mem_init();
645 idr_init_cache(); 647 idr_init_cache();
646 setup_per_cpu_pageset(); 648 setup_per_cpu_pageset();
diff --git a/kernel/extable.c b/kernel/extable.c
index e136ed8d82ba..0df6253730be 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -41,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
41 return e; 41 return e;
42} 42}
43 43
44__notrace_funcgraph int core_kernel_text(unsigned long addr) 44int core_kernel_text(unsigned long addr)
45{ 45{
46 if (addr >= (unsigned long)_stext && 46 if (addr >= (unsigned long)_stext &&
47 addr <= (unsigned long)_etext) 47 addr <= (unsigned long)_etext)
@@ -54,7 +54,7 @@ __notrace_funcgraph int core_kernel_text(unsigned long addr)
54 return 0; 54 return 0;
55} 55}
56 56
57__notrace_funcgraph int __kernel_text_address(unsigned long addr) 57int __kernel_text_address(unsigned long addr)
58{ 58{
59 if (core_kernel_text(addr)) 59 if (core_kernel_text(addr))
60 return 1; 60 return 1;
diff --git a/kernel/module.c b/kernel/module.c
index ba22484a987e..22d7379709da 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2735,7 +2735,7 @@ int is_module_address(unsigned long addr)
2735 2735
2736 2736
2737/* Is this a valid kernel address? */ 2737/* Is this a valid kernel address? */
2738__notrace_funcgraph struct module *__module_text_address(unsigned long addr) 2738struct module *__module_text_address(unsigned long addr)
2739{ 2739{
2740 struct module *mod; 2740 struct module *mod;
2741 2741
diff --git a/kernel/relay.c b/kernel/relay.c
index 9d79b7854fa6..edc0ba6d8160 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -677,9 +677,7 @@ int relay_late_setup_files(struct rchan *chan,
677 */ 677 */
678 for_each_online_cpu(i) { 678 for_each_online_cpu(i) {
679 if (unlikely(!chan->buf[i])) { 679 if (unlikely(!chan->buf[i])) {
680 printk(KERN_ERR "relay_late_setup_files: CPU %u " 680 WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
681 "has no buffer, it must have!\n", i);
682 BUG();
683 err = -EINVAL; 681 err = -EINVAL;
684 break; 682 break;
685 } 683 }
diff --git a/kernel/sched.c b/kernel/sched.c
index c1d0ed360088..5dc3b0a5d35a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4404,10 +4404,7 @@ void scheduler_tick(void)
4404#endif 4404#endif
4405} 4405}
4406 4406
4407#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ 4407unsigned long get_parent_ip(unsigned long addr)
4408 defined(CONFIG_PREEMPT_TRACER))
4409
4410static inline unsigned long get_parent_ip(unsigned long addr)
4411{ 4408{
4412 if (in_lock_functions(addr)) { 4409 if (in_lock_functions(addr)) {
4413 addr = CALLER_ADDR2; 4410 addr = CALLER_ADDR2;
@@ -4417,6 +4414,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
4417 return addr; 4414 return addr;
4418} 4415}
4419 4416
4417#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4418 defined(CONFIG_PREEMPT_TRACER))
4419
4420void __kprobes add_preempt_count(int val) 4420void __kprobes add_preempt_count(int val)
4421{ 4421{
4422#ifdef CONFIG_DEBUG_PREEMPT 4422#ifdef CONFIG_DEBUG_PREEMPT
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8d..6edfc2c11d99 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,6 +21,7 @@
21#include <linux/freezer.h> 21#include <linux/freezer.h>
22#include <linux/kthread.h> 22#include <linux/kthread.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/ftrace.h>
24#include <linux/smp.h> 25#include <linux/smp.h>
25#include <linux/tick.h> 26#include <linux/tick.h>
26 27
@@ -79,13 +80,23 @@ static void __local_bh_disable(unsigned long ip)
79 WARN_ON_ONCE(in_irq()); 80 WARN_ON_ONCE(in_irq());
80 81
81 raw_local_irq_save(flags); 82 raw_local_irq_save(flags);
82 add_preempt_count(SOFTIRQ_OFFSET); 83 /*
84 * The preempt tracer hooks into add_preempt_count and will break
85 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
86 * is set and before current->softirq_enabled is cleared.
87 * We must manually increment preempt_count here and manually
88 * call the trace_preempt_off later.
89 */
90 preempt_count() += SOFTIRQ_OFFSET;
83 /* 91 /*
84 * Were softirqs turned off above: 92 * Were softirqs turned off above:
85 */ 93 */
86 if (softirq_count() == SOFTIRQ_OFFSET) 94 if (softirq_count() == SOFTIRQ_OFFSET)
87 trace_softirqs_off(ip); 95 trace_softirqs_off(ip);
88 raw_local_irq_restore(flags); 96 raw_local_irq_restore(flags);
97
98 if (preempt_count() == SOFTIRQ_OFFSET)
99 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
89} 100}
90#else /* !CONFIG_TRACE_IRQFLAGS */ 101#else /* !CONFIG_TRACE_IRQFLAGS */
91static inline void __local_bh_disable(unsigned long ip) 102static inline void __local_bh_disable(unsigned long ip)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 58a93fbd68aa..6ff928acd453 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT
9config NOP_TRACER 9config NOP_TRACER
10 bool 10 bool
11 11
12config HAVE_FTRACE_NMI_ENTER
13 bool
14
12config HAVE_FUNCTION_TRACER 15config HAVE_FUNCTION_TRACER
13 bool 16 bool
14 17
@@ -37,6 +40,11 @@ config TRACER_MAX_TRACE
37config RING_BUFFER 40config RING_BUFFER
38 bool 41 bool
39 42
43config FTRACE_NMI_ENTER
44 bool
45 depends on HAVE_FTRACE_NMI_ENTER
46 default y
47
40config TRACING 48config TRACING
41 bool 49 bool
42 select DEBUG_FS 50 select DEBUG_FS
@@ -126,6 +134,7 @@ config SYSPROF_TRACER
126 bool "Sysprof Tracer" 134 bool "Sysprof Tracer"
127 depends on X86 135 depends on X86
128 select TRACING 136 select TRACING
137 select CONTEXT_SWITCH_TRACER
129 help 138 help
130 This tracer provides the trace needed by the 'Sysprof' userspace 139 This tracer provides the trace needed by the 'Sysprof' userspace
131 tool. 140 tool.
@@ -164,9 +173,8 @@ config BOOT_TRACER
164 representation of the delays during initcalls - but the raw 173 representation of the delays during initcalls - but the raw
165 /debug/tracing/trace text output is readable too. 174 /debug/tracing/trace text output is readable too.
166 175
167 ( Note that tracing self tests can't be enabled if this tracer is 176 You must pass in ftrace=initcall to the kernel command line
168 selected, because the self-tests are an initcall as well and that 177 to enable this on bootup.
169 would invalidate the boot trace. )
170 178
171config TRACE_BRANCH_PROFILING 179config TRACE_BRANCH_PROFILING
172 bool "Trace likely/unlikely profiler" 180 bool "Trace likely/unlikely profiler"
@@ -264,6 +272,62 @@ config HW_BRANCH_TRACER
264 This tracer records all branches on the system in a circular 272 This tracer records all branches on the system in a circular
265 buffer giving access to the last N branches for each cpu. 273 buffer giving access to the last N branches for each cpu.
266 274
275config KMEMTRACE
276 bool "Trace SLAB allocations"
277 select TRACING
278 help
279 kmemtrace provides tracing for slab allocator functions, such as
280 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
281 data is then fed to the userspace application in order to analyse
282 allocation hotspots, internal fragmentation and so on, making it
283 possible to see how well an allocator performs, as well as debug
284 and profile kernel code.
285
286 This requires a userspace application to use. See
287 Documentation/vm/kmemtrace.txt for more information.
288
289 Saying Y will make the kernel somewhat larger and slower. However,
290 if you disable kmemtrace at run-time or boot-time, the performance
291 impact is minimal (depending on the arch the kernel is built for).
292
293 If unsure, say N.
294
295config WORKQUEUE_TRACER
296 bool "Trace workqueues"
297 select TRACING
298 help
299 The workqueue tracer provides statistical information
300 about each cpu workqueue thread, such as the number of
301 works inserted and executed since its creation. It can help
302 to evaluate how much work each of them has to perform.
303 For example, it can help a developer decide whether to use
304 a per-cpu workqueue instead of a single-threaded one.
305
306config BLK_DEV_IO_TRACE
307 bool "Support for tracing block io actions"
308 depends on SYSFS
309 depends on BLOCK
310 select RELAY
311 select DEBUG_FS
312 select TRACEPOINTS
313 select TRACING
314 select STACKTRACE
315 help
316 Say Y here if you want to be able to trace the block layer actions
317 on a given queue. Tracing allows you to see any traffic happening
318 on a block device queue. For more information (and the userspace
319 support tools needed), fetch the blktrace tools from:
320
321 git://git.kernel.dk/blktrace.git
322
323 Tracing also is possible using the ftrace interface, e.g.:
324
325 echo 1 > /sys/block/sda/sda1/trace/enable
326 echo blk > /sys/kernel/debug/tracing/current_tracer
327 cat /sys/kernel/debug/tracing/trace_pipe
328
329 If unsure, say N.
330
267config DYNAMIC_FTRACE 331config DYNAMIC_FTRACE
268 bool "enable/disable ftrace tracepoints dynamically" 332 bool "enable/disable ftrace tracepoints dynamically"
269 depends on FUNCTION_TRACER 333 depends on FUNCTION_TRACER
@@ -294,7 +358,7 @@ config FTRACE_SELFTEST
294 358
295config FTRACE_STARTUP_TEST 359config FTRACE_STARTUP_TEST
296 bool "Perform a startup test on ftrace" 360 bool "Perform a startup test on ftrace"
297 depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER 361 depends on TRACING && DEBUG_KERNEL
298 select FTRACE_SELFTEST 362 select FTRACE_SELFTEST
299 help 363 help
300 This option performs a series of startup tests on ftrace. On bootup 364 This option performs a series of startup tests on ftrace. On bootup
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a93653f..627090bc262d 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,8 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
20 20
21obj-$(CONFIG_TRACING) += trace.o 21obj-$(CONFIG_TRACING) += trace.o
22obj-$(CONFIG_TRACING) += trace_output.o
23obj-$(CONFIG_TRACING) += trace_stat.o
22obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 24obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
23obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 25obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
24obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o 26obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +35,8 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 35obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o 36obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o 37obj-$(CONFIG_POWER_TRACER) += trace_power.o
38obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
39obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
40obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
36 41
37libftrace-y := ftrace.o 42libftrace-y := ftrace.o
diff --git a/block/blktrace.c b/kernel/trace/blktrace.c
index 39cc3bfe56e4..3b91da064820 100644
--- a/block/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -24,10 +24,28 @@
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <trace/block.h> 26#include <trace/block.h>
27#include <asm/uaccess.h> 27#include <linux/uaccess.h>
28#include "trace_output.h"
28 29
29static unsigned int blktrace_seq __read_mostly = 1; 30static unsigned int blktrace_seq __read_mostly = 1;
30 31
32static struct trace_array *blk_tr;
33static int __read_mostly blk_tracer_enabled;
34
35/* Select an alternative, minimalistic output than the original one */
36#define TRACE_BLK_OPT_CLASSIC 0x1
37
38static struct tracer_opt blk_tracer_opts[] = {
39 /* Default disable the minimalistic output */
40 { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
41 { }
42};
43
44static struct tracer_flags blk_tracer_flags = {
45 .val = 0,
46 .opts = blk_tracer_opts,
47};
48
31/* Global reference count of probes */ 49/* Global reference count of probes */
32static DEFINE_MUTEX(blk_probe_mutex); 50static DEFINE_MUTEX(blk_probe_mutex);
33static atomic_t blk_probes_ref = ATOMIC_INIT(0); 51static atomic_t blk_probes_ref = ATOMIC_INIT(0);
@@ -43,6 +61,9 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
43{ 61{
44 struct blk_io_trace *t; 62 struct blk_io_trace *t;
45 63
64 if (!bt->rchan)
65 return;
66
46 t = relay_reserve(bt->rchan, sizeof(*t) + len); 67 t = relay_reserve(bt->rchan, sizeof(*t) + len);
47 if (t) { 68 if (t) {
48 const int cpu = smp_processor_id(); 69 const int cpu = smp_processor_id();
@@ -90,6 +111,16 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
90 unsigned long flags; 111 unsigned long flags;
91 char *buf; 112 char *buf;
92 113
114 if (blk_tr) {
115 va_start(args, fmt);
116 ftrace_vprintk(fmt, args);
117 va_end(args);
118 return;
119 }
120
121 if (!bt->msg_data)
122 return;
123
93 local_irq_save(flags); 124 local_irq_save(flags);
94 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 125 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
95 va_start(args, fmt); 126 va_start(args, fmt);
@@ -117,11 +148,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
117/* 148/*
118 * Data direction bit lookup 149 * Data direction bit lookup
119 */ 150 */
120static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; 151static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
152 BLK_TC_ACT(BLK_TC_WRITE) };
121 153
122/* The ilog2() calls fall out because they're constant */ 154/* The ilog2() calls fall out because they're constant */
123#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ 155#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
124 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) 156 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
125 157
126/* 158/*
127 * The worker for the various blk_add_trace*() types. Fills out a 159 * The worker for the various blk_add_trace*() types. Fills out a
@@ -131,13 +163,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
131 int rw, u32 what, int error, int pdu_len, void *pdu_data) 163 int rw, u32 what, int error, int pdu_len, void *pdu_data)
132{ 164{
133 struct task_struct *tsk = current; 165 struct task_struct *tsk = current;
166 struct ring_buffer_event *event = NULL;
134 struct blk_io_trace *t; 167 struct blk_io_trace *t;
135 unsigned long flags; 168 unsigned long flags = 0;
136 unsigned long *sequence; 169 unsigned long *sequence;
137 pid_t pid; 170 pid_t pid;
138 int cpu; 171 int cpu, pc = 0;
139 172
140 if (unlikely(bt->trace_state != Blktrace_running)) 173 if (unlikely(bt->trace_state != Blktrace_running ||
174 !blk_tracer_enabled))
141 return; 175 return;
142 176
143 what |= ddir_act[rw & WRITE]; 177 what |= ddir_act[rw & WRITE];
@@ -150,6 +184,20 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
150 pid = tsk->pid; 184 pid = tsk->pid;
151 if (unlikely(act_log_check(bt, what, sector, pid))) 185 if (unlikely(act_log_check(bt, what, sector, pid)))
152 return; 186 return;
187 cpu = raw_smp_processor_id();
188
189 if (blk_tr) {
190 tracing_record_cmdline(current);
191
192 pc = preempt_count();
193 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
194 sizeof(*t) + pdu_len,
195 0, pc);
196 if (!event)
197 return;
198 t = ring_buffer_event_data(event);
199 goto record_it;
200 }
153 201
154 /* 202 /*
155 * A word about the locking here - we disable interrupts to reserve 203 * A word about the locking here - we disable interrupts to reserve
@@ -163,23 +211,35 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
163 211
164 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); 212 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
165 if (t) { 213 if (t) {
166 cpu = smp_processor_id();
167 sequence = per_cpu_ptr(bt->sequence, cpu); 214 sequence = per_cpu_ptr(bt->sequence, cpu);
168 215
169 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; 216 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
170 t->sequence = ++(*sequence); 217 t->sequence = ++(*sequence);
171 t->time = ktime_to_ns(ktime_get()); 218 t->time = ktime_to_ns(ktime_get());
219record_it:
220 /*
221 * These two are not needed in ftrace as they are in the
222 * generic trace_entry, filled by tracing_generic_entry_update,
223 * but for the trace_event->bin() synthesizer benefit we do it
224 * here too.
225 */
226 t->cpu = cpu;
227 t->pid = pid;
228
172 t->sector = sector; 229 t->sector = sector;
173 t->bytes = bytes; 230 t->bytes = bytes;
174 t->action = what; 231 t->action = what;
175 t->pid = pid;
176 t->device = bt->dev; 232 t->device = bt->dev;
177 t->cpu = cpu;
178 t->error = error; 233 t->error = error;
179 t->pdu_len = pdu_len; 234 t->pdu_len = pdu_len;
180 235
181 if (pdu_len) 236 if (pdu_len)
182 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 237 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
238
239 if (blk_tr) {
240 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
241 return;
242 }
183 } 243 }
184 244
185 local_irq_restore(flags); 245 local_irq_restore(flags);
@@ -385,7 +445,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
385 atomic_set(&bt->dropped, 0); 445 atomic_set(&bt->dropped, 0);
386 446
387 ret = -EIO; 447 ret = -EIO;
388 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); 448 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
449 &blk_dropped_fops);
389 if (!bt->dropped_file) 450 if (!bt->dropped_file)
390 goto err; 451 goto err;
391 452
@@ -467,10 +528,10 @@ EXPORT_SYMBOL_GPL(blk_trace_setup);
467 528
468int blk_trace_startstop(struct request_queue *q, int start) 529int blk_trace_startstop(struct request_queue *q, int start)
469{ 530{
470 struct blk_trace *bt;
471 int ret; 531 int ret;
532 struct blk_trace *bt = q->blk_trace;
472 533
473 if ((bt = q->blk_trace) == NULL) 534 if (bt == NULL)
474 return -EINVAL; 535 return -EINVAL;
475 536
476 /* 537 /*
@@ -606,12 +667,14 @@ static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
606 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 667 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
607} 668}
608 669
609static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) 670static void blk_add_trace_rq_requeue(struct request_queue *q,
671 struct request *rq)
610{ 672{
611 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 673 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
612} 674}
613 675
614static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) 676static void blk_add_trace_rq_complete(struct request_queue *q,
677 struct request *rq)
615{ 678{
616 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 679 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
617} 680}
@@ -648,12 +711,14 @@ static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
648 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 711 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
649} 712}
650 713
651static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) 714static void blk_add_trace_bio_backmerge(struct request_queue *q,
715 struct bio *bio)
652{ 716{
653 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 717 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
654} 718}
655 719
656static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) 720static void blk_add_trace_bio_frontmerge(struct request_queue *q,
721 struct bio *bio)
657{ 722{
658 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 723 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
659} 724}
@@ -663,7 +728,8 @@ static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
663 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 728 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
664} 729}
665 730
666static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) 731static void blk_add_trace_getrq(struct request_queue *q,
732 struct bio *bio, int rw)
667{ 733{
668 if (bio) 734 if (bio)
669 blk_add_trace_bio(q, bio, BLK_TA_GETRQ); 735 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
@@ -676,7 +742,8 @@ static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw
676} 742}
677 743
678 744
679static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) 745static void blk_add_trace_sleeprq(struct request_queue *q,
746 struct bio *bio, int rw)
680{ 747{
681 if (bio) 748 if (bio)
682 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); 749 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
@@ -684,7 +751,8 @@ static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int
684 struct blk_trace *bt = q->blk_trace; 751 struct blk_trace *bt = q->blk_trace;
685 752
686 if (bt) 753 if (bt)
687 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); 754 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
755 0, 0, NULL);
688 } 756 }
689} 757}
690 758
@@ -858,3 +926,613 @@ static void blk_unregister_tracepoints(void)
858 926
859 tracepoint_synchronize_unregister(); 927 tracepoint_synchronize_unregister();
860} 928}
929
930/*
931 * struct blk_io_tracer formatting routines
932 */
933
934static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
935{
936 int i = 0;
937
938 if (t->action & BLK_TC_DISCARD)
939 rwbs[i++] = 'D';
940 else if (t->action & BLK_TC_WRITE)
941 rwbs[i++] = 'W';
942 else if (t->bytes)
943 rwbs[i++] = 'R';
944 else
945 rwbs[i++] = 'N';
946
947 if (t->action & BLK_TC_AHEAD)
948 rwbs[i++] = 'A';
949 if (t->action & BLK_TC_BARRIER)
950 rwbs[i++] = 'B';
951 if (t->action & BLK_TC_SYNC)
952 rwbs[i++] = 'S';
953 if (t->action & BLK_TC_META)
954 rwbs[i++] = 'M';
955
956 rwbs[i] = '\0';
957}
958
959static inline
960const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
961{
962 return (const struct blk_io_trace *)ent;
963}
964
965static inline const void *pdu_start(const struct trace_entry *ent)
966{
967 return te_blk_io_trace(ent) + 1;
968}
969
970static inline u32 t_sec(const struct trace_entry *ent)
971{
972 return te_blk_io_trace(ent)->bytes >> 9;
973}
974
975static inline unsigned long long t_sector(const struct trace_entry *ent)
976{
977 return te_blk_io_trace(ent)->sector;
978}
979
980static inline __u16 t_error(const struct trace_entry *ent)
981{
982 return te_blk_io_trace(ent)->sector;
983}
984
985static __u64 get_pdu_int(const struct trace_entry *ent)
986{
987 const __u64 *val = pdu_start(ent);
988 return be64_to_cpu(*val);
989}
990
991static void get_pdu_remap(const struct trace_entry *ent,
992 struct blk_io_trace_remap *r)
993{
994 const struct blk_io_trace_remap *__r = pdu_start(ent);
995 __u64 sector = __r->sector;
996
997 r->device = be32_to_cpu(__r->device);
998 r->device_from = be32_to_cpu(__r->device_from);
999 r->sector = be64_to_cpu(sector);
1000}
1001
1002static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
1003{
1004 char rwbs[6];
1005 unsigned long long ts = ns2usecs(iter->ts);
1006 unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
1007 unsigned secs = (unsigned long)ts;
1008 const struct trace_entry *ent = iter->ent;
1009 const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
1010
1011 fill_rwbs(rwbs, t);
1012
1013 return trace_seq_printf(&iter->seq,
1014 "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
1015 MAJOR(t->device), MINOR(t->device), iter->cpu,
1016 secs, usec_rem, ent->pid, act, rwbs);
1017}
1018
1019static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
1020 const char *act)
1021{
1022 char rwbs[6];
1023 fill_rwbs(rwbs, t);
1024 return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
1025 MAJOR(t->device), MINOR(t->device), act, rwbs);
1026}
1027
1028static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1029{
1030 const char *cmd = trace_find_cmdline(ent->pid);
1031
1032 if (t_sec(ent))
1033 return trace_seq_printf(s, "%llu + %u [%s]\n",
1034 t_sector(ent), t_sec(ent), cmd);
1035 return trace_seq_printf(s, "[%s]\n", cmd);
1036}
1037
1038static int blk_log_with_error(struct trace_seq *s,
1039 const struct trace_entry *ent)
1040{
1041 if (t_sec(ent))
1042 return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
1043 t_sec(ent), t_error(ent));
1044 return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
1045}
1046
1047static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1048{
1049 struct blk_io_trace_remap r = { .device = 0, };
1050
1051 get_pdu_remap(ent, &r);
1052 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1053 t_sector(ent),
1054 t_sec(ent), MAJOR(r.device), MINOR(r.device),
1055 (unsigned long long)r.sector);
1056}
1057
1058static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
1059{
1060 return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid));
1061}
1062
1063static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
1064{
1065 return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid),
1066 get_pdu_int(ent));
1067}
1068
1069static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1070{
1071 return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
1072 get_pdu_int(ent), trace_find_cmdline(ent->pid));
1073}
1074
1075/*
1076 * struct tracer operations
1077 */
1078
1079static void blk_tracer_print_header(struct seq_file *m)
1080{
1081 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1082 return;
1083 seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n"
1084 "# | | | | | |\n");
1085}
1086
1087static void blk_tracer_start(struct trace_array *tr)
1088{
1089 mutex_lock(&blk_probe_mutex);
1090 if (atomic_add_return(1, &blk_probes_ref) == 1)
1091 if (blk_register_tracepoints())
1092 atomic_dec(&blk_probes_ref);
1093 mutex_unlock(&blk_probe_mutex);
1094 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1095}
1096
1097static int blk_tracer_init(struct trace_array *tr)
1098{
1099 blk_tr = tr;
1100 blk_tracer_start(tr);
1101 mutex_lock(&blk_probe_mutex);
1102 blk_tracer_enabled++;
1103 mutex_unlock(&blk_probe_mutex);
1104 return 0;
1105}
1106
1107static void blk_tracer_stop(struct trace_array *tr)
1108{
1109 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1110 mutex_lock(&blk_probe_mutex);
1111 if (atomic_dec_and_test(&blk_probes_ref))
1112 blk_unregister_tracepoints();
1113 mutex_unlock(&blk_probe_mutex);
1114}
1115
1116static void blk_tracer_reset(struct trace_array *tr)
1117{
1118 if (!atomic_read(&blk_probes_ref))
1119 return;
1120
1121 mutex_lock(&blk_probe_mutex);
1122 blk_tracer_enabled--;
1123 WARN_ON(blk_tracer_enabled < 0);
1124 mutex_unlock(&blk_probe_mutex);
1125
1126 blk_tracer_stop(tr);
1127}
1128
1129static struct {
1130 const char *act[2];
1131 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1132} what2act[] __read_mostly = {
1133 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1134 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1135 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
1136 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
1137 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
1138 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
1139 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
1140 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
1141 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
1142 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
1143 [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
1144 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
1145 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
1146 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
1147 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1148};
1149
1150static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1151 int flags)
1152{
1153 struct trace_seq *s = &iter->seq;
1154 const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1155 const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1156 int ret;
1157
1158 if (!trace_print_context(iter))
1159 return TRACE_TYPE_PARTIAL_LINE;
1160
1161 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1162 ret = trace_seq_printf(s, "Bad pc action %x\n", what);
1163 else {
1164 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1165 ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
1166 if (ret)
1167 ret = what2act[what].print(s, iter->ent);
1168 }
1169
1170 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1171}
1172
1173static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1174{
1175 struct trace_seq *s = &iter->seq;
1176 struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1177 const int offset = offsetof(struct blk_io_trace, sector);
1178 struct blk_io_trace old = {
1179 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1180 .time = ns2usecs(iter->ts),
1181 };
1182
1183 if (!trace_seq_putmem(s, &old, offset))
1184 return 0;
1185 return trace_seq_putmem(s, &t->sector,
1186 sizeof(old) - offset + t->pdu_len);
1187}
1188
1189static enum print_line_t
1190blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
1191{
1192 return blk_trace_synthesize_old_trace(iter) ?
1193 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1194}
1195
1196static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1197{
1198 const struct blk_io_trace *t;
1199 u16 what;
1200 int ret;
1201
1202 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1203 return TRACE_TYPE_UNHANDLED;
1204
1205 t = (const struct blk_io_trace *)iter->ent;
1206 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1207
1208 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1209 ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
1210 else {
1211 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1212 ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
1213 if (ret)
1214 ret = what2act[what].print(&iter->seq, iter->ent);
1215 }
1216
1217 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1218}
1219
1220static struct tracer blk_tracer __read_mostly = {
1221 .name = "blk",
1222 .init = blk_tracer_init,
1223 .reset = blk_tracer_reset,
1224 .start = blk_tracer_start,
1225 .stop = blk_tracer_stop,
1226 .print_header = blk_tracer_print_header,
1227 .print_line = blk_tracer_print_line,
1228 .flags = &blk_tracer_flags,
1229};
1230
1231static struct trace_event trace_blk_event = {
1232 .type = TRACE_BLK,
1233 .trace = blk_trace_event_print,
1234 .latency_trace = blk_trace_event_print,
1235 .binary = blk_trace_event_print_binary,
1236};
1237
1238static int __init init_blk_tracer(void)
1239{
1240 if (!register_ftrace_event(&trace_blk_event)) {
1241 pr_warning("Warning: could not register block events\n");
1242 return 1;
1243 }
1244
1245 if (register_tracer(&blk_tracer) != 0) {
1246 pr_warning("Warning: could not register the block tracer\n");
1247 unregister_ftrace_event(&trace_blk_event);
1248 return 1;
1249 }
1250
1251 return 0;
1252}
1253
1254device_initcall(init_blk_tracer);
1255
1256static int blk_trace_remove_queue(struct request_queue *q)
1257{
1258 struct blk_trace *bt;
1259
1260 bt = xchg(&q->blk_trace, NULL);
1261 if (bt == NULL)
1262 return -EINVAL;
1263
1264 kfree(bt);
1265 return 0;
1266}
1267
1268/*
1269 * Setup everything required to start tracing
1270 */
1271static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1272{
1273 struct blk_trace *old_bt, *bt = NULL;
1274 int ret;
1275
1276 ret = -ENOMEM;
1277 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1278 if (!bt)
1279 goto err;
1280
1281 bt->dev = dev;
1282 bt->act_mask = (u16)-1;
1283 bt->end_lba = -1ULL;
1284 bt->trace_state = Blktrace_running;
1285
1286 old_bt = xchg(&q->blk_trace, bt);
1287 if (old_bt != NULL) {
1288 (void)xchg(&q->blk_trace, old_bt);
1289 kfree(bt);
1290 return -EBUSY;
1291 }
1292 return 0;
1293err:
1294 return ret;
1295}
1296
1297/*
1298 * sysfs interface to enable and configure tracing
1299 */
1300
1301static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
1302 struct device_attribute *attr,
1303 char *buf)
1304{
1305 struct hd_struct *p = dev_to_part(dev);
1306 struct block_device *bdev;
1307 ssize_t ret = -ENXIO;
1308
1309 lock_kernel();
1310 bdev = bdget(part_devt(p));
1311 if (bdev != NULL) {
1312 struct request_queue *q = bdev_get_queue(bdev);
1313
1314 if (q != NULL) {
1315 mutex_lock(&bdev->bd_mutex);
1316 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1317 mutex_unlock(&bdev->bd_mutex);
1318 }
1319
1320 bdput(bdev);
1321 }
1322
1323 unlock_kernel();
1324 return ret;
1325}
1326
1327static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
1328 struct device_attribute *attr,
1329 const char *buf, size_t count)
1330{
1331 struct block_device *bdev;
1332 struct request_queue *q;
1333 struct hd_struct *p;
1334 int value;
1335 ssize_t ret = -ENXIO;
1336
1337 if (count == 0 || sscanf(buf, "%d", &value) != 1)
1338 goto out;
1339
1340 lock_kernel();
1341 p = dev_to_part(dev);
1342 bdev = bdget(part_devt(p));
1343 if (bdev == NULL)
1344 goto out_unlock_kernel;
1345
1346 q = bdev_get_queue(bdev);
1347 if (q == NULL)
1348 goto out_bdput;
1349
1350 mutex_lock(&bdev->bd_mutex);
1351 if (value)
1352 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1353 else
1354 ret = blk_trace_remove_queue(q);
1355 mutex_unlock(&bdev->bd_mutex);
1356
1357 if (ret == 0)
1358 ret = count;
1359out_bdput:
1360 bdput(bdev);
1361out_unlock_kernel:
1362 unlock_kernel();
1363out:
1364 return ret;
1365}
1366
1367static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1368 struct device_attribute *attr,
1369 char *buf);
1370static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1371 struct device_attribute *attr,
1372 const char *buf, size_t count);
1373#define BLK_TRACE_DEVICE_ATTR(_name) \
1374 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
1375 sysfs_blk_trace_attr_show, \
1376 sysfs_blk_trace_attr_store)
1377
1378static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
1379 sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
1380static BLK_TRACE_DEVICE_ATTR(act_mask);
1381static BLK_TRACE_DEVICE_ATTR(pid);
1382static BLK_TRACE_DEVICE_ATTR(start_lba);
1383static BLK_TRACE_DEVICE_ATTR(end_lba);
1384
1385static struct attribute *blk_trace_attrs[] = {
1386 &dev_attr_enable.attr,
1387 &dev_attr_act_mask.attr,
1388 &dev_attr_pid.attr,
1389 &dev_attr_start_lba.attr,
1390 &dev_attr_end_lba.attr,
1391 NULL
1392};
1393
1394struct attribute_group blk_trace_attr_group = {
1395 .name = "trace",
1396 .attrs = blk_trace_attrs,
1397};
1398
1399static int blk_str2act_mask(const char *str)
1400{
1401 int mask = 0;
1402 char *copy = kstrdup(str, GFP_KERNEL), *s;
1403
1404 if (copy == NULL)
1405 return -ENOMEM;
1406
1407 s = strstrip(copy);
1408
1409 while (1) {
1410 char *sep = strchr(s, ',');
1411
1412 if (sep != NULL)
1413 *sep = '\0';
1414
1415 if (strcasecmp(s, "barrier") == 0)
1416 mask |= BLK_TC_BARRIER;
1417 else if (strcasecmp(s, "complete") == 0)
1418 mask |= BLK_TC_COMPLETE;
1419 else if (strcasecmp(s, "fs") == 0)
1420 mask |= BLK_TC_FS;
1421 else if (strcasecmp(s, "issue") == 0)
1422 mask |= BLK_TC_ISSUE;
1423 else if (strcasecmp(s, "pc") == 0)
1424 mask |= BLK_TC_PC;
1425 else if (strcasecmp(s, "queue") == 0)
1426 mask |= BLK_TC_QUEUE;
1427 else if (strcasecmp(s, "read") == 0)
1428 mask |= BLK_TC_READ;
1429 else if (strcasecmp(s, "requeue") == 0)
1430 mask |= BLK_TC_REQUEUE;
1431 else if (strcasecmp(s, "sync") == 0)
1432 mask |= BLK_TC_SYNC;
1433 else if (strcasecmp(s, "write") == 0)
1434 mask |= BLK_TC_WRITE;
1435
1436 if (sep == NULL)
1437 break;
1438
1439 s = sep + 1;
1440 }
1441 kfree(copy);
1442
1443 return mask;
1444}
1445
1446static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1447 struct device_attribute *attr,
1448 char *buf)
1449{
1450 struct hd_struct *p = dev_to_part(dev);
1451 struct request_queue *q;
1452 struct block_device *bdev;
1453 ssize_t ret = -ENXIO;
1454
1455 lock_kernel();
1456 bdev = bdget(part_devt(p));
1457 if (bdev == NULL)
1458 goto out_unlock_kernel;
1459
1460 q = bdev_get_queue(bdev);
1461 if (q == NULL)
1462 goto out_bdput;
1463 mutex_lock(&bdev->bd_mutex);
1464 if (q->blk_trace == NULL)
1465 ret = sprintf(buf, "disabled\n");
1466 else if (attr == &dev_attr_act_mask)
1467 ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
1468 else if (attr == &dev_attr_pid)
1469 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1470 else if (attr == &dev_attr_start_lba)
1471 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1472 else if (attr == &dev_attr_end_lba)
1473 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1474 mutex_unlock(&bdev->bd_mutex);
1475out_bdput:
1476 bdput(bdev);
1477out_unlock_kernel:
1478 unlock_kernel();
1479 return ret;
1480}
1481
1482static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1483 struct device_attribute *attr,
1484 const char *buf, size_t count)
1485{
1486 struct block_device *bdev;
1487 struct request_queue *q;
1488 struct hd_struct *p;
1489 u64 value;
1490 ssize_t ret = -ENXIO;
1491
1492 if (count == 0)
1493 goto out;
1494
1495 if (attr == &dev_attr_act_mask) {
1496 if (sscanf(buf, "%llx", &value) != 1) {
1497 /* Assume it is a list of trace category names */
1498 value = blk_str2act_mask(buf);
1499			if ((int)value < 0)
1500 goto out;
1501 }
1502 } else if (sscanf(buf, "%llu", &value) != 1)
1503 goto out;
1504
1505 lock_kernel();
1506 p = dev_to_part(dev);
1507 bdev = bdget(part_devt(p));
1508 if (bdev == NULL)
1509 goto out_unlock_kernel;
1510
1511 q = bdev_get_queue(bdev);
1512 if (q == NULL)
1513 goto out_bdput;
1514
1515 mutex_lock(&bdev->bd_mutex);
1516 ret = 0;
1517 if (q->blk_trace == NULL)
1518 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1519
1520 if (ret == 0) {
1521 if (attr == &dev_attr_act_mask)
1522 q->blk_trace->act_mask = value;
1523 else if (attr == &dev_attr_pid)
1524 q->blk_trace->pid = value;
1525 else if (attr == &dev_attr_start_lba)
1526 q->blk_trace->start_lba = value;
1527 else if (attr == &dev_attr_end_lba)
1528 q->blk_trace->end_lba = value;
1529 ret = count;
1530 }
1531 mutex_unlock(&bdev->bd_mutex);
1532out_bdput:
1533 bdput(bdev);
1534out_unlock_kernel:
1535 unlock_kernel();
1536out:
1537 return ret;
1538}
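
Usage note (not part of the patch): because the attribute group above is named "trace" and is attached to the partition device, the controls should show up as something like /sys/block/<disk>/<part>/trace/{enable,act_mask,pid,start_lba,end_lba}. Writing 1 to enable sets up tracing on the queue via blk_trace_setup_queue(), writing 0 tears it down, and act_mask accepts either a hex mask or the comma-separated category names handled by blk_str2act_mask().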
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fdf913dfc7e8..aee95aea57e4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
27#include <linux/sysctl.h> 27#include <linux/sysctl.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h>
30 31
31#include <asm/ftrace.h> 32#include <asm/ftrace.h>
32 33
@@ -44,14 +45,14 @@
44 ftrace_kill(); \ 45 ftrace_kill(); \
45 } while (0) 46 } while (0)
46 47
48/* hash bits for specific function selection */
49#define FTRACE_HASH_BITS 7
50#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
51
47/* ftrace_enabled is a method to turn ftrace on or off */ 52/* ftrace_enabled is a method to turn ftrace on or off */
48int ftrace_enabled __read_mostly; 53int ftrace_enabled __read_mostly;
49static int last_ftrace_enabled; 54static int last_ftrace_enabled;
50 55
51/* set when tracing only a pid */
52struct pid *ftrace_pid_trace;
53static struct pid * const ftrace_swapper_pid = &init_struct_pid;
54
55/* Quick disabling of function tracer. */ 56/* Quick disabling of function tracer. */
56int function_trace_stop; 57int function_trace_stop;
57 58
@@ -61,9 +62,7 @@ int function_trace_stop;
61 */ 62 */
62static int ftrace_disabled __read_mostly; 63static int ftrace_disabled __read_mostly;
63 64
64static DEFINE_SPINLOCK(ftrace_lock); 65static DEFINE_MUTEX(ftrace_lock);
65static DEFINE_MUTEX(ftrace_sysctl_lock);
66static DEFINE_MUTEX(ftrace_start_lock);
67 66
68static struct ftrace_ops ftrace_list_end __read_mostly = 67static struct ftrace_ops ftrace_list_end __read_mostly =
69{ 68{
@@ -134,9 +133,6 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
134 133
135static int __register_ftrace_function(struct ftrace_ops *ops) 134static int __register_ftrace_function(struct ftrace_ops *ops)
136{ 135{
137 /* should not be called from interrupt context */
138 spin_lock(&ftrace_lock);
139
140 ops->next = ftrace_list; 136 ops->next = ftrace_list;
141 /* 137 /*
142 * We are entering ops into the ftrace_list but another 138 * We are entering ops into the ftrace_list but another
@@ -172,18 +168,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
172#endif 168#endif
173 } 169 }
174 170
175 spin_unlock(&ftrace_lock);
176
177 return 0; 171 return 0;
178} 172}
179 173
180static int __unregister_ftrace_function(struct ftrace_ops *ops) 174static int __unregister_ftrace_function(struct ftrace_ops *ops)
181{ 175{
182 struct ftrace_ops **p; 176 struct ftrace_ops **p;
183 int ret = 0;
184
185 /* should not be called from interrupt context */
186 spin_lock(&ftrace_lock);
187 177
188 /* 178 /*
189 * If we are removing the last function, then simply point 179 * If we are removing the last function, then simply point
@@ -192,17 +182,15 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
192 if (ftrace_list == ops && ops->next == &ftrace_list_end) { 182 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
193 ftrace_trace_function = ftrace_stub; 183 ftrace_trace_function = ftrace_stub;
194 ftrace_list = &ftrace_list_end; 184 ftrace_list = &ftrace_list_end;
195 goto out; 185 return 0;
196 } 186 }
197 187
198 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) 188 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
199 if (*p == ops) 189 if (*p == ops)
200 break; 190 break;
201 191
202 if (*p != ops) { 192 if (*p != ops)
203 ret = -1; 193 return -1;
204 goto out;
205 }
206 194
207 *p = (*p)->next; 195 *p = (*p)->next;
208 196
@@ -223,18 +211,14 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
223 } 211 }
224 } 212 }
225 213
226 out: 214 return 0;
227 spin_unlock(&ftrace_lock);
228
229 return ret;
230} 215}
231 216
232static void ftrace_update_pid_func(void) 217static void ftrace_update_pid_func(void)
233{ 218{
234 ftrace_func_t func; 219 ftrace_func_t func;
235 220
236 /* should not be called from interrupt context */ 221 mutex_lock(&ftrace_lock);
237 spin_lock(&ftrace_lock);
238 222
239 if (ftrace_trace_function == ftrace_stub) 223 if (ftrace_trace_function == ftrace_stub)
240 goto out; 224 goto out;
@@ -256,21 +240,30 @@ static void ftrace_update_pid_func(void)
256#endif 240#endif
257 241
258 out: 242 out:
259 spin_unlock(&ftrace_lock); 243 mutex_unlock(&ftrace_lock);
260} 244}
261 245
246/* set when tracing only a pid */
247struct pid *ftrace_pid_trace;
248static struct pid * const ftrace_swapper_pid = &init_struct_pid;
249
262#ifdef CONFIG_DYNAMIC_FTRACE 250#ifdef CONFIG_DYNAMIC_FTRACE
251
263#ifndef CONFIG_FTRACE_MCOUNT_RECORD 252#ifndef CONFIG_FTRACE_MCOUNT_RECORD
264# error Dynamic ftrace depends on MCOUNT_RECORD 253# error Dynamic ftrace depends on MCOUNT_RECORD
265#endif 254#endif
266 255
267/* 256static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
268 * Since MCOUNT_ADDR may point to mcount itself, we do not want 257
269 * to get it confused by reading a reference in the code as we 258struct ftrace_func_hook {
270 * are parsing on objcopy output of text. Use a variable for 259 struct hlist_node node;
271 * it instead. 260 struct ftrace_hook_ops *ops;
272 */ 261 unsigned long flags;
273static unsigned long mcount_addr = MCOUNT_ADDR; 262 unsigned long ip;
263 void *data;
264 struct rcu_head rcu;
265};
266
274 267
275enum { 268enum {
276 FTRACE_ENABLE_CALLS = (1 << 0), 269 FTRACE_ENABLE_CALLS = (1 << 0),
@@ -290,7 +283,7 @@ static DEFINE_MUTEX(ftrace_regex_lock);
290 283
291struct ftrace_page { 284struct ftrace_page {
292 struct ftrace_page *next; 285 struct ftrace_page *next;
293 unsigned long index; 286 int index;
294 struct dyn_ftrace records[]; 287 struct dyn_ftrace records[];
295}; 288};
296 289
@@ -305,6 +298,19 @@ static struct ftrace_page *ftrace_pages;
305 298
306static struct dyn_ftrace *ftrace_free_records; 299static struct dyn_ftrace *ftrace_free_records;
307 300
301/*
302 * This is a double for loop. Do not use 'break' to exit the loop;
303 * use a goto instead.
304 */
305#define do_for_each_ftrace_rec(pg, rec) \
306 for (pg = ftrace_pages_start; pg; pg = pg->next) { \
307 int _____i; \
308 for (_____i = 0; _____i < pg->index; _____i++) { \
309 rec = &pg->records[_____i];
310
311#define while_for_each_ftrace_rec() \
312 } \
313 }
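
A minimal usage sketch of the iterator pair above (the helper below is hypothetical, not part of the patch); note the goto rather than a break to leave the nested loop:

	static struct dyn_ftrace *find_rec(unsigned long ip)
	{
		struct ftrace_page *pg;
		struct dyn_ftrace *rec;
		struct dyn_ftrace *found = NULL;

		mutex_lock(&ftrace_lock);
		do_for_each_ftrace_rec(pg, rec) {
			if (rec->ip == ip) {
				found = rec;
				goto out;
			}
		} while_for_each_ftrace_rec();
	 out:
		mutex_unlock(&ftrace_lock);
		return found;
	}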
308 314
309#ifdef CONFIG_KPROBES 315#ifdef CONFIG_KPROBES
310 316
@@ -349,23 +355,16 @@ void ftrace_release(void *start, unsigned long size)
349 struct ftrace_page *pg; 355 struct ftrace_page *pg;
350 unsigned long s = (unsigned long)start; 356 unsigned long s = (unsigned long)start;
351 unsigned long e = s + size; 357 unsigned long e = s + size;
352 int i;
353 358
354 if (ftrace_disabled || !start) 359 if (ftrace_disabled || !start)
355 return; 360 return;
356 361
357 /* should not be called from interrupt context */ 362 mutex_lock(&ftrace_lock);
358 spin_lock(&ftrace_lock); 363 do_for_each_ftrace_rec(pg, rec) {
359 364 if ((rec->ip >= s) && (rec->ip < e))
360 for (pg = ftrace_pages_start; pg; pg = pg->next) { 365 ftrace_free_rec(rec);
361 for (i = 0; i < pg->index; i++) { 366 } while_for_each_ftrace_rec();
362 rec = &pg->records[i]; 367 mutex_unlock(&ftrace_lock);
363
364 if ((rec->ip >= s) && (rec->ip < e))
365 ftrace_free_rec(rec);
366 }
367 }
368 spin_unlock(&ftrace_lock);
369} 368}
370 369
371static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 370static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -464,7 +463,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
464 unsigned long ip, fl; 463 unsigned long ip, fl;
465 unsigned long ftrace_addr; 464 unsigned long ftrace_addr;
466 465
467 ftrace_addr = (unsigned long)ftrace_caller; 466 ftrace_addr = (unsigned long)FTRACE_ADDR;
468 467
469 ip = rec->ip; 468 ip = rec->ip;
470 469
@@ -473,7 +472,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
473 * it is not enabled then do nothing. 472 * it is not enabled then do nothing.
474 * 473 *
475 * If this record is not to be traced and 474 * If this record is not to be traced and
476 * it is enabled then disabled it. 475 * it is enabled then disable it.
477 * 476 *
478 */ 477 */
479 if (rec->flags & FTRACE_FL_NOTRACE) { 478 if (rec->flags & FTRACE_FL_NOTRACE) {
@@ -493,7 +492,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
493 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) 492 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
494 return 0; 493 return 0;
495 494
496 /* Record is not filtered and is not enabled do nothing */ 495 /* Record is not filtered or enabled, do nothing */
497 if (!fl) 496 if (!fl)
498 return 0; 497 return 0;
499 498
@@ -515,7 +514,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
515 514
516 } else { 515 } else {
517 516
518 /* if record is not enabled do nothing */ 517 /* if record is not enabled, do nothing */
519 if (!(rec->flags & FTRACE_FL_ENABLED)) 518 if (!(rec->flags & FTRACE_FL_ENABLED))
520 return 0; 519 return 0;
521 520
@@ -531,41 +530,37 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
531 530
532static void ftrace_replace_code(int enable) 531static void ftrace_replace_code(int enable)
533{ 532{
534 int i, failed; 533 int failed;
535 struct dyn_ftrace *rec; 534 struct dyn_ftrace *rec;
536 struct ftrace_page *pg; 535 struct ftrace_page *pg;
537 536
538 for (pg = ftrace_pages_start; pg; pg = pg->next) { 537 do_for_each_ftrace_rec(pg, rec) {
539 for (i = 0; i < pg->index; i++) { 538 /*
540 rec = &pg->records[i]; 539 * Skip over free records and records that have
541 540 * failed.
542 /* 541 */
543 * Skip over free records and records that have 542 if (rec->flags & FTRACE_FL_FREE ||
544 * failed. 543 rec->flags & FTRACE_FL_FAILED)
545 */ 544 continue;
546 if (rec->flags & FTRACE_FL_FREE ||
547 rec->flags & FTRACE_FL_FAILED)
548 continue;
549 545
550 /* ignore updates to this record's mcount site */ 546 /* ignore updates to this record's mcount site */
551 if (get_kprobe((void *)rec->ip)) { 547 if (get_kprobe((void *)rec->ip)) {
552 freeze_record(rec); 548 freeze_record(rec);
553 continue; 549 continue;
554 } else { 550 } else {
555 unfreeze_record(rec); 551 unfreeze_record(rec);
556 } 552 }
557 553
558 failed = __ftrace_replace_code(rec, enable); 554 failed = __ftrace_replace_code(rec, enable);
559 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 555 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
560 rec->flags |= FTRACE_FL_FAILED; 556 rec->flags |= FTRACE_FL_FAILED;
561 if ((system_state == SYSTEM_BOOTING) || 557 if ((system_state == SYSTEM_BOOTING) ||
562 !core_kernel_text(rec->ip)) { 558 !core_kernel_text(rec->ip)) {
563 ftrace_free_rec(rec); 559 ftrace_free_rec(rec);
564 } else 560 } else
565 ftrace_bug(failed, rec->ip); 561 ftrace_bug(failed, rec->ip);
566 }
567 } 562 }
568 } 563 } while_for_each_ftrace_rec();
569} 564}
570 565
571static int 566static int
@@ -576,7 +571,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
576 571
577 ip = rec->ip; 572 ip = rec->ip;
578 573
579 ret = ftrace_make_nop(mod, rec, mcount_addr); 574 ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
580 if (ret) { 575 if (ret) {
581 ftrace_bug(ret, ip); 576 ftrace_bug(ret, ip);
582 rec->flags |= FTRACE_FL_FAILED; 577 rec->flags |= FTRACE_FL_FAILED;
@@ -631,13 +626,10 @@ static void ftrace_startup(int command)
631 if (unlikely(ftrace_disabled)) 626 if (unlikely(ftrace_disabled))
632 return; 627 return;
633 628
634 mutex_lock(&ftrace_start_lock);
635 ftrace_start_up++; 629 ftrace_start_up++;
636 command |= FTRACE_ENABLE_CALLS; 630 command |= FTRACE_ENABLE_CALLS;
637 631
638 ftrace_startup_enable(command); 632 ftrace_startup_enable(command);
639
640 mutex_unlock(&ftrace_start_lock);
641} 633}
642 634
643static void ftrace_shutdown(int command) 635static void ftrace_shutdown(int command)
@@ -645,7 +637,6 @@ static void ftrace_shutdown(int command)
645 if (unlikely(ftrace_disabled)) 637 if (unlikely(ftrace_disabled))
646 return; 638 return;
647 639
648 mutex_lock(&ftrace_start_lock);
649 ftrace_start_up--; 640 ftrace_start_up--;
650 if (!ftrace_start_up) 641 if (!ftrace_start_up)
651 command |= FTRACE_DISABLE_CALLS; 642 command |= FTRACE_DISABLE_CALLS;
@@ -656,11 +647,9 @@ static void ftrace_shutdown(int command)
656 } 647 }
657 648
658 if (!command || !ftrace_enabled) 649 if (!command || !ftrace_enabled)
659 goto out; 650 return;
660 651
661 ftrace_run_update_code(command); 652 ftrace_run_update_code(command);
662 out:
663 mutex_unlock(&ftrace_start_lock);
664} 653}
665 654
666static void ftrace_startup_sysctl(void) 655static void ftrace_startup_sysctl(void)
@@ -670,7 +659,6 @@ static void ftrace_startup_sysctl(void)
670 if (unlikely(ftrace_disabled)) 659 if (unlikely(ftrace_disabled))
671 return; 660 return;
672 661
673 mutex_lock(&ftrace_start_lock);
674 /* Force update next time */ 662 /* Force update next time */
675 saved_ftrace_func = NULL; 663 saved_ftrace_func = NULL;
676 /* ftrace_start_up is true if we want ftrace running */ 664 /* ftrace_start_up is true if we want ftrace running */
@@ -678,7 +666,6 @@ static void ftrace_startup_sysctl(void)
678 command |= FTRACE_ENABLE_CALLS; 666 command |= FTRACE_ENABLE_CALLS;
679 667
680 ftrace_run_update_code(command); 668 ftrace_run_update_code(command);
681 mutex_unlock(&ftrace_start_lock);
682} 669}
683 670
684static void ftrace_shutdown_sysctl(void) 671static void ftrace_shutdown_sysctl(void)
@@ -688,13 +675,11 @@ static void ftrace_shutdown_sysctl(void)
688 if (unlikely(ftrace_disabled)) 675 if (unlikely(ftrace_disabled))
689 return; 676 return;
690 677
691 mutex_lock(&ftrace_start_lock);
692 /* ftrace_start_up is true if ftrace is running */ 678 /* ftrace_start_up is true if ftrace is running */
693 if (ftrace_start_up) 679 if (ftrace_start_up)
694 command |= FTRACE_DISABLE_CALLS; 680 command |= FTRACE_DISABLE_CALLS;
695 681
696 ftrace_run_update_code(command); 682 ftrace_run_update_code(command);
697 mutex_unlock(&ftrace_start_lock);
698} 683}
699 684
700static cycle_t ftrace_update_time; 685static cycle_t ftrace_update_time;
@@ -781,13 +766,16 @@ enum {
781 FTRACE_ITER_CONT = (1 << 1), 766 FTRACE_ITER_CONT = (1 << 1),
782 FTRACE_ITER_NOTRACE = (1 << 2), 767 FTRACE_ITER_NOTRACE = (1 << 2),
783 FTRACE_ITER_FAILURES = (1 << 3), 768 FTRACE_ITER_FAILURES = (1 << 3),
769 FTRACE_ITER_PRINTALL = (1 << 4),
770 FTRACE_ITER_HASH = (1 << 5),
784}; 771};
785 772
786#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 773#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
787 774
788struct ftrace_iterator { 775struct ftrace_iterator {
789 struct ftrace_page *pg; 776 struct ftrace_page *pg;
790 unsigned idx; 777 int hidx;
778 int idx;
791 unsigned flags; 779 unsigned flags;
792 unsigned char buffer[FTRACE_BUFF_MAX+1]; 780 unsigned char buffer[FTRACE_BUFF_MAX+1];
793 unsigned buffer_idx; 781 unsigned buffer_idx;
@@ -795,15 +783,89 @@ struct ftrace_iterator {
795}; 783};
796 784
797static void * 785static void *
786t_hash_next(struct seq_file *m, void *v, loff_t *pos)
787{
788 struct ftrace_iterator *iter = m->private;
789 struct hlist_node *hnd = v;
790 struct hlist_head *hhd;
791
792 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
793
794 (*pos)++;
795
796 retry:
797 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
798 return NULL;
799
800 hhd = &ftrace_func_hash[iter->hidx];
801
802 if (hlist_empty(hhd)) {
803 iter->hidx++;
804 hnd = NULL;
805 goto retry;
806 }
807
808 if (!hnd)
809 hnd = hhd->first;
810 else {
811 hnd = hnd->next;
812 if (!hnd) {
813 iter->hidx++;
814 goto retry;
815 }
816 }
817
818 return hnd;
819}
820
821static void *t_hash_start(struct seq_file *m, loff_t *pos)
822{
823 struct ftrace_iterator *iter = m->private;
824 void *p = NULL;
825
826 iter->flags |= FTRACE_ITER_HASH;
827
828 return t_hash_next(m, p, pos);
829}
830
831static int t_hash_show(struct seq_file *m, void *v)
832{
833 struct ftrace_func_hook *rec;
834 struct hlist_node *hnd = v;
835 char str[KSYM_SYMBOL_LEN];
836
837 rec = hlist_entry(hnd, struct ftrace_func_hook, node);
838
839 if (rec->ops->print)
840 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
841
842 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
843 seq_printf(m, "%s:", str);
844
845 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
846 seq_printf(m, "%s", str);
847
848 if (rec->data)
849 seq_printf(m, ":%p", rec->data);
850 seq_putc(m, '\n');
851
852 return 0;
853}
854
855static void *
798t_next(struct seq_file *m, void *v, loff_t *pos) 856t_next(struct seq_file *m, void *v, loff_t *pos)
799{ 857{
800 struct ftrace_iterator *iter = m->private; 858 struct ftrace_iterator *iter = m->private;
801 struct dyn_ftrace *rec = NULL; 859 struct dyn_ftrace *rec = NULL;
802 860
861 if (iter->flags & FTRACE_ITER_HASH)
862 return t_hash_next(m, v, pos);
863
803 (*pos)++; 864 (*pos)++;
804 865
805 /* should not be called from interrupt context */ 866 if (iter->flags & FTRACE_ITER_PRINTALL)
806 spin_lock(&ftrace_lock); 867 return NULL;
868
807 retry: 869 retry:
808 if (iter->idx >= iter->pg->index) { 870 if (iter->idx >= iter->pg->index) {
809 if (iter->pg->next) { 871 if (iter->pg->next) {
@@ -832,7 +894,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
832 goto retry; 894 goto retry;
833 } 895 }
834 } 896 }
835 spin_unlock(&ftrace_lock);
836 897
837 return rec; 898 return rec;
838} 899}
@@ -842,6 +903,23 @@ static void *t_start(struct seq_file *m, loff_t *pos)
842 struct ftrace_iterator *iter = m->private; 903 struct ftrace_iterator *iter = m->private;
843 void *p = NULL; 904 void *p = NULL;
844 905
906 mutex_lock(&ftrace_lock);
907 /*
908 * For set_ftrace_filter reading, if we have the filter
909 * off, we can short cut and just print out that all
910 * functions are enabled.
911 */
912 if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) {
913 if (*pos > 0)
914 return t_hash_start(m, pos);
915 iter->flags |= FTRACE_ITER_PRINTALL;
916 (*pos)++;
917 return iter;
918 }
919
920 if (iter->flags & FTRACE_ITER_HASH)
921 return t_hash_start(m, pos);
922
845 if (*pos > 0) { 923 if (*pos > 0) {
846 if (iter->idx < 0) 924 if (iter->idx < 0)
847 return p; 925 return p;
@@ -851,18 +929,31 @@ static void *t_start(struct seq_file *m, loff_t *pos)
851 929
852 p = t_next(m, p, pos); 930 p = t_next(m, p, pos);
853 931
932 if (!p)
933 return t_hash_start(m, pos);
934
854 return p; 935 return p;
855} 936}
856 937
857static void t_stop(struct seq_file *m, void *p) 938static void t_stop(struct seq_file *m, void *p)
858{ 939{
940 mutex_unlock(&ftrace_lock);
859} 941}
860 942
861static int t_show(struct seq_file *m, void *v) 943static int t_show(struct seq_file *m, void *v)
862{ 944{
945 struct ftrace_iterator *iter = m->private;
863 struct dyn_ftrace *rec = v; 946 struct dyn_ftrace *rec = v;
864 char str[KSYM_SYMBOL_LEN]; 947 char str[KSYM_SYMBOL_LEN];
865 948
949 if (iter->flags & FTRACE_ITER_HASH)
950 return t_hash_show(m, v);
951
952 if (iter->flags & FTRACE_ITER_PRINTALL) {
953 seq_printf(m, "#### all functions enabled ####\n");
954 return 0;
955 }
956
866 if (!rec) 957 if (!rec)
867 return 0; 958 return 0;
868 959
@@ -941,23 +1032,16 @@ static void ftrace_filter_reset(int enable)
941 struct ftrace_page *pg; 1032 struct ftrace_page *pg;
942 struct dyn_ftrace *rec; 1033 struct dyn_ftrace *rec;
943 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1034 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
944 unsigned i;
945 1035
946 /* should not be called from interrupt context */ 1036 mutex_lock(&ftrace_lock);
947 spin_lock(&ftrace_lock);
948 if (enable) 1037 if (enable)
949 ftrace_filtered = 0; 1038 ftrace_filtered = 0;
950 pg = ftrace_pages_start; 1039 do_for_each_ftrace_rec(pg, rec) {
951 while (pg) { 1040 if (rec->flags & FTRACE_FL_FAILED)
952 for (i = 0; i < pg->index; i++) { 1041 continue;
953 rec = &pg->records[i]; 1042 rec->flags &= ~type;
954 if (rec->flags & FTRACE_FL_FAILED) 1043 } while_for_each_ftrace_rec();
955 continue; 1044 mutex_unlock(&ftrace_lock);
956 rec->flags &= ~type;
957 }
958 pg = pg->next;
959 }
960 spin_unlock(&ftrace_lock);
961} 1045}
962 1046
963static int 1047static int
@@ -1038,86 +1122,533 @@ enum {
1038 MATCH_END_ONLY, 1122 MATCH_END_ONLY,
1039}; 1123};
1040 1124
1041static void 1125/*
1042ftrace_match(unsigned char *buff, int len, int enable) 1126 * (static function - no need for kernel doc)
1127 *
1128 * Pass in a buffer containing a glob and this function will
1129 * set search to point to the search part of the buffer and
1130 * return the type of search it is (see enum above).
1131 * This does modify buff.
1132 *
1133 * Returns enum type.
1134 * search returns the pointer to use for comparison.
1135 * not returns 1 if buff started with a '!'
1136 * 0 otherwise.
1137 */
1138static int
1139ftrace_setup_glob(char *buff, int len, char **search, int *not)
1043{ 1140{
1044 char str[KSYM_SYMBOL_LEN];
1045 char *search = NULL;
1046 struct ftrace_page *pg;
1047 struct dyn_ftrace *rec;
1048 int type = MATCH_FULL; 1141 int type = MATCH_FULL;
1049 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1142 int i;
1050 unsigned i, match = 0, search_len = 0;
1051 int not = 0;
1052 1143
1053 if (buff[0] == '!') { 1144 if (buff[0] == '!') {
1054 not = 1; 1145 *not = 1;
1055 buff++; 1146 buff++;
1056 len--; 1147 len--;
1057 } 1148 } else
1149 *not = 0;
1150
1151 *search = buff;
1058 1152
1059 for (i = 0; i < len; i++) { 1153 for (i = 0; i < len; i++) {
1060 if (buff[i] == '*') { 1154 if (buff[i] == '*') {
1061 if (!i) { 1155 if (!i) {
1062 search = buff + i + 1; 1156 *search = buff + 1;
1063 type = MATCH_END_ONLY; 1157 type = MATCH_END_ONLY;
1064 search_len = len - (i + 1);
1065 } else { 1158 } else {
1066 if (type == MATCH_END_ONLY) { 1159 if (type == MATCH_END_ONLY)
1067 type = MATCH_MIDDLE_ONLY; 1160 type = MATCH_MIDDLE_ONLY;
1068 } else { 1161 else
1069 match = i;
1070 type = MATCH_FRONT_ONLY; 1162 type = MATCH_FRONT_ONLY;
1071 }
1072 buff[i] = 0; 1163 buff[i] = 0;
1073 break; 1164 break;
1074 } 1165 }
1075 } 1166 }
1076 } 1167 }
1077 1168
1078 /* should not be called from interrupt context */ 1169 return type;
1079 spin_lock(&ftrace_lock); 1170}
1080 if (enable) 1171
1081 ftrace_filtered = 1; 1172static int ftrace_match(char *str, char *regex, int len, int type)
1082 pg = ftrace_pages_start; 1173{
1083 while (pg) { 1174 int matched = 0;
1084 for (i = 0; i < pg->index; i++) { 1175 char *ptr;
1085 int matched = 0; 1176
1086 char *ptr; 1177 switch (type) {
1087 1178 case MATCH_FULL:
1088 rec = &pg->records[i]; 1179 if (strcmp(str, regex) == 0)
1089 if (rec->flags & FTRACE_FL_FAILED) 1180 matched = 1;
1181 break;
1182 case MATCH_FRONT_ONLY:
1183 if (strncmp(str, regex, len) == 0)
1184 matched = 1;
1185 break;
1186 case MATCH_MIDDLE_ONLY:
1187 if (strstr(str, regex))
1188 matched = 1;
1189 break;
1190 case MATCH_END_ONLY:
1191 ptr = strstr(str, regex);
1192 if (ptr && (ptr[len] == 0))
1193 matched = 1;
1194 break;
1195 }
1196
1197 return matched;
1198}
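
A brief illustration (hypothetical values, not from the patch) of how ftrace_setup_glob() and ftrace_match() work together:

	char pattern[] = "sched_*";	/* user-supplied filter glob */
	char *search;
	int not, type, hit;

	type = ftrace_setup_glob(pattern, strlen(pattern), &search, &not);
	/* type == MATCH_FRONT_ONLY, search == "sched_", not == 0 */

	hit = ftrace_match("sched_switch", search, strlen(search), type);
	/* hit == 1: "sched_switch" starts with "sched_" */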
1199
1200static int
1201ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1202{
1203 char str[KSYM_SYMBOL_LEN];
1204
1205 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1206 return ftrace_match(str, regex, len, type);
1207}
1208
1209static void ftrace_match_records(char *buff, int len, int enable)
1210{
1211 char *search;
1212 struct ftrace_page *pg;
1213 struct dyn_ftrace *rec;
1214 int type;
1215 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1216 unsigned search_len;
1217 int not;
1218
1219 type = ftrace_setup_glob(buff, len, &search, &not);
1220
1221 search_len = strlen(search);
1222
1223 mutex_lock(&ftrace_lock);
1224 do_for_each_ftrace_rec(pg, rec) {
1225
1226 if (rec->flags & FTRACE_FL_FAILED)
1227 continue;
1228
1229 if (ftrace_match_record(rec, search, search_len, type)) {
1230 if (not)
1231 rec->flags &= ~flag;
1232 else
1233 rec->flags |= flag;
1234 }
1235 /*
1236 * Only enable filtering if we have a function that
1237 * is filtered on.
1238 */
1239 if (enable && (rec->flags & FTRACE_FL_FILTER))
1240 ftrace_filtered = 1;
1241 } while_for_each_ftrace_rec();
1242 mutex_unlock(&ftrace_lock);
1243}
1244
1245static int
1246ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1247 char *regex, int len, int type)
1248{
1249 char str[KSYM_SYMBOL_LEN];
1250 char *modname;
1251
1252 kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
1253
1254 if (!modname || strcmp(modname, mod))
1255 return 0;
1256
1257 /* blank search means to match all funcs in the mod */
1258 if (len)
1259 return ftrace_match(str, regex, len, type);
1260 else
1261 return 1;
1262}
1263
1264static void ftrace_match_module_records(char *buff, char *mod, int enable)
1265{
1266 char *search = buff;
1267 struct ftrace_page *pg;
1268 struct dyn_ftrace *rec;
1269 int type = MATCH_FULL;
1270 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1271 unsigned search_len = 0;
1272 int not = 0;
1273
1274 /* blank or '*' mean the same */
1275 if (strcmp(buff, "*") == 0)
1276 buff[0] = 0;
1277
1278	/* handle the case of 'don't filter this module' */
1279 if (strcmp(buff, "!") == 0 || strcmp(buff, "!*") == 0) {
1280 buff[0] = 0;
1281 not = 1;
1282 }
1283
1284 if (strlen(buff)) {
1285 type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
1286 search_len = strlen(search);
1287 }
1288
1289 mutex_lock(&ftrace_lock);
1290 do_for_each_ftrace_rec(pg, rec) {
1291
1292 if (rec->flags & FTRACE_FL_FAILED)
1293 continue;
1294
1295 if (ftrace_match_module_record(rec, mod,
1296 search, search_len, type)) {
1297 if (not)
1298 rec->flags &= ~flag;
1299 else
1300 rec->flags |= flag;
1301 }
1302 if (enable && (rec->flags & FTRACE_FL_FILTER))
1303 ftrace_filtered = 1;
1304
1305 } while_for_each_ftrace_rec();
1306 mutex_unlock(&ftrace_lock);
1307}
1308
1309/*
1310 * We register the module command as a template to show others how
1311 * to register a command as well.
1312 */
1313
1314static int
1315ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1316{
1317 char *mod;
1318
1319 /*
1320 * cmd == 'mod' because we only registered this func
1321 * for the 'mod' ftrace_func_command.
1322 * But if you register one func with multiple commands,
1323 * you can tell which command was used by the cmd
1324 * parameter.
1325 */
1326
1327 /* we must have a module name */
1328 if (!param)
1329 return -EINVAL;
1330
1331 mod = strsep(&param, ":");
1332 if (!strlen(mod))
1333 return -EINVAL;
1334
1335 ftrace_match_module_records(func, mod, enable);
1336 return 0;
1337}
1338
1339static struct ftrace_func_command ftrace_mod_cmd = {
1340 .name = "mod",
1341 .func = ftrace_mod_callback,
1342};
1343
1344static int __init ftrace_mod_cmd_init(void)
1345{
1346 return register_ftrace_command(&ftrace_mod_cmd);
1347}
1348device_initcall(ftrace_mod_cmd_init);
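
A hypothetical sketch (names invented, not part of the patch) of hanging another command off set_ftrace_filter, following the "mod" template above:

	static int ftrace_foo_callback(char *func, char *cmd, char *param,
				       int enable)
	{
		/* func  - the glob before the first ':'
		 * cmd   - always "foo" for this registration
		 * param - text after the second ':', may be NULL */
		return 0;
	}

	static struct ftrace_func_command ftrace_foo_cmd = {
		.name	= "foo",
		.func	= ftrace_foo_callback,
	};

	static int __init ftrace_foo_cmd_init(void)
	{
		return register_ftrace_command(&ftrace_foo_cmd);
	}
	device_initcall(ftrace_foo_cmd_init);

Echoing 'do_fork:foo' into set_ftrace_filter would then reach ftrace_foo_callback() with func == "do_fork" and param == NULL, dispatched by ftrace_process_regex() further down.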
1349
1350static void
1351function_trace_hook_call(unsigned long ip, unsigned long parent_ip)
1352{
1353 struct ftrace_func_hook *entry;
1354 struct hlist_head *hhd;
1355 struct hlist_node *n;
1356 unsigned long key;
1357 int resched;
1358
1359 key = hash_long(ip, FTRACE_HASH_BITS);
1360
1361 hhd = &ftrace_func_hash[key];
1362
1363 if (hlist_empty(hhd))
1364 return;
1365
1366 /*
1367	 * Disable preemption for these calls to prevent an RCU grace
1368 * period. This syncs the hash iteration and freeing of items
1369 * on the hash. rcu_read_lock is too dangerous here.
1370 */
1371 resched = ftrace_preempt_disable();
1372 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1373 if (entry->ip == ip)
1374 entry->ops->func(ip, parent_ip, &entry->data);
1375 }
1376 ftrace_preempt_enable(resched);
1377}
1378
1379static struct ftrace_ops trace_hook_ops __read_mostly =
1380{
1381 .func = function_trace_hook_call,
1382};
1383
1384static int ftrace_hook_registered;
1385
1386static void __enable_ftrace_function_hook(void)
1387{
1388 int i;
1389
1390 if (ftrace_hook_registered)
1391 return;
1392
1393 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1394 struct hlist_head *hhd = &ftrace_func_hash[i];
1395 if (hhd->first)
1396 break;
1397 }
1398 /* Nothing registered? */
1399 if (i == FTRACE_FUNC_HASHSIZE)
1400 return;
1401
1402 __register_ftrace_function(&trace_hook_ops);
1403 ftrace_startup(0);
1404 ftrace_hook_registered = 1;
1405}
1406
1407static void __disable_ftrace_function_hook(void)
1408{
1409 int i;
1410
1411 if (!ftrace_hook_registered)
1412 return;
1413
1414 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1415 struct hlist_head *hhd = &ftrace_func_hash[i];
1416 if (hhd->first)
1417 return;
1418 }
1419
1420 /* no more funcs left */
1421 __unregister_ftrace_function(&trace_hook_ops);
1422 ftrace_shutdown(0);
1423 ftrace_hook_registered = 0;
1424}
1425
1426
1427static void ftrace_free_entry_rcu(struct rcu_head *rhp)
1428{
1429 struct ftrace_func_hook *entry =
1430 container_of(rhp, struct ftrace_func_hook, rcu);
1431
1432 if (entry->ops->free)
1433 entry->ops->free(&entry->data);
1434 kfree(entry);
1435}
1436
1437
1438int
1439register_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops,
1440 void *data)
1441{
1442 struct ftrace_func_hook *entry;
1443 struct ftrace_page *pg;
1444 struct dyn_ftrace *rec;
1445 unsigned long key;
1446 int type, len, not;
1447 int count = 0;
1448 char *search;
1449
1450 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1451 len = strlen(search);
1452
1453 /* we do not support '!' for function hooks */
1454 if (WARN_ON(not))
1455 return -EINVAL;
1456
1457 mutex_lock(&ftrace_lock);
1458 do_for_each_ftrace_rec(pg, rec) {
1459
1460 if (rec->flags & FTRACE_FL_FAILED)
1461 continue;
1462
1463 if (!ftrace_match_record(rec, search, len, type))
1464 continue;
1465
1466 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
1467 if (!entry) {
1468 /* If we did not hook to any, then return error */
1469 if (!count)
1470 count = -ENOMEM;
1471 goto out_unlock;
1472 }
1473
1474 count++;
1475
1476 entry->data = data;
1477
1478 /*
1479 * The caller might want to do something special
1480 * for each function we find. We call the callback
1481 * to give the caller an opportunity to do so.
1482 */
1483 if (ops->callback) {
1484 if (ops->callback(rec->ip, &entry->data) < 0) {
1485 /* caller does not like this func */
1486 kfree(entry);
1090 continue; 1487 continue;
1091 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1092 switch (type) {
1093 case MATCH_FULL:
1094 if (strcmp(str, buff) == 0)
1095 matched = 1;
1096 break;
1097 case MATCH_FRONT_ONLY:
1098 if (memcmp(str, buff, match) == 0)
1099 matched = 1;
1100 break;
1101 case MATCH_MIDDLE_ONLY:
1102 if (strstr(str, search))
1103 matched = 1;
1104 break;
1105 case MATCH_END_ONLY:
1106 ptr = strstr(str, search);
1107 if (ptr && (ptr[search_len] == 0))
1108 matched = 1;
1109 break;
1110 } 1488 }
1111 if (matched) { 1489 }
1112 if (not) 1490
1113 rec->flags &= ~flag; 1491 entry->ops = ops;
1114 else 1492 entry->ip = rec->ip;
1115 rec->flags |= flag; 1493
1494 key = hash_long(entry->ip, FTRACE_HASH_BITS);
1495 hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
1496
1497 } while_for_each_ftrace_rec();
1498 __enable_ftrace_function_hook();
1499
1500 out_unlock:
1501 mutex_unlock(&ftrace_lock);
1502
1503 return count;
1504}
1505
1506enum {
1507 HOOK_TEST_FUNC = 1,
1508 HOOK_TEST_DATA = 2
1509};
1510
1511static void
1512__unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops,
1513 void *data, int flags)
1514{
1515 struct ftrace_func_hook *entry;
1516 struct hlist_node *n, *tmp;
1517 char str[KSYM_SYMBOL_LEN];
1518 int type = MATCH_FULL;
1519 int i, len = 0;
1520 char *search;
1521
1522	if (!glob || !strlen(glob) || strcmp(glob, "*") == 0)
1523 glob = NULL;
1524 else {
1525 int not;
1526
1527 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1528 len = strlen(search);
1529
1530 /* we do not support '!' for function hooks */
1531 if (WARN_ON(not))
1532 return;
1533 }
1534
1535 mutex_lock(&ftrace_lock);
1536 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1537 struct hlist_head *hhd = &ftrace_func_hash[i];
1538
1539 hlist_for_each_entry_safe(entry, n, tmp, hhd, node) {
1540
1541 /* break up if statements for readability */
1542 if ((flags & HOOK_TEST_FUNC) && entry->ops != ops)
1543 continue;
1544
1545 if ((flags & HOOK_TEST_DATA) && entry->data != data)
1546 continue;
1547
1548 /* do this last, since it is the most expensive */
1549 if (glob) {
1550 kallsyms_lookup(entry->ip, NULL, NULL,
1551 NULL, str);
1552				if (!ftrace_match(str, search, len, type))
1553 continue;
1116 } 1554 }
1555
1556 hlist_del(&entry->node);
1557 call_rcu(&entry->rcu, ftrace_free_entry_rcu);
1558 }
1559 }
1560 __disable_ftrace_function_hook();
1561 mutex_unlock(&ftrace_lock);
1562}
1563
1564void
1565unregister_ftrace_function_hook(char *glob, struct ftrace_hook_ops *ops,
1566 void *data)
1567{
1568 __unregister_ftrace_function_hook(glob, ops, data,
1569 HOOK_TEST_FUNC | HOOK_TEST_DATA);
1570}
1571
1572void
1573unregister_ftrace_function_hook_func(char *glob, struct ftrace_hook_ops *ops)
1574{
1575 __unregister_ftrace_function_hook(glob, ops, NULL, HOOK_TEST_FUNC);
1576}
1577
1578void unregister_ftrace_function_hook_all(char *glob)
1579{
1580 __unregister_ftrace_function_hook(glob, NULL, NULL, 0);
1581}
1582
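
A hypothetical sketch of hooking a single function with the new API; the callback prototype is inferred from the call site in function_trace_hook_call() above, and the authoritative struct ftrace_hook_ops definition lives in the ftrace header (not shown here):

	static void my_hook(unsigned long ip, unsigned long parent_ip,
			    void **data)
	{
		/* runs from the function tracer each time the hooked
		 * function's mcount site is hit */
	}

	static struct ftrace_hook_ops my_hook_ops = {
		.func = my_hook,
	};

	static char my_glob[] = "schedule";	/* writable: the glob is parsed in place */

	static int __init my_hook_init(void)
	{
		/* returns the number of functions hooked, or a negative error */
		int ret = register_ftrace_function_hook(my_glob, &my_hook_ops, NULL);

		return ret < 0 ? ret : 0;
	}

	static void __exit my_hook_exit(void)
	{
		unregister_ftrace_function_hook(my_glob, &my_hook_ops, NULL);
	}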
1583static LIST_HEAD(ftrace_commands);
1584static DEFINE_MUTEX(ftrace_cmd_mutex);
1585
1586int register_ftrace_command(struct ftrace_func_command *cmd)
1587{
1588 struct ftrace_func_command *p;
1589 int ret = 0;
1590
1591 mutex_lock(&ftrace_cmd_mutex);
1592 list_for_each_entry(p, &ftrace_commands, list) {
1593 if (strcmp(cmd->name, p->name) == 0) {
1594 ret = -EBUSY;
1595 goto out_unlock;
1596 }
1597 }
1598 list_add(&cmd->list, &ftrace_commands);
1599 out_unlock:
1600 mutex_unlock(&ftrace_cmd_mutex);
1601
1602 return ret;
1603}
1604
1605int unregister_ftrace_command(struct ftrace_func_command *cmd)
1606{
1607 struct ftrace_func_command *p, *n;
1608 int ret = -ENODEV;
1609
1610 mutex_lock(&ftrace_cmd_mutex);
1611 list_for_each_entry_safe(p, n, &ftrace_commands, list) {
1612 if (strcmp(cmd->name, p->name) == 0) {
1613 ret = 0;
1614 list_del_init(&p->list);
1615 goto out_unlock;
1616 }
1617 }
1618 out_unlock:
1619 mutex_unlock(&ftrace_cmd_mutex);
1620
1621 return ret;
1622}
1623
1624static int ftrace_process_regex(char *buff, int len, int enable)
1625{
1626 struct ftrace_func_command *p;
1627 char *func, *command, *next = buff;
1628 int ret = -EINVAL;
1629
1630 func = strsep(&next, ":");
1631
1632 if (!next) {
1633 ftrace_match_records(func, len, enable);
1634 return 0;
1635 }
1636
1637 /* command found */
1638
1639 command = strsep(&next, ":");
1640
1641 mutex_lock(&ftrace_cmd_mutex);
1642 list_for_each_entry(p, &ftrace_commands, list) {
1643 if (strcmp(p->name, command) == 0) {
1644 ret = p->func(func, command, next, enable);
1645 goto out_unlock;
1117 } 1646 }
1118 pg = pg->next;
1119 } 1647 }
1120 spin_unlock(&ftrace_lock); 1648 out_unlock:
1649 mutex_unlock(&ftrace_cmd_mutex);
1650
1651 return ret;
1121} 1652}
1122 1653
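
For reference, strings written to set_ftrace_filter now take one of two shapes: a plain glob such as 'hrtimer_*', which goes straight to ftrace_match_records(), or 'glob:command[:param]', e.g. 'write*:mod:ext3', which hands 'write*' and 'ext3' to the "mod" command registered earlier via the ftrace_commands list.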
1123static ssize_t 1654static ssize_t
@@ -1187,7 +1718,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
1187 if (isspace(ch)) { 1718 if (isspace(ch)) {
1188 iter->filtered++; 1719 iter->filtered++;
1189 iter->buffer[iter->buffer_idx] = 0; 1720 iter->buffer[iter->buffer_idx] = 0;
1190 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1721 ret = ftrace_process_regex(iter->buffer,
1722 iter->buffer_idx, enable);
1723 if (ret)
1724 goto out;
1191 iter->buffer_idx = 0; 1725 iter->buffer_idx = 0;
1192 } else 1726 } else
1193 iter->flags |= FTRACE_ITER_CONT; 1727 iter->flags |= FTRACE_ITER_CONT;
@@ -1226,7 +1760,7 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1226 if (reset) 1760 if (reset)
1227 ftrace_filter_reset(enable); 1761 ftrace_filter_reset(enable);
1228 if (buf) 1762 if (buf)
1229 ftrace_match(buf, len, enable); 1763 ftrace_match_records(buf, len, enable);
1230 mutex_unlock(&ftrace_regex_lock); 1764 mutex_unlock(&ftrace_regex_lock);
1231} 1765}
1232 1766
@@ -1276,15 +1810,13 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1276 if (iter->buffer_idx) { 1810 if (iter->buffer_idx) {
1277 iter->filtered++; 1811 iter->filtered++;
1278 iter->buffer[iter->buffer_idx] = 0; 1812 iter->buffer[iter->buffer_idx] = 0;
1279 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1813 ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
1280 } 1814 }
1281 1815
1282 mutex_lock(&ftrace_sysctl_lock); 1816 mutex_lock(&ftrace_lock);
1283 mutex_lock(&ftrace_start_lock);
1284 if (ftrace_start_up && ftrace_enabled) 1817 if (ftrace_start_up && ftrace_enabled)
1285 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1818 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1286 mutex_unlock(&ftrace_start_lock); 1819 mutex_unlock(&ftrace_lock);
1287 mutex_unlock(&ftrace_sysctl_lock);
1288 1820
1289 kfree(iter); 1821 kfree(iter);
1290 mutex_unlock(&ftrace_regex_lock); 1822 mutex_unlock(&ftrace_regex_lock);
@@ -1437,36 +1969,33 @@ ftrace_set_func(unsigned long *array, int idx, char *buffer)
1437 struct dyn_ftrace *rec; 1969 struct dyn_ftrace *rec;
1438 struct ftrace_page *pg; 1970 struct ftrace_page *pg;
1439 int found = 0; 1971 int found = 0;
1440 int i, j; 1972 int j;
1441 1973
1442 if (ftrace_disabled) 1974 if (ftrace_disabled)
1443 return -ENODEV; 1975 return -ENODEV;
1444 1976
1445 /* should not be called from interrupt context */ 1977 mutex_lock(&ftrace_lock);
1446 spin_lock(&ftrace_lock); 1978 do_for_each_ftrace_rec(pg, rec) {
1447
1448 for (pg = ftrace_pages_start; pg; pg = pg->next) {
1449 for (i = 0; i < pg->index; i++) {
1450 rec = &pg->records[i];
1451 1979
1452 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 1980 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
1453 continue; 1981 continue;
1454 1982
1455 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1983 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1456 if (strcmp(str, buffer) == 0) { 1984 if (strcmp(str, buffer) == 0) {
1457 found = 1; 1985 /* Return 1 if we add it to the array */
1458 for (j = 0; j < idx; j++) 1986 found = 1;
1459 if (array[j] == rec->ip) { 1987 for (j = 0; j < idx; j++)
1460 found = 0; 1988 if (array[j] == rec->ip) {
1461 break; 1989 found = 0;
1462 } 1990 break;
1463 if (found) 1991 }
1464 array[idx] = rec->ip; 1992 if (found)
1465 break; 1993 array[idx] = rec->ip;
1466 } 1994 goto out;
1467 } 1995 }
1468 } 1996 } while_for_each_ftrace_rec();
1469 spin_unlock(&ftrace_lock); 1997 out:
1998 mutex_unlock(&ftrace_lock);
1470 1999
1471 return found ? 0 : -EINVAL; 2000 return found ? 0 : -EINVAL;
1472} 2001}
@@ -1604,7 +2133,7 @@ static int ftrace_convert_nops(struct module *mod,
1604 unsigned long addr; 2133 unsigned long addr;
1605 unsigned long flags; 2134 unsigned long flags;
1606 2135
1607 mutex_lock(&ftrace_start_lock); 2136 mutex_lock(&ftrace_lock);
1608 p = start; 2137 p = start;
1609 while (p < end) { 2138 while (p < end) {
1610 addr = ftrace_call_adjust(*p++); 2139 addr = ftrace_call_adjust(*p++);
@@ -1623,7 +2152,7 @@ static int ftrace_convert_nops(struct module *mod,
1623 local_irq_save(flags); 2152 local_irq_save(flags);
1624 ftrace_update_code(mod); 2153 ftrace_update_code(mod);
1625 local_irq_restore(flags); 2154 local_irq_restore(flags);
1626 mutex_unlock(&ftrace_start_lock); 2155 mutex_unlock(&ftrace_lock);
1627 2156
1628 return 0; 2157 return 0;
1629} 2158}
@@ -1796,7 +2325,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1796 if (ret < 0) 2325 if (ret < 0)
1797 return ret; 2326 return ret;
1798 2327
1799 mutex_lock(&ftrace_start_lock); 2328 mutex_lock(&ftrace_lock);
1800 if (val < 0) { 2329 if (val < 0) {
1801 /* disable pid tracing */ 2330 /* disable pid tracing */
1802 if (!ftrace_pid_trace) 2331 if (!ftrace_pid_trace)
@@ -1835,7 +2364,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1835 ftrace_startup_enable(0); 2364 ftrace_startup_enable(0);
1836 2365
1837 out: 2366 out:
1838 mutex_unlock(&ftrace_start_lock); 2367 mutex_unlock(&ftrace_lock);
1839 2368
1840 return cnt; 2369 return cnt;
1841} 2370}
@@ -1898,17 +2427,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
1898 if (unlikely(ftrace_disabled)) 2427 if (unlikely(ftrace_disabled))
1899 return -1; 2428 return -1;
1900 2429
1901 mutex_lock(&ftrace_sysctl_lock); 2430 mutex_lock(&ftrace_lock);
1902 2431
1903 ret = __register_ftrace_function(ops); 2432 ret = __register_ftrace_function(ops);
1904 ftrace_startup(0); 2433 ftrace_startup(0);
1905 2434
1906 mutex_unlock(&ftrace_sysctl_lock); 2435 mutex_unlock(&ftrace_lock);
1907 return ret; 2436 return ret;
1908} 2437}
1909 2438
1910/** 2439/**
1911 * unregister_ftrace_function - unresgister a function for profiling. 2440 * unregister_ftrace_function - unregister a function for profiling.
1912 * @ops - ops structure that holds the function to unregister 2441 * @ops - ops structure that holds the function to unregister
1913 * 2442 *
1914 * Unregister a function that was added to be called by ftrace profiling. 2443 * Unregister a function that was added to be called by ftrace profiling.
@@ -1917,10 +2446,10 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1917{ 2446{
1918 int ret; 2447 int ret;
1919 2448
1920 mutex_lock(&ftrace_sysctl_lock); 2449 mutex_lock(&ftrace_lock);
1921 ret = __unregister_ftrace_function(ops); 2450 ret = __unregister_ftrace_function(ops);
1922 ftrace_shutdown(0); 2451 ftrace_shutdown(0);
1923 mutex_unlock(&ftrace_sysctl_lock); 2452 mutex_unlock(&ftrace_lock);
1924 2453
1925 return ret; 2454 return ret;
1926} 2455}
@@ -1935,7 +2464,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1935 if (unlikely(ftrace_disabled)) 2464 if (unlikely(ftrace_disabled))
1936 return -ENODEV; 2465 return -ENODEV;
1937 2466
1938 mutex_lock(&ftrace_sysctl_lock); 2467 mutex_lock(&ftrace_lock);
1939 2468
1940 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 2469 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1941 2470
@@ -1964,7 +2493,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1964 } 2493 }
1965 2494
1966 out: 2495 out:
1967 mutex_unlock(&ftrace_sysctl_lock); 2496 mutex_unlock(&ftrace_lock);
1968 return ret; 2497 return ret;
1969} 2498}
1970 2499
@@ -2080,7 +2609,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2080{ 2609{
2081 int ret = 0; 2610 int ret = 0;
2082 2611
2083 mutex_lock(&ftrace_sysctl_lock); 2612 mutex_lock(&ftrace_lock);
2084 2613
2085 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; 2614 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2086 register_pm_notifier(&ftrace_suspend_notifier); 2615 register_pm_notifier(&ftrace_suspend_notifier);
@@ -2098,13 +2627,13 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2098 ftrace_startup(FTRACE_START_FUNC_RET); 2627 ftrace_startup(FTRACE_START_FUNC_RET);
2099 2628
2100out: 2629out:
2101 mutex_unlock(&ftrace_sysctl_lock); 2630 mutex_unlock(&ftrace_lock);
2102 return ret; 2631 return ret;
2103} 2632}
2104 2633
2105void unregister_ftrace_graph(void) 2634void unregister_ftrace_graph(void)
2106{ 2635{
2107 mutex_lock(&ftrace_sysctl_lock); 2636 mutex_lock(&ftrace_lock);
2108 2637
2109 atomic_dec(&ftrace_graph_active); 2638 atomic_dec(&ftrace_graph_active);
2110 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2639 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
@@ -2112,7 +2641,7 @@ void unregister_ftrace_graph(void)
2112 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2641 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2113 unregister_pm_notifier(&ftrace_suspend_notifier); 2642 unregister_pm_notifier(&ftrace_suspend_notifier);
2114 2643
2115 mutex_unlock(&ftrace_sysctl_lock); 2644 mutex_unlock(&ftrace_lock);
2116} 2645}
2117 2646
2118/* Allocate a return stack for newly created task */ 2647/* Allocate a return stack for newly created task */
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 000000000000..ae201b3eda89
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,339 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/dcache.h>
10#include <linux/debugfs.h>
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <trace/kmemtrace.h>
14
15#include "trace.h"
16#include "trace_output.h"
17
18/* Select an alternative, minimalistic output instead of the original one */
19#define TRACE_KMEM_OPT_MINIMAL 0x1
20
21static struct tracer_opt kmem_opts[] = {
22	/* Disable the minimalistic output by default */
23 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
24 { }
25};
26
27static struct tracer_flags kmem_tracer_flags = {
28 .val = 0,
29 .opts = kmem_opts
30};
31
32
33static bool kmem_tracing_enabled __read_mostly;
34static struct trace_array *kmemtrace_array;
35
36static int kmem_trace_init(struct trace_array *tr)
37{
38 int cpu;
39 kmemtrace_array = tr;
40
41 for_each_cpu_mask(cpu, cpu_possible_map)
42 tracing_reset(tr, cpu);
43
44 kmem_tracing_enabled = true;
45
46 return 0;
47}
48
49static void kmem_trace_reset(struct trace_array *tr)
50{
51 kmem_tracing_enabled = false;
52}
53
54static void kmemtrace_headers(struct seq_file *s)
55{
56 /* Don't need headers for the original kmemtrace output */
57 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
58 return;
59
60 seq_printf(s, "#\n");
61 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
62 " POINTER NODE CALLER\n");
63 seq_printf(s, "# FREE | | | | "
64 " | | | |\n");
65 seq_printf(s, "# |\n\n");
66}
67
68/*
69 * The two following functions give the original output from kmemtrace,
70 * or something close to....perhaps they need some missing things
71 */
72static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry)
75{
76 struct trace_seq *s = &iter->seq;
77 int ret;
78
79 /* Taken from the old linux/kmemtrace.h */
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr,
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
84 (unsigned long) entry->gfp_flags, entry->node);
85
86 if (!ret)
87 return TRACE_TYPE_PARTIAL_LINE;
88
89 return TRACE_TYPE_HANDLED;
90}
91
92static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry)
95{
96 struct trace_seq *s = &iter->seq;
97 int ret;
98
99 /* Taken from the old linux/kmemtrace.h */
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE;
105
106 return TRACE_TYPE_HANDLED;
107}
108
109
110/* The two following functions provide a more minimalistic output */
111static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter,
113 struct kmemtrace_alloc_entry *entry)
114{
115 struct trace_seq *s = &iter->seq;
116 int ret;
117
118 /* Alloc entry */
119 ret = trace_seq_printf(s, " + ");
120 if (!ret)
121 return TRACE_TYPE_PARTIAL_LINE;
122
123 /* Type */
124 switch (entry->type_id) {
125 case KMEMTRACE_TYPE_KMALLOC:
126 ret = trace_seq_printf(s, "K ");
127 break;
128 case KMEMTRACE_TYPE_CACHE:
129 ret = trace_seq_printf(s, "C ");
130 break;
131 case KMEMTRACE_TYPE_PAGES:
132 ret = trace_seq_printf(s, "P ");
133 break;
134 default:
135 ret = trace_seq_printf(s, "? ");
136 }
137
138 if (!ret)
139 return TRACE_TYPE_PARTIAL_LINE;
140
141 /* Requested */
142 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
143 if (!ret)
144 return TRACE_TYPE_PARTIAL_LINE;
145
146 /* Allocated */
147 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
148 if (!ret)
149 return TRACE_TYPE_PARTIAL_LINE;
150
151 /* Flags
152	 * TODO: would be better to print the names of the GFP flags
153 */
154 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
155 if (!ret)
156 return TRACE_TYPE_PARTIAL_LINE;
157
158 /* Pointer to allocated */
159 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
160 if (!ret)
161 return TRACE_TYPE_PARTIAL_LINE;
162
163 /* Node */
164 ret = trace_seq_printf(s, "%4d ", entry->node);
165 if (!ret)
166 return TRACE_TYPE_PARTIAL_LINE;
167
168 /* Call site */
169 ret = seq_print_ip_sym(s, entry->call_site, 0);
170 if (!ret)
171 return TRACE_TYPE_PARTIAL_LINE;
172
173 if (!trace_seq_printf(s, "\n"))
174 return TRACE_TYPE_PARTIAL_LINE;
175
176 return TRACE_TYPE_HANDLED;
177}
178
179static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry)
182{
183 struct trace_seq *s = &iter->seq;
184 int ret;
185
186 /* Free entry */
187 ret = trace_seq_printf(s, " - ");
188 if (!ret)
189 return TRACE_TYPE_PARTIAL_LINE;
190
191 /* Type */
192 switch (entry->type_id) {
193 case KMEMTRACE_TYPE_KMALLOC:
194 ret = trace_seq_printf(s, "K ");
195 break;
196 case KMEMTRACE_TYPE_CACHE:
197 ret = trace_seq_printf(s, "C ");
198 break;
199 case KMEMTRACE_TYPE_PAGES:
200 ret = trace_seq_printf(s, "P ");
201 break;
202 default:
203 ret = trace_seq_printf(s, "? ");
204 }
205
206 if (!ret)
207 return TRACE_TYPE_PARTIAL_LINE;
208
209 /* Skip requested/allocated/flags */
210 ret = trace_seq_printf(s, " ");
211 if (!ret)
212 return TRACE_TYPE_PARTIAL_LINE;
213
214 /* Pointer to allocated */
215 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
216 if (!ret)
217 return TRACE_TYPE_PARTIAL_LINE;
218
219 /* Skip node */
220 ret = trace_seq_printf(s, " ");
221 if (!ret)
222 return TRACE_TYPE_PARTIAL_LINE;
223
224 /* Call site */
225 ret = seq_print_ip_sym(s, entry->call_site, 0);
226 if (!ret)
227 return TRACE_TYPE_PARTIAL_LINE;
228
229 if (!trace_seq_printf(s, "\n"))
230 return TRACE_TYPE_PARTIAL_LINE;
231
232 return TRACE_TYPE_HANDLED;
233}
234
235static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
236{
237 struct trace_entry *entry = iter->ent;
238
239 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field;
242 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field);
245 else
246 return kmemtrace_print_alloc_original(iter, field);
247 }
248
249 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field;
251 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field);
254 else
255 return kmemtrace_print_free_original(iter, field);
256 }
257
258 default:
259 return TRACE_TYPE_UNHANDLED;
260 }
261}
262
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275
276 if (!kmem_tracing_enabled)
277 return;
278
279 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
280 sizeof(*entry), 0, 0);
281 if (!event)
282 return;
283 entry = ring_buffer_event_data(event);
284
285 entry->call_site = call_site;
286 entry->ptr = ptr;
287 entry->bytes_req = bytes_req;
288 entry->bytes_alloc = bytes_alloc;
289 entry->gfp_flags = gfp_flags;
290 entry->node = node;
291
292 trace_buffer_unlock_commit(tr, event, 0, 0);
293}
294EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
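
A sketch of an allocator-side caller recording an allocation; the real hooks belong in the slab allocators, and the helper and variable names below are hypothetical:

	/* inside a hypothetical kmalloc-style allocation path */
	void *ptr = do_the_allocation(size, flags);	/* hypothetical helper */

	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,  /* kind of allocation */
				  _RET_IP_,                /* call site          */
				  ptr,                     /* object returned    */
				  size,                    /* bytes requested    */
				  actual_size,             /* bytes handed out   */
				  flags,                   /* gfp flags          */
				  numa_node_id());         /* NUMA node          */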
295
296void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
297 unsigned long call_site,
298 const void *ptr)
299{
300 struct ring_buffer_event *event;
301 struct kmemtrace_free_entry *entry;
302 struct trace_array *tr = kmemtrace_array;
303
304 if (!kmem_tracing_enabled)
305 return;
306
307 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
308 sizeof(*entry), 0, 0);
309 if (!event)
310 return;
311 entry = ring_buffer_event_data(event);
312 entry->type_id = type_id;
313 entry->call_site = call_site;
314 entry->ptr = ptr;
315
316 trace_buffer_unlock_commit(tr, event, 0, 0);
317}
318EXPORT_SYMBOL(kmemtrace_mark_free);
319
320static struct tracer kmem_tracer __read_mostly = {
321 .name = "kmemtrace",
322 .init = kmem_trace_init,
323 .reset = kmem_trace_reset,
324 .print_line = kmemtrace_print_line,
325 .print_header = kmemtrace_headers,
326 .flags = &kmem_tracer_flags
327};
328
329void kmemtrace_init(void)
330{
331 /* earliest opportunity to start kmem tracing */
332}
333
334static int __init init_kmem_tracer(void)
335{
336 return register_tracer(&kmem_tracer);
337}
338
339device_initcall(init_kmem_tracer);
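
Usage note (not part of the patch): once registered, the tracer should be selectable through the usual ftrace interface, e.g. something like 'echo kmemtrace > /sys/kernel/debug/tracing/current_tracer' (path depending on where debugfs is mounted), with the kmem_minimalistic option toggled via the trace_options file.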
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..8f19f1aa42b0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,9 +4,11 @@
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */ 5 */
6#include <linux/ring_buffer.h> 6#include <linux/ring_buffer.h>
7#include <linux/ftrace_irq.h>
7#include <linux/spinlock.h> 8#include <linux/spinlock.h>
8#include <linux/debugfs.h> 9#include <linux/debugfs.h>
9#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/hardirq.h>
10#include <linux/module.h> 12#include <linux/module.h>
11#include <linux/percpu.h> 13#include <linux/percpu.h>
12#include <linux/mutex.h> 14#include <linux/mutex.h>
@@ -57,7 +59,7 @@ enum {
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, 59 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58}; 60};
59 61
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 62static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
61 63
62/** 64/**
63 * tracing_on - enable all tracing buffers 65 * tracing_on - enable all tracing buffers
@@ -89,13 +91,22 @@ EXPORT_SYMBOL_GPL(tracing_off);
89 * tracing_off_permanent - permanently disable ring buffers 91 * tracing_off_permanent - permanently disable ring buffers
90 * 92 *
91 * This function, once called, will disable all ring buffers 93 * This function, once called, will disable all ring buffers
92 * permanenty. 94 * permanently.
93 */ 95 */
94void tracing_off_permanent(void) 96void tracing_off_permanent(void)
95{ 97{
96 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 98 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
97} 99}
98 100
101/**
102 * tracing_is_on - show state of ring buffers enabled
103 */
104int tracing_is_on(void)
105{
106 return ring_buffer_flags == RB_BUFFERS_ON;
107}
108EXPORT_SYMBOL_GPL(tracing_is_on);
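
tracing_is_on() simply reports whether the ring buffers are still globally enabled (neither tracing_off() nor tracing_off_permanent() has turned them off); a caller might use it like this (illustrative only):

	if (!tracing_is_on())
		return;		/* ring buffers disabled, skip the work */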
109
99#include "trace.h" 110#include "trace.h"
100 111
101/* Up this if you want to test the TIME_EXTENTS and normalization */ 112/* Up this if you want to test the TIME_EXTENTS and normalization */
@@ -123,8 +134,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
123EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 134EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
124 135
125#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 136#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
126#define RB_ALIGNMENT_SHIFT 2 137#define RB_ALIGNMENT 4U
127#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
128#define RB_MAX_SMALL_DATA 28 138#define RB_MAX_SMALL_DATA 28
129 139
130enum { 140enum {
@@ -133,7 +143,7 @@ enum {
133}; 143};
134 144
135/* inline for ring buffer fast paths */ 145/* inline for ring buffer fast paths */
136static inline unsigned 146static unsigned
137rb_event_length(struct ring_buffer_event *event) 147rb_event_length(struct ring_buffer_event *event)
138{ 148{
139 unsigned length; 149 unsigned length;
@@ -151,7 +161,7 @@ rb_event_length(struct ring_buffer_event *event)
151 161
152 case RINGBUF_TYPE_DATA: 162 case RINGBUF_TYPE_DATA:
153 if (event->len) 163 if (event->len)
154 length = event->len << RB_ALIGNMENT_SHIFT; 164 length = event->len * RB_ALIGNMENT;
155 else 165 else
156 length = event->array[0]; 166 length = event->array[0];
157 return length + RB_EVNT_HDR_SIZE; 167 return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +189,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
179EXPORT_SYMBOL_GPL(ring_buffer_event_length); 189EXPORT_SYMBOL_GPL(ring_buffer_event_length);
180 190
181/* inline for ring buffer fast paths */ 191/* inline for ring buffer fast paths */
182static inline void * 192static void *
183rb_event_data(struct ring_buffer_event *event) 193rb_event_data(struct ring_buffer_event *event)
184{ 194{
185 BUG_ON(event->type != RINGBUF_TYPE_DATA); 195 BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -209,7 +219,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
209 219
210struct buffer_data_page { 220struct buffer_data_page {
211 u64 time_stamp; /* page time stamp */ 221 u64 time_stamp; /* page time stamp */
212 local_t commit; /* write commited index */ 222 local_t commit; /* write committed index */
213 unsigned char data[]; /* data of buffer page */ 223 unsigned char data[]; /* data of buffer page */
214}; 224};
215 225
@@ -229,10 +239,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
229 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 239 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
230 * this issue out. 240 * this issue out.
231 */ 241 */
232static inline void free_buffer_page(struct buffer_page *bpage) 242static void free_buffer_page(struct buffer_page *bpage)
233{ 243{
234 if (bpage->page) 244 free_page((unsigned long)bpage->page);
235 free_page((unsigned long)bpage->page);
236 kfree(bpage); 245 kfree(bpage);
237} 246}
238 247
@@ -260,7 +269,7 @@ struct ring_buffer_per_cpu {
260 struct list_head pages; 269 struct list_head pages;
261 struct buffer_page *head_page; /* read from head */ 270 struct buffer_page *head_page; /* read from head */
262 struct buffer_page *tail_page; /* write to tail */ 271 struct buffer_page *tail_page; /* write to tail */
263 struct buffer_page *commit_page; /* commited pages */ 272 struct buffer_page *commit_page; /* committed pages */
264 struct buffer_page *reader_page; 273 struct buffer_page *reader_page;
265 unsigned long overrun; 274 unsigned long overrun;
266 unsigned long entries; 275 unsigned long entries;
@@ -273,8 +282,8 @@ struct ring_buffer {
273 unsigned pages; 282 unsigned pages;
274 unsigned flags; 283 unsigned flags;
275 int cpus; 284 int cpus;
276 cpumask_var_t cpumask;
277 atomic_t record_disabled; 285 atomic_t record_disabled;
286 cpumask_var_t cpumask;
278 287
279 struct mutex mutex; 288 struct mutex mutex;
280 289
@@ -303,7 +312,7 @@ struct ring_buffer_iter {
303 * check_pages - integrity check of buffer pages 312 * check_pages - integrity check of buffer pages
304 * @cpu_buffer: CPU buffer with pages to test 313 * @cpu_buffer: CPU buffer with pages to test
305 * 314 *
306 * As a safty measure we check to make sure the data pages have not 315 * As a safety measure we check to make sure the data pages have not
307 * been corrupted. 316 * been corrupted.
308 */ 317 */
309static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 318static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
@@ -811,7 +820,7 @@ rb_event_index(struct ring_buffer_event *event)
811 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 820 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
812} 821}
813 822
814static inline int 823static int
815rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 824rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
816 struct ring_buffer_event *event) 825 struct ring_buffer_event *event)
817{ 826{
@@ -825,7 +834,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
825 rb_commit_index(cpu_buffer) == index; 834 rb_commit_index(cpu_buffer) == index;
826} 835}
827 836
828static inline void 837static void
829rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, 838rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
830 struct ring_buffer_event *event) 839 struct ring_buffer_event *event)
831{ 840{
@@ -850,7 +859,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
850 local_set(&cpu_buffer->commit_page->page->commit, index); 859 local_set(&cpu_buffer->commit_page->page->commit, index);
851} 860}
852 861
853static inline void 862static void
854rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 863rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
855{ 864{
856 /* 865 /*
@@ -896,7 +905,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
896 cpu_buffer->reader_page->read = 0; 905 cpu_buffer->reader_page->read = 0;
897} 906}
898 907
899static inline void rb_inc_iter(struct ring_buffer_iter *iter) 908static void rb_inc_iter(struct ring_buffer_iter *iter)
900{ 909{
901 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 910 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
902 911
@@ -926,7 +935,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
926 * and with this, we can determine what to place into the 935 * and with this, we can determine what to place into the
927 * data field. 936 * data field.
928 */ 937 */
929static inline void 938static void
930rb_update_event(struct ring_buffer_event *event, 939rb_update_event(struct ring_buffer_event *event,
931 unsigned type, unsigned length) 940 unsigned type, unsigned length)
932{ 941{
@@ -938,15 +947,11 @@ rb_update_event(struct ring_buffer_event *event,
938 break; 947 break;
939 948
940 case RINGBUF_TYPE_TIME_EXTEND: 949 case RINGBUF_TYPE_TIME_EXTEND:
941 event->len = 950 event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
942 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
943 >> RB_ALIGNMENT_SHIFT;
944 break; 951 break;
945 952
946 case RINGBUF_TYPE_TIME_STAMP: 953 case RINGBUF_TYPE_TIME_STAMP:
947 event->len = 954 event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
948 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
949 >> RB_ALIGNMENT_SHIFT;
950 break; 955 break;
951 956
952 case RINGBUF_TYPE_DATA: 957 case RINGBUF_TYPE_DATA:
@@ -955,16 +960,14 @@ rb_update_event(struct ring_buffer_event *event,
955 event->len = 0; 960 event->len = 0;
956 event->array[0] = length; 961 event->array[0] = length;
957 } else 962 } else
958 event->len = 963 event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
959 (length + (RB_ALIGNMENT-1))
960 >> RB_ALIGNMENT_SHIFT;
961 break; 964 break;
962 default: 965 default:
963 BUG(); 966 BUG();
964 } 967 }
965} 968}
966 969
967static inline unsigned rb_calculate_event_length(unsigned length) 970static unsigned rb_calculate_event_length(unsigned length)
968{ 971{
969 struct ring_buffer_event event; /* Used only for sizeof array */ 972 struct ring_buffer_event event; /* Used only for sizeof array */
970 973
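
[annotation] The DIV_ROUND_UP() conversion does not change the encoding: with RB_ALIGNMENT now 4U, event->len still counts 4-byte units. A quick worked example, added here as annotation rather than patch content:

/*
 * For a 13-byte payload:
 *   old: (13 + (RB_ALIGNMENT - 1)) >> RB_ALIGNMENT_SHIFT = (13 + 3) >> 2 = 4
 *   new: DIV_ROUND_UP(13, RB_ALIGNMENT)                  = (13 + 3) / 4  = 4
 * and rb_event_length() later recovers 4 * RB_ALIGNMENT = 16 bytes of
 * aligned payload plus RB_EVNT_HDR_SIZE.
 */
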
@@ -990,6 +993,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
990 struct ring_buffer *buffer = cpu_buffer->buffer; 993 struct ring_buffer *buffer = cpu_buffer->buffer;
991 struct ring_buffer_event *event; 994 struct ring_buffer_event *event;
992 unsigned long flags; 995 unsigned long flags;
996 bool lock_taken = false;
993 997
994 commit_page = cpu_buffer->commit_page; 998 commit_page = cpu_buffer->commit_page;
995 /* we just need to protect against interrupts */ 999 /* we just need to protect against interrupts */
@@ -1003,7 +1007,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1003 struct buffer_page *next_page = tail_page; 1007 struct buffer_page *next_page = tail_page;
1004 1008
1005 local_irq_save(flags); 1009 local_irq_save(flags);
1006 __raw_spin_lock(&cpu_buffer->lock); 1010 /*
1011 * Since the write to the buffer is still not
1012 * fully lockless, we must be careful with NMIs.
1013 * The locks in the writers are taken when a write
1014 * crosses to a new page. The locks protect against
1015 * races with the readers (this will soon be fixed
1016 * with a lockless solution).
1017 *
1018 * Because we can not protect against NMIs, and we
1019 * want to keep traces reentrant, we need to manage
1020 * what happens when we are in an NMI.
1021 *
1022 * NMIs can happen after we take the lock.
1023 * If we are in an NMI, only take the lock
1024 * if it is not already taken. Otherwise
1025 * simply fail.
1026 */
1027 if (unlikely(in_nmi())) {
1028 if (!__raw_spin_trylock(&cpu_buffer->lock))
1029 goto out_reset;
1030 } else
1031 __raw_spin_lock(&cpu_buffer->lock);
1032
1033 lock_taken = true;
1007 1034
1008 rb_inc_page(cpu_buffer, &next_page); 1035 rb_inc_page(cpu_buffer, &next_page);
1009 1036
@@ -1012,7 +1039,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1012 1039
1013 /* we grabbed the lock before incrementing */ 1040 /* we grabbed the lock before incrementing */
1014 if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) 1041 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1015 goto out_unlock; 1042 goto out_reset;
1016 1043
1017 /* 1044 /*
1018 * If for some reason, we had an interrupt storm that made 1045 * If for some reason, we had an interrupt storm that made
@@ -1021,12 +1048,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1021 */ 1048 */
1022 if (unlikely(next_page == commit_page)) { 1049 if (unlikely(next_page == commit_page)) {
1023 WARN_ON_ONCE(1); 1050 WARN_ON_ONCE(1);
1024 goto out_unlock; 1051 goto out_reset;
1025 } 1052 }
1026 1053
1027 if (next_page == head_page) { 1054 if (next_page == head_page) {
1028 if (!(buffer->flags & RB_FL_OVERWRITE)) 1055 if (!(buffer->flags & RB_FL_OVERWRITE))
1029 goto out_unlock; 1056 goto out_reset;
1030 1057
1031 /* tail_page has not moved yet? */ 1058 /* tail_page has not moved yet? */
1032 if (tail_page == cpu_buffer->tail_page) { 1059 if (tail_page == cpu_buffer->tail_page) {
@@ -1100,12 +1127,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1100 1127
1101 return event; 1128 return event;
1102 1129
1103 out_unlock: 1130 out_reset:
1104 /* reset write */ 1131 /* reset write */
1105 if (tail <= BUF_PAGE_SIZE) 1132 if (tail <= BUF_PAGE_SIZE)
1106 local_set(&tail_page->write, tail); 1133 local_set(&tail_page->write, tail);
1107 1134
1108 __raw_spin_unlock(&cpu_buffer->lock); 1135 if (likely(lock_taken))
1136 __raw_spin_unlock(&cpu_buffer->lock);
1109 local_irq_restore(flags); 1137 local_irq_restore(flags);
1110 return NULL; 1138 return NULL;
1111} 1139}
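
[annotation] The comment block above states the NMI rule: never spin on the per-CPU lock from NMI context, only trylock and bail out. The same pattern reduced to a standalone sketch; the demo_* names are hypothetical, the raw-spinlock initializer follows the style used elsewhere in kernel/trace, and in_nmi() comes from the hardirq.h changes in this series.

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/errno.h>

static raw_spinlock_t demo_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

static int demo_update(void)
{
	unsigned long flags;
	int locked = 1;

	local_irq_save(flags);
	if (unlikely(in_nmi())) {
		/* an NMI may have interrupted the lock holder: never spin */
		if (!__raw_spin_trylock(&demo_lock))
			locked = 0;
	} else
		__raw_spin_lock(&demo_lock);

	if (locked) {
		/* ... critical section ... */
		__raw_spin_unlock(&demo_lock);
	}
	local_irq_restore(flags);

	return locked ? 0 : -EBUSY;
}
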
@@ -1265,7 +1293,6 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1265 * ring_buffer_lock_reserve - reserve a part of the buffer 1293 * ring_buffer_lock_reserve - reserve a part of the buffer
1266 * @buffer: the ring buffer to reserve from 1294 * @buffer: the ring buffer to reserve from
1267 * @length: the length of the data to reserve (excluding event header) 1295 * @length: the length of the data to reserve (excluding event header)
1268 * @flags: a pointer to save the interrupt flags
1269 * 1296 *
1270 * Returns a reserved event on the ring buffer to copy directly to. 1297 * Returns a reserved event on the ring buffer to copy directly to.
1271 * The user of this interface will need to get the body to write into 1298 * The user of this interface will need to get the body to write into
@@ -1278,9 +1305,7 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1278 * If NULL is returned, then nothing has been allocated or locked. 1305 * If NULL is returned, then nothing has been allocated or locked.
1279 */ 1306 */
1280struct ring_buffer_event * 1307struct ring_buffer_event *
1281ring_buffer_lock_reserve(struct ring_buffer *buffer, 1308ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1282 unsigned long length,
1283 unsigned long *flags)
1284{ 1309{
1285 struct ring_buffer_per_cpu *cpu_buffer; 1310 struct ring_buffer_per_cpu *cpu_buffer;
1286 struct ring_buffer_event *event; 1311 struct ring_buffer_event *event;
@@ -1347,15 +1372,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1347 * ring_buffer_unlock_commit - commit a reserved event 1372 * ring_buffer_unlock_commit - commit a reserved event
1348 * @buffer: The buffer to commit to 1373 * @buffer: The buffer to commit to
1349 * @event: The event pointer to commit. 1374 * @event: The event pointer to commit.
1350 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1351 * 1375 *
1352 * This commits the data to the ring buffer, and releases any locks held. 1376 * This commits the data to the ring buffer, and releases any locks held.
1353 * 1377 *
1354 * Must be paired with ring_buffer_lock_reserve. 1378 * Must be paired with ring_buffer_lock_reserve.
1355 */ 1379 */
1356int ring_buffer_unlock_commit(struct ring_buffer *buffer, 1380int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1357 struct ring_buffer_event *event, 1381 struct ring_buffer_event *event)
1358 unsigned long flags)
1359{ 1382{
1360 struct ring_buffer_per_cpu *cpu_buffer; 1383 struct ring_buffer_per_cpu *cpu_buffer;
1361 int cpu = raw_smp_processor_id(); 1384 int cpu = raw_smp_processor_id();
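
[annotation] With the flags parameter gone, reserve and commit now form a simple pair and the ring buffer handles interrupt state internally. A hedged caller sketch; demo_write() is illustrative, not part of the patch.

#include <linux/types.h>
#include <linux/ring_buffer.h>

static void demo_write(struct ring_buffer *buffer, u32 value)
{
	struct ring_buffer_event *event;

	/* reserve room for one u32 payload; the header is added internally */
	event = ring_buffer_lock_reserve(buffer, sizeof(value));
	if (!event)
		return;			/* recording disabled or no space */

	*(u32 *)ring_buffer_event_data(event) = value;
	ring_buffer_unlock_commit(buffer, event);
}
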
@@ -1438,7 +1461,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1438} 1461}
1439EXPORT_SYMBOL_GPL(ring_buffer_write); 1462EXPORT_SYMBOL_GPL(ring_buffer_write);
1440 1463
1441static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1464static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1442{ 1465{
1443 struct buffer_page *reader = cpu_buffer->reader_page; 1466 struct buffer_page *reader = cpu_buffer->reader_page;
1444 struct buffer_page *head = cpu_buffer->head_page; 1467 struct buffer_page *head = cpu_buffer->head_page;
@@ -2277,9 +2300,24 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2277 if (buffer_a->pages != buffer_b->pages) 2300 if (buffer_a->pages != buffer_b->pages)
2278 return -EINVAL; 2301 return -EINVAL;
2279 2302
2303 if (ring_buffer_flags != RB_BUFFERS_ON)
2304 return -EAGAIN;
2305
2306 if (atomic_read(&buffer_a->record_disabled))
2307 return -EAGAIN;
2308
2309 if (atomic_read(&buffer_b->record_disabled))
2310 return -EAGAIN;
2311
2280 cpu_buffer_a = buffer_a->buffers[cpu]; 2312 cpu_buffer_a = buffer_a->buffers[cpu];
2281 cpu_buffer_b = buffer_b->buffers[cpu]; 2313 cpu_buffer_b = buffer_b->buffers[cpu];
2282 2314
2315 if (atomic_read(&cpu_buffer_a->record_disabled))
2316 return -EAGAIN;
2317
2318 if (atomic_read(&cpu_buffer_b->record_disabled))
2319 return -EAGAIN;
2320
2283 /* 2321 /*
2284 * We can't do a synchronize_sched here because this 2322 * We can't do a synchronize_sched here because this
2285 * function can be called in atomic context. 2323 * function can be called in atomic context.
@@ -2303,13 +2341,14 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2303EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2341EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
2304 2342
2305static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, 2343static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2306 struct buffer_data_page *bpage) 2344 struct buffer_data_page *bpage,
2345 unsigned int offset)
2307{ 2346{
2308 struct ring_buffer_event *event; 2347 struct ring_buffer_event *event;
2309 unsigned long head; 2348 unsigned long head;
2310 2349
2311 __raw_spin_lock(&cpu_buffer->lock); 2350 __raw_spin_lock(&cpu_buffer->lock);
2312 for (head = 0; head < local_read(&bpage->commit); 2351 for (head = offset; head < local_read(&bpage->commit);
2313 head += rb_event_length(event)) { 2352 head += rb_event_length(event)) {
2314 2353
2315 event = __rb_data_page_index(bpage, head); 2354 event = __rb_data_page_index(bpage, head);
@@ -2377,12 +2416,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2377 * to swap with a page in the ring buffer. 2416 * to swap with a page in the ring buffer.
2378 * 2417 *
2379 * for example: 2418 * for example:
2380 * rpage = ring_buffer_alloc_page(buffer); 2419 * rpage = ring_buffer_alloc_read_page(buffer);
2381 * if (!rpage) 2420 * if (!rpage)
2382 * return error; 2421 * return error;
2383 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); 2422 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2384 * if (ret) 2423 * if (ret >= 0)
2385 * process_page(rpage); 2424 * process_page(rpage, ret);
2386 * 2425 *
2387 * When @full is set, the function will not return true unless 2426 * When @full is set, the function will not return true unless
2388 * the writer is off the reader page. 2427 * the writer is off the reader page.
@@ -2393,8 +2432,8 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2393 * responsible for that. 2432 * responsible for that.
2394 * 2433 *
2395 * Returns: 2434 * Returns:
2396 * 1 if data has been transferred 2435 * >=0 if data has been transferred, returns the offset of consumed data.
2397 * 0 if no data has been transferred. 2436 * <0 if no data has been transferred.
2398 */ 2437 */
2399int ring_buffer_read_page(struct ring_buffer *buffer, 2438int ring_buffer_read_page(struct ring_buffer *buffer,
2400 void **data_page, int cpu, int full) 2439 void **data_page, int cpu, int full)
@@ -2403,7 +2442,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2403 struct ring_buffer_event *event; 2442 struct ring_buffer_event *event;
2404 struct buffer_data_page *bpage; 2443 struct buffer_data_page *bpage;
2405 unsigned long flags; 2444 unsigned long flags;
2406 int ret = 0; 2445 unsigned int read;
2446 int ret = -1;
2407 2447
2408 if (!data_page) 2448 if (!data_page)
2409 return 0; 2449 return 0;
@@ -2425,25 +2465,29 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2425 /* check for data */ 2465 /* check for data */
2426 if (!local_read(&cpu_buffer->reader_page->page->commit)) 2466 if (!local_read(&cpu_buffer->reader_page->page->commit))
2427 goto out; 2467 goto out;
2468
2469 read = cpu_buffer->reader_page->read;
2428 /* 2470 /*
2429 * If the writer is already off of the read page, then simply 2471 * If the writer is already off of the read page, then simply
2430 * switch the read page with the given page. Otherwise 2472 * switch the read page with the given page. Otherwise
2431 * we need to copy the data from the reader to the writer. 2473 * we need to copy the data from the reader to the writer.
2432 */ 2474 */
2433 if (cpu_buffer->reader_page == cpu_buffer->commit_page) { 2475 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2434 unsigned int read = cpu_buffer->reader_page->read; 2476 unsigned int commit = rb_page_commit(cpu_buffer->reader_page);
2477 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
2435 2478
2436 if (full) 2479 if (full)
2437 goto out; 2480 goto out;
2438 /* The writer is still on the reader page, we must copy */ 2481 /* The writer is still on the reader page, we must copy */
2439 bpage = cpu_buffer->reader_page->page; 2482 memcpy(bpage->data + read, rpage->data + read, commit - read);
2440 memcpy(bpage->data,
2441 cpu_buffer->reader_page->page->data + read,
2442 local_read(&bpage->commit) - read);
2443 2483
2444 /* consume what was read */ 2484 /* consume what was read */
2445 cpu_buffer->reader_page += read; 2485 cpu_buffer->reader_page->read = commit;
2446 2486
2487 /* update bpage */
2488 local_set(&bpage->commit, commit);
2489 if (!read)
2490 bpage->time_stamp = rpage->time_stamp;
2447 } else { 2491 } else {
2448 /* swap the pages */ 2492 /* swap the pages */
2449 rb_init_page(bpage); 2493 rb_init_page(bpage);
@@ -2452,10 +2496,10 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2452 cpu_buffer->reader_page->read = 0; 2496 cpu_buffer->reader_page->read = 0;
2453 *data_page = bpage; 2497 *data_page = bpage;
2454 } 2498 }
2455 ret = 1; 2499 ret = read;
2456 2500
2457 /* update the entry counter */ 2501 /* update the entry counter */
2458 rb_remove_entries(cpu_buffer, bpage); 2502 rb_remove_entries(cpu_buffer, bpage, read);
2459 out: 2503 out:
2460 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2504 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2461 2505
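
[annotation] A return value >= 0 from ring_buffer_read_page() is now the offset of the first unconsumed event in the returned page, matching the updated example in the comment above. A hedged consumer sketch; process_page() and demo_drain_cpu() are hypothetical.

#include <linux/ring_buffer.h>

/* walk events in @page starting at @offset; body omitted in this sketch */
static void process_page(void *page, int offset)
{
}

static void demo_drain_cpu(struct ring_buffer *buffer, int cpu)
{
	void *page = ring_buffer_alloc_read_page(buffer);
	int ret;

	if (!page)
		return;

	ret = ring_buffer_read_page(buffer, &page, cpu, 0);
	if (ret >= 0)			/* ret = offset of first unread event */
		process_page(page, ret);

	ring_buffer_free_read_page(buffer, page);
}
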
@@ -2466,7 +2510,7 @@ static ssize_t
2466rb_simple_read(struct file *filp, char __user *ubuf, 2510rb_simple_read(struct file *filp, char __user *ubuf,
2467 size_t cnt, loff_t *ppos) 2511 size_t cnt, loff_t *ppos)
2468{ 2512{
2469 long *p = filp->private_data; 2513 unsigned long *p = filp->private_data;
2470 char buf[64]; 2514 char buf[64];
2471 int r; 2515 int r;
2472 2516
@@ -2482,9 +2526,9 @@ static ssize_t
2482rb_simple_write(struct file *filp, const char __user *ubuf, 2526rb_simple_write(struct file *filp, const char __user *ubuf,
2483 size_t cnt, loff_t *ppos) 2527 size_t cnt, loff_t *ppos)
2484{ 2528{
2485 long *p = filp->private_data; 2529 unsigned long *p = filp->private_data;
2486 char buf[64]; 2530 char buf[64];
2487 long val; 2531 unsigned long val;
2488 int ret; 2532 int ret;
2489 2533
2490 if (cnt >= sizeof(buf)) 2534 if (cnt >= sizeof(buf))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 17bb88d86ac2..95f99a7abf2f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -31,12 +31,14 @@
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/writeback.h> 33#include <linux/writeback.h>
34#include <linux/splice.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h> 37#include <linux/ring_buffer.h>
37#include <linux/irqflags.h> 38#include <linux/irqflags.h>
38 39
39#include "trace.h" 40#include "trace.h"
41#include "trace_output.h"
40 42
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 43#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 44
@@ -52,6 +54,11 @@ unsigned long __read_mostly tracing_thresh;
52 */ 54 */
53static bool __read_mostly tracing_selftest_running; 55static bool __read_mostly tracing_selftest_running;
54 56
57/*
58 * If a tracer is running, we do not want to run SELFTEST.
59 */
60static bool __read_mostly tracing_selftest_disabled;
61
55/* For tracers that don't implement custom flags */ 62/* For tracers that don't implement custom flags */
56static struct tracer_opt dummy_tracer_opt[] = { 63static struct tracer_opt dummy_tracer_opt[] = {
57 { } 64 { }
@@ -73,7 +80,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
73 * of the tracer is successful. But that is the only place that sets 80 * of the tracer is successful. But that is the only place that sets
74 * this back to zero. 81 * this back to zero.
75 */ 82 */
76int tracing_disabled = 1; 83static int tracing_disabled = 1;
77 84
78static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 85static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
79 86
@@ -109,14 +116,19 @@ static cpumask_var_t __read_mostly tracing_buffer_mask;
109 */ 116 */
110int ftrace_dump_on_oops; 117int ftrace_dump_on_oops;
111 118
112static int tracing_set_tracer(char *buf); 119static int tracing_set_tracer(const char *buf);
120
121#define BOOTUP_TRACER_SIZE 100
122static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
123static char *default_bootup_tracer;
113 124
114static int __init set_ftrace(char *str) 125static int __init set_ftrace(char *str)
115{ 126{
116 tracing_set_tracer(str); 127 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
128 default_bootup_tracer = bootup_tracer_buf;
117 return 1; 129 return 1;
118} 130}
119__setup("ftrace", set_ftrace); 131__setup("ftrace=", set_ftrace);
120 132
121static int __init set_ftrace_dump_on_oops(char *str) 133static int __init set_ftrace_dump_on_oops(char *str)
122{ 134{
@@ -186,9 +198,6 @@ int tracing_is_enabled(void)
186 return tracer_enabled; 198 return tracer_enabled;
187} 199}
188 200
189/* function tracing enabled */
190int ftrace_function_enabled;
191
192/* 201/*
193 * trace_buf_size is the size in bytes that is allocated 202 * trace_buf_size is the size in bytes that is allocated
194 * for a buffer. Note, the number of bytes is always rounded 203 * for a buffer. Note, the number of bytes is always rounded
@@ -229,7 +238,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
229 238
230/* trace_flags holds trace_options default values */ 239/* trace_flags holds trace_options default values */
231unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 240unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
232 TRACE_ITER_ANNOTATE; 241 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
233 242
234/** 243/**
235 * trace_wake_up - wake up tasks waiting for trace input 244 * trace_wake_up - wake up tasks waiting for trace input
@@ -287,6 +296,7 @@ static const char *trace_options[] = {
287 "userstacktrace", 296 "userstacktrace",
288 "sym-userobj", 297 "sym-userobj",
289 "printk-msg-only", 298 "printk-msg-only",
299 "context-info",
290 NULL 300 NULL
291}; 301};
292 302
@@ -329,132 +339,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
329 tracing_record_cmdline(current); 339 tracing_record_cmdline(current);
330} 340}
331 341
332/**
333 * trace_seq_printf - sequence printing of trace information
334 * @s: trace sequence descriptor
335 * @fmt: printf format string
336 *
337 * The tracer may use either sequence operations or its own
 338 * copy to user routines. To simplify formatting of a trace
339 * trace_seq_printf is used to store strings into a special
340 * buffer (@s). Then the output may be either used by
341 * the sequencer or pulled into another buffer.
342 */
343int
344trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
345{
346 int len = (PAGE_SIZE - 1) - s->len;
347 va_list ap;
348 int ret;
349
350 if (!len)
351 return 0;
352
353 va_start(ap, fmt);
354 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
355 va_end(ap);
356
357 /* If we can't write it all, don't bother writing anything */
358 if (ret >= len)
359 return 0;
360
361 s->len += ret;
362
363 return len;
364}
365
366/**
367 * trace_seq_puts - trace sequence printing of simple string
368 * @s: trace sequence descriptor
369 * @str: simple string to record
370 *
371 * The tracer may use either the sequence operations or its own
372 * copy to user routines. This function records a simple string
373 * into a special buffer (@s) for later retrieval by a sequencer
374 * or other mechanism.
375 */
376static int
377trace_seq_puts(struct trace_seq *s, const char *str)
378{
379 int len = strlen(str);
380
381 if (len > ((PAGE_SIZE - 1) - s->len))
382 return 0;
383
384 memcpy(s->buffer + s->len, str, len);
385 s->len += len;
386
387 return len;
388}
389
390static int
391trace_seq_putc(struct trace_seq *s, unsigned char c)
392{
393 if (s->len >= (PAGE_SIZE - 1))
394 return 0;
395
396 s->buffer[s->len++] = c;
397
398 return 1;
399}
400
401static int
402trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
403{
404 if (len > ((PAGE_SIZE - 1) - s->len))
405 return 0;
406
407 memcpy(s->buffer + s->len, mem, len);
408 s->len += len;
409
410 return len;
411}
412
413#define MAX_MEMHEX_BYTES 8
414#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
415
416static int
417trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
418{
419 unsigned char hex[HEX_CHARS];
420 unsigned char *data = mem;
421 int i, j;
422
423#ifdef __BIG_ENDIAN
424 for (i = 0, j = 0; i < len; i++) {
425#else
426 for (i = len-1, j = 0; i >= 0; i--) {
427#endif
428 hex[j++] = hex_asc_hi(data[i]);
429 hex[j++] = hex_asc_lo(data[i]);
430 }
431 hex[j++] = ' ';
432
433 return trace_seq_putmem(s, hex, j);
434}
435
436static int
437trace_seq_path(struct trace_seq *s, struct path *path)
438{
439 unsigned char *p;
440
441 if (s->len >= (PAGE_SIZE - 1))
442 return 0;
443 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
444 if (!IS_ERR(p)) {
445 p = mangle_path(s->buffer + s->len, p, "\n");
446 if (p) {
447 s->len = p - s->buffer;
448 return 1;
449 }
450 } else {
451 s->buffer[s->len++] = '?';
452 return 1;
453 }
454
455 return 0;
456}
457
458static void 342static void
459trace_seq_reset(struct trace_seq *s) 343trace_seq_reset(struct trace_seq *s)
460{ 344{
@@ -481,6 +365,25 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
481 return cnt; 365 return cnt;
482} 366}
483 367
368ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
369{
370 int len;
371 void *ret;
372
373 if (s->len <= s->readpos)
374 return -EBUSY;
375
376 len = s->len - s->readpos;
377 if (cnt > len)
378 cnt = len;
379 ret = memcpy(buf, s->buffer + s->readpos, cnt);
380 if (!ret)
381 return -EFAULT;
382
 383	s->readpos += cnt;
384 return cnt;
385}
386
484static void 387static void
485trace_print_seq(struct seq_file *m, struct trace_seq *s) 388trace_print_seq(struct seq_file *m, struct trace_seq *s)
486{ 389{
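
[annotation] trace_seq_to_buffer() complements trace_seq_to_user() for in-kernel consumers such as the splice path suggested by the new <linux/splice.h> include. A hedged sketch of filling a page from a sequence; demo_fill_page() is illustrative and assumes struct trace_seq from kernel/trace/trace.h.

#include <linux/mm.h>
#include "trace.h"

static ssize_t demo_fill_page(struct trace_seq *s, struct page *page)
{
	ssize_t ret;

	/* copy whatever the sequence currently holds into the page */
	ret = trace_seq_to_buffer(s, page_address(page), PAGE_SIZE);

	return ret < 0 ? 0 : ret;	/* bytes now present in the page */
}
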
@@ -543,7 +446,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
543 446
544 ftrace_enable_cpu(); 447 ftrace_enable_cpu();
545 448
546 WARN_ON_ONCE(ret); 449 WARN_ON_ONCE(ret && ret != -EAGAIN);
547 450
548 __update_max_tr(tr, tsk, cpu); 451 __update_max_tr(tr, tsk, cpu);
549 __raw_spin_unlock(&ftrace_max_lock); 452 __raw_spin_unlock(&ftrace_max_lock);
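
[annotation] Relaxing the warning matches the new -EAGAIN returns added to ring_buffer_swap_cpu() above: a disabled buffer is an expected condition, not a bug. Caller-side view as a sketch; demo_snapshot_cpu() is hypothetical.

#include <linux/ring_buffer.h>
#include "trace.h"

static void demo_snapshot_cpu(struct trace_array *max_tr,
			      struct trace_array *tr, int cpu)
{
	int ret = ring_buffer_swap_cpu(max_tr->buffer, tr->buffer, cpu);

	/* -EAGAIN: buffers globally off or recording disabled; tolerate it */
	WARN_ON_ONCE(ret && ret != -EAGAIN);
}
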
@@ -556,6 +459,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
556 * Register a new plugin tracer. 459 * Register a new plugin tracer.
557 */ 460 */
558int register_tracer(struct tracer *type) 461int register_tracer(struct tracer *type)
462__releases(kernel_lock)
463__acquires(kernel_lock)
559{ 464{
560 struct tracer *t; 465 struct tracer *t;
561 int len; 466 int len;
@@ -596,7 +501,7 @@ int register_tracer(struct tracer *type)
596 type->flags->opts = dummy_tracer_opt; 501 type->flags->opts = dummy_tracer_opt;
597 502
598#ifdef CONFIG_FTRACE_STARTUP_TEST 503#ifdef CONFIG_FTRACE_STARTUP_TEST
599 if (type->selftest) { 504 if (type->selftest && !tracing_selftest_disabled) {
600 struct tracer *saved_tracer = current_trace; 505 struct tracer *saved_tracer = current_trace;
601 struct trace_array *tr = &global_trace; 506 struct trace_array *tr = &global_trace;
602 int i; 507 int i;
@@ -638,8 +543,26 @@ int register_tracer(struct tracer *type)
638 out: 543 out:
639 tracing_selftest_running = false; 544 tracing_selftest_running = false;
640 mutex_unlock(&trace_types_lock); 545 mutex_unlock(&trace_types_lock);
641 lock_kernel();
642 546
547 if (ret || !default_bootup_tracer)
548 goto out_unlock;
549
550 if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
551 goto out_unlock;
552
553 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
554 /* Do we want this tracer to start on bootup? */
555 tracing_set_tracer(type->name);
556 default_bootup_tracer = NULL;
557 /* disable other selftests, since this will break it. */
558 tracing_selftest_disabled = 1;
559#ifdef CONFIG_FTRACE_STARTUP_TEST
560 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
561 type->name);
562#endif
563
564 out_unlock:
565 lock_kernel();
643 return ret; 566 return ret;
644} 567}
645 568
@@ -658,6 +581,15 @@ void unregister_tracer(struct tracer *type)
658 581
659 found: 582 found:
660 *t = (*t)->next; 583 *t = (*t)->next;
584
585 if (type == current_trace && tracer_enabled) {
586 tracer_enabled = 0;
587 tracing_stop();
588 if (current_trace->stop)
589 current_trace->stop(&global_trace);
590 current_trace = &nop_trace;
591 }
592
661 if (strlen(type->name) != max_tracer_type_len) 593 if (strlen(type->name) != max_tracer_type_len)
662 goto out; 594 goto out;
663 595
@@ -696,7 +628,7 @@ static int cmdline_idx;
696static DEFINE_SPINLOCK(trace_cmdline_lock); 628static DEFINE_SPINLOCK(trace_cmdline_lock);
697 629
698/* temporary disable recording */ 630/* temporary disable recording */
699atomic_t trace_record_cmdline_disabled __read_mostly; 631static atomic_t trace_record_cmdline_disabled __read_mostly;
700 632
701static void trace_init_cmdlines(void) 633static void trace_init_cmdlines(void)
702{ 634{
@@ -738,13 +670,12 @@ void tracing_start(void)
738 return; 670 return;
739 671
740 spin_lock_irqsave(&tracing_start_lock, flags); 672 spin_lock_irqsave(&tracing_start_lock, flags);
741 if (--trace_stop_count) 673 if (--trace_stop_count) {
742 goto out; 674 if (trace_stop_count < 0) {
743 675 /* Someone screwed up their debugging */
744 if (trace_stop_count < 0) { 676 WARN_ON_ONCE(1);
745 /* Someone screwed up their debugging */ 677 trace_stop_count = 0;
746 WARN_ON_ONCE(1); 678 }
747 trace_stop_count = 0;
748 goto out; 679 goto out;
749 } 680 }
750 681
@@ -876,78 +807,100 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
876 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 807 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
877} 808}
878 809
810struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
811 unsigned char type,
812 unsigned long len,
813 unsigned long flags, int pc)
814{
815 struct ring_buffer_event *event;
816
817 event = ring_buffer_lock_reserve(tr->buffer, len);
818 if (event != NULL) {
819 struct trace_entry *ent = ring_buffer_event_data(event);
820
821 tracing_generic_entry_update(ent, flags, pc);
822 ent->type = type;
823 }
824
825 return event;
826}
827static void ftrace_trace_stack(struct trace_array *tr,
828 unsigned long flags, int skip, int pc);
829static void ftrace_trace_userstack(struct trace_array *tr,
830 unsigned long flags, int pc);
831
832void trace_buffer_unlock_commit(struct trace_array *tr,
833 struct ring_buffer_event *event,
834 unsigned long flags, int pc)
835{
836 ring_buffer_unlock_commit(tr->buffer, event);
837
838 ftrace_trace_stack(tr, flags, 6, pc);
839 ftrace_trace_userstack(tr, flags, pc);
840 trace_wake_up();
841}
842
879void 843void
880trace_function(struct trace_array *tr, struct trace_array_cpu *data, 844trace_function(struct trace_array *tr,
881 unsigned long ip, unsigned long parent_ip, unsigned long flags, 845 unsigned long ip, unsigned long parent_ip, unsigned long flags,
882 int pc) 846 int pc)
883{ 847{
884 struct ring_buffer_event *event; 848 struct ring_buffer_event *event;
885 struct ftrace_entry *entry; 849 struct ftrace_entry *entry;
886 unsigned long irq_flags;
887 850
888 /* If we are reading the ring buffer, don't trace */ 851 /* If we are reading the ring buffer, don't trace */
889 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 852 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
890 return; 853 return;
891 854
892 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 855 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
893 &irq_flags); 856 flags, pc);
894 if (!event) 857 if (!event)
895 return; 858 return;
896 entry = ring_buffer_event_data(event); 859 entry = ring_buffer_event_data(event);
897 tracing_generic_entry_update(&entry->ent, flags, pc);
898 entry->ent.type = TRACE_FN;
899 entry->ip = ip; 860 entry->ip = ip;
900 entry->parent_ip = parent_ip; 861 entry->parent_ip = parent_ip;
901 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 862 ring_buffer_unlock_commit(tr->buffer, event);
902} 863}
903 864
904#ifdef CONFIG_FUNCTION_GRAPH_TRACER 865#ifdef CONFIG_FUNCTION_GRAPH_TRACER
905static void __trace_graph_entry(struct trace_array *tr, 866static void __trace_graph_entry(struct trace_array *tr,
906 struct trace_array_cpu *data,
907 struct ftrace_graph_ent *trace, 867 struct ftrace_graph_ent *trace,
908 unsigned long flags, 868 unsigned long flags,
909 int pc) 869 int pc)
910{ 870{
911 struct ring_buffer_event *event; 871 struct ring_buffer_event *event;
912 struct ftrace_graph_ent_entry *entry; 872 struct ftrace_graph_ent_entry *entry;
913 unsigned long irq_flags;
914 873
915 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 874 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
916 return; 875 return;
917 876
918 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 877 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
919 &irq_flags); 878 sizeof(*entry), flags, pc);
920 if (!event) 879 if (!event)
921 return; 880 return;
922 entry = ring_buffer_event_data(event); 881 entry = ring_buffer_event_data(event);
923 tracing_generic_entry_update(&entry->ent, flags, pc);
924 entry->ent.type = TRACE_GRAPH_ENT;
925 entry->graph_ent = *trace; 882 entry->graph_ent = *trace;
926 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 883 ring_buffer_unlock_commit(global_trace.buffer, event);
927} 884}
928 885
929static void __trace_graph_return(struct trace_array *tr, 886static void __trace_graph_return(struct trace_array *tr,
930 struct trace_array_cpu *data,
931 struct ftrace_graph_ret *trace, 887 struct ftrace_graph_ret *trace,
932 unsigned long flags, 888 unsigned long flags,
933 int pc) 889 int pc)
934{ 890{
935 struct ring_buffer_event *event; 891 struct ring_buffer_event *event;
936 struct ftrace_graph_ret_entry *entry; 892 struct ftrace_graph_ret_entry *entry;
937 unsigned long irq_flags;
938 893
939 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 894 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
940 return; 895 return;
941 896
942 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 897 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
943 &irq_flags); 898 sizeof(*entry), flags, pc);
944 if (!event) 899 if (!event)
945 return; 900 return;
946 entry = ring_buffer_event_data(event); 901 entry = ring_buffer_event_data(event);
947 tracing_generic_entry_update(&entry->ent, flags, pc);
948 entry->ent.type = TRACE_GRAPH_RET;
949 entry->ret = *trace; 902 entry->ret = *trace;
950 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 903 ring_buffer_unlock_commit(global_trace.buffer, event);
951} 904}
952#endif 905#endif
953 906
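
[annotation] trace_buffer_lock_reserve()/trace_buffer_unlock_commit() now centralize the boilerplate every trace point used to repeat: reserve, fill in the generic entry header, commit, then optionally record stack traces and wake readers. A hedged sketch of a new-style trace point, using a hypothetical TRACE_DEMO type and demo_entry layout that are not part of this patch:

#include <linux/ring_buffer.h>
#include "trace.h"

struct demo_entry {
	struct trace_entry	ent;
	unsigned long		value;
};

static void trace_demo(struct trace_array *tr, unsigned long value,
		       unsigned long flags, int pc)
{
	struct ring_buffer_event *event;
	struct demo_entry *entry;

	event = trace_buffer_lock_reserve(tr, TRACE_DEMO, sizeof(*entry),
					  flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->value = value;

	/* commits, records stack/userstack if enabled, wakes up readers */
	trace_buffer_unlock_commit(tr, event, flags, pc);
}
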
@@ -957,31 +910,23 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
957 int pc) 910 int pc)
958{ 911{
959 if (likely(!atomic_read(&data->disabled))) 912 if (likely(!atomic_read(&data->disabled)))
960 trace_function(tr, data, ip, parent_ip, flags, pc); 913 trace_function(tr, ip, parent_ip, flags, pc);
961} 914}
962 915
963static void ftrace_trace_stack(struct trace_array *tr, 916static void __ftrace_trace_stack(struct trace_array *tr,
964 struct trace_array_cpu *data, 917 unsigned long flags,
965 unsigned long flags, 918 int skip, int pc)
966 int skip, int pc)
967{ 919{
968#ifdef CONFIG_STACKTRACE 920#ifdef CONFIG_STACKTRACE
969 struct ring_buffer_event *event; 921 struct ring_buffer_event *event;
970 struct stack_entry *entry; 922 struct stack_entry *entry;
971 struct stack_trace trace; 923 struct stack_trace trace;
972 unsigned long irq_flags;
973 924
974 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 925 event = trace_buffer_lock_reserve(tr, TRACE_STACK,
975 return; 926 sizeof(*entry), flags, pc);
976
977 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
978 &irq_flags);
979 if (!event) 927 if (!event)
980 return; 928 return;
981 entry = ring_buffer_event_data(event); 929 entry = ring_buffer_event_data(event);
982 tracing_generic_entry_update(&entry->ent, flags, pc);
983 entry->ent.type = TRACE_STACK;
984
985 memset(&entry->caller, 0, sizeof(entry->caller)); 930 memset(&entry->caller, 0, sizeof(entry->caller));
986 931
987 trace.nr_entries = 0; 932 trace.nr_entries = 0;
@@ -990,38 +935,43 @@ static void ftrace_trace_stack(struct trace_array *tr,
990 trace.entries = entry->caller; 935 trace.entries = entry->caller;
991 936
992 save_stack_trace(&trace); 937 save_stack_trace(&trace);
993 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 938 ring_buffer_unlock_commit(tr->buffer, event);
994#endif 939#endif
995} 940}
996 941
942static void ftrace_trace_stack(struct trace_array *tr,
943 unsigned long flags,
944 int skip, int pc)
945{
946 if (!(trace_flags & TRACE_ITER_STACKTRACE))
947 return;
948
949 __ftrace_trace_stack(tr, flags, skip, pc);
950}
951
997void __trace_stack(struct trace_array *tr, 952void __trace_stack(struct trace_array *tr,
998 struct trace_array_cpu *data,
999 unsigned long flags, 953 unsigned long flags,
1000 int skip) 954 int skip, int pc)
1001{ 955{
1002 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 956 __ftrace_trace_stack(tr, flags, skip, pc);
1003} 957}
1004 958
1005static void ftrace_trace_userstack(struct trace_array *tr, 959static void ftrace_trace_userstack(struct trace_array *tr,
1006 struct trace_array_cpu *data, 960 unsigned long flags, int pc)
1007 unsigned long flags, int pc)
1008{ 961{
1009#ifdef CONFIG_STACKTRACE 962#ifdef CONFIG_STACKTRACE
1010 struct ring_buffer_event *event; 963 struct ring_buffer_event *event;
1011 struct userstack_entry *entry; 964 struct userstack_entry *entry;
1012 struct stack_trace trace; 965 struct stack_trace trace;
1013 unsigned long irq_flags;
1014 966
1015 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 967 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1016 return; 968 return;
1017 969
1018 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 970 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
1019 &irq_flags); 971 sizeof(*entry), flags, pc);
1020 if (!event) 972 if (!event)
1021 return; 973 return;
1022 entry = ring_buffer_event_data(event); 974 entry = ring_buffer_event_data(event);
1023 tracing_generic_entry_update(&entry->ent, flags, pc);
1024 entry->ent.type = TRACE_USER_STACK;
1025 975
1026 memset(&entry->caller, 0, sizeof(entry->caller)); 976 memset(&entry->caller, 0, sizeof(entry->caller));
1027 977
@@ -1031,70 +981,58 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1031 trace.entries = entry->caller; 981 trace.entries = entry->caller;
1032 982
1033 save_stack_trace_user(&trace); 983 save_stack_trace_user(&trace);
1034 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 984 ring_buffer_unlock_commit(tr->buffer, event);
1035#endif 985#endif
1036} 986}
1037 987
1038void __trace_userstack(struct trace_array *tr, 988#ifdef UNUSED
1039 struct trace_array_cpu *data, 989static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1040 unsigned long flags)
1041{ 990{
1042 ftrace_trace_userstack(tr, data, flags, preempt_count()); 991 ftrace_trace_userstack(tr, flags, preempt_count());
1043} 992}
993#endif /* UNUSED */
1044 994
1045static void 995static void
1046ftrace_trace_special(void *__tr, void *__data, 996ftrace_trace_special(void *__tr,
1047 unsigned long arg1, unsigned long arg2, unsigned long arg3, 997 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1048 int pc) 998 int pc)
1049{ 999{
1050 struct ring_buffer_event *event; 1000 struct ring_buffer_event *event;
1051 struct trace_array_cpu *data = __data;
1052 struct trace_array *tr = __tr; 1001 struct trace_array *tr = __tr;
1053 struct special_entry *entry; 1002 struct special_entry *entry;
1054 unsigned long irq_flags;
1055 1003
1056 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1004 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
1057 &irq_flags); 1005 sizeof(*entry), 0, pc);
1058 if (!event) 1006 if (!event)
1059 return; 1007 return;
1060 entry = ring_buffer_event_data(event); 1008 entry = ring_buffer_event_data(event);
1061 tracing_generic_entry_update(&entry->ent, 0, pc);
1062 entry->ent.type = TRACE_SPECIAL;
1063 entry->arg1 = arg1; 1009 entry->arg1 = arg1;
1064 entry->arg2 = arg2; 1010 entry->arg2 = arg2;
1065 entry->arg3 = arg3; 1011 entry->arg3 = arg3;
1066 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1012 trace_buffer_unlock_commit(tr, event, 0, pc);
1067 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1068 ftrace_trace_userstack(tr, data, irq_flags, pc);
1069
1070 trace_wake_up();
1071} 1013}
1072 1014
1073void 1015void
1074__trace_special(void *__tr, void *__data, 1016__trace_special(void *__tr, void *__data,
1075 unsigned long arg1, unsigned long arg2, unsigned long arg3) 1017 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1076{ 1018{
1077 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); 1019 ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1078} 1020}
1079 1021
1080void 1022void
1081tracing_sched_switch_trace(struct trace_array *tr, 1023tracing_sched_switch_trace(struct trace_array *tr,
1082 struct trace_array_cpu *data,
1083 struct task_struct *prev, 1024 struct task_struct *prev,
1084 struct task_struct *next, 1025 struct task_struct *next,
1085 unsigned long flags, int pc) 1026 unsigned long flags, int pc)
1086{ 1027{
1087 struct ring_buffer_event *event; 1028 struct ring_buffer_event *event;
1088 struct ctx_switch_entry *entry; 1029 struct ctx_switch_entry *entry;
1089 unsigned long irq_flags;
1090 1030
1091 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1031 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1092 &irq_flags); 1032 sizeof(*entry), flags, pc);
1093 if (!event) 1033 if (!event)
1094 return; 1034 return;
1095 entry = ring_buffer_event_data(event); 1035 entry = ring_buffer_event_data(event);
1096 tracing_generic_entry_update(&entry->ent, flags, pc);
1097 entry->ent.type = TRACE_CTX;
1098 entry->prev_pid = prev->pid; 1036 entry->prev_pid = prev->pid;
1099 entry->prev_prio = prev->prio; 1037 entry->prev_prio = prev->prio;
1100 entry->prev_state = prev->state; 1038 entry->prev_state = prev->state;
@@ -1102,29 +1040,23 @@ tracing_sched_switch_trace(struct trace_array *tr,
1102 entry->next_prio = next->prio; 1040 entry->next_prio = next->prio;
1103 entry->next_state = next->state; 1041 entry->next_state = next->state;
1104 entry->next_cpu = task_cpu(next); 1042 entry->next_cpu = task_cpu(next);
1105 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1043 trace_buffer_unlock_commit(tr, event, flags, pc);
1106 ftrace_trace_stack(tr, data, flags, 5, pc);
1107 ftrace_trace_userstack(tr, data, flags, pc);
1108} 1044}
1109 1045
1110void 1046void
1111tracing_sched_wakeup_trace(struct trace_array *tr, 1047tracing_sched_wakeup_trace(struct trace_array *tr,
1112 struct trace_array_cpu *data,
1113 struct task_struct *wakee, 1048 struct task_struct *wakee,
1114 struct task_struct *curr, 1049 struct task_struct *curr,
1115 unsigned long flags, int pc) 1050 unsigned long flags, int pc)
1116{ 1051{
1117 struct ring_buffer_event *event; 1052 struct ring_buffer_event *event;
1118 struct ctx_switch_entry *entry; 1053 struct ctx_switch_entry *entry;
1119 unsigned long irq_flags;
1120 1054
1121 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1055 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1122 &irq_flags); 1056 sizeof(*entry), flags, pc);
1123 if (!event) 1057 if (!event)
1124 return; 1058 return;
1125 entry = ring_buffer_event_data(event); 1059 entry = ring_buffer_event_data(event);
1126 tracing_generic_entry_update(&entry->ent, flags, pc);
1127 entry->ent.type = TRACE_WAKE;
1128 entry->prev_pid = curr->pid; 1060 entry->prev_pid = curr->pid;
1129 entry->prev_prio = curr->prio; 1061 entry->prev_prio = curr->prio;
1130 entry->prev_state = curr->state; 1062 entry->prev_state = curr->state;
@@ -1132,11 +1064,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1132 entry->next_prio = wakee->prio; 1064 entry->next_prio = wakee->prio;
1133 entry->next_state = wakee->state; 1065 entry->next_state = wakee->state;
1134 entry->next_cpu = task_cpu(wakee); 1066 entry->next_cpu = task_cpu(wakee);
1135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1067 trace_buffer_unlock_commit(tr, event, flags, pc);
1136 ftrace_trace_stack(tr, data, flags, 6, pc);
1137 ftrace_trace_userstack(tr, data, flags, pc);
1138
1139 trace_wake_up();
1140} 1068}
1141 1069
1142void 1070void
@@ -1157,66 +1085,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1157 data = tr->data[cpu]; 1085 data = tr->data[cpu];
1158 1086
1159 if (likely(atomic_inc_return(&data->disabled) == 1)) 1087 if (likely(atomic_inc_return(&data->disabled) == 1))
1160 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1088 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1161
1162 atomic_dec(&data->disabled);
1163 local_irq_restore(flags);
1164}
1165
1166#ifdef CONFIG_FUNCTION_TRACER
1167static void
1168function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1169{
1170 struct trace_array *tr = &global_trace;
1171 struct trace_array_cpu *data;
1172 unsigned long flags;
1173 long disabled;
1174 int cpu, resched;
1175 int pc;
1176
1177 if (unlikely(!ftrace_function_enabled))
1178 return;
1179
1180 pc = preempt_count();
1181 resched = ftrace_preempt_disable();
1182 local_save_flags(flags);
1183 cpu = raw_smp_processor_id();
1184 data = tr->data[cpu];
1185 disabled = atomic_inc_return(&data->disabled);
1186
1187 if (likely(disabled == 1))
1188 trace_function(tr, data, ip, parent_ip, flags, pc);
1189
1190 atomic_dec(&data->disabled);
1191 ftrace_preempt_enable(resched);
1192}
1193
1194static void
1195function_trace_call(unsigned long ip, unsigned long parent_ip)
1196{
1197 struct trace_array *tr = &global_trace;
1198 struct trace_array_cpu *data;
1199 unsigned long flags;
1200 long disabled;
1201 int cpu;
1202 int pc;
1203
1204 if (unlikely(!ftrace_function_enabled))
1205 return;
1206
1207 /*
1208 * Need to use raw, since this must be called before the
1209 * recursive protection is performed.
1210 */
1211 local_irq_save(flags);
1212 cpu = raw_smp_processor_id();
1213 data = tr->data[cpu];
1214 disabled = atomic_inc_return(&data->disabled);
1215
1216 if (likely(disabled == 1)) {
1217 pc = preempt_count();
1218 trace_function(tr, data, ip, parent_ip, flags, pc);
1219 }
1220 1089
1221 atomic_dec(&data->disabled); 1090 atomic_dec(&data->disabled);
1222 local_irq_restore(flags); 1091 local_irq_restore(flags);
@@ -1244,7 +1113,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1244 disabled = atomic_inc_return(&data->disabled); 1113 disabled = atomic_inc_return(&data->disabled);
1245 if (likely(disabled == 1)) { 1114 if (likely(disabled == 1)) {
1246 pc = preempt_count(); 1115 pc = preempt_count();
1247 __trace_graph_entry(tr, data, trace, flags, pc); 1116 __trace_graph_entry(tr, trace, flags, pc);
1248 } 1117 }
1249 /* Only do the atomic if it is not already set */ 1118 /* Only do the atomic if it is not already set */
1250 if (!test_tsk_trace_graph(current)) 1119 if (!test_tsk_trace_graph(current))
@@ -1270,7 +1139,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1270 disabled = atomic_inc_return(&data->disabled); 1139 disabled = atomic_inc_return(&data->disabled);
1271 if (likely(disabled == 1)) { 1140 if (likely(disabled == 1)) {
1272 pc = preempt_count(); 1141 pc = preempt_count();
1273 __trace_graph_return(tr, data, trace, flags, pc); 1142 __trace_graph_return(tr, trace, flags, pc);
1274 } 1143 }
1275 if (!trace->depth) 1144 if (!trace->depth)
1276 clear_tsk_trace_graph(current); 1145 clear_tsk_trace_graph(current);
@@ -1279,31 +1148,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1279} 1148}
1280#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 1149#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1281 1150
1282static struct ftrace_ops trace_ops __read_mostly =
1283{
1284 .func = function_trace_call,
1285};
1286
1287void tracing_start_function_trace(void)
1288{
1289 ftrace_function_enabled = 0;
1290
1291 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1292 trace_ops.func = function_trace_call_preempt_only;
1293 else
1294 trace_ops.func = function_trace_call;
1295
1296 register_ftrace_function(&trace_ops);
1297 ftrace_function_enabled = 1;
1298}
1299
1300void tracing_stop_function_trace(void)
1301{
1302 ftrace_function_enabled = 0;
1303 unregister_ftrace_function(&trace_ops);
1304}
1305#endif
1306
1307enum trace_file_type { 1151enum trace_file_type {
1308 TRACE_FILE_LAT_FMT = 1, 1152 TRACE_FILE_LAT_FMT = 1,
1309 TRACE_FILE_ANNOTATE = 2, 1153 TRACE_FILE_ANNOTATE = 2,
@@ -1376,8 +1220,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1376} 1220}
1377 1221
1378/* Find the next real entry, without updating the iterator itself */ 1222/* Find the next real entry, without updating the iterator itself */
1379static struct trace_entry * 1223struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1380find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1224 int *ent_cpu, u64 *ent_ts)
1381{ 1225{
1382 return __find_next_entry(iter, ent_cpu, ent_ts); 1226 return __find_next_entry(iter, ent_cpu, ent_ts);
1383} 1227}
@@ -1472,154 +1316,6 @@ static void s_stop(struct seq_file *m, void *p)
1472 mutex_unlock(&trace_types_lock); 1316 mutex_unlock(&trace_types_lock);
1473} 1317}
1474 1318
1475#ifdef CONFIG_KRETPROBES
1476static inline const char *kretprobed(const char *name)
1477{
1478 static const char tramp_name[] = "kretprobe_trampoline";
1479 int size = sizeof(tramp_name);
1480
1481 if (strncmp(tramp_name, name, size) == 0)
1482 return "[unknown/kretprobe'd]";
1483 return name;
1484}
1485#else
1486static inline const char *kretprobed(const char *name)
1487{
1488 return name;
1489}
1490#endif /* CONFIG_KRETPROBES */
1491
1492static int
1493seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1494{
1495#ifdef CONFIG_KALLSYMS
1496 char str[KSYM_SYMBOL_LEN];
1497 const char *name;
1498
1499 kallsyms_lookup(address, NULL, NULL, NULL, str);
1500
1501 name = kretprobed(str);
1502
1503 return trace_seq_printf(s, fmt, name);
1504#endif
1505 return 1;
1506}
1507
1508static int
1509seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1510 unsigned long address)
1511{
1512#ifdef CONFIG_KALLSYMS
1513 char str[KSYM_SYMBOL_LEN];
1514 const char *name;
1515
1516 sprint_symbol(str, address);
1517 name = kretprobed(str);
1518
1519 return trace_seq_printf(s, fmt, name);
1520#endif
1521 return 1;
1522}
1523
1524#ifndef CONFIG_64BIT
1525# define IP_FMT "%08lx"
1526#else
1527# define IP_FMT "%016lx"
1528#endif
1529
1530int
1531seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1532{
1533 int ret;
1534
1535 if (!ip)
1536 return trace_seq_printf(s, "0");
1537
1538 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1539 ret = seq_print_sym_offset(s, "%s", ip);
1540 else
1541 ret = seq_print_sym_short(s, "%s", ip);
1542
1543 if (!ret)
1544 return 0;
1545
1546 if (sym_flags & TRACE_ITER_SYM_ADDR)
1547 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1548 return ret;
1549}
1550
1551static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1552 unsigned long ip, unsigned long sym_flags)
1553{
1554 struct file *file = NULL;
1555 unsigned long vmstart = 0;
1556 int ret = 1;
1557
1558 if (mm) {
1559 const struct vm_area_struct *vma;
1560
1561 down_read(&mm->mmap_sem);
1562 vma = find_vma(mm, ip);
1563 if (vma) {
1564 file = vma->vm_file;
1565 vmstart = vma->vm_start;
1566 }
1567 if (file) {
1568 ret = trace_seq_path(s, &file->f_path);
1569 if (ret)
1570 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1571 }
1572 up_read(&mm->mmap_sem);
1573 }
1574 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1575 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1576 return ret;
1577}
1578
1579static int
1580seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1581 unsigned long sym_flags)
1582{
1583 struct mm_struct *mm = NULL;
1584 int ret = 1;
1585 unsigned int i;
1586
1587 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1588 struct task_struct *task;
1589 /*
1590 * we do the lookup on the thread group leader,
1591 * since individual threads might have already quit!
1592 */
1593 rcu_read_lock();
1594 task = find_task_by_vpid(entry->ent.tgid);
1595 if (task)
1596 mm = get_task_mm(task);
1597 rcu_read_unlock();
1598 }
1599
1600 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1601 unsigned long ip = entry->caller[i];
1602
1603 if (ip == ULONG_MAX || !ret)
1604 break;
1605 if (i && ret)
1606 ret = trace_seq_puts(s, " <- ");
1607 if (!ip) {
1608 if (ret)
1609 ret = trace_seq_puts(s, "??");
1610 continue;
1611 }
1612 if (!ret)
1613 break;
1614 if (ret)
1615 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1616 }
1617
1618 if (mm)
1619 mmput(mm);
1620 return ret;
1621}
1622
1623static void print_lat_help_header(struct seq_file *m) 1319static void print_lat_help_header(struct seq_file *m)
1624{ 1320{
1625 seq_puts(m, "# _------=> CPU# \n"); 1321 seq_puts(m, "# _------=> CPU# \n");
@@ -1704,103 +1400,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1704 seq_puts(m, "\n"); 1400 seq_puts(m, "\n");
1705} 1401}
1706 1402
1707static void
1708lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1709{
1710 int hardirq, softirq;
1711 char *comm;
1712
1713 comm = trace_find_cmdline(entry->pid);
1714
1715 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1716 trace_seq_printf(s, "%3d", cpu);
1717 trace_seq_printf(s, "%c%c",
1718 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1719 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1720 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1721
1722 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1723 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1724 if (hardirq && softirq) {
1725 trace_seq_putc(s, 'H');
1726 } else {
1727 if (hardirq) {
1728 trace_seq_putc(s, 'h');
1729 } else {
1730 if (softirq)
1731 trace_seq_putc(s, 's');
1732 else
1733 trace_seq_putc(s, '.');
1734 }
1735 }
1736
1737 if (entry->preempt_count)
1738 trace_seq_printf(s, "%x", entry->preempt_count);
1739 else
1740 trace_seq_puts(s, ".");
1741}
1742
1743unsigned long preempt_mark_thresh = 100;
1744
1745static void
1746lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1747 unsigned long rel_usecs)
1748{
1749 trace_seq_printf(s, " %4lldus", abs_usecs);
1750 if (rel_usecs > preempt_mark_thresh)
1751 trace_seq_puts(s, "!: ");
1752 else if (rel_usecs > 1)
1753 trace_seq_puts(s, "+: ");
1754 else
1755 trace_seq_puts(s, " : ");
1756}
1757
1758static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1759
1760static int task_state_char(unsigned long state)
1761{
1762 int bit = state ? __ffs(state) + 1 : 0;
1763
1764 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
1765}
1766
1767/*
1768 * The message is supposed to contain an ending newline.
1769 * If the printing stops prematurely, try to add a newline of our own.
1770 */
1771void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1772{
1773 struct trace_entry *ent;
1774 struct trace_field_cont *cont;
1775 bool ok = true;
1776
1777 ent = peek_next_entry(iter, iter->cpu, NULL);
1778 if (!ent || ent->type != TRACE_CONT) {
1779 trace_seq_putc(s, '\n');
1780 return;
1781 }
1782
1783 do {
1784 cont = (struct trace_field_cont *)ent;
1785 if (ok)
1786 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1787
1788 ftrace_disable_cpu();
1789
1790 if (iter->buffer_iter[iter->cpu])
1791 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1792 else
1793 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1794
1795 ftrace_enable_cpu();
1796
1797 ent = peek_next_entry(iter, iter->cpu, NULL);
1798 } while (ent && ent->type == TRACE_CONT);
1799
1800 if (!ok)
1801 trace_seq_putc(s, '\n');
1802}
1803
1804static void test_cpu_buff_start(struct trace_iterator *iter) 1403static void test_cpu_buff_start(struct trace_iterator *iter)
1805{ 1404{
1806 struct trace_seq *s = &iter->seq; 1405 struct trace_seq *s = &iter->seq;
@@ -1818,138 +1417,31 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1818 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1417 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1819} 1418}
1820 1419
1821static enum print_line_t 1420static enum print_line_t print_lat_fmt(struct trace_iterator *iter)
1822print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1823{ 1421{
1824 struct trace_seq *s = &iter->seq; 1422 struct trace_seq *s = &iter->seq;
1825 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1423 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1826 struct trace_entry *next_entry; 1424 struct trace_event *event;
1827 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1828 struct trace_entry *entry = iter->ent; 1425 struct trace_entry *entry = iter->ent;
1829 unsigned long abs_usecs;
1830 unsigned long rel_usecs;
1831 u64 next_ts;
1832 char *comm;
1833 int S, T;
1834 int i;
1835
1836 if (entry->type == TRACE_CONT)
1837 return TRACE_TYPE_HANDLED;
1838 1426
1839 test_cpu_buff_start(iter); 1427 test_cpu_buff_start(iter);
1840 1428
1841 next_entry = find_next_entry(iter, NULL, &next_ts); 1429 event = ftrace_find_event(entry->type);
1842 if (!next_entry)
1843 next_ts = iter->ts;
1844 rel_usecs = ns2usecs(next_ts - iter->ts);
1845 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1846
1847 if (verbose) {
1848 comm = trace_find_cmdline(entry->pid);
1849 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1850 " %ld.%03ldms (+%ld.%03ldms): ",
1851 comm,
1852 entry->pid, cpu, entry->flags,
1853 entry->preempt_count, trace_idx,
1854 ns2usecs(iter->ts),
1855 abs_usecs/1000,
1856 abs_usecs % 1000, rel_usecs/1000,
1857 rel_usecs % 1000);
1858 } else {
1859 lat_print_generic(s, entry, cpu);
1860 lat_print_timestamp(s, abs_usecs, rel_usecs);
1861 }
1862 switch (entry->type) {
1863 case TRACE_FN: {
1864 struct ftrace_entry *field;
1865
1866 trace_assign_type(field, entry);
1867
1868 seq_print_ip_sym(s, field->ip, sym_flags);
1869 trace_seq_puts(s, " (");
1870 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1871 trace_seq_puts(s, ")\n");
1872 break;
1873 }
1874 case TRACE_CTX:
1875 case TRACE_WAKE: {
1876 struct ctx_switch_entry *field;
1877
1878 trace_assign_type(field, entry);
1879
1880 T = task_state_char(field->next_state);
1881 S = task_state_char(field->prev_state);
1882 comm = trace_find_cmdline(field->next_pid);
1883 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1884 field->prev_pid,
1885 field->prev_prio,
1886 S, entry->type == TRACE_CTX ? "==>" : " +",
1887 field->next_cpu,
1888 field->next_pid,
1889 field->next_prio,
1890 T, comm);
1891 break;
1892 }
1893 case TRACE_SPECIAL: {
1894 struct special_entry *field;
1895
1896 trace_assign_type(field, entry);
1897
1898 trace_seq_printf(s, "# %ld %ld %ld\n",
1899 field->arg1,
1900 field->arg2,
1901 field->arg3);
1902 break;
1903 }
1904 case TRACE_STACK: {
1905 struct stack_entry *field;
1906
1907 trace_assign_type(field, entry);
1908
1909 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1910 if (i)
1911 trace_seq_puts(s, " <= ");
1912 seq_print_ip_sym(s, field->caller[i], sym_flags);
1913 }
1914 trace_seq_puts(s, "\n");
1915 break;
1916 }
1917 case TRACE_PRINT: {
1918 struct print_entry *field;
1919
1920 trace_assign_type(field, entry);
1921 1430
1922 seq_print_ip_sym(s, field->ip, sym_flags); 1431 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1923 trace_seq_printf(s, ": %s", field->buf); 1432 if (!trace_print_lat_context(iter))
1924 if (entry->flags & TRACE_FLAG_CONT) 1433 goto partial;
1925 trace_seq_print_cont(s, iter);
1926 break;
1927 } 1434 }
1928 case TRACE_BRANCH: {
1929 struct trace_branch *field;
1930 1435
1931 trace_assign_type(field, entry); 1436 if (event)
1437 return event->latency_trace(iter, sym_flags);
1932 1438
1933 trace_seq_printf(s, "[%s] %s:%s:%d\n", 1439 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1934 field->correct ? " ok " : " MISS ", 1440 goto partial;
1935 field->func,
1936 field->file,
1937 field->line);
1938 break;
1939 }
1940 case TRACE_USER_STACK: {
1941 struct userstack_entry *field;
1942 1441
1943 trace_assign_type(field, entry);
1944
1945 seq_print_userip_objs(field, s, sym_flags);
1946 trace_seq_putc(s, '\n');
1947 break;
1948 }
1949 default:
1950 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1951 }
1952 return TRACE_TYPE_HANDLED; 1442 return TRACE_TYPE_HANDLED;
1443partial:
1444 return TRACE_TYPE_PARTIAL_LINE;
1953} 1445}
1954 1446
1955static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1447static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1957,313 +1449,78 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1957 struct trace_seq *s = &iter->seq; 1449 struct trace_seq *s = &iter->seq;
1958 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1450 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1959 struct trace_entry *entry; 1451 struct trace_entry *entry;
1960 unsigned long usec_rem; 1452 struct trace_event *event;
1961 unsigned long long t;
1962 unsigned long secs;
1963 char *comm;
1964 int ret;
1965 int S, T;
1966 int i;
1967 1453
1968 entry = iter->ent; 1454 entry = iter->ent;
1969 1455
1970 if (entry->type == TRACE_CONT)
1971 return TRACE_TYPE_HANDLED;
1972
1973 test_cpu_buff_start(iter); 1456 test_cpu_buff_start(iter);
1974 1457
1975 comm = trace_find_cmdline(iter->ent->pid); 1458 event = ftrace_find_event(entry->type);
1976
1977 t = ns2usecs(iter->ts);
1978 usec_rem = do_div(t, 1000000ULL);
1979 secs = (unsigned long)t;
1980
1981 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1982 if (!ret)
1983 return TRACE_TYPE_PARTIAL_LINE;
1984 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1985 if (!ret)
1986 return TRACE_TYPE_PARTIAL_LINE;
1987 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1988 if (!ret)
1989 return TRACE_TYPE_PARTIAL_LINE;
1990
1991 switch (entry->type) {
1992 case TRACE_FN: {
1993 struct ftrace_entry *field;
1994
1995 trace_assign_type(field, entry);
1996
1997 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1998 if (!ret)
1999 return TRACE_TYPE_PARTIAL_LINE;
2000 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
2001 field->parent_ip) {
2002 ret = trace_seq_printf(s, " <-");
2003 if (!ret)
2004 return TRACE_TYPE_PARTIAL_LINE;
2005 ret = seq_print_ip_sym(s,
2006 field->parent_ip,
2007 sym_flags);
2008 if (!ret)
2009 return TRACE_TYPE_PARTIAL_LINE;
2010 }
2011 ret = trace_seq_printf(s, "\n");
2012 if (!ret)
2013 return TRACE_TYPE_PARTIAL_LINE;
2014 break;
2015 }
2016 case TRACE_CTX:
2017 case TRACE_WAKE: {
2018 struct ctx_switch_entry *field;
2019
2020 trace_assign_type(field, entry);
2021
2022 T = task_state_char(field->next_state);
2023 S = task_state_char(field->prev_state);
2024 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
2025 field->prev_pid,
2026 field->prev_prio,
2027 S,
2028 entry->type == TRACE_CTX ? "==>" : " +",
2029 field->next_cpu,
2030 field->next_pid,
2031 field->next_prio,
2032 T);
2033 if (!ret)
2034 return TRACE_TYPE_PARTIAL_LINE;
2035 break;
2036 }
2037 case TRACE_SPECIAL: {
2038 struct special_entry *field;
2039
2040 trace_assign_type(field, entry);
2041
2042 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
2043 field->arg1,
2044 field->arg2,
2045 field->arg3);
2046 if (!ret)
2047 return TRACE_TYPE_PARTIAL_LINE;
2048 break;
2049 }
2050 case TRACE_STACK: {
2051 struct stack_entry *field;
2052
2053 trace_assign_type(field, entry);
2054
2055 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
2056 if (i) {
2057 ret = trace_seq_puts(s, " <= ");
2058 if (!ret)
2059 return TRACE_TYPE_PARTIAL_LINE;
2060 }
2061 ret = seq_print_ip_sym(s, field->caller[i],
2062 sym_flags);
2063 if (!ret)
2064 return TRACE_TYPE_PARTIAL_LINE;
2065 }
2066 ret = trace_seq_puts(s, "\n");
2067 if (!ret)
2068 return TRACE_TYPE_PARTIAL_LINE;
2069 break;
2070 }
2071 case TRACE_PRINT: {
2072 struct print_entry *field;
2073
2074 trace_assign_type(field, entry);
2075 1459
2076 seq_print_ip_sym(s, field->ip, sym_flags); 1460 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2077 trace_seq_printf(s, ": %s", field->buf); 1461 if (!trace_print_context(iter))
2078 if (entry->flags & TRACE_FLAG_CONT) 1462 goto partial;
2079 trace_seq_print_cont(s, iter);
2080 break;
2081 }
2082 case TRACE_GRAPH_RET: {
2083 return print_graph_function(iter);
2084 } 1463 }
2085 case TRACE_GRAPH_ENT: {
2086 return print_graph_function(iter);
2087 }
2088 case TRACE_BRANCH: {
2089 struct trace_branch *field;
2090
2091 trace_assign_type(field, entry);
2092 1464
2093 trace_seq_printf(s, "[%s] %s:%s:%d\n", 1465 if (event)
2094 field->correct ? " ok " : " MISS ", 1466 return event->trace(iter, sym_flags);
2095 field->func,
2096 field->file,
2097 field->line);
2098 break;
2099 }
2100 case TRACE_USER_STACK: {
2101 struct userstack_entry *field;
2102 1467
2103 trace_assign_type(field, entry); 1468 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1469 goto partial;
2104 1470
2105 ret = seq_print_userip_objs(field, s, sym_flags);
2106 if (!ret)
2107 return TRACE_TYPE_PARTIAL_LINE;
2108 ret = trace_seq_putc(s, '\n');
2109 if (!ret)
2110 return TRACE_TYPE_PARTIAL_LINE;
2111 break;
2112 }
2113 }
2114 return TRACE_TYPE_HANDLED; 1471 return TRACE_TYPE_HANDLED;
1472partial:
1473 return TRACE_TYPE_PARTIAL_LINE;
2115} 1474}
2116 1475
2117static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 1476static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2118{ 1477{
2119 struct trace_seq *s = &iter->seq; 1478 struct trace_seq *s = &iter->seq;
2120 struct trace_entry *entry; 1479 struct trace_entry *entry;
2121 int ret; 1480 struct trace_event *event;
2122 int S, T;
2123 1481
2124 entry = iter->ent; 1482 entry = iter->ent;
2125 1483
2126 if (entry->type == TRACE_CONT) 1484 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2127 return TRACE_TYPE_HANDLED; 1485 if (!trace_seq_printf(s, "%d %d %llu ",
2128 1486 entry->pid, iter->cpu, iter->ts))
2129 ret = trace_seq_printf(s, "%d %d %llu ", 1487 goto partial;
2130 entry->pid, iter->cpu, iter->ts);
2131 if (!ret)
2132 return TRACE_TYPE_PARTIAL_LINE;
2133
2134 switch (entry->type) {
2135 case TRACE_FN: {
2136 struct ftrace_entry *field;
2137
2138 trace_assign_type(field, entry);
2139
2140 ret = trace_seq_printf(s, "%x %x\n",
2141 field->ip,
2142 field->parent_ip);
2143 if (!ret)
2144 return TRACE_TYPE_PARTIAL_LINE;
2145 break;
2146 }
2147 case TRACE_CTX:
2148 case TRACE_WAKE: {
2149 struct ctx_switch_entry *field;
2150
2151 trace_assign_type(field, entry);
2152
2153 T = task_state_char(field->next_state);
2154 S = entry->type == TRACE_WAKE ? '+' :
2155 task_state_char(field->prev_state);
2156 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
2157 field->prev_pid,
2158 field->prev_prio,
2159 S,
2160 field->next_cpu,
2161 field->next_pid,
2162 field->next_prio,
2163 T);
2164 if (!ret)
2165 return TRACE_TYPE_PARTIAL_LINE;
2166 break;
2167 } 1488 }
2168 case TRACE_SPECIAL:
2169 case TRACE_USER_STACK:
2170 case TRACE_STACK: {
2171 struct special_entry *field;
2172 1489
2173 trace_assign_type(field, entry); 1490 event = ftrace_find_event(entry->type);
1491 if (event)
1492 return event->raw(iter, 0);
2174 1493
2175 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1494 if (!trace_seq_printf(s, "%d ?\n", entry->type))
2176 field->arg1, 1495 goto partial;
2177 field->arg2,
2178 field->arg3);
2179 if (!ret)
2180 return TRACE_TYPE_PARTIAL_LINE;
2181 break;
2182 }
2183 case TRACE_PRINT: {
2184 struct print_entry *field;
2185 1496
2186 trace_assign_type(field, entry);
2187
2188 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
2189 if (entry->flags & TRACE_FLAG_CONT)
2190 trace_seq_print_cont(s, iter);
2191 break;
2192 }
2193 }
2194 return TRACE_TYPE_HANDLED; 1497 return TRACE_TYPE_HANDLED;
1498partial:
1499 return TRACE_TYPE_PARTIAL_LINE;
2195} 1500}
2196 1501
2197#define SEQ_PUT_FIELD_RET(s, x) \
2198do { \
2199 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
2200 return 0; \
2201} while (0)
2202
2203#define SEQ_PUT_HEX_FIELD_RET(s, x) \
2204do { \
2205 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
2206 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
2207 return 0; \
2208} while (0)
2209
2210static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 1502static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2211{ 1503{
2212 struct trace_seq *s = &iter->seq; 1504 struct trace_seq *s = &iter->seq;
2213 unsigned char newline = '\n'; 1505 unsigned char newline = '\n';
2214 struct trace_entry *entry; 1506 struct trace_entry *entry;
2215 int S, T; 1507 struct trace_event *event;
2216 1508
2217 entry = iter->ent; 1509 entry = iter->ent;
2218 1510
2219 if (entry->type == TRACE_CONT) 1511 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2220 return TRACE_TYPE_HANDLED; 1512 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2221 1513 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2222 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1514 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2223 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2224 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2225
2226 switch (entry->type) {
2227 case TRACE_FN: {
2228 struct ftrace_entry *field;
2229
2230 trace_assign_type(field, entry);
2231
2232 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
2233 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
2234 break;
2235 } 1515 }
2236 case TRACE_CTX:
2237 case TRACE_WAKE: {
2238 struct ctx_switch_entry *field;
2239
2240 trace_assign_type(field, entry);
2241
2242 T = task_state_char(field->next_state);
2243 S = entry->type == TRACE_WAKE ? '+' :
2244 task_state_char(field->prev_state);
2245 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
2246 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
2247 SEQ_PUT_HEX_FIELD_RET(s, S);
2248 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
2249 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
2250 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
2251 SEQ_PUT_HEX_FIELD_RET(s, T);
2252 break;
2253 }
2254 case TRACE_SPECIAL:
2255 case TRACE_USER_STACK:
2256 case TRACE_STACK: {
2257 struct special_entry *field;
2258
2259 trace_assign_type(field, entry);
2260 1516
2261 SEQ_PUT_HEX_FIELD_RET(s, field->arg1); 1517 event = ftrace_find_event(entry->type);
2262 SEQ_PUT_HEX_FIELD_RET(s, field->arg2); 1518 if (event) {
2263 SEQ_PUT_HEX_FIELD_RET(s, field->arg3); 1519 enum print_line_t ret = event->hex(iter, 0);
2264 break; 1520 if (ret != TRACE_TYPE_HANDLED)
2265 } 1521 return ret;
2266 } 1522 }
1523
2267 SEQ_PUT_FIELD_RET(s, newline); 1524 SEQ_PUT_FIELD_RET(s, newline);
2268 1525
2269 return TRACE_TYPE_HANDLED; 1526 return TRACE_TYPE_HANDLED;
@@ -2278,13 +1535,10 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2278 1535
2279 trace_assign_type(field, entry); 1536 trace_assign_type(field, entry);
2280 1537
2281 ret = trace_seq_printf(s, field->buf); 1538 ret = trace_seq_printf(s, "%s", field->buf);
2282 if (!ret) 1539 if (!ret)
2283 return TRACE_TYPE_PARTIAL_LINE; 1540 return TRACE_TYPE_PARTIAL_LINE;
2284 1541
2285 if (entry->flags & TRACE_FLAG_CONT)
2286 trace_seq_print_cont(s, iter);
2287
2288 return TRACE_TYPE_HANDLED; 1542 return TRACE_TYPE_HANDLED;
2289} 1543}
2290 1544
@@ -2292,53 +1546,18 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2292{ 1546{
2293 struct trace_seq *s = &iter->seq; 1547 struct trace_seq *s = &iter->seq;
2294 struct trace_entry *entry; 1548 struct trace_entry *entry;
1549 struct trace_event *event;
2295 1550
2296 entry = iter->ent; 1551 entry = iter->ent;
2297 1552
2298 if (entry->type == TRACE_CONT) 1553 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2299 return TRACE_TYPE_HANDLED; 1554 SEQ_PUT_FIELD_RET(s, entry->pid);
2300 1555 SEQ_PUT_FIELD_RET(s, iter->cpu);
2301 SEQ_PUT_FIELD_RET(s, entry->pid); 1556 SEQ_PUT_FIELD_RET(s, iter->ts);
2302 SEQ_PUT_FIELD_RET(s, entry->cpu);
2303 SEQ_PUT_FIELD_RET(s, iter->ts);
2304
2305 switch (entry->type) {
2306 case TRACE_FN: {
2307 struct ftrace_entry *field;
2308
2309 trace_assign_type(field, entry);
2310
2311 SEQ_PUT_FIELD_RET(s, field->ip);
2312 SEQ_PUT_FIELD_RET(s, field->parent_ip);
2313 break;
2314 } 1557 }
2315 case TRACE_CTX: {
2316 struct ctx_switch_entry *field;
2317
2318 trace_assign_type(field, entry);
2319
2320 SEQ_PUT_FIELD_RET(s, field->prev_pid);
2321 SEQ_PUT_FIELD_RET(s, field->prev_prio);
2322 SEQ_PUT_FIELD_RET(s, field->prev_state);
2323 SEQ_PUT_FIELD_RET(s, field->next_pid);
2324 SEQ_PUT_FIELD_RET(s, field->next_prio);
2325 SEQ_PUT_FIELD_RET(s, field->next_state);
2326 break;
2327 }
2328 case TRACE_SPECIAL:
2329 case TRACE_USER_STACK:
2330 case TRACE_STACK: {
2331 struct special_entry *field;
2332
2333 trace_assign_type(field, entry);
2334 1558
2335 SEQ_PUT_FIELD_RET(s, field->arg1); 1559 event = ftrace_find_event(entry->type);
2336 SEQ_PUT_FIELD_RET(s, field->arg2); 1560 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
2337 SEQ_PUT_FIELD_RET(s, field->arg3);
2338 break;
2339 }
2340 }
2341 return 1;
2342} 1561}
2343 1562
2344static int trace_empty(struct trace_iterator *iter) 1563static int trace_empty(struct trace_iterator *iter)
@@ -2383,7 +1602,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
2383 return print_raw_fmt(iter); 1602 return print_raw_fmt(iter);
2384 1603
2385 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 1604 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2386 return print_lat_fmt(iter, iter->idx, iter->cpu); 1605 return print_lat_fmt(iter);
2387 1606
2388 return print_trace_fmt(iter); 1607 return print_trace_fmt(iter);
2389} 1608}
@@ -2505,7 +1724,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2505 return 0; 1724 return 0;
2506} 1725}
2507 1726
2508int tracing_release(struct inode *inode, struct file *file) 1727static int tracing_release(struct inode *inode, struct file *file)
2509{ 1728{
2510 struct seq_file *m = (struct seq_file *)file->private_data; 1729 struct seq_file *m = (struct seq_file *)file->private_data;
2511 struct trace_iterator *iter = m->private; 1730 struct trace_iterator *iter = m->private;
@@ -2748,7 +1967,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2748 struct tracer_opt *trace_opts = current_trace->flags->opts; 1967 struct tracer_opt *trace_opts = current_trace->flags->opts;
2749 1968
2750 1969
2751 /* calulate max size */ 1970 /* calculate max size */
2752 for (i = 0; trace_options[i]; i++) { 1971 for (i = 0; trace_options[i]; i++) {
2753 len += strlen(trace_options[i]); 1972 len += strlen(trace_options[i]);
2754 len += 3; /* "no" and space */ 1973 len += 3; /* "no" and space */
@@ -2930,7 +2149,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2930{ 2149{
2931 struct trace_array *tr = filp->private_data; 2150 struct trace_array *tr = filp->private_data;
2932 char buf[64]; 2151 char buf[64];
2933 long val; 2152 unsigned long val;
2934 int ret; 2153 int ret;
2935 2154
2936 if (cnt >= sizeof(buf)) 2155 if (cnt >= sizeof(buf))
@@ -2985,7 +2204,13 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2985 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2204 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2986} 2205}
2987 2206
2988static int tracing_set_tracer(char *buf) 2207int tracer_init(struct tracer *t, struct trace_array *tr)
2208{
2209 tracing_reset_online_cpus(tr);
2210 return t->init(tr);
2211}
2212
2213static int tracing_set_tracer(const char *buf)
2989{ 2214{
2990 struct trace_array *tr = &global_trace; 2215 struct trace_array *tr = &global_trace;
2991 struct tracer *t; 2216 struct tracer *t;
@@ -3009,7 +2234,7 @@ static int tracing_set_tracer(char *buf)
3009 2234
3010 current_trace = t; 2235 current_trace = t;
3011 if (t->init) { 2236 if (t->init) {
3012 ret = t->init(tr); 2237 ret = tracer_init(t, tr);
3013 if (ret) 2238 if (ret)
3014 goto out; 2239 goto out;
3015 } 2240 }
@@ -3072,9 +2297,9 @@ static ssize_t
3072tracing_max_lat_write(struct file *filp, const char __user *ubuf, 2297tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3073 size_t cnt, loff_t *ppos) 2298 size_t cnt, loff_t *ppos)
3074{ 2299{
3075 long *ptr = filp->private_data; 2300 unsigned long *ptr = filp->private_data;
3076 char buf[64]; 2301 char buf[64];
3077 long val; 2302 unsigned long val;
3078 int ret; 2303 int ret;
3079 2304
3080 if (cnt >= sizeof(buf)) 2305 if (cnt >= sizeof(buf))
@@ -3167,37 +2392,15 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3167 } 2392 }
3168} 2393}
3169 2394
3170/* 2395/* Must be called with trace_types_lock mutex held. */
3171 * Consumer reader. 2396static int tracing_wait_pipe(struct file *filp)
3172 */
3173static ssize_t
3174tracing_read_pipe(struct file *filp, char __user *ubuf,
3175 size_t cnt, loff_t *ppos)
3176{ 2397{
3177 struct trace_iterator *iter = filp->private_data; 2398 struct trace_iterator *iter = filp->private_data;
3178 ssize_t sret;
3179 2399
3180 /* return any leftover data */
3181 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3182 if (sret != -EBUSY)
3183 return sret;
3184
3185 trace_seq_reset(&iter->seq);
3186
3187 mutex_lock(&trace_types_lock);
3188 if (iter->trace->read) {
3189 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
3190 if (sret)
3191 goto out;
3192 }
3193
3194waitagain:
3195 sret = 0;
3196 while (trace_empty(iter)) { 2400 while (trace_empty(iter)) {
3197 2401
3198 if ((filp->f_flags & O_NONBLOCK)) { 2402 if ((filp->f_flags & O_NONBLOCK)) {
3199 sret = -EAGAIN; 2403 return -EAGAIN;
3200 goto out;
3201 } 2404 }
3202 2405
3203 /* 2406 /*
@@ -3222,12 +2425,11 @@ waitagain:
3222 iter->tr->waiter = NULL; 2425 iter->tr->waiter = NULL;
3223 2426
3224 if (signal_pending(current)) { 2427 if (signal_pending(current)) {
3225 sret = -EINTR; 2428 return -EINTR;
3226 goto out;
3227 } 2429 }
3228 2430
3229 if (iter->trace != current_trace) 2431 if (iter->trace != current_trace)
3230 goto out; 2432 return 0;
3231 2433
3232 /* 2434 /*
3233 * We block until we read something and tracing is disabled. 2435 * We block until we read something and tracing is disabled.
@@ -3244,9 +2446,43 @@ waitagain:
3244 continue; 2446 continue;
3245 } 2447 }
3246 2448
2449 return 1;
2450}
2451
2452/*
2453 * Consumer reader.
2454 */
2455static ssize_t
2456tracing_read_pipe(struct file *filp, char __user *ubuf,
2457 size_t cnt, loff_t *ppos)
2458{
2459 struct trace_iterator *iter = filp->private_data;
2460 ssize_t sret;
2461
2462 /* return any leftover data */
2463 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2464 if (sret != -EBUSY)
2465 return sret;
2466
2467 trace_seq_reset(&iter->seq);
2468
2469 mutex_lock(&trace_types_lock);
2470 if (iter->trace->read) {
2471 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2472 if (sret)
2473 goto out;
2474 }
2475
2476waitagain:
2477 sret = tracing_wait_pipe(filp);
2478 if (sret <= 0)
2479 goto out;
2480
3247 /* stop when tracing is finished */ 2481 /* stop when tracing is finished */
3248 if (trace_empty(iter)) 2482 if (trace_empty(iter)) {
2483 sret = 0;
3249 goto out; 2484 goto out;
2485 }
3250 2486
3251 if (cnt >= PAGE_SIZE) 2487 if (cnt >= PAGE_SIZE)
3252 cnt = PAGE_SIZE - 1; 2488 cnt = PAGE_SIZE - 1;
@@ -3267,8 +2503,8 @@ waitagain:
3267 iter->seq.len = len; 2503 iter->seq.len = len;
3268 break; 2504 break;
3269 } 2505 }
3270 2506 if (ret != TRACE_TYPE_NO_CONSUME)
3271 trace_consume(iter); 2507 trace_consume(iter);
3272 2508
3273 if (iter->seq.len >= cnt) 2509 if (iter->seq.len >= cnt)
3274 break; 2510 break;
@@ -3292,6 +2528,135 @@ out:
3292 return sret; 2528 return sret;
3293} 2529}
3294 2530
2531static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
2532 struct pipe_buffer *buf)
2533{
2534 __free_page(buf->page);
2535}
2536
2537static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
2538 unsigned int idx)
2539{
2540 __free_page(spd->pages[idx]);
2541}
2542
2543static struct pipe_buf_operations tracing_pipe_buf_ops = {
2544 .can_merge = 0,
2545 .map = generic_pipe_buf_map,
2546 .unmap = generic_pipe_buf_unmap,
2547 .confirm = generic_pipe_buf_confirm,
2548 .release = tracing_pipe_buf_release,
2549 .steal = generic_pipe_buf_steal,
2550 .get = generic_pipe_buf_get,
2551};
2552
2553static size_t
2554tracing_fill_pipe_page(struct page *pages, size_t rem,
2555 struct trace_iterator *iter)
2556{
2557 size_t count;
2558 int ret;
2559
2560 /* Seq buffer is page-sized, exactly what we need. */
2561 for (;;) {
2562 count = iter->seq.len;
2563 ret = print_trace_line(iter);
2564 count = iter->seq.len - count;
2565 if (rem < count) {
2566 rem = 0;
2567 iter->seq.len -= count;
2568 break;
2569 }
2570 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2571 iter->seq.len -= count;
2572 break;
2573 }
2574
2575 trace_consume(iter);
2576 rem -= count;
2577 if (!find_next_entry_inc(iter)) {
2578 rem = 0;
2579 iter->ent = NULL;
2580 break;
2581 }
2582 }
2583
2584 return rem;
2585}
2586
2587static ssize_t tracing_splice_read_pipe(struct file *filp,
2588 loff_t *ppos,
2589 struct pipe_inode_info *pipe,
2590 size_t len,
2591 unsigned int flags)
2592{
2593 struct page *pages[PIPE_BUFFERS];
2594 struct partial_page partial[PIPE_BUFFERS];
2595 struct trace_iterator *iter = filp->private_data;
2596 struct splice_pipe_desc spd = {
2597 .pages = pages,
2598 .partial = partial,
2599 .nr_pages = 0, /* This gets updated below. */
2600 .flags = flags,
2601 .ops = &tracing_pipe_buf_ops,
2602 .spd_release = tracing_spd_release_pipe,
2603 };
2604 ssize_t ret;
2605 size_t rem;
2606 unsigned int i;
2607
2608 mutex_lock(&trace_types_lock);
2609
2610 if (iter->trace->splice_read) {
2611 ret = iter->trace->splice_read(iter, filp,
2612 ppos, pipe, len, flags);
2613 if (ret)
2614 goto out_err;
2615 }
2616
2617 ret = tracing_wait_pipe(filp);
2618 if (ret <= 0)
2619 goto out_err;
2620
2621 if (!iter->ent && !find_next_entry_inc(iter)) {
2622 ret = -EFAULT;
2623 goto out_err;
2624 }
2625
2626 /* Fill as many pages as possible. */
2627 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
2628 pages[i] = alloc_page(GFP_KERNEL);
2629 if (!pages[i])
2630 break;
2631
2632 rem = tracing_fill_pipe_page(pages[i], rem, iter);
2633
2634 /* Copy the data into the page, so we can start over. */
2635 ret = trace_seq_to_buffer(&iter->seq,
2636 page_address(pages[i]),
2637 iter->seq.len);
2638 if (ret < 0) {
2639 __free_page(pages[i]);
2640 break;
2641 }
2642 partial[i].offset = 0;
2643 partial[i].len = iter->seq.len;
2644
2645 trace_seq_reset(&iter->seq);
2646 }
2647
2648 mutex_unlock(&trace_types_lock);
2649
2650 spd.nr_pages = i;
2651
2652 return splice_to_pipe(pipe, &spd);
2653
2654out_err:
2655 mutex_unlock(&trace_types_lock);
2656
2657 return ret;
2658}
2659
3295static ssize_t 2660static ssize_t
3296tracing_entries_read(struct file *filp, char __user *ubuf, 2661tracing_entries_read(struct file *filp, char __user *ubuf,
3297 size_t cnt, loff_t *ppos) 2662 size_t cnt, loff_t *ppos)
@@ -3455,6 +2820,7 @@ static struct file_operations tracing_pipe_fops = {
3455 .open = tracing_open_pipe, 2820 .open = tracing_open_pipe,
3456 .poll = tracing_poll_pipe, 2821 .poll = tracing_poll_pipe,
3457 .read = tracing_read_pipe, 2822 .read = tracing_read_pipe,
2823 .splice_read = tracing_splice_read_pipe,
3458 .release = tracing_release_pipe, 2824 .release = tracing_release_pipe,
3459}; 2825};
3460 2826
@@ -3653,18 +3019,16 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3653 trace_buf[len] = 0; 3019 trace_buf[len] = 0;
3654 3020
3655 size = sizeof(*entry) + len + 1; 3021 size = sizeof(*entry) + len + 1;
3656 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags); 3022 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
3657 if (!event) 3023 if (!event)
3658 goto out_unlock; 3024 goto out_unlock;
3659 entry = ring_buffer_event_data(event); 3025 entry = ring_buffer_event_data(event);
3660 tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3661 entry->ent.type = TRACE_PRINT;
3662 entry->ip = ip; 3026 entry->ip = ip;
3663 entry->depth = depth; 3027 entry->depth = depth;
3664 3028
3665 memcpy(&entry->buf, trace_buf, len); 3029 memcpy(&entry->buf, trace_buf, len);
3666 entry->buf[len] = 0; 3030 entry->buf[len] = 0;
3667 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 3031 ring_buffer_unlock_commit(tr->buffer, event);
3668 3032
3669 out_unlock: 3033 out_unlock:
3670 spin_unlock_irqrestore(&trace_buf_lock, irq_flags); 3034 spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
@@ -3691,6 +3055,15 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3691} 3055}
3692EXPORT_SYMBOL_GPL(__ftrace_printk); 3056EXPORT_SYMBOL_GPL(__ftrace_printk);
3693 3057
3058int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
3059{
3060 if (!(trace_flags & TRACE_ITER_PRINTK))
3061 return 0;
3062
3063 return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3064}
3065EXPORT_SYMBOL_GPL(__ftrace_vprintk);
3066
3694static int trace_panic_handler(struct notifier_block *this, 3067static int trace_panic_handler(struct notifier_block *this,
3695 unsigned long event, void *unused) 3068 unsigned long event, void *unused)
3696{ 3069{
@@ -3871,14 +3244,10 @@ __init static int tracer_alloc_buffers(void)
3871 trace_init_cmdlines(); 3244 trace_init_cmdlines();
3872 3245
3873 register_tracer(&nop_trace); 3246 register_tracer(&nop_trace);
3247 current_trace = &nop_trace;
3874#ifdef CONFIG_BOOT_TRACER 3248#ifdef CONFIG_BOOT_TRACER
3875 register_tracer(&boot_tracer); 3249 register_tracer(&boot_tracer);
3876 current_trace = &boot_tracer;
3877 current_trace->init(&global_trace);
3878#else
3879 current_trace = &nop_trace;
3880#endif 3250#endif
3881
3882 /* All seems OK, enable tracing */ 3251 /* All seems OK, enable tracing */
3883 tracing_disabled = 0; 3252 tracing_disabled = 0;
3884 3253
@@ -3895,5 +3264,26 @@ out_free_buffer_mask:
3895out: 3264out:
3896 return ret; 3265 return ret;
3897} 3266}
3267
3268__init static int clear_boot_tracer(void)
3269{
3270 /*
3271 * The default tracer at boot buffer is an init section.
3272 * This function is called in lateinit. If we did not
3273 * find the boot tracer, then clear it out, to prevent
3274 * later registration from accessing the buffer that is
3275 * about to be freed.
3276 */
3277 if (!default_bootup_tracer)
3278 return 0;
3279
3280 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
3281 default_bootup_tracer);
3282 default_bootup_tracer = NULL;
3283
3284 return 0;
3285}
3286
3898early_initcall(tracer_alloc_buffers); 3287early_initcall(tracer_alloc_buffers);
3899fs_initcall(tracer_init_debugfs); 3288fs_initcall(tracer_init_debugfs);
3289late_initcall(clear_boot_tracer);
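
The trace.c hunks above replace the long per-type switch statements in print_lat_fmt(), print_trace_fmt(), print_raw_fmt(), print_hex_fmt() and print_bin_fmt() with a lookup through ftrace_find_event() and per-event callbacks (trace, latency_trace, raw, hex, binary). A minimal sketch of that dispatch shape, written as standalone userspace C rather than kernel code, with the array registry, the fn_trace handler and the type value invented purely for illustration:

/*
 * Userspace sketch (not the kernel code) of the callback dispatch the
 * trace.c hunks introduce: each entry type registers a struct of print
 * callbacks, and the printers look the type up instead of switching.
 */
#include <stdio.h>
#include <stddef.h>

enum print_line_t { TRACE_TYPE_PARTIAL_LINE, TRACE_TYPE_HANDLED };

struct trace_event {
	int type;
	enum print_line_t (*trace)(const void *entry);	/* human-readable */
	enum print_line_t (*raw)(const void *entry);	/* raw output */
};

static enum print_line_t fn_trace(const void *entry)
{
	(void)entry;			/* a real handler formats its fields */
	printf("function entry handled\n");
	return TRACE_TYPE_HANDLED;
}

static struct trace_event fn_event = { .type = 1, .trace = fn_trace };
static struct trace_event *events[] = { &fn_event, NULL };

/* Stand-in for ftrace_find_event(): map an entry type to its callbacks. */
static struct trace_event *find_event(int type)
{
	for (size_t i = 0; events[i]; i++)
		if (events[i]->type == type)
			return events[i];
	return NULL;
}

static enum print_line_t print_trace_fmt(int type, const void *entry)
{
	struct trace_event *event = find_event(type);

	if (event)
		return event->trace(entry);
	printf("Unknown type %d\n", type);	/* same fallback as the patch */
	return TRACE_TYPE_HANDLED;
}

int main(void)
{
	int payload = 42;

	print_trace_fmt(1, &payload);	/* dispatched to fn_trace() */
	print_trace_fmt(7, &payload);	/* no handler registered */
	return 0;
}
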
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381bfd95..dbff0207b213 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,8 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h> 11#include <trace/boot.h>
12#include <trace/kmemtrace.h>
13#include <trace/power.h>
12 14
13enum trace_type { 15enum trace_type {
14 __TRACE_FIRST_TYPE = 0, 16 __TRACE_FIRST_TYPE = 0,
@@ -16,7 +18,6 @@ enum trace_type {
16 TRACE_FN, 18 TRACE_FN,
17 TRACE_CTX, 19 TRACE_CTX,
18 TRACE_WAKE, 20 TRACE_WAKE,
19 TRACE_CONT,
20 TRACE_STACK, 21 TRACE_STACK,
21 TRACE_PRINT, 22 TRACE_PRINT,
22 TRACE_SPECIAL, 23 TRACE_SPECIAL,
@@ -29,9 +30,12 @@ enum trace_type {
29 TRACE_GRAPH_ENT, 30 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK, 31 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES, 32 TRACE_HW_BRANCHES,
33 TRACE_KMEM_ALLOC,
34 TRACE_KMEM_FREE,
32 TRACE_POWER, 35 TRACE_POWER,
36 TRACE_BLK,
33 37
34 __TRACE_LAST_TYPE 38 __TRACE_LAST_TYPE,
35}; 39};
36 40
37/* 41/*
@@ -42,7 +46,6 @@ enum trace_type {
42 */ 46 */
43struct trace_entry { 47struct trace_entry {
44 unsigned char type; 48 unsigned char type;
45 unsigned char cpu;
46 unsigned char flags; 49 unsigned char flags;
47 unsigned char preempt_count; 50 unsigned char preempt_count;
48 int pid; 51 int pid;
@@ -60,13 +63,13 @@ struct ftrace_entry {
60 63
61/* Function call entry */ 64/* Function call entry */
62struct ftrace_graph_ent_entry { 65struct ftrace_graph_ent_entry {
63 struct trace_entry ent; 66 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent; 67 struct ftrace_graph_ent graph_ent;
65}; 68};
66 69
67/* Function return entry */ 70/* Function return entry */
68struct ftrace_graph_ret_entry { 71struct ftrace_graph_ret_entry {
69 struct trace_entry ent; 72 struct trace_entry ent;
70 struct ftrace_graph_ret ret; 73 struct ftrace_graph_ret ret;
71}; 74};
72extern struct tracer boot_tracer; 75extern struct tracer boot_tracer;
@@ -170,6 +173,24 @@ struct trace_power {
170 struct power_trace state_data; 173 struct power_trace state_data;
171}; 174};
172 175
176struct kmemtrace_alloc_entry {
177 struct trace_entry ent;
178 enum kmemtrace_type_id type_id;
179 unsigned long call_site;
180 const void *ptr;
181 size_t bytes_req;
182 size_t bytes_alloc;
183 gfp_t gfp_flags;
184 int node;
185};
186
187struct kmemtrace_free_entry {
188 struct trace_entry ent;
189 enum kmemtrace_type_id type_id;
190 unsigned long call_site;
191 const void *ptr;
192};
193
173/* 194/*
174 * trace_flag_type is an enumeration that holds different 195 * trace_flag_type is an enumeration that holds different
175 * states when a trace occurs. These are: 196 * states when a trace occurs. These are:
@@ -178,7 +199,6 @@ struct trace_power {
178 * NEED_RESCED - reschedule is requested 199 * NEED_RESCED - reschedule is requested
179 * HARDIRQ - inside an interrupt handler 200 * HARDIRQ - inside an interrupt handler
180 * SOFTIRQ - inside a softirq handler 201 * SOFTIRQ - inside a softirq handler
181 * CONT - multiple entries hold the trace item
182 */ 202 */
183enum trace_flag_type { 203enum trace_flag_type {
184 TRACE_FLAG_IRQS_OFF = 0x01, 204 TRACE_FLAG_IRQS_OFF = 0x01,
@@ -186,7 +206,6 @@ enum trace_flag_type {
186 TRACE_FLAG_NEED_RESCHED = 0x04, 206 TRACE_FLAG_NEED_RESCHED = 0x04,
187 TRACE_FLAG_HARDIRQ = 0x08, 207 TRACE_FLAG_HARDIRQ = 0x08,
188 TRACE_FLAG_SOFTIRQ = 0x10, 208 TRACE_FLAG_SOFTIRQ = 0x10,
189 TRACE_FLAG_CONT = 0x20,
190}; 209};
191 210
192#define TRACE_BUF_SIZE 1024 211#define TRACE_BUF_SIZE 1024
@@ -262,7 +281,6 @@ extern void __ftrace_bad_type(void);
262 do { \ 281 do { \
263 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ 282 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 283 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 284 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 285 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 286 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
@@ -280,6 +298,10 @@ extern void __ftrace_bad_type(void);
280 TRACE_GRAPH_RET); \ 298 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 299 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ 300 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
301 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
302 TRACE_KMEM_ALLOC); \
303 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
304 TRACE_KMEM_FREE); \
283 __ftrace_bad_type(); \ 305 __ftrace_bad_type(); \
284 } while (0) 306 } while (0)
285 307
@@ -287,7 +309,8 @@ extern void __ftrace_bad_type(void);
287enum print_line_t { 309enum print_line_t {
288 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ 310 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
289 TRACE_TYPE_HANDLED = 1, 311 TRACE_TYPE_HANDLED = 1,
290 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 312 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
313 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
291}; 314};
292 315
293 316
@@ -313,6 +336,7 @@ struct tracer_flags {
313/* Makes more easy to define a tracer opt */ 336/* Makes more easy to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b 337#define TRACER_OPT(s, b) .name = #s, .bit = b
315 338
339
316/* 340/*
317 * A specific tracer, represented by methods that operate on a trace array: 341 * A specific tracer, represented by methods that operate on a trace array:
318 */ 342 */
@@ -329,6 +353,12 @@ struct tracer {
329 ssize_t (*read)(struct trace_iterator *iter, 353 ssize_t (*read)(struct trace_iterator *iter,
330 struct file *filp, char __user *ubuf, 354 struct file *filp, char __user *ubuf,
331 size_t cnt, loff_t *ppos); 355 size_t cnt, loff_t *ppos);
356 ssize_t (*splice_read)(struct trace_iterator *iter,
357 struct file *filp,
358 loff_t *ppos,
359 struct pipe_inode_info *pipe,
360 size_t len,
361 unsigned int flags);
332#ifdef CONFIG_FTRACE_STARTUP_TEST 362#ifdef CONFIG_FTRACE_STARTUP_TEST
333 int (*selftest)(struct tracer *trace, 363 int (*selftest)(struct tracer *trace,
334 struct trace_array *tr); 364 struct trace_array *tr);
@@ -340,6 +370,7 @@ struct tracer {
340 struct tracer *next; 370 struct tracer *next;
341 int print_max; 371 int print_max;
342 struct tracer_flags *flags; 372 struct tracer_flags *flags;
373 struct tracer_stat *stats;
343}; 374};
344 375
345struct trace_seq { 376struct trace_seq {
@@ -371,6 +402,7 @@ struct trace_iterator {
371 cpumask_var_t started; 402 cpumask_var_t started;
372}; 403};
373 404
405int tracer_init(struct tracer *t, struct trace_array *tr);
374int tracing_is_enabled(void); 406int tracing_is_enabled(void);
375void trace_wake_up(void); 407void trace_wake_up(void);
376void tracing_reset(struct trace_array *tr, int cpu); 408void tracing_reset(struct trace_array *tr, int cpu);
@@ -379,8 +411,23 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
379struct dentry *tracing_init_dentry(void); 411struct dentry *tracing_init_dentry(void);
380void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 412void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
381 413
414struct ring_buffer_event;
415
416struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
417 unsigned char type,
418 unsigned long len,
419 unsigned long flags,
420 int pc);
421void trace_buffer_unlock_commit(struct trace_array *tr,
422 struct ring_buffer_event *event,
423 unsigned long flags, int pc);
424
382struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 425struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
383 struct trace_array_cpu *data); 426 struct trace_array_cpu *data);
427
428struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
429 int *ent_cpu, u64 *ent_ts);
430
384void tracing_generic_entry_update(struct trace_entry *entry, 431void tracing_generic_entry_update(struct trace_entry *entry,
385 unsigned long flags, 432 unsigned long flags,
386 int pc); 433 int pc);
@@ -391,14 +438,12 @@ void ftrace(struct trace_array *tr,
391 unsigned long parent_ip, 438 unsigned long parent_ip,
392 unsigned long flags, int pc); 439 unsigned long flags, int pc);
393void tracing_sched_switch_trace(struct trace_array *tr, 440void tracing_sched_switch_trace(struct trace_array *tr,
394 struct trace_array_cpu *data,
395 struct task_struct *prev, 441 struct task_struct *prev,
396 struct task_struct *next, 442 struct task_struct *next,
397 unsigned long flags, int pc); 443 unsigned long flags, int pc);
398void tracing_record_cmdline(struct task_struct *tsk); 444void tracing_record_cmdline(struct task_struct *tsk);
399 445
400void tracing_sched_wakeup_trace(struct trace_array *tr, 446void tracing_sched_wakeup_trace(struct trace_array *tr,
401 struct trace_array_cpu *data,
402 struct task_struct *wakee, 447 struct task_struct *wakee,
403 struct task_struct *cur, 448 struct task_struct *cur,
404 unsigned long flags, int pc); 449 unsigned long flags, int pc);
@@ -408,14 +453,12 @@ void trace_special(struct trace_array *tr,
408 unsigned long arg2, 453 unsigned long arg2,
409 unsigned long arg3, int pc); 454 unsigned long arg3, int pc);
410void trace_function(struct trace_array *tr, 455void trace_function(struct trace_array *tr,
411 struct trace_array_cpu *data,
412 unsigned long ip, 456 unsigned long ip,
413 unsigned long parent_ip, 457 unsigned long parent_ip,
414 unsigned long flags, int pc); 458 unsigned long flags, int pc);
415 459
416void trace_graph_return(struct ftrace_graph_ret *trace); 460void trace_graph_return(struct ftrace_graph_ret *trace);
417int trace_graph_entry(struct ftrace_graph_ent *trace); 461int trace_graph_entry(struct ftrace_graph_ent *trace);
418void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
419 462
420void tracing_start_cmdline_record(void); 463void tracing_start_cmdline_record(void);
421void tracing_stop_cmdline_record(void); 464void tracing_stop_cmdline_record(void);
@@ -434,15 +477,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
434void update_max_tr_single(struct trace_array *tr, 477void update_max_tr_single(struct trace_array *tr,
435 struct task_struct *tsk, int cpu); 478 struct task_struct *tsk, int cpu);
436 479
437extern cycle_t ftrace_now(int cpu); 480void __trace_stack(struct trace_array *tr,
481 unsigned long flags,
482 int skip, int pc);
438 483
439#ifdef CONFIG_FUNCTION_TRACER 484extern cycle_t ftrace_now(int cpu);
440void tracing_start_function_trace(void);
441void tracing_stop_function_trace(void);
442#else
443# define tracing_start_function_trace() do { } while (0)
444# define tracing_stop_function_trace() do { } while (0)
445#endif
446 485
447#ifdef CONFIG_CONTEXT_SWITCH_TRACER 486#ifdef CONFIG_CONTEXT_SWITCH_TRACER
448typedef void 487typedef void
@@ -456,10 +495,10 @@ struct tracer_switch_ops {
456 void *private; 495 void *private;
457 struct tracer_switch_ops *next; 496 struct tracer_switch_ops *next;
458}; 497};
459
460char *trace_find_cmdline(int pid);
461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 498#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
462 499
500extern char *trace_find_cmdline(int pid);
501
463#ifdef CONFIG_DYNAMIC_FTRACE 502#ifdef CONFIG_DYNAMIC_FTRACE
464extern unsigned long ftrace_update_tot_cnt; 503extern unsigned long ftrace_update_tot_cnt;
465#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 504#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -469,6 +508,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
469#ifdef CONFIG_FTRACE_STARTUP_TEST 508#ifdef CONFIG_FTRACE_STARTUP_TEST
470extern int trace_selftest_startup_function(struct tracer *trace, 509extern int trace_selftest_startup_function(struct tracer *trace,
471 struct trace_array *tr); 510 struct trace_array *tr);
511extern int trace_selftest_startup_function_graph(struct tracer *trace,
512 struct trace_array *tr);
472extern int trace_selftest_startup_irqsoff(struct tracer *trace, 513extern int trace_selftest_startup_irqsoff(struct tracer *trace,
473 struct trace_array *tr); 514 struct trace_array *tr);
474extern int trace_selftest_startup_preemptoff(struct tracer *trace, 515extern int trace_selftest_startup_preemptoff(struct tracer *trace,
@@ -488,15 +529,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
488#endif /* CONFIG_FTRACE_STARTUP_TEST */ 529#endif /* CONFIG_FTRACE_STARTUP_TEST */
489 530
490extern void *head_page(struct trace_array_cpu *data); 531extern void *head_page(struct trace_array_cpu *data);
491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
492extern void trace_seq_print_cont(struct trace_seq *s,
493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
499 size_t cnt);
500extern long ns2usecs(cycle_t nsec); 532extern long ns2usecs(cycle_t nsec);
501extern int 533extern int
502trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); 534trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
@@ -580,7 +612,8 @@ enum trace_iterator_flags {
580 TRACE_ITER_ANNOTATE = 0x2000, 612 TRACE_ITER_ANNOTATE = 0x2000,
581 TRACE_ITER_USERSTACKTRACE = 0x4000, 613 TRACE_ITER_USERSTACKTRACE = 0x4000,
582 TRACE_ITER_SYM_USEROBJ = 0x8000, 614 TRACE_ITER_SYM_USEROBJ = 0x8000,
583 TRACE_ITER_PRINTK_MSGONLY = 0x10000 615 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
616 TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */
584}; 617};
585 618
586/* 619/*
@@ -601,12 +634,12 @@ extern struct tracer nop_trace;
601 * preempt_enable (after a disable), a schedule might take place 634 * preempt_enable (after a disable), a schedule might take place
602 * causing an infinite recursion. 635 * causing an infinite recursion.
603 * 636 *
604 * To prevent this, we read the need_recshed flag before 637 * To prevent this, we read the need_resched flag before
605 * disabling preemption. When we want to enable preemption we 638 * disabling preemption. When we want to enable preemption we
606 * check the flag, if it is set, then we call preempt_enable_no_resched. 639 * check the flag, if it is set, then we call preempt_enable_no_resched.
607 * Otherwise, we call preempt_enable. 640 * Otherwise, we call preempt_enable.
608 * 641 *
609 * The rational for doing the above is that if need resched is set 642 * The rational for doing the above is that if need_resched is set
610 * and we have yet to reschedule, we are either in an atomic location 643 * and we have yet to reschedule, we are either in an atomic location
611 * (where we do not need to check for scheduling) or we are inside 644 * (where we do not need to check for scheduling) or we are inside
612 * the scheduler and do not want to resched. 645 * the scheduler and do not want to resched.
@@ -627,7 +660,7 @@ static inline int ftrace_preempt_disable(void)
627 * 660 *
628 * This is a scheduler safe way to enable preemption and not miss 661 * This is a scheduler safe way to enable preemption and not miss
629 * any preemption checks. The disabled saved the state of preemption. 662 * any preemption checks. The disabled saved the state of preemption.
630 * If resched is set, then we were either inside an atomic or 663 * If resched is set, then we are either inside an atomic or
631 * are inside the scheduler (we would have already scheduled 664 * are inside the scheduler (we would have already scheduled
632 * otherwise). In this case, we do not want to call normal 665 * otherwise). In this case, we do not want to call normal
633 * preempt_enable, but preempt_enable_no_resched instead. 666 * preempt_enable, but preempt_enable_no_resched instead.
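
The trace.h hunk above adds TRACE_ITER_CONTEXT_INFO so the output routines can drop the pid/cpu/timestamp prefix when that bit is cleared. A minimal sketch of how such an iterator-flag bit is tested; the two constants are copied from the hunk, while the surrounding demo is an assumption, not kernel code:

#include <stdio.h>

enum trace_iterator_flags {
	TRACE_ITER_PRINTK_MSGONLY = 0x10000,
	TRACE_ITER_CONTEXT_INFO   = 0x20000,	/* Print pid/cpu/time */
};

int main(void)
{
	unsigned long trace_flags = TRACE_ITER_CONTEXT_INFO;

	if (trace_flags & TRACE_ITER_CONTEXT_INFO)
		printf("would print the pid/cpu/timestamp prefix\n");

	trace_flags &= ~TRACE_ITER_CONTEXT_INFO;	/* option toggled off */
	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
		printf("prefix suppressed, payload only\n");
	return 0;
}
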
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 366c8c333e13..7a30fc4c3642 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -11,6 +11,7 @@
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12 12
13#include "trace.h" 13#include "trace.h"
14#include "trace_output.h"
14 15
15static struct trace_array *boot_trace; 16static struct trace_array *boot_trace;
16static bool pre_initcalls_finished; 17static bool pre_initcalls_finished;
@@ -27,13 +28,13 @@ void start_boot_trace(void)
27 28
28void enable_boot_trace(void) 29void enable_boot_trace(void)
29{ 30{
30 if (pre_initcalls_finished) 31 if (boot_trace && pre_initcalls_finished)
31 tracing_start_sched_switch_record(); 32 tracing_start_sched_switch_record();
32} 33}
33 34
34void disable_boot_trace(void) 35void disable_boot_trace(void)
35{ 36{
36 if (pre_initcalls_finished) 37 if (boot_trace && pre_initcalls_finished)
37 tracing_stop_sched_switch_record(); 38 tracing_stop_sched_switch_record();
38} 39}
39 40
@@ -42,6 +43,9 @@ static int boot_trace_init(struct trace_array *tr)
42 int cpu; 43 int cpu;
43 boot_trace = tr; 44 boot_trace = tr;
44 45
46 if (!tr)
47 return 0;
48
45 for_each_cpu(cpu, cpu_possible_mask) 49 for_each_cpu(cpu, cpu_possible_mask)
46 tracing_reset(tr, cpu); 50 tracing_reset(tr, cpu);
47 51
@@ -128,10 +132,9 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
128{ 132{
129 struct ring_buffer_event *event; 133 struct ring_buffer_event *event;
130 struct trace_boot_call *entry; 134 struct trace_boot_call *entry;
131 unsigned long irq_flags;
132 struct trace_array *tr = boot_trace; 135 struct trace_array *tr = boot_trace;
133 136
134 if (!pre_initcalls_finished) 137 if (!tr || !pre_initcalls_finished)
135 return; 138 return;
136 139
137 /* Get its name now since this function could 140 /* Get its name now since this function could
@@ -140,18 +143,13 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
140 sprint_symbol(bt->func, (unsigned long)fn); 143 sprint_symbol(bt->func, (unsigned long)fn);
141 preempt_disable(); 144 preempt_disable();
142 145
143 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 146 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL,
144 &irq_flags); 147 sizeof(*entry), 0, 0);
145 if (!event) 148 if (!event)
146 goto out; 149 goto out;
147 entry = ring_buffer_event_data(event); 150 entry = ring_buffer_event_data(event);
148 tracing_generic_entry_update(&entry->ent, 0, 0);
149 entry->ent.type = TRACE_BOOT_CALL;
150 entry->boot_call = *bt; 151 entry->boot_call = *bt;
151 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 152 trace_buffer_unlock_commit(tr, event, 0, 0);
152
153 trace_wake_up();
154
155 out: 153 out:
156 preempt_enable(); 154 preempt_enable();
157} 155}
@@ -160,27 +158,21 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
160{ 158{
161 struct ring_buffer_event *event; 159 struct ring_buffer_event *event;
162 struct trace_boot_ret *entry; 160 struct trace_boot_ret *entry;
163 unsigned long irq_flags;
164 struct trace_array *tr = boot_trace; 161 struct trace_array *tr = boot_trace;
165 162
166 if (!pre_initcalls_finished) 163 if (!tr || !pre_initcalls_finished)
167 return; 164 return;
168 165
169 sprint_symbol(bt->func, (unsigned long)fn); 166 sprint_symbol(bt->func, (unsigned long)fn);
170 preempt_disable(); 167 preempt_disable();
171 168
172 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 169 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET,
173 &irq_flags); 170 sizeof(*entry), 0, 0);
174 if (!event) 171 if (!event)
175 goto out; 172 goto out;
176 entry = ring_buffer_event_data(event); 173 entry = ring_buffer_event_data(event);
177 tracing_generic_entry_update(&entry->ent, 0, 0);
178 entry->ent.type = TRACE_BOOT_RET;
179 entry->boot_ret = *bt; 174 entry->boot_ret = *bt;
180 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 175 trace_buffer_unlock_commit(tr, event, 0, 0);
181
182 trace_wake_up();
183
184 out: 176 out:
185 preempt_enable(); 177 preempt_enable();
186} 178}
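
The trace_boot.c hunks above fold the open-coded ring_buffer_lock_reserve() + tracing_generic_entry_update() + explicit type assignment sequence into the new trace_buffer_lock_reserve() / trace_buffer_unlock_commit() helpers, so each tracer only fills in its own payload. A rough userspace sketch of that reserve/fill/commit split, with a fixed byte array standing in for the ring buffer and an invented entry layout:

/*
 * Userspace sketch of the helper pair: reserve space, set the generic
 * fields in one place, let the caller fill its payload, then commit.
 * The byte-array "ring buffer" and struct layouts are assumptions.
 */
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdalign.h>

struct trace_entry { unsigned char type; unsigned char flags; int pc; };
struct boot_call_entry { struct trace_entry ent; char func[32]; };

static alignas(max_align_t) unsigned char buffer[256];
static size_t buffer_used;

static void *trace_buffer_lock_reserve(unsigned char type, size_t len,
				       unsigned char flags, int pc)
{
	struct trace_entry *ent;

	if (buffer_used + len > sizeof(buffer))
		return NULL;			/* buffer full, drop the event */
	ent = (struct trace_entry *)(buffer + buffer_used);
	ent->type = type;			/* generic fields set here, once */
	ent->flags = flags;
	ent->pc = pc;
	return ent;
}

static void trace_buffer_unlock_commit(void *event, size_t len)
{
	(void)event;			/* a real ring buffer commits this handle */
	buffer_used += len;		/* make the entry visible to readers */
}

int main(void)
{
	struct boot_call_entry *entry;

	entry = trace_buffer_lock_reserve(10 /* e.g. TRACE_BOOT_CALL */,
					  sizeof(*entry), 0, 0);
	if (!entry)
		return 1;
	strcpy(entry->func, "do_one_initcall");	/* tracer-specific payload */
	trace_buffer_unlock_commit(entry, sizeof(*entry));

	printf("committed %zu bytes\n", buffer_used);
	return 0;
}
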
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6c00feb3bac7..c2e68d440c4d 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -14,12 +14,17 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <asm/local.h> 16#include <asm/local.h>
17
17#include "trace.h" 18#include "trace.h"
19#include "trace_stat.h"
20#include "trace_output.h"
18 21
19#ifdef CONFIG_BRANCH_TRACER 22#ifdef CONFIG_BRANCH_TRACER
20 23
24static struct tracer branch_trace;
21static int branch_tracing_enabled __read_mostly; 25static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex); 26static DEFINE_MUTEX(branch_tracing_mutex);
27
23static struct trace_array *branch_tracer; 28static struct trace_array *branch_tracer;
24 29
25static void 30static void
@@ -28,7 +33,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
28 struct trace_array *tr = branch_tracer; 33 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event; 34 struct ring_buffer_event *event;
30 struct trace_branch *entry; 35 struct trace_branch *entry;
31 unsigned long flags, irq_flags; 36 unsigned long flags;
32 int cpu, pc; 37 int cpu, pc;
33 const char *p; 38 const char *p;
34 39
@@ -47,15 +52,13 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) 52 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out; 53 goto out;
49 54
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 55 pc = preempt_count();
51 &irq_flags); 56 event = trace_buffer_lock_reserve(tr, TRACE_BRANCH,
57 sizeof(*entry), flags, pc);
52 if (!event) 58 if (!event)
53 goto out; 59 goto out;
54 60
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event); 61 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59 62
60 /* Strip off the path, only save the file */ 63 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file); 64 p = f->file + strlen(f->file);
@@ -70,7 +73,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
70 entry->line = f->line; 73 entry->line = f->line;
71 entry->correct = val == expect; 74 entry->correct = val == expect;
72 75
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 76 ring_buffer_unlock_commit(tr->buffer, event);
74 77
75 out: 78 out:
76 atomic_dec(&tr->data[cpu]->disabled); 79 atomic_dec(&tr->data[cpu]->disabled);
@@ -88,8 +91,6 @@ void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
88 91
89int enable_branch_tracing(struct trace_array *tr) 92int enable_branch_tracing(struct trace_array *tr)
90{ 93{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex); 94 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr; 95 branch_tracer = tr;
95 /* 96 /*
@@ -100,7 +101,7 @@ int enable_branch_tracing(struct trace_array *tr)
100 branch_tracing_enabled++; 101 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex); 102 mutex_unlock(&branch_tracing_mutex);
102 103
103 return ret; 104 return 0;
104} 105}
105 106
106void disable_branch_tracing(void) 107void disable_branch_tracing(void)
@@ -128,11 +129,6 @@ static void stop_branch_trace(struct trace_array *tr)
128 129
129static int branch_trace_init(struct trace_array *tr) 130static int branch_trace_init(struct trace_array *tr)
130{ 131{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr); 132 start_branch_trace(tr);
137 return 0; 133 return 0;
138} 134}
@@ -142,22 +138,54 @@ static void branch_trace_reset(struct trace_array *tr)
142 stop_branch_trace(tr); 138 stop_branch_trace(tr);
143} 139}
144 140
145struct tracer branch_trace __read_mostly = 141static enum print_line_t trace_branch_print(struct trace_iterator *iter,
142 int flags)
143{
144 struct trace_branch *field;
145
146 trace_assign_type(field, iter->ent);
147
148 if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n",
149 field->correct ? " ok " : " MISS ",
150 field->func,
151 field->file,
152 field->line))
153 return TRACE_TYPE_PARTIAL_LINE;
154
155 return TRACE_TYPE_HANDLED;
156}
157
158
159static struct trace_event trace_branch_event = {
160 .type = TRACE_BRANCH,
161 .trace = trace_branch_print,
162 .latency_trace = trace_branch_print,
163};
164
165static struct tracer branch_trace __read_mostly =
146{ 166{
147 .name = "branch", 167 .name = "branch",
148 .init = branch_trace_init, 168 .init = branch_trace_init,
149 .reset = branch_trace_reset, 169 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST 170#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch, 171 .selftest = trace_selftest_startup_branch,
152#endif 172#endif /* CONFIG_FTRACE_SELFTEST */
153}; 173};
154 174
155__init static int init_branch_trace(void) 175__init static int init_branch_tracer(void)
156{ 176{
177 int ret;
178
179 ret = register_ftrace_event(&trace_branch_event);
180 if (!ret) {
181 printk(KERN_WARNING "Warning: could not register "
182 "branch events\n");
183 return 1;
184 }
157 return register_tracer(&branch_trace); 185 return register_tracer(&branch_trace);
158} 186}
187device_initcall(init_branch_tracer);
159 188
160device_initcall(init_branch_trace);
161#else 189#else
162static inline 190static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) 191void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
@@ -183,66 +211,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
183} 211}
184EXPORT_SYMBOL(ftrace_likely_update); 212EXPORT_SYMBOL(ftrace_likely_update);
185 213
186struct ftrace_pointer { 214extern unsigned long __start_annotated_branch_profile[];
187 void *start; 215extern unsigned long __stop_annotated_branch_profile[];
188 void *stop;
189 int hit;
190};
191 216
192static void * 217static int annotated_branch_stat_headers(struct seq_file *m)
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{ 218{
195 const struct ftrace_pointer *f = m->private; 219 seq_printf(m, " correct incorrect %% ");
196 struct ftrace_branch_data *p = v; 220 seq_printf(m, " Function "
197 221 " File Line\n"
198 (*pos)++; 222 " ------- --------- - "
199 223 " -------- "
200 if (v == (void *)1) 224 " ---- ----\n");
201 return f->start; 225 return 0;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209} 226}
210 227
211static void *t_start(struct seq_file *m, loff_t *pos) 228static inline long get_incorrect_percent(struct ftrace_branch_data *p)
212{ 229{
213 void *t = (void *)1; 230 long percent;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218 231
219 return t; 232 if (p->correct) {
220} 233 percent = p->incorrect * 100;
234 percent /= p->correct + p->incorrect;
235 } else
236 percent = p->incorrect ? 100 : -1;
221 237
222static void t_stop(struct seq_file *m, void *p) 238 return percent;
223{
224} 239}
225 240
226static int t_show(struct seq_file *m, void *v) 241static int branch_stat_show(struct seq_file *m, void *v)
227{ 242{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v; 243 struct ftrace_branch_data *p = v;
230 const char *f; 244 const char *f;
231 long percent; 245 long percent;
232 246
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */ 247 /* Only print the file, not the path */
247 f = p->file + strlen(p->file); 248 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/') 249 while (f >= p->file && *f != '/')
@@ -252,11 +253,7 @@ static int t_show(struct seq_file *m, void *v)
252 /* 253 /*
253 * The miss is overlayed on correct, and hit on incorrect. 254 * The miss is overlayed on correct, and hit on incorrect.
254 */ 255 */
255 if (p->correct) { 256 percent = get_incorrect_percent(p);
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260 257
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); 258 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0) 259 if (percent < 0)
@@ -267,76 +264,118 @@ static int t_show(struct seq_file *m, void *v)
267 return 0; 264 return 0;
268} 265}
269 266
270static struct seq_operations tracing_likely_seq_ops = { 267static void *annotated_branch_stat_start(void)
271 .start = t_start, 268{
272 .next = t_next, 269 return __start_annotated_branch_profile;
273 .stop = t_stop, 270}
274 .show = t_show, 271
272static void *
273annotated_branch_stat_next(void *v, int idx)
274{
275 struct ftrace_branch_data *p = v;
276
277 ++p;
278
279 if ((void *)p >= (void *)__stop_annotated_branch_profile)
280 return NULL;
281
282 return p;
283}
284
285static int annotated_branch_stat_cmp(void *p1, void *p2)
286{
287 struct ftrace_branch_data *a = p1;
288 struct ftrace_branch_data *b = p2;
289
290 long percent_a, percent_b;
291
292 percent_a = get_incorrect_percent(a);
293 percent_b = get_incorrect_percent(b);
294
295 if (percent_a < percent_b)
296 return -1;
297 if (percent_a > percent_b)
298 return 1;
299 else
300 return 0;
301}
302
303static struct tracer_stat annotated_branch_stats = {
304 .name = "branch_annotated",
305 .stat_start = annotated_branch_stat_start,
306 .stat_next = annotated_branch_stat_next,
307 .stat_cmp = annotated_branch_stat_cmp,
308 .stat_headers = annotated_branch_stat_headers,
309 .stat_show = branch_stat_show
275}; 310};
276 311
277static int tracing_branch_open(struct inode *inode, struct file *file) 312__init static int init_annotated_branch_stats(void)
278{ 313{
279 int ret; 314 int ret;
280 315
281 ret = seq_open(file, &tracing_likely_seq_ops); 316 ret = register_stat_tracer(&annotated_branch_stats);
282 if (!ret) { 317 if (!ret) {
283 struct seq_file *m = file->private_data; 318 printk(KERN_WARNING "Warning: could not register "
284 m->private = (void *)inode->i_private; 319 "annotated branches stats\n");
320 return 1;
285 } 321 }
286 322 return 0;
287 return ret;
288} 323}
289 324fs_initcall(init_annotated_branch_stats);
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295 325
296#ifdef CONFIG_PROFILE_ALL_BRANCHES 326#ifdef CONFIG_PROFILE_ALL_BRANCHES
327
297extern unsigned long __start_branch_profile[]; 328extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[]; 329extern unsigned long __stop_branch_profile[];
299 330
300static const struct ftrace_pointer ftrace_branch_pos = { 331static int all_branch_stat_headers(struct seq_file *m)
301 .start = __start_branch_profile, 332{
302 .stop = __stop_branch_profile, 333 seq_printf(m, " miss hit %% ");
303 .hit = 1, 334 seq_printf(m, " Function "
304}; 335 " File Line\n"
336 " ------- --------- - "
337 " -------- "
338 " ---- ----\n");
339 return 0;
340}
305 341
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */ 342static void *all_branch_stat_start(void)
343{
344 return __start_branch_profile;
345}
307 346
308extern unsigned long __start_annotated_branch_profile[]; 347static void *
309extern unsigned long __stop_annotated_branch_profile[]; 348all_branch_stat_next(void *v, int idx)
349{
350 struct ftrace_branch_data *p = v;
310 351
311static const struct ftrace_pointer ftrace_annotated_branch_pos = { 352 ++p;
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315 353
316static __init int ftrace_branch_init(void) 354 if ((void *)p >= (void *)__stop_branch_profile)
317{ 355 return NULL;
318 struct dentry *d_tracer;
319 struct dentry *entry;
320 356
321 d_tracer = tracing_init_dentry(); 357 return p;
358}
322 359
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, 360static struct tracer_stat all_branch_stats = {
324 (void *)&ftrace_annotated_branch_pos, 361 .name = "branch_all",
325 &tracing_branch_fops); 362 .stat_start = all_branch_stat_start,
326 if (!entry) 363 .stat_next = all_branch_stat_next,
327 pr_warning("Could not create debugfs " 364 .stat_headers = all_branch_stat_headers,
328 "'profile_annotatet_branch' entry\n"); 365 .stat_show = branch_stat_show
366};
329 367
330#ifdef CONFIG_PROFILE_ALL_BRANCHES 368__init static int all_annotated_branch_stats(void)
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer, 369{
332 (void *)&ftrace_branch_pos, 370 int ret;
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338 371
372 ret = register_stat_tracer(&all_branch_stats);
373 if (!ret) {
374 printk(KERN_WARNING "Warning: could not register "
375 "all branches stats\n");
376 return 1;
377 }
339 return 0; 378 return 0;
340} 379}
341 380fs_initcall(all_annotated_branch_stats);
342device_initcall(ftrace_branch_init); 381#endif /* CONFIG_PROFILE_ALL_BRANCHES */
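Note: the stat interface registered above replaces the old open-coded seq_file walker; entries are ordered by the incorrect-hit percentage computed in get_incorrect_percent(). A minimal userspace sketch of that rule (illustrative only, not kernel code): the miss count is expressed as a percentage of all hits, with -1 flagging a branch that was never evaluated.

#include <stdio.h>

/* Same integer arithmetic as get_incorrect_percent() above. */
static long incorrect_percent(unsigned long correct, unsigned long incorrect)
{
	if (correct)
		return (long)(incorrect * 100 / (correct + incorrect));
	return incorrect ? 100 : -1;	/* -1: branch never evaluated */
}

int main(void)
{
	printf("%ld\n", incorrect_percent(90, 10));	/* 10  */
	printf("%ld\n", incorrect_percent(0, 5));	/* 100 */
	printf("%ld\n", incorrect_percent(0, 0));	/* -1  */
	return 0;
}

annotated_branch_stat_cmp() above sorts the stat entries on exactly this value.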
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9236d7e25a16..f520aa419dff 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -9,6 +9,7 @@
9 * Copyright (C) 2004-2006 Ingo Molnar 9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III 10 * Copyright (C) 2004 William Lee Irwin III
11 */ 11 */
12#include <linux/ring_buffer.h>
12#include <linux/debugfs.h> 13#include <linux/debugfs.h>
13#include <linux/uaccess.h> 14#include <linux/uaccess.h>
14#include <linux/ftrace.h> 15#include <linux/ftrace.h>
@@ -16,52 +17,386 @@
16 17
17#include "trace.h" 18#include "trace.h"
18 19
19static void start_function_trace(struct trace_array *tr) 20/* function tracing enabled */
21static int ftrace_function_enabled;
22
23static struct trace_array *func_trace;
24
25static void tracing_start_function_trace(void);
26static void tracing_stop_function_trace(void);
27
28static int function_trace_init(struct trace_array *tr)
20{ 29{
30 func_trace = tr;
21 tr->cpu = get_cpu(); 31 tr->cpu = get_cpu();
22 tracing_reset_online_cpus(tr);
23 put_cpu(); 32 put_cpu();
24 33
25 tracing_start_cmdline_record(); 34 tracing_start_cmdline_record();
26 tracing_start_function_trace(); 35 tracing_start_function_trace();
36 return 0;
27} 37}
28 38
29static void stop_function_trace(struct trace_array *tr) 39static void function_trace_reset(struct trace_array *tr)
30{ 40{
31 tracing_stop_function_trace(); 41 tracing_stop_function_trace();
32 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
33} 43}
34 44
35static int function_trace_init(struct trace_array *tr) 45static void function_trace_start(struct trace_array *tr)
36{ 46{
37 start_function_trace(tr); 47 tracing_reset_online_cpus(tr);
38 return 0;
39} 48}
40 49
41static void function_trace_reset(struct trace_array *tr) 50static void
51function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
52{
53 struct trace_array *tr = func_trace;
54 struct trace_array_cpu *data;
55 unsigned long flags;
56 long disabled;
57 int cpu, resched;
58 int pc;
59
60 if (unlikely(!ftrace_function_enabled))
61 return;
62
63 pc = preempt_count();
64 resched = ftrace_preempt_disable();
65 local_save_flags(flags);
66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu];
68 disabled = atomic_inc_return(&data->disabled);
69
70 if (likely(disabled == 1))
71 trace_function(tr, ip, parent_ip, flags, pc);
72
73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched);
75}
76
77static void
78function_trace_call(unsigned long ip, unsigned long parent_ip)
42{ 79{
43 stop_function_trace(tr); 80 struct trace_array *tr = func_trace;
81 struct trace_array_cpu *data;
82 unsigned long flags;
83 long disabled;
84 int cpu;
85 int pc;
86
87 if (unlikely(!ftrace_function_enabled))
88 return;
89
90 /*
91 * Need to use raw, since this must be called before the
92 * recursive protection is performed.
93 */
94 local_irq_save(flags);
95 cpu = raw_smp_processor_id();
96 data = tr->data[cpu];
97 disabled = atomic_inc_return(&data->disabled);
98
99 if (likely(disabled == 1)) {
100 pc = preempt_count();
101 trace_function(tr, ip, parent_ip, flags, pc);
102 }
103
104 atomic_dec(&data->disabled);
105 local_irq_restore(flags);
44} 106}
45 107
46static void function_trace_start(struct trace_array *tr) 108static void
109function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
47{ 110{
48 tracing_reset_online_cpus(tr); 111 struct trace_array *tr = func_trace;
112 struct trace_array_cpu *data;
113 unsigned long flags;
114 long disabled;
115 int cpu;
116 int pc;
117
118 if (unlikely(!ftrace_function_enabled))
119 return;
120
121 /*
122 * Need to use raw, since this must be called before the
123 * recursive protection is performed.
124 */
125 local_irq_save(flags);
126 cpu = raw_smp_processor_id();
127 data = tr->data[cpu];
128 disabled = atomic_inc_return(&data->disabled);
129
130 if (likely(disabled == 1)) {
131 pc = preempt_count();
132 trace_function(tr, ip, parent_ip, flags, pc);
133 /*
134 * skip over 5 funcs:
135 * __ftrace_trace_stack,
136 * __trace_stack,
137 * function_stack_trace_call
138 * ftrace_list_func
139 * ftrace_call
140 */
141 __trace_stack(tr, flags, 5, pc);
142 }
143
144 atomic_dec(&data->disabled);
145 local_irq_restore(flags);
146}
147
148
149static struct ftrace_ops trace_ops __read_mostly =
150{
151 .func = function_trace_call,
152};
153
154static struct ftrace_ops trace_stack_ops __read_mostly =
155{
156 .func = function_stack_trace_call,
157};
158
159/* Our two options */
160enum {
161 TRACE_FUNC_OPT_STACK = 0x1,
162};
163
164static struct tracer_opt func_opts[] = {
165#ifdef CONFIG_STACKTRACE
166 { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
167#endif
168 { } /* Always set a last empty entry */
169};
170
171static struct tracer_flags func_flags = {
172 .val = 0, /* By default: all flags disabled */
173 .opts = func_opts
174};
175
176static void tracing_start_function_trace(void)
177{
178 ftrace_function_enabled = 0;
179
180 if (trace_flags & TRACE_ITER_PREEMPTONLY)
181 trace_ops.func = function_trace_call_preempt_only;
182 else
183 trace_ops.func = function_trace_call;
184
185 if (func_flags.val & TRACE_FUNC_OPT_STACK)
186 register_ftrace_function(&trace_stack_ops);
187 else
188 register_ftrace_function(&trace_ops);
189
190 ftrace_function_enabled = 1;
191}
192
193static void tracing_stop_function_trace(void)
194{
195 ftrace_function_enabled = 0;
196 /* OK if they are not registered */
197 unregister_ftrace_function(&trace_stack_ops);
198 unregister_ftrace_function(&trace_ops);
199}
200
201static int func_set_flag(u32 old_flags, u32 bit, int set)
202{
203 if (bit == TRACE_FUNC_OPT_STACK) {
204 /* do nothing if already set */
205 if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
206 return 0;
207
208 if (set) {
209 unregister_ftrace_function(&trace_ops);
210 register_ftrace_function(&trace_stack_ops);
211 } else {
212 unregister_ftrace_function(&trace_stack_ops);
213 register_ftrace_function(&trace_ops);
214 }
215
216 return 0;
217 }
218
219 return -EINVAL;
49} 220}
50 221
51static struct tracer function_trace __read_mostly = 222static struct tracer function_trace __read_mostly =
52{ 223{
53 .name = "function", 224 .name = "function",
54 .init = function_trace_init, 225 .init = function_trace_init,
55 .reset = function_trace_reset, 226 .reset = function_trace_reset,
56 .start = function_trace_start, 227 .start = function_trace_start,
228 .flags = &func_flags,
229 .set_flag = func_set_flag,
57#ifdef CONFIG_FTRACE_SELFTEST 230#ifdef CONFIG_FTRACE_SELFTEST
58 .selftest = trace_selftest_startup_function, 231 .selftest = trace_selftest_startup_function,
59#endif 232#endif
60}; 233};
61 234
235#ifdef CONFIG_DYNAMIC_FTRACE
236static void
237ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
238{
239 long *count = (long *)data;
240
241 if (tracing_is_on())
242 return;
243
244 if (!*count)
245 return;
246
247 if (*count != -1)
248 (*count)--;
249
250 tracing_on();
251}
252
253static void
254ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
255{
256 long *count = (long *)data;
257
258 if (!tracing_is_on())
259 return;
260
261 if (!*count)
262 return;
263
264 if (*count != -1)
265 (*count)--;
266
267 tracing_off();
268}
269
270static int
271ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
272 struct ftrace_hook_ops *ops, void *data);
273
274static struct ftrace_hook_ops traceon_hook_ops = {
275 .func = ftrace_traceon,
276 .print = ftrace_trace_onoff_print,
277};
278
279static struct ftrace_hook_ops traceoff_hook_ops = {
280 .func = ftrace_traceoff,
281 .print = ftrace_trace_onoff_print,
282};
283
284static int
285ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
286 struct ftrace_hook_ops *ops, void *data)
287{
288 char str[KSYM_SYMBOL_LEN];
289 long count = (long)data;
290
291 kallsyms_lookup(ip, NULL, NULL, NULL, str);
292 seq_printf(m, "%s:", str);
293
294 if (ops == &traceon_hook_ops)
295 seq_printf(m, "traceon");
296 else
297 seq_printf(m, "traceoff");
298
299 if (count != -1)
300 seq_printf(m, ":count=%ld", count);
301 seq_putc(m, '\n');
302
303 return 0;
304}
305
306static int
307ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
308{
309 struct ftrace_hook_ops *ops;
310
311 /* we register both traceon and traceoff to this callback */
312 if (strcmp(cmd, "traceon") == 0)
313 ops = &traceon_hook_ops;
314 else
315 ops = &traceoff_hook_ops;
316
317 unregister_ftrace_function_hook_func(glob, ops);
318
319 return 0;
320}
321
322static int
323ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
324{
325 struct ftrace_hook_ops *ops;
326 void *count = (void *)-1;
327 char *number;
328 int ret;
329
330 /* hash funcs only work with set_ftrace_filter */
331 if (!enable)
332 return -EINVAL;
333
334 if (glob[0] == '!')
335 return ftrace_trace_onoff_unreg(glob+1, cmd, param);
336
337 /* we register both traceon and traceoff to this callback */
338 if (strcmp(cmd, "traceon") == 0)
339 ops = &traceon_hook_ops;
340 else
341 ops = &traceoff_hook_ops;
342
343 if (!param)
344 goto out_reg;
345
346 number = strsep(&param, ":");
347
348 if (!strlen(number))
349 goto out_reg;
350
351 /*
352 * We use the callback data field (which is a pointer)
353 * as our counter.
354 */
355 ret = strict_strtoul(number, 0, (unsigned long *)&count);
356 if (ret)
357 return ret;
358
359 out_reg:
360 ret = register_ftrace_function_hook(glob, ops, count);
361
362 return ret;
363}
364
365static struct ftrace_func_command ftrace_traceon_cmd = {
366 .name = "traceon",
367 .func = ftrace_trace_onoff_callback,
368};
369
370static struct ftrace_func_command ftrace_traceoff_cmd = {
371 .name = "traceoff",
372 .func = ftrace_trace_onoff_callback,
373};
374
375static int __init init_func_cmd_traceon(void)
376{
377 int ret;
378
379 ret = register_ftrace_command(&ftrace_traceoff_cmd);
380 if (ret)
381 return ret;
382
383 ret = register_ftrace_command(&ftrace_traceon_cmd);
384 if (ret)
385 unregister_ftrace_command(&ftrace_traceoff_cmd);
386 return ret;
387}
388#else
389static inline int init_func_cmd_traceon(void)
390{
391 return 0;
392}
393#endif /* CONFIG_DYNAMIC_FTRACE */
394
62static __init int init_function_trace(void) 395static __init int init_function_trace(void)
63{ 396{
397 init_func_cmd_traceon();
64 return register_tracer(&function_trace); 398 return register_tracer(&function_trace);
65} 399}
66 400
67device_initcall(init_function_trace); 401device_initcall(init_function_trace);
402
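Note: the traceon/traceoff hooks registered above keep their firing budget in the hook's data slot: -1 means fire every time, a positive value is decremented once per firing, and 0 disables the hook; the budget is supplied through set_ftrace_filter and parsed from the ":count" suffix in ftrace_trace_onoff_callback(). A minimal userspace sketch of that counting convention (illustrative only; the real hooks also skip the decrement when tracing is already in the requested state):

#include <stdio.h>

/* Returns 1 if the hook should fire, consuming one unit of budget. */
static int hook_should_fire(long *count)
{
	if (!*count)
		return 0;		/* budget exhausted: do nothing */
	if (*count != -1)
		(*count)--;		/* finite budget: consume one firing */
	return 1;
}

int main(void)
{
	long limited = 2, unlimited = -1;
	int i;

	for (i = 0; i < 4; i++)
		printf("limited: %s  unlimited: %s\n",
		       hook_should_fire(&limited)   ? "fire" : "skip",
		       hook_should_fire(&unlimited) ? "fire" : "skip");
	return 0;
}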
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e5b38e..519a0cab1530 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * 2 *
3 * Function graph tracer. 3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> 4 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which 5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com> 6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 * 7 *
@@ -12,6 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16#define TRACE_GRAPH_INDENT 2 17#define TRACE_GRAPH_INDENT 2
17 18
@@ -20,9 +21,11 @@
20#define TRACE_GRAPH_PRINT_CPU 0x2 21#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 22#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8 23#define TRACE_GRAPH_PRINT_PROC 0x8
24#define TRACE_GRAPH_PRINT_DURATION 0x10
25#define TRACE_GRAPH_PRINT_ABS_TIME 0X20
23 26
24static struct tracer_opt trace_opts[] = { 27static struct tracer_opt trace_opts[] = {
25 /* Display overruns ? */ 28 /* Display overruns? (for self-debug purpose) */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, 29 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27 /* Display CPU ? */ 30 /* Display CPU ? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, 31 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
@@ -30,26 +33,26 @@ static struct tracer_opt trace_opts[] = {
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, 33 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */ 34 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, 35 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
36 /* Display duration of execution */
37 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
38 /* Display absolute time of an entry */
39 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
33 { } /* Empty entry */ 40 { } /* Empty entry */
34}; 41};
35 42
36static struct tracer_flags tracer_flags = { 43static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */ 44 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, 45 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
46 TRACE_GRAPH_PRINT_DURATION,
39 .opts = trace_opts 47 .opts = trace_opts
40}; 48};
41 49
42/* pid on the last trace processed */ 50/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 }; 51
44 52
45static int graph_trace_init(struct trace_array *tr) 53static int graph_trace_init(struct trace_array *tr)
46{ 54{
47 int cpu, ret; 55 int ret = register_ftrace_graph(&trace_graph_return,
48
49 for_each_online_cpu(cpu)
50 tracing_reset(tr, cpu);
51
52 ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry); 56 &trace_graph_entry);
54 if (ret) 57 if (ret)
55 return ret; 58 return ret;
@@ -153,17 +156,25 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
153 156
154/* If the pid changed since the last trace, output this event */ 157/* If the pid changed since the last trace, output this event */
155static enum print_line_t 158static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu) 159verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
157{ 160{
158 pid_t prev_pid; 161 pid_t prev_pid;
162 pid_t *last_pid;
159 int ret; 163 int ret;
160 164
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid) 165 if (!last_pids_cpu)
162 return TRACE_TYPE_HANDLED; 166 return TRACE_TYPE_HANDLED;
163 167
164 prev_pid = last_pid[cpu]; 168 last_pid = per_cpu_ptr(last_pids_cpu, cpu);
165 last_pid[cpu] = pid; 169
170 if (*last_pid == pid)
171 return TRACE_TYPE_HANDLED;
166 172
173 prev_pid = *last_pid;
174 *last_pid = pid;
175
176 if (prev_pid == -1)
177 return TRACE_TYPE_HANDLED;
167/* 178/*
168 * Context-switch trace line: 179 * Context-switch trace line:
169 180
@@ -175,34 +186,34 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu)
175 ret = trace_seq_printf(s, 186 ret = trace_seq_printf(s,
176 " ------------------------------------------\n"); 187 " ------------------------------------------\n");
177 if (!ret) 188 if (!ret)
178 TRACE_TYPE_PARTIAL_LINE; 189 return TRACE_TYPE_PARTIAL_LINE;
179 190
180 ret = print_graph_cpu(s, cpu); 191 ret = print_graph_cpu(s, cpu);
181 if (ret == TRACE_TYPE_PARTIAL_LINE) 192 if (ret == TRACE_TYPE_PARTIAL_LINE)
182 TRACE_TYPE_PARTIAL_LINE; 193 return TRACE_TYPE_PARTIAL_LINE;
183 194
184 ret = print_graph_proc(s, prev_pid); 195 ret = print_graph_proc(s, prev_pid);
185 if (ret == TRACE_TYPE_PARTIAL_LINE) 196 if (ret == TRACE_TYPE_PARTIAL_LINE)
186 TRACE_TYPE_PARTIAL_LINE; 197 return TRACE_TYPE_PARTIAL_LINE;
187 198
188 ret = trace_seq_printf(s, " => "); 199 ret = trace_seq_printf(s, " => ");
189 if (!ret) 200 if (!ret)
190 TRACE_TYPE_PARTIAL_LINE; 201 return TRACE_TYPE_PARTIAL_LINE;
191 202
192 ret = print_graph_proc(s, pid); 203 ret = print_graph_proc(s, pid);
193 if (ret == TRACE_TYPE_PARTIAL_LINE) 204 if (ret == TRACE_TYPE_PARTIAL_LINE)
194 TRACE_TYPE_PARTIAL_LINE; 205 return TRACE_TYPE_PARTIAL_LINE;
195 206
196 ret = trace_seq_printf(s, 207 ret = trace_seq_printf(s,
197 "\n ------------------------------------------\n\n"); 208 "\n ------------------------------------------\n\n");
198 if (!ret) 209 if (!ret)
199 TRACE_TYPE_PARTIAL_LINE; 210 return TRACE_TYPE_PARTIAL_LINE;
200 211
201 return ret; 212 return TRACE_TYPE_HANDLED;
202} 213}
203 214
204static bool 215static struct ftrace_graph_ret_entry *
205trace_branch_is_leaf(struct trace_iterator *iter, 216get_return_for_leaf(struct trace_iterator *iter,
206 struct ftrace_graph_ent_entry *curr) 217 struct ftrace_graph_ent_entry *curr)
207{ 218{
208 struct ring_buffer_iter *ring_iter; 219 struct ring_buffer_iter *ring_iter;
@@ -211,29 +222,63 @@ trace_branch_is_leaf(struct trace_iterator *iter,
211 222
212 ring_iter = iter->buffer_iter[iter->cpu]; 223 ring_iter = iter->buffer_iter[iter->cpu];
213 224
214 if (!ring_iter) 225 /* First peek to compare current entry and the next one */
215 return false; 226 if (ring_iter)
216 227 event = ring_buffer_iter_peek(ring_iter, NULL);
217 event = ring_buffer_iter_peek(ring_iter, NULL); 228 else {
229 /* We need to consume the current entry to see the next one */
230 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
231 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
232 NULL);
233 }
218 234
219 if (!event) 235 if (!event)
220 return false; 236 return NULL;
221 237
222 next = ring_buffer_event_data(event); 238 next = ring_buffer_event_data(event);
223 239
224 if (next->ent.type != TRACE_GRAPH_RET) 240 if (next->ent.type != TRACE_GRAPH_RET)
225 return false; 241 return NULL;
226 242
227 if (curr->ent.pid != next->ent.pid || 243 if (curr->ent.pid != next->ent.pid ||
228 curr->graph_ent.func != next->ret.func) 244 curr->graph_ent.func != next->ret.func)
229 return false; 245 return NULL;
246
247 /* this is a leaf, now advance the iterator */
248 if (ring_iter)
249 ring_buffer_read(ring_iter, NULL);
250
251 return next;
252}
253
 254/* Signal an overhead of time execution to the output */
255static int
256print_graph_overhead(unsigned long long duration, struct trace_seq *s)
257{
 258	/* If duration display is disabled, we don't need anything */
259 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
260 return 1;
261
262 /* Non nested entry or return */
263 if (duration == -1)
264 return trace_seq_printf(s, " ");
265
266 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
267 /* Duration exceeded 100 msecs */
268 if (duration > 100000ULL)
269 return trace_seq_printf(s, "! ");
230 270
231 return true; 271 /* Duration exceeded 10 msecs */
272 if (duration > 10000ULL)
273 return trace_seq_printf(s, "+ ");
274 }
275
276 return trace_seq_printf(s, " ");
232} 277}
233 278
234static enum print_line_t 279static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr, 280print_graph_irq(struct trace_seq *s, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid) 281 enum trace_type type, int cpu, pid_t pid)
237{ 282{
238 int ret; 283 int ret;
239 284
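Note: get_return_for_leaf() above decides whether a call can be folded into a single "func();" line by peeking at the next buffered event: the call is a leaf only when that event is the return record for the same pid and function. A minimal userspace sketch of the comparison (illustrative only; the real code also advances or consumes the ring-buffer iterator once a leaf is found):

#include <stdio.h>
#include <string.h>

enum ev_type { EV_ENT, EV_RET };

struct ev {
	enum ev_type	type;
	int		pid;
	const char	*func;
};

/* Return the matching return event when evs[i] is a leaf call, else NULL. */
static const struct ev *return_for_leaf(const struct ev *evs, int n, int i)
{
	if (i + 1 >= n)
		return NULL;
	if (evs[i + 1].type != EV_RET)
		return NULL;
	if (evs[i + 1].pid != evs[i].pid ||
	    strcmp(evs[i + 1].func, evs[i].func) != 0)
		return NULL;
	return &evs[i + 1];
}

int main(void)
{
	const struct ev evs[] = {
		{ EV_ENT, 1, "do_IRQ"     },
		{ EV_ENT, 1, "handle_irq" },	/* nested call: do_IRQ is not a leaf */
		{ EV_RET, 1, "handle_irq" },
		{ EV_RET, 1, "do_IRQ"     },
	};

	printf("do_IRQ leaf?     %s\n", return_for_leaf(evs, 4, 0) ? "yes" : "no");
	printf("handle_irq leaf? %s\n", return_for_leaf(evs, 4, 1) ? "yes" : "no");
	return 0;
}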
@@ -241,35 +286,40 @@ print_graph_irq(struct trace_seq *s, unsigned long addr,
241 addr >= (unsigned long)__irqentry_text_end) 286 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED; 287 return TRACE_TYPE_UNHANDLED;
243 288
244 if (type == TRACE_GRAPH_ENT) { 289 /* Cpu */
245 ret = trace_seq_printf(s, "==========> | "); 290 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
246 } else { 291 ret = print_graph_cpu(s, cpu);
247 /* Cpu */ 292 if (ret == TRACE_TYPE_PARTIAL_LINE)
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 293 return TRACE_TYPE_PARTIAL_LINE;
249 ret = print_graph_cpu(s, cpu); 294 }
250 if (ret == TRACE_TYPE_PARTIAL_LINE) 295 /* Proc */
251 return TRACE_TYPE_PARTIAL_LINE; 296 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
252 } 297 ret = print_graph_proc(s, pid);
253 /* Proc */ 298 if (ret == TRACE_TYPE_PARTIAL_LINE)
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 299 return TRACE_TYPE_PARTIAL_LINE;
255 ret = print_graph_proc(s, pid); 300 ret = trace_seq_printf(s, " | ");
256 if (ret == TRACE_TYPE_PARTIAL_LINE) 301 if (!ret)
257 return TRACE_TYPE_PARTIAL_LINE; 302 return TRACE_TYPE_PARTIAL_LINE;
303 }
258 304
259 ret = trace_seq_printf(s, " | "); 305 /* No overhead */
260 if (!ret) 306 ret = print_graph_overhead(-1, s);
261 return TRACE_TYPE_PARTIAL_LINE; 307 if (!ret)
262 } 308 return TRACE_TYPE_PARTIAL_LINE;
263 309
264 /* No overhead */ 310 if (type == TRACE_GRAPH_ENT)
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 311 ret = trace_seq_printf(s, "==========>");
266 ret = trace_seq_printf(s, " "); 312 else
267 if (!ret) 313 ret = trace_seq_printf(s, "<==========");
268 return TRACE_TYPE_PARTIAL_LINE; 314
269 } 315 if (!ret)
316 return TRACE_TYPE_PARTIAL_LINE;
317
 318	/* Don't close the duration column if we don't have one */
319 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
320 trace_seq_printf(s, " |");
321 ret = trace_seq_printf(s, "\n");
270 322
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret) 323 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE; 324 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED; 325 return TRACE_TYPE_HANDLED;
@@ -288,7 +338,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
288 sprintf(msecs_str, "%lu", (unsigned long) duration); 338 sprintf(msecs_str, "%lu", (unsigned long) duration);
289 339
290 /* Print msecs */ 340 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str); 341 ret = trace_seq_printf(s, "%s", msecs_str);
292 if (!ret) 342 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE; 343 return TRACE_TYPE_PARTIAL_LINE;
294 344
@@ -321,51 +371,44 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
321 371
322} 372}
323 373
324/* Signal a overhead of time execution to the output */ 374static int print_graph_abs_time(u64 t, struct trace_seq *s)
325static int
326print_graph_overhead(unsigned long long duration, struct trace_seq *s)
327{ 375{
328 /* Duration exceeded 100 msecs */ 376 unsigned long usecs_rem;
329 if (duration > 100000ULL)
330 return trace_seq_printf(s, "! ");
331 377
332 /* Duration exceeded 10 msecs */ 378 usecs_rem = do_div(t, 1000000000);
333 if (duration > 10000ULL) 379 usecs_rem /= 1000;
334 return trace_seq_printf(s, "+ ");
335 380
336 return trace_seq_printf(s, " "); 381 return trace_seq_printf(s, "%5lu.%06lu | ",
382 (unsigned long)t, usecs_rem);
337} 383}
338 384
339/* Case of a leaf function on its call entry */ 385/* Case of a leaf function on its call entry */
340static enum print_line_t 386static enum print_line_t
341print_graph_entry_leaf(struct trace_iterator *iter, 387print_graph_entry_leaf(struct trace_iterator *iter,
342 struct ftrace_graph_ent_entry *entry, struct trace_seq *s) 388 struct ftrace_graph_ent_entry *entry,
389 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
343{ 390{
344 struct ftrace_graph_ret_entry *ret_entry;
345 struct ftrace_graph_ret *graph_ret; 391 struct ftrace_graph_ret *graph_ret;
346 struct ring_buffer_event *event;
347 struct ftrace_graph_ent *call; 392 struct ftrace_graph_ent *call;
348 unsigned long long duration; 393 unsigned long long duration;
349 int ret; 394 int ret;
350 int i; 395 int i;
351 396
352 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
353 ret_entry = ring_buffer_event_data(event);
354 graph_ret = &ret_entry->ret; 397 graph_ret = &ret_entry->ret;
355 call = &entry->graph_ent; 398 call = &entry->graph_ent;
356 duration = graph_ret->rettime - graph_ret->calltime; 399 duration = graph_ret->rettime - graph_ret->calltime;
357 400
358 /* Overhead */ 401 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 402 ret = print_graph_overhead(duration, s);
360 ret = print_graph_overhead(duration, s); 403 if (!ret)
361 if (!ret) 404 return TRACE_TYPE_PARTIAL_LINE;
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364 405
365 /* Duration */ 406 /* Duration */
366 ret = print_graph_duration(duration, s); 407 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
367 if (ret == TRACE_TYPE_PARTIAL_LINE) 408 ret = print_graph_duration(duration, s);
368 return TRACE_TYPE_PARTIAL_LINE; 409 if (ret == TRACE_TYPE_PARTIAL_LINE)
410 return TRACE_TYPE_PARTIAL_LINE;
411 }
369 412
370 /* Function */ 413 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 414 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -394,25 +437,17 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
394 struct ftrace_graph_ent *call = &entry->graph_ent; 437 struct ftrace_graph_ent *call = &entry->graph_ent;
395 438
396 /* No overhead */ 439 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 440 ret = print_graph_overhead(-1, s);
398 ret = trace_seq_printf(s, " "); 441 if (!ret)
399 if (!ret) 442 return TRACE_TYPE_PARTIAL_LINE;
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402 443
403 /* Interrupt */ 444 /* No time */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid); 445 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | "); 446 ret = trace_seq_printf(s, " | ");
408 if (!ret) 447 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE; 448 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 } 449 }
414 450
415
416 /* Function */ 451 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 452 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " "); 453 ret = trace_seq_printf(s, " ");
@@ -428,20 +463,40 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
428 if (!ret) 463 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE; 464 return TRACE_TYPE_PARTIAL_LINE;
430 465
431 return TRACE_TYPE_HANDLED; 466 /*
467 * we already consumed the current entry to check the next one
468 * and see if this is a leaf.
469 */
470 return TRACE_TYPE_NO_CONSUME;
432} 471}
433 472
434static enum print_line_t 473static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 474print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu) 475 struct trace_iterator *iter)
437{ 476{
438 int ret; 477 int ret;
478 int cpu = iter->cpu;
479 pid_t *last_entry = iter->private;
439 struct trace_entry *ent = iter->ent; 480 struct trace_entry *ent = iter->ent;
481 struct ftrace_graph_ent *call = &field->graph_ent;
482 struct ftrace_graph_ret_entry *leaf_ret;
440 483
441 /* Pid */ 484 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 485 if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE; 486 return TRACE_TYPE_PARTIAL_LINE;
444 487
488 /* Interrupt */
489 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
490 if (ret == TRACE_TYPE_PARTIAL_LINE)
491 return TRACE_TYPE_PARTIAL_LINE;
492
493 /* Absolute time */
494 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
495 ret = print_graph_abs_time(iter->ts, s);
496 if (!ret)
497 return TRACE_TYPE_PARTIAL_LINE;
498 }
499
445 /* Cpu */ 500 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 501 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu); 502 ret = print_graph_cpu(s, cpu);
@@ -460,8 +515,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
460 return TRACE_TYPE_PARTIAL_LINE; 515 return TRACE_TYPE_PARTIAL_LINE;
461 } 516 }
462 517
463 if (trace_branch_is_leaf(iter, field)) 518 leaf_ret = get_return_for_leaf(iter, field);
464 return print_graph_entry_leaf(iter, field, s); 519 if (leaf_ret)
520 return print_graph_entry_leaf(iter, field, leaf_ret, s);
465 else 521 else
466 return print_graph_entry_nested(field, s, iter->ent->pid, cpu); 522 return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
467 523
@@ -469,16 +525,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
469 525
470static enum print_line_t 526static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 527print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu) 528 struct trace_entry *ent, struct trace_iterator *iter)
473{ 529{
474 int i; 530 int i;
475 int ret; 531 int ret;
532 int cpu = iter->cpu;
533 pid_t *last_pid = iter->private;
476 unsigned long long duration = trace->rettime - trace->calltime; 534 unsigned long long duration = trace->rettime - trace->calltime;
477 535
478 /* Pid */ 536 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 537 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE; 538 return TRACE_TYPE_PARTIAL_LINE;
481 539
540 /* Absolute time */
541 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
542 ret = print_graph_abs_time(iter->ts, s);
543 if (!ret)
544 return TRACE_TYPE_PARTIAL_LINE;
545 }
546
482 /* Cpu */ 547 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 548 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu); 549 ret = print_graph_cpu(s, cpu);
@@ -498,16 +563,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
498 } 563 }
499 564
500 /* Overhead */ 565 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 566 ret = print_graph_overhead(duration, s);
502 ret = print_graph_overhead(duration, s); 567 if (!ret)
503 if (!ret) 568 return TRACE_TYPE_PARTIAL_LINE;
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506 569
507 /* Duration */ 570 /* Duration */
508 ret = print_graph_duration(duration, s); 571 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
509 if (ret == TRACE_TYPE_PARTIAL_LINE) 572 ret = print_graph_duration(duration, s);
510 return TRACE_TYPE_PARTIAL_LINE; 573 if (ret == TRACE_TYPE_PARTIAL_LINE)
574 return TRACE_TYPE_PARTIAL_LINE;
575 }
511 576
512 /* Closing brace */ 577 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 578 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
@@ -541,14 +606,23 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
541{ 606{
542 int i; 607 int i;
543 int ret; 608 int ret;
609 int cpu = iter->cpu;
610 pid_t *last_pid = iter->private;
611
612 /* Absolute time */
613 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
614 ret = print_graph_abs_time(iter->ts, s);
615 if (!ret)
616 return TRACE_TYPE_PARTIAL_LINE;
617 }
544 618
545 /* Pid */ 619 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) 620 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE; 621 return TRACE_TYPE_PARTIAL_LINE;
548 622
549 /* Cpu */ 623 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 624 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu); 625 ret = print_graph_cpu(s, cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE) 626 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE; 627 return TRACE_TYPE_PARTIAL_LINE;
554 } 628 }
@@ -565,17 +639,17 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
565 } 639 }
566 640
567 /* No overhead */ 641 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 642 ret = print_graph_overhead(-1, s);
569 ret = trace_seq_printf(s, " "); 643 if (!ret)
644 return TRACE_TYPE_PARTIAL_LINE;
645
646 /* No time */
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
648 ret = trace_seq_printf(s, " | ");
570 if (!ret) 649 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE; 650 return TRACE_TYPE_PARTIAL_LINE;
572 } 651 }
573 652
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */ 653 /* Indentation */
580 if (trace->depth > 0) 654 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { 655 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
@@ -589,8 +663,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
589 if (!ret) 663 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE; 664 return TRACE_TYPE_PARTIAL_LINE;
591 665
592 if (ent->flags & TRACE_FLAG_CONT) 666 /* Strip ending newline */
593 trace_seq_print_cont(s, iter); 667 if (s->buffer[s->len - 1] == '\n') {
668 s->buffer[s->len - 1] = '\0';
669 s->len--;
670 }
594 671
595 ret = trace_seq_printf(s, " */\n"); 672 ret = trace_seq_printf(s, " */\n");
596 if (!ret) 673 if (!ret)
@@ -610,13 +687,12 @@ print_graph_function(struct trace_iterator *iter)
610 case TRACE_GRAPH_ENT: { 687 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field; 688 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry); 689 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter, 690 return print_graph_entry(field, s, iter);
614 iter->cpu);
615 } 691 }
616 case TRACE_GRAPH_RET: { 692 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field; 693 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry); 694 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu); 695 return print_graph_return(&field->ret, s, entry, iter);
620 } 696 }
621 case TRACE_PRINT: { 697 case TRACE_PRINT: {
622 struct print_entry *field; 698 struct print_entry *field;
@@ -632,33 +708,63 @@ static void print_graph_headers(struct seq_file *s)
632{ 708{
633 /* 1st line */ 709 /* 1st line */
634 seq_printf(s, "# "); 710 seq_printf(s, "# ");
711 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
712 seq_printf(s, " TIME ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 713 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU "); 714 seq_printf(s, "CPU");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 715 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID "); 716 seq_printf(s, " TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) 717 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
640 seq_printf(s, "OVERHEAD/"); 718 seq_printf(s, " DURATION ");
641 seq_printf(s, "DURATION FUNCTION CALLS\n"); 719 seq_printf(s, " FUNCTION CALLS\n");
642 720
643 /* 2nd line */ 721 /* 2nd line */
644 seq_printf(s, "# "); 722 seq_printf(s, "# ");
723 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
724 seq_printf(s, " | ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 725 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| "); 726 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 727 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | "); 728 seq_printf(s, " | | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 729 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
650 seq_printf(s, "| "); 730 seq_printf(s, " | | ");
651 seq_printf(s, "| | | | |\n"); 731 seq_printf(s, " | | | |\n");
652 } else 732}
653 seq_printf(s, " | | | | |\n"); 733
734static void graph_trace_open(struct trace_iterator *iter)
735{
736 /* pid on the last trace processed */
737 pid_t *last_pid = alloc_percpu(pid_t);
738 int cpu;
739
740 if (!last_pid)
741 pr_warning("function graph tracer: not enough memory\n");
742 else
743 for_each_possible_cpu(cpu) {
744 pid_t *pid = per_cpu_ptr(last_pid, cpu);
745 *pid = -1;
746 }
747
748 iter->private = last_pid;
654} 749}
750
751static void graph_trace_close(struct trace_iterator *iter)
752{
753 percpu_free(iter->private);
754}
755
655static struct tracer graph_trace __read_mostly = { 756static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph", 757 .name = "function_graph",
758 .open = graph_trace_open,
759 .close = graph_trace_close,
657 .init = graph_trace_init, 760 .init = graph_trace_init,
658 .reset = graph_trace_reset, 761 .reset = graph_trace_reset,
659 .print_line = print_graph_function, 762 .print_line = print_graph_function,
660 .print_header = print_graph_headers, 763 .print_header = print_graph_headers,
661 .flags = &tracer_flags, 764 .flags = &tracer_flags,
765#ifdef CONFIG_FTRACE_SELFTEST
766 .selftest = trace_selftest_startup_function_graph,
767#endif
662}; 768};
663 769
664static __init int init_graph_trace(void) 770static __init int init_graph_trace(void)
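Note: print_graph_abs_time() above renders a nanosecond timestamp as a "seconds.microseconds" column; do_div() divides in place and returns the remainder. A minimal userspace sketch of the same arithmetic (illustrative only):

#include <stdio.h>

static void print_abs_time(unsigned long long t_ns)
{
	/* remainder in nanoseconds, scaled down to microseconds */
	unsigned long usecs_rem = (unsigned long)(t_ns % 1000000000ULL) / 1000;
	unsigned long long secs = t_ns / 1000000000ULL;

	printf("%5llu.%06lu | \n", secs, usecs_rem);
}

int main(void)
{
	print_abs_time(1234567891234ULL);	/* -> " 1234.567891 | " */
	return 0;
}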
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 649df22d435f..3561aace075c 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * h/w branch tracer for x86 based on bts 2 * h/w branch tracer for x86 based on bts
3 * 3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com> 4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
5 * 6 *
6 */ 7 */
7 8
@@ -10,21 +11,44 @@
10#include <linux/debugfs.h> 11#include <linux/debugfs.h>
11#include <linux/ftrace.h> 12#include <linux/ftrace.h>
12#include <linux/kallsyms.h> 13#include <linux/kallsyms.h>
14#include <linux/mutex.h>
15#include <linux/cpu.h>
16#include <linux/smp.h>
13 17
14#include <asm/ds.h> 18#include <asm/ds.h>
15 19
16#include "trace.h" 20#include "trace.h"
21#include "trace_output.h"
17 22
18 23
19#define SIZEOF_BTS (1 << 13) 24#define SIZEOF_BTS (1 << 13)
20 25
26/* The tracer mutex protects the below per-cpu tracer array.
27 It needs to be held to:
28 - start tracing on all cpus
29 - stop tracing on all cpus
30 - start tracing on a single hotplug cpu
31 - stop tracing on a single hotplug cpu
32 - read the trace from all cpus
33 - read the trace from a single cpu
34*/
35static DEFINE_MUTEX(bts_tracer_mutex);
21static DEFINE_PER_CPU(struct bts_tracer *, tracer); 36static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); 37static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23 38
24#define this_tracer per_cpu(tracer, smp_processor_id()) 39#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id()) 40#define this_buffer per_cpu(buffer, smp_processor_id())
26 41
42static int __read_mostly trace_hw_branches_enabled;
43static struct trace_array *hw_branch_trace __read_mostly;
27 44
45
46/*
47 * Start tracing on the current cpu.
48 * The argument is ignored.
49 *
50 * pre: bts_tracer_mutex must be locked.
51 */
28static void bts_trace_start_cpu(void *arg) 52static void bts_trace_start_cpu(void *arg)
29{ 53{
30 if (this_tracer) 54 if (this_tracer)
@@ -42,14 +66,20 @@ static void bts_trace_start_cpu(void *arg)
42 66
43static void bts_trace_start(struct trace_array *tr) 67static void bts_trace_start(struct trace_array *tr)
44{ 68{
45 int cpu; 69 mutex_lock(&bts_tracer_mutex);
46 70
47 tracing_reset_online_cpus(tr); 71 on_each_cpu(bts_trace_start_cpu, NULL, 1);
72 trace_hw_branches_enabled = 1;
48 73
49 for_each_cpu(cpu, cpu_possible_mask) 74 mutex_unlock(&bts_tracer_mutex);
50 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
51} 75}
52 76
77/*
78 * Stop tracing on the current cpu.
79 * The argument is ignored.
80 *
81 * pre: bts_tracer_mutex must be locked.
82 */
53static void bts_trace_stop_cpu(void *arg) 83static void bts_trace_stop_cpu(void *arg)
54{ 84{
55 if (this_tracer) { 85 if (this_tracer) {
@@ -60,26 +90,62 @@ static void bts_trace_stop_cpu(void *arg)
60 90
61static void bts_trace_stop(struct trace_array *tr) 91static void bts_trace_stop(struct trace_array *tr)
62{ 92{
63 int cpu; 93 mutex_lock(&bts_tracer_mutex);
94
95 trace_hw_branches_enabled = 0;
96 on_each_cpu(bts_trace_stop_cpu, NULL, 1);
97
98 mutex_unlock(&bts_tracer_mutex);
99}
100
101static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
102 unsigned long action, void *hcpu)
103{
104 unsigned int cpu = (unsigned long)hcpu;
105
106 mutex_lock(&bts_tracer_mutex);
107
108 if (!trace_hw_branches_enabled)
109 goto out;
64 110
65 for_each_cpu(cpu, cpu_possible_mask) 111 switch (action) {
112 case CPU_ONLINE:
113 case CPU_DOWN_FAILED:
114 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
115 break;
116 case CPU_DOWN_PREPARE:
66 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); 117 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
118 break;
119 }
120
121 out:
122 mutex_unlock(&bts_tracer_mutex);
123 return NOTIFY_DONE;
67} 124}
68 125
69static int bts_trace_init(struct trace_array *tr) 126static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
127 .notifier_call = bts_hotcpu_handler
128};
129
130static int __cpuinit bts_trace_init(struct trace_array *tr)
70{ 131{
71 tracing_reset_online_cpus(tr); 132 hw_branch_trace = tr;
133
134 register_hotcpu_notifier(&bts_hotcpu_notifier);
72 bts_trace_start(tr); 135 bts_trace_start(tr);
73 136
74 return 0; 137 return 0;
75} 138}
76 139
140static void __cpuinit bts_trace_reset(struct trace_array *tr)
141{
142 bts_trace_stop(tr);
143 unregister_hotcpu_notifier(&bts_hotcpu_notifier);
144}
145
77static void bts_trace_print_header(struct seq_file *m) 146static void bts_trace_print_header(struct seq_file *m)
78{ 147{
79 seq_puts(m, 148 seq_puts(m, "# CPU# TO <- FROM\n");
80 "# CPU# FROM TO FUNCTION\n");
81 seq_puts(m,
82 "# | | | |\n");
83} 149}
84 150
85static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) 151static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
@@ -87,15 +153,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
87 struct trace_entry *entry = iter->ent; 153 struct trace_entry *entry = iter->ent;
88 struct trace_seq *seq = &iter->seq; 154 struct trace_seq *seq = &iter->seq;
89 struct hw_branch_entry *it; 155 struct hw_branch_entry *it;
156 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
90 157
91 trace_assign_type(it, entry); 158 trace_assign_type(it, entry);
92 159
93 if (entry->type == TRACE_HW_BRANCHES) { 160 if (entry->type == TRACE_HW_BRANCHES) {
94 if (trace_seq_printf(seq, "%4d ", entry->cpu) && 161 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
95 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", 162 seq_print_ip_sym(seq, it->to, symflags) &&
96 it->from, it->to) && 163 trace_seq_printf(seq, "\t <- ") &&
97 (!it->from || 164 seq_print_ip_sym(seq, it->from, symflags) &&
98 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
99 trace_seq_printf(seq, "\n")) 165 trace_seq_printf(seq, "\n"))
100 return TRACE_TYPE_HANDLED; 166 return TRACE_TYPE_HANDLED;
101 return TRACE_TYPE_PARTIAL_LINE; 167 return TRACE_TYPE_PARTIAL_LINE;
@@ -103,26 +169,42 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
103 return TRACE_TYPE_UNHANDLED; 169 return TRACE_TYPE_UNHANDLED;
104} 170}
105 171
106void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) 172void trace_hw_branch(u64 from, u64 to)
107{ 173{
174 struct trace_array *tr = hw_branch_trace;
108 struct ring_buffer_event *event; 175 struct ring_buffer_event *event;
109 struct hw_branch_entry *entry; 176 struct hw_branch_entry *entry;
110 unsigned long irq; 177 unsigned long irq1;
178 int cpu;
111 179
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); 180 if (unlikely(!tr))
113 if (!event)
114 return; 181 return;
182
183 if (unlikely(!trace_hw_branches_enabled))
184 return;
185
186 local_irq_save(irq1);
187 cpu = raw_smp_processor_id();
188 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
189 goto out;
190
191 event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
192 sizeof(*entry), 0, 0);
193 if (!event)
194 goto out;
115 entry = ring_buffer_event_data(event); 195 entry = ring_buffer_event_data(event);
116 tracing_generic_entry_update(&entry->ent, 0, from); 196 tracing_generic_entry_update(&entry->ent, 0, from);
117 entry->ent.type = TRACE_HW_BRANCHES; 197 entry->ent.type = TRACE_HW_BRANCHES;
118 entry->ent.cpu = smp_processor_id();
119 entry->from = from; 198 entry->from = from;
120 entry->to = to; 199 entry->to = to;
121 ring_buffer_unlock_commit(tr->buffer, event, irq); 200 trace_buffer_unlock_commit(tr, event, 0, 0);
201
202 out:
203 atomic_dec(&tr->data[cpu]->disabled);
204 local_irq_restore(irq1);
122} 205}
123 206
124static void trace_bts_at(struct trace_array *tr, 207static void trace_bts_at(const struct bts_trace *trace, void *at)
125 const struct bts_trace *trace, void *at)
126{ 208{
127 struct bts_struct bts; 209 struct bts_struct bts;
128 int err = 0; 210 int err = 0;
@@ -137,18 +219,29 @@ static void trace_bts_at(struct trace_array *tr,
137 219
138 switch (bts.qualifier) { 220 switch (bts.qualifier) {
139 case BTS_BRANCH: 221 case BTS_BRANCH:
140 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); 222 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
141 break; 223 break;
142 } 224 }
143} 225}
144 226
227/*
228 * Collect the trace on the current cpu and write it into the ftrace buffer.
229 *
230 * pre: bts_tracer_mutex must be locked
231 */
145static void trace_bts_cpu(void *arg) 232static void trace_bts_cpu(void *arg)
146{ 233{
147 struct trace_array *tr = (struct trace_array *) arg; 234 struct trace_array *tr = (struct trace_array *) arg;
148 const struct bts_trace *trace; 235 const struct bts_trace *trace;
149 unsigned char *at; 236 unsigned char *at;
150 237
151 if (!this_tracer) 238 if (unlikely(!tr))
239 return;
240
241 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
242 return;
243
244 if (unlikely(!this_tracer))
152 return; 245 return;
153 246
154 ds_suspend_bts(this_tracer); 247 ds_suspend_bts(this_tracer);
@@ -158,11 +251,11 @@ static void trace_bts_cpu(void *arg)
158 251
159 for (at = trace->ds.top; (void *)at < trace->ds.end; 252 for (at = trace->ds.top; (void *)at < trace->ds.end;
160 at += trace->ds.size) 253 at += trace->ds.size)
161 trace_bts_at(tr, trace, at); 254 trace_bts_at(trace, at);
162 255
163 for (at = trace->ds.begin; (void *)at < trace->ds.top; 256 for (at = trace->ds.begin; (void *)at < trace->ds.top;
164 at += trace->ds.size) 257 at += trace->ds.size)
165 trace_bts_at(tr, trace, at); 258 trace_bts_at(trace, at);
166 259
167out: 260out:
168 ds_resume_bts(this_tracer); 261 ds_resume_bts(this_tracer);
@@ -170,22 +263,38 @@ out:
170 263
171static void trace_bts_prepare(struct trace_iterator *iter) 264static void trace_bts_prepare(struct trace_iterator *iter)
172{ 265{
173 int cpu; 266 mutex_lock(&bts_tracer_mutex);
267
268 on_each_cpu(trace_bts_cpu, iter->tr, 1);
269
270 mutex_unlock(&bts_tracer_mutex);
271}
272
273static void trace_bts_close(struct trace_iterator *iter)
274{
275 tracing_reset_online_cpus(iter->tr);
276}
277
278void trace_hw_branch_oops(void)
279{
280 mutex_lock(&bts_tracer_mutex);
281
282 trace_bts_cpu(hw_branch_trace);
174 283
175 for_each_cpu(cpu, cpu_possible_mask) 284 mutex_unlock(&bts_tracer_mutex);
176 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
177} 285}
178 286
179struct tracer bts_tracer __read_mostly = 287struct tracer bts_tracer __read_mostly =
180{ 288{
181 .name = "hw-branch-tracer", 289 .name = "hw-branch-tracer",
182 .init = bts_trace_init, 290 .init = bts_trace_init,
183 .reset = bts_trace_stop, 291 .reset = bts_trace_reset,
184 .print_header = bts_trace_print_header, 292 .print_header = bts_trace_print_header,
185 .print_line = bts_trace_print_line, 293 .print_line = bts_trace_print_line,
186 .start = bts_trace_start, 294 .start = bts_trace_start,
187 .stop = bts_trace_stop, 295 .stop = bts_trace_stop,
188 .open = trace_bts_prepare 296 .open = trace_bts_prepare,
297 .close = trace_bts_close
189}; 298};
190 299
191__init static int init_bts_trace(void) 300__init static int init_bts_trace(void)
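Editorial note on the hw-branch hunk above: the interesting part is the shape of the new write path, which every tracer converted in this series shares. The sketch below restates it outside the diff context. It is illustrative only, not part of the patch; it assumes the in-tree struct trace_array, an hw_branch_entry record with from/to fields (the struct name is an assumption here), and the trace_buffer_lock_reserve()/trace_buffer_unlock_commit() helpers introduced by this series, so it builds only inside the kernel tree.

        /* Sketch of the per-CPU guarded reserve/fill/commit write path. */
        static void record_hw_branch(struct trace_array *tr, u64 from, u64 to)
        {
                struct ring_buffer_event *event;
                struct hw_branch_entry *entry;
                unsigned long flags;
                int cpu;

                local_irq_save(flags);
                cpu = raw_smp_processor_id();
                if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
                        goto out;               /* re-entered on this CPU: drop it */

                event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
                                                  sizeof(*entry), 0, 0);
                if (!event)
                        goto out;               /* ring buffer full */
                entry = ring_buffer_event_data(event);
                tracing_generic_entry_update(&entry->ent, 0, from);
                entry->ent.type = TRACE_HW_BRANCHES;
                entry->from     = from;
                entry->to       = to;
                trace_buffer_unlock_commit(tr, event, 0, 0);
        out:
                atomic_dec(&tr->data[cpu]->disabled);
                local_irq_restore(flags);
        }

The local_irq_save() plus the per-CPU disabled counter is the standard guard against the tracer recursing into itself from interrupt context while it writes.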
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 62a78d943534..c6b442d88de8 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 95 disabled = atomic_inc_return(&data->disabled);
96 96
97 if (likely(disabled == 1)) 97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 98 trace_function(tr, ip, parent_ip, flags, preempt_count());
99 99
100 atomic_dec(&data->disabled); 100 atomic_dec(&data->disabled);
101} 101}
@@ -153,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
153 if (!report_latency(delta)) 153 if (!report_latency(delta))
154 goto out_unlock; 154 goto out_unlock;
155 155
156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 156 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
157 157
158 latency = nsecs_to_usecs(delta); 158 latency = nsecs_to_usecs(delta);
159 159
@@ -177,7 +177,7 @@ out:
177 data->critical_sequence = max_sequence; 177 data->critical_sequence = max_sequence;
178 data->preempt_timestamp = ftrace_now(cpu); 178 data->preempt_timestamp = ftrace_now(cpu);
179 tracing_reset(tr, cpu); 179 tracing_reset(tr, cpu);
180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 180 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
181} 181}
182 182
183static inline void 183static inline void
@@ -210,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
210 210
211 local_save_flags(flags); 211 local_save_flags(flags);
212 212
213 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 213 trace_function(tr, ip, parent_ip, flags, preempt_count());
214 214
215 per_cpu(tracing_cpu, cpu) = 1; 215 per_cpu(tracing_cpu, cpu) = 1;
216 216
@@ -244,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
244 atomic_inc(&data->disabled); 244 atomic_inc(&data->disabled);
245 245
246 local_save_flags(flags); 246 local_save_flags(flags);
247 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 247 trace_function(tr, ip, parent_ip, flags, preempt_count());
248 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 248 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
249 data->critical_start = 0; 249 data->critical_start = 0;
250 atomic_dec(&data->disabled); 250 atomic_dec(&data->disabled);
@@ -353,28 +353,18 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
362static void start_irqsoff_tracer(struct trace_array *tr) 356static void start_irqsoff_tracer(struct trace_array *tr)
363{ 357{
364 register_ftrace_function(&trace_ops); 358 register_ftrace_function(&trace_ops);
365 if (tracing_is_enabled()) { 359 if (tracing_is_enabled())
366 tracer_enabled = 1; 360 tracer_enabled = 1;
367 save_tracer_enabled = 1; 361 else
368 } else {
369 tracer_enabled = 0; 362 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
372} 363}
373 364
374static void stop_irqsoff_tracer(struct trace_array *tr) 365static void stop_irqsoff_tracer(struct trace_array *tr)
375{ 366{
376 tracer_enabled = 0; 367 tracer_enabled = 0;
377 save_tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 368 unregister_ftrace_function(&trace_ops);
379} 369}
380 370
@@ -395,25 +385,11 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
395static void irqsoff_tracer_start(struct trace_array *tr) 385static void irqsoff_tracer_start(struct trace_array *tr)
396{ 386{
397 tracer_enabled = 1; 387 tracer_enabled = 1;
398 save_tracer_enabled = 1;
399} 388}
400 389
401static void irqsoff_tracer_stop(struct trace_array *tr) 390static void irqsoff_tracer_stop(struct trace_array *tr)
402{ 391{
403 tracer_enabled = 0; 392 tracer_enabled = 0;
404 save_tracer_enabled = 0;
405}
406
407static void irqsoff_tracer_open(struct trace_iterator *iter)
408{
409 /* stop the trace while dumping */
410 tracer_enabled = 0;
411}
412
413static void irqsoff_tracer_close(struct trace_iterator *iter)
414{
415 /* restart tracing */
416 tracer_enabled = save_tracer_enabled;
417} 393}
418 394
419#ifdef CONFIG_IRQSOFF_TRACER 395#ifdef CONFIG_IRQSOFF_TRACER
@@ -431,8 +407,6 @@ static struct tracer irqsoff_tracer __read_mostly =
431 .reset = irqsoff_tracer_reset, 407 .reset = irqsoff_tracer_reset,
432 .start = irqsoff_tracer_start, 408 .start = irqsoff_tracer_start,
433 .stop = irqsoff_tracer_stop, 409 .stop = irqsoff_tracer_stop,
434 .open = irqsoff_tracer_open,
435 .close = irqsoff_tracer_close,
436 .print_max = 1, 410 .print_max = 1,
437#ifdef CONFIG_FTRACE_SELFTEST 411#ifdef CONFIG_FTRACE_SELFTEST
438 .selftest = trace_selftest_startup_irqsoff, 412 .selftest = trace_selftest_startup_irqsoff,
@@ -459,8 +433,6 @@ static struct tracer preemptoff_tracer __read_mostly =
459 .reset = irqsoff_tracer_reset, 433 .reset = irqsoff_tracer_reset,
460 .start = irqsoff_tracer_start, 434 .start = irqsoff_tracer_start,
461 .stop = irqsoff_tracer_stop, 435 .stop = irqsoff_tracer_stop,
462 .open = irqsoff_tracer_open,
463 .close = irqsoff_tracer_close,
464 .print_max = 1, 436 .print_max = 1,
465#ifdef CONFIG_FTRACE_SELFTEST 437#ifdef CONFIG_FTRACE_SELFTEST
466 .selftest = trace_selftest_startup_preemptoff, 438 .selftest = trace_selftest_startup_preemptoff,
@@ -489,8 +461,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
489 .reset = irqsoff_tracer_reset, 461 .reset = irqsoff_tracer_reset,
490 .start = irqsoff_tracer_start, 462 .start = irqsoff_tracer_start,
491 .stop = irqsoff_tracer_stop, 463 .stop = irqsoff_tracer_stop,
492 .open = irqsoff_tracer_open,
493 .close = irqsoff_tracer_close,
494 .print_max = 1, 464 .print_max = 1,
495#ifdef CONFIG_FTRACE_SELFTEST 465#ifdef CONFIG_FTRACE_SELFTEST
496 .selftest = trace_selftest_startup_preemptirqsoff, 466 .selftest = trace_selftest_startup_preemptirqsoff,
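Two things change for the latency tracers in this file: trace_function() loses its per-CPU data argument (the trace array alone is enough now), and the open/close handlers plus save_tracer_enabled go away because stopping output while a trace file is open is handled by the core. A condensed, hedged sketch of what a caller looks like after the change — this folds the irqsoff hook into one function for illustration; it is not literal patch code and builds only in-tree:

        static void function_hook(unsigned long ip, unsigned long parent_ip,
                                  struct trace_array *tr)
        {
                struct trace_array_cpu *data = tr->data[raw_smp_processor_id()];
                unsigned long flags;

                local_save_flags(flags);
                if (atomic_inc_return(&data->disabled) == 1)
                        /* new signature: no per-CPU data pointer any more */
                        trace_function(tr, ip, parent_ip, flags, preempt_count());
                atomic_dec(&data->disabled);
        }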
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 80e503ef6136..c401b908e805 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,6 +12,7 @@
12#include <asm/atomic.h> 12#include <asm/atomic.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16struct header_iter { 17struct header_iter {
17 struct pci_dev *dev; 18 struct pci_dev *dev;
@@ -183,21 +184,22 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
183 switch (rw->opcode) { 184 switch (rw->opcode) {
184 case MMIO_READ: 185 case MMIO_READ:
185 ret = trace_seq_printf(s, 186 ret = trace_seq_printf(s,
186 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 187 "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
187 rw->width, secs, usec_rem, rw->map_id, 188 rw->width, secs, usec_rem, rw->map_id,
188 (unsigned long long)rw->phys, 189 (unsigned long long)rw->phys,
189 rw->value, rw->pc, 0); 190 rw->value, rw->pc, 0);
190 break; 191 break;
191 case MMIO_WRITE: 192 case MMIO_WRITE:
192 ret = trace_seq_printf(s, 193 ret = trace_seq_printf(s,
193 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 194 "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
194 rw->width, secs, usec_rem, rw->map_id, 195 rw->width, secs, usec_rem, rw->map_id,
195 (unsigned long long)rw->phys, 196 (unsigned long long)rw->phys,
196 rw->value, rw->pc, 0); 197 rw->value, rw->pc, 0);
197 break; 198 break;
198 case MMIO_UNKNOWN_OP: 199 case MMIO_UNKNOWN_OP:
199 ret = trace_seq_printf(s, 200 ret = trace_seq_printf(s,
200 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", 201 "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
202 "%02lx 0x%lx %d\n",
201 secs, usec_rem, rw->map_id, 203 secs, usec_rem, rw->map_id,
202 (unsigned long long)rw->phys, 204 (unsigned long long)rw->phys,
203 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, 205 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
@@ -229,14 +231,14 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
229 switch (m->opcode) { 231 switch (m->opcode) {
230 case MMIO_PROBE: 232 case MMIO_PROBE:
231 ret = trace_seq_printf(s, 233 ret = trace_seq_printf(s,
232 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 234 "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
233 secs, usec_rem, m->map_id, 235 secs, usec_rem, m->map_id,
234 (unsigned long long)m->phys, m->virt, m->len, 236 (unsigned long long)m->phys, m->virt, m->len,
235 0UL, 0); 237 0UL, 0);
236 break; 238 break;
237 case MMIO_UNPROBE: 239 case MMIO_UNPROBE:
238 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
239 "UNMAP %lu.%06lu %d 0x%lx %d\n", 241 "UNMAP %u.%06lu %d 0x%lx %d\n",
240 secs, usec_rem, m->map_id, 0UL, 0); 242 secs, usec_rem, m->map_id, 0UL, 0);
241 break; 243 break;
242 default: 244 default:
@@ -260,13 +262,10 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
260 int ret; 262 int ret;
261 263
262 /* The trailing newline must be in the message. */ 264 /* The trailing newline must be in the message. */
263 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg); 265 ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
264 if (!ret) 266 if (!ret)
265 return TRACE_TYPE_PARTIAL_LINE; 267 return TRACE_TYPE_PARTIAL_LINE;
266 268
267 if (entry->flags & TRACE_FLAG_CONT)
268 trace_seq_print_cont(s, iter);
269
270 return TRACE_TYPE_HANDLED; 269 return TRACE_TYPE_HANDLED;
271} 270}
272 271
@@ -308,21 +307,17 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
308{ 307{
309 struct ring_buffer_event *event; 308 struct ring_buffer_event *event;
310 struct trace_mmiotrace_rw *entry; 309 struct trace_mmiotrace_rw *entry;
311 unsigned long irq_flags; 310 int pc = preempt_count();
312 311
313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 312 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW,
314 &irq_flags); 313 sizeof(*entry), 0, pc);
315 if (!event) { 314 if (!event) {
316 atomic_inc(&dropped_count); 315 atomic_inc(&dropped_count);
317 return; 316 return;
318 } 317 }
319 entry = ring_buffer_event_data(event); 318 entry = ring_buffer_event_data(event);
320 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
321 entry->ent.type = TRACE_MMIO_RW;
322 entry->rw = *rw; 319 entry->rw = *rw;
323 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 320 trace_buffer_unlock_commit(tr, event, 0, pc);
324
325 trace_wake_up();
326} 321}
327 322
328void mmio_trace_rw(struct mmiotrace_rw *rw) 323void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -338,21 +333,17 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338{ 333{
339 struct ring_buffer_event *event; 334 struct ring_buffer_event *event;
340 struct trace_mmiotrace_map *entry; 335 struct trace_mmiotrace_map *entry;
341 unsigned long irq_flags; 336 int pc = preempt_count();
342 337
343 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 338 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP,
344 &irq_flags); 339 sizeof(*entry), 0, pc);
345 if (!event) { 340 if (!event) {
346 atomic_inc(&dropped_count); 341 atomic_inc(&dropped_count);
347 return; 342 return;
348 } 343 }
349 entry = ring_buffer_event_data(event); 344 entry = ring_buffer_event_data(event);
350 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
351 entry->ent.type = TRACE_MMIO_MAP;
352 entry->map = *map; 345 entry->map = *map;
353 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 346 trace_buffer_unlock_commit(tr, event, 0, pc);
354
355 trace_wake_up();
356} 347}
357 348
358void mmio_trace_mapping(struct mmiotrace_map *map) 349void mmio_trace_mapping(struct mmiotrace_map *map)
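The mmiotrace conversion above is the clearest before/after picture of the new helpers: trace_buffer_lock_reserve() now stamps the common entry header (type, flags, preempt count), so the explicit tracing_generic_entry_update() and ent.type assignments disappear, and the separate trace_wake_up() call goes away with trace_buffer_unlock_commit(). A sketch of the resulting shape, not part of the patch, with the file-static dropped_count passed in as a parameter so the snippet reads on its own; it builds only in the kernel tree:

        static void record_mmio_rw(struct trace_array *tr, struct mmiotrace_rw *rw,
                                   atomic_t *dropped_count)
        {
                struct ring_buffer_event *event;
                struct trace_mmiotrace_rw *entry;
                int pc = preempt_count();

                event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW,
                                                  sizeof(*entry), 0, pc);
                if (!event) {
                        atomic_inc(dropped_count);  /* buffer full: account the drop */
                        return;
                }
                entry = ring_buffer_event_data(event);
                entry->rw = *rw;                    /* payload only; header is done */
                trace_buffer_unlock_commit(tr, event, 0, pc);
        }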
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index b9767acd30ac..9aa84bde23cd 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -47,12 +47,7 @@ static void stop_nop_trace(struct trace_array *tr)
47 47
48static int nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
49{ 49{
50 int cpu;
51 ctx_trace = tr; 50 ctx_trace = tr;
52
53 for_each_online_cpu(cpu)
54 tracing_reset(tr, cpu);
55
56 start_nop_trace(tr); 51 start_nop_trace(tr);
57 return 0; 52 return 0;
58} 53}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644
index 000000000000..9fc815031b09
--- /dev/null
+++ b/kernel/trace/trace_output.c
@@ -0,0 +1,919 @@
1/*
2 * trace_output.c
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/mutex.h>
10#include <linux/ftrace.h>
11
12#include "trace_output.h"
13
14/* must be a power of 2 */
15#define EVENT_HASHSIZE 128
16
17static DEFINE_MUTEX(trace_event_mutex);
18static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19
20static int next_event_type = __TRACE_LAST_TYPE + 1;
21
22/**
23 * trace_seq_printf - sequence printing of trace information
24 * @s: trace sequence descriptor
25 * @fmt: printf format string
26 *
27 * The tracer may use either sequence operations or its own
28 * copy to user routines. To simplify formatting of a trace
29 * trace_seq_printf is used to store strings into a special
30 * buffer (@s). Then the output may be either used by
31 * the sequencer or pulled into another buffer.
32 */
33int
34trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
35{
36 int len = (PAGE_SIZE - 1) - s->len;
37 va_list ap;
38 int ret;
39
40 if (!len)
41 return 0;
42
43 va_start(ap, fmt);
44 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
45 va_end(ap);
46
47 /* If we can't write it all, don't bother writing anything */
48 if (ret >= len)
49 return 0;
50
51 s->len += ret;
52
53 return len;
54}
55
56/**
57 * trace_seq_puts - trace sequence printing of simple string
58 * @s: trace sequence descriptor
59 * @str: simple string to record
60 *
61 * The tracer may use either the sequence operations or its own
62 * copy to user routines. This function records a simple string
63 * into a special buffer (@s) for later retrieval by a sequencer
64 * or other mechanism.
65 */
66int trace_seq_puts(struct trace_seq *s, const char *str)
67{
68 int len = strlen(str);
69
70 if (len > ((PAGE_SIZE - 1) - s->len))
71 return 0;
72
73 memcpy(s->buffer + s->len, str, len);
74 s->len += len;
75
76 return len;
77}
78
79int trace_seq_putc(struct trace_seq *s, unsigned char c)
80{
81 if (s->len >= (PAGE_SIZE - 1))
82 return 0;
83
84 s->buffer[s->len++] = c;
85
86 return 1;
87}
88
89int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
90{
91 if (len > ((PAGE_SIZE - 1) - s->len))
92 return 0;
93
94 memcpy(s->buffer + s->len, mem, len);
95 s->len += len;
96
97 return len;
98}
99
100int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
101{
102 unsigned char hex[HEX_CHARS];
103 unsigned char *data = mem;
104 int i, j;
105
106#ifdef __BIG_ENDIAN
107 for (i = 0, j = 0; i < len; i++) {
108#else
109 for (i = len-1, j = 0; i >= 0; i--) {
110#endif
111 hex[j++] = hex_asc_hi(data[i]);
112 hex[j++] = hex_asc_lo(data[i]);
113 }
114 hex[j++] = ' ';
115
116 return trace_seq_putmem(s, hex, j);
117}
118
119int trace_seq_path(struct trace_seq *s, struct path *path)
120{
121 unsigned char *p;
122
123 if (s->len >= (PAGE_SIZE - 1))
124 return 0;
125 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
126 if (!IS_ERR(p)) {
127 p = mangle_path(s->buffer + s->len, p, "\n");
128 if (p) {
129 s->len = p - s->buffer;
130 return 1;
131 }
132 } else {
133 s->buffer[s->len++] = '?';
134 return 1;
135 }
136
137 return 0;
138}
139
140#ifdef CONFIG_KRETPROBES
141static inline const char *kretprobed(const char *name)
142{
143 static const char tramp_name[] = "kretprobe_trampoline";
144 int size = sizeof(tramp_name);
145
146 if (strncmp(tramp_name, name, size) == 0)
147 return "[unknown/kretprobe'd]";
148 return name;
149}
150#else
151static inline const char *kretprobed(const char *name)
152{
153 return name;
154}
155#endif /* CONFIG_KRETPROBES */
156
157static int
158seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
159{
160#ifdef CONFIG_KALLSYMS
161 char str[KSYM_SYMBOL_LEN];
162 const char *name;
163
164 kallsyms_lookup(address, NULL, NULL, NULL, str);
165
166 name = kretprobed(str);
167
168 return trace_seq_printf(s, fmt, name);
169#endif
170 return 1;
171}
172
173static int
174seq_print_sym_offset(struct trace_seq *s, const char *fmt,
175 unsigned long address)
176{
177#ifdef CONFIG_KALLSYMS
178 char str[KSYM_SYMBOL_LEN];
179 const char *name;
180
181 sprint_symbol(str, address);
182 name = kretprobed(str);
183
184 return trace_seq_printf(s, fmt, name);
185#endif
186 return 1;
187}
188
189#ifndef CONFIG_64BIT
190# define IP_FMT "%08lx"
191#else
192# define IP_FMT "%016lx"
193#endif
194
195int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
196 unsigned long ip, unsigned long sym_flags)
197{
198 struct file *file = NULL;
199 unsigned long vmstart = 0;
200 int ret = 1;
201
202 if (mm) {
203 const struct vm_area_struct *vma;
204
205 down_read(&mm->mmap_sem);
206 vma = find_vma(mm, ip);
207 if (vma) {
208 file = vma->vm_file;
209 vmstart = vma->vm_start;
210 }
211 if (file) {
212 ret = trace_seq_path(s, &file->f_path);
213 if (ret)
214 ret = trace_seq_printf(s, "[+0x%lx]",
215 ip - vmstart);
216 }
217 up_read(&mm->mmap_sem);
218 }
219 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
220 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
221 return ret;
222}
223
224int
225seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
226 unsigned long sym_flags)
227{
228 struct mm_struct *mm = NULL;
229 int ret = 1;
230 unsigned int i;
231
232 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
233 struct task_struct *task;
234 /*
235 * we do the lookup on the thread group leader,
236 * since individual threads might have already quit!
237 */
238 rcu_read_lock();
239 task = find_task_by_vpid(entry->ent.tgid);
240 if (task)
241 mm = get_task_mm(task);
242 rcu_read_unlock();
243 }
244
245 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
246 unsigned long ip = entry->caller[i];
247
248 if (ip == ULONG_MAX || !ret)
249 break;
250 if (i && ret)
251 ret = trace_seq_puts(s, " <- ");
252 if (!ip) {
253 if (ret)
254 ret = trace_seq_puts(s, "??");
255 continue;
256 }
257 if (!ret)
258 break;
259 if (ret)
260 ret = seq_print_user_ip(s, mm, ip, sym_flags);
261 }
262
263 if (mm)
264 mmput(mm);
265 return ret;
266}
267
268int
269seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
270{
271 int ret;
272
273 if (!ip)
274 return trace_seq_printf(s, "0");
275
276 if (sym_flags & TRACE_ITER_SYM_OFFSET)
277 ret = seq_print_sym_offset(s, "%s", ip);
278 else
279 ret = seq_print_sym_short(s, "%s", ip);
280
281 if (!ret)
282 return 0;
283
284 if (sym_flags & TRACE_ITER_SYM_ADDR)
285 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
286 return ret;
287}
288
289static int
290lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
291{
292 int hardirq, softirq;
293 char *comm;
294
295 comm = trace_find_cmdline(entry->pid);
296 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
297 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
298
299 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c",
300 comm, entry->pid, cpu,
301 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
302 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
303 'X' : '.',
304 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
305 'N' : '.',
306 (hardirq && softirq) ? 'H' :
307 hardirq ? 'h' : softirq ? 's' : '.'))
308 return 0;
309
310 if (entry->preempt_count)
311 return trace_seq_printf(s, "%x", entry->preempt_count);
312 return trace_seq_puts(s, ".");
313}
314
315static unsigned long preempt_mark_thresh = 100;
316
317static int
318lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
319 unsigned long rel_usecs)
320{
321 return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
322 rel_usecs > preempt_mark_thresh ? '!' :
323 rel_usecs > 1 ? '+' : ' ');
324}
325
326int trace_print_context(struct trace_iterator *iter)
327{
328 struct trace_seq *s = &iter->seq;
329 struct trace_entry *entry = iter->ent;
330 char *comm = trace_find_cmdline(entry->pid);
331 unsigned long long t = ns2usecs(iter->ts);
332 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
333 unsigned long secs = (unsigned long)t;
334
335 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
336 comm, entry->pid, iter->cpu, secs, usec_rem);
337}
338
339int trace_print_lat_context(struct trace_iterator *iter)
340{
341 u64 next_ts;
342 int ret;
343 struct trace_seq *s = &iter->seq;
344 struct trace_entry *entry = iter->ent,
345 *next_entry = trace_find_next_entry(iter, NULL,
346 &next_ts);
347 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
348 unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
349 unsigned long rel_usecs;
350
351 if (!next_entry)
352 next_ts = iter->ts;
353 rel_usecs = ns2usecs(next_ts - iter->ts);
354
355 if (verbose) {
356 char *comm = trace_find_cmdline(entry->pid);
357 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
358 " %ld.%03ldms (+%ld.%03ldms): ", comm,
359 entry->pid, iter->cpu, entry->flags,
360 entry->preempt_count, iter->idx,
361 ns2usecs(iter->ts),
362 abs_usecs / USEC_PER_MSEC,
363 abs_usecs % USEC_PER_MSEC,
364 rel_usecs / USEC_PER_MSEC,
365 rel_usecs % USEC_PER_MSEC);
366 } else {
367 ret = lat_print_generic(s, entry, iter->cpu);
368 if (ret)
369 ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
370 }
371
372 return ret;
373}
374
375static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
376
377static int task_state_char(unsigned long state)
378{
379 int bit = state ? __ffs(state) + 1 : 0;
380
381 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
382}
383
384/**
385 * ftrace_find_event - find a registered event
386 * @type: the type of event to look for
387 *
388 * Returns an event of type @type otherwise NULL
389 */
390struct trace_event *ftrace_find_event(int type)
391{
392 struct trace_event *event;
393 struct hlist_node *n;
394 unsigned key;
395
396 key = type & (EVENT_HASHSIZE - 1);
397
398 hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
399 if (event->type == type)
400 return event;
401 }
402
403 return NULL;
404}
405
406/**
407 * register_ftrace_event - register output for an event type
408 * @event: the event type to register
409 *
410 * Event types are stored in a hash and this hash is used to
411 * find a way to print an event. If the @event->type is set
412 * then it will use that type, otherwise it will assign a
413 * type to use.
414 *
415 * If you assign your own type, please make sure it is added
416 * to the trace_type enum in trace.h, to avoid collisions
417 * with the dynamic types.
418 *
419 * Returns the event type number or zero on error.
420 */
421int register_ftrace_event(struct trace_event *event)
422{
423 unsigned key;
424 int ret = 0;
425
426 mutex_lock(&trace_event_mutex);
427
428 if (!event->type)
429 event->type = next_event_type++;
430 else if (event->type > __TRACE_LAST_TYPE) {
431 printk(KERN_WARNING "Need to add type to trace.h\n");
432 WARN_ON(1);
433 }
434
435 if (ftrace_find_event(event->type))
436 goto out;
437
438 if (event->trace == NULL)
439 event->trace = trace_nop_print;
440 if (event->latency_trace == NULL)
441 event->latency_trace = trace_nop_print;
442 if (event->raw == NULL)
443 event->raw = trace_nop_print;
444 if (event->hex == NULL)
445 event->hex = trace_nop_print;
446 if (event->binary == NULL)
447 event->binary = trace_nop_print;
448
449 key = event->type & (EVENT_HASHSIZE - 1);
450
451 hlist_add_head_rcu(&event->node, &event_hash[key]);
452
453 ret = event->type;
454 out:
455 mutex_unlock(&trace_event_mutex);
456
457 return ret;
458}
459
460/**
461 * unregister_ftrace_event - remove a no longer used event
462 * @event: the event to remove
463 */
464int unregister_ftrace_event(struct trace_event *event)
465{
466 mutex_lock(&trace_event_mutex);
467 hlist_del(&event->node);
468 mutex_unlock(&trace_event_mutex);
469
470 return 0;
471}
472
473/*
474 * Standard events
475 */
476
477enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags)
478{
479 return TRACE_TYPE_HANDLED;
480}
481
482/* TRACE_FN */
483static enum print_line_t trace_fn_latency(struct trace_iterator *iter,
484 int flags)
485{
486 struct ftrace_entry *field;
487 struct trace_seq *s = &iter->seq;
488
489 trace_assign_type(field, iter->ent);
490
491 if (!seq_print_ip_sym(s, field->ip, flags))
492 goto partial;
493 if (!trace_seq_puts(s, " ("))
494 goto partial;
495 if (!seq_print_ip_sym(s, field->parent_ip, flags))
496 goto partial;
497 if (!trace_seq_puts(s, ")\n"))
498 goto partial;
499
500 return TRACE_TYPE_HANDLED;
501
502 partial:
503 return TRACE_TYPE_PARTIAL_LINE;
504}
505
506static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
507{
508 struct ftrace_entry *field;
509 struct trace_seq *s = &iter->seq;
510
511 trace_assign_type(field, iter->ent);
512
513 if (!seq_print_ip_sym(s, field->ip, flags))
514 goto partial;
515
516 if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
517 if (!trace_seq_printf(s, " <-"))
518 goto partial;
519 if (!seq_print_ip_sym(s,
520 field->parent_ip,
521 flags))
522 goto partial;
523 }
524 if (!trace_seq_printf(s, "\n"))
525 goto partial;
526
527 return TRACE_TYPE_HANDLED;
528
529 partial:
530 return TRACE_TYPE_PARTIAL_LINE;
531}
532
533static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
534{
535 struct ftrace_entry *field;
536
537 trace_assign_type(field, iter->ent);
538
539 if (!trace_seq_printf(&iter->seq, "%lx %lx\n",
540 field->ip,
541 field->parent_ip))
542 return TRACE_TYPE_PARTIAL_LINE;
543
544 return TRACE_TYPE_HANDLED;
545}
546
547static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
548{
549 struct ftrace_entry *field;
550 struct trace_seq *s = &iter->seq;
551
552 trace_assign_type(field, iter->ent);
553
554 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
555 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
556
557 return TRACE_TYPE_HANDLED;
558}
559
560static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
561{
562 struct ftrace_entry *field;
563 struct trace_seq *s = &iter->seq;
564
565 trace_assign_type(field, iter->ent);
566
567 SEQ_PUT_FIELD_RET(s, field->ip);
568 SEQ_PUT_FIELD_RET(s, field->parent_ip);
569
570 return TRACE_TYPE_HANDLED;
571}
572
573static struct trace_event trace_fn_event = {
574 .type = TRACE_FN,
575 .trace = trace_fn_trace,
576 .latency_trace = trace_fn_latency,
577 .raw = trace_fn_raw,
578 .hex = trace_fn_hex,
579 .binary = trace_fn_bin,
580};
581
582/* TRACE_CTX and TRACE_WAKE */
583static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
584 char *delim)
585{
586 struct ctx_switch_entry *field;
587 char *comm;
588 int S, T;
589
590 trace_assign_type(field, iter->ent);
591
592 T = task_state_char(field->next_state);
593 S = task_state_char(field->prev_state);
594 comm = trace_find_cmdline(field->next_pid);
595 if (!trace_seq_printf(&iter->seq,
596 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
597 field->prev_pid,
598 field->prev_prio,
599 S, delim,
600 field->next_cpu,
601 field->next_pid,
602 field->next_prio,
603 T, comm))
604 return TRACE_TYPE_PARTIAL_LINE;
605
606 return TRACE_TYPE_HANDLED;
607}
608
609static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags)
610{
611 return trace_ctxwake_print(iter, "==>");
612}
613
614static enum print_line_t trace_wake_print(struct trace_iterator *iter,
615 int flags)
616{
617 return trace_ctxwake_print(iter, " +");
618}
619
620static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
621{
622 struct ctx_switch_entry *field;
623 int T;
624
625 trace_assign_type(field, iter->ent);
626
627 if (!S)
628 task_state_char(field->prev_state);
629 T = task_state_char(field->next_state);
630 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
631 field->prev_pid,
632 field->prev_prio,
633 S,
634 field->next_cpu,
635 field->next_pid,
636 field->next_prio,
637 T))
638 return TRACE_TYPE_PARTIAL_LINE;
639
640 return TRACE_TYPE_HANDLED;
641}
642
643static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags)
644{
645 return trace_ctxwake_raw(iter, 0);
646}
647
648static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags)
649{
650 return trace_ctxwake_raw(iter, '+');
651}
652
653
654static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
655{
656 struct ctx_switch_entry *field;
657 struct trace_seq *s = &iter->seq;
658 int T;
659
660 trace_assign_type(field, iter->ent);
661
662 if (!S)
663 task_state_char(field->prev_state);
664 T = task_state_char(field->next_state);
665
666 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
667 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
668 SEQ_PUT_HEX_FIELD_RET(s, S);
669 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
670 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
671 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
672 SEQ_PUT_HEX_FIELD_RET(s, T);
673
674 return TRACE_TYPE_HANDLED;
675}
676
677static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags)
678{
679 return trace_ctxwake_hex(iter, 0);
680}
681
682static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags)
683{
684 return trace_ctxwake_hex(iter, '+');
685}
686
687static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
688 int flags)
689{
690 struct ctx_switch_entry *field;
691 struct trace_seq *s = &iter->seq;
692
693 trace_assign_type(field, iter->ent);
694
695 SEQ_PUT_FIELD_RET(s, field->prev_pid);
696 SEQ_PUT_FIELD_RET(s, field->prev_prio);
697 SEQ_PUT_FIELD_RET(s, field->prev_state);
698 SEQ_PUT_FIELD_RET(s, field->next_pid);
699 SEQ_PUT_FIELD_RET(s, field->next_prio);
700 SEQ_PUT_FIELD_RET(s, field->next_state);
701
702 return TRACE_TYPE_HANDLED;
703}
704
705static struct trace_event trace_ctx_event = {
706 .type = TRACE_CTX,
707 .trace = trace_ctx_print,
708 .latency_trace = trace_ctx_print,
709 .raw = trace_ctx_raw,
710 .hex = trace_ctx_hex,
711 .binary = trace_ctxwake_bin,
712};
713
714static struct trace_event trace_wake_event = {
715 .type = TRACE_WAKE,
716 .trace = trace_wake_print,
717 .latency_trace = trace_wake_print,
718 .raw = trace_wake_raw,
719 .hex = trace_wake_hex,
720 .binary = trace_ctxwake_bin,
721};
722
723/* TRACE_SPECIAL */
724static enum print_line_t trace_special_print(struct trace_iterator *iter,
725 int flags)
726{
727 struct special_entry *field;
728
729 trace_assign_type(field, iter->ent);
730
731 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
732 field->arg1,
733 field->arg2,
734 field->arg3))
735 return TRACE_TYPE_PARTIAL_LINE;
736
737 return TRACE_TYPE_HANDLED;
738}
739
740static enum print_line_t trace_special_hex(struct trace_iterator *iter,
741 int flags)
742{
743 struct special_entry *field;
744 struct trace_seq *s = &iter->seq;
745
746 trace_assign_type(field, iter->ent);
747
748 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
749 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
750 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
751
752 return TRACE_TYPE_HANDLED;
753}
754
755static enum print_line_t trace_special_bin(struct trace_iterator *iter,
756 int flags)
757{
758 struct special_entry *field;
759 struct trace_seq *s = &iter->seq;
760
761 trace_assign_type(field, iter->ent);
762
763 SEQ_PUT_FIELD_RET(s, field->arg1);
764 SEQ_PUT_FIELD_RET(s, field->arg2);
765 SEQ_PUT_FIELD_RET(s, field->arg3);
766
767 return TRACE_TYPE_HANDLED;
768}
769
770static struct trace_event trace_special_event = {
771 .type = TRACE_SPECIAL,
772 .trace = trace_special_print,
773 .latency_trace = trace_special_print,
774 .raw = trace_special_print,
775 .hex = trace_special_hex,
776 .binary = trace_special_bin,
777};
778
779/* TRACE_STACK */
780
781static enum print_line_t trace_stack_print(struct trace_iterator *iter,
782 int flags)
783{
784 struct stack_entry *field;
785 struct trace_seq *s = &iter->seq;
786 int i;
787
788 trace_assign_type(field, iter->ent);
789
790 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
791 if (i) {
792 if (!trace_seq_puts(s, " <= "))
793 goto partial;
794
795 if (!seq_print_ip_sym(s, field->caller[i], flags))
796 goto partial;
797 }
798 if (!trace_seq_puts(s, "\n"))
799 goto partial;
800 }
801
802 return TRACE_TYPE_HANDLED;
803
804 partial:
805 return TRACE_TYPE_PARTIAL_LINE;
806}
807
808static struct trace_event trace_stack_event = {
809 .type = TRACE_STACK,
810 .trace = trace_stack_print,
811 .latency_trace = trace_stack_print,
812 .raw = trace_special_print,
813 .hex = trace_special_hex,
814 .binary = trace_special_bin,
815};
816
817/* TRACE_USER_STACK */
818static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
819 int flags)
820{
821 struct userstack_entry *field;
822 struct trace_seq *s = &iter->seq;
823
824 trace_assign_type(field, iter->ent);
825
826 if (!seq_print_userip_objs(field, s, flags))
827 goto partial;
828
829 if (!trace_seq_putc(s, '\n'))
830 goto partial;
831
832 return TRACE_TYPE_HANDLED;
833
834 partial:
835 return TRACE_TYPE_PARTIAL_LINE;
836}
837
838static struct trace_event trace_user_stack_event = {
839 .type = TRACE_USER_STACK,
840 .trace = trace_user_stack_print,
841 .latency_trace = trace_user_stack_print,
842 .raw = trace_special_print,
843 .hex = trace_special_hex,
844 .binary = trace_special_bin,
845};
846
847/* TRACE_PRINT */
848static enum print_line_t trace_print_print(struct trace_iterator *iter,
849 int flags)
850{
851 struct print_entry *field;
852 struct trace_seq *s = &iter->seq;
853
854 trace_assign_type(field, iter->ent);
855
856 if (!seq_print_ip_sym(s, field->ip, flags))
857 goto partial;
858
859 if (!trace_seq_printf(s, ": %s", field->buf))
860 goto partial;
861
862 return TRACE_TYPE_HANDLED;
863
864 partial:
865 return TRACE_TYPE_PARTIAL_LINE;
866}
867
868static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
869{
870 struct print_entry *field;
871
872 trace_assign_type(field, iter->ent);
873
874 if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
875 goto partial;
876
877 return TRACE_TYPE_HANDLED;
878
879 partial:
880 return TRACE_TYPE_PARTIAL_LINE;
881}
882
883static struct trace_event trace_print_event = {
884 .type = TRACE_PRINT,
885 .trace = trace_print_print,
886 .latency_trace = trace_print_print,
887 .raw = trace_print_raw,
888};
889
890static struct trace_event *events[] __initdata = {
891 &trace_fn_event,
892 &trace_ctx_event,
893 &trace_wake_event,
894 &trace_special_event,
895 &trace_stack_event,
896 &trace_user_stack_event,
897 &trace_print_event,
898 NULL
899};
900
901__init static int init_events(void)
902{
903 struct trace_event *event;
904 int i, ret;
905
906 for (i = 0; events[i]; i++) {
907 event = events[i];
908
909 ret = register_ftrace_event(event);
910 if (!ret) {
911 printk(KERN_WARNING "event %d failed to register\n",
912 event->type);
913 WARN_ON_ONCE(1);
914 }
915 }
916
917 return 0;
918}
919device_initcall(init_events);
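A note on the trace_seq helpers defined above: trace_seq_printf() is deliberately all-or-nothing — if the formatted string would not fit in the remaining page-sized buffer it advances nothing, so a print handler can simply check for 0 and return TRACE_TYPE_PARTIAL_LINE. A tiny self-contained user-space model of that rule, for readers who want to poke at it outside the kernel; 4096 stands in for PAGE_SIZE and the names are made up for the demo:

        #include <stdarg.h>
        #include <stdio.h>

        #define SEQ_SIZE 4096                   /* stand-in for PAGE_SIZE */

        struct seq_model {
                char buffer[SEQ_SIZE];
                int  len;
        };

        static int seq_model_printf(struct seq_model *s, const char *fmt, ...)
        {
                int room = (SEQ_SIZE - 1) - s->len;
                va_list ap;
                int ret;

                if (!room)
                        return 0;

                va_start(ap, fmt);
                ret = vsnprintf(s->buffer + s->len, room, fmt, ap);
                va_end(ap);

                /* Truncated output is not counted: len stays put, caller sees 0. */
                if (ret >= room)
                        return 0;

                s->len += ret;
                return room;
        }

        int main(void)
        {
                struct seq_model s = { .len = 0 };

                seq_model_printf(&s, "pid=%d comm=%s\n", 42, "demo");
                fwrite(s.buffer, 1, s.len, stdout);
                return 0;
        }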
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644
index 000000000000..551a25a72217
--- /dev/null
+++ b/kernel/trace/trace_output.h
@@ -0,0 +1,62 @@
1#ifndef __TRACE_EVENTS_H
2#define __TRACE_EVENTS_H
3
4#include "trace.h"
5
6typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
7 int flags);
8
9struct trace_event {
10 struct hlist_node node;
11 int type;
12 trace_print_func trace;
13 trace_print_func latency_trace;
14 trace_print_func raw;
15 trace_print_func hex;
16 trace_print_func binary;
17};
18
19extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
20 __attribute__ ((format (printf, 2, 3)));
21extern int
22seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
23 unsigned long sym_flags);
24extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
25 size_t cnt);
26int trace_seq_puts(struct trace_seq *s, const char *str);
27int trace_seq_putc(struct trace_seq *s, unsigned char c);
28int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
29int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
30int trace_seq_path(struct trace_seq *s, struct path *path);
31int seq_print_userip_objs(const struct userstack_entry *entry,
32 struct trace_seq *s, unsigned long sym_flags);
33int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
34 unsigned long ip, unsigned long sym_flags);
35
36int trace_print_context(struct trace_iterator *iter);
37int trace_print_lat_context(struct trace_iterator *iter);
38
39struct trace_event *ftrace_find_event(int type);
40int register_ftrace_event(struct trace_event *event);
41int unregister_ftrace_event(struct trace_event *event);
42
43enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags);
44
45#define MAX_MEMHEX_BYTES 8
46#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
47
48#define SEQ_PUT_FIELD_RET(s, x) \
49do { \
50 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
51 return TRACE_TYPE_PARTIAL_LINE; \
52} while (0)
53
54#define SEQ_PUT_HEX_FIELD_RET(s, x) \
55do { \
56 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
57 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
58 return TRACE_TYPE_PARTIAL_LINE; \
59} while (0)
60
61#endif
62
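The header above is the whole contract for pluggable output: fill in a struct trace_event with per-mode print callbacks and hand it to register_ftrace_event(). A hedged, kernel-style sketch of a minimal client follows; the "demo" names are hypothetical, and only the struct layout, register_ftrace_event() and the trace_seq calls come from this patch. Leaving .type at 0 asks the core to assign a dynamic type, and unset callbacks fall back to trace_nop_print(), as the register_ftrace_event() comment explains.

        static enum print_line_t demo_print(struct trace_iterator *iter, int flags)
        {
                if (!trace_seq_printf(&iter->seq, "demo event on cpu %d\n", iter->cpu))
                        return TRACE_TYPE_PARTIAL_LINE;
                return TRACE_TYPE_HANDLED;
        }

        static struct trace_event demo_event = {
                .type   = 0,            /* 0: let register_ftrace_event() pick one */
                .trace  = demo_print,   /* other handlers default to trace_nop_print */
        };

        __init static int demo_event_init(void)
        {
                if (!register_ftrace_event(&demo_event))
                        printk(KERN_WARNING "demo event failed to register\n");
                return 0;
        }
        device_initcall(demo_event_init);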
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 7bda248daf55..91ce672fb037 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -11,24 +11,126 @@
11 11
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
14#include <linux/ftrace.h> 14#include <trace/power.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/module.h> 16#include <linux/module.h>
17 17
18#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
19 20
20static struct trace_array *power_trace; 21static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled; 22static int __read_mostly trace_power_enabled;
22 23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ring_buffer_event *event;
40 struct trace_power *entry;
41 struct trace_array_cpu *data;
42 struct trace_array *tr = power_trace;
43
44 if (!trace_power_enabled)
45 return;
46
47 preempt_disable();
48 it->end = ktime_get();
49 data = tr->data[smp_processor_id()];
50
51 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
52 sizeof(*entry), 0, 0);
53 if (!event)
54 goto out;
55 entry = ring_buffer_event_data(event);
56 entry->state_data = *it;
57 trace_buffer_unlock_commit(tr, event, 0, 0);
58 out:
59 preempt_enable();
60}
61
62static void probe_power_mark(struct power_trace *it, unsigned int type,
63 unsigned int level)
64{
65 struct ring_buffer_event *event;
66 struct trace_power *entry;
67 struct trace_array_cpu *data;
68 struct trace_array *tr = power_trace;
69
70 if (!trace_power_enabled)
71 return;
72
73 memset(it, 0, sizeof(struct power_trace));
74 it->state = level;
75 it->type = type;
76 it->stamp = ktime_get();
77 preempt_disable();
78 it->end = it->stamp;
79 data = tr->data[smp_processor_id()];
80
81 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
82 sizeof(*entry), 0, 0);
83 if (!event)
84 goto out;
85 entry = ring_buffer_event_data(event);
86 entry->state_data = *it;
87 trace_buffer_unlock_commit(tr, event, 0, 0);
88 out:
89 preempt_enable();
90}
91
92static int tracing_power_register(void)
93{
94 int ret;
95
96 ret = register_trace_power_start(probe_power_start);
97 if (ret) {
98 pr_info("power trace: Couldn't activate tracepoint"
99 " probe to trace_power_start\n");
100 return ret;
101 }
102 ret = register_trace_power_end(probe_power_end);
103 if (ret) {
104 pr_info("power trace: Couldn't activate tracepoint"
105 " probe to trace_power_end\n");
106 goto fail_start;
107 }
108 ret = register_trace_power_mark(probe_power_mark);
109 if (ret) {
110 pr_info("power trace: Couldn't activate tracepoint"
111 " probe to trace_power_mark\n");
112 goto fail_end;
113 }
114 return ret;
115fail_end:
116 unregister_trace_power_end(probe_power_end);
117fail_start:
118 unregister_trace_power_start(probe_power_start);
119 return ret;
120}
23 121
24static void start_power_trace(struct trace_array *tr) 122static void start_power_trace(struct trace_array *tr)
25{ 123{
26 trace_power_enabled = 1; 124 trace_power_enabled = 1;
125 tracing_power_register();
27} 126}
28 127
29static void stop_power_trace(struct trace_array *tr) 128static void stop_power_trace(struct trace_array *tr)
30{ 129{
31 trace_power_enabled = 0; 130 trace_power_enabled = 0;
131 unregister_trace_power_start(probe_power_start);
132 unregister_trace_power_end(probe_power_end);
133 unregister_trace_power_mark(probe_power_mark);
32} 134}
33 135
34 136
@@ -38,6 +140,7 @@ static int power_trace_init(struct trace_array *tr)
38 power_trace = tr; 140 power_trace = tr;
39 141
40 trace_power_enabled = 1; 142 trace_power_enabled = 1;
143 tracing_power_register();
41 144
42 for_each_cpu(cpu, cpu_possible_mask) 145 for_each_cpu(cpu, cpu_possible_mask)
43 tracing_reset(tr, cpu); 146 tracing_reset(tr, cpu);
@@ -94,86 +197,3 @@ static int init_power_trace(void)
94 return register_tracer(&power_tracer); 197 return register_tracer(&power_tracer);
95} 198}
96device_initcall(init_power_trace); 199device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
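The power tracer now hooks the power_start/power_end/power_mark tracepoints instead of exporting its own trace_power_* entry points, and tracing_power_register() above rolls back the probes it has already attached whenever a later registration fails. That register-or-unwind control flow is easy to get wrong, so here is a stand-alone model of just the error-handling shape; it is plain user-space C with hypothetical step names, nothing kernel-specific:

        #include <stdio.h>

        static int step(const char *name, int fail)
        {
                if (fail) {
                        fprintf(stderr, "%s failed\n", name);
                        return -1;
                }
                printf("%s registered\n", name);
                return 0;
        }

        static void undo(const char *name)
        {
                printf("%s unregistered\n", name);
        }

        /* Each successful step must be undone if a later step fails, so
         * failures jump to progressively earlier labels. */
        static int register_all(int fail_mask)
        {
                int ret;

                ret = step("power_start", fail_mask & 1);
                if (ret)
                        return ret;
                ret = step("power_end", fail_mask & 2);
                if (ret)
                        goto fail_start;
                ret = step("power_mark", fail_mask & 4);
                if (ret)
                        goto fail_end;
                return 0;

        fail_end:
                undo("power_end");
        fail_start:
                undo("power_start");
                return ret;
        }

        int main(void)
        {
                return register_all(4) ? 1 : 0; /* make the third step fail */
        }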
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index df175cb4564f..30e14fe85896 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -43,7 +43,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
43 data = ctx_trace->data[cpu]; 43 data = ctx_trace->data[cpu];
44 44
45 if (likely(!atomic_read(&data->disabled))) 45 if (likely(!atomic_read(&data->disabled)))
46 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); 46 tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
47 47
48 local_irq_restore(flags); 48 local_irq_restore(flags);
49} 49}
@@ -66,7 +66,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
66 data = ctx_trace->data[cpu]; 66 data = ctx_trace->data[cpu];
67 67
68 if (likely(!atomic_read(&data->disabled))) 68 if (likely(!atomic_read(&data->disabled)))
69 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, 69 tracing_sched_wakeup_trace(ctx_trace, wakee, current,
70 flags, pc); 70 flags, pc);
71 71
72 local_irq_restore(flags); 72 local_irq_restore(flags);
@@ -185,12 +185,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
185 ctx_trace = tr; 185 ctx_trace = tr;
186} 186}
187 187
188static void start_sched_trace(struct trace_array *tr)
189{
190 tracing_reset_online_cpus(tr);
191 tracing_start_sched_switch_record();
192}
193
194static void stop_sched_trace(struct trace_array *tr) 188static void stop_sched_trace(struct trace_array *tr)
195{ 189{
196 tracing_stop_sched_switch_record(); 190 tracing_stop_sched_switch_record();
@@ -199,7 +193,7 @@ static void stop_sched_trace(struct trace_array *tr)
199static int sched_switch_trace_init(struct trace_array *tr) 193static int sched_switch_trace_init(struct trace_array *tr)
200{ 194{
201 ctx_trace = tr; 195 ctx_trace = tr;
202 start_sched_trace(tr); 196 tracing_start_sched_switch_record();
203 return 0; 197 return 0;
204} 198}
205 199
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 42ae1e77b6b3..96d716485898 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -25,6 +25,7 @@ static int __read_mostly tracer_enabled;
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static unsigned wakeup_prio = -1; 27static unsigned wakeup_prio = -1;
28static int wakeup_rt;
28 29
29static raw_spinlock_t wakeup_lock = 30static raw_spinlock_t wakeup_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
@@ -71,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
71 if (task_cpu(wakeup_task) != cpu) 72 if (task_cpu(wakeup_task) != cpu)
72 goto unlock; 73 goto unlock;
73 74
74 trace_function(tr, data, ip, parent_ip, flags, pc); 75 trace_function(tr, ip, parent_ip, flags, pc);
75 76
76 unlock: 77 unlock:
77 __raw_spin_unlock(&wakeup_lock); 78 __raw_spin_unlock(&wakeup_lock);
@@ -151,7 +152,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
151 if (unlikely(!tracer_enabled || next != wakeup_task)) 152 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock; 153 goto out_unlock;
153 154
154 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 155 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
155 157
156 /* 158 /*
157 * usecs conversion is slow so we try to delay the conversion 159 * usecs conversion is slow so we try to delay the conversion
@@ -182,13 +184,10 @@ out:
182 184
183static void __wakeup_reset(struct trace_array *tr) 185static void __wakeup_reset(struct trace_array *tr)
184{ 186{
185 struct trace_array_cpu *data;
186 int cpu; 187 int cpu;
187 188
188 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu)
189 data = tr->data[cpu];
190 tracing_reset(tr, cpu); 190 tracing_reset(tr, cpu);
191 }
192 191
193 wakeup_cpu = -1; 192 wakeup_cpu = -1;
194 wakeup_prio = -1; 193 wakeup_prio = -1;
@@ -213,6 +212,7 @@ static void wakeup_reset(struct trace_array *tr)
213static void 212static void
214probe_wakeup(struct rq *rq, struct task_struct *p, int success) 213probe_wakeup(struct rq *rq, struct task_struct *p, int success)
215{ 214{
215 struct trace_array_cpu *data;
216 int cpu = smp_processor_id(); 216 int cpu = smp_processor_id();
217 unsigned long flags; 217 unsigned long flags;
218 long disabled; 218 long disabled;
@@ -224,7 +224,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
224 tracing_record_cmdline(p); 224 tracing_record_cmdline(p);
225 tracing_record_cmdline(current); 225 tracing_record_cmdline(current);
226 226
227 if (likely(!rt_task(p)) || 227 if ((wakeup_rt && !rt_task(p)) ||
228 p->prio >= wakeup_prio || 228 p->prio >= wakeup_prio ||
229 p->prio >= current->prio) 229 p->prio >= current->prio)
230 return; 230 return;
@@ -252,9 +252,10 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
252 252
253 local_save_flags(flags); 253 local_save_flags(flags);
254 254
255 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 255 data = wakeup_trace->data[wakeup_cpu];
256 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], 256 data->preempt_timestamp = ftrace_now(cpu);
257 CALLER_ADDR1, CALLER_ADDR2, flags, pc); 257 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
258 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
258 259
259out_locked: 260out_locked:
260 __raw_spin_unlock(&wakeup_lock); 261 __raw_spin_unlock(&wakeup_lock);
@@ -262,12 +263,6 @@ out:
262 atomic_dec(&wakeup_trace->data[cpu]->disabled); 263 atomic_dec(&wakeup_trace->data[cpu]->disabled);
263} 264}
264 265
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
271static void start_wakeup_tracer(struct trace_array *tr) 266static void start_wakeup_tracer(struct trace_array *tr)
272{ 267{
273 int ret; 268 int ret;
@@ -306,13 +301,10 @@ static void start_wakeup_tracer(struct trace_array *tr)
306 301
307 register_ftrace_function(&trace_ops); 302 register_ftrace_function(&trace_ops);
308 303
309 if (tracing_is_enabled()) { 304 if (tracing_is_enabled())
310 tracer_enabled = 1; 305 tracer_enabled = 1;
311 save_tracer_enabled = 1; 306 else
312 } else {
313 tracer_enabled = 0; 307 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
316 308
317 return; 309 return;
318fail_deprobe_wake_new: 310fail_deprobe_wake_new:
@@ -324,14 +316,13 @@ fail_deprobe:
324static void stop_wakeup_tracer(struct trace_array *tr) 316static void stop_wakeup_tracer(struct trace_array *tr)
325{ 317{
326 tracer_enabled = 0; 318 tracer_enabled = 0;
327 save_tracer_enabled = 0;
328 unregister_ftrace_function(&trace_ops); 319 unregister_ftrace_function(&trace_ops);
329 unregister_trace_sched_switch(probe_wakeup_sched_switch); 320 unregister_trace_sched_switch(probe_wakeup_sched_switch);
330 unregister_trace_sched_wakeup_new(probe_wakeup); 321 unregister_trace_sched_wakeup_new(probe_wakeup);
331 unregister_trace_sched_wakeup(probe_wakeup); 322 unregister_trace_sched_wakeup(probe_wakeup);
332} 323}
333 324
334static int wakeup_tracer_init(struct trace_array *tr) 325static int __wakeup_tracer_init(struct trace_array *tr)
335{ 326{
336 tracing_max_latency = 0; 327 tracing_max_latency = 0;
337 wakeup_trace = tr; 328 wakeup_trace = tr;
@@ -339,6 +330,18 @@ static int wakeup_tracer_init(struct trace_array *tr)
339 return 0; 330 return 0;
340} 331}
341 332
333static int wakeup_tracer_init(struct trace_array *tr)
334{
335 wakeup_rt = 0;
336 return __wakeup_tracer_init(tr);
337}
338
339static int wakeup_rt_tracer_init(struct trace_array *tr)
340{
341 wakeup_rt = 1;
342 return __wakeup_tracer_init(tr);
343}
344
342static void wakeup_tracer_reset(struct trace_array *tr) 345static void wakeup_tracer_reset(struct trace_array *tr)
343{ 346{
344 stop_wakeup_tracer(tr); 347 stop_wakeup_tracer(tr);
@@ -350,28 +353,11 @@ static void wakeup_tracer_start(struct trace_array *tr)
350{ 353{
351 wakeup_reset(tr); 354 wakeup_reset(tr);
352 tracer_enabled = 1; 355 tracer_enabled = 1;
353 save_tracer_enabled = 1;
354} 356}
355 357
356static void wakeup_tracer_stop(struct trace_array *tr) 358static void wakeup_tracer_stop(struct trace_array *tr)
357{ 359{
358 tracer_enabled = 0; 360 tracer_enabled = 0;
359 save_tracer_enabled = 0;
360}
361
362static void wakeup_tracer_open(struct trace_iterator *iter)
363{
364 /* stop the trace while dumping */
365 tracer_enabled = 0;
366}
367
368static void wakeup_tracer_close(struct trace_iterator *iter)
369{
370 /* forget about any processes we were recording */
371 if (save_tracer_enabled) {
372 wakeup_reset(iter->tr);
373 tracer_enabled = 1;
374 }
375} 361}
376 362
377static struct tracer wakeup_tracer __read_mostly = 363static struct tracer wakeup_tracer __read_mostly =
@@ -381,8 +367,19 @@ static struct tracer wakeup_tracer __read_mostly =
381 .reset = wakeup_tracer_reset, 367 .reset = wakeup_tracer_reset,
382 .start = wakeup_tracer_start, 368 .start = wakeup_tracer_start,
383 .stop = wakeup_tracer_stop, 369 .stop = wakeup_tracer_stop,
384 .open = wakeup_tracer_open, 370 .print_max = 1,
385 .close = wakeup_tracer_close, 371#ifdef CONFIG_FTRACE_SELFTEST
372 .selftest = trace_selftest_startup_wakeup,
373#endif
374};
375
376static struct tracer wakeup_rt_tracer __read_mostly =
377{
378 .name = "wakeup_rt",
379 .init = wakeup_rt_tracer_init,
380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
386 .print_max = 1, 383 .print_max = 1,
387#ifdef CONFIG_FTRACE_SELFTEST 384#ifdef CONFIG_FTRACE_SELFTEST
388 .selftest = trace_selftest_startup_wakeup, 385 .selftest = trace_selftest_startup_wakeup,
@@ -397,6 +394,10 @@ __init static int init_wakeup_tracer(void)
397 if (ret) 394 if (ret)
398 return ret; 395 return ret;
399 396
397 ret = register_tracer(&wakeup_rt_tracer);
398 if (ret)
399 return ret;
400
400 return 0; 401 return 0;
401} 402}
402device_initcall(init_wakeup_tracer); 403device_initcall(init_wakeup_tracer);
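The wakeup tracer gains an RT-only sibling here without duplicating code: "wakeup" and "wakeup_rt" register the same callbacks and differ only in the wakeup_rt flag set by their init wrappers, which the wakeup probe then checks before deciding to latch onto a task. A condensed sketch of that split — the wakeup_candidate() helper is hypothetical (the patch keeps the condition inline in probe_wakeup()) and the body of __wakeup_tracer_init() is abridged from the hunks above; it builds only in-tree:

        static int wakeup_rt;                   /* 1: trace RT task wakeups only */

        static int __wakeup_tracer_init(struct trace_array *tr)
        {
                tracing_max_latency = 0;
                wakeup_trace = tr;
                start_wakeup_tracer(tr);
                return 0;
        }

        static int wakeup_tracer_init(struct trace_array *tr)
        {
                wakeup_rt = 0;                  /* "wakeup": consider every task */
                return __wakeup_tracer_init(tr);
        }

        static int wakeup_rt_tracer_init(struct trace_array *tr)
        {
                wakeup_rt = 1;                  /* "wakeup_rt": RT tasks only */
                return __wakeup_tracer_init(tr);
        }

        /* Hypothetical helper mirroring the filter probe_wakeup() applies inline. */
        static int wakeup_candidate(struct task_struct *p)
        {
                if (wakeup_rt && !rt_task(p))
                        return 0;
                return p->prio < wakeup_prio && p->prio < current->prio;
        }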
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb70f54a..0c9aa1457e51 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,11 +9,12 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
13 case TRACE_STACK: 12 case TRACE_STACK:
14 case TRACE_PRINT: 13 case TRACE_PRINT:
15 case TRACE_SPECIAL: 14 case TRACE_SPECIAL:
16 case TRACE_BRANCH: 15 case TRACE_BRANCH:
16 case TRACE_GRAPH_ENT:
17 case TRACE_GRAPH_RET:
17 return 1; 18 return 1;
18 } 19 }
19 return 0; 20 return 0;
@@ -116,7 +117,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
116 ftrace_set_filter(func_name, strlen(func_name), 1); 117 ftrace_set_filter(func_name, strlen(func_name), 1);
117 118
118 /* enable tracing */ 119 /* enable tracing */
119 ret = trace->init(tr); 120 ret = tracer_init(trace, tr);
120 if (ret) { 121 if (ret) {
121 warn_failed_init_tracer(trace, ret); 122 warn_failed_init_tracer(trace, ret);
122 goto out; 123 goto out;
@@ -190,7 +191,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
190 ftrace_enabled = 1; 191 ftrace_enabled = 1;
191 tracer_enabled = 1; 192 tracer_enabled = 1;
192 193
193 ret = trace->init(tr); 194 ret = tracer_init(trace, tr);
194 if (ret) { 195 if (ret) {
195 warn_failed_init_tracer(trace, ret); 196 warn_failed_init_tracer(trace, ret);
196 goto out; 197 goto out;
@@ -228,6 +229,54 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
228} 229}
229#endif /* CONFIG_FUNCTION_TRACER */ 230#endif /* CONFIG_FUNCTION_TRACER */
230 231
232
233#ifdef CONFIG_FUNCTION_GRAPH_TRACER
234/*
235 * Pretty much the same than for the function tracer from which the selftest
236 * has been borrowed.
237 */
238int
239trace_selftest_startup_function_graph(struct tracer *trace,
240 struct trace_array *tr)
241{
242 int ret;
243 unsigned long count;
244
245 ret = tracer_init(trace, tr);
246 if (ret) {
247 warn_failed_init_tracer(trace, ret);
248 goto out;
249 }
250
251	/* Sleep for 1/10 of a second */
252 msleep(100);
253
254 tracing_stop();
255
256 /* check the trace buffer */
257 ret = trace_test_buffer(tr, &count);
258
259 trace->reset(tr);
260 tracing_start();
261
262 if (!ret && !count) {
263 printk(KERN_CONT ".. no entries found ..");
264 ret = -1;
265 goto out;
266 }
267
268 /* Don't test dynamic tracing, the function tracer already did */
269
270out:
271 /* Stop it if we failed */
272 if (ret)
273 ftrace_graph_stop();
274
275 return ret;
276}
277#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
278
279
231#ifdef CONFIG_IRQSOFF_TRACER 280#ifdef CONFIG_IRQSOFF_TRACER
232int 281int
233trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) 282trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
@@ -237,7 +286,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
237 int ret; 286 int ret;
238 287
239 /* start the tracing */ 288 /* start the tracing */
240 ret = trace->init(tr); 289 ret = tracer_init(trace, tr);
241 if (ret) { 290 if (ret) {
242 warn_failed_init_tracer(trace, ret); 291 warn_failed_init_tracer(trace, ret);
243 return ret; 292 return ret;
@@ -291,7 +340,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
291 } 340 }
292 341
293 /* start the tracing */ 342 /* start the tracing */
294 ret = trace->init(tr); 343 ret = tracer_init(trace, tr);
295 if (ret) { 344 if (ret) {
296 warn_failed_init_tracer(trace, ret); 345 warn_failed_init_tracer(trace, ret);
297 return ret; 346 return ret;
@@ -345,7 +394,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
345 } 394 }
346 395
347 /* start the tracing */ 396 /* start the tracing */
348 ret = trace->init(tr); 397 ret = tracer_init(trace, tr);
349 if (ret) { 398 if (ret) {
350 warn_failed_init_tracer(trace, ret); 399 warn_failed_init_tracer(trace, ret);
351 goto out; 400 goto out;
@@ -477,7 +526,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
477 wait_for_completion(&isrt); 526 wait_for_completion(&isrt);
478 527
479 /* start the tracing */ 528 /* start the tracing */
480 ret = trace->init(tr); 529 ret = tracer_init(trace, tr);
481 if (ret) { 530 if (ret) {
482 warn_failed_init_tracer(trace, ret); 531 warn_failed_init_tracer(trace, ret);
483 return ret; 532 return ret;
@@ -538,7 +587,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
538 int ret; 587 int ret;
539 588
540 /* start the tracing */ 589 /* start the tracing */
541 ret = trace->init(tr); 590 ret = tracer_init(trace, tr);
542 if (ret) { 591 if (ret) {
543 warn_failed_init_tracer(trace, ret); 592 warn_failed_init_tracer(trace, ret);
544 return ret; 593 return ret;
@@ -570,7 +619,7 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
570 int ret; 619 int ret;
571 620
572 /* start the tracing */ 621 /* start the tracing */
573 ret = trace->init(tr); 622 ret = tracer_init(trace, tr);
574 if (ret) { 623 if (ret) {
575 warn_failed_init_tracer(trace, ret); 624 warn_failed_init_tracer(trace, ret);
576 return 0; 625 return 0;
@@ -597,7 +646,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
597 int ret; 646 int ret;
598 647
599 /* start the tracing */ 648 /* start the tracing */
600 ret = trace->init(tr); 649 ret = tracer_init(trace, tr);
601 if (ret) { 650 if (ret) {
602 warn_failed_init_tracer(trace, ret); 651 warn_failed_init_tracer(trace, ret);
603 return ret; 652 return ret;
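
Editorial note: the selftests above were switched from calling trace->init() directly to the tracer_init() helper. The helper itself lives in kernel/trace/trace.c, outside the hunks shown here; as a hedged sketch, it presumably just resets the per-CPU ring buffers before invoking the tracer's own init:

	static int tracer_init(struct tracer *t, struct trace_array *tr)
	{
		/* Start from an empty trace buffer, then run the tracer's init */
		tracing_reset_online_cpus(tr);
		return t->init(tr);
	}
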
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644
index 000000000000..eae9cef39291
--- /dev/null
+++ b/kernel/trace/trace_stat.c
@@ -0,0 +1,319 @@
1/*
2 * Infrastructure for statistic tracing (histogram output).
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 * Based on the code from trace_branch.c which is
7 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
8 *
9 */
10
11
12#include <linux/list.h>
13#include <linux/debugfs.h>
14#include "trace_stat.h"
15#include "trace.h"
16
17
18/* List of stat entries from a tracer */
19struct trace_stat_list {
20 struct list_head list;
21 void *stat;
22};
23
24/* A stat session is the stats output in one file */
25struct tracer_stat_session {
26 struct list_head session_list;
27 struct tracer_stat *ts;
28 struct list_head stat_list;
29 struct mutex stat_mutex;
30 struct dentry *file;
31};
32
33/* All of the sessions currently in use. Each stat file embeds one session */
34static LIST_HEAD(all_stat_sessions);
35static DEFINE_MUTEX(all_stat_sessions_mutex);
36
37/* The root directory for all stat files */
38static struct dentry *stat_dir;
39
40
41static void reset_stat_session(struct tracer_stat_session *session)
42{
43 struct trace_stat_list *node, *next;
44
45 list_for_each_entry_safe(node, next, &session->stat_list, list)
46 kfree(node);
47
48 INIT_LIST_HEAD(&session->stat_list);
49}
50
51static void destroy_session(struct tracer_stat_session *session)
52{
53 debugfs_remove(session->file);
54 reset_stat_session(session);
55 mutex_destroy(&session->stat_mutex);
56 kfree(session);
57}
58
59/*
60 * For tracers that don't provide a stat_cmp callback.
61 * This one forces an immediate insertion at the tail of
62 * the list.
63 */
64static int dummy_cmp(void *p1, void *p2)
65{
66 return 1;
67}
68
69/*
70 * Initialize the stat list at each trace_stat file opening.
71 * The copying and sorting must be redone on every open,
72 * since the stats could have changed between two file sessions.
73 */
74static int stat_seq_init(struct tracer_stat_session *session)
75{
76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts;
78 void *prev_stat;
79 int ret = 0;
80 int i;
81
82 mutex_lock(&session->stat_mutex);
83 reset_stat_session(session);
84
85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp;
87
88 /*
89 * The first entry. Actually this is the second, but the first
90 * one (the stat_list head) is pointless.
91 */
92 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
93 if (!new_entry) {
94 ret = -ENOMEM;
95 goto exit;
96 }
97
98 INIT_LIST_HEAD(&new_entry->list);
99
100 list_add(&new_entry->list, &session->stat_list);
101
102 new_entry->stat = ts->stat_start();
103 prev_stat = new_entry->stat;
104
105 /*
106 * Iterate over the tracer stat entries and store them in a sorted
107 * list.
108 */
109 for (i = 1; ; i++) {
110 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
111 if (!new_entry) {
112 ret = -ENOMEM;
113 goto exit_free_list;
114 }
115
116 INIT_LIST_HEAD(&new_entry->list);
117 new_entry->stat = ts->stat_next(prev_stat, i);
118
119 /* End of insertion */
120 if (!new_entry->stat)
121 break;
122
123 list_for_each_entry(iter_entry, &session->stat_list, list) {
124
125			/* Insert in descending sort order */
126 if (ts->stat_cmp(new_entry->stat,
127 iter_entry->stat) > 0) {
128
129 list_add_tail(&new_entry->list,
130 &iter_entry->list);
131 break;
132
133			/* Smallest value so far: append at the tail */
134 } else if (list_is_last(&iter_entry->list,
135 &session->stat_list)) {
136 list_add(&new_entry->list, &iter_entry->list);
137 break;
138 }
139 }
140
141 prev_stat = new_entry->stat;
142 }
143exit:
144 mutex_unlock(&session->stat_mutex);
145 return ret;
146
147exit_free_list:
148 reset_stat_session(session);
149 mutex_unlock(&session->stat_mutex);
150 return ret;
151}
152
153
154static void *stat_seq_start(struct seq_file *s, loff_t *pos)
155{
156 struct tracer_stat_session *session = s->private;
157
158	/* Prevent a tracer switch or stat_list modification */
159 mutex_lock(&session->stat_mutex);
160
161	/* If we are at the beginning of the file, print the headers */
162 if (!*pos && session->ts->stat_headers)
163 session->ts->stat_headers(s);
164
165 return seq_list_start(&session->stat_list, *pos);
166}
167
168static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
169{
170 struct tracer_stat_session *session = s->private;
171
172 return seq_list_next(p, &session->stat_list, pos);
173}
174
175static void stat_seq_stop(struct seq_file *s, void *p)
176{
177 struct tracer_stat_session *session = s->private;
178 mutex_unlock(&session->stat_mutex);
179}
180
181static int stat_seq_show(struct seq_file *s, void *v)
182{
183 struct tracer_stat_session *session = s->private;
184 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
185
186 return session->ts->stat_show(s, l->stat);
187}
188
189static const struct seq_operations trace_stat_seq_ops = {
190 .start = stat_seq_start,
191 .next = stat_seq_next,
192 .stop = stat_seq_stop,
193 .show = stat_seq_show
194};
195
196/* The session stat is refilled and resorted at each stat file opening */
197static int tracing_stat_open(struct inode *inode, struct file *file)
198{
199 int ret;
200
201 struct tracer_stat_session *session = inode->i_private;
202
203 ret = seq_open(file, &trace_stat_seq_ops);
204 if (!ret) {
205 struct seq_file *m = file->private_data;
206 m->private = session;
207 ret = stat_seq_init(session);
208 }
209
210 return ret;
211}
212
213/*
214 * Avoid consuming memory with our now useless list.
215 */
216static int tracing_stat_release(struct inode *i, struct file *f)
217{
218 struct tracer_stat_session *session = i->i_private;
219
220 mutex_lock(&session->stat_mutex);
221 reset_stat_session(session);
222 mutex_unlock(&session->stat_mutex);
223
224 return 0;
225}
226
227static const struct file_operations tracing_stat_fops = {
228 .open = tracing_stat_open,
229 .read = seq_read,
230 .llseek = seq_lseek,
231 .release = tracing_stat_release
232};
233
234static int tracing_stat_init(void)
235{
236 struct dentry *d_tracing;
237
238 d_tracing = tracing_init_dentry();
239
240 stat_dir = debugfs_create_dir("trace_stat", d_tracing);
241 if (!stat_dir)
242 pr_warning("Could not create debugfs "
243 "'trace_stat' entry\n");
244 return 0;
245}
246
247static int init_stat_file(struct tracer_stat_session *session)
248{
249 if (!stat_dir && tracing_stat_init())
250 return -ENODEV;
251
252 session->file = debugfs_create_file(session->ts->name, 0644,
253 stat_dir,
254 session, &tracing_stat_fops);
255 if (!session->file)
256 return -ENOMEM;
257 return 0;
258}
259
260int register_stat_tracer(struct tracer_stat *trace)
261{
262 struct tracer_stat_session *session, *node, *tmp;
263 int ret;
264
265 if (!trace)
266 return -EINVAL;
267
268 if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
269 return -EINVAL;
270
271 /* Already registered? */
272 mutex_lock(&all_stat_sessions_mutex);
273 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
274 if (node->ts == trace) {
275 mutex_unlock(&all_stat_sessions_mutex);
276 return -EINVAL;
277 }
278 }
279 mutex_unlock(&all_stat_sessions_mutex);
280
281 /* Init the session */
282 session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
283 if (!session)
284 return -ENOMEM;
285
286 session->ts = trace;
287 INIT_LIST_HEAD(&session->session_list);
288 INIT_LIST_HEAD(&session->stat_list);
289 mutex_init(&session->stat_mutex);
290 session->file = NULL;
291
292 ret = init_stat_file(session);
293 if (ret) {
294 destroy_session(session);
295 return ret;
296 }
297
298 /* Register */
299 mutex_lock(&all_stat_sessions_mutex);
300 list_add_tail(&session->session_list, &all_stat_sessions);
301 mutex_unlock(&all_stat_sessions_mutex);
302
303 return 0;
304}
305
306void unregister_stat_tracer(struct tracer_stat *trace)
307{
308 struct tracer_stat_session *node, *tmp;
309
310 mutex_lock(&all_stat_sessions_mutex);
311 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
312 if (node->ts == trace) {
313 list_del(&node->session_list);
314 destroy_session(node);
315 break;
316 }
317 }
318 mutex_unlock(&all_stat_sessions_mutex);
319}
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
new file mode 100644
index 000000000000..202274cf7f3d
--- /dev/null
+++ b/kernel/trace/trace_stat.h
@@ -0,0 +1,31 @@
1#ifndef __TRACE_STAT_H
2#define __TRACE_STAT_H
3
4#include <linux/seq_file.h>
5
6/*
7 * If you want to provide a stat file (one-shot statistics), fill
8 * an iterator with stat_start/stat_next and a stat_show callbacks.
9 * The others callbacks are optional.
10 */
11struct tracer_stat {
12 /* The name of your stat file */
13 const char *name;
14 /* Iteration over statistic entries */
15 void *(*stat_start)(void);
16 void *(*stat_next)(void *prev, int idx);
17 /* Compare two entries for stats sorting */
18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s);
23};
24
25/*
26 * Create or destroy a stat file
27 */
28extern int register_stat_tracer(struct tracer_stat *trace);
29extern void unregister_stat_tracer(struct tracer_stat *trace);
30
31#endif /* __TRACE_STAT_H */
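
For reference, a minimal sketch of a hypothetical user of this interface. The foo_* names and sample data are made up for illustration; only the tracer_stat callbacks and register_stat_tracer() come from the header above.

	#include <linux/kernel.h>
	#include <linux/seq_file.h>
	#include "trace_stat.h"

	/* Hypothetical sample data, only to show the callback contract */
	struct foo_stat {
		const char	*name;
		unsigned long	hits;
	};

	static struct foo_stat foo_data[] = {
		{ "alpha", 3 },
		{ "beta",  1 },
	};

	/* Return the first entry, or NULL if there is nothing to report */
	static void *foo_stat_start(void)
	{
		return &foo_data[0];
	}

	/* idx starts at 1 for the second entry; return NULL to end iteration */
	static void *foo_stat_next(void *prev, int idx)
	{
		if (idx >= ARRAY_SIZE(foo_data))
			return NULL;
		return &foo_data[idx];
	}

	static int foo_stat_show(struct seq_file *s, void *p)
	{
		struct foo_stat *st = p;

		seq_printf(s, "%-8s %lu\n", st->name, st->hits);
		return 0;
	}

	static struct tracer_stat foo_stats = {
		.name		= "foo",
		.stat_start	= foo_stat_start,
		.stat_next	= foo_stat_next,
		.stat_show	= foo_stat_show,
		/* stat_cmp and stat_headers left out: they are optional */
	};

Calling register_stat_tracer(&foo_stats) from an initcall would create a trace_stat/foo file in the tracing debugfs directory, refilled and re-sorted on every open as implemented in trace_stat.c above.
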
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index eaca5ad803ff..7c9a2d82a7d8 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -88,7 +88,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
88 } 88 }
89} 89}
90 90
91const static struct stacktrace_ops backtrace_ops = { 91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning, 92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
@@ -226,15 +226,6 @@ static void stop_stack_timers(void)
226 stop_stack_timer(cpu); 226 stop_stack_timer(cpu);
227} 227}
228 228
229static void start_stack_trace(struct trace_array *tr)
230{
231 mutex_lock(&sample_timer_lock);
232 tracing_reset_online_cpus(tr);
233 start_stack_timers();
234 tracer_enabled = 1;
235 mutex_unlock(&sample_timer_lock);
236}
237
238static void stop_stack_trace(struct trace_array *tr) 229static void stop_stack_trace(struct trace_array *tr)
239{ 230{
240 mutex_lock(&sample_timer_lock); 231 mutex_lock(&sample_timer_lock);
@@ -247,12 +238,18 @@ static int stack_trace_init(struct trace_array *tr)
247{ 238{
248 sysprof_trace = tr; 239 sysprof_trace = tr;
249 240
250 start_stack_trace(tr); 241 tracing_start_cmdline_record();
242
243 mutex_lock(&sample_timer_lock);
244 start_stack_timers();
245 tracer_enabled = 1;
246 mutex_unlock(&sample_timer_lock);
251 return 0; 247 return 0;
252} 248}
253 249
254static void stack_trace_reset(struct trace_array *tr) 250static void stack_trace_reset(struct trace_array *tr)
255{ 251{
252 tracing_stop_cmdline_record();
256 stop_stack_trace(tr); 253 stop_stack_trace(tr);
257} 254}
258 255
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 000000000000..4664990fe9c5
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,281 @@
1/*
2 * Workqueue statistical tracer.
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8
9#include <trace/workqueue.h>
10#include <linux/list.h>
11#include <linux/percpu.h>
12#include "trace_stat.h"
13#include "trace.h"
14
15
16/* A cpu workqueue thread */
17struct cpu_workqueue_stats {
18 struct list_head list;
19/* Useful to know if we print the cpu headers */
20 bool first_entry;
21 int cpu;
22 pid_t pid;
23/* Can be inserted from interrupt or user context, so this needs to be atomic */
24 atomic_t inserted;
25/*
26 * Doesn't need to be atomic: works are serialized by a single workqueue
27 * thread on a single CPU.
28 */
29 unsigned int executed;
30};
31
32/* List of workqueue threads on one cpu */
33struct workqueue_global_stats {
34 struct list_head list;
35 spinlock_t lock;
36};
37
38/* No global lock is needed: this is allocated before the workqueues exist,
39 * and never freed.
40 */
41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
43
44/* Insertion of a work */
45static void
46probe_workqueue_insertion(struct task_struct *wq_thread,
47 struct work_struct *work)
48{
49 int cpu = cpumask_first(&wq_thread->cpus_allowed);
50 struct cpu_workqueue_stats *node, *next;
51 unsigned long flags;
52
53 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
54 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
55 list) {
56 if (node->pid == wq_thread->pid) {
57 atomic_inc(&node->inserted);
58 goto found;
59 }
60 }
61 pr_debug("trace_workqueue: entry not found\n");
62found:
63 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
64}
65
66/* Execution of a work */
67static void
68probe_workqueue_execution(struct task_struct *wq_thread,
69 struct work_struct *work)
70{
71 int cpu = cpumask_first(&wq_thread->cpus_allowed);
72 struct cpu_workqueue_stats *node, *next;
73 unsigned long flags;
74
75 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
76 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
77 list) {
78 if (node->pid == wq_thread->pid) {
79 node->executed++;
80 goto found;
81 }
82 }
83 pr_debug("trace_workqueue: entry not found\n");
84found:
85 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
86}
87
88/* Creation of a cpu workqueue thread */
89static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
90{
91 struct cpu_workqueue_stats *cws;
92 unsigned long flags;
93
94 WARN_ON(cpu < 0 || cpu >= num_possible_cpus());
95
96 /* Workqueues are sometimes created in atomic context */
97 cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
98 if (!cws) {
99 pr_warning("trace_workqueue: not enough memory\n");
100 return;
101 }
102 tracing_record_cmdline(wq_thread);
103
104 INIT_LIST_HEAD(&cws->list);
105 cws->cpu = cpu;
106
107 cws->pid = wq_thread->pid;
108
109 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
110 if (list_empty(&workqueue_cpu_stat(cpu)->list))
111 cws->first_entry = true;
112 list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
113 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
114}
115
116/* Destruction of a cpu workqueue thread */
117static void probe_workqueue_destruction(struct task_struct *wq_thread)
118{
119	/* A workqueue thread only executes on one cpu */
120 int cpu = cpumask_first(&wq_thread->cpus_allowed);
121 struct cpu_workqueue_stats *node, *next;
122 unsigned long flags;
123
124 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
125 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
126 list) {
127 if (node->pid == wq_thread->pid) {
128 list_del(&node->list);
129 kfree(node);
130 goto found;
131 }
132 }
133
134	pr_debug("trace_workqueue: couldn't find workqueue to destroy\n");
135found:
136 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
137
138}
139
140static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
141{
142 unsigned long flags;
143 struct cpu_workqueue_stats *ret = NULL;
144
145
146 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
147
148 if (!list_empty(&workqueue_cpu_stat(cpu)->list))
149 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
150 struct cpu_workqueue_stats, list);
151
152 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
153
154 return ret;
155}
156
157static void *workqueue_stat_start(void)
158{
159 int cpu;
160 void *ret = NULL;
161
162 for_each_possible_cpu(cpu) {
163 ret = workqueue_stat_start_cpu(cpu);
164 if (ret)
165 return ret;
166 }
167 return NULL;
168}
169
170static void *workqueue_stat_next(void *prev, int idx)
171{
172 struct cpu_workqueue_stats *prev_cws = prev;
173 int cpu = prev_cws->cpu;
174 unsigned long flags;
175 void *ret = NULL;
176
177 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
178 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 for (++cpu ; cpu < num_possible_cpus(); cpu++) {
181 ret = workqueue_stat_start_cpu(cpu);
182 if (ret)
183 return ret;
184 }
185 return NULL;
186 }
187 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
188
189 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
190 list);
191}
192
193static int workqueue_stat_show(struct seq_file *s, void *p)
194{
195 struct cpu_workqueue_stats *cws = p;
196 unsigned long flags;
197 int cpu = cws->cpu;
198
199 seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
200 atomic_read(&cws->inserted),
201 cws->executed,
202 trace_find_cmdline(cws->pid));
203
204 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
205 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
206 seq_printf(s, "\n");
207 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
208
209 return 0;
210}
211
212static int workqueue_stat_headers(struct seq_file *s)
213{
214 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
215 seq_printf(s, "# | | | |\n\n");
216 return 0;
217}
218
219struct tracer_stat workqueue_stats __read_mostly = {
220 .name = "workqueues",
221 .stat_start = workqueue_stat_start,
222 .stat_next = workqueue_stat_next,
223 .stat_show = workqueue_stat_show,
224 .stat_headers = workqueue_stat_headers
225};
226
227
228int __init stat_workqueue_init(void)
229{
230 if (register_stat_tracer(&workqueue_stats)) {
231 pr_warning("Unable to register workqueue stat tracer\n");
232 return 1;
233 }
234
235 return 0;
236}
237fs_initcall(stat_workqueue_init);
238
239/*
240 * Workqueues are created very early, just after pre-smp initcalls.
241 * So we must register our tracepoints at this stage.
242 */
243int __init trace_workqueue_early_init(void)
244{
245 int ret, cpu;
246
247 ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
248 if (ret)
249 goto out;
250
251 ret = register_trace_workqueue_execution(probe_workqueue_execution);
252 if (ret)
253 goto no_insertion;
254
255 ret = register_trace_workqueue_creation(probe_workqueue_creation);
256 if (ret)
257 goto no_execution;
258
259 ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
260 if (ret)
261 goto no_creation;
262
263 for_each_possible_cpu(cpu) {
264 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
265 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
266 }
267
268 return 0;
269
270no_creation:
271 unregister_trace_workqueue_creation(probe_workqueue_creation);
272no_execution:
273 unregister_trace_workqueue_execution(probe_workqueue_execution);
274no_insertion:
275 unregister_trace_workqueue_insertion(probe_workqueue_insertion);
276out:
277 pr_warning("trace_workqueue: unable to trace workqueues\n");
278
279 return 1;
280}
281early_initcall(trace_workqueue_early_init);
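
Given the stat_headers()/stat_show() format strings above, the trace_stat/workqueues file comes out roughly in the following shape. The counts and thread names here are invented for illustration and the column alignment is approximate:

	# CPU  INSERTED  EXECUTED   NAME
	# |      |         |          |

	  0      157       157      events/0
	  0       13        13      khelper
	  1       98        97      events/1
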
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1f0c509b40d3..e53ee18ef431 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,6 +33,7 @@
33#include <linux/kallsyms.h> 33#include <linux/kallsyms.h>
34#include <linux/debug_locks.h> 34#include <linux/debug_locks.h>
35#include <linux/lockdep.h> 35#include <linux/lockdep.h>
36#include <trace/workqueue.h>
36 37
37/* 38/*
38 * The per-CPU workqueue (if single thread, we always use the first 39 * The per-CPU workqueue (if single thread, we always use the first
@@ -125,9 +126,13 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
125 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); 126 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
126} 127}
127 128
129DEFINE_TRACE(workqueue_insertion);
130
128static void insert_work(struct cpu_workqueue_struct *cwq, 131static void insert_work(struct cpu_workqueue_struct *cwq,
129 struct work_struct *work, struct list_head *head) 132 struct work_struct *work, struct list_head *head)
130{ 133{
134 trace_workqueue_insertion(cwq->thread, work);
135
131 set_wq_data(work, cwq); 136 set_wq_data(work, cwq);
132 /* 137 /*
133 * Ensure that we get the right work->data if we see the 138 * Ensure that we get the right work->data if we see the
@@ -259,6 +264,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
259} 264}
260EXPORT_SYMBOL_GPL(queue_delayed_work_on); 265EXPORT_SYMBOL_GPL(queue_delayed_work_on);
261 266
267DEFINE_TRACE(workqueue_execution);
268
262static void run_workqueue(struct cpu_workqueue_struct *cwq) 269static void run_workqueue(struct cpu_workqueue_struct *cwq)
263{ 270{
264 spin_lock_irq(&cwq->lock); 271 spin_lock_irq(&cwq->lock);
@@ -284,7 +291,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
284 */ 291 */
285 struct lockdep_map lockdep_map = work->lockdep_map; 292 struct lockdep_map lockdep_map = work->lockdep_map;
286#endif 293#endif
287 294 trace_workqueue_execution(cwq->thread, work);
288 cwq->current_work = work; 295 cwq->current_work = work;
289 list_del_init(cwq->worklist.next); 296 list_del_init(cwq->worklist.next);
290 spin_unlock_irq(&cwq->lock); 297 spin_unlock_irq(&cwq->lock);
@@ -765,6 +772,8 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
765 return cwq; 772 return cwq;
766} 773}
767 774
775DEFINE_TRACE(workqueue_creation);
776
768static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 777static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
769{ 778{
770 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 779 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -787,6 +796,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
787 sched_setscheduler_nocheck(p, SCHED_FIFO, &param); 796 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
788 cwq->thread = p; 797 cwq->thread = p;
789 798
799 trace_workqueue_creation(cwq->thread, cpu);
800
790 return 0; 801 return 0;
791} 802}
792 803
@@ -868,6 +879,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
868} 879}
869EXPORT_SYMBOL_GPL(__create_workqueue_key); 880EXPORT_SYMBOL_GPL(__create_workqueue_key);
870 881
882DEFINE_TRACE(workqueue_destruction);
883
871static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) 884static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
872{ 885{
873 /* 886 /*
@@ -891,6 +904,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
891 * checks list_empty(), and a "normal" queue_work() can't use 904 * checks list_empty(), and a "normal" queue_work() can't use
892 * a dead CPU. 905 * a dead CPU.
893 */ 906 */
907 trace_workqueue_destruction(cwq->thread);
894 kthread_stop(cwq->thread); 908 kthread_stop(cwq->thread);
895 cwq->thread = NULL; 909 cwq->thread = NULL;
896} 910}
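
The DEFINE_TRACE()/trace_workqueue_*() hooks above need matching declarations in <trace/workqueue.h>, which appears in the diffstat but not in the hunks shown. A hedged sketch of one such declaration follows; note that the prototype macros were spelled TPPROTO/TPARGS in kernels of this vintage and renamed TP_PROTO/TP_ARGS later, so the exact spelling depends on the tree:

	#include <linux/tracepoint.h>

	struct task_struct;
	struct work_struct;

	DECLARE_TRACE(workqueue_insertion,
		TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
		TPARGS(wq_thread, work));
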
diff --git a/mm/slab.c b/mm/slab.c
index 4d00855629c4..aeeb4ecb9428 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,6 +102,7 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <trace/kmemtrace.h>
105#include <linux/rcupdate.h> 106#include <linux/rcupdate.h>
106#include <linux/string.h> 107#include <linux/string.h>
107#include <linux/uaccess.h> 108#include <linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
568 569
569#endif 570#endif
570 571
572#ifdef CONFIG_KMEMTRACE
573size_t slab_buffer_size(struct kmem_cache *cachep)
574{
575 return cachep->buffer_size;
576}
577EXPORT_SYMBOL(slab_buffer_size);
578#endif
579
571/* 580/*
572 * Do not go above this order unless 0 objects fit into the slab. 581 * Do not go above this order unless 0 objects fit into the slab.
573 */ 582 */
@@ -3550,10 +3559,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3550 */ 3559 */
3551void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3560void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3552{ 3561{
3553 return __cache_alloc(cachep, flags, __builtin_return_address(0)); 3562 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3563
3564 kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
3565 obj_size(cachep), cachep->buffer_size, flags);
3566
3567 return ret;
3554} 3568}
3555EXPORT_SYMBOL(kmem_cache_alloc); 3569EXPORT_SYMBOL(kmem_cache_alloc);
3556 3570
3571#ifdef CONFIG_KMEMTRACE
3572void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
3573{
3574 return __cache_alloc(cachep, flags, __builtin_return_address(0));
3575}
3576EXPORT_SYMBOL(kmem_cache_alloc_notrace);
3577#endif
3578
3557/** 3579/**
3558 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry. 3580 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
3559 * @cachep: the cache we're checking against 3581 * @cachep: the cache we're checking against
@@ -3598,23 +3620,47 @@ out:
3598#ifdef CONFIG_NUMA 3620#ifdef CONFIG_NUMA
3599void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3621void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3600{ 3622{
3601 return __cache_alloc_node(cachep, flags, nodeid, 3623 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3602 __builtin_return_address(0)); 3624 __builtin_return_address(0));
3625
3626 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
3627 obj_size(cachep), cachep->buffer_size,
3628 flags, nodeid);
3629
3630 return ret;
3603} 3631}
3604EXPORT_SYMBOL(kmem_cache_alloc_node); 3632EXPORT_SYMBOL(kmem_cache_alloc_node);
3605 3633
3634#ifdef CONFIG_KMEMTRACE
3635void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
3636 gfp_t flags,
3637 int nodeid)
3638{
3639 return __cache_alloc_node(cachep, flags, nodeid,
3640 __builtin_return_address(0));
3641}
3642EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
3643#endif
3644
3606static __always_inline void * 3645static __always_inline void *
3607__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) 3646__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3608{ 3647{
3609 struct kmem_cache *cachep; 3648 struct kmem_cache *cachep;
3649 void *ret;
3610 3650
3611 cachep = kmem_find_general_cachep(size, flags); 3651 cachep = kmem_find_general_cachep(size, flags);
3612 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3652 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3613 return cachep; 3653 return cachep;
3614 return kmem_cache_alloc_node(cachep, flags, node); 3654 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
3655
3656 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
3657 (unsigned long) caller, ret,
3658 size, cachep->buffer_size, flags, node);
3659
3660 return ret;
3615} 3661}
3616 3662
3617#ifdef CONFIG_DEBUG_SLAB 3663#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3618void *__kmalloc_node(size_t size, gfp_t flags, int node) 3664void *__kmalloc_node(size_t size, gfp_t flags, int node)
3619{ 3665{
3620 return __do_kmalloc_node(size, flags, node, 3666 return __do_kmalloc_node(size, flags, node,
@@ -3647,6 +3693,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3647 void *caller) 3693 void *caller)
3648{ 3694{
3649 struct kmem_cache *cachep; 3695 struct kmem_cache *cachep;
3696 void *ret;
3650 3697
3651 /* If you want to save a few bytes .text space: replace 3698 /* If you want to save a few bytes .text space: replace
3652 * __ with kmem_. 3699 * __ with kmem_.
@@ -3656,11 +3703,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3656 cachep = __find_general_cachep(size, flags); 3703 cachep = __find_general_cachep(size, flags);
3657 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3704 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3658 return cachep; 3705 return cachep;
3659 return __cache_alloc(cachep, flags, caller); 3706 ret = __cache_alloc(cachep, flags, caller);
3707
3708 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
3709 (unsigned long) caller, ret,
3710 size, cachep->buffer_size, flags);
3711
3712 return ret;
3660} 3713}
3661 3714
3662 3715
3663#ifdef CONFIG_DEBUG_SLAB 3716#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3664void *__kmalloc(size_t size, gfp_t flags) 3717void *__kmalloc(size_t size, gfp_t flags)
3665{ 3718{
3666 return __do_kmalloc(size, flags, __builtin_return_address(0)); 3719 return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3699,6 +3752,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3699 debug_check_no_obj_freed(objp, obj_size(cachep)); 3752 debug_check_no_obj_freed(objp, obj_size(cachep));
3700 __cache_free(cachep, objp); 3753 __cache_free(cachep, objp);
3701 local_irq_restore(flags); 3754 local_irq_restore(flags);
3755
3756 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp);
3702} 3757}
3703EXPORT_SYMBOL(kmem_cache_free); 3758EXPORT_SYMBOL(kmem_cache_free);
3704 3759
@@ -3725,6 +3780,8 @@ void kfree(const void *objp)
3725 debug_check_no_obj_freed(objp, obj_size(c)); 3780 debug_check_no_obj_freed(objp, obj_size(c));
3726 __cache_free(c, (void *)objp); 3781 __cache_free(c, (void *)objp);
3727 local_irq_restore(flags); 3782 local_irq_restore(flags);
3783
3784 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp);
3728} 3785}
3729EXPORT_SYMBOL(kfree); 3786EXPORT_SYMBOL(kfree);
3730 3787
diff --git a/mm/slob.c b/mm/slob.c
index 52bc8a2bd9ef..f9cc24688232 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
65#include <linux/module.h> 65#include <linux/module.h>
66#include <linux/rcupdate.h> 66#include <linux/rcupdate.h>
67#include <linux/list.h> 67#include <linux/list.h>
68#include <trace/kmemtrace.h>
68#include <asm/atomic.h> 69#include <asm/atomic.h>
69 70
70/* 71/*
@@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
463{ 464{
464 unsigned int *m; 465 unsigned int *m;
465 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 466 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
467 void *ret;
466 468
467 if (size < PAGE_SIZE - align) { 469 if (size < PAGE_SIZE - align) {
468 if (!size) 470 if (!size)
469 return ZERO_SIZE_PTR; 471 return ZERO_SIZE_PTR;
470 472
471 m = slob_alloc(size + align, gfp, align, node); 473 m = slob_alloc(size + align, gfp, align, node);
474
472 if (!m) 475 if (!m)
473 return NULL; 476 return NULL;
474 *m = size; 477 *m = size;
475 return (void *)m + align; 478 ret = (void *)m + align;
479
480 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
481 _RET_IP_, ret,
482 size, size + align, gfp, node);
476 } else { 483 } else {
477 void *ret; 484 unsigned int order = get_order(size);
478 485
479 ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); 486 ret = slob_new_page(gfp | __GFP_COMP, order, node);
480 if (ret) { 487 if (ret) {
481 struct page *page; 488 struct page *page;
482 page = virt_to_page(ret); 489 page = virt_to_page(ret);
483 page->private = size; 490 page->private = size;
484 } 491 }
485 return ret; 492
493 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
494 _RET_IP_, ret,
495 size, PAGE_SIZE << order, gfp, node);
486 } 496 }
497
498 return ret;
487} 499}
488EXPORT_SYMBOL(__kmalloc_node); 500EXPORT_SYMBOL(__kmalloc_node);
489 501
@@ -501,6 +513,8 @@ void kfree(const void *block)
501 slob_free(m, *m + align); 513 slob_free(m, *m + align);
502 } else 514 } else
503 put_page(&sp->page); 515 put_page(&sp->page);
516
517 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
504} 518}
505EXPORT_SYMBOL(kfree); 519EXPORT_SYMBOL(kfree);
506 520
@@ -570,10 +584,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
570{ 584{
571 void *b; 585 void *b;
572 586
573 if (c->size < PAGE_SIZE) 587 if (c->size < PAGE_SIZE) {
574 b = slob_alloc(c->size, flags, c->align, node); 588 b = slob_alloc(c->size, flags, c->align, node);
575 else 589 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
590 _RET_IP_, b, c->size,
591 SLOB_UNITS(c->size) * SLOB_UNIT,
592 flags, node);
593 } else {
576 b = slob_new_page(flags, get_order(c->size), node); 594 b = slob_new_page(flags, get_order(c->size), node);
595 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
596 _RET_IP_, b, c->size,
597 PAGE_SIZE << get_order(c->size),
598 flags, node);
599 }
577 600
578 if (c->ctor) 601 if (c->ctor)
579 c->ctor(b); 602 c->ctor(b);
@@ -609,6 +632,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
609 } else { 632 } else {
610 __kmem_cache_free(b, c->size); 633 __kmem_cache_free(b, c->size);
611 } 634 }
635
636 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
612} 637}
613EXPORT_SYMBOL(kmem_cache_free); 638EXPORT_SYMBOL(kmem_cache_free);
614 639
diff --git a/mm/slub.c b/mm/slub.c
index 0280eee6cf37..3525e7b21d19 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <trace/kmemtrace.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/cpuset.h> 21#include <linux/cpuset.h>
21#include <linux/mempolicy.h> 22#include <linux/mempolicy.h>
@@ -1623,18 +1624,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1623 1624
1624void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1625void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1625{ 1626{
1626 return slab_alloc(s, gfpflags, -1, _RET_IP_); 1627 void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
1628
1629 kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
1630 s->objsize, s->size, gfpflags);
1631
1632 return ret;
1627} 1633}
1628EXPORT_SYMBOL(kmem_cache_alloc); 1634EXPORT_SYMBOL(kmem_cache_alloc);
1629 1635
1636#ifdef CONFIG_KMEMTRACE
1637void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
1638{
1639 return slab_alloc(s, gfpflags, -1, _RET_IP_);
1640}
1641EXPORT_SYMBOL(kmem_cache_alloc_notrace);
1642#endif
1643
1630#ifdef CONFIG_NUMA 1644#ifdef CONFIG_NUMA
1631void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 1645void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
1632{ 1646{
1633 return slab_alloc(s, gfpflags, node, _RET_IP_); 1647 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
1648
1649 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
1650 s->objsize, s->size, gfpflags, node);
1651
1652 return ret;
1634} 1653}
1635EXPORT_SYMBOL(kmem_cache_alloc_node); 1654EXPORT_SYMBOL(kmem_cache_alloc_node);
1636#endif 1655#endif
1637 1656
1657#ifdef CONFIG_KMEMTRACE
1658void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
1659 gfp_t gfpflags,
1660 int node)
1661{
1662 return slab_alloc(s, gfpflags, node, _RET_IP_);
1663}
1664EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
1665#endif
1666
1638/* 1667/*
1639 * Slow patch handling. This may still be called frequently since objects 1668 * Slow patch handling. This may still be called frequently since objects
1640 * have a longer lifetime than the cpu slabs in most processing loads. 1669 * have a longer lifetime than the cpu slabs in most processing loads.
@@ -1742,6 +1771,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
1742 page = virt_to_head_page(x); 1771 page = virt_to_head_page(x);
1743 1772
1744 slab_free(s, page, x, _RET_IP_); 1773 slab_free(s, page, x, _RET_IP_);
1774
1775 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
1745} 1776}
1746EXPORT_SYMBOL(kmem_cache_free); 1777EXPORT_SYMBOL(kmem_cache_free);
1747 1778
@@ -2657,6 +2688,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2657void *__kmalloc(size_t size, gfp_t flags) 2688void *__kmalloc(size_t size, gfp_t flags)
2658{ 2689{
2659 struct kmem_cache *s; 2690 struct kmem_cache *s;
2691 void *ret;
2660 2692
2661 if (unlikely(size > PAGE_SIZE)) 2693 if (unlikely(size > PAGE_SIZE))
2662 return kmalloc_large(size, flags); 2694 return kmalloc_large(size, flags);
@@ -2666,7 +2698,12 @@ void *__kmalloc(size_t size, gfp_t flags)
2666 if (unlikely(ZERO_OR_NULL_PTR(s))) 2698 if (unlikely(ZERO_OR_NULL_PTR(s)))
2667 return s; 2699 return s;
2668 2700
2669 return slab_alloc(s, flags, -1, _RET_IP_); 2701 ret = slab_alloc(s, flags, -1, _RET_IP_);
2702
2703 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
2704 size, s->size, flags);
2705
2706 return ret;
2670} 2707}
2671EXPORT_SYMBOL(__kmalloc); 2708EXPORT_SYMBOL(__kmalloc);
2672 2709
@@ -2685,16 +2722,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2685void *__kmalloc_node(size_t size, gfp_t flags, int node) 2722void *__kmalloc_node(size_t size, gfp_t flags, int node)
2686{ 2723{
2687 struct kmem_cache *s; 2724 struct kmem_cache *s;
2725 void *ret;
2688 2726
2689 if (unlikely(size > PAGE_SIZE)) 2727 if (unlikely(size > PAGE_SIZE)) {
2690 return kmalloc_large_node(size, flags, node); 2728 ret = kmalloc_large_node(size, flags, node);
2729
2730 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
2731 _RET_IP_, ret,
2732 size, PAGE_SIZE << get_order(size),
2733 flags, node);
2734
2735 return ret;
2736 }
2691 2737
2692 s = get_slab(size, flags); 2738 s = get_slab(size, flags);
2693 2739
2694 if (unlikely(ZERO_OR_NULL_PTR(s))) 2740 if (unlikely(ZERO_OR_NULL_PTR(s)))
2695 return s; 2741 return s;
2696 2742
2697 return slab_alloc(s, flags, node, _RET_IP_); 2743 ret = slab_alloc(s, flags, node, _RET_IP_);
2744
2745 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
2746 size, s->size, flags, node);
2747
2748 return ret;
2698} 2749}
2699EXPORT_SYMBOL(__kmalloc_node); 2750EXPORT_SYMBOL(__kmalloc_node);
2700#endif 2751#endif
@@ -2753,6 +2804,8 @@ void kfree(const void *x)
2753 return; 2804 return;
2754 } 2805 }
2755 slab_free(page->slab, page, object, _RET_IP_); 2806 slab_free(page->slab, page, object, _RET_IP_);
2807
2808 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
2756} 2809}
2757EXPORT_SYMBOL(kfree); 2810EXPORT_SYMBOL(kfree);
2758 2811
@@ -3222,6 +3275,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
3222void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) 3275void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3223{ 3276{
3224 struct kmem_cache *s; 3277 struct kmem_cache *s;
3278 void *ret;
3225 3279
3226 if (unlikely(size > PAGE_SIZE)) 3280 if (unlikely(size > PAGE_SIZE))
3227 return kmalloc_large(size, gfpflags); 3281 return kmalloc_large(size, gfpflags);
@@ -3231,13 +3285,20 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3231 if (unlikely(ZERO_OR_NULL_PTR(s))) 3285 if (unlikely(ZERO_OR_NULL_PTR(s)))
3232 return s; 3286 return s;
3233 3287
3234 return slab_alloc(s, gfpflags, -1, caller); 3288 ret = slab_alloc(s, gfpflags, -1, caller);
3289
3290	/* Honor the call site pointer we received. */
3291 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
3292 s->size, gfpflags);
3293
3294 return ret;
3235} 3295}
3236 3296
3237void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 3297void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3238 int node, unsigned long caller) 3298 int node, unsigned long caller)
3239{ 3299{
3240 struct kmem_cache *s; 3300 struct kmem_cache *s;
3301 void *ret;
3241 3302
3242 if (unlikely(size > PAGE_SIZE)) 3303 if (unlikely(size > PAGE_SIZE))
3243 return kmalloc_large_node(size, gfpflags, node); 3304 return kmalloc_large_node(size, gfpflags, node);
@@ -3247,7 +3308,13 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3247 if (unlikely(ZERO_OR_NULL_PTR(s))) 3308 if (unlikely(ZERO_OR_NULL_PTR(s)))
3248 return s; 3309 return s;
3249 3310
3250 return slab_alloc(s, gfpflags, node, caller); 3311 ret = slab_alloc(s, gfpflags, node, caller);
3312
3313	/* Honor the call site pointer we received. */
3314 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
3315 size, s->size, gfpflags, node);
3316
3317 return ret;
3251} 3318}
3252 3319
3253#ifdef CONFIG_SLUB_DEBUG 3320#ifdef CONFIG_SLUB_DEBUG
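
For reference, the kmemtrace hook signatures as inferred from the call sites in the slab/slob/slub hunks above; the real declarations live in <trace/kmemtrace.h> and the parameter names here are guesses. Each allocation marker records the call site, the returned pointer, the requested and actually allocated sizes, and the GFP flags, with the _node variants also recording the NUMA node:

	void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
				  unsigned long call_site,
				  const void *ptr,
				  size_t bytes_req,
				  size_t bytes_alloc,
				  gfp_t gfp_flags);

	void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
				       unsigned long call_site,
				       const void *ptr,
				       size_t bytes_req,
				       size_t bytes_alloc,
				       gfp_t gfp_flags,
				       int node);

	void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
				 unsigned long call_site,
				 const void *ptr);
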
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index c7de8b39fcf1..39a9642927d3 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -112,13 +112,13 @@ endif
112# --------------------------------------------------------------------------- 112# ---------------------------------------------------------------------------
113 113
114# Default is built-in, unless we know otherwise 114# Default is built-in, unless we know otherwise
115modkern_cflags := $(CFLAGS_KERNEL) 115modkern_cflags = $(if $(part-of-module), $(CFLAGS_MODULE), $(CFLAGS_KERNEL))
116quiet_modtag := $(empty) $(empty) 116quiet_modtag := $(empty) $(empty)
117 117
118$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) 118$(real-objs-m) : part-of-module := y
119$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) 119$(real-objs-m:.o=.i) : part-of-module := y
120$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE) 120$(real-objs-m:.o=.s) : part-of-module := y
121$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) 121$(real-objs-m:.o=.lst): part-of-module := y
122 122
123$(real-objs-m) : quiet_modtag := [M] 123$(real-objs-m) : quiet_modtag := [M]
124$(real-objs-m:.o=.i) : quiet_modtag := [M] 124$(real-objs-m:.o=.i) : quiet_modtag := [M]
@@ -205,7 +205,8 @@ endif
205ifdef CONFIG_FTRACE_MCOUNT_RECORD 205ifdef CONFIG_FTRACE_MCOUNT_RECORD
206cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ 206cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
207 "$(if $(CONFIG_64BIT),64,32)" \ 207 "$(if $(CONFIG_64BIT),64,32)" \
208 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)"; 208 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
209 "$(if $(part-of-module),1,0)" "$(@)";
209endif 210endif
210 211
211define rule_cc_o_c 212define rule_cc_o_c
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index fe831412bea9..409596eca124 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -100,14 +100,19 @@ $P =~ s@.*/@@g;
100 100
101my $V = '0.1'; 101my $V = '0.1';
102 102
103if ($#ARGV < 6) { 103if ($#ARGV < 7) {
104 print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n"; 104 print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
105 print "version: $V\n"; 105 print "version: $V\n";
106 exit(1); 106 exit(1);
107} 107}
108 108
109my ($arch, $bits, $objdump, $objcopy, $cc, 109my ($arch, $bits, $objdump, $objcopy, $cc,
110 $ld, $nm, $rm, $mv, $inputfile) = @ARGV; 110 $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV;
111
112# This file refers to mcount and shouldn't be ftraced, so let's ignore it
113if ($inputfile eq "kernel/trace/ftrace.o") {
114 exit(0);
115}
111 116
112# Acceptable sections to record. 117# Acceptable sections to record.
113my %text_sections = ( 118my %text_sections = (
@@ -201,6 +206,13 @@ if ($arch eq "x86_64") {
201 $alignment = 2; 206 $alignment = 2;
202 $section_type = '%progbits'; 207 $section_type = '%progbits';
203 208
209} elsif ($arch eq "ia64") {
210 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
211 $type = "data8";
212
213 if ($is_module eq "0") {
214 $cc .= " -mconstant-gp";
215 }
204} else { 216} else {
205 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; 217 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
206} 218}
@@ -263,7 +275,6 @@ if (!$found_version) {
263 "\tDisabling local function references.\n"; 275 "\tDisabling local function references.\n";
264} 276}
265 277
266
267# 278#
268# Step 1: find all the local (static functions) and weak symbols. 279# Step 1: find all the local (static functions) and weak symbols.
269# 't' is local, 'w/W' is weak (we never use a weak function) 280# 't' is local, 'w/W' is weak (we never use a weak function)
@@ -331,13 +342,16 @@ sub update_funcs
331# 342#
332# Step 2: find the sections and mcount call sites 343# Step 2: find the sections and mcount call sites
333# 344#
334open(IN, "$objdump -dr $inputfile|") || die "error running $objdump"; 345open(IN, "$objdump -hdr $inputfile|") || die "error running $objdump";
335 346
336my $text; 347my $text;
337 348
349my $read_headers = 1;
350
338while (<IN>) { 351while (<IN>) {
339 # is it a section? 352 # is it a section?
340 if (/$section_regex/) { 353 if (/$section_regex/) {
354 $read_headers = 0;
341 355
342 # Only record text sections that we know are safe 356 # Only record text sections that we know are safe
343 if (defined($text_sections{$1})) { 357 if (defined($text_sections{$1})) {
@@ -371,6 +385,19 @@ while (<IN>) {
371 $ref_func = $text; 385 $ref_func = $text;
372 } 386 }
373 } 387 }
388 } elsif ($read_headers && /$mcount_section/) {
389 #
390 # Somehow the make process can execute this script on an
391 # object twice. If it does, we would duplicate the mcount
392 # section and it will cause the function tracer self test
393 # to fail. Check if the mcount section exists, and if it does,
394 # warn and exit.
395 #
396 print STDERR "ERROR: $mcount_section already in $inputfile\n" .
397 "\tThis may be an indication that your build is corrupted.\n" .
398 "\tDelete $inputfile and try again. If the same object file\n" .
399 "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n";
400 exit(-1);
374 } 401 }
375 402
376 # is this a call site to mcount? If so, record it to print later 403 # is this a call site to mcount? If so, record it to print later