-rw-r--r--  Documentation/ABI/testing/debugfs-kmemtrace | 71
-rw-r--r--  Documentation/ftrace.txt | 74
-rw-r--r--  Documentation/kernel-parameters.txt | 10
-rw-r--r--  Documentation/sysrq.txt | 2
-rw-r--r--  Documentation/tracers/mmiotrace.txt | 6
-rw-r--r--  Documentation/vm/kmemtrace.txt | 126
-rw-r--r--  MAINTAINERS | 6
-rw-r--r--  arch/ia64/Kconfig | 3
-rw-r--r--  arch/ia64/include/asm/ftrace.h | 28
-rw-r--r--  arch/ia64/kernel/Makefile | 5
-rw-r--r--  arch/ia64/kernel/entry.S | 100
-rw-r--r--  arch/ia64/kernel/ftrace.c | 206
-rw-r--r--  arch/ia64/kernel/ia64_ksyms.c | 6
-rw-r--r--  arch/x86/Kconfig.debug | 24
-rw-r--r--  arch/x86/kernel/dumpstack.c | 6
-rw-r--r--  arch/x86/kernel/ftrace.c | 11
-rw-r--r--  arch/x86/kvm/Kconfig | 3
-rw-r--r--  block/Kconfig | 8
-rw-r--r--  block/blktrace.c | 729
-rw-r--r--  drivers/char/sysrq.c | 2
-rw-r--r--  fs/partitions/check.c | 4
-rw-r--r--  include/linux/blktrace_api.h | 5
-rw-r--r--  include/linux/ftrace.h | 25
-rw-r--r--  include/linux/sched.h | 2
-rw-r--r--  include/linux/slab_def.h | 68
-rw-r--r--  include/linux/slob_def.h | 9
-rw-r--r--  include/linux/slub_def.h | 53
-rw-r--r--  include/trace/kmemtrace.h | 75
-rw-r--r--  include/trace/workqueue.h | 25
-rw-r--r--  init/main.c | 2
-rw-r--r--  kernel/relay.c | 4
-rw-r--r--  kernel/sched.c | 8
-rw-r--r--  kernel/softirq.c | 13
-rw-r--r--  kernel/trace/Kconfig | 62
-rw-r--r--  kernel/trace/Makefile | 4
-rw-r--r--  kernel/trace/ftrace.c | 23
-rw-r--r--  kernel/trace/kmemtrace.c | 350
-rw-r--r--  kernel/trace/ring_buffer.c | 55
-rw-r--r--  kernel/trace/trace.c | 1075
-rw-r--r--  kernel/trace/trace.h | 68
-rw-r--r--  kernel/trace/trace_boot.c | 12
-rw-r--r--  kernel/trace/trace_branch.c | 284
-rw-r--r--  kernel/trace/trace_functions.c | 193
-rw-r--r--  kernel/trace/trace_functions_graph.c | 314
-rw-r--r--  kernel/trace/trace_hw_branches.c | 174
-rw-r--r--  kernel/trace/trace_irqsoff.c | 34
-rw-r--r--  kernel/trace/trace_mmiotrace.c | 31
-rw-r--r--  kernel/trace/trace_nop.c | 6
-rw-r--r--  kernel/trace/trace_output.c | 910
-rw-r--r--  kernel/trace/trace_output.h | 62
-rw-r--r--  kernel/trace/trace_power.c | 1
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 83
-rw-r--r--  kernel/trace/trace_selftest.c | 1
-rw-r--r--  kernel/trace/trace_stat.c | 319
-rw-r--r--  kernel/trace/trace_stat.h | 31
-rw-r--r--  kernel/trace/trace_workqueue.c | 281
-rw-r--r--  kernel/workqueue.c | 16
-rw-r--r--  mm/slab.c | 71
-rw-r--r--  mm/slob.c | 37
-rw-r--r--  mm/slub.c | 83
-rw-r--r--  scripts/Makefile.build | 13
-rwxr-xr-x  scripts/recordmcount.pl | 37
62 files changed, 4895 insertions(+), 1454 deletions(-)
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 000000000000..5e6a92a02d85
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
1What: /sys/kernel/debug/kmemtrace/
2Date: July 2008
3Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
4Description:
5
6In kmemtrace-enabled kernels, the following files are created:
7
8/sys/kernel/debug/kmemtrace/
9 cpu<n> (0400) Per-CPU tracing data, see below. (binary)
10 total_overruns (0400) Total number of bytes which were dropped from
 11 cpu<n> files because of a full buffer condition,
12 non-binary. (text)
13 abi_version (0400) Kernel's kmemtrace ABI version. (text)
14
15Each per-CPU file should be read according to the relay interface. That is,
16the reader should set affinity to that specific CPU and, as currently done by
17the userspace application (though there are other methods), use poll() with
18an infinite timeout before every read(). Otherwise, erroneous data may be
19read. The binary data has the following _core_ format:
20
21 Event ID (1 byte) Unsigned integer, one of:
22 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
23 1 - represents a freeing of previously allocated memory
24 (KMEMTRACE_EVENT_FREE)
25 Type ID (1 byte) Unsigned integer, one of:
26 0 - this is a kmalloc() / kfree()
27 1 - this is a kmem_cache_alloc() / kmem_cache_free()
28 2 - this is a __get_free_pages() et al.
29 Event size (2 bytes) Unsigned integer representing the
30 size of this event. Used to extend
31 kmemtrace. Discard the bytes you
32 don't know about.
33 Sequence number (4 bytes) Signed integer used to reorder data
34 logged on SMP machines. Wraparound
35 must be taken into account, although
36 it is unlikely.
37 Caller address (8 bytes) Return address to the caller.
38 Pointer to mem (8 bytes) Pointer to target memory area. Can be
39 NULL, but not all such calls might be
40 recorded.
41
42In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
43
44 Requested bytes (8 bytes) Total number of requested bytes,
45 unsigned, must not be zero.
46 Allocated bytes (8 bytes) Total number of actually allocated
47 bytes, unsigned, must not be lower
48 than requested bytes.
49 Requested flags (4 bytes) GFP flags supplied by the caller.
50 Target CPU (4 bytes) Signed integer, valid for event id 1.
51 If equal to -1, target CPU is the same
52 as origin CPU, but the reverse might
53 not be true.
54
55The data is made available in the machine's native endianness.
56
57Other event ids and type ids may be defined and added. Other fields may be
58added by increasing event size, but see below for details.
59Every modification to the ABI, including new id definitions, is followed
60by bumping the ABI version by one.
61
62Adding new data to the packet (features) is done at the end of the mandatory
63data:
 64 Feature size (2 bytes)
65 Feature ID (1 byte)
66 Feature data (Feature size - 3 bytes)
67
68
69Users:
70 kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
71
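
For illustration, the core record layout described above maps onto a packed C
structure such as the sketch below. This is a hypothetical reader-side
declaration (the struct and field names are not taken from the kernel
headers); the sizes follow the ABI text, and the allocation fields are only
present when the event id is 0 (KMEMTRACE_EVENT_ALLOC).

#include <stdint.h>

struct kmemtrace_core_event {		/* hypothetical layout, 24 bytes */
	uint8_t  event_id;	/* 0 = ALLOC, 1 = FREE */
	uint8_t  type_id;	/* 0 = kmalloc/kfree, 1 = kmem_cache_*, 2 = __get_free_pages et al. */
	uint16_t event_size;	/* total event size; discard trailing bytes you don't know */
	int32_t  seq;		/* sequence number, may wrap */
	uint64_t call_site;	/* return address to the caller */
	uint64_t ptr;		/* pointer to the target memory area, may be 0 */
} __attribute__((packed));

struct kmemtrace_alloc_event {		/* follows the core event when event_id == 0 */
	uint64_t bytes_req;	/* requested bytes, non-zero */
	uint64_t bytes_alloc;	/* allocated bytes, >= bytes_req */
	uint32_t gfp_flags;	/* GFP flags supplied by the caller */
	int32_t  target_cpu;	/* see the Target CPU note above; -1 = same as origin CPU */
} __attribute__((packed));

A reader should compare event_size against the size it understands and skip
any extra (feature) bytes, as required by the text above.
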
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 803b1318b13d..758fb42a1b68 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -165,6 +165,8 @@ Here is the list of current tracers that may be configured.
165 nop - This is not a tracer. To remove all tracers from tracing 165 nop - This is not a tracer. To remove all tracers from tracing
166 simply echo "nop" into current_tracer. 166 simply echo "nop" into current_tracer.
167 167
 168 hw-branch-tracer - traces branches on all CPUs in a circular buffer.
169
168 170
169Examples of using the tracer 171Examples of using the tracer
170---------------------------- 172----------------------------
@@ -1152,6 +1154,78 @@ int main (int argc, char **argv)
1152 return 0; 1154 return 0;
1153} 1155}
1154 1156
1157
1158hw-branch-tracer (x86 only)
1159---------------------------
1160
1161This tracer uses the x86 last branch tracing hardware feature to
1162collect a branch trace on all cpus with relatively low overhead.
1163
1164The tracer uses a fixed-size circular buffer per cpu and only
1165traces ring 0 branches. The trace file dumps that buffer in the
1166following format:
1167
1168# tracer: hw-branch-tracer
1169#
1170# CPU# TO <- FROM
1171 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6
1172 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a
1173 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf
1174 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf
1175 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a
1176 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf
1177
1178
 1179The tracer may be used to dump the trace of the oopsing cpu into the
 1180system log on a kernel oops. To enable this, ftrace_dump_on_oops must
 1181be set. To set ftrace_dump_on_oops, one can either use the sysctl
 1182utility or write to the proc filesystem interface.
1183
1184 sysctl kernel.ftrace_dump_on_oops=1
1185
1186or
1187
1188 echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
1189
1190
1191Here's an example of such a dump after a null pointer dereference in a
1192kernel module:
1193
1194[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
1195[57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops]
1196[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0
1197[57848.106019] Oops: 0002 [#1] SMP
1198[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus
1199[57848.106019] Dumping ftrace buffer:
1200[57848.106019] ---------------------------------
1201[...]
1202[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24
1203[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165
1204[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165
1205[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165
1206[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165
1207[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops]
1208[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30
1209[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b
1210[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31
1211[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1
1212[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30
1213[...]
1214[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2
1215[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881
1216[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881
1217[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96
1218[...]
1219[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3
1220[57848.106019] ---------------------------------
1221[57848.106019] CPU 0
1222[57848.106019] Modules linked in: oops
1223[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23
1224[57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops]
1225[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246
1226[...]
1227
1228
1155dynamic ftrace 1229dynamic ftrace
1156-------------- 1230--------------
1157 1231
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d8362cf9909e..6390ffb520fd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ parameter is applicable:
49 ISAPNP ISA PnP code is enabled. 49 ISAPNP ISA PnP code is enabled.
50 ISDN Appropriate ISDN support is enabled. 50 ISDN Appropriate ISDN support is enabled.
51 JOY Appropriate joystick support is enabled. 51 JOY Appropriate joystick support is enabled.
52 KMEMTRACE kmemtrace is enabled.
52 LIBATA Libata driver is enabled 53 LIBATA Libata driver is enabled
53 LP Printer support is enabled. 54 LP Printer support is enabled.
54 LOOP Loopback device support is enabled. 55 LOOP Loopback device support is enabled.
@@ -1043,6 +1044,15 @@ and is between 256 and 4096 characters. It is defined in the file
1043 use the HighMem zone if it exists, and the Normal 1044 use the HighMem zone if it exists, and the Normal
1044 zone if it does not. 1045 zone if it does not.
1045 1046
1047 kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
1048 Controls whether kmemtrace is enabled
1049 at boot-time.
1050
1051 kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
1052 subbufs kmemtrace's relay channel has. Set this
1053 higher than default (KMEMTRACE_N_SUBBUFS in code) if
1054 you experience buffer overruns.
1055
1046 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter 1056 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
1047 is similar to kernelcore except it specifies the 1057 is similar to kernelcore except it specifies the
1048 amount of memory used for migratable allocations. 1058 amount of memory used for migratable allocations.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 9e592c718afb..535aeb936dbc 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -113,6 +113,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
113 113
114'x' - Used by xmon interface on ppc/powerpc platforms. 114'x' - Used by xmon interface on ppc/powerpc platforms.
115 115
116'z' - Dump the ftrace buffer
117
116'0'-'9' - Sets the console log level, controlling which kernel messages 118'0'-'9' - Sets the console log level, controlling which kernel messages
117 will be printed to your console. ('0', for example would make 119 will be printed to your console. ('0', for example would make
118 it so that only emergency messages like PANICs or OOPSes would 120 it so that only emergency messages like PANICs or OOPSes would
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index cde23b4a12a1..5731c67abc55 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -78,12 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If
78events were lost, the trace is incomplete. You should enlarge the buffers and 78events were lost, the trace is incomplete. You should enlarge the buffers and
79try again. Buffers are enlarged by first seeing how large the current buffers 79try again. Buffers are enlarged by first seeing how large the current buffers
80are: 80are:
81$ cat /debug/tracing/trace_entries 81$ cat /debug/tracing/buffer_size_kb
82gives you a number. Approximately double this number and write it back, for 82gives you a number. Approximately double this number and write it back, for
83instance: 83instance:
84$ echo 0 > /debug/tracing/tracing_enabled 84$ echo 128000 > /debug/tracing/buffer_size_kb
85$ echo 128000 > /debug/tracing/trace_entries
86$ echo 1 > /debug/tracing/tracing_enabled
87Then start again from the top. 85Then start again from the top.
88 86
89If you are doing a trace for a driver project, e.g. Nouveau, you should also 87If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644
index 000000000000..a956d9b7f943
--- /dev/null
+++ b/Documentation/vm/kmemtrace.txt
@@ -0,0 +1,126 @@
1 kmemtrace - Kernel Memory Tracer
2
3 by Eduard - Gabriel Munteanu
4 <eduard.munteanu@linux360.ro>
5
6I. Introduction
7===============
8
9kmemtrace helps kernel developers figure out two things:
101) how different allocators (SLAB, SLUB etc.) perform
112) how kernel code allocates memory and how much
12
13To do this, we trace every allocation and export information to the userspace
14through the relay interface. We export things such as the number of requested
15bytes, the number of bytes actually allocated (i.e. including internal
16fragmentation), whether this is a slab allocation or a plain kmalloc() and so
17on.
18
19The actual analysis is performed by a userspace tool (see section III for
20details on where to get it from). It logs the data exported by the kernel,
21processes it and (as of writing this) can provide the following information:
22- the total amount of memory allocated and fragmentation per call-site
23- the amount of memory allocated and fragmentation per allocation
24- total memory allocated and fragmentation in the collected dataset
25- number of cross-CPU allocations and frees (makes sense in NUMA environments)
26
27Moreover, it can potentially find inconsistent and erroneous behavior in
28kernel code, such as using slab free functions on kmalloc'ed memory or
29allocating less memory than requested (but not truly failed allocations).
30
31kmemtrace also makes provisions for tracing on one architecture and analysing
32the data on another.
33
34II. Design and goals
35====================
36
37kmemtrace was designed to handle rather large amounts of data. Thus, it uses
38the relay interface to export whatever is logged to userspace, which then
39stores it. Analysis and reporting is done asynchronously, that is, after the
40data is collected and stored. By design, it allows one to log and analyse
41on different machines and different arches.
42
43As of writing this, the ABI is not considered stable, though it might not
44change much. However, no guarantees are made about compatibility yet. When
45deemed stable, the ABI should still allow easy extension while maintaining
46backward compatibility. This is described further in Documentation/ABI.
47
48Summary of design goals:
49 - allow logging and analysis to be done across different machines
50 - be fast and anticipate usage in high-load environments (*)
51 - be reasonably extensible
52 - make it possible for GNU/Linux distributions to have kmemtrace
53 included in their repositories
54
55(*) - one of the reasons Pekka Enberg's original userspace data analysis
56 tool's code was rewritten from Perl to C (although this is more than a
57 simple conversion)
58
59
60III. Quick usage guide
61======================
62
631) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
64CONFIG_KMEMTRACE).
65
662) Get the userspace tool and build it:
67$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository
68$ cd kmemtrace-user/
69$ ./autogen.sh
70$ ./configure
71$ make
72
733) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
74'single' runlevel (so that relay buffers don't fill up easily), and run
75kmemtrace:
76# '$' does not mean user, but root here.
77$ mount -t debugfs none /sys/kernel/debug
78$ mount -t proc none /proc
79$ cd path/to/kmemtrace-user/
80$ ./kmemtraced
81Wait a bit, then stop it with CTRL+C.
82$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
83 # overrun, should
84 # be zero.
85$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
86 check its correctness]
87$ ./kmemtrace-report
88
89Now you should have a nice and short summary of how the allocator performs.
90
91IV. FAQ and known issues
92========================
93
94Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
95this? Should I worry?
96A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
97large the number is. You can fix it by supplying a higher
98'kmemtrace.subbufs=N' kernel parameter.
99---
100
101Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
102A: This is a bug and should be reported. It can occur for a variety of
103reasons:
104 - possible bugs in relay code
105 - possible misuse of relay by kmemtrace
106 - timestamps being collected out of order
107Or you may fix it yourself and send us a patch.
108---
109
110Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
111A: This is a known issue and I'm working on it. These might be true errors
112in kernel code, which may have inconsistent behavior (e.g. allocating memory
113with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
114out this behavior may work with SLAB, but may fail with other allocators.
115
116It may also be due to lack of tracing in some unusual allocator functions.
117
118We don't want bug reports regarding this issue yet.
119---
120
121V. See also
122===========
123
124Documentation/kernel-parameters.txt
125Documentation/ABI/testing/debugfs-kmemtrace
126
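
As a further illustration of the relay read protocol described in the ABI
document, a minimal per-CPU reader might look like the sketch below. It is
hypothetical (not taken from kmemtrace-user) and error handling is
abbreviated; it pins itself to the CPU whose buffer it reads, then blocks in
poll() with an infinite timeout before each read(), as recommended.

#define _GNU_SOURCE
#include <fcntl.h>
#include <poll.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

/* Drain /sys/kernel/debug/kmemtrace/cpu<n> into 'out', reading from CPU n itself. */
static int drain_cpu_buffer(int n, FILE *out)
{
	char path[64], buf[4096];
	cpu_set_t set;
	struct pollfd pfd;
	ssize_t len;

	CPU_ZERO(&set);
	CPU_SET(n, &set);
	if (sched_setaffinity(0, sizeof(set), &set))	/* pin ourselves to CPU n */
		return -1;

	snprintf(path, sizeof(path), "/sys/kernel/debug/kmemtrace/cpu%d", n);
	pfd.fd = open(path, O_RDONLY);
	if (pfd.fd < 0)
		return -1;
	pfd.events = POLLIN;

	for (;;) {
		if (poll(&pfd, 1, -1) < 0)	/* infinite timeout, per the ABI document */
			break;
		len = read(pfd.fd, buf, sizeof(buf));
		if (len <= 0)
			break;
		fwrite(buf, 1, len, out);	/* raw binary records, parsed offline */
	}
	close(pfd.fd);
	return 0;
}

int main(void)
{
	return drain_cpu_buffer(0, stdout);	/* e.g. dump CPU 0's buffer to stdout */
}

A real tool would run one such reader per online CPU (one thread or process
each) and record which CPU each output file came from.
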
diff --git a/MAINTAINERS b/MAINTAINERS
index 5bff376d297c..829a697f1235 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2606,6 +2606,12 @@ M: jason.wessel@windriver.com
2606L: kgdb-bugreport@lists.sourceforge.net 2606L: kgdb-bugreport@lists.sourceforge.net
2607S: Maintained 2607S: Maintained
2608 2608
2609KMEMTRACE
2610P: Eduard - Gabriel Munteanu
2611M: eduard.munteanu@linux360.ro
2612L: linux-kernel@vger.kernel.org
2613S: Maintained
2614
2609KPROBES 2615KPROBES
2610P: Ananth N Mavinakayanahalli 2616P: Ananth N Mavinakayanahalli
2611M: ananth@in.ibm.com 2617M: ananth@in.ibm.com
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6183aeccecf1..8b6a8a554afa 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -22,6 +22,9 @@ config IA64
22 select HAVE_OPROFILE 22 select HAVE_OPROFILE
23 select HAVE_KPROBES 23 select HAVE_KPROBES
24 select HAVE_KRETPROBES 24 select HAVE_KRETPROBES
25 select HAVE_FTRACE_MCOUNT_RECORD
26 select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
27 select HAVE_FUNCTION_TRACER
25 select HAVE_DMA_ATTRS 28 select HAVE_DMA_ATTRS
26 select HAVE_KVM 29 select HAVE_KVM
27 select HAVE_ARCH_TRACEHOOK 30 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644
index 000000000000..d20db3c2a656
--- /dev/null
+++ b/arch/ia64/include/asm/ftrace.h
@@ -0,0 +1,28 @@
1#ifndef _ASM_IA64_FTRACE_H
2#define _ASM_IA64_FTRACE_H
3
4#ifdef CONFIG_FUNCTION_TRACER
5#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */
6
7#ifndef __ASSEMBLY__
8extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
9#define mcount _mcount
10
11#include <asm/kprobes.h>
12/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
13#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
14#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
15
16static inline unsigned long ftrace_call_adjust(unsigned long addr)
17{
18 /* second bundle, insn 2 */
19 return addr - 0x12;
20}
21
22struct dyn_arch_ftrace {
23};
24#endif
25
26#endif /* CONFIG_FUNCTION_TRACER */
27
28#endif /* _ASM_IA64_FTRACE_H */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c381ea954892..ab6e7ec0bba3 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,6 +2,10 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5ifdef CONFIG_DYNAMIC_FTRACE
6CFLAGS_REMOVE_ftrace.o = -pg
7endif
8
5extra-y := head.o init_task.o vmlinux.lds 9extra-y := head.o init_task.o vmlinux.lds
6 10
7obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ 11obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
28obj-$(CONFIG_CPU_FREQ) += cpufreq/ 32obj-$(CONFIG_CPU_FREQ) += cpufreq/
29obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o 33obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
30obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o 34obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
35obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
31obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 36obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
32obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 37obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
33obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o 38obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2c1175..7e3382b06d56 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -47,6 +47,7 @@
47#include <asm/processor.h> 47#include <asm/processor.h>
48#include <asm/thread_info.h> 48#include <asm/thread_info.h>
49#include <asm/unistd.h> 49#include <asm/unistd.h>
50#include <asm/ftrace.h>
50 51
51#include "minstate.h" 52#include "minstate.h"
52 53
@@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running)
1404 br.ret.sptk.many rp 1405 br.ret.sptk.many rp
1405END(unw_init_running) 1406END(unw_init_running)
1406 1407
1408#ifdef CONFIG_FUNCTION_TRACER
1409#ifdef CONFIG_DYNAMIC_FTRACE
1410GLOBAL_ENTRY(_mcount)
1411 br ftrace_stub
1412END(_mcount)
1413
1414.here:
1415 br.ret.sptk.many b0
1416
1417GLOBAL_ENTRY(ftrace_caller)
1418 alloc out0 = ar.pfs, 8, 0, 4, 0
1419 mov out3 = r0
1420 ;;
1421 mov out2 = b0
1422 add r3 = 0x20, r3
1423 mov out1 = r1;
1424 br.call.sptk.many b0 = ftrace_patch_gp
1425 //this might be called from module, so we must patch gp
1426ftrace_patch_gp:
1427 movl gp=__gp
1428 mov b0 = r3
1429 ;;
1430.global ftrace_call;
1431ftrace_call:
1432{
1433 .mlx
1434 nop.m 0x0
1435 movl r3 = .here;;
1436}
1437 alloc loc0 = ar.pfs, 4, 4, 2, 0
1438 ;;
1439 mov loc1 = b0
1440 mov out0 = b0
1441 mov loc2 = r8
1442 mov loc3 = r15
1443 ;;
1444 adds out0 = -MCOUNT_INSN_SIZE, out0
1445 mov out1 = in2
1446 mov b6 = r3
1447
1448 br.call.sptk.many b0 = b6
1449 ;;
1450 mov ar.pfs = loc0
1451 mov b0 = loc1
1452 mov r8 = loc2
1453 mov r15 = loc3
1454 br ftrace_stub
1455 ;;
1456END(ftrace_caller)
1457
1458#else
1459GLOBAL_ENTRY(_mcount)
1460 movl r2 = ftrace_stub
1461 movl r3 = ftrace_trace_function;;
1462 ld8 r3 = [r3];;
1463 ld8 r3 = [r3];;
1464 cmp.eq p7,p0 = r2, r3
1465(p7) br.sptk.many ftrace_stub
1466 ;;
1467
1468 alloc loc0 = ar.pfs, 4, 4, 2, 0
1469 ;;
1470 mov loc1 = b0
1471 mov out0 = b0
1472 mov loc2 = r8
1473 mov loc3 = r15
1474 ;;
1475 adds out0 = -MCOUNT_INSN_SIZE, out0
1476 mov out1 = in2
1477 mov b6 = r3
1478
1479 br.call.sptk.many b0 = b6
1480 ;;
1481 mov ar.pfs = loc0
1482 mov b0 = loc1
1483 mov r8 = loc2
1484 mov r15 = loc3
1485 br ftrace_stub
1486 ;;
1487END(_mcount)
1488#endif
1489
1490GLOBAL_ENTRY(ftrace_stub)
1491 mov r3 = b0
1492 movl r2 = _mcount_ret_helper
1493 ;;
1494 mov b6 = r2
1495 mov b7 = r3
1496 br.ret.sptk.many b6
1497
1498_mcount_ret_helper:
1499 mov b0 = r42
1500 mov r1 = r41
1501 mov ar.pfs = r40
1502 br b7
1503END(ftrace_stub)
1504
1505#endif /* CONFIG_FUNCTION_TRACER */
1506
1407 .rodata 1507 .rodata
1408 .align 8 1508 .align 8
1409 .globl sys_call_table 1509 .globl sys_call_table
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 000000000000..7fc8c961b1f7
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,206 @@
1/*
2 * Dynamic function tracing support.
3 *
4 * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
5 *
6 * For licencing details, see COPYING.
7 *
8 * Defines low-level handling of mcount calls when the kernel
9 * is compiled with the -pg flag. When using dynamic ftrace, the
 10 * mcount call-sites get patched lazily with NOPs until they are
11 * enabled. All code mutation routines here take effect atomically.
12 */
13
14#include <linux/uaccess.h>
15#include <linux/ftrace.h>
16
17#include <asm/cacheflush.h>
18#include <asm/patch.h>
19
20/* On IA64, the two bundles below are added at the start of each function built with -pg */
21static unsigned char __attribute__((aligned(8)))
22ftrace_orig_code[MCOUNT_INSN_SIZE] = {
23 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
24 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
25 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */
26 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
27 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
28 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */
29};
30
31struct ftrace_orig_insn {
32 u64 dummy1, dummy2, dummy3;
33 u64 dummy4:64-41+13;
34 u64 imm20:20;
35 u64 dummy5:3;
36 u64 sign:1;
37 u64 dummy6:4;
38};
39
40/* the mcount call stub is converted to the nop sequence below */
41static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
42 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
43 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
44 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
45 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
46 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
47 0x00, 0x00, 0x04, 0x00
48};
49
50static unsigned char *ftrace_nop_replace(void)
51{
52 return ftrace_nop_code;
53}
54
55/*
56 * the mcount call stub is converted to the call sequence below.
57 * Note: only the last instruction differs from the nop sequence.
58 */
59static unsigned char __attribute__((aligned(8)))
60ftrace_call_code[MCOUNT_INSN_SIZE] = {
61 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
62 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
63 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
64 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
65 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
66 0xf8, 0xff, 0xff, 0xc8
67};
68
69struct ftrace_call_insn {
70 u64 dummy1, dummy2;
71 u64 dummy3:48;
72 u64 imm39_l:16;
73 u64 imm39_h:23;
74 u64 dummy4:13;
75 u64 imm20:20;
76 u64 dummy5:3;
77 u64 i:1;
78 u64 dummy6:4;
79};
80
81static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
82{
83 struct ftrace_call_insn *code = (void *)ftrace_call_code;
84 unsigned long offset = addr - (ip + 0x10);
85
86 code->imm39_l = offset >> 24;
87 code->imm39_h = offset >> 40;
88 code->imm20 = offset >> 4;
89 code->i = offset >> 63;
90 return ftrace_call_code;
91}
92
93static int
94ftrace_modify_code(unsigned long ip, unsigned char *old_code,
95 unsigned char *new_code, int do_check)
96{
97 unsigned char replaced[MCOUNT_INSN_SIZE];
98
99 /*
100 * Note: Due to modules and __init, code can
101 * disappear and change, we need to protect against faulting
102 * as well as code changing. We do this by using the
103 * probe_kernel_* functions.
104 *
105 * No real locking needed, this code is run through
106 * kstop_machine, or before SMP starts.
107 */
108
109 if (!do_check)
110 goto skip_check;
111
112 /* read the text we want to modify */
113 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
114 return -EFAULT;
115
116 /* Make sure it is what we expect it to be */
117 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
118 return -EINVAL;
119
120skip_check:
121 /* replace the text with the new text */
122 if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
123 return -EPERM;
124 flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
125
126 return 0;
127}
128
129static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
130{
131 unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
132 unsigned long ip = rec->ip;
133
134 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
135 return -EFAULT;
136 if (rec->flags & FTRACE_FL_CONVERTED) {
137 struct ftrace_call_insn *call_insn, *tmp_call;
138
139 call_insn = (void *)ftrace_call_code;
140 tmp_call = (void *)replaced;
141 call_insn->imm39_l = tmp_call->imm39_l;
142 call_insn->imm39_h = tmp_call->imm39_h;
143 call_insn->imm20 = tmp_call->imm20;
144 call_insn->i = tmp_call->i;
145 if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
146 return -EINVAL;
147 return 0;
148 } else {
149 struct ftrace_orig_insn *call_insn, *tmp_call;
150
151 call_insn = (void *)ftrace_orig_code;
152 tmp_call = (void *)replaced;
153 call_insn->sign = tmp_call->sign;
154 call_insn->imm20 = tmp_call->imm20;
155 if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
156 return -EINVAL;
157 return 0;
158 }
159}
160
161int ftrace_make_nop(struct module *mod,
162 struct dyn_ftrace *rec, unsigned long addr)
163{
164 int ret;
165 char *new;
166
167 ret = ftrace_make_nop_check(rec, addr);
168 if (ret)
169 return ret;
170 new = ftrace_nop_replace();
171 return ftrace_modify_code(rec->ip, NULL, new, 0);
172}
173
174int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
175{
176 unsigned long ip = rec->ip;
177 unsigned char *old, *new;
178
179	old = ftrace_nop_replace();
180 new = ftrace_call_replace(ip, addr);
181 return ftrace_modify_code(ip, old, new, 1);
182}
183
184/* On IA64, _mcount can't directly call ftrace_stub; only a jump is OK */
185int ftrace_update_ftrace_func(ftrace_func_t func)
186{
187 unsigned long ip;
188 unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
189
190 if (func == ftrace_stub)
191 return 0;
192 ip = ((struct fnptr *)func)->ip;
193
194 ia64_patch_imm64(addr + 2, ip);
195
196 flush_icache_range(addr, addr + 16);
197 return 0;
198}
199
200/* run from kstop_machine */
201int __init ftrace_dyn_arch_init(void *data)
202{
203 *(unsigned long *)data = 0;
204
205 return 0;
206}
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 6da1f20d7372..2d311864e359 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys);
112#endif 112#endif
113extern char ia64_ivt[]; 113extern char ia64_ivt[];
114EXPORT_SYMBOL(ia64_ivt); 114EXPORT_SYMBOL(ia64_ivt);
115
116#include <asm/ftrace.h>
117#ifdef CONFIG_FUNCTION_TRACER
118/* mcount is defined in assembly */
119EXPORT_SYMBOL(_mcount);
120#endif
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd052..e1983fa025d2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,28 +174,8 @@ config IOMMU_LEAK
174 Add a simple leak tracer to the IOMMU code. This is useful when you 174 Add a simple leak tracer to the IOMMU code. This is useful when you
175 are debugging a buggy device driver that leaks IOMMU mappings. 175 are debugging a buggy device driver that leaks IOMMU mappings.
176 176
177config MMIOTRACE 177config HAVE_MMIOTRACE_SUPPORT
178 bool "Memory mapped IO tracing" 178 def_bool y
179 depends on DEBUG_KERNEL && PCI
180 select TRACING
181 help
182 Mmiotrace traces Memory Mapped I/O access and is meant for
183 debugging and reverse engineering. It is called from the ioremap
184 implementation and works via page faults. Tracing is disabled by
185 default and can be enabled at run-time.
186
187 See Documentation/tracers/mmiotrace.txt.
188 If you are not helping to develop drivers, say N.
189
190config MMIOTRACE_TEST
191 tristate "Test module for mmiotrace"
192 depends on MMIOTRACE && m
193 help
194 This is a dumb module for testing mmiotrace. It is very dangerous
195 as it will write garbage to IO memory starting at a given address.
196 However, it should be safe to use on e.g. unused portion of VRAM.
197
198 Say N, unless you absolutely know what you are doing.
199 179
200# 180#
201# IO delay types: 181# IO delay types:
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6b1f6f6f8661..077c9ea655fc 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -14,6 +14,7 @@
14#include <linux/bug.h> 14#include <linux/bug.h>
15#include <linux/nmi.h> 15#include <linux/nmi.h>
16#include <linux/sysfs.h> 16#include <linux/sysfs.h>
17#include <linux/ftrace.h>
17 18
18#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
19 20
@@ -195,6 +196,11 @@ unsigned __kprobes long oops_begin(void)
195 int cpu; 196 int cpu;
196 unsigned long flags; 197 unsigned long flags;
197 198
199 /* notify the hw-branch tracer so it may disable tracing and
200 add the last trace to the trace buffer -
201 the earlier this happens, the more useful the trace. */
202 trace_hw_branch_oops();
203
198 oops_enter(); 204 oops_enter();
199 205
200 /* racy, but better than risking deadlock. */ 206 /* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1b43086b097a..4d33224c055f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -133,15 +133,14 @@ void ftrace_nmi_exit(void)
133 133
134static void wait_for_nmi(void) 134static void wait_for_nmi(void)
135{ 135{
136 int waited = 0; 136 if (!atomic_read(&in_nmi))
137 return;
137 138
138 while (atomic_read(&in_nmi)) { 139 do {
139 waited = 1;
140 cpu_relax(); 140 cpu_relax();
141 } 141 } while(atomic_read(&in_nmi));
142 142
143 if (waited) 143 nmi_wait_count++;
144 nmi_wait_count++;
145} 144}
146 145
147static int 146static int
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee..c7da3683f4c5 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -55,7 +55,8 @@ config KVM_AMD
55 55
56config KVM_TRACE 56config KVM_TRACE
57 bool "KVM trace support" 57 bool "KVM trace support"
58 depends on KVM && MARKERS && SYSFS 58 depends on KVM && SYSFS
59 select MARKERS
59 select RELAY 60 select RELAY
60 select DEBUG_FS 61 select DEBUG_FS
61 default n 62 default n
diff --git a/block/Kconfig b/block/Kconfig
index 0cbb3b88b59a..7cdaa1d72252 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -50,6 +50,8 @@ config BLK_DEV_IO_TRACE
50 select RELAY 50 select RELAY
51 select DEBUG_FS 51 select DEBUG_FS
52 select TRACEPOINTS 52 select TRACEPOINTS
53 select TRACING
54 select STACKTRACE
53 help 55 help
54 Say Y here if you want to be able to trace the block layer actions 56 Say Y here if you want to be able to trace the block layer actions
55 on a given queue. Tracing allows you to see any traffic happening 57 on a given queue. Tracing allows you to see any traffic happening
@@ -58,6 +60,12 @@ config BLK_DEV_IO_TRACE
58 60
59 git://git.kernel.dk/blktrace.git 61 git://git.kernel.dk/blktrace.git
60 62
 63 Tracing is also possible using the ftrace interface, e.g.:
64
65 echo 1 > /sys/block/sda/sda1/trace/enable
66 echo blk > /sys/kernel/debug/tracing/current_tracer
67 cat /sys/kernel/debug/tracing/trace_pipe
68
61 If unsure, say N. 69 If unsure, say N.
62 70
63config BLK_DEV_BSG 71config BLK_DEV_BSG
diff --git a/block/blktrace.c b/block/blktrace.c
index 39cc3bfe56e4..c7698d1617a1 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -24,10 +24,28 @@
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <trace/block.h> 26#include <trace/block.h>
27#include <asm/uaccess.h> 27#include <linux/uaccess.h>
28#include <../kernel/trace/trace_output.h>
28 29
29static unsigned int blktrace_seq __read_mostly = 1; 30static unsigned int blktrace_seq __read_mostly = 1;
30 31
32static struct trace_array *blk_tr;
33static int __read_mostly blk_tracer_enabled;
34
35/* Select an alternative, minimalistic output than the original one */
36#define TRACE_BLK_OPT_CLASSIC 0x1
37
38static struct tracer_opt blk_tracer_opts[] = {
 39	/* Disable the minimalistic output by default */
40 { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
41 { }
42};
43
44static struct tracer_flags blk_tracer_flags = {
45 .val = 0,
46 .opts = blk_tracer_opts,
47};
48
31/* Global reference count of probes */ 49/* Global reference count of probes */
32static DEFINE_MUTEX(blk_probe_mutex); 50static DEFINE_MUTEX(blk_probe_mutex);
33static atomic_t blk_probes_ref = ATOMIC_INIT(0); 51static atomic_t blk_probes_ref = ATOMIC_INIT(0);
@@ -43,6 +61,9 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
43{ 61{
44 struct blk_io_trace *t; 62 struct blk_io_trace *t;
45 63
64 if (!bt->rchan)
65 return;
66
46 t = relay_reserve(bt->rchan, sizeof(*t) + len); 67 t = relay_reserve(bt->rchan, sizeof(*t) + len);
47 if (t) { 68 if (t) {
48 const int cpu = smp_processor_id(); 69 const int cpu = smp_processor_id();
@@ -90,6 +111,16 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
90 unsigned long flags; 111 unsigned long flags;
91 char *buf; 112 char *buf;
92 113
114 if (blk_tr) {
115 va_start(args, fmt);
116 ftrace_vprintk(fmt, args);
117 va_end(args);
118 return;
119 }
120
121 if (!bt->msg_data)
122 return;
123
93 local_irq_save(flags); 124 local_irq_save(flags);
94 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 125 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
95 va_start(args, fmt); 126 va_start(args, fmt);
@@ -117,11 +148,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
117/* 148/*
118 * Data direction bit lookup 149 * Data direction bit lookup
119 */ 150 */
120static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; 151static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
152 BLK_TC_ACT(BLK_TC_WRITE) };
121 153
122/* The ilog2() calls fall out because they're constant */ 154/* The ilog2() calls fall out because they're constant */
123#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ 155#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
124 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) 156 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
125 157
126/* 158/*
127 * The worker for the various blk_add_trace*() types. Fills out a 159 * The worker for the various blk_add_trace*() types. Fills out a
@@ -131,13 +163,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
131 int rw, u32 what, int error, int pdu_len, void *pdu_data) 163 int rw, u32 what, int error, int pdu_len, void *pdu_data)
132{ 164{
133 struct task_struct *tsk = current; 165 struct task_struct *tsk = current;
166 struct ring_buffer_event *event = NULL;
134 struct blk_io_trace *t; 167 struct blk_io_trace *t;
135 unsigned long flags; 168 unsigned long flags;
136 unsigned long *sequence; 169 unsigned long *sequence;
137 pid_t pid; 170 pid_t pid;
138 int cpu; 171 int cpu, pc = 0;
139 172
140 if (unlikely(bt->trace_state != Blktrace_running)) 173 if (unlikely(bt->trace_state != Blktrace_running ||
174 !blk_tracer_enabled))
141 return; 175 return;
142 176
143 what |= ddir_act[rw & WRITE]; 177 what |= ddir_act[rw & WRITE];
@@ -150,6 +184,24 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
150 pid = tsk->pid; 184 pid = tsk->pid;
151 if (unlikely(act_log_check(bt, what, sector, pid))) 185 if (unlikely(act_log_check(bt, what, sector, pid)))
152 return; 186 return;
187 cpu = raw_smp_processor_id();
188
189 if (blk_tr) {
190 struct trace_entry *ent;
191 tracing_record_cmdline(current);
192
193 event = ring_buffer_lock_reserve(blk_tr->buffer,
194 sizeof(*t) + pdu_len, &flags);
195 if (!event)
196 return;
197
198 ent = ring_buffer_event_data(event);
199 t = (struct blk_io_trace *)ent;
200 pc = preempt_count();
201 tracing_generic_entry_update(ent, 0, pc);
202 ent->type = TRACE_BLK;
203 goto record_it;
204 }
153 205
154 /* 206 /*
155 * A word about the locking here - we disable interrupts to reserve 207 * A word about the locking here - we disable interrupts to reserve
@@ -163,23 +215,40 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
163 215
164 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); 216 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
165 if (t) { 217 if (t) {
166 cpu = smp_processor_id();
167 sequence = per_cpu_ptr(bt->sequence, cpu); 218 sequence = per_cpu_ptr(bt->sequence, cpu);
168 219
169 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; 220 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
170 t->sequence = ++(*sequence); 221 t->sequence = ++(*sequence);
171 t->time = ktime_to_ns(ktime_get()); 222 t->time = ktime_to_ns(ktime_get());
223record_it:
224 /*
225 * These two are not needed in ftrace as they are in the
226 * generic trace_entry, filled by tracing_generic_entry_update,
227 * but for the trace_event->bin() synthesizer benefit we do it
228 * here too.
229 */
230 t->cpu = cpu;
231 t->pid = pid;
232
172 t->sector = sector; 233 t->sector = sector;
173 t->bytes = bytes; 234 t->bytes = bytes;
174 t->action = what; 235 t->action = what;
175 t->pid = pid;
176 t->device = bt->dev; 236 t->device = bt->dev;
177 t->cpu = cpu;
178 t->error = error; 237 t->error = error;
179 t->pdu_len = pdu_len; 238 t->pdu_len = pdu_len;
180 239
181 if (pdu_len) 240 if (pdu_len)
182 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 241 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
242
243 if (blk_tr) {
244 ring_buffer_unlock_commit(blk_tr->buffer, event, flags);
245 if (pid != 0 &&
246 !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC) &&
247 (trace_flags & TRACE_ITER_STACKTRACE) != 0)
248 __trace_stack(blk_tr, NULL, flags, 5, pc);
249 trace_wake_up();
250 return;
251 }
183 } 252 }
184 253
185 local_irq_restore(flags); 254 local_irq_restore(flags);
@@ -385,7 +454,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
385 atomic_set(&bt->dropped, 0); 454 atomic_set(&bt->dropped, 0);
386 455
387 ret = -EIO; 456 ret = -EIO;
388 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); 457 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
458 &blk_dropped_fops);
389 if (!bt->dropped_file) 459 if (!bt->dropped_file)
390 goto err; 460 goto err;
391 461
@@ -467,10 +537,10 @@ EXPORT_SYMBOL_GPL(blk_trace_setup);
467 537
468int blk_trace_startstop(struct request_queue *q, int start) 538int blk_trace_startstop(struct request_queue *q, int start)
469{ 539{
470 struct blk_trace *bt;
471 int ret; 540 int ret;
541 struct blk_trace *bt = q->blk_trace;
472 542
473 if ((bt = q->blk_trace) == NULL) 543 if (bt == NULL)
474 return -EINVAL; 544 return -EINVAL;
475 545
476 /* 546 /*
@@ -606,12 +676,14 @@ static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
606 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 676 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
607} 677}
608 678
609static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) 679static void blk_add_trace_rq_requeue(struct request_queue *q,
680 struct request *rq)
610{ 681{
611 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 682 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
612} 683}
613 684
614static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) 685static void blk_add_trace_rq_complete(struct request_queue *q,
686 struct request *rq)
615{ 687{
616 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 688 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
617} 689}
@@ -648,12 +720,14 @@ static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
648 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 720 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
649} 721}
650 722
651static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) 723static void blk_add_trace_bio_backmerge(struct request_queue *q,
724 struct bio *bio)
652{ 725{
653 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 726 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
654} 727}
655 728
656static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) 729static void blk_add_trace_bio_frontmerge(struct request_queue *q,
730 struct bio *bio)
657{ 731{
658 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 732 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
659} 733}
@@ -663,7 +737,8 @@ static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
663 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 737 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
664} 738}
665 739
666static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) 740static void blk_add_trace_getrq(struct request_queue *q,
741 struct bio *bio, int rw)
667{ 742{
668 if (bio) 743 if (bio)
669 blk_add_trace_bio(q, bio, BLK_TA_GETRQ); 744 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
@@ -676,7 +751,8 @@ static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw
676} 751}
677 752
678 753
679static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) 754static void blk_add_trace_sleeprq(struct request_queue *q,
755 struct bio *bio, int rw)
680{ 756{
681 if (bio) 757 if (bio)
682 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); 758 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
@@ -684,7 +760,8 @@ static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int
684 struct blk_trace *bt = q->blk_trace; 760 struct blk_trace *bt = q->blk_trace;
685 761
686 if (bt) 762 if (bt)
687 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); 763 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
764 0, 0, NULL);
688 } 765 }
689} 766}
690 767
@@ -858,3 +935,617 @@ static void blk_unregister_tracepoints(void)
858 935
859 tracepoint_synchronize_unregister(); 936 tracepoint_synchronize_unregister();
860} 937}
938
939/*
940 * struct blk_io_tracer formatting routines
941 */
942
943static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
944{
945 int i = 0;
946
947 if (t->action & BLK_TC_DISCARD)
948 rwbs[i++] = 'D';
949 else if (t->action & BLK_TC_WRITE)
950 rwbs[i++] = 'W';
951 else if (t->bytes)
952 rwbs[i++] = 'R';
953 else
954 rwbs[i++] = 'N';
955
956 if (t->action & BLK_TC_AHEAD)
957 rwbs[i++] = 'A';
958 if (t->action & BLK_TC_BARRIER)
959 rwbs[i++] = 'B';
960 if (t->action & BLK_TC_SYNC)
961 rwbs[i++] = 'S';
962 if (t->action & BLK_TC_META)
963 rwbs[i++] = 'M';
964
965 rwbs[i] = '\0';
966}
967
968static inline
969const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
970{
971 return (const struct blk_io_trace *)ent;
972}
973
974static inline const void *pdu_start(const struct trace_entry *ent)
975{
976 return te_blk_io_trace(ent) + 1;
977}
978
979static inline u32 t_sec(const struct trace_entry *ent)
980{
981 return te_blk_io_trace(ent)->bytes >> 9;
982}
983
984static inline unsigned long long t_sector(const struct trace_entry *ent)
985{
986 return te_blk_io_trace(ent)->sector;
987}
988
989static inline __u16 t_error(const struct trace_entry *ent)
990{
991	return te_blk_io_trace(ent)->error;
992}
993
994static __u64 get_pdu_int(const struct trace_entry *ent)
995{
996 const __u64 *val = pdu_start(ent);
997 return be64_to_cpu(*val);
998}
999
1000static void get_pdu_remap(const struct trace_entry *ent,
1001 struct blk_io_trace_remap *r)
1002{
1003 const struct blk_io_trace_remap *__r = pdu_start(ent);
1004 __u64 sector = __r->sector;
1005
1006 r->device = be32_to_cpu(__r->device);
1007 r->device_from = be32_to_cpu(__r->device_from);
1008 r->sector = be64_to_cpu(sector);
1009}
1010
1011static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
1012{
1013 char rwbs[6];
1014 unsigned long long ts = ns2usecs(iter->ts);
1015 unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
1016 unsigned secs = (unsigned long)ts;
1017 const struct trace_entry *ent = iter->ent;
1018 const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
1019
1020 fill_rwbs(rwbs, t);
1021
1022 return trace_seq_printf(&iter->seq,
1023 "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
1024 MAJOR(t->device), MINOR(t->device), iter->cpu,
1025 secs, usec_rem, ent->pid, act, rwbs);
1026}
1027
1028static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
1029 const char *act)
1030{
1031 char rwbs[6];
1032 fill_rwbs(rwbs, t);
1033 return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
1034 MAJOR(t->device), MINOR(t->device), act, rwbs);
1035}
1036
1037static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1038{
1039 const char *cmd = trace_find_cmdline(ent->pid);
1040
1041 if (t_sec(ent))
1042 return trace_seq_printf(s, "%llu + %u [%s]\n",
1043 t_sector(ent), t_sec(ent), cmd);
1044 return trace_seq_printf(s, "[%s]\n", cmd);
1045}
1046
1047static int blk_log_with_error(struct trace_seq *s,
1048 const struct trace_entry *ent)
1049{
1050 if (t_sec(ent))
1051 return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
1052 t_sec(ent), t_error(ent));
1053 return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
1054}
1055
1056static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1057{
1058 struct blk_io_trace_remap r = { .device = 0, };
1059
1060 get_pdu_remap(ent, &r);
1061 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1062 t_sector(ent),
1063 t_sec(ent), MAJOR(r.device), MINOR(r.device),
1064 (unsigned long long)r.sector);
1065}
1066
1067static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
1068{
1069 return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid));
1070}
1071
1072static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
1073{
1074 return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid),
1075 get_pdu_int(ent));
1076}
1077
1078static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1079{
1080 return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
1081 get_pdu_int(ent), trace_find_cmdline(ent->pid));
1082}
1083
1084/*
1085 * struct tracer operations
1086 */
1087
1088static void blk_tracer_print_header(struct seq_file *m)
1089{
1090 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1091 return;
1092 seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n"
1093 "# | | | | | |\n");
1094}
1095
1096static void blk_tracer_start(struct trace_array *tr)
1097{
1098 tracing_reset_online_cpus(tr);
1099
1100 mutex_lock(&blk_probe_mutex);
1101 if (atomic_add_return(1, &blk_probes_ref) == 1)
1102 if (blk_register_tracepoints())
1103 atomic_dec(&blk_probes_ref);
1104 mutex_unlock(&blk_probe_mutex);
1105 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1106}
1107
1108static int blk_tracer_init(struct trace_array *tr)
1109{
1110 blk_tr = tr;
1111 blk_tracer_start(tr);
1112 mutex_lock(&blk_probe_mutex);
1113 blk_tracer_enabled++;
1114 mutex_unlock(&blk_probe_mutex);
1115 return 0;
1116}
1117
1118static void blk_tracer_stop(struct trace_array *tr)
1119{
1120 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1121 mutex_lock(&blk_probe_mutex);
1122 if (atomic_dec_and_test(&blk_probes_ref))
1123 blk_unregister_tracepoints();
1124 mutex_unlock(&blk_probe_mutex);
1125}
1126
1127static void blk_tracer_reset(struct trace_array *tr)
1128{
1129 if (!atomic_read(&blk_probes_ref))
1130 return;
1131
1132 mutex_lock(&blk_probe_mutex);
1133 blk_tracer_enabled--;
1134 WARN_ON(blk_tracer_enabled < 0);
1135 mutex_unlock(&blk_probe_mutex);
1136
1137 blk_tracer_stop(tr);
1138}
1139
1140static struct {
1141 const char *act[2];
1142 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1143} what2act[] __read_mostly = {
1144 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1145 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1146 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
1147 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
1148 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
1149 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
1150 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
1151 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
1152 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
1153 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
1154 [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
1155 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
1156 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
1157 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
1158 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1159};
1160
1161static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1162 int flags)
1163{
1164 struct trace_seq *s = &iter->seq;
1165 const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1166 const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1167 int ret;
1168
1169 if (!trace_print_context(iter))
1170 return TRACE_TYPE_PARTIAL_LINE;
1171
1172 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1173 ret = trace_seq_printf(s, "Bad pc action %x\n", what);
1174 else {
1175 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1176 ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
1177 if (ret)
1178 ret = what2act[what].print(s, iter->ent);
1179 }
1180
1181 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1182}
1183
1184static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1185{
1186 struct trace_seq *s = &iter->seq;
1187 struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1188 const int offset = offsetof(struct blk_io_trace, sector);
1189 struct blk_io_trace old = {
1190 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1191 .time = ns2usecs(iter->ts),
1192 };
1193
1194 if (!trace_seq_putmem(s, &old, offset))
1195 return 0;
1196 return trace_seq_putmem(s, &t->sector,
1197 sizeof(old) - offset + t->pdu_len);
1198}
1199
1200static enum print_line_t
1201blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
1202{
1203 return blk_trace_synthesize_old_trace(iter) ?
1204 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1205}
1206
1207static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1208{
1209 const struct blk_io_trace *t;
1210 u16 what;
1211 int ret;
1212
1213 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1214 return TRACE_TYPE_UNHANDLED;
1215
1216 t = (const struct blk_io_trace *)iter->ent;
1217 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1218
1219 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1220 ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
1221 else {
1222 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1223 ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
1224 if (ret)
1225 ret = what2act[what].print(&iter->seq, iter->ent);
1226 }
1227
1228 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1229}
1230
1231static struct tracer blk_tracer __read_mostly = {
1232 .name = "blk",
1233 .init = blk_tracer_init,
1234 .reset = blk_tracer_reset,
1235 .start = blk_tracer_start,
1236 .stop = blk_tracer_stop,
1237 .print_header = blk_tracer_print_header,
1238 .print_line = blk_tracer_print_line,
1239 .flags = &blk_tracer_flags,
1240};
1241
1242static struct trace_event trace_blk_event = {
1243 .type = TRACE_BLK,
1244 .trace = blk_trace_event_print,
1245 .latency_trace = blk_trace_event_print,
1246 .raw = trace_nop_print,
1247 .hex = trace_nop_print,
1248 .binary = blk_trace_event_print_binary,
1249};
1250
1251static int __init init_blk_tracer(void)
1252{
1253 if (!register_ftrace_event(&trace_blk_event)) {
1254 pr_warning("Warning: could not register block events\n");
1255 return 1;
1256 }
1257
1258 if (register_tracer(&blk_tracer) != 0) {
1259 pr_warning("Warning: could not register the block tracer\n");
1260 unregister_ftrace_event(&trace_blk_event);
1261 return 1;
1262 }
1263
1264 return 0;
1265}
1266
1267device_initcall(init_blk_tracer);
1268
1269static int blk_trace_remove_queue(struct request_queue *q)
1270{
1271 struct blk_trace *bt;
1272
1273 bt = xchg(&q->blk_trace, NULL);
1274 if (bt == NULL)
1275 return -EINVAL;
1276
1277 kfree(bt);
1278 return 0;
1279}
1280
1281/*
1282 * Setup everything required to start tracing
1283 */
1284static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1285{
1286 struct blk_trace *old_bt, *bt = NULL;
1287 int ret;
1288
1289 ret = -ENOMEM;
1290 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1291 if (!bt)
1292 goto err;
1293
1294 bt->dev = dev;
1295 bt->act_mask = (u16)-1;
1296 bt->end_lba = -1ULL;
1297 bt->trace_state = Blktrace_running;
1298
1299 old_bt = xchg(&q->blk_trace, bt);
1300 if (old_bt != NULL) {
1301 (void)xchg(&q->blk_trace, old_bt);
1302 kfree(bt);
1303 ret = -EBUSY;
1304 }
1305 return 0;
1306err:
1307 return ret;
1308}
1309
1310/*
1311 * sysfs interface to enable and configure tracing
1312 */
1313
1314static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
1315 struct device_attribute *attr,
1316 char *buf)
1317{
1318 struct hd_struct *p = dev_to_part(dev);
1319 struct block_device *bdev;
1320 ssize_t ret = -ENXIO;
1321
1322 lock_kernel();
1323 bdev = bdget(part_devt(p));
1324 if (bdev != NULL) {
1325 struct request_queue *q = bdev_get_queue(bdev);
1326
1327 if (q != NULL) {
1328 mutex_lock(&bdev->bd_mutex);
1329 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1330 mutex_unlock(&bdev->bd_mutex);
1331 }
1332
1333 bdput(bdev);
1334 }
1335
1336 unlock_kernel();
1337 return ret;
1338}
1339
1340static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
1341 struct device_attribute *attr,
1342 const char *buf, size_t count)
1343{
1344 struct block_device *bdev;
1345 struct request_queue *q;
1346 struct hd_struct *p;
1347 int value;
1348 ssize_t ret = -ENXIO;
1349
1350 if (count == 0 || sscanf(buf, "%d", &value) != 1)
1351 goto out;
1352
1353 lock_kernel();
1354 p = dev_to_part(dev);
1355 bdev = bdget(part_devt(p));
1356 if (bdev == NULL)
1357 goto out_unlock_kernel;
1358
1359 q = bdev_get_queue(bdev);
1360 if (q == NULL)
1361 goto out_bdput;
1362
1363 mutex_lock(&bdev->bd_mutex);
1364 if (value)
1365 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1366 else
1367 ret = blk_trace_remove_queue(q);
1368 mutex_unlock(&bdev->bd_mutex);
1369
1370 if (ret == 0)
1371 ret = count;
1372out_bdput:
1373 bdput(bdev);
1374out_unlock_kernel:
1375 unlock_kernel();
1376out:
1377 return ret;
1378}
1379
1380static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1381 struct device_attribute *attr,
1382 char *buf);
1383static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1384 struct device_attribute *attr,
1385 const char *buf, size_t count);
1386#define BLK_TRACE_DEVICE_ATTR(_name) \
1387 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
1388 sysfs_blk_trace_attr_show, \
1389 sysfs_blk_trace_attr_store)
1390
1391static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
1392 sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
1393static BLK_TRACE_DEVICE_ATTR(act_mask);
1394static BLK_TRACE_DEVICE_ATTR(pid);
1395static BLK_TRACE_DEVICE_ATTR(start_lba);
1396static BLK_TRACE_DEVICE_ATTR(end_lba);
1397
1398static struct attribute *blk_trace_attrs[] = {
1399 &dev_attr_enable.attr,
1400 &dev_attr_act_mask.attr,
1401 &dev_attr_pid.attr,
1402 &dev_attr_start_lba.attr,
1403 &dev_attr_end_lba.attr,
1404 NULL
1405};
1406
1407struct attribute_group blk_trace_attr_group = {
1408 .name = "trace",
1409 .attrs = blk_trace_attrs,
1410};
1411
1412static int blk_str2act_mask(const char *str)
1413{
1414 int mask = 0;
1415 char *copy = kstrdup(str, GFP_KERNEL), *s;
1416
1417 if (copy == NULL)
1418 return -ENOMEM;
1419
1420 s = strstrip(copy);
1421
1422 while (1) {
1423 char *sep = strchr(s, ',');
1424
1425 if (sep != NULL)
1426 *sep = '\0';
1427
1428 if (strcasecmp(s, "barrier") == 0)
1429 mask |= BLK_TC_BARRIER;
1430 else if (strcasecmp(s, "complete") == 0)
1431 mask |= BLK_TC_COMPLETE;
1432 else if (strcasecmp(s, "fs") == 0)
1433 mask |= BLK_TC_FS;
1434 else if (strcasecmp(s, "issue") == 0)
1435 mask |= BLK_TC_ISSUE;
1436 else if (strcasecmp(s, "pc") == 0)
1437 mask |= BLK_TC_PC;
1438 else if (strcasecmp(s, "queue") == 0)
1439 mask |= BLK_TC_QUEUE;
1440 else if (strcasecmp(s, "read") == 0)
1441 mask |= BLK_TC_READ;
1442 else if (strcasecmp(s, "requeue") == 0)
1443 mask |= BLK_TC_REQUEUE;
1444 else if (strcasecmp(s, "sync") == 0)
1445 mask |= BLK_TC_SYNC;
1446 else if (strcasecmp(s, "write") == 0)
1447 mask |= BLK_TC_WRITE;
1448
1449 if (sep == NULL)
1450 break;
1451
1452 s = sep + 1;
1453 }
1454 kfree(copy);
1455
1456 return mask;
1457}
1458
1459static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1460 struct device_attribute *attr,
1461 char *buf)
1462{
1463 struct hd_struct *p = dev_to_part(dev);
1464 struct request_queue *q;
1465 struct block_device *bdev;
1466 ssize_t ret = -ENXIO;
1467
1468 lock_kernel();
1469 bdev = bdget(part_devt(p));
1470 if (bdev == NULL)
1471 goto out_unlock_kernel;
1472
1473 q = bdev_get_queue(bdev);
1474 if (q == NULL)
1475 goto out_bdput;
1476 mutex_lock(&bdev->bd_mutex);
1477 if (q->blk_trace == NULL)
1478 ret = sprintf(buf, "disabled\n");
1479 else if (attr == &dev_attr_act_mask)
1480 ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
1481 else if (attr == &dev_attr_pid)
1482 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1483 else if (attr == &dev_attr_start_lba)
1484 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1485 else if (attr == &dev_attr_end_lba)
1486 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1487 mutex_unlock(&bdev->bd_mutex);
1488out_bdput:
1489 bdput(bdev);
1490out_unlock_kernel:
1491 unlock_kernel();
1492 return ret;
1493}
1494
1495static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1496 struct device_attribute *attr,
1497 const char *buf, size_t count)
1498{
1499 struct block_device *bdev;
1500 struct request_queue *q;
1501 struct hd_struct *p;
1502 u64 value;
1503 ssize_t ret = -ENXIO;
1504
1505 if (count == 0)
1506 goto out;
1507
1508 if (attr == &dev_attr_act_mask) {
1509 if (sscanf(buf, "%llx", &value) != 1) {
1510 /* Assume it is a list of trace category names */
1511 value = blk_str2act_mask(buf);
1512 if (value < 0)
1513 goto out;
1514 }
1515 } else if (sscanf(buf, "%llu", &value) != 1)
1516 goto out;
1517
1518 lock_kernel();
1519 p = dev_to_part(dev);
1520 bdev = bdget(part_devt(p));
1521 if (bdev == NULL)
1522 goto out_unlock_kernel;
1523
1524 q = bdev_get_queue(bdev);
1525 if (q == NULL)
1526 goto out_bdput;
1527
1528 mutex_lock(&bdev->bd_mutex);
1529 ret = 0;
1530 if (q->blk_trace == NULL)
1531 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1532
1533 if (ret == 0) {
1534 if (attr == &dev_attr_act_mask)
1535 q->blk_trace->act_mask = value;
1536 else if (attr == &dev_attr_pid)
1537 q->blk_trace->pid = value;
1538 else if (attr == &dev_attr_start_lba)
1539 q->blk_trace->start_lba = value;
1540 else if (attr == &dev_attr_end_lba)
1541 q->blk_trace->end_lba = value;
1542 ret = count;
1543 }
1544 mutex_unlock(&bdev->bd_mutex);
1545out_bdput:
1546 bdput(bdev);
1547out_unlock_kernel:
1548 unlock_kernel();
1549out:
1550 return ret;
1551}
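
As a rough illustration of the sysfs interface added above (not part of the patch itself, and using a hypothetical /sys/block/sda/sda1 path), a userspace tool could drive per-partition tracing as follows; blk_str2act_mask() lets act_mask be written either as a hex mask or as a comma-separated list of category names:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* sketch: enable blktrace on one partition via the new sysfs files */
static int write_attr(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* "sda1" is an assumed example partition */
	write_attr("/sys/block/sda/sda1/trace/act_mask", "read,write,issue");
	write_attr("/sys/block/sda/sda1/trace/enable", "1");
	return 0;
}
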
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 33a9351c896d..30659ce9bcf4 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -283,7 +283,7 @@ static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
283} 283}
284static struct sysrq_key_op sysrq_ftrace_dump_op = { 284static struct sysrq_key_op sysrq_ftrace_dump_op = {
285 .handler = sysrq_ftrace_dump, 285 .handler = sysrq_ftrace_dump,
286 .help_msg = "dumpZ-ftrace-buffer", 286 .help_msg = "dump-ftrace-buffer(Z)",
287 .action_msg = "Dump ftrace buffer", 287 .action_msg = "Dump ftrace buffer",
288 .enable_mask = SYSRQ_ENABLE_DUMP, 288 .enable_mask = SYSRQ_ENABLE_DUMP,
289}; 289};
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f4..8a17f7edcc74 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -19,6 +19,7 @@
19#include <linux/kmod.h> 19#include <linux/kmod.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/genhd.h> 21#include <linux/genhd.h>
22#include <linux/blktrace_api.h>
22 23
23#include "check.h" 24#include "check.h"
24 25
@@ -294,6 +295,9 @@ static struct attribute_group part_attr_group = {
294 295
295static struct attribute_group *part_attr_groups[] = { 296static struct attribute_group *part_attr_groups[] = {
296 &part_attr_group, 297 &part_attr_group,
298#ifdef CONFIG_BLK_DEV_IO_TRACE
299 &blk_trace_attr_group,
300#endif
297 NULL 301 NULL
298}; 302};
299 303
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 25379cba2370..ed12e8fd8cf7 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -143,6 +143,9 @@ struct blk_user_trace_setup {
143 143
144#ifdef __KERNEL__ 144#ifdef __KERNEL__
145#if defined(CONFIG_BLK_DEV_IO_TRACE) 145#if defined(CONFIG_BLK_DEV_IO_TRACE)
146
147#include <linux/sysfs.h>
148
146struct blk_trace { 149struct blk_trace {
147 int trace_state; 150 int trace_state;
148 struct rchan *rchan; 151 struct rchan *rchan;
@@ -193,6 +196,8 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
193extern int blk_trace_startstop(struct request_queue *q, int start); 196extern int blk_trace_startstop(struct request_queue *q, int start);
194extern int blk_trace_remove(struct request_queue *q); 197extern int blk_trace_remove(struct request_queue *q);
195 198
199extern struct attribute_group blk_trace_attr_group;
200
196#else /* !CONFIG_BLK_DEV_IO_TRACE */ 201#else /* !CONFIG_BLK_DEV_IO_TRACE */
197#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 202#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
198#define blk_trace_shutdown(q) do { } while (0) 203#define blk_trace_shutdown(q) do { } while (0)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 677432b9cb7e..7840e718c6c7 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -126,6 +126,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
126extern void ftrace_caller(void); 126extern void ftrace_caller(void);
127extern void ftrace_call(void); 127extern void ftrace_call(void);
128extern void mcount_call(void); 128extern void mcount_call(void);
129
130#ifndef FTRACE_ADDR
131#define FTRACE_ADDR ((unsigned long)ftrace_caller)
132#endif
129#ifdef CONFIG_FUNCTION_GRAPH_TRACER 133#ifdef CONFIG_FUNCTION_GRAPH_TRACER
130extern void ftrace_graph_caller(void); 134extern void ftrace_graph_caller(void);
131extern int ftrace_enable_ftrace_graph_caller(void); 135extern int ftrace_enable_ftrace_graph_caller(void);
@@ -298,6 +302,9 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
298extern int 302extern int
299__ftrace_printk(unsigned long ip, const char *fmt, ...) 303__ftrace_printk(unsigned long ip, const char *fmt, ...)
300 __attribute__ ((format (printf, 2, 3))); 304 __attribute__ ((format (printf, 2, 3)));
305# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap)
306extern int
307__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
301extern void ftrace_dump(void); 308extern void ftrace_dump(void);
302#else 309#else
303static inline void 310static inline void
@@ -313,6 +320,11 @@ ftrace_printk(const char *fmt, ...)
313{ 320{
314 return 0; 321 return 0;
315} 322}
323static inline int
324ftrace_vprintk(const char *fmt, va_list ap)
325{
326 return 0;
327}
316static inline void ftrace_dump(void) { } 328static inline void ftrace_dump(void) { }
317#endif 329#endif
318 330
@@ -492,4 +504,17 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
492 504
493#endif /* CONFIG_TRACING */ 505#endif /* CONFIG_TRACING */
494 506
507
508#ifdef CONFIG_HW_BRANCH_TRACER
509
510void trace_hw_branch(u64 from, u64 to);
511void trace_hw_branch_oops(void);
512
513#else /* CONFIG_HW_BRANCH_TRACER */
514
515static inline void trace_hw_branch(u64 from, u64 to) {}
516static inline void trace_hw_branch_oops(void) {}
517
518#endif /* CONFIG_HW_BRANCH_TRACER */
519
495#endif /* _LINUX_FTRACE_H */ 520#endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a7c76388731..f3c23cf11abc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_uninterruptible(void);
137extern unsigned long nr_active(void); 137extern unsigned long nr_active(void);
138extern unsigned long nr_iowait(void); 138extern unsigned long nr_iowait(void);
139 139
140extern unsigned long get_parent_ip(unsigned long addr);
141
140struct seq_file; 142struct seq_file;
141struct cfs_rq; 143struct cfs_rq;
142struct task_group; 144struct task_group;
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 6ca6a7b66d75..f4523651fa42 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,6 +14,7 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <trace/kmemtrace.h>
17 18
18/* Size description struct for general caches. */ 19/* Size description struct for general caches. */
19struct cache_sizes { 20struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
28void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 29void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
29void *__kmalloc(size_t size, gfp_t flags); 30void *__kmalloc(size_t size, gfp_t flags);
30 31
31static inline void *kmalloc(size_t size, gfp_t flags) 32#ifdef CONFIG_KMEMTRACE
33extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
34extern size_t slab_buffer_size(struct kmem_cache *cachep);
35#else
36static __always_inline void *
37kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
32{ 38{
39 return kmem_cache_alloc(cachep, flags);
40}
41static inline size_t slab_buffer_size(struct kmem_cache *cachep)
42{
43 return 0;
44}
45#endif
46
47static __always_inline void *kmalloc(size_t size, gfp_t flags)
48{
49 struct kmem_cache *cachep;
50 void *ret;
51
33 if (__builtin_constant_p(size)) { 52 if (__builtin_constant_p(size)) {
34 int i = 0; 53 int i = 0;
35 54
@@ -47,10 +66,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
47found: 66found:
48#ifdef CONFIG_ZONE_DMA 67#ifdef CONFIG_ZONE_DMA
49 if (flags & GFP_DMA) 68 if (flags & GFP_DMA)
50 return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep, 69 cachep = malloc_sizes[i].cs_dmacachep;
51 flags); 70 else
52#endif 71#endif
53 return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags); 72 cachep = malloc_sizes[i].cs_cachep;
73
74 ret = kmem_cache_alloc_notrace(cachep, flags);
75
76 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
77 size, slab_buffer_size(cachep), flags);
78
79 return ret;
54 } 80 }
55 return __kmalloc(size, flags); 81 return __kmalloc(size, flags);
56} 82}
@@ -59,8 +85,25 @@ found:
59extern void *__kmalloc_node(size_t size, gfp_t flags, int node); 85extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
60extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 86extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
61 87
62static inline void *kmalloc_node(size_t size, gfp_t flags, int node) 88#ifdef CONFIG_KMEMTRACE
89extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
90 gfp_t flags,
91 int nodeid);
92#else
93static __always_inline void *
94kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
95 gfp_t flags,
96 int nodeid)
97{
98 return kmem_cache_alloc_node(cachep, flags, nodeid);
99}
100#endif
101
102static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
63{ 103{
104 struct kmem_cache *cachep;
105 void *ret;
106
64 if (__builtin_constant_p(size)) { 107 if (__builtin_constant_p(size)) {
65 int i = 0; 108 int i = 0;
66 109
@@ -78,11 +121,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
78found: 121found:
79#ifdef CONFIG_ZONE_DMA 122#ifdef CONFIG_ZONE_DMA
80 if (flags & GFP_DMA) 123 if (flags & GFP_DMA)
81 return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep, 124 cachep = malloc_sizes[i].cs_dmacachep;
82 flags, node); 125 else
83#endif 126#endif
84 return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep, 127 cachep = malloc_sizes[i].cs_cachep;
85 flags, node); 128
129 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
130
131 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
132 ret, size, slab_buffer_size(cachep),
133 flags, node);
134
135 return ret;
86 } 136 }
87 return __kmalloc_node(size, flags, node); 137 return __kmalloc_node(size, flags, node);
88} 138}
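
A minimal sketch, assuming the default kmalloc size classes, of what the instrumented fast path above ends up recording for one constant-size request; example_alloc() is purely illustrative and not part of the patch:

#include <linux/slab.h>
#include <trace/kmemtrace.h>

static void *example_alloc(void)
{
	/*
	 * For a constant size, the inlined kmalloc() picks a general cache,
	 * allocates through kmem_cache_alloc_notrace() and then emits one
	 * event with bytes_req = 100 and bytes_alloc = slab_buffer_size(),
	 * which would be 128 with the default size classes (an assumption).
	 */
	return kmalloc(100, GFP_KERNEL);
}
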
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 59a3fa476ab9..0ec00b39d006 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -3,14 +3,15 @@
3 3
4void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 4void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
5 5
6static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 6static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
7 gfp_t flags)
7{ 8{
8 return kmem_cache_alloc_node(cachep, flags, -1); 9 return kmem_cache_alloc_node(cachep, flags, -1);
9} 10}
10 11
11void *__kmalloc_node(size_t size, gfp_t flags, int node); 12void *__kmalloc_node(size_t size, gfp_t flags, int node);
12 13
13static inline void *kmalloc_node(size_t size, gfp_t flags, int node) 14static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
14{ 15{
15 return __kmalloc_node(size, flags, node); 16 return __kmalloc_node(size, flags, node);
16} 17}
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
23 * kmalloc is the normal method of allocating memory 24 * kmalloc is the normal method of allocating memory
24 * in the kernel. 25 * in the kernel.
25 */ 26 */
26static inline void *kmalloc(size_t size, gfp_t flags) 27static __always_inline void *kmalloc(size_t size, gfp_t flags)
27{ 28{
28 return __kmalloc_node(size, flags, -1); 29 return __kmalloc_node(size, flags, -1);
29} 30}
30 31
31static inline void *__kmalloc(size_t size, gfp_t flags) 32static __always_inline void *__kmalloc(size_t size, gfp_t flags)
32{ 33{
33 return kmalloc(size, flags); 34 return kmalloc(size, flags);
34} 35}
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aacd..6b657f7dcb2b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <trace/kmemtrace.h>
13 14
14enum stat_item { 15enum stat_item {
15 ALLOC_FASTPATH, /* Allocation from cpu slab */ 16 ALLOC_FASTPATH, /* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
204void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 205void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
205void *__kmalloc(size_t size, gfp_t flags); 206void *__kmalloc(size_t size, gfp_t flags);
206 207
208#ifdef CONFIG_KMEMTRACE
209extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
210#else
211static __always_inline void *
212kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
213{
214 return kmem_cache_alloc(s, gfpflags);
215}
216#endif
217
207static __always_inline void *kmalloc_large(size_t size, gfp_t flags) 218static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
208{ 219{
209 return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size)); 220 unsigned int order = get_order(size);
221 void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
222
223 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
224 size, PAGE_SIZE << order, flags);
225
226 return ret;
210} 227}
211 228
212static __always_inline void *kmalloc(size_t size, gfp_t flags) 229static __always_inline void *kmalloc(size_t size, gfp_t flags)
213{ 230{
231 void *ret;
232
214 if (__builtin_constant_p(size)) { 233 if (__builtin_constant_p(size)) {
215 if (size > PAGE_SIZE) 234 if (size > PAGE_SIZE)
216 return kmalloc_large(size, flags); 235 return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
221 if (!s) 240 if (!s)
222 return ZERO_SIZE_PTR; 241 return ZERO_SIZE_PTR;
223 242
224 return kmem_cache_alloc(s, flags); 243 ret = kmem_cache_alloc_notrace(s, flags);
244
245 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
246 _THIS_IP_, ret,
247 size, s->size, flags);
248
249 return ret;
225 } 250 }
226 } 251 }
227 return __kmalloc(size, flags); 252 return __kmalloc(size, flags);
@@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
231void *__kmalloc_node(size_t size, gfp_t flags, int node); 256void *__kmalloc_node(size_t size, gfp_t flags, int node);
232void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 257void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
233 258
259#ifdef CONFIG_KMEMTRACE
260extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
261 gfp_t gfpflags,
262 int node);
263#else
264static __always_inline void *
265kmem_cache_alloc_node_notrace(struct kmem_cache *s,
266 gfp_t gfpflags,
267 int node)
268{
269 return kmem_cache_alloc_node(s, gfpflags, node);
270}
271#endif
272
234static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) 273static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
235{ 274{
275 void *ret;
276
236 if (__builtin_constant_p(size) && 277 if (__builtin_constant_p(size) &&
237 size <= PAGE_SIZE && !(flags & SLUB_DMA)) { 278 size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
238 struct kmem_cache *s = kmalloc_slab(size); 279 struct kmem_cache *s = kmalloc_slab(size);
@@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
240 if (!s) 281 if (!s)
241 return ZERO_SIZE_PTR; 282 return ZERO_SIZE_PTR;
242 283
243 return kmem_cache_alloc_node(s, flags, node); 284 ret = kmem_cache_alloc_node_notrace(s, flags, node);
285
286 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
287 _THIS_IP_, ret,
288 size, s->size, flags, node);
289
290 return ret;
244 } 291 }
245 return __kmalloc_node(size, flags, node); 292 return __kmalloc_node(size, flags, node);
246} 293}
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
new file mode 100644
index 000000000000..ad8b7857855a
--- /dev/null
+++ b/include/trace/kmemtrace.h
@@ -0,0 +1,75 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <linux/types.h>
13#include <linux/marker.h>
14
15enum kmemtrace_type_id {
16 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
17 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
18 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
19};
20
21#ifdef CONFIG_KMEMTRACE
22
23extern void kmemtrace_init(void);
24
25extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
26 unsigned long call_site,
27 const void *ptr,
28 size_t bytes_req,
29 size_t bytes_alloc,
30 gfp_t gfp_flags,
31 int node);
32
33extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
34 unsigned long call_site,
35 const void *ptr);
36
37#else /* CONFIG_KMEMTRACE */
38
39static inline void kmemtrace_init(void)
40{
41}
42
43static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
44 unsigned long call_site,
45 const void *ptr,
46 size_t bytes_req,
47 size_t bytes_alloc,
48 gfp_t gfp_flags,
49 int node)
50{
51}
52
53static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
54 unsigned long call_site,
55 const void *ptr)
56{
57}
58
59#endif /* CONFIG_KMEMTRACE */
60
61static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
62 unsigned long call_site,
63 const void *ptr,
64 size_t bytes_req,
65 size_t bytes_alloc,
66 gfp_t gfp_flags)
67{
68 kmemtrace_mark_alloc_node(type_id, call_site, ptr,
69 bytes_req, bytes_alloc, gfp_flags, -1);
70}
71
72#endif /* __KERNEL__ */
73
74#endif /* _LINUX_KMEMTRACE_H */
75
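
Below is a minimal sketch, not part of this header, of how an allocation site could use the hooks declared above; the two page wrappers are hypothetical and only illustrate the bytes_req/bytes_alloc and free conventions:

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <trace/kmemtrace.h>

static unsigned long example_get_pages(gfp_t flags, unsigned int order)
{
	unsigned long addr = __get_free_pages(flags, order);

	/* requested and granted sizes are the same for whole pages */
	kmemtrace_mark_alloc(KMEMTRACE_TYPE_PAGES, _RET_IP_, (void *)addr,
			     PAGE_SIZE << order, PAGE_SIZE << order, flags);
	return addr;
}

static void example_free_pages(unsigned long addr, unsigned int order)
{
	kmemtrace_mark_free(KMEMTRACE_TYPE_PAGES, _RET_IP_, (void *)addr);
	free_pages(addr, order);
}
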
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
new file mode 100644
index 000000000000..867829df4571
--- /dev/null
+++ b/include/trace/workqueue.h
@@ -0,0 +1,25 @@
1#ifndef __TRACE_WORKQUEUE_H
2#define __TRACE_WORKQUEUE_H
3
4#include <linux/tracepoint.h>
5#include <linux/workqueue.h>
6#include <linux/sched.h>
7
8DECLARE_TRACE(workqueue_insertion,
9 TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
10 TPARGS(wq_thread, work));
11
12DECLARE_TRACE(workqueue_execution,
13 TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
14 TPARGS(wq_thread, work));
15
16/* Trace the creation of one workqueue thread on a cpu */
17DECLARE_TRACE(workqueue_creation,
18 TPPROTO(struct task_struct *wq_thread, int cpu),
19 TPARGS(wq_thread, cpu));
20
21DECLARE_TRACE(workqueue_destruction,
22 TPPROTO(struct task_struct *wq_thread),
23 TPARGS(wq_thread));
24
25#endif /* __TRACE_WORKQUEUE_H */
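
As a rough sketch of how a consumer attaches to these tracepoints (the probe body below is hypothetical; the registration helpers are the ones DECLARE_TRACE() generates):

#include <linux/init.h>
#include <trace/workqueue.h>

static void probe_workqueue_insertion(struct task_struct *wq_thread,
				      struct work_struct *work)
{
	/* e.g. bump a per-thread counter keyed on wq_thread */
}

static int __init example_probe_init(void)
{
	return register_trace_workqueue_insertion(probe_workqueue_insertion);
}

static void example_probe_exit(void)
{
	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
}
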
diff --git a/init/main.c b/init/main.c
index 844209453c02..db7974ff7a0a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
70#include <asm/setup.h> 70#include <asm/setup.h>
71#include <asm/sections.h> 71#include <asm/sections.h>
72#include <asm/cacheflush.h> 72#include <asm/cacheflush.h>
73#include <trace/kmemtrace.h>
73 74
74#ifdef CONFIG_X86_LOCAL_APIC 75#ifdef CONFIG_X86_LOCAL_APIC
75#include <asm/smp.h> 76#include <asm/smp.h>
@@ -641,6 +642,7 @@ asmlinkage void __init start_kernel(void)
641 enable_debug_pagealloc(); 642 enable_debug_pagealloc();
642 cpu_hotplug_init(); 643 cpu_hotplug_init();
643 kmem_cache_init(); 644 kmem_cache_init();
645 kmemtrace_init();
644 debug_objects_mem_init(); 646 debug_objects_mem_init();
645 idr_init_cache(); 647 idr_init_cache();
646 setup_per_cpu_pageset(); 648 setup_per_cpu_pageset();
diff --git a/kernel/relay.c b/kernel/relay.c
index 9d79b7854fa6..edc0ba6d8160 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -677,9 +677,7 @@ int relay_late_setup_files(struct rchan *chan,
677 */ 677 */
678 for_each_online_cpu(i) { 678 for_each_online_cpu(i) {
679 if (unlikely(!chan->buf[i])) { 679 if (unlikely(!chan->buf[i])) {
680 printk(KERN_ERR "relay_late_setup_files: CPU %u " 680 WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
681 "has no buffer, it must have!\n", i);
682 BUG();
683 err = -EINVAL; 681 err = -EINVAL;
684 break; 682 break;
685 } 683 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 242d0d47a70d..566c8c9e3a6d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4409,10 +4409,7 @@ void scheduler_tick(void)
4409#endif 4409#endif
4410} 4410}
4411 4411
4412#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ 4412unsigned long get_parent_ip(unsigned long addr)
4413 defined(CONFIG_PREEMPT_TRACER))
4414
4415static inline unsigned long get_parent_ip(unsigned long addr)
4416{ 4413{
4417 if (in_lock_functions(addr)) { 4414 if (in_lock_functions(addr)) {
4418 addr = CALLER_ADDR2; 4415 addr = CALLER_ADDR2;
@@ -4422,6 +4419,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
4422 return addr; 4419 return addr;
4423} 4420}
4424 4421
4422#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4423 defined(CONFIG_PREEMPT_TRACER))
4424
4425void __kprobes add_preempt_count(int val) 4425void __kprobes add_preempt_count(int val)
4426{ 4426{
4427#ifdef CONFIG_DEBUG_PREEMPT 4427#ifdef CONFIG_DEBUG_PREEMPT
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8d..6edfc2c11d99 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,6 +21,7 @@
21#include <linux/freezer.h> 21#include <linux/freezer.h>
22#include <linux/kthread.h> 22#include <linux/kthread.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/ftrace.h>
24#include <linux/smp.h> 25#include <linux/smp.h>
25#include <linux/tick.h> 26#include <linux/tick.h>
26 27
@@ -79,13 +80,23 @@ static void __local_bh_disable(unsigned long ip)
79 WARN_ON_ONCE(in_irq()); 80 WARN_ON_ONCE(in_irq());
80 81
81 raw_local_irq_save(flags); 82 raw_local_irq_save(flags);
82 add_preempt_count(SOFTIRQ_OFFSET); 83 /*
84 * The preempt tracer hooks into add_preempt_count and will break
85 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
86 * is set and before current->softirq_enabled is cleared.
87 * We must manually increment preempt_count here and manually
88 * call the trace_preempt_off later.
89 */
90 preempt_count() += SOFTIRQ_OFFSET;
83 /* 91 /*
84 * Were softirqs turned off above: 92 * Were softirqs turned off above:
85 */ 93 */
86 if (softirq_count() == SOFTIRQ_OFFSET) 94 if (softirq_count() == SOFTIRQ_OFFSET)
87 trace_softirqs_off(ip); 95 trace_softirqs_off(ip);
88 raw_local_irq_restore(flags); 96 raw_local_irq_restore(flags);
97
98 if (preempt_count() == SOFTIRQ_OFFSET)
99 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
89} 100}
90#else /* !CONFIG_TRACE_IRQFLAGS */ 101#else /* !CONFIG_TRACE_IRQFLAGS */
91static inline void __local_bh_disable(unsigned long ip) 102static inline void __local_bh_disable(unsigned long ip)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6fc3a6..28f2644484d9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -164,9 +164,8 @@ config BOOT_TRACER
164 representation of the delays during initcalls - but the raw 164 representation of the delays during initcalls - but the raw
165 /debug/tracing/trace text output is readable too. 165 /debug/tracing/trace text output is readable too.
166 166
167 ( Note that tracing self tests can't be enabled if this tracer is 167 You must pass in ftrace=initcall to the kernel command line
168 selected, because the self-tests are an initcall as well and that 168 to enable this on bootup.
169 would invalidate the boot trace. )
170 169
171config TRACE_BRANCH_PROFILING 170config TRACE_BRANCH_PROFILING
172 bool "Trace likely/unlikely profiler" 171 bool "Trace likely/unlikely profiler"
@@ -264,6 +263,38 @@ config HW_BRANCH_TRACER
264 This tracer records all branches on the system in a circular 263 This tracer records all branches on the system in a circular
265 buffer giving access to the last N branches for each cpu. 264 buffer giving access to the last N branches for each cpu.
266 265
266config KMEMTRACE
267 bool "Trace SLAB allocations"
268 select TRACING
269 help
270 kmemtrace provides tracing for slab allocator functions, such as
271 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
272 data is then fed to a userspace application in order to analyse
273 allocation hotspots, internal fragmentation and so on, making it
274 possible to see how well an allocator performs, as well as debug
275 and profile kernel code.
276
277 This requires a userspace application to use it. See
278 Documentation/vm/kmemtrace.txt for more information.
279
280 Saying Y will make the kernel somewhat larger and slower. However,
281 if you disable kmemtrace at run-time or boot-time, the performance
282 impact is minimal (depending on the arch the kernel is built for).
283
284 If unsure, say N.
285
286config WORKQUEUE_TRACER
287 bool "Trace workqueues"
288 select TRACING
289 help
290 The workqueue tracer provides some statistical information
291 about each cpu workqueue thread, such as the number of
292 works inserted and executed since its creation. It can help
293 to evaluate the amount of work each of them has to perform.
294 For example, it can help a developer decide whether to use
295 a per-cpu workqueue instead of a singlethreaded one.
296
297
267config DYNAMIC_FTRACE 298config DYNAMIC_FTRACE
268 bool "enable/disable ftrace tracepoints dynamically" 299 bool "enable/disable ftrace tracepoints dynamically"
269 depends on FUNCTION_TRACER 300 depends on FUNCTION_TRACER
@@ -294,7 +325,7 @@ config FTRACE_SELFTEST
294 325
295config FTRACE_STARTUP_TEST 326config FTRACE_STARTUP_TEST
296 bool "Perform a startup test on ftrace" 327 bool "Perform a startup test on ftrace"
297 depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER 328 depends on TRACING && DEBUG_KERNEL
298 select FTRACE_SELFTEST 329 select FTRACE_SELFTEST
299 help 330 help
300 This option performs a series of startup tests on ftrace. On bootup 331 This option performs a series of startup tests on ftrace. On bootup
@@ -302,4 +333,27 @@ config FTRACE_STARTUP_TEST
302 functioning properly. It will do tests on all the configured 333 functioning properly. It will do tests on all the configured
303 tracers of ftrace. 334 tracers of ftrace.
304 335
336config MMIOTRACE
337 bool "Memory mapped IO tracing"
338 depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
339 select TRACING
340 help
341 Mmiotrace traces Memory Mapped I/O access and is meant for
342 debugging and reverse engineering. It is called from the ioremap
343 implementation and works via page faults. Tracing is disabled by
344 default and can be enabled at run-time.
345
346 See Documentation/tracers/mmiotrace.txt.
347 If you are not helping to develop drivers, say N.
348
349config MMIOTRACE_TEST
350 tristate "Test module for mmiotrace"
351 depends on MMIOTRACE && m
352 help
353 This is a dumb module for testing mmiotrace. It is very dangerous
354 as it will write garbage to IO memory starting at a given address.
355 However, it should be safe to use on e.g. an unused portion of VRAM.
356
357 Say N, unless you absolutely know what you are doing.
358
305endmenu 359endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a93653f..f76d48f3527d 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,8 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
20 20
21obj-$(CONFIG_TRACING) += trace.o 21obj-$(CONFIG_TRACING) += trace.o
22obj-$(CONFIG_TRACING) += trace_output.o
23obj-$(CONFIG_TRACING) += trace_stat.o
22obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 24obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
23obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 25obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
24obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o 26obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +35,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 35obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o 36obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o 37obj-$(CONFIG_POWER_TRACER) += trace_power.o
38obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
39obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
36 40
37libftrace-y := ftrace.o 41libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7dcf6e9f2b04..68610031780b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -264,14 +264,6 @@ static void ftrace_update_pid_func(void)
264# error Dynamic ftrace depends on MCOUNT_RECORD 264# error Dynamic ftrace depends on MCOUNT_RECORD
265#endif 265#endif
266 266
267/*
268 * Since MCOUNT_ADDR may point to mcount itself, we do not want
269 * to get it confused by reading a reference in the code as we
270 * are parsing on objcopy output of text. Use a variable for
271 * it instead.
272 */
273static unsigned long mcount_addr = MCOUNT_ADDR;
274
275enum { 267enum {
276 FTRACE_ENABLE_CALLS = (1 << 0), 268 FTRACE_ENABLE_CALLS = (1 << 0),
277 FTRACE_DISABLE_CALLS = (1 << 1), 269 FTRACE_DISABLE_CALLS = (1 << 1),
@@ -290,7 +282,7 @@ static DEFINE_MUTEX(ftrace_regex_lock);
290 282
291struct ftrace_page { 283struct ftrace_page {
292 struct ftrace_page *next; 284 struct ftrace_page *next;
293 unsigned long index; 285 int index;
294 struct dyn_ftrace records[]; 286 struct dyn_ftrace records[];
295}; 287};
296 288
@@ -464,7 +456,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
464 unsigned long ip, fl; 456 unsigned long ip, fl;
465 unsigned long ftrace_addr; 457 unsigned long ftrace_addr;
466 458
467 ftrace_addr = (unsigned long)ftrace_caller; 459 ftrace_addr = (unsigned long)FTRACE_ADDR;
468 460
469 ip = rec->ip; 461 ip = rec->ip;
470 462
@@ -576,7 +568,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
576 568
577 ip = rec->ip; 569 ip = rec->ip;
578 570
579 ret = ftrace_make_nop(mod, rec, mcount_addr); 571 ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
580 if (ret) { 572 if (ret) {
581 ftrace_bug(ret, ip); 573 ftrace_bug(ret, ip);
582 rec->flags |= FTRACE_FL_FAILED; 574 rec->flags |= FTRACE_FL_FAILED;
@@ -787,7 +779,7 @@ enum {
787 779
788struct ftrace_iterator { 780struct ftrace_iterator {
789 struct ftrace_page *pg; 781 struct ftrace_page *pg;
790 unsigned idx; 782 int idx;
791 unsigned flags; 783 unsigned flags;
792 unsigned char buffer[FTRACE_BUFF_MAX+1]; 784 unsigned char buffer[FTRACE_BUFF_MAX+1];
793 unsigned buffer_idx; 785 unsigned buffer_idx;
@@ -1737,9 +1729,12 @@ static void clear_ftrace_pid(struct pid *pid)
1737{ 1729{
1738 struct task_struct *p; 1730 struct task_struct *p;
1739 1731
1732 rcu_read_lock();
1740 do_each_pid_task(pid, PIDTYPE_PID, p) { 1733 do_each_pid_task(pid, PIDTYPE_PID, p) {
1741 clear_tsk_trace_trace(p); 1734 clear_tsk_trace_trace(p);
1742 } while_each_pid_task(pid, PIDTYPE_PID, p); 1735 } while_each_pid_task(pid, PIDTYPE_PID, p);
1736 rcu_read_unlock();
1737
1743 put_pid(pid); 1738 put_pid(pid);
1744} 1739}
1745 1740
@@ -1747,9 +1742,11 @@ static void set_ftrace_pid(struct pid *pid)
1747{ 1742{
1748 struct task_struct *p; 1743 struct task_struct *p;
1749 1744
1745 rcu_read_lock();
1750 do_each_pid_task(pid, PIDTYPE_PID, p) { 1746 do_each_pid_task(pid, PIDTYPE_PID, p) {
1751 set_tsk_trace_trace(p); 1747 set_tsk_trace_trace(p);
1752 } while_each_pid_task(pid, PIDTYPE_PID, p); 1748 } while_each_pid_task(pid, PIDTYPE_PID, p);
1749 rcu_read_unlock();
1753} 1750}
1754 1751
1755static void clear_ftrace_pid_task(struct pid **pid) 1752static void clear_ftrace_pid_task(struct pid **pid)
@@ -1903,7 +1900,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
1903} 1900}
1904 1901
1905/** 1902/**
1906 * unregister_ftrace_function - unresgister a function for profiling. 1903 * unregister_ftrace_function - unregister a function for profiling.
1907 * @ops - ops structure that holds the function to unregister 1904 * @ops - ops structure that holds the function to unregister
1908 * 1905 *
1909 * Unregister a function that was added to be called by ftrace profiling. 1906 * Unregister a function that was added to be called by ftrace profiling.
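
For reference, a minimal sketch of the register/unregister pairing that the kerneldoc above describes; the callback is hypothetical and does nothing:

#include <linux/ftrace.h>
#include <linux/init.h>

static void example_trace_func(unsigned long ip, unsigned long parent_ip)
{
	/* called on every traced function entry while registered */
}

static struct ftrace_ops example_ops = {
	.func = example_trace_func,
};

static int __init example_init(void)
{
	return register_ftrace_function(&example_ops);
}

static void example_exit(void)
{
	unregister_ftrace_function(&example_ops);
}
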
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 000000000000..f04c0625f1cd
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,350 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/dcache.h>
10#include <linux/debugfs.h>
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <trace/kmemtrace.h>
14
15#include "trace.h"
16#include "trace_output.h"
17
18/* Select an alternative, minimalistic output instead of the original one */
19#define TRACE_KMEM_OPT_MINIMAL 0x1
20
21static struct tracer_opt kmem_opts[] = {
22 /* Disable the minimalistic output by default */
23 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
24 { }
25};
26
27static struct tracer_flags kmem_tracer_flags = {
28 .val = 0,
29 .opts = kmem_opts
30};
31
32
33static bool kmem_tracing_enabled __read_mostly;
34static struct trace_array *kmemtrace_array;
35
36static int kmem_trace_init(struct trace_array *tr)
37{
38 int cpu;
39 kmemtrace_array = tr;
40
41 for_each_cpu_mask(cpu, cpu_possible_map)
42 tracing_reset(tr, cpu);
43
44 kmem_tracing_enabled = true;
45
46 return 0;
47}
48
49static void kmem_trace_reset(struct trace_array *tr)
50{
51 kmem_tracing_enabled = false;
52}
53
54static void kmemtrace_headers(struct seq_file *s)
55{
56 /* Don't need headers for the original kmemtrace output */
57 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
58 return;
59
60 seq_printf(s, "#\n");
61 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
62 " POINTER NODE CALLER\n");
63 seq_printf(s, "# FREE | | | | "
64 " | | | |\n");
65 seq_printf(s, "# |\n\n");
66}
67
68/*
69 * The two following functions give the original output from kmemtrace,
70 * or something close to....perhaps they need some missing things
71 */
72static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry)
75{
76 struct trace_seq *s = &iter->seq;
77 int ret;
78
79 /* Taken from the old linux/kmemtrace.h */
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr,
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
84 (unsigned long) entry->gfp_flags, entry->node);
85
86 if (!ret)
87 return TRACE_TYPE_PARTIAL_LINE;
88
89 return TRACE_TYPE_HANDLED;
90}
91
92static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry)
95{
96 struct trace_seq *s = &iter->seq;
97 int ret;
98
99 /* Taken from the old linux/kmemtrace.h */
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE;
105
106 return TRACE_TYPE_HANDLED;
107}
108
109
110/* The following two functions provide a more minimalistic output */
111static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter,
113 struct kmemtrace_alloc_entry *entry)
114{
115 struct trace_seq *s = &iter->seq;
116 int ret;
117
118 /* Alloc entry */
119 ret = trace_seq_printf(s, " + ");
120 if (!ret)
121 return TRACE_TYPE_PARTIAL_LINE;
122
123 /* Type */
124 switch (entry->type_id) {
125 case KMEMTRACE_TYPE_KMALLOC:
126 ret = trace_seq_printf(s, "K ");
127 break;
128 case KMEMTRACE_TYPE_CACHE:
129 ret = trace_seq_printf(s, "C ");
130 break;
131 case KMEMTRACE_TYPE_PAGES:
132 ret = trace_seq_printf(s, "P ");
133 break;
134 default:
135 ret = trace_seq_printf(s, "? ");
136 }
137
138 if (!ret)
139 return TRACE_TYPE_PARTIAL_LINE;
140
141 /* Requested */
142 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
143 if (!ret)
144 return TRACE_TYPE_PARTIAL_LINE;
145
146 /* Allocated */
147 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
148 if (!ret)
149 return TRACE_TYPE_PARTIAL_LINE;
150
151 /* Flags
152 * TODO: it would be better to print the names of the GFP flags
153 */
154 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
155 if (!ret)
156 return TRACE_TYPE_PARTIAL_LINE;
157
158 /* Pointer to allocated */
159 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
160 if (!ret)
161 return TRACE_TYPE_PARTIAL_LINE;
162
163 /* Node */
164 ret = trace_seq_printf(s, "%4d ", entry->node);
165 if (!ret)
166 return TRACE_TYPE_PARTIAL_LINE;
167
168 /* Call site */
169 ret = seq_print_ip_sym(s, entry->call_site, 0);
170 if (!ret)
171 return TRACE_TYPE_PARTIAL_LINE;
172
173 if (!trace_seq_printf(s, "\n"))
174 return TRACE_TYPE_PARTIAL_LINE;
175
176 return TRACE_TYPE_HANDLED;
177}
178
179static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry)
182{
183 struct trace_seq *s = &iter->seq;
184 int ret;
185
186 /* Free entry */
187 ret = trace_seq_printf(s, " - ");
188 if (!ret)
189 return TRACE_TYPE_PARTIAL_LINE;
190
191 /* Type */
192 switch (entry->type_id) {
193 case KMEMTRACE_TYPE_KMALLOC:
194 ret = trace_seq_printf(s, "K ");
195 break;
196 case KMEMTRACE_TYPE_CACHE:
197 ret = trace_seq_printf(s, "C ");
198 break;
199 case KMEMTRACE_TYPE_PAGES:
200 ret = trace_seq_printf(s, "P ");
201 break;
202 default:
203 ret = trace_seq_printf(s, "? ");
204 }
205
206 if (!ret)
207 return TRACE_TYPE_PARTIAL_LINE;
208
209 /* Skip requested/allocated/flags */
210 ret = trace_seq_printf(s, " ");
211 if (!ret)
212 return TRACE_TYPE_PARTIAL_LINE;
213
214 /* Pointer to allocated */
215 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
216 if (!ret)
217 return TRACE_TYPE_PARTIAL_LINE;
218
219 /* Skip node */
220 ret = trace_seq_printf(s, " ");
221 if (!ret)
222 return TRACE_TYPE_PARTIAL_LINE;
223
224 /* Call site */
225 ret = seq_print_ip_sym(s, entry->call_site, 0);
226 if (!ret)
227 return TRACE_TYPE_PARTIAL_LINE;
228
229 if (!trace_seq_printf(s, "\n"))
230 return TRACE_TYPE_PARTIAL_LINE;
231
232 return TRACE_TYPE_HANDLED;
233}
234
235static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
236{
237 struct trace_entry *entry = iter->ent;
238
239 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field;
242 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field);
245 else
246 return kmemtrace_print_alloc_original(iter, field);
247 }
248
249 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field;
251 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field);
254 else
255 return kmemtrace_print_free_original(iter, field);
256 }
257
258 default:
259 return TRACE_TYPE_UNHANDLED;
260 }
261}
262
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275 unsigned long irq_flags;
276
277 if (!kmem_tracing_enabled)
278 return;
279
280 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
281 &irq_flags);
282 if (!event)
283 return;
284 entry = ring_buffer_event_data(event);
285 tracing_generic_entry_update(&entry->ent, 0, 0);
286
287 entry->ent.type = TRACE_KMEM_ALLOC;
288 entry->call_site = call_site;
289 entry->ptr = ptr;
290 entry->bytes_req = bytes_req;
291 entry->bytes_alloc = bytes_alloc;
292 entry->gfp_flags = gfp_flags;
293 entry->node = node;
294
295 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
296
297 trace_wake_up();
298}
299EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
300
301void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
302 unsigned long call_site,
303 const void *ptr)
304{
305 struct ring_buffer_event *event;
306 struct kmemtrace_free_entry *entry;
307 struct trace_array *tr = kmemtrace_array;
308 unsigned long irq_flags;
309
310 if (!kmem_tracing_enabled)
311 return;
312
313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
314 &irq_flags);
315 if (!event)
316 return;
317 entry = ring_buffer_event_data(event);
318 tracing_generic_entry_update(&entry->ent, 0, 0);
319
320 entry->ent.type = TRACE_KMEM_FREE;
321 entry->type_id = type_id;
322 entry->call_site = call_site;
323 entry->ptr = ptr;
324
325 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
326
327 trace_wake_up();
328}
329EXPORT_SYMBOL(kmemtrace_mark_free);
330
331static struct tracer kmem_tracer __read_mostly = {
332 .name = "kmemtrace",
333 .init = kmem_trace_init,
334 .reset = kmem_trace_reset,
335 .print_line = kmemtrace_print_line,
336 .print_header = kmemtrace_headers,
337 .flags = &kmem_tracer_flags
338};
339
340void kmemtrace_init(void)
341{
342 /* earliest opportunity to start kmem tracing */
343}
344
345static int __init init_kmem_tracer(void)
346{
347 return register_tracer(&kmem_tracer);
348}
349
350device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..b36d7374ceef 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -123,8 +123,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
123EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 123EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
124 124
125#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 125#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
126#define RB_ALIGNMENT_SHIFT 2 126#define RB_ALIGNMENT 4U
127#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
128#define RB_MAX_SMALL_DATA 28 127#define RB_MAX_SMALL_DATA 28
129 128
130enum { 129enum {
@@ -133,7 +132,7 @@ enum {
133}; 132};
134 133
135/* inline for ring buffer fast paths */ 134/* inline for ring buffer fast paths */
136static inline unsigned 135static unsigned
137rb_event_length(struct ring_buffer_event *event) 136rb_event_length(struct ring_buffer_event *event)
138{ 137{
139 unsigned length; 138 unsigned length;
@@ -151,7 +150,7 @@ rb_event_length(struct ring_buffer_event *event)
151 150
152 case RINGBUF_TYPE_DATA: 151 case RINGBUF_TYPE_DATA:
153 if (event->len) 152 if (event->len)
154 length = event->len << RB_ALIGNMENT_SHIFT; 153 length = event->len * RB_ALIGNMENT;
155 else 154 else
156 length = event->array[0]; 155 length = event->array[0];
157 return length + RB_EVNT_HDR_SIZE; 156 return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +178,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
179EXPORT_SYMBOL_GPL(ring_buffer_event_length); 178EXPORT_SYMBOL_GPL(ring_buffer_event_length);
180 179
181/* inline for ring buffer fast paths */ 180/* inline for ring buffer fast paths */
182static inline void * 181static void *
183rb_event_data(struct ring_buffer_event *event) 182rb_event_data(struct ring_buffer_event *event)
184{ 183{
185 BUG_ON(event->type != RINGBUF_TYPE_DATA); 184 BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -229,10 +228,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
229 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 228 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
230 * this issue out. 229 * this issue out.
231 */ 230 */
232static inline void free_buffer_page(struct buffer_page *bpage) 231static void free_buffer_page(struct buffer_page *bpage)
233{ 232{
234 if (bpage->page) 233 free_page((unsigned long)bpage->page);
235 free_page((unsigned long)bpage->page);
236 kfree(bpage); 234 kfree(bpage);
237} 235}
238 236
@@ -811,7 +809,7 @@ rb_event_index(struct ring_buffer_event *event)
811 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 809 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
812} 810}
813 811
814static inline int 812static int
815rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 813rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
816 struct ring_buffer_event *event) 814 struct ring_buffer_event *event)
817{ 815{
@@ -825,7 +823,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
825 rb_commit_index(cpu_buffer) == index; 823 rb_commit_index(cpu_buffer) == index;
826} 824}
827 825
828static inline void 826static void
829rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, 827rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
830 struct ring_buffer_event *event) 828 struct ring_buffer_event *event)
831{ 829{
@@ -850,7 +848,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
850 local_set(&cpu_buffer->commit_page->page->commit, index); 848 local_set(&cpu_buffer->commit_page->page->commit, index);
851} 849}
852 850
853static inline void 851static void
854rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 852rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
855{ 853{
856 /* 854 /*
@@ -896,7 +894,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
896 cpu_buffer->reader_page->read = 0; 894 cpu_buffer->reader_page->read = 0;
897} 895}
898 896
899static inline void rb_inc_iter(struct ring_buffer_iter *iter) 897static void rb_inc_iter(struct ring_buffer_iter *iter)
900{ 898{
901 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 899 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
902 900
@@ -926,7 +924,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
926 * and with this, we can determine what to place into the 924 * and with this, we can determine what to place into the
927 * data field. 925 * data field.
928 */ 926 */
929static inline void 927static void
930rb_update_event(struct ring_buffer_event *event, 928rb_update_event(struct ring_buffer_event *event,
931 unsigned type, unsigned length) 929 unsigned type, unsigned length)
932{ 930{
@@ -938,15 +936,11 @@ rb_update_event(struct ring_buffer_event *event,
938 break; 936 break;
939 937
940 case RINGBUF_TYPE_TIME_EXTEND: 938 case RINGBUF_TYPE_TIME_EXTEND:
941 event->len = 939 event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
942 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
943 >> RB_ALIGNMENT_SHIFT;
944 break; 940 break;
945 941
946 case RINGBUF_TYPE_TIME_STAMP: 942 case RINGBUF_TYPE_TIME_STAMP:
947 event->len = 943 event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
948 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
949 >> RB_ALIGNMENT_SHIFT;
950 break; 944 break;
951 945
952 case RINGBUF_TYPE_DATA: 946 case RINGBUF_TYPE_DATA:
@@ -955,16 +949,14 @@ rb_update_event(struct ring_buffer_event *event,
955 event->len = 0; 949 event->len = 0;
956 event->array[0] = length; 950 event->array[0] = length;
957 } else 951 } else
958 event->len = 952 event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
959 (length + (RB_ALIGNMENT-1))
960 >> RB_ALIGNMENT_SHIFT;
961 break; 953 break;
962 default: 954 default:
963 BUG(); 955 BUG();
964 } 956 }
965} 957}
966 958
967static inline unsigned rb_calculate_event_length(unsigned length) 959static unsigned rb_calculate_event_length(unsigned length)
968{ 960{
969 struct ring_buffer_event event; /* Used only for sizeof array */ 961 struct ring_buffer_event event; /* Used only for sizeof array */
970 962
@@ -1438,7 +1430,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1438} 1430}
1439EXPORT_SYMBOL_GPL(ring_buffer_write); 1431EXPORT_SYMBOL_GPL(ring_buffer_write);
1440 1432
1441static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1433static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1442{ 1434{
1443 struct buffer_page *reader = cpu_buffer->reader_page; 1435 struct buffer_page *reader = cpu_buffer->reader_page;
1444 struct buffer_page *head = cpu_buffer->head_page; 1436 struct buffer_page *head = cpu_buffer->head_page;
@@ -2277,9 +2269,24 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2277 if (buffer_a->pages != buffer_b->pages) 2269 if (buffer_a->pages != buffer_b->pages)
2278 return -EINVAL; 2270 return -EINVAL;
2279 2271
2272 if (ring_buffer_flags != RB_BUFFERS_ON)
2273 return -EAGAIN;
2274
2275 if (atomic_read(&buffer_a->record_disabled))
2276 return -EAGAIN;
2277
2278 if (atomic_read(&buffer_b->record_disabled))
2279 return -EAGAIN;
2280
2280 cpu_buffer_a = buffer_a->buffers[cpu]; 2281 cpu_buffer_a = buffer_a->buffers[cpu];
2281 cpu_buffer_b = buffer_b->buffers[cpu]; 2282 cpu_buffer_b = buffer_b->buffers[cpu];
2282 2283
2284 if (atomic_read(&cpu_buffer_a->record_disabled))
2285 return -EAGAIN;
2286
2287 if (atomic_read(&cpu_buffer_b->record_disabled))
2288 return -EAGAIN;
2289
2283 /* 2290 /*
2284 * We can't do a synchronize_sched here because this 2291 * We can't do a synchronize_sched here because this
2285 * function can be called in atomic context. 2292 * function can be called in atomic context.
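
A standalone userspace illustration, not kernel code, of the length encoding that the DIV_ROUND_UP() conversions above now express:

#include <stdio.h>

#define RB_ALIGNMENT	4U
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int payload = 10;	/* bytes the caller asked to store */
	unsigned int len = DIV_ROUND_UP(payload, RB_ALIGNMENT);

	/* event->len = 3, so rb_event_length() sees 3 * 4 = 12 payload bytes */
	printf("event->len = %u, stored payload = %u bytes\n",
	       len, len * RB_ALIGNMENT);
	return 0;
}
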
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 17bb88d86ac2..fd51cf0b94c7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -37,6 +37,7 @@
37#include <linux/irqflags.h> 37#include <linux/irqflags.h>
38 38
39#include "trace.h" 39#include "trace.h"
40#include "trace_output.h"
40 41
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 42#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 43
@@ -52,6 +53,11 @@ unsigned long __read_mostly tracing_thresh;
52 */ 53 */
53static bool __read_mostly tracing_selftest_running; 54static bool __read_mostly tracing_selftest_running;
54 55
56/*
57 * If a tracer is running, we do not want to run SELFTEST.
58 */
59static bool __read_mostly tracing_selftest_disabled;
60
55/* For tracers that don't implement custom flags */ 61/* For tracers that don't implement custom flags */
56static struct tracer_opt dummy_tracer_opt[] = { 62static struct tracer_opt dummy_tracer_opt[] = {
57 { } 63 { }
@@ -109,14 +115,19 @@ static cpumask_var_t __read_mostly tracing_buffer_mask;
109 */ 115 */
110int ftrace_dump_on_oops; 116int ftrace_dump_on_oops;
111 117
112static int tracing_set_tracer(char *buf); 118static int tracing_set_tracer(const char *buf);
119
120#define BOOTUP_TRACER_SIZE 100
121static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
122static char *default_bootup_tracer;
113 123
114static int __init set_ftrace(char *str) 124static int __init set_ftrace(char *str)
115{ 125{
116 tracing_set_tracer(str); 126 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
127 default_bootup_tracer = bootup_tracer_buf;
117 return 1; 128 return 1;
118} 129}
119__setup("ftrace", set_ftrace); 130__setup("ftrace=", set_ftrace);
120 131
121static int __init set_ftrace_dump_on_oops(char *str) 132static int __init set_ftrace_dump_on_oops(char *str)
122{ 133{
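
The reworked `ftrace=` handler above no longer switches tracers immediately; it strncpy()s the requested name into a 100-byte __initdata buffer and records a pointer to it, so the actual switch can happen later, when that tracer registers. One strncpy() detail worth remembering is that a source of BOOTUP_TRACER_SIZE characters or more leaves the destination without a NUL terminator, which is why the later comparison is also bounded by BOOTUP_TRACER_SIZE. A small user-space demonstration of that truncation behaviour (the shrunken buffer size and the tracer name are illustrative only):

    #include <stdio.h>
    #include <string.h>

    #define BOOTUP_TRACER_SIZE 8    /* shrunk from 100 to make truncation visible */

    int main(void)
    {
        char buf[BOOTUP_TRACER_SIZE + 1];
        const char *cmdline_arg = "function_graph";    /* longer than the buffer */

        memset(buf, 'X', sizeof(buf));
        buf[BOOTUP_TRACER_SIZE] = '\0';    /* guard byte so we can print safely */

        strncpy(buf, cmdline_arg, BOOTUP_TRACER_SIZE);

        /* strncpy() copied exactly BOOTUP_TRACER_SIZE bytes and no NUL. */
        printf("stored: \"%s\"\n", buf);
        printf("bounded match vs full name: %d\n",
               strncmp(buf, cmdline_arg, BOOTUP_TRACER_SIZE) == 0);
        return 0;
    }
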
@@ -186,9 +197,6 @@ int tracing_is_enabled(void)
186 return tracer_enabled; 197 return tracer_enabled;
187} 198}
188 199
189/* function tracing enabled */
190int ftrace_function_enabled;
191
192/* 200/*
193 * trace_buf_size is the size in bytes that is allocated 201 * trace_buf_size is the size in bytes that is allocated
194 * for a buffer. Note, the number of bytes is always rounded 202 * for a buffer. Note, the number of bytes is always rounded
@@ -229,7 +237,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
229 237
230/* trace_flags holds trace_options default values */ 238/* trace_flags holds trace_options default values */
231unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 239unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
232 TRACE_ITER_ANNOTATE; 240 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
233 241
234/** 242/**
235 * trace_wake_up - wake up tasks waiting for trace input 243 * trace_wake_up - wake up tasks waiting for trace input
@@ -287,6 +295,7 @@ static const char *trace_options[] = {
287 "userstacktrace", 295 "userstacktrace",
288 "sym-userobj", 296 "sym-userobj",
289 "printk-msg-only", 297 "printk-msg-only",
298 "context-info",
290 NULL 299 NULL
291}; 300};
292 301
@@ -329,132 +338,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
329 tracing_record_cmdline(current); 338 tracing_record_cmdline(current);
330} 339}
331 340
332/**
333 * trace_seq_printf - sequence printing of trace information
334 * @s: trace sequence descriptor
335 * @fmt: printf format string
336 *
337 * The tracer may use either sequence operations or its own
338 * copy to user routines. To simplify formating of a trace
339 * trace_seq_printf is used to store strings into a special
340 * buffer (@s). Then the output may be either used by
341 * the sequencer or pulled into another buffer.
342 */
343int
344trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
345{
346 int len = (PAGE_SIZE - 1) - s->len;
347 va_list ap;
348 int ret;
349
350 if (!len)
351 return 0;
352
353 va_start(ap, fmt);
354 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
355 va_end(ap);
356
357 /* If we can't write it all, don't bother writing anything */
358 if (ret >= len)
359 return 0;
360
361 s->len += ret;
362
363 return len;
364}
365
366/**
367 * trace_seq_puts - trace sequence printing of simple string
368 * @s: trace sequence descriptor
369 * @str: simple string to record
370 *
371 * The tracer may use either the sequence operations or its own
372 * copy to user routines. This function records a simple string
373 * into a special buffer (@s) for later retrieval by a sequencer
374 * or other mechanism.
375 */
376static int
377trace_seq_puts(struct trace_seq *s, const char *str)
378{
379 int len = strlen(str);
380
381 if (len > ((PAGE_SIZE - 1) - s->len))
382 return 0;
383
384 memcpy(s->buffer + s->len, str, len);
385 s->len += len;
386
387 return len;
388}
389
390static int
391trace_seq_putc(struct trace_seq *s, unsigned char c)
392{
393 if (s->len >= (PAGE_SIZE - 1))
394 return 0;
395
396 s->buffer[s->len++] = c;
397
398 return 1;
399}
400
401static int
402trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
403{
404 if (len > ((PAGE_SIZE - 1) - s->len))
405 return 0;
406
407 memcpy(s->buffer + s->len, mem, len);
408 s->len += len;
409
410 return len;
411}
412
413#define MAX_MEMHEX_BYTES 8
414#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
415
416static int
417trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
418{
419 unsigned char hex[HEX_CHARS];
420 unsigned char *data = mem;
421 int i, j;
422
423#ifdef __BIG_ENDIAN
424 for (i = 0, j = 0; i < len; i++) {
425#else
426 for (i = len-1, j = 0; i >= 0; i--) {
427#endif
428 hex[j++] = hex_asc_hi(data[i]);
429 hex[j++] = hex_asc_lo(data[i]);
430 }
431 hex[j++] = ' ';
432
433 return trace_seq_putmem(s, hex, j);
434}
435
436static int
437trace_seq_path(struct trace_seq *s, struct path *path)
438{
439 unsigned char *p;
440
441 if (s->len >= (PAGE_SIZE - 1))
442 return 0;
443 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
444 if (!IS_ERR(p)) {
445 p = mangle_path(s->buffer + s->len, p, "\n");
446 if (p) {
447 s->len = p - s->buffer;
448 return 1;
449 }
450 } else {
451 s->buffer[s->len++] = '?';
452 return 1;
453 }
454
455 return 0;
456}
457
458static void 341static void
459trace_seq_reset(struct trace_seq *s) 342trace_seq_reset(struct trace_seq *s)
460{ 343{
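
The trace_seq_*() helpers deleted above do not disappear from the kernel: trace.c now includes "trace_output.h" (see the first hunk of this file), so they are presumably consolidated into the new trace_output code used by the per-event callbacks introduced later in this patch. Their defining property is the all-or-nothing append into one page-sized buffer: if a formatted string does not fit in the remaining space, nothing is written at all. A stand-alone approximation of that behaviour (PAGE_SIZE and the struct layout are simplified assumptions):

    #include <stdarg.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096

    struct trace_seq {
        unsigned char buffer[PAGE_SIZE];
        unsigned int len;
    };

    /* Append to the sequence buffer, or write nothing if it would not fit. */
    static int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
    {
        int len = (PAGE_SIZE - 1) - s->len;
        va_list ap;
        int ret;

        if (!len)
            return 0;

        va_start(ap, fmt);
        ret = vsnprintf((char *)s->buffer + s->len, len, fmt, ap);
        va_end(ap);

        /* If we can't write it all, don't bother writing anything. */
        if (ret >= len)
            return 0;

        s->len += ret;
        return len;
    }

    int main(void)
    {
        struct trace_seq s = { .len = 0 };

        if (!trace_seq_printf(&s, "pid=%d comm=%s\n", 1, "swapper"))
            puts("line dropped");
        printf("%.*s", (int)s.len, (char *)s.buffer);
        return 0;
    }
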
@@ -543,7 +426,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
543 426
544 ftrace_enable_cpu(); 427 ftrace_enable_cpu();
545 428
546 WARN_ON_ONCE(ret); 429 WARN_ON_ONCE(ret && ret != -EAGAIN);
547 430
548 __update_max_tr(tr, tsk, cpu); 431 __update_max_tr(tr, tsk, cpu);
549 __raw_spin_unlock(&ftrace_max_lock); 432 __raw_spin_unlock(&ftrace_max_lock);
@@ -596,7 +479,7 @@ int register_tracer(struct tracer *type)
596 type->flags->opts = dummy_tracer_opt; 479 type->flags->opts = dummy_tracer_opt;
597 480
598#ifdef CONFIG_FTRACE_STARTUP_TEST 481#ifdef CONFIG_FTRACE_STARTUP_TEST
599 if (type->selftest) { 482 if (type->selftest && !tracing_selftest_disabled) {
600 struct tracer *saved_tracer = current_trace; 483 struct tracer *saved_tracer = current_trace;
601 struct trace_array *tr = &global_trace; 484 struct trace_array *tr = &global_trace;
602 int i; 485 int i;
@@ -638,8 +521,25 @@ int register_tracer(struct tracer *type)
638 out: 521 out:
639 tracing_selftest_running = false; 522 tracing_selftest_running = false;
640 mutex_unlock(&trace_types_lock); 523 mutex_unlock(&trace_types_lock);
641 lock_kernel();
642 524
525 if (!ret && default_bootup_tracer) {
526 if (!strncmp(default_bootup_tracer, type->name,
527 BOOTUP_TRACER_SIZE)) {
528 printk(KERN_INFO "Starting tracer '%s'\n",
529 type->name);
530 /* Do we want this tracer to start on bootup? */
531 tracing_set_tracer(type->name);
532 default_bootup_tracer = NULL;
533 /* disable other selftests, since this will break it. */
534 tracing_selftest_disabled = 1;
535#ifdef CONFIG_FTRACE_STARTUP_TEST
536 printk(KERN_INFO "Disabling FTRACE selftests due"
537 " to running tracer '%s'\n", type->name);
538#endif
539 }
540 }
541
542 lock_kernel();
643 return ret; 543 return ret;
644} 544}
645 545
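
With the `ftrace=` name stashed at early boot, the hunk above is where the request is finally honoured: every register_tracer() call compares the new tracer's name against default_bootup_tracer, starts it on a match, clears the request, and sets tracing_selftest_disabled so the remaining startup selftests do not disturb the now-running tracer. The deferred-selection pattern, reduced to a user-space sketch (the tracer names and printfs are stand-ins):

    #include <stdio.h>
    #include <string.h>

    #define BOOTUP_TRACER_SIZE 100

    static char bootup_tracer_buf[BOOTUP_TRACER_SIZE];
    static char *default_bootup_tracer;
    static int tracing_selftest_disabled;

    /* Early boot: only remember what was asked for. */
    static void set_ftrace(const char *str)
    {
        strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
    }

    /* Later: every tracer that registers is checked against the request. */
    static void register_tracer(const char *name)
    {
        printf("registered tracer '%s'\n", name);

        if (default_bootup_tracer &&
            !strncmp(default_bootup_tracer, name, BOOTUP_TRACER_SIZE)) {
            printf("Starting tracer '%s'\n", name);
            default_bootup_tracer = NULL;
            tracing_selftest_disabled = 1;
        }
    }

    int main(void)
    {
        set_ftrace("branch");
        register_tracer("nop");
        register_tracer("branch");    /* match: started, selftests disabled */
        printf("selftests disabled: %d\n", tracing_selftest_disabled);
        return 0;
    }
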
@@ -738,13 +638,12 @@ void tracing_start(void)
738 return; 638 return;
739 639
740 spin_lock_irqsave(&tracing_start_lock, flags); 640 spin_lock_irqsave(&tracing_start_lock, flags);
741 if (--trace_stop_count) 641 if (--trace_stop_count) {
742 goto out; 642 if (trace_stop_count < 0) {
743 643 /* Someone screwed up their debugging */
744 if (trace_stop_count < 0) { 644 WARN_ON_ONCE(1);
745 /* Someone screwed up their debugging */ 645 trace_stop_count = 0;
746 WARN_ON_ONCE(1); 646 }
747 trace_stop_count = 0;
748 goto out; 647 goto out;
749 } 648 }
750 649
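
tracing_start() pairs with tracing_stop() through the nesting depth in trace_stop_count; the rewrite above nests the underflow check inside the "still stopped" branch, where it can actually see a negative count. In the removed ordering, `if (--trace_stop_count) goto out;` already jumped away for any non-zero value, so the `< 0` warning that followed could never fire. A minimal user-space model of the counted start/stop with the same warn-and-clamp behaviour (WARN_ON_ONCE replaced by a plain fprintf):

    #include <stdio.h>

    static long trace_stop_count;

    static void tracing_stop(void)
    {
        trace_stop_count++;    /* each stop nests */
    }

    static void tracing_start(void)
    {
        if (--trace_stop_count) {
            if (trace_stop_count < 0) {
                /* Someone screwed up their debugging */
                fprintf(stderr, "WARN: unbalanced tracing_start()\n");
                trace_stop_count = 0;
            }
            return;
        }
        printf("tracing restarted\n");
    }

    int main(void)
    {
        tracing_stop();
        tracing_stop();
        tracing_start();    /* still stopped: depth 1 */
        tracing_start();    /* depth 0: restart */
        tracing_start();    /* unbalanced: warn and clamp */
        return 0;
    }
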
@@ -960,10 +859,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
960 trace_function(tr, data, ip, parent_ip, flags, pc); 859 trace_function(tr, data, ip, parent_ip, flags, pc);
961} 860}
962 861
963static void ftrace_trace_stack(struct trace_array *tr, 862static void __ftrace_trace_stack(struct trace_array *tr,
964 struct trace_array_cpu *data, 863 struct trace_array_cpu *data,
965 unsigned long flags, 864 unsigned long flags,
966 int skip, int pc) 865 int skip, int pc)
967{ 866{
968#ifdef CONFIG_STACKTRACE 867#ifdef CONFIG_STACKTRACE
969 struct ring_buffer_event *event; 868 struct ring_buffer_event *event;
@@ -971,9 +870,6 @@ static void ftrace_trace_stack(struct trace_array *tr,
971 struct stack_trace trace; 870 struct stack_trace trace;
972 unsigned long irq_flags; 871 unsigned long irq_flags;
973 872
974 if (!(trace_flags & TRACE_ITER_STACKTRACE))
975 return;
976
977 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 873 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
978 &irq_flags); 874 &irq_flags);
979 if (!event) 875 if (!event)
@@ -994,12 +890,23 @@ static void ftrace_trace_stack(struct trace_array *tr,
994#endif 890#endif
995} 891}
996 892
893static void ftrace_trace_stack(struct trace_array *tr,
894 struct trace_array_cpu *data,
895 unsigned long flags,
896 int skip, int pc)
897{
898 if (!(trace_flags & TRACE_ITER_STACKTRACE))
899 return;
900
901 __ftrace_trace_stack(tr, data, flags, skip, pc);
902}
903
997void __trace_stack(struct trace_array *tr, 904void __trace_stack(struct trace_array *tr,
998 struct trace_array_cpu *data, 905 struct trace_array_cpu *data,
999 unsigned long flags, 906 unsigned long flags,
1000 int skip) 907 int skip, int pc)
1001{ 908{
1002 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 909 __ftrace_trace_stack(tr, data, flags, skip, pc);
1003} 910}
1004 911
1005static void ftrace_trace_userstack(struct trace_array *tr, 912static void ftrace_trace_userstack(struct trace_array *tr,
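
The reshuffle above splits stack recording in two: __ftrace_trace_stack() always records, while ftrace_trace_stack() is a thin wrapper that applies the TRACE_ITER_STACKTRACE option. __trace_stack() now takes the preempt count from its caller and calls the unconditional variant directly, so explicit stack-trace requests are no longer filtered by the option bit. The shape of that split, as a user-space sketch (the flag name is the only identifier carried over from the patch; the bit value is made up):

    #include <stdio.h>

    #define TRACE_ITER_STACKTRACE 0x1    /* illustrative bit value */

    static unsigned long trace_flags;    /* runtime options */

    /* Always record, no questions asked. */
    static void __ftrace_trace_stack(int skip, int pc)
    {
        printf("stack trace recorded (skip=%d, pc=%d)\n", skip, pc);
    }

    /* Automatic call sites honour the user-visible option bit. */
    static void ftrace_trace_stack(int skip, int pc)
    {
        if (!(trace_flags & TRACE_ITER_STACKTRACE))
            return;
        __ftrace_trace_stack(skip, pc);
    }

    /* Explicit requests bypass the option entirely. */
    static void __trace_stack(int skip, int pc)
    {
        __ftrace_trace_stack(skip, pc);
    }

    int main(void)
    {
        ftrace_trace_stack(0, 0);    /* option clear: nothing recorded */
        __trace_stack(0, 0);         /* still recorded */
        trace_flags |= TRACE_ITER_STACKTRACE;
        ftrace_trace_stack(2, 1);    /* now recorded */
        return 0;
    }
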
@@ -1163,65 +1070,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1163 local_irq_restore(flags); 1070 local_irq_restore(flags);
1164} 1071}
1165 1072
1166#ifdef CONFIG_FUNCTION_TRACER
1167static void
1168function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1169{
1170 struct trace_array *tr = &global_trace;
1171 struct trace_array_cpu *data;
1172 unsigned long flags;
1173 long disabled;
1174 int cpu, resched;
1175 int pc;
1176
1177 if (unlikely(!ftrace_function_enabled))
1178 return;
1179
1180 pc = preempt_count();
1181 resched = ftrace_preempt_disable();
1182 local_save_flags(flags);
1183 cpu = raw_smp_processor_id();
1184 data = tr->data[cpu];
1185 disabled = atomic_inc_return(&data->disabled);
1186
1187 if (likely(disabled == 1))
1188 trace_function(tr, data, ip, parent_ip, flags, pc);
1189
1190 atomic_dec(&data->disabled);
1191 ftrace_preempt_enable(resched);
1192}
1193
1194static void
1195function_trace_call(unsigned long ip, unsigned long parent_ip)
1196{
1197 struct trace_array *tr = &global_trace;
1198 struct trace_array_cpu *data;
1199 unsigned long flags;
1200 long disabled;
1201 int cpu;
1202 int pc;
1203
1204 if (unlikely(!ftrace_function_enabled))
1205 return;
1206
1207 /*
1208 * Need to use raw, since this must be called before the
1209 * recursive protection is performed.
1210 */
1211 local_irq_save(flags);
1212 cpu = raw_smp_processor_id();
1213 data = tr->data[cpu];
1214 disabled = atomic_inc_return(&data->disabled);
1215
1216 if (likely(disabled == 1)) {
1217 pc = preempt_count();
1218 trace_function(tr, data, ip, parent_ip, flags, pc);
1219 }
1220
1221 atomic_dec(&data->disabled);
1222 local_irq_restore(flags);
1223}
1224
1225#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1073#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1226int trace_graph_entry(struct ftrace_graph_ent *trace) 1074int trace_graph_entry(struct ftrace_graph_ent *trace)
1227{ 1075{
@@ -1279,31 +1127,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1279} 1127}
1280#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 1128#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1281 1129
1282static struct ftrace_ops trace_ops __read_mostly =
1283{
1284 .func = function_trace_call,
1285};
1286
1287void tracing_start_function_trace(void)
1288{
1289 ftrace_function_enabled = 0;
1290
1291 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1292 trace_ops.func = function_trace_call_preempt_only;
1293 else
1294 trace_ops.func = function_trace_call;
1295
1296 register_ftrace_function(&trace_ops);
1297 ftrace_function_enabled = 1;
1298}
1299
1300void tracing_stop_function_trace(void)
1301{
1302 ftrace_function_enabled = 0;
1303 unregister_ftrace_function(&trace_ops);
1304}
1305#endif
1306
1307enum trace_file_type { 1130enum trace_file_type {
1308 TRACE_FILE_LAT_FMT = 1, 1131 TRACE_FILE_LAT_FMT = 1,
1309 TRACE_FILE_ANNOTATE = 2, 1132 TRACE_FILE_ANNOTATE = 2,
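
The function-tracer entry points removed above (function_trace_call(), its preempt-only variant, the trace_ops definition and the start/stop helpers) leave trace.c; they are presumably relocated into the function tracer plugin's own file rather than dropped outright, since nothing else in this patch replaces them. The registration pattern they rely on is reproduced below as a kernel-style sketch based on the removed lines; the my_* names are placeholders, and this is illustrative rather than the relocated code itself.

    /* Sketch of the callback registration used by the function tracer. */
    #include <linux/ftrace.h>

    static void my_function_trace_call(unsigned long ip, unsigned long parent_ip)
    {
        /* invoked at (nearly) every traced kernel function entry */
    }

    static struct ftrace_ops my_trace_ops = {
        .func = my_function_trace_call,
    };

    static void my_start_function_trace(void)
    {
        register_ftrace_function(&my_trace_ops);
    }

    static void my_stop_function_trace(void)
    {
        unregister_ftrace_function(&my_trace_ops);
    }
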
@@ -1376,8 +1199,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1376} 1199}
1377 1200
1378/* Find the next real entry, without updating the iterator itself */ 1201/* Find the next real entry, without updating the iterator itself */
1379static struct trace_entry * 1202struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1380find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1203 int *ent_cpu, u64 *ent_ts)
1381{ 1204{
1382 return __find_next_entry(iter, ent_cpu, ent_ts); 1205 return __find_next_entry(iter, ent_cpu, ent_ts);
1383} 1206}
@@ -1472,154 +1295,6 @@ static void s_stop(struct seq_file *m, void *p)
1472 mutex_unlock(&trace_types_lock); 1295 mutex_unlock(&trace_types_lock);
1473} 1296}
1474 1297
1475#ifdef CONFIG_KRETPROBES
1476static inline const char *kretprobed(const char *name)
1477{
1478 static const char tramp_name[] = "kretprobe_trampoline";
1479 int size = sizeof(tramp_name);
1480
1481 if (strncmp(tramp_name, name, size) == 0)
1482 return "[unknown/kretprobe'd]";
1483 return name;
1484}
1485#else
1486static inline const char *kretprobed(const char *name)
1487{
1488 return name;
1489}
1490#endif /* CONFIG_KRETPROBES */
1491
1492static int
1493seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1494{
1495#ifdef CONFIG_KALLSYMS
1496 char str[KSYM_SYMBOL_LEN];
1497 const char *name;
1498
1499 kallsyms_lookup(address, NULL, NULL, NULL, str);
1500
1501 name = kretprobed(str);
1502
1503 return trace_seq_printf(s, fmt, name);
1504#endif
1505 return 1;
1506}
1507
1508static int
1509seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1510 unsigned long address)
1511{
1512#ifdef CONFIG_KALLSYMS
1513 char str[KSYM_SYMBOL_LEN];
1514 const char *name;
1515
1516 sprint_symbol(str, address);
1517 name = kretprobed(str);
1518
1519 return trace_seq_printf(s, fmt, name);
1520#endif
1521 return 1;
1522}
1523
1524#ifndef CONFIG_64BIT
1525# define IP_FMT "%08lx"
1526#else
1527# define IP_FMT "%016lx"
1528#endif
1529
1530int
1531seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1532{
1533 int ret;
1534
1535 if (!ip)
1536 return trace_seq_printf(s, "0");
1537
1538 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1539 ret = seq_print_sym_offset(s, "%s", ip);
1540 else
1541 ret = seq_print_sym_short(s, "%s", ip);
1542
1543 if (!ret)
1544 return 0;
1545
1546 if (sym_flags & TRACE_ITER_SYM_ADDR)
1547 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1548 return ret;
1549}
1550
1551static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1552 unsigned long ip, unsigned long sym_flags)
1553{
1554 struct file *file = NULL;
1555 unsigned long vmstart = 0;
1556 int ret = 1;
1557
1558 if (mm) {
1559 const struct vm_area_struct *vma;
1560
1561 down_read(&mm->mmap_sem);
1562 vma = find_vma(mm, ip);
1563 if (vma) {
1564 file = vma->vm_file;
1565 vmstart = vma->vm_start;
1566 }
1567 if (file) {
1568 ret = trace_seq_path(s, &file->f_path);
1569 if (ret)
1570 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1571 }
1572 up_read(&mm->mmap_sem);
1573 }
1574 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1575 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1576 return ret;
1577}
1578
1579static int
1580seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1581 unsigned long sym_flags)
1582{
1583 struct mm_struct *mm = NULL;
1584 int ret = 1;
1585 unsigned int i;
1586
1587 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1588 struct task_struct *task;
1589 /*
1590 * we do the lookup on the thread group leader,
1591 * since individual threads might have already quit!
1592 */
1593 rcu_read_lock();
1594 task = find_task_by_vpid(entry->ent.tgid);
1595 if (task)
1596 mm = get_task_mm(task);
1597 rcu_read_unlock();
1598 }
1599
1600 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1601 unsigned long ip = entry->caller[i];
1602
1603 if (ip == ULONG_MAX || !ret)
1604 break;
1605 if (i && ret)
1606 ret = trace_seq_puts(s, " <- ");
1607 if (!ip) {
1608 if (ret)
1609 ret = trace_seq_puts(s, "??");
1610 continue;
1611 }
1612 if (!ret)
1613 break;
1614 if (ret)
1615 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1616 }
1617
1618 if (mm)
1619 mmput(mm);
1620 return ret;
1621}
1622
1623static void print_lat_help_header(struct seq_file *m) 1298static void print_lat_help_header(struct seq_file *m)
1624{ 1299{
1625 seq_puts(m, "# _------=> CPU# \n"); 1300 seq_puts(m, "# _------=> CPU# \n");
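
The symbol-printing helpers deleted above (kretprobed(), seq_print_sym_short(), seq_print_sym_offset(), seq_print_ip_sym(), seq_print_user_ip() and seq_print_userip_objs()) follow the trace_seq helpers out of trace.c, presumably into the shared trace_output code. The kretprobed() detail is easy to miss: any address that kallsyms resolves to kretprobe_trampoline is reported as "[unknown/kretprobe'd]", because a kretprobe has replaced the real return address. A small user-space restatement of that name filter:

    #include <stdio.h>
    #include <string.h>

    /* Mirror of the removed kretprobed() helper, minus the CONFIG_KRETPROBES guard. */
    static const char *kretprobed(const char *name)
    {
        static const char tramp_name[] = "kretprobe_trampoline";
        int size = sizeof(tramp_name);

        if (strncmp(tramp_name, name, size) == 0)
            return "[unknown/kretprobe'd]";
        return name;
    }

    int main(void)
    {
        printf("%s\n", kretprobed("schedule"));
        printf("%s\n", kretprobed("kretprobe_trampoline"));
        return 0;
    }
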
@@ -1704,103 +1379,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1704 seq_puts(m, "\n"); 1379 seq_puts(m, "\n");
1705} 1380}
1706 1381
1707static void
1708lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1709{
1710 int hardirq, softirq;
1711 char *comm;
1712
1713 comm = trace_find_cmdline(entry->pid);
1714
1715 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1716 trace_seq_printf(s, "%3d", cpu);
1717 trace_seq_printf(s, "%c%c",
1718 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1719 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1720 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1721
1722 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1723 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1724 if (hardirq && softirq) {
1725 trace_seq_putc(s, 'H');
1726 } else {
1727 if (hardirq) {
1728 trace_seq_putc(s, 'h');
1729 } else {
1730 if (softirq)
1731 trace_seq_putc(s, 's');
1732 else
1733 trace_seq_putc(s, '.');
1734 }
1735 }
1736
1737 if (entry->preempt_count)
1738 trace_seq_printf(s, "%x", entry->preempt_count);
1739 else
1740 trace_seq_puts(s, ".");
1741}
1742
1743unsigned long preempt_mark_thresh = 100;
1744
1745static void
1746lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1747 unsigned long rel_usecs)
1748{
1749 trace_seq_printf(s, " %4lldus", abs_usecs);
1750 if (rel_usecs > preempt_mark_thresh)
1751 trace_seq_puts(s, "!: ");
1752 else if (rel_usecs > 1)
1753 trace_seq_puts(s, "+: ");
1754 else
1755 trace_seq_puts(s, " : ");
1756}
1757
1758static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1759
1760static int task_state_char(unsigned long state)
1761{
1762 int bit = state ? __ffs(state) + 1 : 0;
1763
1764 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
1765}
1766
1767/*
1768 * The message is supposed to contain an ending newline.
1769 * If the printing stops prematurely, try to add a newline of our own.
1770 */
1771void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1772{
1773 struct trace_entry *ent;
1774 struct trace_field_cont *cont;
1775 bool ok = true;
1776
1777 ent = peek_next_entry(iter, iter->cpu, NULL);
1778 if (!ent || ent->type != TRACE_CONT) {
1779 trace_seq_putc(s, '\n');
1780 return;
1781 }
1782
1783 do {
1784 cont = (struct trace_field_cont *)ent;
1785 if (ok)
1786 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1787
1788 ftrace_disable_cpu();
1789
1790 if (iter->buffer_iter[iter->cpu])
1791 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1792 else
1793 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1794
1795 ftrace_enable_cpu();
1796
1797 ent = peek_next_entry(iter, iter->cpu, NULL);
1798 } while (ent && ent->type == TRACE_CONT);
1799
1800 if (!ok)
1801 trace_seq_putc(s, '\n');
1802}
1803
1804static void test_cpu_buff_start(struct trace_iterator *iter) 1382static void test_cpu_buff_start(struct trace_iterator *iter)
1805{ 1383{
1806 struct trace_seq *s = &iter->seq; 1384 struct trace_seq *s = &iter->seq;
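
lat_print_generic() and lat_print_timestamp(), removed above, produced the classic latency-trace prefix: comm-pid, the CPU, an irqs-off column ('d', 'X' when not supported, or '.'), a need-resched column ('N' or '.'), a hardirq/softirq column ('H', 'h', 's' or '.'), the preempt count, and a delay marker ('!' above the 100 us preempt_mark_thresh, '+' above 1 us). The same column decoding, redone as a user-space sketch so a prefix such as "bash-2278   1d.s2" can be read off directly (flag bit values copied from the trace.h hunk later in this patch):

    #include <stdio.h>

    #define TRACE_FLAG_IRQS_OFF        0x01
    #define TRACE_FLAG_IRQS_NOSUPPORT  0x02
    #define TRACE_FLAG_NEED_RESCHED    0x04
    #define TRACE_FLAG_HARDIRQ         0x08
    #define TRACE_FLAG_SOFTIRQ         0x10

    static void lat_print_generic(const char *comm, int pid, int cpu,
                                  unsigned char flags, int preempt_count)
    {
        int hardirq = flags & TRACE_FLAG_HARDIRQ;
        int softirq = flags & TRACE_FLAG_SOFTIRQ;

        printf("%8.8s-%-5d ", comm, pid);
        printf("%3d", cpu);
        printf("%c%c",
               (flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
               (flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
               (flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.');

        if (hardirq && softirq)
            putchar('H');
        else if (hardirq)
            putchar('h');
        else if (softirq)
            putchar('s');
        else
            putchar('.');

        if (preempt_count)
            printf("%x", preempt_count);
        else
            putchar('.');
        putchar('\n');
    }

    int main(void)
    {
        /* irqs off, in softirq, preempt_count 2 -> columns "d.s2" */
        lat_print_generic("bash", 2278, 1,
                          TRACE_FLAG_IRQS_OFF | TRACE_FLAG_SOFTIRQ, 2);
        return 0;
    }
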
@@ -1818,138 +1396,31 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1818 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1396 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1819} 1397}
1820 1398
1821static enum print_line_t 1399static enum print_line_t print_lat_fmt(struct trace_iterator *iter)
1822print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1823{ 1400{
1824 struct trace_seq *s = &iter->seq; 1401 struct trace_seq *s = &iter->seq;
1825 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1402 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1826 struct trace_entry *next_entry; 1403 struct trace_event *event;
1827 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1828 struct trace_entry *entry = iter->ent; 1404 struct trace_entry *entry = iter->ent;
1829 unsigned long abs_usecs;
1830 unsigned long rel_usecs;
1831 u64 next_ts;
1832 char *comm;
1833 int S, T;
1834 int i;
1835
1836 if (entry->type == TRACE_CONT)
1837 return TRACE_TYPE_HANDLED;
1838 1405
1839 test_cpu_buff_start(iter); 1406 test_cpu_buff_start(iter);
1840 1407
1841 next_entry = find_next_entry(iter, NULL, &next_ts); 1408 event = ftrace_find_event(entry->type);
1842 if (!next_entry)
1843 next_ts = iter->ts;
1844 rel_usecs = ns2usecs(next_ts - iter->ts);
1845 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1846
1847 if (verbose) {
1848 comm = trace_find_cmdline(entry->pid);
1849 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1850 " %ld.%03ldms (+%ld.%03ldms): ",
1851 comm,
1852 entry->pid, cpu, entry->flags,
1853 entry->preempt_count, trace_idx,
1854 ns2usecs(iter->ts),
1855 abs_usecs/1000,
1856 abs_usecs % 1000, rel_usecs/1000,
1857 rel_usecs % 1000);
1858 } else {
1859 lat_print_generic(s, entry, cpu);
1860 lat_print_timestamp(s, abs_usecs, rel_usecs);
1861 }
1862 switch (entry->type) {
1863 case TRACE_FN: {
1864 struct ftrace_entry *field;
1865
1866 trace_assign_type(field, entry);
1867
1868 seq_print_ip_sym(s, field->ip, sym_flags);
1869 trace_seq_puts(s, " (");
1870 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1871 trace_seq_puts(s, ")\n");
1872 break;
1873 }
1874 case TRACE_CTX:
1875 case TRACE_WAKE: {
1876 struct ctx_switch_entry *field;
1877
1878 trace_assign_type(field, entry);
1879
1880 T = task_state_char(field->next_state);
1881 S = task_state_char(field->prev_state);
1882 comm = trace_find_cmdline(field->next_pid);
1883 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1884 field->prev_pid,
1885 field->prev_prio,
1886 S, entry->type == TRACE_CTX ? "==>" : " +",
1887 field->next_cpu,
1888 field->next_pid,
1889 field->next_prio,
1890 T, comm);
1891 break;
1892 }
1893 case TRACE_SPECIAL: {
1894 struct special_entry *field;
1895
1896 trace_assign_type(field, entry);
1897
1898 trace_seq_printf(s, "# %ld %ld %ld\n",
1899 field->arg1,
1900 field->arg2,
1901 field->arg3);
1902 break;
1903 }
1904 case TRACE_STACK: {
1905 struct stack_entry *field;
1906
1907 trace_assign_type(field, entry);
1908 1409
1909 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1410 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1910 if (i) 1411 if (!trace_print_lat_context(iter))
1911 trace_seq_puts(s, " <= "); 1412 goto partial;
1912 seq_print_ip_sym(s, field->caller[i], sym_flags);
1913 }
1914 trace_seq_puts(s, "\n");
1915 break;
1916 }
1917 case TRACE_PRINT: {
1918 struct print_entry *field;
1919
1920 trace_assign_type(field, entry);
1921
1922 seq_print_ip_sym(s, field->ip, sym_flags);
1923 trace_seq_printf(s, ": %s", field->buf);
1924 if (entry->flags & TRACE_FLAG_CONT)
1925 trace_seq_print_cont(s, iter);
1926 break;
1927 } 1413 }
1928 case TRACE_BRANCH: {
1929 struct trace_branch *field;
1930 1414
1931 trace_assign_type(field, entry); 1415 if (event && event->latency_trace)
1416 return event->latency_trace(iter, sym_flags);
1932 1417
1933 trace_seq_printf(s, "[%s] %s:%s:%d\n", 1418 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1934 field->correct ? " ok " : " MISS ", 1419 goto partial;
1935 field->func,
1936 field->file,
1937 field->line);
1938 break;
1939 }
1940 case TRACE_USER_STACK: {
1941 struct userstack_entry *field;
1942
1943 trace_assign_type(field, entry);
1944 1420
1945 seq_print_userip_objs(field, s, sym_flags);
1946 trace_seq_putc(s, '\n');
1947 break;
1948 }
1949 default:
1950 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1951 }
1952 return TRACE_TYPE_HANDLED; 1421 return TRACE_TYPE_HANDLED;
1422partial:
1423 return TRACE_TYPE_PARTIAL_LINE;
1953} 1424}
1954 1425
1955static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1426static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
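
This is the heart of the trace.c rewrite: instead of one giant switch over every entry type, print_lat_fmt() (and the printers that follow) look the type up with ftrace_find_event() and call the registered callback, falling back to an "Unknown type" line when nothing is registered. The sketch below is a self-contained approximation of that lookup-and-dispatch idea: the struct trace_event here is simplified (the real one in this patch also carries latency_trace, raw, hex and binary methods), and the fixed-size table stands in for whatever registration scheme the new trace_output code actually uses.

    #include <stdio.h>
    #include <stddef.h>

    enum print_line_t { TRACE_TYPE_PARTIAL_LINE, TRACE_TYPE_HANDLED };

    struct trace_entry { int type; };

    /* Simplified: only the normal-output callback is modelled here. */
    struct trace_event {
        int type;
        enum print_line_t (*trace)(struct trace_entry *ent);
    };

    #define MAX_EVENTS 32
    static struct trace_event *events[MAX_EVENTS];

    static int register_ftrace_event(struct trace_event *event)
    {
        if (event->type <= 0 || event->type >= MAX_EVENTS)
            return 0;    /* 0 means failure, matching the convention in this patch */
        events[event->type] = event;
        return event->type;
    }

    static struct trace_event *ftrace_find_event(int type)
    {
        return (type > 0 && type < MAX_EVENTS) ? events[type] : NULL;
    }

    static enum print_line_t fn_trace(struct trace_entry *ent)
    {
        printf("function entry (type %d)\n", ent->type);
        return TRACE_TYPE_HANDLED;
    }

    static struct trace_event fn_event = { .type = 1, .trace = fn_trace };

    static enum print_line_t print_trace_fmt(struct trace_entry *ent)
    {
        struct trace_event *event = ftrace_find_event(ent->type);

        if (event && event->trace)
            return event->trace(ent);

        printf("Unknown type %d\n", ent->type);
        return TRACE_TYPE_HANDLED;
    }

    int main(void)
    {
        struct trace_entry known = { .type = 1 }, unknown = { .type = 7 };

        register_ftrace_event(&fn_event);
        print_trace_fmt(&known);
        print_trace_fmt(&unknown);
        return 0;
    }
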
@@ -1957,313 +1428,78 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1957 struct trace_seq *s = &iter->seq; 1428 struct trace_seq *s = &iter->seq;
1958 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1429 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1959 struct trace_entry *entry; 1430 struct trace_entry *entry;
1960 unsigned long usec_rem; 1431 struct trace_event *event;
1961 unsigned long long t;
1962 unsigned long secs;
1963 char *comm;
1964 int ret;
1965 int S, T;
1966 int i;
1967 1432
1968 entry = iter->ent; 1433 entry = iter->ent;
1969 1434
1970 if (entry->type == TRACE_CONT)
1971 return TRACE_TYPE_HANDLED;
1972
1973 test_cpu_buff_start(iter); 1435 test_cpu_buff_start(iter);
1974 1436
1975 comm = trace_find_cmdline(iter->ent->pid); 1437 event = ftrace_find_event(entry->type);
1976
1977 t = ns2usecs(iter->ts);
1978 usec_rem = do_div(t, 1000000ULL);
1979 secs = (unsigned long)t;
1980
1981 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1982 if (!ret)
1983 return TRACE_TYPE_PARTIAL_LINE;
1984 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1985 if (!ret)
1986 return TRACE_TYPE_PARTIAL_LINE;
1987 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1988 if (!ret)
1989 return TRACE_TYPE_PARTIAL_LINE;
1990 1438
1991 switch (entry->type) { 1439 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1992 case TRACE_FN: { 1440 if (!trace_print_context(iter))
1993 struct ftrace_entry *field; 1441 goto partial;
1994
1995 trace_assign_type(field, entry);
1996
1997 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1998 if (!ret)
1999 return TRACE_TYPE_PARTIAL_LINE;
2000 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
2001 field->parent_ip) {
2002 ret = trace_seq_printf(s, " <-");
2003 if (!ret)
2004 return TRACE_TYPE_PARTIAL_LINE;
2005 ret = seq_print_ip_sym(s,
2006 field->parent_ip,
2007 sym_flags);
2008 if (!ret)
2009 return TRACE_TYPE_PARTIAL_LINE;
2010 }
2011 ret = trace_seq_printf(s, "\n");
2012 if (!ret)
2013 return TRACE_TYPE_PARTIAL_LINE;
2014 break;
2015 }
2016 case TRACE_CTX:
2017 case TRACE_WAKE: {
2018 struct ctx_switch_entry *field;
2019
2020 trace_assign_type(field, entry);
2021
2022 T = task_state_char(field->next_state);
2023 S = task_state_char(field->prev_state);
2024 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
2025 field->prev_pid,
2026 field->prev_prio,
2027 S,
2028 entry->type == TRACE_CTX ? "==>" : " +",
2029 field->next_cpu,
2030 field->next_pid,
2031 field->next_prio,
2032 T);
2033 if (!ret)
2034 return TRACE_TYPE_PARTIAL_LINE;
2035 break;
2036 } 1442 }
2037 case TRACE_SPECIAL: {
2038 struct special_entry *field;
2039 1443
2040 trace_assign_type(field, entry); 1444 if (event && event->trace)
2041 1445 return event->trace(iter, sym_flags);
2042 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
2043 field->arg1,
2044 field->arg2,
2045 field->arg3);
2046 if (!ret)
2047 return TRACE_TYPE_PARTIAL_LINE;
2048 break;
2049 }
2050 case TRACE_STACK: {
2051 struct stack_entry *field;
2052
2053 trace_assign_type(field, entry);
2054
2055 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
2056 if (i) {
2057 ret = trace_seq_puts(s, " <= ");
2058 if (!ret)
2059 return TRACE_TYPE_PARTIAL_LINE;
2060 }
2061 ret = seq_print_ip_sym(s, field->caller[i],
2062 sym_flags);
2063 if (!ret)
2064 return TRACE_TYPE_PARTIAL_LINE;
2065 }
2066 ret = trace_seq_puts(s, "\n");
2067 if (!ret)
2068 return TRACE_TYPE_PARTIAL_LINE;
2069 break;
2070 }
2071 case TRACE_PRINT: {
2072 struct print_entry *field;
2073
2074 trace_assign_type(field, entry);
2075
2076 seq_print_ip_sym(s, field->ip, sym_flags);
2077 trace_seq_printf(s, ": %s", field->buf);
2078 if (entry->flags & TRACE_FLAG_CONT)
2079 trace_seq_print_cont(s, iter);
2080 break;
2081 }
2082 case TRACE_GRAPH_RET: {
2083 return print_graph_function(iter);
2084 }
2085 case TRACE_GRAPH_ENT: {
2086 return print_graph_function(iter);
2087 }
2088 case TRACE_BRANCH: {
2089 struct trace_branch *field;
2090 1446
2091 trace_assign_type(field, entry); 1447 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1448 goto partial;
2092 1449
2093 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2094 field->correct ? " ok " : " MISS ",
2095 field->func,
2096 field->file,
2097 field->line);
2098 break;
2099 }
2100 case TRACE_USER_STACK: {
2101 struct userstack_entry *field;
2102
2103 trace_assign_type(field, entry);
2104
2105 ret = seq_print_userip_objs(field, s, sym_flags);
2106 if (!ret)
2107 return TRACE_TYPE_PARTIAL_LINE;
2108 ret = trace_seq_putc(s, '\n');
2109 if (!ret)
2110 return TRACE_TYPE_PARTIAL_LINE;
2111 break;
2112 }
2113 }
2114 return TRACE_TYPE_HANDLED; 1450 return TRACE_TYPE_HANDLED;
1451partial:
1452 return TRACE_TYPE_PARTIAL_LINE;
2115} 1453}
2116 1454
2117static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 1455static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2118{ 1456{
2119 struct trace_seq *s = &iter->seq; 1457 struct trace_seq *s = &iter->seq;
2120 struct trace_entry *entry; 1458 struct trace_entry *entry;
2121 int ret; 1459 struct trace_event *event;
2122 int S, T;
2123 1460
2124 entry = iter->ent; 1461 entry = iter->ent;
2125 1462
2126 if (entry->type == TRACE_CONT) 1463 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2127 return TRACE_TYPE_HANDLED; 1464 if (!trace_seq_printf(s, "%d %d %llu ",
2128 1465 entry->pid, iter->cpu, iter->ts))
2129 ret = trace_seq_printf(s, "%d %d %llu ", 1466 goto partial;
2130 entry->pid, iter->cpu, iter->ts);
2131 if (!ret)
2132 return TRACE_TYPE_PARTIAL_LINE;
2133
2134 switch (entry->type) {
2135 case TRACE_FN: {
2136 struct ftrace_entry *field;
2137
2138 trace_assign_type(field, entry);
2139
2140 ret = trace_seq_printf(s, "%x %x\n",
2141 field->ip,
2142 field->parent_ip);
2143 if (!ret)
2144 return TRACE_TYPE_PARTIAL_LINE;
2145 break;
2146 }
2147 case TRACE_CTX:
2148 case TRACE_WAKE: {
2149 struct ctx_switch_entry *field;
2150
2151 trace_assign_type(field, entry);
2152
2153 T = task_state_char(field->next_state);
2154 S = entry->type == TRACE_WAKE ? '+' :
2155 task_state_char(field->prev_state);
2156 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
2157 field->prev_pid,
2158 field->prev_prio,
2159 S,
2160 field->next_cpu,
2161 field->next_pid,
2162 field->next_prio,
2163 T);
2164 if (!ret)
2165 return TRACE_TYPE_PARTIAL_LINE;
2166 break;
2167 }
2168 case TRACE_SPECIAL:
2169 case TRACE_USER_STACK:
2170 case TRACE_STACK: {
2171 struct special_entry *field;
2172
2173 trace_assign_type(field, entry);
2174
2175 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
2176 field->arg1,
2177 field->arg2,
2178 field->arg3);
2179 if (!ret)
2180 return TRACE_TYPE_PARTIAL_LINE;
2181 break;
2182 } 1467 }
2183 case TRACE_PRINT: {
2184 struct print_entry *field;
2185 1468
2186 trace_assign_type(field, entry); 1469 event = ftrace_find_event(entry->type);
1470 if (event && event->raw)
1471 return event->raw(iter, 0);
1472
1473 if (!trace_seq_printf(s, "%d ?\n", entry->type))
1474 goto partial;
2187 1475
2188 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
2189 if (entry->flags & TRACE_FLAG_CONT)
2190 trace_seq_print_cont(s, iter);
2191 break;
2192 }
2193 }
2194 return TRACE_TYPE_HANDLED; 1476 return TRACE_TYPE_HANDLED;
1477partial:
1478 return TRACE_TYPE_PARTIAL_LINE;
2195} 1479}
2196 1480
2197#define SEQ_PUT_FIELD_RET(s, x) \
2198do { \
2199 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
2200 return 0; \
2201} while (0)
2202
2203#define SEQ_PUT_HEX_FIELD_RET(s, x) \
2204do { \
2205 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
2206 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
2207 return 0; \
2208} while (0)
2209
2210static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 1481static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2211{ 1482{
2212 struct trace_seq *s = &iter->seq; 1483 struct trace_seq *s = &iter->seq;
2213 unsigned char newline = '\n'; 1484 unsigned char newline = '\n';
2214 struct trace_entry *entry; 1485 struct trace_entry *entry;
2215 int S, T; 1486 struct trace_event *event;
2216 1487
2217 entry = iter->ent; 1488 entry = iter->ent;
2218 1489
2219 if (entry->type == TRACE_CONT) 1490 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2220 return TRACE_TYPE_HANDLED; 1491 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2221 1492 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2222 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1493 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2223 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2224 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2225
2226 switch (entry->type) {
2227 case TRACE_FN: {
2228 struct ftrace_entry *field;
2229
2230 trace_assign_type(field, entry);
2231
2232 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
2233 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
2234 break;
2235 } 1494 }
2236 case TRACE_CTX:
2237 case TRACE_WAKE: {
2238 struct ctx_switch_entry *field;
2239
2240 trace_assign_type(field, entry);
2241
2242 T = task_state_char(field->next_state);
2243 S = entry->type == TRACE_WAKE ? '+' :
2244 task_state_char(field->prev_state);
2245 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
2246 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
2247 SEQ_PUT_HEX_FIELD_RET(s, S);
2248 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
2249 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
2250 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
2251 SEQ_PUT_HEX_FIELD_RET(s, T);
2252 break;
2253 }
2254 case TRACE_SPECIAL:
2255 case TRACE_USER_STACK:
2256 case TRACE_STACK: {
2257 struct special_entry *field;
2258
2259 trace_assign_type(field, entry);
2260 1495
2261 SEQ_PUT_HEX_FIELD_RET(s, field->arg1); 1496 event = ftrace_find_event(entry->type);
2262 SEQ_PUT_HEX_FIELD_RET(s, field->arg2); 1497 if (event && event->hex) {
2263 SEQ_PUT_HEX_FIELD_RET(s, field->arg3); 1498 enum print_line_t ret = event->hex(iter, 0);
2264 break; 1499 if (ret != TRACE_TYPE_HANDLED)
2265 } 1500 return ret;
2266 } 1501 }
1502
2267 SEQ_PUT_FIELD_RET(s, newline); 1503 SEQ_PUT_FIELD_RET(s, newline);
2268 1504
2269 return TRACE_TYPE_HANDLED; 1505 return TRACE_TYPE_HANDLED;
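
print_hex_fmt() and print_bin_fmt() keep the SEQ_PUT_FIELD_RET()/SEQ_PUT_HEX_FIELD_RET() pattern, but the macro definitions and the per-type cases leave trace.c (presumably provided by trace_output.h now), and both printers defer to event->hex() or event->binary() when the event supplies one. The removed trace_seq_putmem_hex() emitted each field byte-reversed on little-endian machines so the hex reads as the numeric value; a user-space restatement of that conversion (hex_asc is redefined locally, and a little-endian host is assumed):

    #include <stdio.h>
    #include <string.h>

    #define MAX_MEMHEX_BYTES 8
    #define HEX_CHARS (MAX_MEMHEX_BYTES * 2 + 1)

    static const char hex_asc[] = "0123456789abcdef";
    #define hex_asc_lo(x)  hex_asc[(x) & 0x0f]
    #define hex_asc_hi(x)  hex_asc[((x) >> 4) & 0x0f]

    /* Mirror of the removed trace_seq_putmem_hex(), printing instead of appending.
     * Assumes a little-endian host, hence the reversed byte walk. */
    static void putmem_hex(const void *mem, size_t len)
    {
        char hex[HEX_CHARS + 1];
        const unsigned char *data = mem;
        int i, j = 0;

        for (i = (int)len - 1; i >= 0; i--) {
            hex[j++] = hex_asc_hi(data[i]);
            hex[j++] = hex_asc_lo(data[i]);
        }
        hex[j++] = ' ';
        hex[j] = '\0';
        fputs(hex, stdout);
    }

    int main(void)
    {
        unsigned int pid = 0x1234;

        putmem_hex(&pid, sizeof(pid));    /* prints "00001234 " on little-endian */
        putchar('\n');
        return 0;
    }
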
@@ -2278,13 +1514,10 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2278 1514
2279 trace_assign_type(field, entry); 1515 trace_assign_type(field, entry);
2280 1516
2281 ret = trace_seq_printf(s, field->buf); 1517 ret = trace_seq_printf(s, "%s", field->buf);
2282 if (!ret) 1518 if (!ret)
2283 return TRACE_TYPE_PARTIAL_LINE; 1519 return TRACE_TYPE_PARTIAL_LINE;
2284 1520
2285 if (entry->flags & TRACE_FLAG_CONT)
2286 trace_seq_print_cont(s, iter);
2287
2288 return TRACE_TYPE_HANDLED; 1521 return TRACE_TYPE_HANDLED;
2289} 1522}
2290 1523
@@ -2292,53 +1525,21 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2292{ 1525{
2293 struct trace_seq *s = &iter->seq; 1526 struct trace_seq *s = &iter->seq;
2294 struct trace_entry *entry; 1527 struct trace_entry *entry;
1528 struct trace_event *event;
2295 1529
2296 entry = iter->ent; 1530 entry = iter->ent;
2297 1531
2298 if (entry->type == TRACE_CONT) 1532 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2299 return TRACE_TYPE_HANDLED; 1533 SEQ_PUT_FIELD_RET(s, entry->pid);
2300 1534 SEQ_PUT_FIELD_RET(s, entry->cpu);
2301 SEQ_PUT_FIELD_RET(s, entry->pid); 1535 SEQ_PUT_FIELD_RET(s, iter->ts);
2302 SEQ_PUT_FIELD_RET(s, entry->cpu);
2303 SEQ_PUT_FIELD_RET(s, iter->ts);
2304
2305 switch (entry->type) {
2306 case TRACE_FN: {
2307 struct ftrace_entry *field;
2308
2309 trace_assign_type(field, entry);
2310
2311 SEQ_PUT_FIELD_RET(s, field->ip);
2312 SEQ_PUT_FIELD_RET(s, field->parent_ip);
2313 break;
2314 }
2315 case TRACE_CTX: {
2316 struct ctx_switch_entry *field;
2317
2318 trace_assign_type(field, entry);
2319
2320 SEQ_PUT_FIELD_RET(s, field->prev_pid);
2321 SEQ_PUT_FIELD_RET(s, field->prev_prio);
2322 SEQ_PUT_FIELD_RET(s, field->prev_state);
2323 SEQ_PUT_FIELD_RET(s, field->next_pid);
2324 SEQ_PUT_FIELD_RET(s, field->next_prio);
2325 SEQ_PUT_FIELD_RET(s, field->next_state);
2326 break;
2327 } 1536 }
2328 case TRACE_SPECIAL:
2329 case TRACE_USER_STACK:
2330 case TRACE_STACK: {
2331 struct special_entry *field;
2332 1537
2333 trace_assign_type(field, entry); 1538 event = ftrace_find_event(entry->type);
1539 if (event && event->binary)
1540 return event->binary(iter, 0);
2334 1541
2335 SEQ_PUT_FIELD_RET(s, field->arg1); 1542 return TRACE_TYPE_HANDLED;
2336 SEQ_PUT_FIELD_RET(s, field->arg2);
2337 SEQ_PUT_FIELD_RET(s, field->arg3);
2338 break;
2339 }
2340 }
2341 return 1;
2342} 1543}
2343 1544
2344static int trace_empty(struct trace_iterator *iter) 1545static int trace_empty(struct trace_iterator *iter)
@@ -2383,7 +1584,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
2383 return print_raw_fmt(iter); 1584 return print_raw_fmt(iter);
2384 1585
2385 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 1586 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2386 return print_lat_fmt(iter, iter->idx, iter->cpu); 1587 return print_lat_fmt(iter);
2387 1588
2388 return print_trace_fmt(iter); 1589 return print_trace_fmt(iter);
2389} 1590}
@@ -2985,7 +2186,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2985 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2186 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2986} 2187}
2987 2188
2988static int tracing_set_tracer(char *buf) 2189static int tracing_set_tracer(const char *buf)
2989{ 2190{
2990 struct trace_array *tr = &global_trace; 2191 struct trace_array *tr = &global_trace;
2991 struct tracer *t; 2192 struct tracer *t;
@@ -3691,6 +2892,15 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3691} 2892}
3692EXPORT_SYMBOL_GPL(__ftrace_printk); 2893EXPORT_SYMBOL_GPL(__ftrace_printk);
3693 2894
2895int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
2896{
2897 if (!(trace_flags & TRACE_ITER_PRINTK))
2898 return 0;
2899
2900 return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
2901}
2902EXPORT_SYMBOL_GPL(__ftrace_vprintk);
2903
3694static int trace_panic_handler(struct notifier_block *this, 2904static int trace_panic_handler(struct notifier_block *this,
3695 unsigned long event, void *unused) 2905 unsigned long event, void *unused)
3696{ 2906{
@@ -3871,14 +3081,10 @@ __init static int tracer_alloc_buffers(void)
3871 trace_init_cmdlines(); 3081 trace_init_cmdlines();
3872 3082
3873 register_tracer(&nop_trace); 3083 register_tracer(&nop_trace);
3084 current_trace = &nop_trace;
3874#ifdef CONFIG_BOOT_TRACER 3085#ifdef CONFIG_BOOT_TRACER
3875 register_tracer(&boot_tracer); 3086 register_tracer(&boot_tracer);
3876 current_trace = &boot_tracer;
3877 current_trace->init(&global_trace);
3878#else
3879 current_trace = &nop_trace;
3880#endif 3087#endif
3881
3882 /* All seems OK, enable tracing */ 3088 /* All seems OK, enable tracing */
3883 tracing_disabled = 0; 3089 tracing_disabled = 0;
3884 3090
@@ -3895,5 +3101,26 @@ out_free_buffer_mask:
3895out: 3101out:
3896 return ret; 3102 return ret;
3897} 3103}
3104
3105__init static int clear_boot_tracer(void)
3106{
3107 /*
3108 * The default tracer at boot buffer is an init section.
3109 * This function is called in lateinit. If we did not
3110 * find the boot tracer, then clear it out, to prevent
3111 * later registration from accessing the buffer that is
3112 * about to be freed.
3113 */
3114 if (!default_bootup_tracer)
3115 return 0;
3116
3117 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
3118 default_bootup_tracer);
3119 default_bootup_tracer = NULL;
3120
3121 return 0;
3122}
3123
3898early_initcall(tracer_alloc_buffers); 3124early_initcall(tracer_alloc_buffers);
3899fs_initcall(tracer_init_debugfs); 3125fs_initcall(tracer_init_debugfs);
3126late_initcall(clear_boot_tracer);
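
Two smaller additions round off the trace.c changes: __ftrace_vprintk(), the va_list counterpart to __ftrace_printk(), and clear_boot_tracer(), a late initcall that forgets an unclaimed `ftrace=` request before the __initdata buffer holding it is freed. A hedged usage sketch for the new export follows; it assumes the declaration is reachable through <linux/ftrace.h>, as __ftrace_printk() was in this period, and both the wrapper name and the use of _THIS_IP_ are illustrative choices, not part of this patch.

    /* Kernel-style sketch: forwarding a varargs message to __ftrace_vprintk(). */
    #include <linux/ftrace.h>
    #include <linux/kernel.h>

    static int my_trace_printf(const char *fmt, ...)
    {
        va_list ap;
        int ret;

        va_start(ap, fmt);
        /* _THIS_IP_ records the caller's location in the trace buffer. */
        ret = __ftrace_vprintk(_THIS_IP_, fmt, ap);
        va_end(ap);

        return ret;
    }
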
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381bfd95..f0c7a0f08cac 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,7 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h> 11#include <trace/boot.h>
12#include <trace/kmemtrace.h>
12 13
13enum trace_type { 14enum trace_type {
14 __TRACE_FIRST_TYPE = 0, 15 __TRACE_FIRST_TYPE = 0,
@@ -16,7 +17,6 @@ enum trace_type {
16 TRACE_FN, 17 TRACE_FN,
17 TRACE_CTX, 18 TRACE_CTX,
18 TRACE_WAKE, 19 TRACE_WAKE,
19 TRACE_CONT,
20 TRACE_STACK, 20 TRACE_STACK,
21 TRACE_PRINT, 21 TRACE_PRINT,
22 TRACE_SPECIAL, 22 TRACE_SPECIAL,
@@ -29,9 +29,12 @@ enum trace_type {
29 TRACE_GRAPH_ENT, 29 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK, 30 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES, 31 TRACE_HW_BRANCHES,
32 TRACE_KMEM_ALLOC,
33 TRACE_KMEM_FREE,
32 TRACE_POWER, 34 TRACE_POWER,
35 TRACE_BLK,
33 36
34 __TRACE_LAST_TYPE 37 __TRACE_LAST_TYPE,
35}; 38};
36 39
37/* 40/*
@@ -170,6 +173,24 @@ struct trace_power {
170 struct power_trace state_data; 173 struct power_trace state_data;
171}; 174};
172 175
176struct kmemtrace_alloc_entry {
177 struct trace_entry ent;
178 enum kmemtrace_type_id type_id;
179 unsigned long call_site;
180 const void *ptr;
181 size_t bytes_req;
182 size_t bytes_alloc;
183 gfp_t gfp_flags;
184 int node;
185};
186
187struct kmemtrace_free_entry {
188 struct trace_entry ent;
189 enum kmemtrace_type_id type_id;
190 unsigned long call_site;
191 const void *ptr;
192};
193
173/* 194/*
174 * trace_flag_type is an enumeration that holds different 195 * trace_flag_type is an enumeration that holds different
175 * states when a trace occurs. These are: 196 * states when a trace occurs. These are:
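
trace.h grows two payload layouts for the new kmemtrace plugin: kmemtrace_alloc_entry records the call site, the returned pointer, the requested versus actually allocated size, the GFP flags and the NUMA node, while kmemtrace_free_entry only needs the call site and pointer. The bytes_req/bytes_alloc pair is what lets post-processing spot internal fragmentation. A user-space mirror of the two payloads is below; struct trace_entry, gfp_t and the type-id enum are stand-ins (their real definitions are not part of this hunk), so the printed sizes and offsets are illustrative only.

    #include <stdio.h>
    #include <stddef.h>

    /* Stand-ins for kernel types not shown in this hunk. */
    struct trace_entry { unsigned char type, flags, preempt_count; int pid, tgid; };
    typedef unsigned int gfp_t;
    enum kmemtrace_type_id { KMEMTRACE_TYPE_KMALLOC = 0 };    /* assumed value */

    struct kmemtrace_alloc_entry {
        struct trace_entry ent;
        enum kmemtrace_type_id type_id;
        unsigned long call_site;
        const void *ptr;
        size_t bytes_req;
        size_t bytes_alloc;
        gfp_t gfp_flags;
        int node;
    };

    struct kmemtrace_free_entry {
        struct trace_entry ent;
        enum kmemtrace_type_id type_id;
        unsigned long call_site;
        const void *ptr;
    };

    int main(void)
    {
        printf("alloc entry: %zu bytes, free entry: %zu bytes\n",
               sizeof(struct kmemtrace_alloc_entry),
               sizeof(struct kmemtrace_free_entry));
        printf("bytes_req lives at offset %zu of the alloc entry\n",
               offsetof(struct kmemtrace_alloc_entry, bytes_req));
        return 0;
    }
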
@@ -178,7 +199,6 @@ struct trace_power {
178 * NEED_RESCED - reschedule is requested 199 * NEED_RESCED - reschedule is requested
179 * HARDIRQ - inside an interrupt handler 200 * HARDIRQ - inside an interrupt handler
180 * SOFTIRQ - inside a softirq handler 201 * SOFTIRQ - inside a softirq handler
181 * CONT - multiple entries hold the trace item
182 */ 202 */
183enum trace_flag_type { 203enum trace_flag_type {
184 TRACE_FLAG_IRQS_OFF = 0x01, 204 TRACE_FLAG_IRQS_OFF = 0x01,
@@ -186,7 +206,6 @@ enum trace_flag_type {
186 TRACE_FLAG_NEED_RESCHED = 0x04, 206 TRACE_FLAG_NEED_RESCHED = 0x04,
187 TRACE_FLAG_HARDIRQ = 0x08, 207 TRACE_FLAG_HARDIRQ = 0x08,
188 TRACE_FLAG_SOFTIRQ = 0x10, 208 TRACE_FLAG_SOFTIRQ = 0x10,
189 TRACE_FLAG_CONT = 0x20,
190}; 209};
191 210
192#define TRACE_BUF_SIZE 1024 211#define TRACE_BUF_SIZE 1024
@@ -262,7 +281,6 @@ extern void __ftrace_bad_type(void);
262 do { \ 281 do { \
263 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ 282 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 283 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 284 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 285 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 286 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
@@ -280,6 +298,10 @@ extern void __ftrace_bad_type(void);
280 TRACE_GRAPH_RET); \ 298 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 299 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ 300 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
301 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
302 TRACE_KMEM_ALLOC); \
303 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
304 TRACE_KMEM_FREE); \
283 __ftrace_bad_type(); \ 305 __ftrace_bad_type(); \
284 } while (0) 306 } while (0)
285 307
@@ -313,6 +335,7 @@ struct tracer_flags {
313/* Makes more easy to define a tracer opt */ 335/* Makes more easy to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b 336#define TRACER_OPT(s, b) .name = #s, .bit = b
315 337
338
316/* 339/*
317 * A specific tracer, represented by methods that operate on a trace array: 340 * A specific tracer, represented by methods that operate on a trace array:
318 */ 341 */
@@ -340,6 +363,7 @@ struct tracer {
340 struct tracer *next; 363 struct tracer *next;
341 int print_max; 364 int print_max;
342 struct tracer_flags *flags; 365 struct tracer_flags *flags;
366 struct tracer_stat *stats;
343}; 367};
344 368
345struct trace_seq { 369struct trace_seq {
@@ -381,6 +405,10 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
381 405
382struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 406struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
383 struct trace_array_cpu *data); 407 struct trace_array_cpu *data);
408
409struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
410 int *ent_cpu, u64 *ent_ts);
411
384void tracing_generic_entry_update(struct trace_entry *entry, 412void tracing_generic_entry_update(struct trace_entry *entry,
385 unsigned long flags, 413 unsigned long flags,
386 int pc); 414 int pc);
@@ -415,7 +443,6 @@ void trace_function(struct trace_array *tr,
415 443
416void trace_graph_return(struct ftrace_graph_ret *trace); 444void trace_graph_return(struct ftrace_graph_ret *trace);
417int trace_graph_entry(struct ftrace_graph_ent *trace); 445int trace_graph_entry(struct ftrace_graph_ent *trace);
418void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
419 446
420void tracing_start_cmdline_record(void); 447void tracing_start_cmdline_record(void);
421void tracing_stop_cmdline_record(void); 448void tracing_stop_cmdline_record(void);
@@ -434,15 +461,12 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
434void update_max_tr_single(struct trace_array *tr, 461void update_max_tr_single(struct trace_array *tr,
435 struct task_struct *tsk, int cpu); 462 struct task_struct *tsk, int cpu);
436 463
437extern cycle_t ftrace_now(int cpu); 464void __trace_stack(struct trace_array *tr,
465 struct trace_array_cpu *data,
466 unsigned long flags,
467 int skip, int pc);
438 468
439#ifdef CONFIG_FUNCTION_TRACER 469extern cycle_t ftrace_now(int cpu);
440void tracing_start_function_trace(void);
441void tracing_stop_function_trace(void);
442#else
443# define tracing_start_function_trace() do { } while (0)
444# define tracing_stop_function_trace() do { } while (0)
445#endif
446 470
447#ifdef CONFIG_CONTEXT_SWITCH_TRACER 471#ifdef CONFIG_CONTEXT_SWITCH_TRACER
448typedef void 472typedef void
@@ -456,10 +480,10 @@ struct tracer_switch_ops {
456 void *private; 480 void *private;
457 struct tracer_switch_ops *next; 481 struct tracer_switch_ops *next;
458}; 482};
459
460char *trace_find_cmdline(int pid);
461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 483#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
462 484
485extern char *trace_find_cmdline(int pid);
486
463#ifdef CONFIG_DYNAMIC_FTRACE 487#ifdef CONFIG_DYNAMIC_FTRACE
464extern unsigned long ftrace_update_tot_cnt; 488extern unsigned long ftrace_update_tot_cnt;
465#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 489#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -488,15 +512,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
488#endif /* CONFIG_FTRACE_STARTUP_TEST */ 512#endif /* CONFIG_FTRACE_STARTUP_TEST */
489 513
490extern void *head_page(struct trace_array_cpu *data); 514extern void *head_page(struct trace_array_cpu *data);
491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
492extern void trace_seq_print_cont(struct trace_seq *s,
493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
499 size_t cnt);
500extern long ns2usecs(cycle_t nsec); 515extern long ns2usecs(cycle_t nsec);
501extern int 516extern int
502trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); 517trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
@@ -580,7 +595,8 @@ enum trace_iterator_flags {
580 TRACE_ITER_ANNOTATE = 0x2000, 595 TRACE_ITER_ANNOTATE = 0x2000,
581 TRACE_ITER_USERSTACKTRACE = 0x4000, 596 TRACE_ITER_USERSTACKTRACE = 0x4000,
582 TRACE_ITER_SYM_USEROBJ = 0x8000, 597 TRACE_ITER_SYM_USEROBJ = 0x8000,
583 TRACE_ITER_PRINTK_MSGONLY = 0x10000 598 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
599 TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */
584}; 600};
585 601
586/* 602/*
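
The new TRACE_ITER_CONTEXT_INFO bit pairs with the "context-info" string added to trace_options[] earlier in this patch: each name in that array corresponds to one bit of trace_flags, and clearing the bit suppresses the pid/CPU/timestamp prefix in every output format. A condensed user-space model of that name-to-bit mapping, accepting "name" and "noname" the way the trace_options interface does (the option subset and bit values here are compressed for illustration; the real CONTEXT_INFO bit is 0x20000):

    #include <stdio.h>
    #include <string.h>

    /* A few of the option names; the kernel's table is much longer. */
    static const char *trace_options[] = {
        "sym-userobj", "printk-msg-only", "context-info", NULL
    };
    enum {
        TRACE_ITER_SYM_USEROBJ     = 0x01,
        TRACE_ITER_PRINTK_MSGONLY  = 0x02,
        TRACE_ITER_CONTEXT_INFO    = 0x04,
    };

    static unsigned long trace_flags = TRACE_ITER_CONTEXT_INFO;

    /* "name" sets the option's bit, "noname" clears it. */
    static int set_tracer_option(const char *cmp)
    {
        int neg = 0, i;

        if (strncmp(cmp, "no", 2) == 0) {
            neg = 1;
            cmp += 2;
        }
        for (i = 0; trace_options[i]; i++) {
            if (strcmp(cmp, trace_options[i]) == 0) {
                if (neg)
                    trace_flags &= ~(1UL << i);
                else
                    trace_flags |= (1UL << i);
                return 0;
            }
        }
        return -1;
    }

    int main(void)
    {
        set_tracer_option("nocontext-info");
        printf("context info enabled: %d\n",
               !!(trace_flags & TRACE_ITER_CONTEXT_INFO));
        return 0;
    }
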
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 366c8c333e13..1f07895977a0 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -11,6 +11,7 @@
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12 12
13#include "trace.h" 13#include "trace.h"
14#include "trace_output.h"
14 15
15static struct trace_array *boot_trace; 16static struct trace_array *boot_trace;
16static bool pre_initcalls_finished; 17static bool pre_initcalls_finished;
@@ -27,13 +28,13 @@ void start_boot_trace(void)
27 28
28void enable_boot_trace(void) 29void enable_boot_trace(void)
29{ 30{
30 if (pre_initcalls_finished) 31 if (boot_trace && pre_initcalls_finished)
31 tracing_start_sched_switch_record(); 32 tracing_start_sched_switch_record();
32} 33}
33 34
34void disable_boot_trace(void) 35void disable_boot_trace(void)
35{ 36{
36 if (pre_initcalls_finished) 37 if (boot_trace && pre_initcalls_finished)
37 tracing_stop_sched_switch_record(); 38 tracing_stop_sched_switch_record();
38} 39}
39 40
@@ -42,6 +43,9 @@ static int boot_trace_init(struct trace_array *tr)
42 int cpu; 43 int cpu;
43 boot_trace = tr; 44 boot_trace = tr;
44 45
46 if (!tr)
47 return 0;
48
45 for_each_cpu(cpu, cpu_possible_mask) 49 for_each_cpu(cpu, cpu_possible_mask)
46 tracing_reset(tr, cpu); 50 tracing_reset(tr, cpu);
47 51
@@ -131,7 +135,7 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
131 unsigned long irq_flags; 135 unsigned long irq_flags;
132 struct trace_array *tr = boot_trace; 136 struct trace_array *tr = boot_trace;
133 137
134 if (!pre_initcalls_finished) 138 if (!tr || !pre_initcalls_finished)
135 return; 139 return;
136 140
137 /* Get its name now since this function could 141 /* Get its name now since this function could
@@ -163,7 +167,7 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
163 unsigned long irq_flags; 167 unsigned long irq_flags;
164 struct trace_array *tr = boot_trace; 168 struct trace_array *tr = boot_trace;
165 169
166 if (!pre_initcalls_finished) 170 if (!tr || !pre_initcalls_finished)
167 return; 171 return;
168 172
169 sprint_symbol(bt->func, (unsigned long)fn); 173 sprint_symbol(bt->func, (unsigned long)fn);
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6c00feb3bac7..7ac72a44b2d3 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -14,12 +14,17 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <asm/local.h> 16#include <asm/local.h>
17
17#include "trace.h" 18#include "trace.h"
19#include "trace_stat.h"
20#include "trace_output.h"
18 21
19#ifdef CONFIG_BRANCH_TRACER 22#ifdef CONFIG_BRANCH_TRACER
20 23
24static struct tracer branch_trace;
21static int branch_tracing_enabled __read_mostly; 25static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex); 26static DEFINE_MUTEX(branch_tracing_mutex);
27
23static struct trace_array *branch_tracer; 28static struct trace_array *branch_tracer;
24 29
25static void 30static void
@@ -128,11 +133,7 @@ static void stop_branch_trace(struct trace_array *tr)
128 133
129static int branch_trace_init(struct trace_array *tr) 134static int branch_trace_init(struct trace_array *tr)
130{ 135{
131 int cpu; 136 tracing_reset_online_cpus(tr);
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr); 137 start_branch_trace(tr);
137 return 0; 138 return 0;
138} 139}
@@ -142,22 +143,74 @@ static void branch_trace_reset(struct trace_array *tr)
142 stop_branch_trace(tr); 143 stop_branch_trace(tr);
143} 144}
144 145
145struct tracer branch_trace __read_mostly = 146static int
147trace_print_print(struct trace_seq *s, struct trace_entry *entry, int flags)
148{
149 struct print_entry *field;
150
151 trace_assign_type(field, entry);
152
153 if (seq_print_ip_sym(s, field->ip, flags))
154 goto partial;
155
156 if (trace_seq_printf(s, ": %s", field->buf))
157 goto partial;
158
159 partial:
160 return TRACE_TYPE_PARTIAL_LINE;
161}
162
163static enum print_line_t trace_branch_print(struct trace_iterator *iter,
164 int flags)
165{
166 struct trace_branch *field;
167
168 trace_assign_type(field, iter->ent);
169
170 if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n",
171 field->correct ? " ok " : " MISS ",
172 field->func,
173 field->file,
174 field->line))
175 return TRACE_TYPE_PARTIAL_LINE;
176
177 return TRACE_TYPE_HANDLED;
178}
179
180
181static struct trace_event trace_branch_event = {
182 .type = TRACE_BRANCH,
183 .trace = trace_branch_print,
184 .latency_trace = trace_branch_print,
185 .raw = trace_nop_print,
186 .hex = trace_nop_print,
187 .binary = trace_nop_print,
188};
189
190static struct tracer branch_trace __read_mostly =
146{ 191{
147 .name = "branch", 192 .name = "branch",
148 .init = branch_trace_init, 193 .init = branch_trace_init,
149 .reset = branch_trace_reset, 194 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST 195#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch, 196 .selftest = trace_selftest_startup_branch,
152#endif 197#endif /* CONFIG_FTRACE_SELFTEST */
153}; 198};
154 199
155__init static int init_branch_trace(void) 200__init static int init_branch_tracer(void)
156{ 201{
202 int ret;
203
204 ret = register_ftrace_event(&trace_branch_event);
205 if (!ret) {
206 printk(KERN_WARNING "Warning: could not register "
207 "branch events\n");
208 return 1;
209 }
157 return register_tracer(&branch_trace); 210 return register_tracer(&branch_trace);
158} 211}
212device_initcall(init_branch_tracer);
159 213
160device_initcall(init_branch_trace);
161#else 214#else
162static inline 215static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) 216void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
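
With per-type output callbacks available, the branch tracer above registers its own struct trace_event: latency and normal output share trace_branch_print(), and the raw/hex/binary modes fall back to trace_nop_print(). The convention visible in init_branch_tracer() is that register_ftrace_event() returns the (non-zero) event type on success and 0 on failure, so `if (!ret)` is the error path. A kernel-style sketch of that error-handling convention (the function name is illustrative):

    /* Sketch only: treat a zero return from register_ftrace_event() as failure. */
    __init static int register_my_output(struct trace_event *event)
    {
        int type = register_ftrace_event(event);

        if (!type) {
            printk(KERN_WARNING "could not register trace event %d\n",
                   event->type);
            return 1;
        }
        return 0;
    }
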
@@ -183,66 +236,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
183} 236}
184EXPORT_SYMBOL(ftrace_likely_update); 237EXPORT_SYMBOL(ftrace_likely_update);
185 238
186struct ftrace_pointer { 239extern unsigned long __start_annotated_branch_profile[];
187 void *start; 240extern unsigned long __stop_annotated_branch_profile[];
188 void *stop;
189 int hit;
190};
191 241
192static void * 242static int annotated_branch_stat_headers(struct seq_file *m)
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{ 243{
195 const struct ftrace_pointer *f = m->private; 244 seq_printf(m, " correct incorrect %% ");
196 struct ftrace_branch_data *p = v; 245 seq_printf(m, " Function "
197 246 " File Line\n"
198 (*pos)++; 247 " ------- --------- - "
199 248 " -------- "
200 if (v == (void *)1) 249 " ---- ----\n");
201 return f->start; 250 return 0;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209} 251}
210 252
211static void *t_start(struct seq_file *m, loff_t *pos) 253static inline long get_incorrect_percent(struct ftrace_branch_data *p)
212{ 254{
213 void *t = (void *)1; 255 long percent;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218 256
219 return t; 257 if (p->correct) {
220} 258 percent = p->incorrect * 100;
259 percent /= p->correct + p->incorrect;
260 } else
261 percent = p->incorrect ? 100 : -1;
221 262
222static void t_stop(struct seq_file *m, void *p) 263 return percent;
223{
224} 264}
225 265
226static int t_show(struct seq_file *m, void *v) 266static int branch_stat_show(struct seq_file *m, void *v)
227{ 267{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v; 268 struct ftrace_branch_data *p = v;
230 const char *f; 269 const char *f;
231 long percent; 270 long percent;
232 271
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */ 272 /* Only print the file, not the path */
247 f = p->file + strlen(p->file); 273 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/') 274 while (f >= p->file && *f != '/')
@@ -252,11 +278,7 @@ static int t_show(struct seq_file *m, void *v)
252 /* 278 /*
253 * The miss is overlayed on correct, and hit on incorrect. 279 * The miss is overlayed on correct, and hit on incorrect.
254 */ 280 */
255 if (p->correct) { 281 percent = get_incorrect_percent(p);
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260 282
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); 283 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0) 284 if (percent < 0)
@@ -267,76 +289,118 @@ static int t_show(struct seq_file *m, void *v)
267 return 0; 289 return 0;
268} 290}
269 291
270static struct seq_operations tracing_likely_seq_ops = { 292static void *annotated_branch_stat_start(void)
271 .start = t_start, 293{
272 .next = t_next, 294 return __start_annotated_branch_profile;
273 .stop = t_stop, 295}
274 .show = t_show, 296
297static void *
298annotated_branch_stat_next(void *v, int idx)
299{
300 struct ftrace_branch_data *p = v;
301
302 ++p;
303
304 if ((void *)p >= (void *)__stop_annotated_branch_profile)
305 return NULL;
306
307 return p;
308}
309
310static int annotated_branch_stat_cmp(void *p1, void *p2)
311{
312 struct ftrace_branch_data *a = p1;
313 struct ftrace_branch_data *b = p2;
314
315 long percent_a, percent_b;
316
317 percent_a = get_incorrect_percent(a);
318 percent_b = get_incorrect_percent(b);
319
320 if (percent_a < percent_b)
321 return -1;
322 if (percent_a > percent_b)
323 return 1;
324 else
325 return 0;
326}
327
328static struct tracer_stat annotated_branch_stats = {
329 .name = "branch_annotated",
330 .stat_start = annotated_branch_stat_start,
331 .stat_next = annotated_branch_stat_next,
332 .stat_cmp = annotated_branch_stat_cmp,
333 .stat_headers = annotated_branch_stat_headers,
334 .stat_show = branch_stat_show
275}; 335};
276 336
277static int tracing_branch_open(struct inode *inode, struct file *file) 337__init static int init_annotated_branch_stats(void)
278{ 338{
279 int ret; 339 int ret;
280 340
281 ret = seq_open(file, &tracing_likely_seq_ops); 341 ret = register_stat_tracer(&annotated_branch_stats);
282 if (!ret) { 342 if (!ret) {
283 struct seq_file *m = file->private_data; 343 printk(KERN_WARNING "Warning: could not register "
284 m->private = (void *)inode->i_private; 344 "annotated branches stats\n");
345 return 1;
285 } 346 }
286 347 return 0;
287 return ret;
288} 348}
289 349fs_initcall(init_annotated_branch_stats);
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295 350
296#ifdef CONFIG_PROFILE_ALL_BRANCHES 351#ifdef CONFIG_PROFILE_ALL_BRANCHES
352
297extern unsigned long __start_branch_profile[]; 353extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[]; 354extern unsigned long __stop_branch_profile[];
299 355
300static const struct ftrace_pointer ftrace_branch_pos = { 356static int all_branch_stat_headers(struct seq_file *m)
301 .start = __start_branch_profile, 357{
302 .stop = __stop_branch_profile, 358 seq_printf(m, " miss hit %% ");
303 .hit = 1, 359 seq_printf(m, " Function "
304}; 360 " File Line\n"
361 " ------- --------- - "
362 " -------- "
363 " ---- ----\n");
364 return 0;
365}
305 366
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */ 367static void *all_branch_stat_start(void)
368{
369 return __start_branch_profile;
370}
307 371
308extern unsigned long __start_annotated_branch_profile[]; 372static void *
309extern unsigned long __stop_annotated_branch_profile[]; 373all_branch_stat_next(void *v, int idx)
374{
375 struct ftrace_branch_data *p = v;
310 376
311static const struct ftrace_pointer ftrace_annotated_branch_pos = { 377 ++p;
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315 378
316static __init int ftrace_branch_init(void) 379 if ((void *)p >= (void *)__stop_branch_profile)
317{ 380 return NULL;
318 struct dentry *d_tracer;
319 struct dentry *entry;
320 381
321 d_tracer = tracing_init_dentry(); 382 return p;
383}
322 384
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, 385static struct tracer_stat all_branch_stats = {
324 (void *)&ftrace_annotated_branch_pos, 386 .name = "branch_all",
325 &tracing_branch_fops); 387 .stat_start = all_branch_stat_start,
326 if (!entry) 388 .stat_next = all_branch_stat_next,
327 pr_warning("Could not create debugfs " 389 .stat_headers = all_branch_stat_headers,
 328 "'profile_annotated_branch' entry\n"); 390 .stat_show = branch_stat_show
391};
329 392
330#ifdef CONFIG_PROFILE_ALL_BRANCHES 393__init static int all_annotated_branch_stats(void)
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer, 394{
332 (void *)&ftrace_branch_pos, 395 int ret;
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338 396
397 ret = register_stat_tracer(&all_branch_stats);
398 if (!ret) {
399 printk(KERN_WARNING "Warning: could not register "
400 "all branches stats\n");
401 return 1;
402 }
339 return 0; 403 return 0;
340} 404}
341 405fs_initcall(all_annotated_branch_stats);
342device_initcall(ftrace_branch_init); 406#endif /* CONFIG_PROFILE_ALL_BRANCHES */
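
The branch profiler above now reports its counters through the generic tracer_stat interface and orders entries by misprediction rate. The following stand-alone user-space sketch mirrors get_incorrect_percent() and the annotated_branch_stat_cmp() ordering with hypothetical sample counters; it is an illustration only, not code from this patch.

#include <stdio.h>
#include <stdlib.h>

struct branch_stat {
        const char *func;
        unsigned long correct;
        unsigned long incorrect;
};

/* Same rule as get_incorrect_percent(): -1 means the branch was never hit. */
static long incorrect_percent(const struct branch_stat *p)
{
        if (p->correct)
                return (long)(p->incorrect * 100 / (p->correct + p->incorrect));
        return p->incorrect ? 100 : -1;
}

/* Same comparison as annotated_branch_stat_cmp(). */
static int cmp_percent(const void *a, const void *b)
{
        long pa = incorrect_percent(a);
        long pb = incorrect_percent(b);

        return (pa > pb) - (pa < pb);
}

int main(void)
{
        /* Hypothetical counters, not real profiling data. */
        struct branch_stat stats[] = {
                { "do_foo", 900, 100 },
                { "do_bar",   0,   0 },
                { "do_baz",  10, 990 },
        };
        size_t i, n = sizeof(stats) / sizeof(stats[0]);

        qsort(stats, n, sizeof(stats[0]), cmp_percent);

        printf(" correct incorrect   %%  Function\n");
        for (i = 0; i < n; i++) {
                long percent = incorrect_percent(&stats[i]);

                if (percent < 0)
                        printf("%8lu %8lu    X  %s\n", stats[i].correct,
                               stats[i].incorrect, stats[i].func);
                else
                        printf("%8lu %8lu %4ld  %s\n", stats[i].correct,
                               stats[i].incorrect, percent, stats[i].func);
        }
        return 0;
}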
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9236d7e25a16..b3a320f8aba7 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -16,8 +16,17 @@
16 16
17#include "trace.h" 17#include "trace.h"
18 18
19/* function tracing enabled */
20static int ftrace_function_enabled;
21
22static struct trace_array *func_trace;
23
24static void tracing_start_function_trace(void);
25static void tracing_stop_function_trace(void);
26
19static void start_function_trace(struct trace_array *tr) 27static void start_function_trace(struct trace_array *tr)
20{ 28{
29 func_trace = tr;
21 tr->cpu = get_cpu(); 30 tr->cpu = get_cpu();
22 tracing_reset_online_cpus(tr); 31 tracing_reset_online_cpus(tr);
23 put_cpu(); 32 put_cpu();
@@ -48,14 +57,188 @@ static void function_trace_start(struct trace_array *tr)
48 tracing_reset_online_cpus(tr); 57 tracing_reset_online_cpus(tr);
49} 58}
50 59
60static void
61function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
62{
63 struct trace_array *tr = func_trace;
64 struct trace_array_cpu *data;
65 unsigned long flags;
66 long disabled;
67 int cpu, resched;
68 int pc;
69
70 if (unlikely(!ftrace_function_enabled))
71 return;
72
73 pc = preempt_count();
74 resched = ftrace_preempt_disable();
75 local_save_flags(flags);
76 cpu = raw_smp_processor_id();
77 data = tr->data[cpu];
78 disabled = atomic_inc_return(&data->disabled);
79
80 if (likely(disabled == 1))
81 trace_function(tr, data, ip, parent_ip, flags, pc);
82
83 atomic_dec(&data->disabled);
84 ftrace_preempt_enable(resched);
85}
86
87static void
88function_trace_call(unsigned long ip, unsigned long parent_ip)
89{
90 struct trace_array *tr = func_trace;
91 struct trace_array_cpu *data;
92 unsigned long flags;
93 long disabled;
94 int cpu;
95 int pc;
96
97 if (unlikely(!ftrace_function_enabled))
98 return;
99
100 /*
101 * Need to use raw, since this must be called before the
102 * recursive protection is performed.
103 */
104 local_irq_save(flags);
105 cpu = raw_smp_processor_id();
106 data = tr->data[cpu];
107 disabled = atomic_inc_return(&data->disabled);
108
109 if (likely(disabled == 1)) {
110 pc = preempt_count();
111 trace_function(tr, data, ip, parent_ip, flags, pc);
112 }
113
114 atomic_dec(&data->disabled);
115 local_irq_restore(flags);
116}
117
118static void
119function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
120{
121 struct trace_array *tr = func_trace;
122 struct trace_array_cpu *data;
123 unsigned long flags;
124 long disabled;
125 int cpu;
126 int pc;
127
128 if (unlikely(!ftrace_function_enabled))
129 return;
130
131 /*
132 * Need to use raw, since this must be called before the
133 * recursive protection is performed.
134 */
135 local_irq_save(flags);
136 cpu = raw_smp_processor_id();
137 data = tr->data[cpu];
138 disabled = atomic_inc_return(&data->disabled);
139
140 if (likely(disabled == 1)) {
141 pc = preempt_count();
142 trace_function(tr, data, ip, parent_ip, flags, pc);
143 /*
144 * skip over 5 funcs:
145 * __ftrace_trace_stack,
146 * __trace_stack,
147 * function_stack_trace_call
148 * ftrace_list_func
149 * ftrace_call
150 */
151 __trace_stack(tr, data, flags, 5, pc);
152 }
153
154 atomic_dec(&data->disabled);
155 local_irq_restore(flags);
156}
157
158
159static struct ftrace_ops trace_ops __read_mostly =
160{
161 .func = function_trace_call,
162};
163
164static struct ftrace_ops trace_stack_ops __read_mostly =
165{
166 .func = function_stack_trace_call,
167};
168
169/* Our two options */
170enum {
171 TRACE_FUNC_OPT_STACK = 0x1,
172};
173
174static struct tracer_opt func_opts[] = {
175#ifdef CONFIG_STACKTRACE
176 { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
177#endif
178 { } /* Always set a last empty entry */
179};
180
181static struct tracer_flags func_flags = {
182 .val = 0, /* By default: all flags disabled */
183 .opts = func_opts
184};
185
186static void tracing_start_function_trace(void)
187{
188 ftrace_function_enabled = 0;
189
190 if (trace_flags & TRACE_ITER_PREEMPTONLY)
191 trace_ops.func = function_trace_call_preempt_only;
192 else
193 trace_ops.func = function_trace_call;
194
195 if (func_flags.val & TRACE_FUNC_OPT_STACK)
196 register_ftrace_function(&trace_stack_ops);
197 else
198 register_ftrace_function(&trace_ops);
199
200 ftrace_function_enabled = 1;
201}
202
203static void tracing_stop_function_trace(void)
204{
205 ftrace_function_enabled = 0;
206 /* OK if they are not registered */
207 unregister_ftrace_function(&trace_stack_ops);
208 unregister_ftrace_function(&trace_ops);
209}
210
211static int func_set_flag(u32 old_flags, u32 bit, int set)
212{
213 if (bit == TRACE_FUNC_OPT_STACK) {
214 /* do nothing if already set */
215 if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
216 return 0;
217
218 if (set) {
219 unregister_ftrace_function(&trace_ops);
220 register_ftrace_function(&trace_stack_ops);
221 } else {
222 unregister_ftrace_function(&trace_stack_ops);
223 register_ftrace_function(&trace_ops);
224 }
225
226 return 0;
227 }
228
229 return -EINVAL;
230}
231
51static struct tracer function_trace __read_mostly = 232static struct tracer function_trace __read_mostly =
52{ 233{
53 .name = "function", 234 .name = "function",
54 .init = function_trace_init, 235 .init = function_trace_init,
55 .reset = function_trace_reset, 236 .reset = function_trace_reset,
56 .start = function_trace_start, 237 .start = function_trace_start,
238 .flags = &func_flags,
239 .set_flag = func_set_flag,
57#ifdef CONFIG_FTRACE_SELFTEST 240#ifdef CONFIG_FTRACE_SELFTEST
58 .selftest = trace_selftest_startup_function, 241 .selftest = trace_selftest_startup_function,
59#endif 242#endif
60}; 243};
61 244
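
The new function-tracer callbacks rely on a per-cpu disabled counter: only the caller that moves it from 0 to 1 records an event, which keeps nested or recursive invocations out of the ring buffer, while func_set_flag() swaps trace_ops and trace_stack_ops at runtime when the func_stack_trace option is toggled. Below is a small stand-alone sketch of the 0 -> 1 guard, written with C11 atomics rather than the kernel's atomic_t and per-cpu data; it is an illustration only.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int disabled;     /* per-cpu in the kernel; one global here */

static void record_event(const char *what)
{
        printf("traced: %s\n", what);
}

static void trace_call(const char *what)
{
        /* Only the first (non-nested) entry records anything. */
        if (atomic_fetch_add(&disabled, 1) == 0)
                record_event(what);

        atomic_fetch_sub(&disabled, 1);
}

int main(void)
{
        trace_call("outer call");               /* recorded */

        atomic_fetch_add(&disabled, 1);         /* pretend we re-entered the tracer */
        trace_call("nested call");              /* silently skipped */
        atomic_fetch_sub(&disabled, 1);

        return 0;
}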
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e5b38e..c97594d826bc 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * 2 *
3 * Function graph tracer. 3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> 4 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which 5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com> 6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 * 7 *
@@ -12,6 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16#define TRACE_GRAPH_INDENT 2 17#define TRACE_GRAPH_INDENT 2
17 18
@@ -20,9 +21,11 @@
20#define TRACE_GRAPH_PRINT_CPU 0x2 21#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 22#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8 23#define TRACE_GRAPH_PRINT_PROC 0x8
24#define TRACE_GRAPH_PRINT_DURATION 0x10
25#define TRACE_GRAPH_PRINT_ABS_TIME 0X20
23 26
24static struct tracer_opt trace_opts[] = { 27static struct tracer_opt trace_opts[] = {
25 /* Display overruns ? */ 28 /* Display overruns? (for self-debug purpose) */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, 29 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27 /* Display CPU ? */ 30 /* Display CPU ? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, 31 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
@@ -30,29 +33,30 @@ static struct tracer_opt trace_opts[] = {
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, 33 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */ 34 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, 35 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
36 /* Display duration of execution */
37 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
38 /* Display absolute time of an entry */
39 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
33 { } /* Empty entry */ 40 { } /* Empty entry */
34}; 41};
35 42
36static struct tracer_flags tracer_flags = { 43static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */ 44 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, 45 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
46 TRACE_GRAPH_PRINT_DURATION,
39 .opts = trace_opts 47 .opts = trace_opts
40}; 48};
41 49
42/* pid on the last trace processed */ 50/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 }; 51
44 52
45static int graph_trace_init(struct trace_array *tr) 53static int graph_trace_init(struct trace_array *tr)
46{ 54{
47 int cpu, ret; 55 int ret = register_ftrace_graph(&trace_graph_return,
48
49 for_each_online_cpu(cpu)
50 tracing_reset(tr, cpu);
51
52 ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry); 56 &trace_graph_entry);
54 if (ret) 57 if (ret)
55 return ret; 58 return ret;
59 tracing_reset_online_cpus(tr);
56 tracing_start_cmdline_record(); 60 tracing_start_cmdline_record();
57 61
58 return 0; 62 return 0;
@@ -153,17 +157,25 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
153 157
154/* If the pid changed since the last trace, output this event */ 158/* If the pid changed since the last trace, output this event */
155static enum print_line_t 159static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu) 160verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
157{ 161{
158 pid_t prev_pid; 162 pid_t prev_pid;
163 pid_t *last_pid;
159 int ret; 164 int ret;
160 165
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid) 166 if (!last_pids_cpu)
162 return TRACE_TYPE_HANDLED; 167 return TRACE_TYPE_HANDLED;
163 168
164 prev_pid = last_pid[cpu]; 169 last_pid = per_cpu_ptr(last_pids_cpu, cpu);
165 last_pid[cpu] = pid; 170
171 if (*last_pid == pid)
172 return TRACE_TYPE_HANDLED;
166 173
174 prev_pid = *last_pid;
175 *last_pid = pid;
176
177 if (prev_pid == -1)
178 return TRACE_TYPE_HANDLED;
167/* 179/*
168 * Context-switch trace line: 180 * Context-switch trace line:
169 181
@@ -231,9 +243,34 @@ trace_branch_is_leaf(struct trace_iterator *iter,
231 return true; 243 return true;
232} 244}
233 245
 246/* Signal an execution-time overhead marker to the output */
247static int
248print_graph_overhead(unsigned long long duration, struct trace_seq *s)
249{
 250        /* If the duration column is disabled, we don't need anything */
251 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
252 return 1;
253
254 /* Non nested entry or return */
255 if (duration == -1)
256 return trace_seq_printf(s, " ");
257
258 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
259 /* Duration exceeded 100 msecs */
260 if (duration > 100000ULL)
261 return trace_seq_printf(s, "! ");
262
263 /* Duration exceeded 10 msecs */
264 if (duration > 10000ULL)
265 return trace_seq_printf(s, "+ ");
266 }
267
268 return trace_seq_printf(s, " ");
269}
270
234static enum print_line_t 271static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr, 272print_graph_irq(struct trace_seq *s, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid) 273 enum trace_type type, int cpu, pid_t pid)
237{ 274{
238 int ret; 275 int ret;
239 276
@@ -241,35 +278,40 @@ print_graph_irq(struct trace_seq *s, unsigned long addr,
241 addr >= (unsigned long)__irqentry_text_end) 278 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED; 279 return TRACE_TYPE_UNHANDLED;
243 280
244 if (type == TRACE_GRAPH_ENT) { 281 /* Cpu */
245 ret = trace_seq_printf(s, "==========> | "); 282 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
246 } else { 283 ret = print_graph_cpu(s, cpu);
247 /* Cpu */ 284 if (ret == TRACE_TYPE_PARTIAL_LINE)
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 285 return TRACE_TYPE_PARTIAL_LINE;
249 ret = print_graph_cpu(s, cpu); 286 }
250 if (ret == TRACE_TYPE_PARTIAL_LINE) 287 /* Proc */
251 return TRACE_TYPE_PARTIAL_LINE; 288 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
252 } 289 ret = print_graph_proc(s, pid);
253 /* Proc */ 290 if (ret == TRACE_TYPE_PARTIAL_LINE)
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 291 return TRACE_TYPE_PARTIAL_LINE;
255 ret = print_graph_proc(s, pid); 292 ret = trace_seq_printf(s, " | ");
256 if (ret == TRACE_TYPE_PARTIAL_LINE) 293 if (!ret)
257 return TRACE_TYPE_PARTIAL_LINE; 294 return TRACE_TYPE_PARTIAL_LINE;
295 }
258 296
259 ret = trace_seq_printf(s, " | "); 297 /* No overhead */
260 if (!ret) 298 ret = print_graph_overhead(-1, s);
261 return TRACE_TYPE_PARTIAL_LINE; 299 if (!ret)
262 } 300 return TRACE_TYPE_PARTIAL_LINE;
263 301
264 /* No overhead */ 302 if (type == TRACE_GRAPH_ENT)
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 303 ret = trace_seq_printf(s, "==========>");
266 ret = trace_seq_printf(s, " "); 304 else
267 if (!ret) 305 ret = trace_seq_printf(s, "<==========");
268 return TRACE_TYPE_PARTIAL_LINE; 306
269 } 307 if (!ret)
308 return TRACE_TYPE_PARTIAL_LINE;
309
 310        /* Don't close the duration column if there isn't one */
311 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
312 trace_seq_printf(s, " |");
313 ret = trace_seq_printf(s, "\n");
270 314
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret) 315 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE; 316 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED; 317 return TRACE_TYPE_HANDLED;
@@ -288,7 +330,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
288 sprintf(msecs_str, "%lu", (unsigned long) duration); 330 sprintf(msecs_str, "%lu", (unsigned long) duration);
289 331
290 /* Print msecs */ 332 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str); 333 ret = trace_seq_printf(s, "%s", msecs_str);
292 if (!ret) 334 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE; 335 return TRACE_TYPE_PARTIAL_LINE;
294 336
@@ -321,19 +363,15 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
321 363
322} 364}
323 365
324/* Signal a overhead of time execution to the output */ 366static int print_graph_abs_time(u64 t, struct trace_seq *s)
325static int
326print_graph_overhead(unsigned long long duration, struct trace_seq *s)
327{ 367{
328 /* Duration exceeded 100 msecs */ 368 unsigned long usecs_rem;
329 if (duration > 100000ULL)
330 return trace_seq_printf(s, "! ");
331 369
332 /* Duration exceeded 10 msecs */ 370 usecs_rem = do_div(t, 1000000000);
333 if (duration > 10000ULL) 371 usecs_rem /= 1000;
334 return trace_seq_printf(s, "+ ");
335 372
336 return trace_seq_printf(s, " "); 373 return trace_seq_printf(s, "%5lu.%06lu | ",
374 (unsigned long)t, usecs_rem);
337} 375}
338 376
339/* Case of a leaf function on its call entry */ 377/* Case of a leaf function on its call entry */
@@ -356,16 +394,16 @@ print_graph_entry_leaf(struct trace_iterator *iter,
356 duration = graph_ret->rettime - graph_ret->calltime; 394 duration = graph_ret->rettime - graph_ret->calltime;
357 395
358 /* Overhead */ 396 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 397 ret = print_graph_overhead(duration, s);
360 ret = print_graph_overhead(duration, s); 398 if (!ret)
361 if (!ret) 399 return TRACE_TYPE_PARTIAL_LINE;
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364 400
365 /* Duration */ 401 /* Duration */
366 ret = print_graph_duration(duration, s); 402 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
367 if (ret == TRACE_TYPE_PARTIAL_LINE) 403 ret = print_graph_duration(duration, s);
368 return TRACE_TYPE_PARTIAL_LINE; 404 if (ret == TRACE_TYPE_PARTIAL_LINE)
405 return TRACE_TYPE_PARTIAL_LINE;
406 }
369 407
370 /* Function */ 408 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 409 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -394,25 +432,17 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
394 struct ftrace_graph_ent *call = &entry->graph_ent; 432 struct ftrace_graph_ent *call = &entry->graph_ent;
395 433
396 /* No overhead */ 434 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 435 ret = print_graph_overhead(-1, s);
398 ret = trace_seq_printf(s, " "); 436 if (!ret)
399 if (!ret) 437 return TRACE_TYPE_PARTIAL_LINE;
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402 438
403 /* Interrupt */ 439 /* No time */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid); 440 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | "); 441 ret = trace_seq_printf(s, " | ");
408 if (!ret) 442 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE; 443 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 } 444 }
414 445
415
416 /* Function */ 446 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 447 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " "); 448 ret = trace_seq_printf(s, " ");
@@ -433,15 +463,30 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
433 463
434static enum print_line_t 464static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 465print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu) 466 struct trace_iterator *iter)
437{ 467{
438 int ret; 468 int ret;
469 int cpu = iter->cpu;
470 pid_t *last_entry = iter->private;
439 struct trace_entry *ent = iter->ent; 471 struct trace_entry *ent = iter->ent;
472 struct ftrace_graph_ent *call = &field->graph_ent;
440 473
441 /* Pid */ 474 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 475 if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
476 return TRACE_TYPE_PARTIAL_LINE;
477
478 /* Interrupt */
479 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
480 if (ret == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE; 481 return TRACE_TYPE_PARTIAL_LINE;
444 482
483 /* Absolute time */
484 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
485 ret = print_graph_abs_time(iter->ts, s);
486 if (!ret)
487 return TRACE_TYPE_PARTIAL_LINE;
488 }
489
445 /* Cpu */ 490 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 491 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu); 492 ret = print_graph_cpu(s, cpu);
@@ -469,16 +514,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
469 514
470static enum print_line_t 515static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 516print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu) 517 struct trace_entry *ent, struct trace_iterator *iter)
473{ 518{
474 int i; 519 int i;
475 int ret; 520 int ret;
521 int cpu = iter->cpu;
522 pid_t *last_pid = iter->private;
476 unsigned long long duration = trace->rettime - trace->calltime; 523 unsigned long long duration = trace->rettime - trace->calltime;
477 524
478 /* Pid */ 525 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 526 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE; 527 return TRACE_TYPE_PARTIAL_LINE;
481 528
529 /* Absolute time */
530 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
531 ret = print_graph_abs_time(iter->ts, s);
532 if (!ret)
533 return TRACE_TYPE_PARTIAL_LINE;
534 }
535
482 /* Cpu */ 536 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 537 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu); 538 ret = print_graph_cpu(s, cpu);
@@ -498,16 +552,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
498 } 552 }
499 553
500 /* Overhead */ 554 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 555 ret = print_graph_overhead(duration, s);
502 ret = print_graph_overhead(duration, s); 556 if (!ret)
503 if (!ret) 557 return TRACE_TYPE_PARTIAL_LINE;
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506 558
507 /* Duration */ 559 /* Duration */
508 ret = print_graph_duration(duration, s); 560 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
509 if (ret == TRACE_TYPE_PARTIAL_LINE) 561 ret = print_graph_duration(duration, s);
510 return TRACE_TYPE_PARTIAL_LINE; 562 if (ret == TRACE_TYPE_PARTIAL_LINE)
563 return TRACE_TYPE_PARTIAL_LINE;
564 }
511 565
512 /* Closing brace */ 566 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 567 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
@@ -541,14 +595,23 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
541{ 595{
542 int i; 596 int i;
543 int ret; 597 int ret;
598 int cpu = iter->cpu;
599 pid_t *last_pid = iter->private;
600
601 /* Absolute time */
602 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
603 ret = print_graph_abs_time(iter->ts, s);
604 if (!ret)
605 return TRACE_TYPE_PARTIAL_LINE;
606 }
544 607
545 /* Pid */ 608 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) 609 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE; 610 return TRACE_TYPE_PARTIAL_LINE;
548 611
549 /* Cpu */ 612 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 613 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu); 614 ret = print_graph_cpu(s, cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE) 615 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE; 616 return TRACE_TYPE_PARTIAL_LINE;
554 } 617 }
@@ -565,17 +628,17 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
565 } 628 }
566 629
567 /* No overhead */ 630 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 631 ret = print_graph_overhead(-1, s);
569 ret = trace_seq_printf(s, " "); 632 if (!ret)
633 return TRACE_TYPE_PARTIAL_LINE;
634
635 /* No time */
636 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
637 ret = trace_seq_printf(s, " | ");
570 if (!ret) 638 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE; 639 return TRACE_TYPE_PARTIAL_LINE;
572 } 640 }
573 641
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */ 642 /* Indentation */
580 if (trace->depth > 0) 643 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { 644 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
@@ -589,8 +652,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
589 if (!ret) 652 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE; 653 return TRACE_TYPE_PARTIAL_LINE;
591 654
592 if (ent->flags & TRACE_FLAG_CONT) 655 /* Strip ending newline */
593 trace_seq_print_cont(s, iter); 656 if (s->buffer[s->len - 1] == '\n') {
657 s->buffer[s->len - 1] = '\0';
658 s->len--;
659 }
594 660
595 ret = trace_seq_printf(s, " */\n"); 661 ret = trace_seq_printf(s, " */\n");
596 if (!ret) 662 if (!ret)
@@ -610,13 +676,12 @@ print_graph_function(struct trace_iterator *iter)
610 case TRACE_GRAPH_ENT: { 676 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field; 677 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry); 678 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter, 679 return print_graph_entry(field, s, iter);
614 iter->cpu);
615 } 680 }
616 case TRACE_GRAPH_RET: { 681 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field; 682 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry); 683 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu); 684 return print_graph_return(&field->ret, s, entry, iter);
620 } 685 }
621 case TRACE_PRINT: { 686 case TRACE_PRINT: {
622 struct print_entry *field; 687 struct print_entry *field;
@@ -632,28 +697,55 @@ static void print_graph_headers(struct seq_file *s)
632{ 697{
633 /* 1st line */ 698 /* 1st line */
634 seq_printf(s, "# "); 699 seq_printf(s, "# ");
700 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
701 seq_printf(s, " TIME ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 702 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU "); 703 seq_printf(s, "CPU");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 704 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID "); 705 seq_printf(s, " TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) 706 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
640 seq_printf(s, "OVERHEAD/"); 707 seq_printf(s, " DURATION ");
641 seq_printf(s, "DURATION FUNCTION CALLS\n"); 708 seq_printf(s, " FUNCTION CALLS\n");
642 709
643 /* 2nd line */ 710 /* 2nd line */
644 seq_printf(s, "# "); 711 seq_printf(s, "# ");
712 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
713 seq_printf(s, " | ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 714 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| "); 715 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 716 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | "); 717 seq_printf(s, " | | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 718 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
650 seq_printf(s, "| "); 719 seq_printf(s, " | | ");
651 seq_printf(s, "| | | | |\n"); 720 seq_printf(s, " | | | |\n");
652 } else 721}
653 seq_printf(s, " | | | | |\n"); 722
723static void graph_trace_open(struct trace_iterator *iter)
724{
725 /* pid on the last trace processed */
726 pid_t *last_pid = alloc_percpu(pid_t);
727 int cpu;
728
729 if (!last_pid)
730 pr_warning("function graph tracer: not enough memory\n");
731 else
732 for_each_possible_cpu(cpu) {
733 pid_t *pid = per_cpu_ptr(last_pid, cpu);
734 *pid = -1;
735 }
736
737 iter->private = last_pid;
654} 738}
739
740static void graph_trace_close(struct trace_iterator *iter)
741{
742 percpu_free(iter->private);
743}
744
655static struct tracer graph_trace __read_mostly = { 745static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph", 746 .name = "function_graph",
747 .open = graph_trace_open,
748 .close = graph_trace_close,
657 .init = graph_trace_init, 749 .init = graph_trace_init,
658 .reset = graph_trace_reset, 750 .reset = graph_trace_reset,
659 .print_line = print_graph_function, 751 .print_line = print_graph_function,
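
Beyond the per-cpu last_pid bookkeeping allocated in graph_trace_open(), the graph tracer gains explicit DURATION and ABS_TIME columns: print_graph_overhead() flags slow calls with '!' or '+', and print_graph_abs_time() splits the timestamp into seconds.microseconds. The user-space sketch below covers just that arithmetic, assuming nanosecond timestamps as the do_div(t, 1000000000) above implies; it is an illustration only, not code from this patch.

#include <stdio.h>

/* Mirrors print_graph_overhead(): mark entries above the two thresholds. */
static const char *overhead_marker(unsigned long long duration)
{
        if (duration > 100000ULL)
                return "! ";
        if (duration > 10000ULL)
                return "+ ";
        return "  ";
}

/* Mirrors print_graph_abs_time(): split a nanosecond timestamp into s.us. */
static void print_abs_time(unsigned long long t)
{
        unsigned long usecs_rem = (unsigned long)(t % 1000000000ULL) / 1000;

        printf("%5lu.%06lu | ", (unsigned long)(t / 1000000000ULL), usecs_rem);
}

int main(void)
{
        /* Hypothetical durations and timestamps, not data from a real trace. */
        unsigned long long samples[] = { 1500ULL, 25000ULL, 2000000ULL };

        for (int i = 0; i < 3; i++) {
                print_abs_time(123456789012ULL + samples[i]);
                printf("%sduration=%llu ns\n", overhead_marker(samples[i]),
                       samples[i]);
        }
        return 0;
}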
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 649df22d435f..fff3545fc866 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * h/w branch tracer for x86 based on bts 2 * h/w branch tracer for x86 based on bts
3 * 3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com> 4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
5 * 6 *
6 */ 7 */
7 8
@@ -10,21 +11,44 @@
10#include <linux/debugfs.h> 11#include <linux/debugfs.h>
11#include <linux/ftrace.h> 12#include <linux/ftrace.h>
12#include <linux/kallsyms.h> 13#include <linux/kallsyms.h>
14#include <linux/mutex.h>
15#include <linux/cpu.h>
16#include <linux/smp.h>
13 17
14#include <asm/ds.h> 18#include <asm/ds.h>
15 19
16#include "trace.h" 20#include "trace.h"
21#include "trace_output.h"
17 22
18 23
19#define SIZEOF_BTS (1 << 13) 24#define SIZEOF_BTS (1 << 13)
20 25
26/* The tracer mutex protects the below per-cpu tracer array.
27 It needs to be held to:
28 - start tracing on all cpus
29 - stop tracing on all cpus
30 - start tracing on a single hotplug cpu
31 - stop tracing on a single hotplug cpu
32 - read the trace from all cpus
33 - read the trace from a single cpu
34*/
35static DEFINE_MUTEX(bts_tracer_mutex);
21static DEFINE_PER_CPU(struct bts_tracer *, tracer); 36static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); 37static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23 38
24#define this_tracer per_cpu(tracer, smp_processor_id()) 39#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id()) 40#define this_buffer per_cpu(buffer, smp_processor_id())
26 41
42static int __read_mostly trace_hw_branches_enabled;
43static struct trace_array *hw_branch_trace __read_mostly;
27 44
45
46/*
47 * Start tracing on the current cpu.
48 * The argument is ignored.
49 *
50 * pre: bts_tracer_mutex must be locked.
51 */
28static void bts_trace_start_cpu(void *arg) 52static void bts_trace_start_cpu(void *arg)
29{ 53{
30 if (this_tracer) 54 if (this_tracer)
@@ -42,14 +66,20 @@ static void bts_trace_start_cpu(void *arg)
42 66
43static void bts_trace_start(struct trace_array *tr) 67static void bts_trace_start(struct trace_array *tr)
44{ 68{
45 int cpu; 69 mutex_lock(&bts_tracer_mutex);
46 70
47 tracing_reset_online_cpus(tr); 71 on_each_cpu(bts_trace_start_cpu, NULL, 1);
72 trace_hw_branches_enabled = 1;
48 73
49 for_each_cpu(cpu, cpu_possible_mask) 74 mutex_unlock(&bts_tracer_mutex);
50 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
51} 75}
52 76
77/*
 78 * Stop tracing on the current cpu.
79 * The argument is ignored.
80 *
81 * pre: bts_tracer_mutex must be locked.
82 */
53static void bts_trace_stop_cpu(void *arg) 83static void bts_trace_stop_cpu(void *arg)
54{ 84{
55 if (this_tracer) { 85 if (this_tracer) {
@@ -60,26 +90,63 @@ static void bts_trace_stop_cpu(void *arg)
60 90
61static void bts_trace_stop(struct trace_array *tr) 91static void bts_trace_stop(struct trace_array *tr)
62{ 92{
63 int cpu; 93 mutex_lock(&bts_tracer_mutex);
94
95 trace_hw_branches_enabled = 0;
96 on_each_cpu(bts_trace_stop_cpu, NULL, 1);
64 97
65 for_each_cpu(cpu, cpu_possible_mask) 98 mutex_unlock(&bts_tracer_mutex);
99}
100
101static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
102 unsigned long action, void *hcpu)
103{
104 unsigned int cpu = (unsigned long)hcpu;
105
106 mutex_lock(&bts_tracer_mutex);
107
108 if (!trace_hw_branches_enabled)
109 goto out;
110
111 switch (action) {
112 case CPU_ONLINE:
113 case CPU_DOWN_FAILED:
114 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
115 break;
116 case CPU_DOWN_PREPARE:
66 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); 117 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
118 break;
119 }
120
121 out:
122 mutex_unlock(&bts_tracer_mutex);
123 return NOTIFY_DONE;
67} 124}
68 125
126static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
127 .notifier_call = bts_hotcpu_handler
128};
129
69static int bts_trace_init(struct trace_array *tr) 130static int bts_trace_init(struct trace_array *tr)
70{ 131{
132 hw_branch_trace = tr;
133
134 register_hotcpu_notifier(&bts_hotcpu_notifier);
71 tracing_reset_online_cpus(tr); 135 tracing_reset_online_cpus(tr);
72 bts_trace_start(tr); 136 bts_trace_start(tr);
73 137
74 return 0; 138 return 0;
75} 139}
76 140
141static void bts_trace_reset(struct trace_array *tr)
142{
143 bts_trace_stop(tr);
144 unregister_hotcpu_notifier(&bts_hotcpu_notifier);
145}
146
77static void bts_trace_print_header(struct seq_file *m) 147static void bts_trace_print_header(struct seq_file *m)
78{ 148{
79 seq_puts(m, 149 seq_puts(m, "# CPU# TO <- FROM\n");
80 "# CPU# FROM TO FUNCTION\n");
81 seq_puts(m,
82 "# | | | |\n");
83} 150}
84 151
85static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) 152static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
@@ -87,15 +154,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
87 struct trace_entry *entry = iter->ent; 154 struct trace_entry *entry = iter->ent;
88 struct trace_seq *seq = &iter->seq; 155 struct trace_seq *seq = &iter->seq;
89 struct hw_branch_entry *it; 156 struct hw_branch_entry *it;
157 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
90 158
91 trace_assign_type(it, entry); 159 trace_assign_type(it, entry);
92 160
93 if (entry->type == TRACE_HW_BRANCHES) { 161 if (entry->type == TRACE_HW_BRANCHES) {
94 if (trace_seq_printf(seq, "%4d ", entry->cpu) && 162 if (trace_seq_printf(seq, "%4d ", entry->cpu) &&
95 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", 163 seq_print_ip_sym(seq, it->to, symflags) &&
96 it->from, it->to) && 164 trace_seq_printf(seq, "\t <- ") &&
97 (!it->from || 165 seq_print_ip_sym(seq, it->from, symflags) &&
98 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
99 trace_seq_printf(seq, "\n")) 166 trace_seq_printf(seq, "\n"))
100 return TRACE_TYPE_HANDLED; 167 return TRACE_TYPE_HANDLED;
101 return TRACE_TYPE_PARTIAL_LINE;; 168 return TRACE_TYPE_PARTIAL_LINE;;
@@ -103,26 +170,42 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
103 return TRACE_TYPE_UNHANDLED; 170 return TRACE_TYPE_UNHANDLED;
104} 171}
105 172
106void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) 173void trace_hw_branch(u64 from, u64 to)
107{ 174{
175 struct trace_array *tr = hw_branch_trace;
108 struct ring_buffer_event *event; 176 struct ring_buffer_event *event;
109 struct hw_branch_entry *entry; 177 struct hw_branch_entry *entry;
110 unsigned long irq; 178 unsigned long irq1, irq2;
179 int cpu;
111 180
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); 181 if (unlikely(!tr))
113 if (!event)
114 return; 182 return;
183
184 if (unlikely(!trace_hw_branches_enabled))
185 return;
186
187 local_irq_save(irq1);
188 cpu = raw_smp_processor_id();
189 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
190 goto out;
191
192 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq2);
193 if (!event)
194 goto out;
115 entry = ring_buffer_event_data(event); 195 entry = ring_buffer_event_data(event);
116 tracing_generic_entry_update(&entry->ent, 0, from); 196 tracing_generic_entry_update(&entry->ent, 0, from);
117 entry->ent.type = TRACE_HW_BRANCHES; 197 entry->ent.type = TRACE_HW_BRANCHES;
118 entry->ent.cpu = smp_processor_id(); 198 entry->ent.cpu = cpu;
119 entry->from = from; 199 entry->from = from;
120 entry->to = to; 200 entry->to = to;
121 ring_buffer_unlock_commit(tr->buffer, event, irq); 201 ring_buffer_unlock_commit(tr->buffer, event, irq2);
202
203 out:
204 atomic_dec(&tr->data[cpu]->disabled);
205 local_irq_restore(irq1);
122} 206}
123 207
124static void trace_bts_at(struct trace_array *tr, 208static void trace_bts_at(const struct bts_trace *trace, void *at)
125 const struct bts_trace *trace, void *at)
126{ 209{
127 struct bts_struct bts; 210 struct bts_struct bts;
128 int err = 0; 211 int err = 0;
@@ -137,18 +220,29 @@ static void trace_bts_at(struct trace_array *tr,
137 220
138 switch (bts.qualifier) { 221 switch (bts.qualifier) {
139 case BTS_BRANCH: 222 case BTS_BRANCH:
140 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); 223 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
141 break; 224 break;
142 } 225 }
143} 226}
144 227
228/*
229 * Collect the trace on the current cpu and write it into the ftrace buffer.
230 *
231 * pre: bts_tracer_mutex must be locked
232 */
145static void trace_bts_cpu(void *arg) 233static void trace_bts_cpu(void *arg)
146{ 234{
147 struct trace_array *tr = (struct trace_array *) arg; 235 struct trace_array *tr = (struct trace_array *) arg;
148 const struct bts_trace *trace; 236 const struct bts_trace *trace;
149 unsigned char *at; 237 unsigned char *at;
150 238
151 if (!this_tracer) 239 if (unlikely(!tr))
240 return;
241
242 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
243 return;
244
245 if (unlikely(!this_tracer))
152 return; 246 return;
153 247
154 ds_suspend_bts(this_tracer); 248 ds_suspend_bts(this_tracer);
@@ -158,11 +252,11 @@ static void trace_bts_cpu(void *arg)
158 252
159 for (at = trace->ds.top; (void *)at < trace->ds.end; 253 for (at = trace->ds.top; (void *)at < trace->ds.end;
160 at += trace->ds.size) 254 at += trace->ds.size)
161 trace_bts_at(tr, trace, at); 255 trace_bts_at(trace, at);
162 256
163 for (at = trace->ds.begin; (void *)at < trace->ds.top; 257 for (at = trace->ds.begin; (void *)at < trace->ds.top;
164 at += trace->ds.size) 258 at += trace->ds.size)
165 trace_bts_at(tr, trace, at); 259 trace_bts_at(trace, at);
166 260
167out: 261out:
168 ds_resume_bts(this_tracer); 262 ds_resume_bts(this_tracer);
@@ -170,22 +264,38 @@ out:
170 264
171static void trace_bts_prepare(struct trace_iterator *iter) 265static void trace_bts_prepare(struct trace_iterator *iter)
172{ 266{
173 int cpu; 267 mutex_lock(&bts_tracer_mutex);
268
269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
270
271 mutex_unlock(&bts_tracer_mutex);
272}
273
274static void trace_bts_close(struct trace_iterator *iter)
275{
276 tracing_reset_online_cpus(iter->tr);
277}
278
279void trace_hw_branch_oops(void)
280{
281 mutex_lock(&bts_tracer_mutex);
282
283 trace_bts_cpu(hw_branch_trace);
174 284
175 for_each_cpu(cpu, cpu_possible_mask) 285 mutex_unlock(&bts_tracer_mutex);
176 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
177} 286}
178 287
179struct tracer bts_tracer __read_mostly = 288struct tracer bts_tracer __read_mostly =
180{ 289{
181 .name = "hw-branch-tracer", 290 .name = "hw-branch-tracer",
182 .init = bts_trace_init, 291 .init = bts_trace_init,
183 .reset = bts_trace_stop, 292 .reset = bts_trace_reset,
184 .print_header = bts_trace_print_header, 293 .print_header = bts_trace_print_header,
185 .print_line = bts_trace_print_line, 294 .print_line = bts_trace_print_line,
186 .start = bts_trace_start, 295 .start = bts_trace_start,
187 .stop = bts_trace_stop, 296 .stop = bts_trace_stop,
188 .open = trace_bts_prepare 297 .open = trace_bts_prepare,
298 .close = trace_bts_close
189}; 299};
190 300
191__init static int init_bts_trace(void) 301__init static int init_bts_trace(void)
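
The hw-branch tracer now takes a single mutex around start, stop, read and CPU hotplug, and its notifier only (re)starts BTS on a CPU that comes online while tracing is globally enabled. The condensed user-space model below captures that locking scheme with pthreads; the names (cpu_tracer_active, handle_cpu_online, ...) are invented for the sketch and do not exist in the kernel.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

static pthread_mutex_t tracer_lock = PTHREAD_MUTEX_INITIALIZER;
static bool tracing_enabled;            /* trace_hw_branches_enabled */
static bool cpu_tracer_active[NCPUS];   /* stands in for per_cpu(tracer, cpu) */

static void start_all(void)
{
        pthread_mutex_lock(&tracer_lock);
        for (int cpu = 0; cpu < NCPUS; cpu++)
                cpu_tracer_active[cpu] = true;
        tracing_enabled = true;
        pthread_mutex_unlock(&tracer_lock);
}

static void stop_all(void)
{
        pthread_mutex_lock(&tracer_lock);
        tracing_enabled = false;
        for (int cpu = 0; cpu < NCPUS; cpu++)
                cpu_tracer_active[cpu] = false;
        pthread_mutex_unlock(&tracer_lock);
}

/* What the CPU_ONLINE and CPU_DOWN_PREPARE notifier cases boil down to. */
static void handle_cpu_online(int cpu)
{
        pthread_mutex_lock(&tracer_lock);
        if (tracing_enabled)
                cpu_tracer_active[cpu] = true;
        pthread_mutex_unlock(&tracer_lock);
}

static void handle_cpu_down_prepare(int cpu)
{
        pthread_mutex_lock(&tracer_lock);
        if (tracing_enabled)
                cpu_tracer_active[cpu] = false;
        pthread_mutex_unlock(&tracer_lock);
}

int main(void)
{
        start_all();
        handle_cpu_down_prepare(2);     /* cpu 2 goes away while tracing */
        handle_cpu_online(2);           /* ... and is restarted when it returns */
        for (int cpu = 0; cpu < NCPUS; cpu++)
                printf("cpu%d: %s\n", cpu,
                       cpu_tracer_active[cpu] ? "tracing" : "idle");
        stop_all();
        return 0;
}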
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 62a78d943534..ed344b022a14 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,28 +353,18 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
362static void start_irqsoff_tracer(struct trace_array *tr) 356static void start_irqsoff_tracer(struct trace_array *tr)
363{ 357{
364 register_ftrace_function(&trace_ops); 358 register_ftrace_function(&trace_ops);
365 if (tracing_is_enabled()) { 359 if (tracing_is_enabled())
366 tracer_enabled = 1; 360 tracer_enabled = 1;
367 save_tracer_enabled = 1; 361 else
368 } else {
369 tracer_enabled = 0; 362 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
372} 363}
373 364
374static void stop_irqsoff_tracer(struct trace_array *tr) 365static void stop_irqsoff_tracer(struct trace_array *tr)
375{ 366{
376 tracer_enabled = 0; 367 tracer_enabled = 0;
377 save_tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 368 unregister_ftrace_function(&trace_ops);
379} 369}
380 370
@@ -395,25 +385,11 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
395static void irqsoff_tracer_start(struct trace_array *tr) 385static void irqsoff_tracer_start(struct trace_array *tr)
396{ 386{
397 tracer_enabled = 1; 387 tracer_enabled = 1;
398 save_tracer_enabled = 1;
399} 388}
400 389
401static void irqsoff_tracer_stop(struct trace_array *tr) 390static void irqsoff_tracer_stop(struct trace_array *tr)
402{ 391{
403 tracer_enabled = 0; 392 tracer_enabled = 0;
404 save_tracer_enabled = 0;
405}
406
407static void irqsoff_tracer_open(struct trace_iterator *iter)
408{
409 /* stop the trace while dumping */
410 tracer_enabled = 0;
411}
412
413static void irqsoff_tracer_close(struct trace_iterator *iter)
414{
415 /* restart tracing */
416 tracer_enabled = save_tracer_enabled;
417} 393}
418 394
419#ifdef CONFIG_IRQSOFF_TRACER 395#ifdef CONFIG_IRQSOFF_TRACER
@@ -431,8 +407,6 @@ static struct tracer irqsoff_tracer __read_mostly =
431 .reset = irqsoff_tracer_reset, 407 .reset = irqsoff_tracer_reset,
432 .start = irqsoff_tracer_start, 408 .start = irqsoff_tracer_start,
433 .stop = irqsoff_tracer_stop, 409 .stop = irqsoff_tracer_stop,
434 .open = irqsoff_tracer_open,
435 .close = irqsoff_tracer_close,
436 .print_max = 1, 410 .print_max = 1,
437#ifdef CONFIG_FTRACE_SELFTEST 411#ifdef CONFIG_FTRACE_SELFTEST
438 .selftest = trace_selftest_startup_irqsoff, 412 .selftest = trace_selftest_startup_irqsoff,
@@ -459,8 +433,6 @@ static struct tracer preemptoff_tracer __read_mostly =
459 .reset = irqsoff_tracer_reset, 433 .reset = irqsoff_tracer_reset,
460 .start = irqsoff_tracer_start, 434 .start = irqsoff_tracer_start,
461 .stop = irqsoff_tracer_stop, 435 .stop = irqsoff_tracer_stop,
462 .open = irqsoff_tracer_open,
463 .close = irqsoff_tracer_close,
464 .print_max = 1, 436 .print_max = 1,
465#ifdef CONFIG_FTRACE_SELFTEST 437#ifdef CONFIG_FTRACE_SELFTEST
466 .selftest = trace_selftest_startup_preemptoff, 438 .selftest = trace_selftest_startup_preemptoff,
@@ -489,8 +461,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
489 .reset = irqsoff_tracer_reset, 461 .reset = irqsoff_tracer_reset,
490 .start = irqsoff_tracer_start, 462 .start = irqsoff_tracer_start,
491 .stop = irqsoff_tracer_stop, 463 .stop = irqsoff_tracer_stop,
492 .open = irqsoff_tracer_open,
493 .close = irqsoff_tracer_close,
494 .print_max = 1, 464 .print_max = 1,
495#ifdef CONFIG_FTRACE_SELFTEST 465#ifdef CONFIG_FTRACE_SELFTEST
496 .selftest = trace_selftest_startup_preemptirqsoff, 466 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index fffcb069f1dc..ec78e244242e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,8 +9,10 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <asm/atomic.h>
12 13
13#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
14 16
15struct header_iter { 17struct header_iter {
16 struct pci_dev *dev; 18 struct pci_dev *dev;
@@ -19,6 +21,7 @@ struct header_iter {
19static struct trace_array *mmio_trace_array; 21static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 22static bool overrun_detected;
21static unsigned long prev_overruns; 23static unsigned long prev_overruns;
24static atomic_t dropped_count;
22 25
23static void mmio_reset_data(struct trace_array *tr) 26static void mmio_reset_data(struct trace_array *tr)
24{ 27{
@@ -121,11 +124,11 @@ static void mmio_close(struct trace_iterator *iter)
121 124
122static unsigned long count_overruns(struct trace_iterator *iter) 125static unsigned long count_overruns(struct trace_iterator *iter)
123{ 126{
124 unsigned long cnt = 0; 127 unsigned long cnt = atomic_xchg(&dropped_count, 0);
125 unsigned long over = ring_buffer_overruns(iter->tr->buffer); 128 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
126 129
127 if (over > prev_overruns) 130 if (over > prev_overruns)
128 cnt = over - prev_overruns; 131 cnt += over - prev_overruns;
129 prev_overruns = over; 132 prev_overruns = over;
130 return cnt; 133 return cnt;
131} 134}
@@ -181,21 +184,22 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
181 switch (rw->opcode) { 184 switch (rw->opcode) {
182 case MMIO_READ: 185 case MMIO_READ:
183 ret = trace_seq_printf(s, 186 ret = trace_seq_printf(s,
184 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 187 "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
185 rw->width, secs, usec_rem, rw->map_id, 188 rw->width, secs, usec_rem, rw->map_id,
186 (unsigned long long)rw->phys, 189 (unsigned long long)rw->phys,
187 rw->value, rw->pc, 0); 190 rw->value, rw->pc, 0);
188 break; 191 break;
189 case MMIO_WRITE: 192 case MMIO_WRITE:
190 ret = trace_seq_printf(s, 193 ret = trace_seq_printf(s,
191 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 194 "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
192 rw->width, secs, usec_rem, rw->map_id, 195 rw->width, secs, usec_rem, rw->map_id,
193 (unsigned long long)rw->phys, 196 (unsigned long long)rw->phys,
194 rw->value, rw->pc, 0); 197 rw->value, rw->pc, 0);
195 break; 198 break;
196 case MMIO_UNKNOWN_OP: 199 case MMIO_UNKNOWN_OP:
197 ret = trace_seq_printf(s, 200 ret = trace_seq_printf(s,
198 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", 201 "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
202 "%02lx 0x%lx %d\n",
199 secs, usec_rem, rw->map_id, 203 secs, usec_rem, rw->map_id,
200 (unsigned long long)rw->phys, 204 (unsigned long long)rw->phys,
201 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, 205 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
@@ -227,14 +231,14 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
227 switch (m->opcode) { 231 switch (m->opcode) {
228 case MMIO_PROBE: 232 case MMIO_PROBE:
229 ret = trace_seq_printf(s, 233 ret = trace_seq_printf(s,
230 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 234 "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
231 secs, usec_rem, m->map_id, 235 secs, usec_rem, m->map_id,
232 (unsigned long long)m->phys, m->virt, m->len, 236 (unsigned long long)m->phys, m->virt, m->len,
233 0UL, 0); 237 0UL, 0);
234 break; 238 break;
235 case MMIO_UNPROBE: 239 case MMIO_UNPROBE:
236 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
237 "UNMAP %lu.%06lu %d 0x%lx %d\n", 241 "UNMAP %u.%06lu %d 0x%lx %d\n",
238 secs, usec_rem, m->map_id, 0UL, 0); 242 secs, usec_rem, m->map_id, 0UL, 0);
239 break; 243 break;
240 default: 244 default:
@@ -258,13 +262,10 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
258 int ret; 262 int ret;
259 263
260 /* The trailing newline must be in the message. */ 264 /* The trailing newline must be in the message. */
261 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg); 265 ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
262 if (!ret) 266 if (!ret)
263 return TRACE_TYPE_PARTIAL_LINE; 267 return TRACE_TYPE_PARTIAL_LINE;
264 268
265 if (entry->flags & TRACE_FLAG_CONT)
266 trace_seq_print_cont(s, iter);
267
268 return TRACE_TYPE_HANDLED; 269 return TRACE_TYPE_HANDLED;
269} 270}
270 271
@@ -310,8 +311,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
310 311
311 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 312 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
312 &irq_flags); 313 &irq_flags);
313 if (!event) 314 if (!event) {
315 atomic_inc(&dropped_count);
314 return; 316 return;
317 }
315 entry = ring_buffer_event_data(event); 318 entry = ring_buffer_event_data(event);
316 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 319 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
317 entry->ent.type = TRACE_MMIO_RW; 320 entry->ent.type = TRACE_MMIO_RW;
@@ -338,8 +341,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338 341
339 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 342 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
340 &irq_flags); 343 &irq_flags);
341 if (!event) 344 if (!event) {
345 atomic_inc(&dropped_count);
342 return; 346 return;
347 }
343 entry = ring_buffer_event_data(event); 348 entry = ring_buffer_event_data(event);
344 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 349 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
345 entry->ent.type = TRACE_MMIO_MAP; 350 entry->ent.type = TRACE_MMIO_MAP;
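
Events that cannot be reserved in the ring buffer are now tallied in dropped_count, and count_overruns() drains that counter with an atomic exchange so every drop is reported exactly once. The same report-and-reset pattern in plain C11 atomics, as an illustration only:

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong dropped_count;

/* Producer side: called when ring_buffer_lock_reserve() would have failed. */
static void note_dropped_event(void)
{
        atomic_fetch_add(&dropped_count, 1);
}

/* Reader side: report-and-reset, so each drop is counted exactly once. */
static unsigned long count_overruns(void)
{
        return atomic_exchange(&dropped_count, 0);
}

int main(void)
{
        note_dropped_event();
        note_dropped_event();
        printf("lost %lu events\n", count_overruns());  /* 2 */
        printf("lost %lu events\n", count_overruns());  /* 0: already reported */
        return 0;
}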
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index b9767acd30ac..087b6cbf4ea5 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -47,12 +47,8 @@ static void stop_nop_trace(struct trace_array *tr)
47 47
48static int nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
49{ 49{
50 int cpu;
51 ctx_trace = tr; 50 ctx_trace = tr;
52 51 tracing_reset_online_cpus(tr);
53 for_each_online_cpu(cpu)
54 tracing_reset(tr, cpu);
55
56 start_nop_trace(tr); 52 start_nop_trace(tr);
57 return 0; 53 return 0;
58} 54}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644
index 000000000000..b7380eee9fa1
--- /dev/null
+++ b/kernel/trace/trace_output.c
@@ -0,0 +1,910 @@
1/*
2 * trace_output.c
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/mutex.h>
10#include <linux/ftrace.h>
11
12#include "trace_output.h"
13
14/* must be a power of 2 */
15#define EVENT_HASHSIZE 128
16
17static DEFINE_MUTEX(trace_event_mutex);
18static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19
20static int next_event_type = __TRACE_LAST_TYPE + 1;
21
22/**
23 * trace_seq_printf - sequence printing of trace information
24 * @s: trace sequence descriptor
25 * @fmt: printf format string
26 *
27 * The tracer may use either sequence operations or its own
 28 * copy to user routines. To simplify formatting of a trace,
29 * trace_seq_printf is used to store strings into a special
30 * buffer (@s). Then the output may be either used by
31 * the sequencer or pulled into another buffer.
32 */
33int
34trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
35{
36 int len = (PAGE_SIZE - 1) - s->len;
37 va_list ap;
38 int ret;
39
40 if (!len)
41 return 0;
42
43 va_start(ap, fmt);
44 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
45 va_end(ap);
46
47 /* If we can't write it all, don't bother writing anything */
48 if (ret >= len)
49 return 0;
50
51 s->len += ret;
52
53 return len;
54}
55
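
trace_seq_printf() appends into a page-sized buffer and deliberately writes nothing when the formatted string would not fit, so a caller can treat a zero return as "line truncated". Below is a user-space reduction of that all-or-nothing append; seq_buf_printf and SEQ_SIZE are invented names for this illustration only.

#include <stdarg.h>
#include <stdio.h>

#define SEQ_SIZE 4096   /* stands in for PAGE_SIZE */

struct seq_buf {
        char buffer[SEQ_SIZE];
        int len;
};

static int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
{
        int room = (SEQ_SIZE - 1) - s->len;
        va_list ap;
        int ret;

        if (!room)
                return 0;

        va_start(ap, fmt);
        ret = vsnprintf(s->buffer + s->len, room, fmt, ap);
        va_end(ap);

        /* Would have been truncated: pretend we wrote nothing. */
        if (ret >= room)
                return 0;

        s->len += ret;
        return room;
}

int main(void)
{
        struct seq_buf s = { .len = 0 };

        if (seq_buf_printf(&s, "%4d: %s\n", 42, "hello"))
                fputs(s.buffer, stdout);
        return 0;
}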
56/**
57 * trace_seq_puts - trace sequence printing of simple string
58 * @s: trace sequence descriptor
59 * @str: simple string to record
60 *
61 * The tracer may use either the sequence operations or its own
62 * copy to user routines. This function records a simple string
63 * into a special buffer (@s) for later retrieval by a sequencer
64 * or other mechanism.
65 */
66int trace_seq_puts(struct trace_seq *s, const char *str)
67{
68 int len = strlen(str);
69
70 if (len > ((PAGE_SIZE - 1) - s->len))
71 return 0;
72
73 memcpy(s->buffer + s->len, str, len);
74 s->len += len;
75
76 return len;
77}
78
79int trace_seq_putc(struct trace_seq *s, unsigned char c)
80{
81 if (s->len >= (PAGE_SIZE - 1))
82 return 0;
83
84 s->buffer[s->len++] = c;
85
86 return 1;
87}
88
89int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
90{
91 if (len > ((PAGE_SIZE - 1) - s->len))
92 return 0;
93
94 memcpy(s->buffer + s->len, mem, len);
95 s->len += len;
96
97 return len;
98}
99
100int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
101{
102 unsigned char hex[HEX_CHARS];
103 unsigned char *data = mem;
104 int i, j;
105
106#ifdef __BIG_ENDIAN
107 for (i = 0, j = 0; i < len; i++) {
108#else
109 for (i = len-1, j = 0; i >= 0; i--) {
110#endif
111 hex[j++] = hex_asc_hi(data[i]);
112 hex[j++] = hex_asc_lo(data[i]);
113 }
114 hex[j++] = ' ';
115
116 return trace_seq_putmem(s, hex, j);
117}
118
119int trace_seq_path(struct trace_seq *s, struct path *path)
120{
121 unsigned char *p;
122
123 if (s->len >= (PAGE_SIZE - 1))
124 return 0;
125 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
126 if (!IS_ERR(p)) {
127 p = mangle_path(s->buffer + s->len, p, "\n");
128 if (p) {
129 s->len = p - s->buffer;
130 return 1;
131 }
132 } else {
133 s->buffer[s->len++] = '?';
134 return 1;
135 }
136
137 return 0;
138}
139
140#ifdef CONFIG_KRETPROBES
141static inline const char *kretprobed(const char *name)
142{
143 static const char tramp_name[] = "kretprobe_trampoline";
144 int size = sizeof(tramp_name);
145
146 if (strncmp(tramp_name, name, size) == 0)
147 return "[unknown/kretprobe'd]";
148 return name;
149}
150#else
151static inline const char *kretprobed(const char *name)
152{
153 return name;
154}
155#endif /* CONFIG_KRETPROBES */
156
157static int
158seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
159{
160#ifdef CONFIG_KALLSYMS
161 char str[KSYM_SYMBOL_LEN];
162 const char *name;
163
164 kallsyms_lookup(address, NULL, NULL, NULL, str);
165
166 name = kretprobed(str);
167
168 return trace_seq_printf(s, fmt, name);
169#endif
170 return 1;
171}
172
173static int
174seq_print_sym_offset(struct trace_seq *s, const char *fmt,
175 unsigned long address)
176{
177#ifdef CONFIG_KALLSYMS
178 char str[KSYM_SYMBOL_LEN];
179 const char *name;
180
181 sprint_symbol(str, address);
182 name = kretprobed(str);
183
184 return trace_seq_printf(s, fmt, name);
185#endif
186 return 1;
187}
188
189#ifndef CONFIG_64BIT
190# define IP_FMT "%08lx"
191#else
192# define IP_FMT "%016lx"
193#endif
194
195int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
196 unsigned long ip, unsigned long sym_flags)
197{
198 struct file *file = NULL;
199 unsigned long vmstart = 0;
200 int ret = 1;
201
202 if (mm) {
203 const struct vm_area_struct *vma;
204
205 down_read(&mm->mmap_sem);
206 vma = find_vma(mm, ip);
207 if (vma) {
208 file = vma->vm_file;
209 vmstart = vma->vm_start;
210 }
211 if (file) {
212 ret = trace_seq_path(s, &file->f_path);
213 if (ret)
214 ret = trace_seq_printf(s, "[+0x%lx]",
215 ip - vmstart);
216 }
217 up_read(&mm->mmap_sem);
218 }
219 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
220 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
221 return ret;
222}
223
224int
225seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
226 unsigned long sym_flags)
227{
228 struct mm_struct *mm = NULL;
229 int ret = 1;
230 unsigned int i;
231
232 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
233 struct task_struct *task;
234 /*
235 * we do the lookup on the thread group leader,
236 * since individual threads might have already quit!
237 */
238 rcu_read_lock();
239 task = find_task_by_vpid(entry->ent.tgid);
240 if (task)
241 mm = get_task_mm(task);
242 rcu_read_unlock();
243 }
244
245 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
246 unsigned long ip = entry->caller[i];
247
248 if (ip == ULONG_MAX || !ret)
249 break;
250 if (i && ret)
251 ret = trace_seq_puts(s, " <- ");
252 if (!ip) {
253 if (ret)
254 ret = trace_seq_puts(s, "??");
255 continue;
256 }
257 if (!ret)
258 break;
259 if (ret)
260 ret = seq_print_user_ip(s, mm, ip, sym_flags);
261 }
262
263 if (mm)
264 mmput(mm);
265 return ret;
266}
267
268int
269seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
270{
271 int ret;
272
273 if (!ip)
274 return trace_seq_printf(s, "0");
275
276 if (sym_flags & TRACE_ITER_SYM_OFFSET)
277 ret = seq_print_sym_offset(s, "%s", ip);
278 else
279 ret = seq_print_sym_short(s, "%s", ip);
280
281 if (!ret)
282 return 0;
283
284 if (sym_flags & TRACE_ITER_SYM_ADDR)
285 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
286 return ret;
287}
288
289static int
290lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
291{
292 int hardirq, softirq;
293 char *comm;
294
295 comm = trace_find_cmdline(entry->pid);
296 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
297 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
298
299 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c",
300 comm, entry->pid, cpu,
301 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
302 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
303 'X' : '.',
304 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
305 'N' : '.',
306 (hardirq && softirq) ? 'H' :
307 hardirq ? 'h' : softirq ? 's' : '.'))
308 return 0;
309
310 if (entry->preempt_count)
311 return trace_seq_printf(s, "%x", entry->preempt_count);
312 return trace_seq_puts(s, ".");
313}
314
315static unsigned long preempt_mark_thresh = 100;
316
317static int
318lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
319 unsigned long rel_usecs)
320{
321 return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
322 rel_usecs > preempt_mark_thresh ? '!' :
323 rel_usecs > 1 ? '+' : ' ');
324}
325
326int trace_print_context(struct trace_iterator *iter)
327{
328 struct trace_seq *s = &iter->seq;
329 struct trace_entry *entry = iter->ent;
330 char *comm = trace_find_cmdline(entry->pid);
331 unsigned long long t = ns2usecs(iter->ts);
332 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
333 unsigned long secs = (unsigned long)t;
334
335 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
336 comm, entry->pid, entry->cpu, secs, usec_rem);
337}
338
339int trace_print_lat_context(struct trace_iterator *iter)
340{
341 u64 next_ts;
342 int ret;
343 struct trace_seq *s = &iter->seq;
344 struct trace_entry *entry = iter->ent,
345 *next_entry = trace_find_next_entry(iter, NULL,
346 &next_ts);
347 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
348 unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
349 unsigned long rel_usecs;
350
351 if (!next_entry)
352 next_ts = iter->ts;
353 rel_usecs = ns2usecs(next_ts - iter->ts);
354
355 if (verbose) {
356 char *comm = trace_find_cmdline(entry->pid);
357 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
358 " %ld.%03ldms (+%ld.%03ldms): ", comm,
359 entry->pid, entry->cpu, entry->flags,
360 entry->preempt_count, iter->idx,
361 ns2usecs(iter->ts),
362 abs_usecs / USEC_PER_MSEC,
363 abs_usecs % USEC_PER_MSEC,
364 rel_usecs / USEC_PER_MSEC,
365 rel_usecs % USEC_PER_MSEC);
366 } else {
367 ret = lat_print_generic(s, entry, entry->cpu);
368 if (ret)
369 ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
370 }
371
372 return ret;
373}
374
375static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
376
377static int task_state_char(unsigned long state)
378{
379 int bit = state ? __ffs(state) + 1 : 0;
380
381 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
382}
383
384/**
385 * ftrace_find_event - find a registered event
386 * @type: the type of event to look for
387 *
 388 * Returns an event of type @type, otherwise NULL.
389 */
390struct trace_event *ftrace_find_event(int type)
391{
392 struct trace_event *event;
393 struct hlist_node *n;
394 unsigned key;
395
396 key = type & (EVENT_HASHSIZE - 1);
397
398 hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
399 if (event->type == type)
400 return event;
401 }
402
403 return NULL;
404}
405
406/**
407 * register_ftrace_event - register output for an event type
408 * @event: the event type to register
409 *
410 * Event types are stored in a hash and this hash is used to
411 * find a way to print an event. If the @event->type is set
412 * then it will use that type, otherwise it will assign a
413 * type to use.
414 *
415 * If you assign your own type, please make sure it is added
416 * to the trace_type enum in trace.h, to avoid collisions
417 * with the dynamic types.
418 *
419 * Returns the event type number or zero on error.
420 */
421int register_ftrace_event(struct trace_event *event)
422{
423 unsigned key;
424 int ret = 0;
425
426 mutex_lock(&trace_event_mutex);
427
428 if (!event->type)
429 event->type = next_event_type++;
430 else if (event->type > __TRACE_LAST_TYPE) {
431 printk(KERN_WARNING "Need to add type to trace.h\n");
432 WARN_ON(1);
433 }
434
435 if (ftrace_find_event(event->type))
436 goto out;
437
438 key = event->type & (EVENT_HASHSIZE - 1);
439
440 hlist_add_head_rcu(&event->node, &event_hash[key]);
441
442 ret = event->type;
443 out:
444 mutex_unlock(&trace_event_mutex);
445
446 return ret;
447}
448
449/**
450 * unregister_ftrace_event - remove a no longer used event
451 * @event: the event to remove
452 */
453int unregister_ftrace_event(struct trace_event *event)
454{
455 mutex_lock(&trace_event_mutex);
456 hlist_del(&event->node);
457 mutex_unlock(&trace_event_mutex);
458
459 return 0;
460}
461
462/*
463 * Standard events
464 */
465
466enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags)
467{
468 return TRACE_TYPE_HANDLED;
469}
470
471/* TRACE_FN */
472static enum print_line_t trace_fn_latency(struct trace_iterator *iter,
473 int flags)
474{
475 struct ftrace_entry *field;
476 struct trace_seq *s = &iter->seq;
477
478 trace_assign_type(field, iter->ent);
479
480 if (!seq_print_ip_sym(s, field->ip, flags))
481 goto partial;
482 if (!trace_seq_puts(s, " ("))
483 goto partial;
484 if (!seq_print_ip_sym(s, field->parent_ip, flags))
485 goto partial;
486 if (!trace_seq_puts(s, ")\n"))
487 goto partial;
488
489 return TRACE_TYPE_HANDLED;
490
491 partial:
492 return TRACE_TYPE_PARTIAL_LINE;
493}
494
495static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
496{
497 struct ftrace_entry *field;
498 struct trace_seq *s = &iter->seq;
499
500 trace_assign_type(field, iter->ent);
501
502 if (!seq_print_ip_sym(s, field->ip, flags))
503 goto partial;
504
505 if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
506 if (!trace_seq_printf(s, " <-"))
507 goto partial;
508 if (!seq_print_ip_sym(s,
509 field->parent_ip,
510 flags))
511 goto partial;
512 }
513 if (!trace_seq_printf(s, "\n"))
514 goto partial;
515
516 return TRACE_TYPE_HANDLED;
517
518 partial:
519 return TRACE_TYPE_PARTIAL_LINE;
520}
521
522static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
523{
524 struct ftrace_entry *field;
525
526 trace_assign_type(field, iter->ent);
527
528 if (!trace_seq_printf(&iter->seq, "%lx %lx\n",
529 field->ip,
530 field->parent_ip))
531 return TRACE_TYPE_PARTIAL_LINE;
532
533 return TRACE_TYPE_HANDLED;
534}
535
536static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
537{
538 struct ftrace_entry *field;
539 struct trace_seq *s = &iter->seq;
540
541 trace_assign_type(field, iter->ent);
542
543 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
544 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
545
546 return TRACE_TYPE_HANDLED;
547}
548
549static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
550{
551 struct ftrace_entry *field;
552 struct trace_seq *s = &iter->seq;
553
554 trace_assign_type(field, iter->ent);
555
556 SEQ_PUT_FIELD_RET(s, field->ip);
557 SEQ_PUT_FIELD_RET(s, field->parent_ip);
558
559 return TRACE_TYPE_HANDLED;
560}
561
562static struct trace_event trace_fn_event = {
563 .type = TRACE_FN,
564 .trace = trace_fn_trace,
565 .latency_trace = trace_fn_latency,
566 .raw = trace_fn_raw,
567 .hex = trace_fn_hex,
568 .binary = trace_fn_bin,
569};
570
 571/* TRACE_CTX and TRACE_WAKE */
572static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
573 char *delim)
574{
575 struct ctx_switch_entry *field;
576 char *comm;
577 int S, T;
578
579 trace_assign_type(field, iter->ent);
580
581 T = task_state_char(field->next_state);
582 S = task_state_char(field->prev_state);
583 comm = trace_find_cmdline(field->next_pid);
584 if (!trace_seq_printf(&iter->seq,
585 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
586 field->prev_pid,
587 field->prev_prio,
588 S, delim,
589 field->next_cpu,
590 field->next_pid,
591 field->next_prio,
592 T, comm))
593 return TRACE_TYPE_PARTIAL_LINE;
594
595 return TRACE_TYPE_HANDLED;
596}
597
598static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags)
599{
600 return trace_ctxwake_print(iter, "==>");
601}
602
603static enum print_line_t trace_wake_print(struct trace_iterator *iter,
604 int flags)
605{
606 return trace_ctxwake_print(iter, " +");
607}
608
609static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
610{
611 struct ctx_switch_entry *field;
612 int T;
613
614 trace_assign_type(field, iter->ent);
615
616 if (!S)
 617		S = task_state_char(field->prev_state);
618 T = task_state_char(field->next_state);
619 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
620 field->prev_pid,
621 field->prev_prio,
622 S,
623 field->next_cpu,
624 field->next_pid,
625 field->next_prio,
626 T))
627 return TRACE_TYPE_PARTIAL_LINE;
628
629 return TRACE_TYPE_HANDLED;
630}
631
632static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags)
633{
634 return trace_ctxwake_raw(iter, 0);
635}
636
637static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags)
638{
639 return trace_ctxwake_raw(iter, '+');
640}
641
642
643static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
644{
645 struct ctx_switch_entry *field;
646 struct trace_seq *s = &iter->seq;
647 int T;
648
649 trace_assign_type(field, iter->ent);
650
651 if (!S)
 652		S = task_state_char(field->prev_state);
653 T = task_state_char(field->next_state);
654
655 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
656 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
657 SEQ_PUT_HEX_FIELD_RET(s, S);
658 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
659 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
660 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
661 SEQ_PUT_HEX_FIELD_RET(s, T);
662
663 return TRACE_TYPE_HANDLED;
664}
665
666static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags)
667{
668 return trace_ctxwake_hex(iter, 0);
669}
670
671static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags)
672{
673 return trace_ctxwake_hex(iter, '+');
674}
675
676static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
677 int flags)
678{
679 struct ctx_switch_entry *field;
680 struct trace_seq *s = &iter->seq;
681
682 trace_assign_type(field, iter->ent);
683
684 SEQ_PUT_FIELD_RET(s, field->prev_pid);
685 SEQ_PUT_FIELD_RET(s, field->prev_prio);
686 SEQ_PUT_FIELD_RET(s, field->prev_state);
687 SEQ_PUT_FIELD_RET(s, field->next_pid);
688 SEQ_PUT_FIELD_RET(s, field->next_prio);
689 SEQ_PUT_FIELD_RET(s, field->next_state);
690
691 return TRACE_TYPE_HANDLED;
692}
693
694static struct trace_event trace_ctx_event = {
695 .type = TRACE_CTX,
696 .trace = trace_ctx_print,
697 .latency_trace = trace_ctx_print,
698 .raw = trace_ctx_raw,
699 .hex = trace_ctx_hex,
700 .binary = trace_ctxwake_bin,
701};
702
703static struct trace_event trace_wake_event = {
704 .type = TRACE_WAKE,
705 .trace = trace_wake_print,
706 .latency_trace = trace_wake_print,
707 .raw = trace_wake_raw,
708 .hex = trace_wake_hex,
709 .binary = trace_ctxwake_bin,
710};
711
712/* TRACE_SPECIAL */
713static enum print_line_t trace_special_print(struct trace_iterator *iter,
714 int flags)
715{
716 struct special_entry *field;
717
718 trace_assign_type(field, iter->ent);
719
720 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
721 field->arg1,
722 field->arg2,
723 field->arg3))
724 return TRACE_TYPE_PARTIAL_LINE;
725
726 return TRACE_TYPE_HANDLED;
727}
728
729static enum print_line_t trace_special_hex(struct trace_iterator *iter,
730 int flags)
731{
732 struct special_entry *field;
733 struct trace_seq *s = &iter->seq;
734
735 trace_assign_type(field, iter->ent);
736
737 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
738 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
739 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
740
741 return TRACE_TYPE_HANDLED;
742}
743
744static enum print_line_t trace_special_bin(struct trace_iterator *iter,
745 int flags)
746{
747 struct special_entry *field;
748 struct trace_seq *s = &iter->seq;
749
750 trace_assign_type(field, iter->ent);
751
752 SEQ_PUT_FIELD_RET(s, field->arg1);
753 SEQ_PUT_FIELD_RET(s, field->arg2);
754 SEQ_PUT_FIELD_RET(s, field->arg3);
755
756 return TRACE_TYPE_HANDLED;
757}
758
759static struct trace_event trace_special_event = {
760 .type = TRACE_SPECIAL,
761 .trace = trace_special_print,
762 .latency_trace = trace_special_print,
763 .raw = trace_special_print,
764 .hex = trace_special_hex,
765 .binary = trace_special_bin,
766};
767
768/* TRACE_STACK */
769
770static enum print_line_t trace_stack_print(struct trace_iterator *iter,
771 int flags)
772{
773 struct stack_entry *field;
774 struct trace_seq *s = &iter->seq;
775 int i;
776
777 trace_assign_type(field, iter->ent);
778
 779	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 780		if (i) {
 781			if (!trace_seq_puts(s, " <= "))
 782				goto partial;
 783		}
 784		if (!seq_print_ip_sym(s, field->caller[i], flags))
 785			goto partial;
 786	}
 787
 788	if (!trace_seq_puts(s, "\n"))
 789		goto partial;
790
791 return TRACE_TYPE_HANDLED;
792
793 partial:
794 return TRACE_TYPE_PARTIAL_LINE;
795}
796
797static struct trace_event trace_stack_event = {
798 .type = TRACE_STACK,
799 .trace = trace_stack_print,
800 .latency_trace = trace_stack_print,
801 .raw = trace_special_print,
802 .hex = trace_special_hex,
803 .binary = trace_special_bin,
804};
805
806/* TRACE_USER_STACK */
807static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
808 int flags)
809{
810 struct userstack_entry *field;
811 struct trace_seq *s = &iter->seq;
812
813 trace_assign_type(field, iter->ent);
814
815 if (!seq_print_userip_objs(field, s, flags))
816 goto partial;
817
818 if (!trace_seq_putc(s, '\n'))
819 goto partial;
820
821 return TRACE_TYPE_HANDLED;
822
823 partial:
824 return TRACE_TYPE_PARTIAL_LINE;
825}
826
827static struct trace_event trace_user_stack_event = {
828 .type = TRACE_USER_STACK,
829 .trace = trace_user_stack_print,
830 .latency_trace = trace_user_stack_print,
831 .raw = trace_special_print,
832 .hex = trace_special_hex,
833 .binary = trace_special_bin,
834};
835
836/* TRACE_PRINT */
837static enum print_line_t trace_print_print(struct trace_iterator *iter,
838 int flags)
839{
840 struct print_entry *field;
841 struct trace_seq *s = &iter->seq;
842
843 trace_assign_type(field, iter->ent);
844
845 if (!seq_print_ip_sym(s, field->ip, flags))
846 goto partial;
847
848 if (!trace_seq_printf(s, ": %s", field->buf))
849 goto partial;
850
851 return TRACE_TYPE_HANDLED;
852
853 partial:
854 return TRACE_TYPE_PARTIAL_LINE;
855}
856
857static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
858{
859 struct print_entry *field;
860
861 trace_assign_type(field, iter->ent);
862
863 if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
864 goto partial;
865
866 return TRACE_TYPE_HANDLED;
867
868 partial:
869 return TRACE_TYPE_PARTIAL_LINE;
870}
871
872static struct trace_event trace_print_event = {
873 .type = TRACE_PRINT,
874 .trace = trace_print_print,
875 .latency_trace = trace_print_print,
876 .raw = trace_print_raw,
877 .hex = trace_nop_print,
878 .binary = trace_nop_print,
879};
880
881static struct trace_event *events[] __initdata = {
882 &trace_fn_event,
883 &trace_ctx_event,
884 &trace_wake_event,
885 &trace_special_event,
886 &trace_stack_event,
887 &trace_user_stack_event,
888 &trace_print_event,
889 NULL
890};
891
892__init static int init_events(void)
893{
894 struct trace_event *event;
895 int i, ret;
896
897 for (i = 0; events[i]; i++) {
898 event = events[i];
899
900 ret = register_ftrace_event(event);
901 if (!ret) {
902 printk(KERN_WARNING "event %d failed to register\n",
903 event->type);
904 WARN_ON_ONCE(1);
905 }
906 }
907
908 return 0;
909}
910device_initcall(init_events);
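
The register_ftrace_event() interface above is how a tracer plugs its own formatting into the output core: one callback per print mode (trace, latency_trace, raw, hex, binary), hashed by event type. The following is a minimal sketch of such a registration; the event name, its print handler and the use of a dynamically assigned type (type = 0) are illustrative only and not part of this patch, but the calls rely solely on the declarations in trace_output.h.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>

#include "trace_output.h"

static enum print_line_t my_event_print(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;

	/* Emit one line of text; report a partial line if the seq buffer filled up. */
	if (!trace_seq_printf(s, "my event\n"))
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

static struct trace_event my_trace_event = {
	.type		= 0,			/* 0: let register_ftrace_event() assign one */
	.trace		= my_event_print,
	.latency_trace	= my_event_print,
	.raw		= trace_nop_print,	/* no raw/hex/binary output for this event */
	.hex		= trace_nop_print,
	.binary		= trace_nop_print,
};

static __init int my_event_register(void)
{
	/* register_ftrace_event() returns the event type, or 0 on failure. */
	if (!register_ftrace_event(&my_trace_event))
		return -ENODEV;
	return 0;
}
device_initcall(my_event_register);

trace_nop_print() is provided by this file precisely so that events which only care about one or two output modes can stub out the rest.
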
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644
index 000000000000..551a25a72217
--- /dev/null
+++ b/kernel/trace/trace_output.h
@@ -0,0 +1,62 @@
1#ifndef __TRACE_EVENTS_H
2#define __TRACE_EVENTS_H
3
4#include "trace.h"
5
6typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
7 int flags);
8
9struct trace_event {
10 struct hlist_node node;
11 int type;
12 trace_print_func trace;
13 trace_print_func latency_trace;
14 trace_print_func raw;
15 trace_print_func hex;
16 trace_print_func binary;
17};
18
19extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
20 __attribute__ ((format (printf, 2, 3)));
21extern int
22seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
23 unsigned long sym_flags);
24extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
25 size_t cnt);
26int trace_seq_puts(struct trace_seq *s, const char *str);
27int trace_seq_putc(struct trace_seq *s, unsigned char c);
28int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
29int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
30int trace_seq_path(struct trace_seq *s, struct path *path);
31int seq_print_userip_objs(const struct userstack_entry *entry,
32 struct trace_seq *s, unsigned long sym_flags);
33int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
34 unsigned long ip, unsigned long sym_flags);
35
36int trace_print_context(struct trace_iterator *iter);
37int trace_print_lat_context(struct trace_iterator *iter);
38
39struct trace_event *ftrace_find_event(int type);
40int register_ftrace_event(struct trace_event *event);
41int unregister_ftrace_event(struct trace_event *event);
42
43enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags);
44
45#define MAX_MEMHEX_BYTES 8
46#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
47
48#define SEQ_PUT_FIELD_RET(s, x) \
49do { \
50 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
51 return TRACE_TYPE_PARTIAL_LINE; \
52} while (0)
53
54#define SEQ_PUT_HEX_FIELD_RET(s, x) \
55do { \
56 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
57 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
58 return TRACE_TYPE_PARTIAL_LINE; \
59} while (0)
60
61#endif
62
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 7bda248daf55..faa6ab7a1f5c 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -16,6 +16,7 @@
16#include <linux/module.h> 16#include <linux/module.h>
17 17
18#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
19 20
20static struct trace_array *power_trace; 21static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled; 22static int __read_mostly trace_power_enabled;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 42ae1e77b6b3..a48c9b4b0c85 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -25,6 +25,7 @@ static int __read_mostly tracer_enabled;
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static unsigned wakeup_prio = -1; 27static unsigned wakeup_prio = -1;
28static int wakeup_rt;
28 29
29static raw_spinlock_t wakeup_lock = 30static raw_spinlock_t wakeup_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
@@ -152,6 +153,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
152 goto out_unlock; 153 goto out_unlock;
153 154
154 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 155 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, data, prev, next, flags, pc);
155 157
156 /* 158 /*
157 * usecs conversion is slow so we try to delay the conversion 159 * usecs conversion is slow so we try to delay the conversion
@@ -182,13 +184,10 @@ out:
182 184
183static void __wakeup_reset(struct trace_array *tr) 185static void __wakeup_reset(struct trace_array *tr)
184{ 186{
185 struct trace_array_cpu *data;
186 int cpu; 187 int cpu;
187 188
188 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu)
189 data = tr->data[cpu];
190 tracing_reset(tr, cpu); 190 tracing_reset(tr, cpu);
191 }
192 191
193 wakeup_cpu = -1; 192 wakeup_cpu = -1;
194 wakeup_prio = -1; 193 wakeup_prio = -1;
@@ -213,6 +212,7 @@ static void wakeup_reset(struct trace_array *tr)
213static void 212static void
214probe_wakeup(struct rq *rq, struct task_struct *p, int success) 213probe_wakeup(struct rq *rq, struct task_struct *p, int success)
215{ 214{
215 struct trace_array_cpu *data;
216 int cpu = smp_processor_id(); 216 int cpu = smp_processor_id();
217 unsigned long flags; 217 unsigned long flags;
218 long disabled; 218 long disabled;
@@ -224,7 +224,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
224 tracing_record_cmdline(p); 224 tracing_record_cmdline(p);
225 tracing_record_cmdline(current); 225 tracing_record_cmdline(current);
226 226
227 if (likely(!rt_task(p)) || 227 if ((wakeup_rt && !rt_task(p)) ||
228 p->prio >= wakeup_prio || 228 p->prio >= wakeup_prio ||
229 p->prio >= current->prio) 229 p->prio >= current->prio)
230 return; 230 return;
@@ -252,9 +252,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
252 252
253 local_save_flags(flags); 253 local_save_flags(flags);
254 254
255 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 255 data = wakeup_trace->data[wakeup_cpu];
256 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], 256 data->preempt_timestamp = ftrace_now(cpu);
257 CALLER_ADDR1, CALLER_ADDR2, flags, pc); 257 tracing_sched_wakeup_trace(wakeup_trace, data, p, current,
258 flags, pc);
259 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2,
260 flags, pc);
258 261
259out_locked: 262out_locked:
260 __raw_spin_unlock(&wakeup_lock); 263 __raw_spin_unlock(&wakeup_lock);
@@ -262,12 +265,6 @@ out:
262 atomic_dec(&wakeup_trace->data[cpu]->disabled); 265 atomic_dec(&wakeup_trace->data[cpu]->disabled);
263} 266}
264 267
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
271static void start_wakeup_tracer(struct trace_array *tr) 268static void start_wakeup_tracer(struct trace_array *tr)
272{ 269{
273 int ret; 270 int ret;
@@ -306,13 +303,10 @@ static void start_wakeup_tracer(struct trace_array *tr)
306 303
307 register_ftrace_function(&trace_ops); 304 register_ftrace_function(&trace_ops);
308 305
309 if (tracing_is_enabled()) { 306 if (tracing_is_enabled())
310 tracer_enabled = 1; 307 tracer_enabled = 1;
311 save_tracer_enabled = 1; 308 else
312 } else {
313 tracer_enabled = 0; 309 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
316 310
317 return; 311 return;
318fail_deprobe_wake_new: 312fail_deprobe_wake_new:
@@ -324,14 +318,13 @@ fail_deprobe:
324static void stop_wakeup_tracer(struct trace_array *tr) 318static void stop_wakeup_tracer(struct trace_array *tr)
325{ 319{
326 tracer_enabled = 0; 320 tracer_enabled = 0;
327 save_tracer_enabled = 0;
328 unregister_ftrace_function(&trace_ops); 321 unregister_ftrace_function(&trace_ops);
329 unregister_trace_sched_switch(probe_wakeup_sched_switch); 322 unregister_trace_sched_switch(probe_wakeup_sched_switch);
330 unregister_trace_sched_wakeup_new(probe_wakeup); 323 unregister_trace_sched_wakeup_new(probe_wakeup);
331 unregister_trace_sched_wakeup(probe_wakeup); 324 unregister_trace_sched_wakeup(probe_wakeup);
332} 325}
333 326
334static int wakeup_tracer_init(struct trace_array *tr) 327static int __wakeup_tracer_init(struct trace_array *tr)
335{ 328{
336 tracing_max_latency = 0; 329 tracing_max_latency = 0;
337 wakeup_trace = tr; 330 wakeup_trace = tr;
@@ -339,6 +332,18 @@ static int wakeup_tracer_init(struct trace_array *tr)
339 return 0; 332 return 0;
340} 333}
341 334
335static int wakeup_tracer_init(struct trace_array *tr)
336{
337 wakeup_rt = 0;
338 return __wakeup_tracer_init(tr);
339}
340
341static int wakeup_rt_tracer_init(struct trace_array *tr)
342{
343 wakeup_rt = 1;
344 return __wakeup_tracer_init(tr);
345}
346
342static void wakeup_tracer_reset(struct trace_array *tr) 347static void wakeup_tracer_reset(struct trace_array *tr)
343{ 348{
344 stop_wakeup_tracer(tr); 349 stop_wakeup_tracer(tr);
@@ -350,28 +355,11 @@ static void wakeup_tracer_start(struct trace_array *tr)
350{ 355{
351 wakeup_reset(tr); 356 wakeup_reset(tr);
352 tracer_enabled = 1; 357 tracer_enabled = 1;
353 save_tracer_enabled = 1;
354} 358}
355 359
356static void wakeup_tracer_stop(struct trace_array *tr) 360static void wakeup_tracer_stop(struct trace_array *tr)
357{ 361{
358 tracer_enabled = 0; 362 tracer_enabled = 0;
359 save_tracer_enabled = 0;
360}
361
362static void wakeup_tracer_open(struct trace_iterator *iter)
363{
364 /* stop the trace while dumping */
365 tracer_enabled = 0;
366}
367
368static void wakeup_tracer_close(struct trace_iterator *iter)
369{
370 /* forget about any processes we were recording */
371 if (save_tracer_enabled) {
372 wakeup_reset(iter->tr);
373 tracer_enabled = 1;
374 }
375} 363}
376 364
377static struct tracer wakeup_tracer __read_mostly = 365static struct tracer wakeup_tracer __read_mostly =
@@ -381,8 +369,19 @@ static struct tracer wakeup_tracer __read_mostly =
381 .reset = wakeup_tracer_reset, 369 .reset = wakeup_tracer_reset,
382 .start = wakeup_tracer_start, 370 .start = wakeup_tracer_start,
383 .stop = wakeup_tracer_stop, 371 .stop = wakeup_tracer_stop,
384 .open = wakeup_tracer_open, 372 .print_max = 1,
385 .close = wakeup_tracer_close, 373#ifdef CONFIG_FTRACE_SELFTEST
374 .selftest = trace_selftest_startup_wakeup,
375#endif
376};
377
378static struct tracer wakeup_rt_tracer __read_mostly =
379{
380 .name = "wakeup_rt",
381 .init = wakeup_rt_tracer_init,
382 .reset = wakeup_tracer_reset,
383 .start = wakeup_tracer_start,
384 .stop = wakeup_tracer_stop,
386 .print_max = 1, 385 .print_max = 1,
387#ifdef CONFIG_FTRACE_SELFTEST 386#ifdef CONFIG_FTRACE_SELFTEST
388 .selftest = trace_selftest_startup_wakeup, 387 .selftest = trace_selftest_startup_wakeup,
@@ -397,6 +396,10 @@ __init static int init_wakeup_tracer(void)
397 if (ret) 396 if (ret)
398 return ret; 397 return ret;
399 398
399 ret = register_tracer(&wakeup_rt_tracer);
400 if (ret)
401 return ret;
402
400 return 0; 403 return 0;
401} 404}
402device_initcall(init_wakeup_tracer); 405device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb70f54a..5013812578b1 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,7 +9,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
13 case TRACE_STACK: 12 case TRACE_STACK:
14 case TRACE_PRINT: 13 case TRACE_PRINT:
15 case TRACE_SPECIAL: 14 case TRACE_SPECIAL:
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644
index 000000000000..eae9cef39291
--- /dev/null
+++ b/kernel/trace/trace_stat.c
@@ -0,0 +1,319 @@
1/*
2 * Infrastructure for statistic tracing (histogram output).
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 * Based on the code from trace_branch.c which is
7 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
8 *
9 */
10
11
12#include <linux/list.h>
13#include <linux/debugfs.h>
14#include "trace_stat.h"
15#include "trace.h"
16
17
18/* List of stat entries from a tracer */
19struct trace_stat_list {
20 struct list_head list;
21 void *stat;
22};
23
24/* A stat session is the stats output in one file */
25struct tracer_stat_session {
26 struct list_head session_list;
27 struct tracer_stat *ts;
28 struct list_head stat_list;
29 struct mutex stat_mutex;
30 struct dentry *file;
31};
32
  33/* All of the sessions currently in use. Each stat file embeds one session */
34static LIST_HEAD(all_stat_sessions);
35static DEFINE_MUTEX(all_stat_sessions_mutex);
36
37/* The root directory for all stat files */
38static struct dentry *stat_dir;
39
40
41static void reset_stat_session(struct tracer_stat_session *session)
42{
43 struct trace_stat_list *node, *next;
44
45 list_for_each_entry_safe(node, next, &session->stat_list, list)
46 kfree(node);
47
48 INIT_LIST_HEAD(&session->stat_list);
49}
50
51static void destroy_session(struct tracer_stat_session *session)
52{
53 debugfs_remove(session->file);
54 reset_stat_session(session);
55 mutex_destroy(&session->stat_mutex);
56 kfree(session);
57}
58
59/*
  60 * For tracers that don't provide a stat_cmp callback,
  61 * this one forces an immediate insertion at the tail of
  62 * the list.
63 */
64static int dummy_cmp(void *p1, void *p2)
65{
66 return 1;
67}
68
69/*
70 * Initialize the stat list at each trace_stat file opening.
  71 * All of this copying and sorting has to be redone on every open,
72 * since the stats could have changed between two file sessions.
73 */
74static int stat_seq_init(struct tracer_stat_session *session)
75{
76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts;
78 void *prev_stat;
79 int ret = 0;
80 int i;
81
82 mutex_lock(&session->stat_mutex);
83 reset_stat_session(session);
84
85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp;
87
88 /*
89 * The first entry. Actually this is the second, but the first
90 * one (the stat_list head) is pointless.
91 */
92 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
93 if (!new_entry) {
94 ret = -ENOMEM;
95 goto exit;
96 }
97
98 INIT_LIST_HEAD(&new_entry->list);
99
100 list_add(&new_entry->list, &session->stat_list);
101
102 new_entry->stat = ts->stat_start();
103 prev_stat = new_entry->stat;
104
105 /*
106 * Iterate over the tracer stat entries and store them in a sorted
107 * list.
108 */
109 for (i = 1; ; i++) {
110 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
111 if (!new_entry) {
112 ret = -ENOMEM;
113 goto exit_free_list;
114 }
115
116 INIT_LIST_HEAD(&new_entry->list);
117 new_entry->stat = ts->stat_next(prev_stat, i);
118
119 /* End of insertion */
120 if (!new_entry->stat)
121 break;
122
123 list_for_each_entry(iter_entry, &session->stat_list, list) {
124
 125			/* Insert in descending order */
126 if (ts->stat_cmp(new_entry->stat,
127 iter_entry->stat) > 0) {
128
129 list_add_tail(&new_entry->list,
130 &iter_entry->list);
131 break;
132
 133			/* The new entry is the smallest so far: put it at the tail */
134 } else if (list_is_last(&iter_entry->list,
135 &session->stat_list)) {
136 list_add(&new_entry->list, &iter_entry->list);
137 break;
138 }
139 }
140
141 prev_stat = new_entry->stat;
142 }
143exit:
144 mutex_unlock(&session->stat_mutex);
145 return ret;
146
147exit_free_list:
148 reset_stat_session(session);
149 mutex_unlock(&session->stat_mutex);
150 return ret;
151}
152
153
154static void *stat_seq_start(struct seq_file *s, loff_t *pos)
155{
156 struct tracer_stat_session *session = s->private;
157
 158	/* Prevent a concurrent tracer switch or stat_list modification */
159 mutex_lock(&session->stat_mutex);
160
 161	/* If we are at the beginning of the file, print the headers */
162 if (!*pos && session->ts->stat_headers)
163 session->ts->stat_headers(s);
164
165 return seq_list_start(&session->stat_list, *pos);
166}
167
168static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
169{
170 struct tracer_stat_session *session = s->private;
171
172 return seq_list_next(p, &session->stat_list, pos);
173}
174
175static void stat_seq_stop(struct seq_file *s, void *p)
176{
177 struct tracer_stat_session *session = s->private;
178 mutex_unlock(&session->stat_mutex);
179}
180
181static int stat_seq_show(struct seq_file *s, void *v)
182{
183 struct tracer_stat_session *session = s->private;
184 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
185
186 return session->ts->stat_show(s, l->stat);
187}
188
189static const struct seq_operations trace_stat_seq_ops = {
190 .start = stat_seq_start,
191 .next = stat_seq_next,
192 .stop = stat_seq_stop,
193 .show = stat_seq_show
194};
195
 196/* The session's stat list is refilled and re-sorted at each stat file opening */
197static int tracing_stat_open(struct inode *inode, struct file *file)
198{
199 int ret;
200
201 struct tracer_stat_session *session = inode->i_private;
202
203 ret = seq_open(file, &trace_stat_seq_ops);
204 if (!ret) {
205 struct seq_file *m = file->private_data;
206 m->private = session;
207 ret = stat_seq_init(session);
208 }
209
210 return ret;
211}
212
213/*
214 * Avoid consuming memory with our now useless list.
215 */
216static int tracing_stat_release(struct inode *i, struct file *f)
217{
218 struct tracer_stat_session *session = i->i_private;
219
220 mutex_lock(&session->stat_mutex);
221 reset_stat_session(session);
222 mutex_unlock(&session->stat_mutex);
223
224 return 0;
225}
226
227static const struct file_operations tracing_stat_fops = {
228 .open = tracing_stat_open,
229 .read = seq_read,
230 .llseek = seq_lseek,
231 .release = tracing_stat_release
232};
233
234static int tracing_stat_init(void)
235{
236 struct dentry *d_tracing;
237
238 d_tracing = tracing_init_dentry();
239
240 stat_dir = debugfs_create_dir("trace_stat", d_tracing);
241 if (!stat_dir)
242 pr_warning("Could not create debugfs "
243 "'trace_stat' entry\n");
244 return 0;
245}
246
247static int init_stat_file(struct tracer_stat_session *session)
248{
249 if (!stat_dir && tracing_stat_init())
250 return -ENODEV;
251
252 session->file = debugfs_create_file(session->ts->name, 0644,
253 stat_dir,
254 session, &tracing_stat_fops);
255 if (!session->file)
256 return -ENOMEM;
257 return 0;
258}
259
260int register_stat_tracer(struct tracer_stat *trace)
261{
262 struct tracer_stat_session *session, *node, *tmp;
263 int ret;
264
265 if (!trace)
266 return -EINVAL;
267
268 if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
269 return -EINVAL;
270
271 /* Already registered? */
272 mutex_lock(&all_stat_sessions_mutex);
273 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
274 if (node->ts == trace) {
275 mutex_unlock(&all_stat_sessions_mutex);
276 return -EINVAL;
277 }
278 }
279 mutex_unlock(&all_stat_sessions_mutex);
280
281 /* Init the session */
282 session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
283 if (!session)
284 return -ENOMEM;
285
286 session->ts = trace;
287 INIT_LIST_HEAD(&session->session_list);
288 INIT_LIST_HEAD(&session->stat_list);
289 mutex_init(&session->stat_mutex);
290 session->file = NULL;
291
292 ret = init_stat_file(session);
293 if (ret) {
294 destroy_session(session);
295 return ret;
296 }
297
298 /* Register */
299 mutex_lock(&all_stat_sessions_mutex);
300 list_add_tail(&session->session_list, &all_stat_sessions);
301 mutex_unlock(&all_stat_sessions_mutex);
302
303 return 0;
304}
305
306void unregister_stat_tracer(struct tracer_stat *trace)
307{
308 struct tracer_stat_session *node, *tmp;
309
310 mutex_lock(&all_stat_sessions_mutex);
311 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
312 if (node->ts == trace) {
313 list_del(&node->session_list);
314 destroy_session(node);
315 break;
316 }
317 }
318 mutex_unlock(&all_stat_sessions_mutex);
319}
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
new file mode 100644
index 000000000000..202274cf7f3d
--- /dev/null
+++ b/kernel/trace/trace_stat.h
@@ -0,0 +1,31 @@
1#ifndef __TRACE_STAT_H
2#define __TRACE_STAT_H
3
4#include <linux/seq_file.h>
5
6/*
7 * If you want to provide a stat file (one-shot statistics), fill
   8 * an iterator with stat_start/stat_next and a stat_show callback.
   9 * The other callbacks are optional.
10 */
11struct tracer_stat {
12 /* The name of your stat file */
13 const char *name;
14 /* Iteration over statistic entries */
15 void *(*stat_start)(void);
16 void *(*stat_next)(void *prev, int idx);
17 /* Compare two entries for stats sorting */
18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s);
23};
24
25/*
  26 * Create or destroy a stat file
27 */
28extern int register_stat_tracer(struct tracer_stat *trace);
29extern void unregister_stat_tracer(struct tracer_stat *trace);
30
31#endif /* __TRACE_STAT_H */
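
To show how these callbacks fit together, here is a minimal sketch of a stat tracer built over a fixed array of counters; the array, the helper names and the "my_counters" file are made up for the example, while struct tracer_stat and register_stat_tracer() come from this patch.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/seq_file.h>

#include "trace_stat.h"

/* Hypothetical data set: a small fixed array of counters. */
static unsigned long my_counters[4];

static void *my_stat_start(void)
{
	return &my_counters[0];
}

static void *my_stat_next(void *prev, int idx)
{
	/* @idx counts the entries already returned; NULL ends the iteration. */
	if (idx >= ARRAY_SIZE(my_counters))
		return NULL;
	return &my_counters[idx];
}

static int my_stat_show(struct seq_file *s, void *p)
{
	unsigned long *counter = p;

	seq_printf(s, "%4ld %10lu\n",
		   (long)(counter - my_counters), *counter);
	return 0;
}

static int my_stat_headers(struct seq_file *s)
{
	seq_printf(s, "# SLOT      COUNT\n");
	return 0;
}

static struct tracer_stat my_stats = {
	.name		= "my_counters",	/* file created under trace_stat/ */
	.stat_start	= my_stat_start,
	.stat_next	= my_stat_next,
	.stat_show	= my_stat_show,
	.stat_headers	= my_stat_headers,
	/* no .stat_cmp: the core falls back to its default comparison */
};

static int __init my_stats_init(void)
{
	return register_stat_tracer(&my_stats);
}
fs_initcall(my_stats_init);

Each open of the resulting trace_stat/my_counters file re-runs the stat_start/stat_next iteration, so the snapshot is rebuilt (and re-sorted) on every read session.
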
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 000000000000..4664990fe9c5
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,281 @@
1/*
2 * Workqueue statistical tracer.
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8
9#include <trace/workqueue.h>
10#include <linux/list.h>
11#include <linux/percpu.h>
12#include "trace_stat.h"
13#include "trace.h"
14
15
16/* A cpu workqueue thread */
17struct cpu_workqueue_stats {
18 struct list_head list;
  19/* Useful to know whether we must print the cpu headers */
20 bool first_entry;
21 int cpu;
22 pid_t pid;
  23/* Can be inserted from interrupt or user context, so it needs to be atomic */
24 atomic_t inserted;
25/*
  26 * Doesn't need to be atomic: works are serialized by a single workqueue thread
27 * on a single CPU.
28 */
29 unsigned int executed;
30};
31
32/* List of workqueue threads on one cpu */
33struct workqueue_global_stats {
34 struct list_head list;
35 spinlock_t lock;
36};
37
  38/* No global lock is needed: these are allocated before the workqueues, and
39 * never freed.
40 */
41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
43
44/* Insertion of a work */
45static void
46probe_workqueue_insertion(struct task_struct *wq_thread,
47 struct work_struct *work)
48{
49 int cpu = cpumask_first(&wq_thread->cpus_allowed);
50 struct cpu_workqueue_stats *node, *next;
51 unsigned long flags;
52
53 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
54 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
55 list) {
56 if (node->pid == wq_thread->pid) {
57 atomic_inc(&node->inserted);
58 goto found;
59 }
60 }
61 pr_debug("trace_workqueue: entry not found\n");
62found:
63 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
64}
65
66/* Execution of a work */
67static void
68probe_workqueue_execution(struct task_struct *wq_thread,
69 struct work_struct *work)
70{
71 int cpu = cpumask_first(&wq_thread->cpus_allowed);
72 struct cpu_workqueue_stats *node, *next;
73 unsigned long flags;
74
75 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
76 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
77 list) {
78 if (node->pid == wq_thread->pid) {
79 node->executed++;
80 goto found;
81 }
82 }
83 pr_debug("trace_workqueue: entry not found\n");
84found:
85 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
86}
87
88/* Creation of a cpu workqueue thread */
89static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
90{
91 struct cpu_workqueue_stats *cws;
92 unsigned long flags;
93
94 WARN_ON(cpu < 0 || cpu >= num_possible_cpus());
95
96 /* Workqueues are sometimes created in atomic context */
97 cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
98 if (!cws) {
99 pr_warning("trace_workqueue: not enough memory\n");
100 return;
101 }
102 tracing_record_cmdline(wq_thread);
103
104 INIT_LIST_HEAD(&cws->list);
105 cws->cpu = cpu;
106
107 cws->pid = wq_thread->pid;
108
109 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
110 if (list_empty(&workqueue_cpu_stat(cpu)->list))
111 cws->first_entry = true;
112 list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
113 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
114}
115
116/* Destruction of a cpu workqueue thread */
117static void probe_workqueue_destruction(struct task_struct *wq_thread)
118{
 119	/* A workqueue thread only executes on one cpu */
120 int cpu = cpumask_first(&wq_thread->cpus_allowed);
121 struct cpu_workqueue_stats *node, *next;
122 unsigned long flags;
123
124 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
125 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
126 list) {
127 if (node->pid == wq_thread->pid) {
128 list_del(&node->list);
129 kfree(node);
130 goto found;
131 }
132 }
133
 134	pr_debug("trace_workqueue: could not find workqueue to destroy\n");
135found:
136 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
137
138}
139
140static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
141{
142 unsigned long flags;
143 struct cpu_workqueue_stats *ret = NULL;
144
145
146 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
147
148 if (!list_empty(&workqueue_cpu_stat(cpu)->list))
149 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
150 struct cpu_workqueue_stats, list);
151
152 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
153
154 return ret;
155}
156
157static void *workqueue_stat_start(void)
158{
159 int cpu;
160 void *ret = NULL;
161
162 for_each_possible_cpu(cpu) {
163 ret = workqueue_stat_start_cpu(cpu);
164 if (ret)
165 return ret;
166 }
167 return NULL;
168}
169
170static void *workqueue_stat_next(void *prev, int idx)
171{
172 struct cpu_workqueue_stats *prev_cws = prev;
173 int cpu = prev_cws->cpu;
174 unsigned long flags;
175 void *ret = NULL;
176
177 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
178 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 for (++cpu ; cpu < num_possible_cpus(); cpu++) {
181 ret = workqueue_stat_start_cpu(cpu);
182 if (ret)
183 return ret;
184 }
185 return NULL;
186 }
187 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
188
189 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
190 list);
191}
192
193static int workqueue_stat_show(struct seq_file *s, void *p)
194{
195 struct cpu_workqueue_stats *cws = p;
196 unsigned long flags;
197 int cpu = cws->cpu;
198
199 seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
200 atomic_read(&cws->inserted),
201 cws->executed,
202 trace_find_cmdline(cws->pid));
203
204 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
205 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
206 seq_printf(s, "\n");
207 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
208
209 return 0;
210}
211
212static int workqueue_stat_headers(struct seq_file *s)
213{
214 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
215 seq_printf(s, "# | | | |\n\n");
216 return 0;
217}
218
219struct tracer_stat workqueue_stats __read_mostly = {
220 .name = "workqueues",
221 .stat_start = workqueue_stat_start,
222 .stat_next = workqueue_stat_next,
223 .stat_show = workqueue_stat_show,
224 .stat_headers = workqueue_stat_headers
225};
226
227
228int __init stat_workqueue_init(void)
229{
230 if (register_stat_tracer(&workqueue_stats)) {
231 pr_warning("Unable to register workqueue stat tracer\n");
232 return 1;
233 }
234
235 return 0;
236}
237fs_initcall(stat_workqueue_init);
238
239/*
240 * Workqueues are created very early, just after pre-smp initcalls.
241 * So we must register our tracepoints at this stage.
242 */
243int __init trace_workqueue_early_init(void)
244{
245 int ret, cpu;
246
247 ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
248 if (ret)
249 goto out;
250
251 ret = register_trace_workqueue_execution(probe_workqueue_execution);
252 if (ret)
253 goto no_insertion;
254
255 ret = register_trace_workqueue_creation(probe_workqueue_creation);
256 if (ret)
257 goto no_execution;
258
259 ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
260 if (ret)
261 goto no_creation;
262
263 for_each_possible_cpu(cpu) {
264 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
265 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
266 }
267
268 return 0;
269
270no_creation:
271 unregister_trace_workqueue_creation(probe_workqueue_creation);
272no_execution:
273 unregister_trace_workqueue_execution(probe_workqueue_execution);
274no_insertion:
275 unregister_trace_workqueue_insertion(probe_workqueue_insertion);
276out:
277 pr_warning("trace_workqueue: unable to trace workqueues\n");
278
279 return 1;
280}
281early_initcall(trace_workqueue_early_init);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1f0c509b40d3..e53ee18ef431 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,6 +33,7 @@
33#include <linux/kallsyms.h> 33#include <linux/kallsyms.h>
34#include <linux/debug_locks.h> 34#include <linux/debug_locks.h>
35#include <linux/lockdep.h> 35#include <linux/lockdep.h>
36#include <trace/workqueue.h>
36 37
37/* 38/*
38 * The per-CPU workqueue (if single thread, we always use the first 39 * The per-CPU workqueue (if single thread, we always use the first
@@ -125,9 +126,13 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
125 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); 126 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
126} 127}
127 128
129DEFINE_TRACE(workqueue_insertion);
130
128static void insert_work(struct cpu_workqueue_struct *cwq, 131static void insert_work(struct cpu_workqueue_struct *cwq,
129 struct work_struct *work, struct list_head *head) 132 struct work_struct *work, struct list_head *head)
130{ 133{
134 trace_workqueue_insertion(cwq->thread, work);
135
131 set_wq_data(work, cwq); 136 set_wq_data(work, cwq);
132 /* 137 /*
133 * Ensure that we get the right work->data if we see the 138 * Ensure that we get the right work->data if we see the
@@ -259,6 +264,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
259} 264}
260EXPORT_SYMBOL_GPL(queue_delayed_work_on); 265EXPORT_SYMBOL_GPL(queue_delayed_work_on);
261 266
267DEFINE_TRACE(workqueue_execution);
268
262static void run_workqueue(struct cpu_workqueue_struct *cwq) 269static void run_workqueue(struct cpu_workqueue_struct *cwq)
263{ 270{
264 spin_lock_irq(&cwq->lock); 271 spin_lock_irq(&cwq->lock);
@@ -284,7 +291,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
284 */ 291 */
285 struct lockdep_map lockdep_map = work->lockdep_map; 292 struct lockdep_map lockdep_map = work->lockdep_map;
286#endif 293#endif
287 294 trace_workqueue_execution(cwq->thread, work);
288 cwq->current_work = work; 295 cwq->current_work = work;
289 list_del_init(cwq->worklist.next); 296 list_del_init(cwq->worklist.next);
290 spin_unlock_irq(&cwq->lock); 297 spin_unlock_irq(&cwq->lock);
@@ -765,6 +772,8 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
765 return cwq; 772 return cwq;
766} 773}
767 774
775DEFINE_TRACE(workqueue_creation);
776
768static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 777static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
769{ 778{
770 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 779 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -787,6 +796,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
787 sched_setscheduler_nocheck(p, SCHED_FIFO, &param); 796 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
788 cwq->thread = p; 797 cwq->thread = p;
789 798
799 trace_workqueue_creation(cwq->thread, cpu);
800
790 return 0; 801 return 0;
791} 802}
792 803
@@ -868,6 +879,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
868} 879}
869EXPORT_SYMBOL_GPL(__create_workqueue_key); 880EXPORT_SYMBOL_GPL(__create_workqueue_key);
870 881
882DEFINE_TRACE(workqueue_destruction);
883
871static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) 884static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
872{ 885{
873 /* 886 /*
@@ -891,6 +904,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
891 * checks list_empty(), and a "normal" queue_work() can't use 904 * checks list_empty(), and a "normal" queue_work() can't use
892 * a dead CPU. 905 * a dead CPU.
893 */ 906 */
907 trace_workqueue_destruction(cwq->thread);
894 kthread_stop(cwq->thread); 908 kthread_stop(cwq->thread);
895 cwq->thread = NULL; 909 cwq->thread = NULL;
896} 910}
diff --git a/mm/slab.c b/mm/slab.c
index ddc41f337d58..dae716b32915 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,6 +102,7 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <trace/kmemtrace.h>
105#include <linux/rcupdate.h> 106#include <linux/rcupdate.h>
106#include <linux/string.h> 107#include <linux/string.h>
107#include <linux/uaccess.h> 108#include <linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
568 569
569#endif 570#endif
570 571
572#ifdef CONFIG_KMEMTRACE
573size_t slab_buffer_size(struct kmem_cache *cachep)
574{
575 return cachep->buffer_size;
576}
577EXPORT_SYMBOL(slab_buffer_size);
578#endif
579
571/* 580/*
572 * Do not go above this order unless 0 objects fit into the slab. 581 * Do not go above this order unless 0 objects fit into the slab.
573 */ 582 */
@@ -3550,10 +3559,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3550 */ 3559 */
3551void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3560void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3552{ 3561{
3553 return __cache_alloc(cachep, flags, __builtin_return_address(0)); 3562 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3563
3564 kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
3565 obj_size(cachep), cachep->buffer_size, flags);
3566
3567 return ret;
3554} 3568}
3555EXPORT_SYMBOL(kmem_cache_alloc); 3569EXPORT_SYMBOL(kmem_cache_alloc);
3556 3570
3571#ifdef CONFIG_KMEMTRACE
3572void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
3573{
3574 return __cache_alloc(cachep, flags, __builtin_return_address(0));
3575}
3576EXPORT_SYMBOL(kmem_cache_alloc_notrace);
3577#endif
3578
3557/** 3579/**
3558 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry. 3580 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
3559 * @cachep: the cache we're checking against 3581 * @cachep: the cache we're checking against
@@ -3598,23 +3620,47 @@ out:
3598#ifdef CONFIG_NUMA 3620#ifdef CONFIG_NUMA
3599void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3621void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3600{ 3622{
3601 return __cache_alloc_node(cachep, flags, nodeid, 3623 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3602 __builtin_return_address(0)); 3624 __builtin_return_address(0));
3625
3626 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
3627 obj_size(cachep), cachep->buffer_size,
3628 flags, nodeid);
3629
3630 return ret;
3603} 3631}
3604EXPORT_SYMBOL(kmem_cache_alloc_node); 3632EXPORT_SYMBOL(kmem_cache_alloc_node);
3605 3633
3634#ifdef CONFIG_KMEMTRACE
3635void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
3636 gfp_t flags,
3637 int nodeid)
3638{
3639 return __cache_alloc_node(cachep, flags, nodeid,
3640 __builtin_return_address(0));
3641}
3642EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
3643#endif
3644
3606static __always_inline void * 3645static __always_inline void *
3607__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) 3646__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3608{ 3647{
3609 struct kmem_cache *cachep; 3648 struct kmem_cache *cachep;
3649 void *ret;
3610 3650
3611 cachep = kmem_find_general_cachep(size, flags); 3651 cachep = kmem_find_general_cachep(size, flags);
3612 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3652 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3613 return cachep; 3653 return cachep;
3614 return kmem_cache_alloc_node(cachep, flags, node); 3654 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
3655
3656 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
3657 (unsigned long) caller, ret,
3658 size, cachep->buffer_size, flags, node);
3659
3660 return ret;
3615} 3661}
3616 3662
3617#ifdef CONFIG_DEBUG_SLAB 3663#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3618void *__kmalloc_node(size_t size, gfp_t flags, int node) 3664void *__kmalloc_node(size_t size, gfp_t flags, int node)
3619{ 3665{
3620 return __do_kmalloc_node(size, flags, node, 3666 return __do_kmalloc_node(size, flags, node,
@@ -3647,6 +3693,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3647 void *caller) 3693 void *caller)
3648{ 3694{
3649 struct kmem_cache *cachep; 3695 struct kmem_cache *cachep;
3696 void *ret;
3650 3697
3651 /* If you want to save a few bytes .text space: replace 3698 /* If you want to save a few bytes .text space: replace
3652 * __ with kmem_. 3699 * __ with kmem_.
@@ -3656,11 +3703,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3656 cachep = __find_general_cachep(size, flags); 3703 cachep = __find_general_cachep(size, flags);
3657 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3704 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3658 return cachep; 3705 return cachep;
3659 return __cache_alloc(cachep, flags, caller); 3706 ret = __cache_alloc(cachep, flags, caller);
3707
3708 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
3709 (unsigned long) caller, ret,
3710 size, cachep->buffer_size, flags);
3711
3712 return ret;
3660} 3713}
3661 3714
3662 3715
3663#ifdef CONFIG_DEBUG_SLAB 3716#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3664void *__kmalloc(size_t size, gfp_t flags) 3717void *__kmalloc(size_t size, gfp_t flags)
3665{ 3718{
3666 return __do_kmalloc(size, flags, __builtin_return_address(0)); 3719 return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3699,6 +3752,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3699 debug_check_no_obj_freed(objp, obj_size(cachep)); 3752 debug_check_no_obj_freed(objp, obj_size(cachep));
3700 __cache_free(cachep, objp); 3753 __cache_free(cachep, objp);
3701 local_irq_restore(flags); 3754 local_irq_restore(flags);
3755
3756 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp);
3702} 3757}
3703EXPORT_SYMBOL(kmem_cache_free); 3758EXPORT_SYMBOL(kmem_cache_free);
3704 3759
@@ -3725,6 +3780,8 @@ void kfree(const void *objp)
3725 debug_check_no_obj_freed(objp, obj_size(c)); 3780 debug_check_no_obj_freed(objp, obj_size(c));
3726 __cache_free(c, (void *)objp); 3781 __cache_free(c, (void *)objp);
3727 local_irq_restore(flags); 3782 local_irq_restore(flags);
3783
3784 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp);
3728} 3785}
3729EXPORT_SYMBOL(kfree); 3786EXPORT_SYMBOL(kfree);
3730 3787
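The mm/slab.c hooks above all follow one pattern: each allocation entry point captures its return value and reports the call site, the returned object, the bytes the caller requested, the bytes the cache actually reserved, and the gfp flags, while each free path reports only the event type, call site and pointer. Below is a minimal sketch of that pattern, assuming it sits inside mm/slab.c (obj_size() and buffer_size are slab-internal); traced_cache_alloc() is a hypothetical name used for illustration, not part of the patch.

	#include <linux/slab.h>
	#include <trace/kmemtrace.h>

	static void *traced_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
	{
		/* Allocate without emitting a trace event of its own... */
		void *ret = kmem_cache_alloc_notrace(cachep, flags);

		/* ...then record call site, object, requested vs. reserved size, flags. */
		kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
				     obj_size(cachep), cachep->buffer_size, flags);
		return ret;
	}

The matching free-side hook is a single kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp) placed after the object has gone back to the cache, as in kmem_cache_free() above.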
diff --git a/mm/slob.c b/mm/slob.c
index bf7e8fc3aed8..4d1c0fc33b6b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
65#include <linux/module.h> 65#include <linux/module.h>
66#include <linux/rcupdate.h> 66#include <linux/rcupdate.h>
67#include <linux/list.h> 67#include <linux/list.h>
68#include <trace/kmemtrace.h>
68#include <asm/atomic.h> 69#include <asm/atomic.h>
69 70
70/* 71/*
@@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
463{ 464{
464 unsigned int *m; 465 unsigned int *m;
465 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 466 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
467 void *ret;
466 468
467 if (size < PAGE_SIZE - align) { 469 if (size < PAGE_SIZE - align) {
468 if (!size) 470 if (!size)
469 return ZERO_SIZE_PTR; 471 return ZERO_SIZE_PTR;
470 472
471 m = slob_alloc(size + align, gfp, align, node); 473 m = slob_alloc(size + align, gfp, align, node);
474
472 if (!m) 475 if (!m)
473 return NULL; 476 return NULL;
474 *m = size; 477 *m = size;
475 return (void *)m + align; 478 ret = (void *)m + align;
479
480 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
481 _RET_IP_, ret,
482 size, size + align, gfp, node);
476 } else { 483 } else {
477 void *ret; 484 unsigned int order = get_order(size);
478 485
479 ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); 486 ret = slob_new_page(gfp | __GFP_COMP, order, node);
480 if (ret) { 487 if (ret) {
481 struct page *page; 488 struct page *page;
482 page = virt_to_page(ret); 489 page = virt_to_page(ret);
483 page->private = size; 490 page->private = size;
484 } 491 }
485 return ret; 492
493 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
494 _RET_IP_, ret,
495 size, PAGE_SIZE << order, gfp, node);
486 } 496 }
497
498 return ret;
487} 499}
488EXPORT_SYMBOL(__kmalloc_node); 500EXPORT_SYMBOL(__kmalloc_node);
489 501
@@ -501,6 +513,8 @@ void kfree(const void *block)
501 slob_free(m, *m + align); 513 slob_free(m, *m + align);
502 } else 514 } else
503 put_page(&sp->page); 515 put_page(&sp->page);
516
517 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
504} 518}
505EXPORT_SYMBOL(kfree); 519EXPORT_SYMBOL(kfree);
506 520
@@ -569,10 +583,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
569{ 583{
570 void *b; 584 void *b;
571 585
572 if (c->size < PAGE_SIZE) 586 if (c->size < PAGE_SIZE) {
573 b = slob_alloc(c->size, flags, c->align, node); 587 b = slob_alloc(c->size, flags, c->align, node);
574 else 588 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
589 _RET_IP_, b, c->size,
590 SLOB_UNITS(c->size) * SLOB_UNIT,
591 flags, node);
592 } else {
575 b = slob_new_page(flags, get_order(c->size), node); 593 b = slob_new_page(flags, get_order(c->size), node);
594 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
595 _RET_IP_, b, c->size,
596 PAGE_SIZE << get_order(c->size),
597 flags, node);
598 }
576 599
577 if (c->ctor) 600 if (c->ctor)
578 c->ctor(b); 601 c->ctor(b);
@@ -608,6 +631,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
608 } else { 631 } else {
609 __kmem_cache_free(b, c->size); 632 __kmem_cache_free(b, c->size);
610 } 633 }
634
635 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
611} 636}
612EXPORT_SYMBOL(kmem_cache_free); 637EXPORT_SYMBOL(kmem_cache_free);
613 638
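Worth noting in the SLOB hunks is the bytes_alloc argument, which deliberately differs from the requested size: small objects carry an 'align'-byte size header in front of them, while large objects fall back to whole pages rounded up to a power-of-two order. A sketch of that computation, using a hypothetical helper (the patch simply open-codes the two calls in __kmalloc_node()):

	#include <linux/mm.h>		/* PAGE_SIZE, get_order() */
	#include <trace/kmemtrace.h>

	static void slob_trace_kmalloc(unsigned long call_site, void *ret,
				       size_t size, int align, gfp_t gfp, int node)
	{
		if (size < PAGE_SIZE - align)
			/* Small object: the size header costs 'align' extra bytes. */
			kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, call_site,
						  ret, size, size + align, gfp, node);
		else
			/* Large object: whole pages, rounded up to a power of two. */
			kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, call_site,
						  ret, size,
						  PAGE_SIZE << get_order(size),
						  gfp, node);
	}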
diff --git a/mm/slub.c b/mm/slub.c
index bdc9abb08a23..0343b3b88984 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <trace/kmemtrace.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/cpuset.h> 21#include <linux/cpuset.h>
21#include <linux/mempolicy.h> 22#include <linux/mempolicy.h>
@@ -1623,18 +1624,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1623 1624
1624void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1625void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1625{ 1626{
1626 return slab_alloc(s, gfpflags, -1, _RET_IP_); 1627 void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
1628
1629 kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
1630 s->objsize, s->size, gfpflags);
1631
1632 return ret;
1627} 1633}
1628EXPORT_SYMBOL(kmem_cache_alloc); 1634EXPORT_SYMBOL(kmem_cache_alloc);
1629 1635
1636#ifdef CONFIG_KMEMTRACE
1637void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
1638{
1639 return slab_alloc(s, gfpflags, -1, _RET_IP_);
1640}
1641EXPORT_SYMBOL(kmem_cache_alloc_notrace);
1642#endif
1643
1630#ifdef CONFIG_NUMA 1644#ifdef CONFIG_NUMA
1631void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 1645void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
1632{ 1646{
1633 return slab_alloc(s, gfpflags, node, _RET_IP_); 1647 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
1648
1649 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
1650 s->objsize, s->size, gfpflags, node);
1651
1652 return ret;
1634} 1653}
1635EXPORT_SYMBOL(kmem_cache_alloc_node); 1654EXPORT_SYMBOL(kmem_cache_alloc_node);
1636#endif 1655#endif
1637 1656
1657#ifdef CONFIG_KMEMTRACE
1658void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
1659 gfp_t gfpflags,
1660 int node)
1661{
1662 return slab_alloc(s, gfpflags, node, _RET_IP_);
1663}
1664EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
1665#endif
1666
1638/* 1667/*
1639 * Slow patch handling. This may still be called frequently since objects 1668 * Slow patch handling. This may still be called frequently since objects
1640 * have a longer lifetime than the cpu slabs in most processing loads. 1669 * have a longer lifetime than the cpu slabs in most processing loads.
@@ -1742,6 +1771,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
1742 page = virt_to_head_page(x); 1771 page = virt_to_head_page(x);
1743 1772
1744 slab_free(s, page, x, _RET_IP_); 1773 slab_free(s, page, x, _RET_IP_);
1774
1775 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
1745} 1776}
1746EXPORT_SYMBOL(kmem_cache_free); 1777EXPORT_SYMBOL(kmem_cache_free);
1747 1778
@@ -2657,6 +2688,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2657void *__kmalloc(size_t size, gfp_t flags) 2688void *__kmalloc(size_t size, gfp_t flags)
2658{ 2689{
2659 struct kmem_cache *s; 2690 struct kmem_cache *s;
2691 void *ret;
2660 2692
2661 if (unlikely(size > PAGE_SIZE)) 2693 if (unlikely(size > PAGE_SIZE))
2662 return kmalloc_large(size, flags); 2694 return kmalloc_large(size, flags);
@@ -2666,7 +2698,12 @@ void *__kmalloc(size_t size, gfp_t flags)
2666 if (unlikely(ZERO_OR_NULL_PTR(s))) 2698 if (unlikely(ZERO_OR_NULL_PTR(s)))
2667 return s; 2699 return s;
2668 2700
2669 return slab_alloc(s, flags, -1, _RET_IP_); 2701 ret = slab_alloc(s, flags, -1, _RET_IP_);
2702
2703 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
2704 size, s->size, flags);
2705
2706 return ret;
2670} 2707}
2671EXPORT_SYMBOL(__kmalloc); 2708EXPORT_SYMBOL(__kmalloc);
2672 2709
@@ -2685,16 +2722,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2685void *__kmalloc_node(size_t size, gfp_t flags, int node) 2722void *__kmalloc_node(size_t size, gfp_t flags, int node)
2686{ 2723{
2687 struct kmem_cache *s; 2724 struct kmem_cache *s;
2725 void *ret;
2688 2726
2689 if (unlikely(size > PAGE_SIZE)) 2727 if (unlikely(size > PAGE_SIZE)) {
2690 return kmalloc_large_node(size, flags, node); 2728 ret = kmalloc_large_node(size, flags, node);
2729
2730 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
2731 _RET_IP_, ret,
2732 size, PAGE_SIZE << get_order(size),
2733 flags, node);
2734
2735 return ret;
2736 }
2691 2737
2692 s = get_slab(size, flags); 2738 s = get_slab(size, flags);
2693 2739
2694 if (unlikely(ZERO_OR_NULL_PTR(s))) 2740 if (unlikely(ZERO_OR_NULL_PTR(s)))
2695 return s; 2741 return s;
2696 2742
2697 return slab_alloc(s, flags, node, _RET_IP_); 2743 ret = slab_alloc(s, flags, node, _RET_IP_);
2744
2745 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
2746 size, s->size, flags, node);
2747
2748 return ret;
2698} 2749}
2699EXPORT_SYMBOL(__kmalloc_node); 2750EXPORT_SYMBOL(__kmalloc_node);
2700#endif 2751#endif
@@ -2752,6 +2803,8 @@ void kfree(const void *x)
2752 return; 2803 return;
2753 } 2804 }
2754 slab_free(page->slab, page, object, _RET_IP_); 2805 slab_free(page->slab, page, object, _RET_IP_);
2806
2807 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
2755} 2808}
2756EXPORT_SYMBOL(kfree); 2809EXPORT_SYMBOL(kfree);
2757 2810
@@ -3221,6 +3274,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
3221void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) 3274void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3222{ 3275{
3223 struct kmem_cache *s; 3276 struct kmem_cache *s;
3277 void *ret;
3224 3278
3225 if (unlikely(size > PAGE_SIZE)) 3279 if (unlikely(size > PAGE_SIZE))
3226 return kmalloc_large(size, gfpflags); 3280 return kmalloc_large(size, gfpflags);
@@ -3230,13 +3284,20 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3230 if (unlikely(ZERO_OR_NULL_PTR(s))) 3284 if (unlikely(ZERO_OR_NULL_PTR(s)))
3231 return s; 3285 return s;
3232 3286
3233 return slab_alloc(s, gfpflags, -1, caller); 3287 ret = slab_alloc(s, gfpflags, -1, caller);
3288
3289 /* Honor the call site pointer we received. */
3290 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
3291 s->size, gfpflags);
3292
3293 return ret;
3234} 3294}
3235 3295
3236void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 3296void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3237 int node, unsigned long caller) 3297 int node, unsigned long caller)
3238{ 3298{
3239 struct kmem_cache *s; 3299 struct kmem_cache *s;
3300 void *ret;
3240 3301
3241 if (unlikely(size > PAGE_SIZE)) 3302 if (unlikely(size > PAGE_SIZE))
3242 return kmalloc_large_node(size, gfpflags, node); 3303 return kmalloc_large_node(size, gfpflags, node);
@@ -3246,7 +3307,13 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3246 if (unlikely(ZERO_OR_NULL_PTR(s))) 3307 if (unlikely(ZERO_OR_NULL_PTR(s)))
3247 return s; 3308 return s;
3248 3309
3249 return slab_alloc(s, gfpflags, node, caller); 3310 ret = slab_alloc(s, gfpflags, node, caller);
3311
3312 /* Honor the call site pointer we received. */
3313 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
3314 size, s->size, gfpflags, node);
3315
3316 return ret;
3250} 3317}
3251 3318
3252#ifdef CONFIG_SLUB_DEBUG 3319#ifdef CONFIG_SLUB_DEBUG
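Across SLAB and SLUB the exported *_notrace variants serve one purpose: a kmalloc-level caller wants to emit a single KMEMTRACE_TYPE_KMALLOC event carrying the size its caller actually asked for, so it needs a way into the cache allocator that does not also emit the KMEMTRACE_TYPE_CACHE event now built into kmem_cache_alloc()/kmem_cache_alloc_node(). __do_kmalloc_node() in the mm/slab.c hunk above is the in-tree user of this pattern; the sketch below restates it with SLUB naming (traced_kmalloc_node() is a hypothetical illustration, not part of the patch).

	#include <linux/slab.h>
	#include <trace/kmemtrace.h>

	static void *traced_kmalloc_node(struct kmem_cache *s, size_t size,
					 gfp_t flags, int node)
	{
		/* _notrace: allocate without logging a CACHE event... */
		void *ret = kmem_cache_alloc_node_notrace(s, flags, node);

		/* ...then log exactly one KMALLOC event with the requested size. */
		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
					  size, s->size, flags, node);
		return ret;
	}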
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index c7de8b39fcf1..39a9642927d3 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -112,13 +112,13 @@ endif
112# --------------------------------------------------------------------------- 112# ---------------------------------------------------------------------------
113 113
114# Default is built-in, unless we know otherwise 114# Default is built-in, unless we know otherwise
115modkern_cflags := $(CFLAGS_KERNEL) 115modkern_cflags = $(if $(part-of-module), $(CFLAGS_MODULE), $(CFLAGS_KERNEL))
116quiet_modtag := $(empty) $(empty) 116quiet_modtag := $(empty) $(empty)
117 117
118$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) 118$(real-objs-m) : part-of-module := y
119$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) 119$(real-objs-m:.o=.i) : part-of-module := y
120$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE) 120$(real-objs-m:.o=.s) : part-of-module := y
121$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) 121$(real-objs-m:.o=.lst): part-of-module := y
122 122
123$(real-objs-m) : quiet_modtag := [M] 123$(real-objs-m) : quiet_modtag := [M]
124$(real-objs-m:.o=.i) : quiet_modtag := [M] 124$(real-objs-m:.o=.i) : quiet_modtag := [M]
@@ -205,7 +205,8 @@ endif
205ifdef CONFIG_FTRACE_MCOUNT_RECORD 205ifdef CONFIG_FTRACE_MCOUNT_RECORD
206cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ 206cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
207 "$(if $(CONFIG_64BIT),64,32)" \ 207 "$(if $(CONFIG_64BIT),64,32)" \
208 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)"; 208 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
209 "$(if $(part-of-module),1,0)" "$(@)";
209endif 210endif
210 211
211define rule_cc_o_c 212define rule_cc_o_c
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index fe831412bea9..409596eca124 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -100,14 +100,19 @@ $P =~ s@.*/@@g;
100 100
101my $V = '0.1'; 101my $V = '0.1';
102 102
103if ($#ARGV < 6) { 103if ($#ARGV < 7) {
104 print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n"; 104 print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
105 print "version: $V\n"; 105 print "version: $V\n";
106 exit(1); 106 exit(1);
107} 107}
108 108
109my ($arch, $bits, $objdump, $objcopy, $cc, 109my ($arch, $bits, $objdump, $objcopy, $cc,
110 $ld, $nm, $rm, $mv, $inputfile) = @ARGV; 110 $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV;
111
112# This file refers to mcount and shouldn't be ftraced, so let's ignore it
113if ($inputfile eq "kernel/trace/ftrace.o") {
114 exit(0);
115}
111 116
112# Acceptable sections to record. 117# Acceptable sections to record.
113my %text_sections = ( 118my %text_sections = (
@@ -201,6 +206,13 @@ if ($arch eq "x86_64") {
201 $alignment = 2; 206 $alignment = 2;
202 $section_type = '%progbits'; 207 $section_type = '%progbits';
203 208
209} elsif ($arch eq "ia64") {
210 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
211 $type = "data8";
212
213 if ($is_module eq "0") {
214 $cc .= " -mconstant-gp";
215 }
204} else { 216} else {
205 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; 217 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
206} 218}
@@ -263,7 +275,6 @@ if (!$found_version) {
263 "\tDisabling local function references.\n"; 275 "\tDisabling local function references.\n";
264} 276}
265 277
266
267# 278#
268# Step 1: find all the local (static functions) and weak symbols. 279# Step 1: find all the local (static functions) and weak symbols.
269# 't' is local, 'w/W' is weak (we never use a weak function) 280# 't' is local, 'w/W' is weak (we never use a weak function)
@@ -331,13 +342,16 @@ sub update_funcs
331# 342#
332# Step 2: find the sections and mcount call sites 343# Step 2: find the sections and mcount call sites
333# 344#
334open(IN, "$objdump -dr $inputfile|") || die "error running $objdump"; 345open(IN, "$objdump -hdr $inputfile|") || die "error running $objdump";
335 346
336my $text; 347my $text;
337 348
349my $read_headers = 1;
350
338while (<IN>) { 351while (<IN>) {
339 # is it a section? 352 # is it a section?
340 if (/$section_regex/) { 353 if (/$section_regex/) {
354 $read_headers = 0;
341 355
342 # Only record text sections that we know are safe 356 # Only record text sections that we know are safe
343 if (defined($text_sections{$1})) { 357 if (defined($text_sections{$1})) {
@@ -371,6 +385,19 @@ while (<IN>) {
371 $ref_func = $text; 385 $ref_func = $text;
372 } 386 }
373 } 387 }
388 } elsif ($read_headers && /$mcount_section/) {
389 #
390 # Somehow the make process can execute this script on an
391 # object twice. If it does, we would duplicate the mcount
392 # section and it will cause the function tracer self test
393 # to fail. Check if the mcount section exists, and if it does,
394 # warn and exit.
395 #
396 print STDERR "ERROR: $mcount_section already in $inputfile\n" .
397 "\tThis may be an indication that your build is corrupted.\n" .
398 "\tDelete $inputfile and try again. If the same object file\n" .
399 "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n";
400 exit(-1);
374 } 401 }
375 402
376 # is this a call site to mcount? If so, record it to print later 403 # is this a call site to mcount? If so, record it to print later