-rw-r--r--Documentation/ftrace.txt149
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--Documentation/markers.txt29
-rw-r--r--Documentation/tracepoints.txt94
-rw-r--r--arch/powerpc/include/asm/ftrace.h14
-rw-r--r--arch/powerpc/include/asm/module.h16
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/entry_32.S40
-rw-r--r--arch/powerpc/kernel/entry_64.S12
-rw-r--r--arch/powerpc/kernel/ftrace.c461
-rw-r--r--arch/powerpc/kernel/idle.c5
-rw-r--r--arch/powerpc/kernel/module_32.c10
-rw-r--r--arch/powerpc/kernel/module_64.c13
-rw-r--r--arch/powerpc/lib/Makefile3
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.cpu1
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/include/asm/ds.h134
-rw-r--r--arch/x86/include/asm/ftrace.h34
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/kernel/Makefile7
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/ds.c692
-rw-r--r--arch/x86/kernel/entry_32.S47
-rw-r--r--arch/x86/kernel/entry_64.S79
-rw-r--r--arch/x86/kernel/ftrace.c375
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/ptrace.c98
-rw-r--r--arch/x86/kernel/stacktrace.c64
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--block/Kconfig1
-rw-r--r--block/blk-core.c46
-rw-r--r--block/blktrace.c332
-rw-r--r--block/elevator.c12
-rw-r--r--drivers/char/sysrq.c18
-rw-r--r--drivers/md/dm.c8
-rw-r--r--fs/bio.c5
-rw-r--r--fs/seq_file.c14
-rw-r--r--include/asm-generic/vmlinux.lds.h21
-rw-r--r--include/linux/blktrace_api.h172
-rw-r--r--include/linux/compiler.h84
-rw-r--r--include/linux/ftrace.h189
-rw-r--r--include/linux/ftrace_irq.h13
-rw-r--r--include/linux/hardirq.h15
-rw-r--r--include/linux/marker.h75
-rw-r--r--include/linux/rcupdate.h2
-rw-r--r--include/linux/ring_buffer.h1
-rw-r--r--include/linux/sched.h24
-rw-r--r--include/linux/seq_file.h1
-rw-r--r--include/linux/stacktrace.h8
-rw-r--r--include/linux/tracepoint.h57
-rw-r--r--include/linux/tty.h2
-rw-r--r--include/trace/block.h76
-rw-r--r--include/trace/boot.h56
-rw-r--r--include/trace/sched.h24
-rw-r--r--init/Kconfig1
-rw-r--r--init/main.c35
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/kthread.c3
-rw-r--r--kernel/marker.c192
-rw-r--r--kernel/module.c11
-rw-r--r--kernel/power/disk.c13
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/sched.c7
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sysctl.c10
-rw-r--r--kernel/trace/Kconfig101
-rw-r--r--kernel/trace/Makefile9
-rw-r--r--kernel/trace/ftrace.c584
-rw-r--r--kernel/trace/ring_buffer.c377
-rw-r--r--kernel/trace/trace.c851
-rw-r--r--kernel/trace/trace.h219
-rw-r--r--kernel/trace/trace_boot.c166
-rw-r--r--kernel/trace/trace_branch.c342
-rw-r--r--kernel/trace/trace_bts.c276
-rw-r--r--kernel/trace/trace_functions.c18
-rw-r--r--kernel/trace/trace_functions_graph.c400
-rw-r--r--kernel/trace/trace_irqsoff.c61
-rw-r--r--kernel/trace/trace_mmiotrace.c25
-rw-r--r--kernel/trace/trace_nop.c65
-rw-r--r--kernel/trace/trace_power.c179
-rw-r--r--kernel/trace/trace_sched_switch.c106
-rw-r--r--kernel/trace/trace_sched_wakeup.c70
-rw-r--r--kernel/trace/trace_selftest.c173
-rw-r--r--kernel/trace/trace_stack.c8
-rw-r--r--kernel/trace/trace_sysprof.c19
-rw-r--r--kernel/tracepoint.c295
-rw-r--r--mm/bounce.c5
-rw-r--r--samples/tracepoints/tp-samples-trace.h4
-rw-r--r--samples/tracepoints/tracepoint-probe-sample.c1
-rw-r--r--samples/tracepoints/tracepoint-probe-sample2.c1
-rw-r--r--samples/tracepoints/tracepoint-sample.c3
-rw-r--r--scripts/Makefile.build12
-rw-r--r--scripts/bootgraph.pl16
-rwxr-xr-xscripts/recordmcount.pl50
-rw-r--r--scripts/trace/power.pl108
-rw-r--r--scripts/tracing/draw_functrace.py130
104 files changed, 6835 insertions, 1819 deletions
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 9cc4d685dde5..803b1318b13d 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -82,7 +82,7 @@ of ftrace. Here is a list of some of the key files:
82 tracer is not adding more data, they will display 82 tracer is not adding more data, they will display
83 the same information every time they are read. 83 the same information every time they are read.
84 84
85 iter_ctrl: This file lets the user control the amount of data 85 trace_options: This file lets the user control the amount of data
86 that is displayed in one of the above output 86 that is displayed in one of the above output
87 files. 87 files.
88 88
@@ -94,10 +94,10 @@ of ftrace. Here is a list of some of the key files:
94 only be recorded if the latency is greater than 94 only be recorded if the latency is greater than
95 the value in this file. (in microseconds) 95 the value in this file. (in microseconds)
96 96
97 trace_entries: This sets or displays the number of bytes each CPU 97 buffer_size_kb: This sets or displays the number of kilobytes each CPU
98 buffer can hold. The tracer buffers are the same size 98 buffer can hold. The tracer buffers are the same size
99 for each CPU. The displayed number is the size of the 99 for each CPU. The displayed number is the size of the
100 CPU buffer and not total size of all buffers. The 100 CPU buffer and not total size of all buffers. The
101 trace buffers are allocated in pages (blocks of memory 101 trace buffers are allocated in pages (blocks of memory
102 that the kernel uses for allocation, usually 4 KB in size). 102 that the kernel uses for allocation, usually 4 KB in size).
103 If the last page allocated has room for more bytes 103 If the last page allocated has room for more bytes
@@ -127,6 +127,8 @@ of ftrace. Here is a list of some of the key files:
127 be traced. If a function exists in both set_ftrace_filter 127 be traced. If a function exists in both set_ftrace_filter
128 and set_ftrace_notrace, the function will _not_ be traced. 128 and set_ftrace_notrace, the function will _not_ be traced.
129 129
130 set_ftrace_pid: Have the function tracer only trace a single thread.
131
130 available_filter_functions: This lists the functions that ftrace 132 available_filter_functions: This lists the functions that ftrace
131 has processed and can trace. These are the function 133 has processed and can trace. These are the function
132 names that you can pass to "set_ftrace_filter" or 134 names that you can pass to "set_ftrace_filter" or
@@ -316,23 +318,23 @@ The above is mostly meaningful for kernel developers.
316 The rest is the same as the 'trace' file. 318 The rest is the same as the 'trace' file.
317 319
318 320
319iter_ctrl 321trace_options
320--------- 322-------------
321 323
322The iter_ctrl file is used to control what gets printed in the trace 324The trace_options file is used to control what gets printed in the trace
323output. To see what is available, simply cat the file: 325output. To see what is available, simply cat the file:
324 326
325 cat /debug/tracing/iter_ctrl 327 cat /debug/tracing/trace_options
326 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ 328 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
327 noblock nostacktrace nosched-tree 329 noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
328 330
329To disable one of the options, echo in the option prepended with "no". 331To disable one of the options, echo in the option prepended with "no".
330 332
331 echo noprint-parent > /debug/tracing/iter_ctrl 333 echo noprint-parent > /debug/tracing/trace_options
332 334
333To enable an option, leave off the "no". 335To enable an option, leave off the "no".
334 336
335 echo sym-offset > /debug/tracing/iter_ctrl 337 echo sym-offset > /debug/tracing/trace_options
336 338
337Here are the available options: 339Here are the available options:
338 340
@@ -378,6 +380,20 @@ Here are the available options:
378 When a trace is recorded, so is the stack of functions. 380 When a trace is recorded, so is the stack of functions.
379 This allows for back traces of trace sites. 381 This allows for back traces of trace sites.
380 382
 383 userstacktrace - This option changes the trace.
 384 It records a stacktrace of the current userspace thread.
 385
 386 sym-userobj - when user stacktraces are enabled, look up which object the
 387 address belongs to, and print a relative address.
 388 This is especially useful when ASLR is on; otherwise you don't
 389 get a chance to resolve the address to object/file/line after the app is
 390 no longer running.
 391
 392 The lookup is performed when you read trace, trace_pipe or latency_trace. Example:
 393
 394 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0x494]
 395 <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
396
381 sched-tree - TBD (any users??) 397 sched-tree - TBD (any users??)
382 398
383 399
@@ -1059,6 +1075,83 @@ For simple one time traces, the above is sufficent. For anything else,
1059a search through /proc/mounts may be needed to find where the debugfs 1075a search through /proc/mounts may be needed to find where the debugfs
1060file-system is mounted. 1076file-system is mounted.
1061 1077
1078
1079Single thread tracing
1080---------------------
1081
1082By writing into /debug/tracing/set_ftrace_pid you can trace a
1083single thread. For example:
1084
1085# cat /debug/tracing/set_ftrace_pid
 1086 no pid
1087# echo 3111 > /debug/tracing/set_ftrace_pid
1088# cat /debug/tracing/set_ftrace_pid
 1089 3111
1090# echo function > /debug/tracing/current_tracer
1091# cat /debug/tracing/trace | head
1092 # tracer: function
1093 #
1094 # TASK-PID CPU# TIMESTAMP FUNCTION
1095 # | | | | |
1096 yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return
1097 yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range
1098 yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel
1099 yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel
1100 yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll
1101 yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll
1102# echo -1 > /debug/tracing/set_ftrace_pid
1103# cat /debug/tracing/trace |head
1104 # tracer: function
1105 #
1106 # TASK-PID CPU# TIMESTAMP FUNCTION
1107 # | | | | |
1108 ##### CPU 3 buffer started ####
1109 yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait
1110 yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry
1111 yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry
1112 yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit
1113 yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit
1114
1115If you want to trace a function when executing, you could use
1116something like this simple program:
1117
1118#include <stdio.h>
1119#include <stdlib.h>
1120#include <sys/types.h>
1121#include <sys/stat.h>
1122#include <fcntl.h>
1123#include <unistd.h>
1124
1125int main (int argc, char **argv)
1126{
1127 if (argc < 1)
1128 exit(-1);
1129
1130 if (fork() > 0) {
1131 int fd, ffd;
1132 char line[64];
1133 int s;
1134
1135 ffd = open("/debug/tracing/current_tracer", O_WRONLY);
1136 if (ffd < 0)
1137 exit(-1);
1138 write(ffd, "nop", 3);
1139
1140 fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY);
1141 s = sprintf(line, "%d\n", getpid());
1142 write(fd, line, s);
1143
1144 write(ffd, "function", 8);
1145
1146 close(fd);
1147 close(ffd);
1148
1149 execvp(argv[1], argv+1);
1150 }
1151
1152 return 0;
1153}
1154
1062dynamic ftrace 1155dynamic ftrace
1063-------------- 1156--------------
1064 1157
@@ -1158,7 +1251,11 @@ These are the only wild cards which are supported.
1158 1251
1159 <match>*<match> will not work. 1252 <match>*<match> will not work.
1160 1253
1161 # echo hrtimer_* > /debug/tracing/set_ftrace_filter 1254Note: It is better to use quotes to enclose the wild cards, otherwise
1255 the shell may expand the parameters into names of files in the local
1256 directory.
1257
1258 # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
1162 1259
1163Produces: 1260Produces:
1164 1261
@@ -1213,7 +1310,7 @@ Again, now we want to append.
1213 # echo sys_nanosleep > /debug/tracing/set_ftrace_filter 1310 # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
1214 # cat /debug/tracing/set_ftrace_filter 1311 # cat /debug/tracing/set_ftrace_filter
1215sys_nanosleep 1312sys_nanosleep
1216 # echo hrtimer_* >> /debug/tracing/set_ftrace_filter 1313 # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter
1217 # cat /debug/tracing/set_ftrace_filter 1314 # cat /debug/tracing/set_ftrace_filter
1218hrtimer_run_queues 1315hrtimer_run_queues
1219hrtimer_run_pending 1316hrtimer_run_pending
@@ -1299,41 +1396,29 @@ trace entries
1299------------- 1396-------------
1300 1397
1301Having too much or not enough data can be troublesome in diagnosing 1398Having too much or not enough data can be troublesome in diagnosing
1302an issue in the kernel. The file trace_entries is used to modify 1399an issue in the kernel. The file buffer_size_kb is used to modify
1303the size of the internal trace buffers. The number listed 1400the size of the internal trace buffers. The number listed
1304is the number of entries that can be recorded per CPU. To know 1401is the number of entries that can be recorded per CPU. To know
1305the full size, multiply the number of possible CPUS with the 1402the full size, multiply the number of possible CPUS with the
1306number of entries. 1403number of entries.
1307 1404
1308 # cat /debug/tracing/trace_entries 1405 # cat /debug/tracing/buffer_size_kb
 1309 65620 1406 1408 (units kilobytes)
1310 1407
1311Note, to modify this, you must have tracing completely disabled. To do that, 1408Note, to modify this, you must have tracing completely disabled. To do that,
1312echo "nop" into the current_tracer. If the current_tracer is not set 1409echo "nop" into the current_tracer. If the current_tracer is not set
1313to "nop", an EINVAL error will be returned. 1410to "nop", an EINVAL error will be returned.
1314 1411
1315 # echo nop > /debug/tracing/current_tracer 1412 # echo nop > /debug/tracing/current_tracer
1316 # echo 100000 > /debug/tracing/trace_entries 1413 # echo 10000 > /debug/tracing/buffer_size_kb
1317 # cat /debug/tracing/trace_entries 1414 # cat /debug/tracing/buffer_size_kb
 1318 100045 1415 10000 (units kilobytes)
1319
1320
1321Notice that we echoed in 100,000 but the size is 100,045. The entries
1322are held in individual pages. It allocates the number of pages it takes
1323to fulfill the request. If more entries may fit on the last page
1324then they will be added.
1325
1326 # echo 1 > /debug/tracing/trace_entries
1327 # cat /debug/tracing/trace_entries
 1328 85
1329
1330This shows us that 85 entries can fit in a single page.
1331 1416
1332The number of pages which will be allocated is limited to a percentage 1417The number of pages which will be allocated is limited to a percentage
1333of available memory. Allocating too much will produce an error. 1418of available memory. Allocating too much will produce an error.
1334 1419
1335 # echo 1000000000000 > /debug/tracing/trace_entries 1420 # echo 1000000000000 > /debug/tracing/buffer_size_kb
1336-bash: echo: write error: Cannot allocate memory 1421-bash: echo: write error: Cannot allocate memory
1337 # cat /debug/tracing/trace_entries 1422 # cat /debug/tracing/buffer_size_kb
 1338 85 1423 85
1339 1424
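For readers who prefer doing the steps documented above from a program rather
than with echo, here is a minimal user-space sketch (not part of the patch,
same idea as the fork/exec helper shown in the file itself): switch to the nop
tracer, resize buffer_size_kb, point set_ftrace_pid at the current process and
start the function tracer. It assumes debugfs is mounted at /debug as in the
examples; the 10000 kB size and the helper names are arbitrary.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	/* each call mirrors one "echo val > path" from the text above */
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	char pid[32];

	/* buffer_size_kb may only be changed while the nop tracer is active */
	write_str("/debug/tracing/current_tracer", "nop");
	write_str("/debug/tracing/buffer_size_kb", "10000");

	/* limit the function tracer to this process, then enable it */
	snprintf(pid, sizeof(pid), "%d", getpid());
	write_str("/debug/tracing/set_ftrace_pid", pid);
	write_str("/debug/tracing/current_tracer", "function");

	return 0;
}

Writing -1 back to set_ftrace_pid, as in the example output above, reverts to
tracing all tasks.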
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e0f346d201ed..2919a2e91938 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -750,6 +750,14 @@ and is between 256 and 4096 characters. It is defined in the file
750 parameter will force ia64_sal_cache_flush to call 750 parameter will force ia64_sal_cache_flush to call
751 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. 751 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
752 752
753 ftrace=[tracer]
754 [ftrace] will set and start the specified tracer
755 as early as possible in order to facilitate early
756 boot debugging.
757
758 ftrace_dump_on_oops
759 [ftrace] will dump the trace buffers on oops.
760
753 gamecon.map[2|3]= 761 gamecon.map[2|3]=
754 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad 762 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
755 support via parallel port (up to 5 devices per port) 763 support via parallel port (up to 5 devices per port)
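As a concrete illustration of the two new parameters (the choice of the
function tracer is only an example; any registered tracer name could be
given), the kernel command line could be extended with:

	ftrace=function ftrace_dump_on_oops

which sets and starts the named tracer as early as possible during boot and
dumps the trace buffers on an oops, as described above.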
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 089f6138fcd9..d2b3d0e91b26 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -51,11 +51,16 @@ to call) for the specific marker through marker_probe_register() and can be
51activated by calling marker_arm(). Marker deactivation can be done by calling 51activated by calling marker_arm(). Marker deactivation can be done by calling
52marker_disarm() as many times as marker_arm() has been called. Removing a probe 52marker_disarm() as many times as marker_arm() has been called. Removing a probe
53is done through marker_probe_unregister(); it will disarm the probe. 53is done through marker_probe_unregister(); it will disarm the probe.
54marker_synchronize_unregister() must be called before the end of the module exit 54
55function to make sure there is no caller left using the probe. This, and the 55marker_synchronize_unregister() must be called between probe unregistration and
56fact that preemption is disabled around the probe call, make sure that probe 56the first occurrence of
57removal and module unload are safe. See the "Probe example" section below for a 57- the end of module exit function,
58sample probe module. 58 to make sure there is no caller left using the probe;
 59- the freeing of any resources used by the probes,
 60 to make sure the probes won't be accessing invalid data.
61This, and the fact that preemption is disabled around the probe call, make sure
62that probe removal and module unload are safe. See the "Probe example" section
63below for a sample probe module.
59 64
60The marker mechanism supports inserting multiple instances of the same marker. 65The marker mechanism supports inserting multiple instances of the same marker.
61Markers can be put in inline functions, inlined static functions, and 66Markers can be put in inline functions, inlined static functions, and
@@ -70,6 +75,20 @@ a printk warning which identifies the inconsistency:
70 75
71"Format mismatch for probe probe_name (format), marker (format)" 76"Format mismatch for probe probe_name (format), marker (format)"
72 77
78Another way to use markers is to simply define the marker without generating any
79function call to actually call into the marker. This is useful in combination
80with tracepoint probes in a scheme like this :
81
82void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk);
83
84DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name,
85 "arg1 %u pid %d");
86
87notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk)
88{
 89 struct marker *marker = &GET_MARKER(marker_eventname);
90 /* write data to trace buffers ... */
91}
73 92
74* Probe / marker example 93* Probe / marker example
75 94
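A compressed sketch of the unregister ordering described above, loosely
modelled on samples/markers (not part of the patch; the marker name, format
string and buffer are made up, and the probe signature is assumed to follow
this tree's marker.h):

#include <linux/module.h>
#include <linux/marker.h>
#include <linux/slab.h>

static char *probe_buf;		/* resource the probe writes into */

static void probe_subsystem_event(void *probe_private, void *call_private,
				  const char *fmt, va_list *args)
{
	/* decode the marker arguments into probe_buf here */
}

static int __init probe_init(void)
{
	int ret;

	probe_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!probe_buf)
		return -ENOMEM;

	ret = marker_probe_register("subsystem_event", "arg1 %u pid %d",
				    probe_subsystem_event, NULL);
	if (ret)
		kfree(probe_buf);
	return ret;
}

static void __exit probe_exit(void)
{
	marker_probe_unregister("subsystem_event", probe_subsystem_event, NULL);
	/*
	 * Only after this call is no marker site still executing the probe,
	 * so only now is it safe to free probe_buf or unload the module.
	 */
	marker_synchronize_unregister();
	kfree(probe_buf);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");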
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 5d354e167494..6f0a044f5b5e 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -3,28 +3,30 @@
3 Mathieu Desnoyers 3 Mathieu Desnoyers
4 4
5 5
6This document introduces Linux Kernel Tracepoints and their use. It provides 6This document introduces Linux Kernel Tracepoints and their use. It
7examples of how to insert tracepoints in the kernel and connect probe functions 7provides examples of how to insert tracepoints in the kernel and
8to them and provides some examples of probe functions. 8connect probe functions to them and provides some examples of probe
9functions.
9 10
10 11
11* Purpose of tracepoints 12* Purpose of tracepoints
12 13
13A tracepoint placed in code provides a hook to call a function (probe) that you 14A tracepoint placed in code provides a hook to call a function (probe)
14can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or 15that you can provide at runtime. A tracepoint can be "on" (a probe is
15"off" (no probe is attached). When a tracepoint is "off" it has no effect, 16connected to it) or "off" (no probe is attached). When a tracepoint is
16except for adding a tiny time penalty (checking a condition for a branch) and 17"off" it has no effect, except for adding a tiny time penalty
17space penalty (adding a few bytes for the function call at the end of the 18(checking a condition for a branch) and space penalty (adding a few
18instrumented function and adds a data structure in a separate section). When a 19bytes for the function call at the end of the instrumented function
19tracepoint is "on", the function you provide is called each time the tracepoint 20and adds a data structure in a separate section). When a tracepoint
20is executed, in the execution context of the caller. When the function provided 21is "on", the function you provide is called each time the tracepoint
21ends its execution, it returns to the caller (continuing from the tracepoint 22is executed, in the execution context of the caller. When the function
22site). 23provided ends its execution, it returns to the caller (continuing from
24the tracepoint site).
23 25
24You can put tracepoints at important locations in the code. They are 26You can put tracepoints at important locations in the code. They are
25lightweight hooks that can pass an arbitrary number of parameters, 27lightweight hooks that can pass an arbitrary number of parameters,
26which prototypes are described in a tracepoint declaration placed in a header 28which prototypes are described in a tracepoint declaration placed in a
27file. 29header file.
28 30
29They can be used for tracing and performance accounting. 31They can be used for tracing and performance accounting.
30 32
@@ -42,14 +44,16 @@ In include/trace/subsys.h :
42 44
43#include <linux/tracepoint.h> 45#include <linux/tracepoint.h>
44 46
45DEFINE_TRACE(subsys_eventname, 47DECLARE_TRACE(subsys_eventname,
46 TPPTOTO(int firstarg, struct task_struct *p), 48 TPPROTO(int firstarg, struct task_struct *p),
47 TPARGS(firstarg, p)); 49 TPARGS(firstarg, p));
48 50
49In subsys/file.c (where the tracing statement must be added) : 51In subsys/file.c (where the tracing statement must be added) :
50 52
51#include <trace/subsys.h> 53#include <trace/subsys.h>
52 54
55DEFINE_TRACE(subsys_eventname);
56
53void somefct(void) 57void somefct(void)
54{ 58{
55 ... 59 ...
@@ -61,31 +65,41 @@ Where :
61- subsys_eventname is an identifier unique to your event 65- subsys_eventname is an identifier unique to your event
62 - subsys is the name of your subsystem. 66 - subsys is the name of your subsystem.
63 - eventname is the name of the event to trace. 67 - eventname is the name of the event to trace.
64- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
65 called by this tracepoint.
66- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
67 68
68Connecting a function (probe) to a tracepoint is done by providing a probe 69- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
69(function to call) for the specific tracepoint through 70 function called by this tracepoint.
70register_trace_subsys_eventname(). Removing a probe is done through
71unregister_trace_subsys_eventname(); it will remove the probe sure there is no
72caller left using the probe when it returns. Probe removal is preempt-safe
73because preemption is disabled around the probe call. See the "Probe example"
74section below for a sample probe module.
75
76The tracepoint mechanism supports inserting multiple instances of the same
77tracepoint, but a single definition must be made of a given tracepoint name over
78all the kernel to make sure no type conflict will occur. Name mangling of the
79tracepoints is done using the prototypes to make sure typing is correct.
80Verification of probe type correctness is done at the registration site by the
81compiler. Tracepoints can be put in inline functions, inlined static functions,
82and unrolled loops as well as regular functions.
83
84The naming scheme "subsys_event" is suggested here as a convention intended
85to limit collisions. Tracepoint names are global to the kernel: they are
86considered as being the same whether they are in the core kernel image or in
87modules.
88 71
72- TPARGS(firstarg, p) are the parameters names, same as found in the
73 prototype.
74
75Connecting a function (probe) to a tracepoint is done by providing a
76probe (function to call) for the specific tracepoint through
77register_trace_subsys_eventname(). Removing a probe is done through
78unregister_trace_subsys_eventname(); it will remove the probe.
79
80tracepoint_synchronize_unregister() must be called before the end of
81the module exit function to make sure there is no caller left using
82the probe. This, and the fact that preemption is disabled around the
83probe call, make sure that probe removal and module unload are safe.
84See the "Probe example" section below for a sample probe module.
85
86The tracepoint mechanism supports inserting multiple instances of the
87same tracepoint, but a single definition must be made of a given
88tracepoint name over all the kernel to make sure no type conflict will
89occur. Name mangling of the tracepoints is done using the prototypes
90to make sure typing is correct. Verification of probe type correctness
91is done at the registration site by the compiler. Tracepoints can be
92put in inline functions, inlined static functions, and unrolled loops
93as well as regular functions.
94
95The naming scheme "subsys_event" is suggested here as a convention
96intended to limit collisions. Tracepoint names are global to the
97kernel: they are considered as being the same whether they are in the
98core kernel image or in modules.
99
100If the tracepoint has to be used in kernel modules, an
101EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
102used to export the defined tracepoints.
89 103
90* Probe / tracepoint example 104* Probe / tracepoint example
91 105
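Putting the pieces named above together, a hedged end-to-end sketch
(subsys_eventname, firstarg and the probe module names are the placeholders
from the example text, not real kernel code):

/* include/trace/subsys.h */
#include <linux/tracepoint.h>

DECLARE_TRACE(subsys_eventname,
	TPPROTO(int firstarg, struct task_struct *p),
	TPARGS(firstarg, p));

/* subsys/file.c -- the instrumented code */
#include <trace/subsys.h>

DEFINE_TRACE(subsys_eventname);
EXPORT_TRACEPOINT_SYMBOL_GPL(subsys_eventname);	/* only needed for modules */

void somefct(void)
{
	int firstarg = 1;

	trace_subsys_eventname(firstarg, current);
}

/* probe module */
static void probe_subsys_eventname(int firstarg, struct task_struct *p)
{
	/* runs in the caller's context whenever the tracepoint is "on" */
}

static int __init tp_probe_init(void)
{
	return register_trace_subsys_eventname(probe_subsys_eventname);
}

static void __exit tp_probe_exit(void)
{
	unregister_trace_subsys_eventname(probe_subsys_eventname);
	/* wait until no caller can still be inside the probe */
	tracepoint_synchronize_unregister();
}

module_init(tp_probe_init);
module_exit(tp_probe_exit);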
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index b298f7a631e6..e5f2ae8362f7 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -7,7 +7,19 @@
7 7
8#ifndef __ASSEMBLY__ 8#ifndef __ASSEMBLY__
9extern void _mcount(void); 9extern void _mcount(void);
10#endif 10
11#ifdef CONFIG_DYNAMIC_FTRACE
12static inline unsigned long ftrace_call_adjust(unsigned long addr)
13{
 14 /* relocation of mcount call site is the same as the address */
15 return addr;
16}
17
18struct dyn_arch_ftrace {
19 struct module *mod;
20};
21#endif /* CONFIG_DYNAMIC_FTRACE */
22#endif /* __ASSEMBLY__ */
11 23
12#endif 24#endif
13 25
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index e5f14b13ccf0..08454880a2c0 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -34,11 +34,19 @@ struct mod_arch_specific {
34#ifdef __powerpc64__ 34#ifdef __powerpc64__
35 unsigned int stubs_section; /* Index of stubs section in module */ 35 unsigned int stubs_section; /* Index of stubs section in module */
36 unsigned int toc_section; /* What section is the TOC? */ 36 unsigned int toc_section; /* What section is the TOC? */
37#else 37#ifdef CONFIG_DYNAMIC_FTRACE
38 unsigned long toc;
39 unsigned long tramp;
40#endif
41
42#else /* powerpc64 */
38 /* Indices of PLT sections within module. */ 43 /* Indices of PLT sections within module. */
39 unsigned int core_plt_section; 44 unsigned int core_plt_section;
40 unsigned int init_plt_section; 45 unsigned int init_plt_section;
46#ifdef CONFIG_DYNAMIC_FTRACE
47 unsigned long tramp;
41#endif 48#endif
49#endif /* powerpc64 */
42 50
43 /* List of BUG addresses, source line numbers and filenames */ 51 /* List of BUG addresses, source line numbers and filenames */
44 struct list_head bug_list; 52 struct list_head bug_list;
@@ -68,6 +76,12 @@ struct mod_arch_specific {
68# endif /* MODULE */ 76# endif /* MODULE */
69#endif 77#endif
70 78
79#ifdef CONFIG_DYNAMIC_FTRACE
80# ifdef MODULE
81 asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
82# endif /* MODULE */
83#endif
84
71 85
72struct exception_table_entry; 86struct exception_table_entry;
73void sort_ex_table(struct exception_table_entry *start, 87void sort_ex_table(struct exception_table_entry *start,
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 92673b43858d..d17edb4a2f9d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -17,6 +17,7 @@ ifdef CONFIG_FUNCTION_TRACER
17CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog 17CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
18CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog 18CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
19CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog 19CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
20CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
20 21
21ifdef CONFIG_DYNAMIC_FTRACE 22ifdef CONFIG_DYNAMIC_FTRACE
22# dynamic ftrace setup. 23# dynamic ftrace setup.
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 7ecc0d1855c3..6f7eb7e00c79 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1162,39 +1162,17 @@ machine_check_in_rtas:
1162#ifdef CONFIG_DYNAMIC_FTRACE 1162#ifdef CONFIG_DYNAMIC_FTRACE
1163_GLOBAL(mcount) 1163_GLOBAL(mcount)
1164_GLOBAL(_mcount) 1164_GLOBAL(_mcount)
1165 stwu r1,-48(r1) 1165 /*
1166 stw r3, 12(r1) 1166 * It is required that _mcount on PPC32 must preserve the
1167 stw r4, 16(r1) 1167 * link register. But we have r0 to play with. We use r0
1168 stw r5, 20(r1) 1168 * to push the return address back to the caller of mcount
1169 stw r6, 24(r1) 1169 * into the ctr register, restore the link register and
1170 mflr r3 1170 * then jump back using the ctr register.
1171 stw r7, 28(r1) 1171 */
1172 mfcr r5 1172 mflr r0
1173 stw r8, 32(r1)
1174 stw r9, 36(r1)
1175 stw r10,40(r1)
1176 stw r3, 44(r1)
1177 stw r5, 8(r1)
1178 subi r3, r3, MCOUNT_INSN_SIZE
1179 .globl mcount_call
1180mcount_call:
1181 bl ftrace_stub
1182 nop
1183 lwz r6, 8(r1)
1184 lwz r0, 44(r1)
1185 lwz r3, 12(r1)
1186 mtctr r0 1173 mtctr r0
1187 lwz r4, 16(r1) 1174 lwz r0, 4(r1)
1188 mtcr r6
1189 lwz r5, 20(r1)
1190 lwz r6, 24(r1)
1191 lwz r0, 52(r1)
1192 lwz r7, 28(r1)
1193 lwz r8, 32(r1)
1194 mtlr r0 1175 mtlr r0
1195 lwz r9, 36(r1)
1196 lwz r10,40(r1)
1197 addi r1, r1, 48
1198 bctr 1176 bctr
1199 1177
1200_GLOBAL(ftrace_caller) 1178_GLOBAL(ftrace_caller)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e0bcf9354286..383ed6eb0085 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -894,18 +894,6 @@ _GLOBAL(enter_prom)
894#ifdef CONFIG_DYNAMIC_FTRACE 894#ifdef CONFIG_DYNAMIC_FTRACE
895_GLOBAL(mcount) 895_GLOBAL(mcount)
896_GLOBAL(_mcount) 896_GLOBAL(_mcount)
897 /* Taken from output of objdump from lib64/glibc */
898 mflr r3
899 stdu r1, -112(r1)
900 std r3, 128(r1)
901 subi r3, r3, MCOUNT_INSN_SIZE
902 .globl mcount_call
903mcount_call:
904 bl ftrace_stub
905 nop
906 ld r0, 128(r1)
907 mtlr r0
908 addi r1, r1, 112
909 blr 897 blr
910 898
911_GLOBAL(ftrace_caller) 899_GLOBAL(ftrace_caller)
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index f4b006ed0ab1..5355244c99ff 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -9,22 +9,30 @@
9 9
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/uaccess.h>
13#include <linux/module.h>
12#include <linux/ftrace.h> 14#include <linux/ftrace.h>
13#include <linux/percpu.h> 15#include <linux/percpu.h>
14#include <linux/init.h> 16#include <linux/init.h>
15#include <linux/list.h> 17#include <linux/list.h>
16 18
17#include <asm/cacheflush.h> 19#include <asm/cacheflush.h>
20#include <asm/code-patching.h>
18#include <asm/ftrace.h> 21#include <asm/ftrace.h>
19 22
23#if 0
24#define DEBUGP printk
25#else
26#define DEBUGP(fmt , ...) do { } while (0)
27#endif
20 28
21static unsigned int ftrace_nop = 0x60000000; 29static unsigned int ftrace_nop = PPC_NOP_INSTR;
22 30
23#ifdef CONFIG_PPC32 31#ifdef CONFIG_PPC32
24# define GET_ADDR(addr) addr 32# define GET_ADDR(addr) addr
25#else 33#else
26/* PowerPC64's functions are data that points to the functions */ 34/* PowerPC64's functions are data that points to the functions */
27# define GET_ADDR(addr) *(unsigned long *)addr 35# define GET_ADDR(addr) (*(unsigned long *)addr)
28#endif 36#endif
29 37
30 38
@@ -33,12 +41,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr)
33 return (int)(addr - ip); 41 return (int)(addr - ip);
34} 42}
35 43
36unsigned char *ftrace_nop_replace(void) 44static unsigned char *ftrace_nop_replace(void)
37{ 45{
38 return (char *)&ftrace_nop; 46 return (char *)&ftrace_nop;
39} 47}
40 48
41unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) 49static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
42{ 50{
43 static unsigned int op; 51 static unsigned int op;
44 52
@@ -68,49 +76,422 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
68# define _ASM_PTR " .long " 76# define _ASM_PTR " .long "
69#endif 77#endif
70 78
71int 79static int
72ftrace_modify_code(unsigned long ip, unsigned char *old_code, 80ftrace_modify_code(unsigned long ip, unsigned char *old_code,
73 unsigned char *new_code) 81 unsigned char *new_code)
74{ 82{
75 unsigned replaced; 83 unsigned char replaced[MCOUNT_INSN_SIZE];
76 unsigned old = *(unsigned *)old_code;
77 unsigned new = *(unsigned *)new_code;
78 int faulted = 0;
79 84
80 /* 85 /*
81 * Note: Due to modules and __init, code can 86 * Note: Due to modules and __init, code can
82 * disappear and change, we need to protect against faulting 87 * disappear and change, we need to protect against faulting
83 * as well as code changing. 88 * as well as code changing. We do this by using the
89 * probe_kernel_* functions.
84 * 90 *
85 * No real locking needed, this code is run through 91 * No real locking needed, this code is run through
86 * kstop_machine. 92 * kstop_machine, or before SMP starts.
87 */ 93 */
88 asm volatile ( 94
89 "1: lwz %1, 0(%2)\n" 95 /* read the text we want to modify */
90 " cmpw %1, %5\n" 96 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
91 " bne 2f\n" 97 return -EFAULT;
92 " stwu %3, 0(%2)\n" 98
93 "2:\n" 99 /* Make sure it is what we expect it to be */
94 ".section .fixup, \"ax\"\n" 100 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
95 "3: li %0, 1\n" 101 return -EINVAL;
96 " b 2b\n" 102
97 ".previous\n" 103 /* replace the text with the new text */
98 ".section __ex_table,\"a\"\n" 104 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
99 _ASM_ALIGN "\n" 105 return -EPERM;
100 _ASM_PTR "1b, 3b\n" 106
101 ".previous" 107 flush_icache_range(ip, ip + 8);
102 : "=r"(faulted), "=r"(replaced) 108
103 : "r"(ip), "r"(new), 109 return 0;
104 "0"(faulted), "r"(old) 110}
105 : "memory"); 111
106 112/*
107 if (replaced != old && replaced != new) 113 * Helper functions that are the same for both PPC64 and PPC32.
108 faulted = 2; 114 */
109 115static int test_24bit_addr(unsigned long ip, unsigned long addr)
110 if (!faulted) 116{
111 flush_icache_range(ip, ip + 8); 117
112 118 /* use the create_branch to verify that this offset can be branched */
113 return faulted; 119 return create_branch((unsigned int *)ip, addr, 0);
120}
121
122static int is_bl_op(unsigned int op)
123{
124 return (op & 0xfc000003) == 0x48000001;
125}
126
127static unsigned long find_bl_target(unsigned long ip, unsigned int op)
128{
129 static int offset;
130
131 offset = (op & 0x03fffffc);
132 /* make it signed */
133 if (offset & 0x02000000)
134 offset |= 0xfe000000;
135
136 return ip + (long)offset;
137}
138
139#ifdef CONFIG_PPC64
140static int
141__ftrace_make_nop(struct module *mod,
142 struct dyn_ftrace *rec, unsigned long addr)
143{
144 unsigned int op;
145 unsigned int jmp[5];
146 unsigned long ptr;
147 unsigned long ip = rec->ip;
148 unsigned long tramp;
149 int offset;
150
151 /* read where this goes */
152 if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
153 return -EFAULT;
154
 155 /* Make sure that this is still a 24bit jump */
156 if (!is_bl_op(op)) {
157 printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
158 return -EINVAL;
159 }
160
161 /* lets find where the pointer goes */
162 tramp = find_bl_target(ip, op);
163
164 /*
165 * On PPC64 the trampoline looks like:
166 * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, <high>
167 * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, <low>
168 * Where the bytes 2,3,6 and 7 make up the 32bit offset
169 * to the TOC that holds the pointer.
170 * to jump to.
171 * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1)
172 * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12)
 173 * The actual address is 32 bytes from the offset
174 * into the TOC.
175 * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12)
176 */
177
178 DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
179
180 /* Find where the trampoline jumps to */
181 if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
182 printk(KERN_ERR "Failed to read %lx\n", tramp);
183 return -EFAULT;
184 }
185
186 DEBUGP(" %08x %08x", jmp[0], jmp[1]);
187
188 /* verify that this is what we expect it to be */
189 if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
190 ((jmp[1] & 0xffff0000) != 0x398c0000) ||
191 (jmp[2] != 0xf8410028) ||
192 (jmp[3] != 0xe96c0020) ||
193 (jmp[4] != 0xe84c0028)) {
194 printk(KERN_ERR "Not a trampoline\n");
195 return -EINVAL;
196 }
197
198 offset = (unsigned)((unsigned short)jmp[0]) << 16 |
199 (unsigned)((unsigned short)jmp[1]);
200
201 DEBUGP(" %x ", offset);
202
 203 /* get the address this jumps to */
204 tramp = mod->arch.toc + offset + 32;
205 DEBUGP("toc: %lx", tramp);
206
207 if (probe_kernel_read(jmp, (void *)tramp, 8)) {
208 printk(KERN_ERR "Failed to read %lx\n", tramp);
209 return -EFAULT;
210 }
211
212 DEBUGP(" %08x %08x\n", jmp[0], jmp[1]);
213
214 ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
215
216 /* This should match what was called */
217 if (ptr != GET_ADDR(addr)) {
218 printk(KERN_ERR "addr does not match %lx\n", ptr);
219 return -EINVAL;
220 }
221
222 /*
223 * We want to nop the line, but the next line is
224 * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1)
225 * This needs to be turned to a nop too.
226 */
227 if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
228 return -EFAULT;
229
230 if (op != 0xe8410028) {
231 printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
232 return -EINVAL;
233 }
234
235 /*
236 * Milton Miller pointed out that we can not blindly do nops.
237 * If a task was preempted when calling a trace function,
238 * the nops will remove the way to restore the TOC in r2
239 * and the r2 TOC will get corrupted.
240 */
241
242 /*
243 * Replace:
244 * bl <tramp> <==== will be replaced with "b 1f"
245 * ld r2,40(r1)
246 * 1:
247 */
248 op = 0x48000008; /* b +8 */
249
250 if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
251 return -EPERM;
252
253
254 flush_icache_range(ip, ip + 8);
255
256 return 0;
257}
258
259#else /* !PPC64 */
260static int
261__ftrace_make_nop(struct module *mod,
262 struct dyn_ftrace *rec, unsigned long addr)
263{
264 unsigned int op;
265 unsigned int jmp[4];
266 unsigned long ip = rec->ip;
267 unsigned long tramp;
268
269 if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
270 return -EFAULT;
271
 272 /* Make sure that this is still a 24bit jump */
273 if (!is_bl_op(op)) {
274 printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
275 return -EINVAL;
276 }
277
278 /* lets find where the pointer goes */
279 tramp = find_bl_target(ip, op);
280
281 /*
282 * On PPC32 the trampoline looks like:
283 * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha
284 * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l
285 * 0x7d, 0x69, 0x03, 0xa6 mtctr r11
286 * 0x4e, 0x80, 0x04, 0x20 bctr
287 */
288
289 DEBUGP("ip:%lx jumps to %lx", ip, tramp);
290
291 /* Find where the trampoline jumps to */
292 if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
293 printk(KERN_ERR "Failed to read %lx\n", tramp);
294 return -EFAULT;
295 }
296
297 DEBUGP(" %08x %08x ", jmp[0], jmp[1]);
298
299 /* verify that this is what we expect it to be */
300 if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
301 ((jmp[1] & 0xffff0000) != 0x396b0000) ||
302 (jmp[2] != 0x7d6903a6) ||
303 (jmp[3] != 0x4e800420)) {
304 printk(KERN_ERR "Not a trampoline\n");
305 return -EINVAL;
306 }
307
308 tramp = (jmp[1] & 0xffff) |
309 ((jmp[0] & 0xffff) << 16);
310 if (tramp & 0x8000)
311 tramp -= 0x10000;
312
313 DEBUGP(" %x ", tramp);
314
315 if (tramp != addr) {
316 printk(KERN_ERR
317 "Trampoline location %08lx does not match addr\n",
318 tramp);
319 return -EINVAL;
320 }
321
322 op = PPC_NOP_INSTR;
323
324 if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
325 return -EPERM;
326
327 flush_icache_range(ip, ip + 8);
328
329 return 0;
330}
331#endif /* PPC64 */
332
333int ftrace_make_nop(struct module *mod,
334 struct dyn_ftrace *rec, unsigned long addr)
335{
336 unsigned char *old, *new;
337 unsigned long ip = rec->ip;
338
339 /*
340 * If the calling address is more that 24 bits away,
341 * then we had to use a trampoline to make the call.
342 * Otherwise just update the call site.
343 */
344 if (test_24bit_addr(ip, addr)) {
345 /* within range */
346 old = ftrace_call_replace(ip, addr);
347 new = ftrace_nop_replace();
348 return ftrace_modify_code(ip, old, new);
349 }
350
351 /*
352 * Out of range jumps are called from modules.
353 * We should either already have a pointer to the module
354 * or it has been passed in.
355 */
356 if (!rec->arch.mod) {
357 if (!mod) {
358 printk(KERN_ERR "No module loaded addr=%lx\n",
359 addr);
360 return -EFAULT;
361 }
362 rec->arch.mod = mod;
363 } else if (mod) {
364 if (mod != rec->arch.mod) {
365 printk(KERN_ERR
366 "Record mod %p not equal to passed in mod %p\n",
367 rec->arch.mod, mod);
368 return -EINVAL;
369 }
370 /* nothing to do if mod == rec->arch.mod */
371 } else
372 mod = rec->arch.mod;
373
374 return __ftrace_make_nop(mod, rec, addr);
375
376}
377
378#ifdef CONFIG_PPC64
379static int
380__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
381{
382 unsigned int op[2];
383 unsigned long ip = rec->ip;
384
385 /* read where this goes */
386 if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
387 return -EFAULT;
388
389 /*
390 * It should be pointing to two nops or
391 * b +8; ld r2,40(r1)
392 */
393 if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
394 ((op[0] != PPC_NOP_INSTR) || (op[1] != PPC_NOP_INSTR))) {
395 printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
396 return -EINVAL;
397 }
398
399 /* If we never set up a trampoline to ftrace_caller, then bail */
400 if (!rec->arch.mod->arch.tramp) {
401 printk(KERN_ERR "No ftrace trampoline\n");
402 return -EINVAL;
403 }
404
405 /* create the branch to the trampoline */
406 op[0] = create_branch((unsigned int *)ip,
407 rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
408 if (!op[0]) {
409 printk(KERN_ERR "REL24 out of range!\n");
410 return -EINVAL;
411 }
412
413 /* ld r2,40(r1) */
414 op[1] = 0xe8410028;
415
416 DEBUGP("write to %lx\n", rec->ip);
417
418 if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
419 return -EPERM;
420
421 flush_icache_range(ip, ip + 8);
422
423 return 0;
424}
425#else
426static int
427__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
428{
429 unsigned int op;
430 unsigned long ip = rec->ip;
431
432 /* read where this goes */
433 if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
434 return -EFAULT;
435
436 /* It should be pointing to a nop */
437 if (op != PPC_NOP_INSTR) {
438 printk(KERN_ERR "Expected NOP but have %x\n", op);
439 return -EINVAL;
440 }
441
442 /* If we never set up a trampoline to ftrace_caller, then bail */
443 if (!rec->arch.mod->arch.tramp) {
444 printk(KERN_ERR "No ftrace trampoline\n");
445 return -EINVAL;
446 }
447
448 /* create the branch to the trampoline */
449 op = create_branch((unsigned int *)ip,
450 rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
451 if (!op) {
452 printk(KERN_ERR "REL24 out of range!\n");
453 return -EINVAL;
454 }
455
456 DEBUGP("write to %lx\n", rec->ip);
457
458 if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
459 return -EPERM;
460
461 flush_icache_range(ip, ip + 8);
462
463 return 0;
464}
465#endif /* CONFIG_PPC64 */
466
467int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
468{
469 unsigned char *old, *new;
470 unsigned long ip = rec->ip;
471
472 /*
473 * If the calling address is more that 24 bits away,
474 * then we had to use a trampoline to make the call.
475 * Otherwise just update the call site.
476 */
477 if (test_24bit_addr(ip, addr)) {
478 /* within range */
479 old = ftrace_nop_replace();
480 new = ftrace_call_replace(ip, addr);
481 return ftrace_modify_code(ip, old, new);
482 }
483
484 /*
485 * Out of range jumps are called from modules.
486 * Being that we are converting from nop, it had better
487 * already have a module defined.
488 */
489 if (!rec->arch.mod) {
490 printk(KERN_ERR "No module loaded\n");
491 return -EINVAL;
492 }
493
494 return __ftrace_make_call(rec, addr);
114} 495}
115 496
116int ftrace_update_ftrace_func(ftrace_func_t func) 497int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -128,10 +509,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
128 509
129int __init ftrace_dyn_arch_init(void *data) 510int __init ftrace_dyn_arch_init(void *data)
130{ 511{
131 /* This is running in kstop_machine */ 512 /* caller expects data to be zero */
513 unsigned long *p = data;
132 514
133 ftrace_mcount_set(data); 515 *p = 0;
134 516
135 return 0; 517 return 0;
136} 518}
137
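To make the 24-bit branch arithmetic above easier to follow, here is a
stand-alone user-space rendering of the same decode that is_bl_op() and
find_bl_target() perform (the opcode and address are invented: a "bl" that
branches to four bytes before itself):

#include <stdio.h>

static int is_bl_op(unsigned int op)
{
	/* opcode 18 ("b") with the LK bit set and AA clear */
	return (op & 0xfc000003) == 0x48000001;
}

static unsigned long find_bl_target(unsigned long ip, unsigned int op)
{
	int offset = op & 0x03fffffc;	/* word-aligned displacement field */

	if (offset & 0x02000000)	/* sign-extend the 26-bit offset */
		offset |= 0xfe000000;

	return ip + (long)offset;
}

int main(void)
{
	unsigned long ip = 0x1000;	/* pretend call-site address */
	unsigned int op = 0x4bfffffd;	/* "bl .-4" */

	if (is_bl_op(op))
		printf("bl at 0x%lx branches to 0x%lx\n",
		       ip, find_bl_target(ip, op));
	return 0;
}

Run as a normal program, this prints "bl at 0x1000 branches to 0xffc", which
is the same sign-extension the kernel code relies on when it locates a module
trampoline.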
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 31982d05d81a..88d9c1d5e5fb 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -69,10 +69,15 @@ void cpu_idle(void)
69 smp_mb(); 69 smp_mb();
70 local_irq_disable(); 70 local_irq_disable();
71 71
72 /* Don't trace irqs off for idle */
73 stop_critical_timings();
74
72 /* check again after disabling irqs */ 75 /* check again after disabling irqs */
73 if (!need_resched() && !cpu_should_die()) 76 if (!need_resched() && !cpu_should_die())
74 ppc_md.power_save(); 77 ppc_md.power_save();
75 78
79 start_critical_timings();
80
76 local_irq_enable(); 81 local_irq_enable();
77 set_thread_flag(TIF_POLLING_NRFLAG); 82 set_thread_flag(TIF_POLLING_NRFLAG);
78 83
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 2df91a03462a..f832773fc28e 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -22,6 +22,7 @@
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/ftrace.h>
25#include <linux/cache.h> 26#include <linux/cache.h>
26#include <linux/bug.h> 27#include <linux/bug.h>
27#include <linux/sort.h> 28#include <linux/sort.h>
@@ -53,6 +54,9 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
53 r_addend = rela[i].r_addend; 54 r_addend = rela[i].r_addend;
54 } 55 }
55 56
57#ifdef CONFIG_DYNAMIC_FTRACE
58 _count_relocs++; /* add one for ftrace_caller */
59#endif
56 return _count_relocs; 60 return _count_relocs;
57} 61}
58 62
@@ -306,5 +310,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
306 return -ENOEXEC; 310 return -ENOEXEC;
307 } 311 }
308 } 312 }
313#ifdef CONFIG_DYNAMIC_FTRACE
314 module->arch.tramp =
315 do_plt_call(module->module_core,
316 (unsigned long)ftrace_caller,
317 sechdrs, module);
318#endif
309 return 0; 319 return 0;
310} 320}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 1af2377e4992..8992b031a7b6 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -20,6 +20,7 @@
20#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
21#include <linux/err.h> 21#include <linux/err.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/ftrace.h>
23#include <linux/bug.h> 24#include <linux/bug.h>
24#include <asm/module.h> 25#include <asm/module.h>
25#include <asm/firmware.h> 26#include <asm/firmware.h>
@@ -163,6 +164,11 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
163 } 164 }
164 } 165 }
165 166
167#ifdef CONFIG_DYNAMIC_FTRACE
168 /* make the trampoline to the ftrace_caller */
169 relocs++;
170#endif
171
166 DEBUGP("Looks like a total of %lu stubs, max\n", relocs); 172 DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
167 return relocs * sizeof(struct ppc64_stub_entry); 173 return relocs * sizeof(struct ppc64_stub_entry);
168} 174}
@@ -441,5 +447,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
441 } 447 }
442 } 448 }
443 449
450#ifdef CONFIG_DYNAMIC_FTRACE
451 me->arch.toc = my_r2(sechdrs, me);
452 me->arch.tramp = stub_for_addr(sechdrs,
453 (unsigned long)ftrace_caller,
454 me);
455#endif
456
444 return 0; 457 return 0;
445} 458}
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d69912c07ce7..8db35278a4b4 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -6,6 +6,9 @@ ifeq ($(CONFIG_PPC64),y)
6EXTRA_CFLAGS += -mno-minimal-toc 6EXTRA_CFLAGS += -mno-minimal-toc
7endif 7endif
8 8
9CFLAGS_REMOVE_code-patching.o = -pg
10CFLAGS_REMOVE_feature-fixups.o = -pg
11
9obj-y := string.o alloc.o \ 12obj-y := string.o alloc.o \
10 checksum_$(CONFIG_WORD_SIZE).o 13 checksum_$(CONFIG_WORD_SIZE).o
11obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o 14obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..45c86fb94132 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,11 +29,14 @@ config X86
29 select HAVE_FTRACE_MCOUNT_RECORD 29 select HAVE_FTRACE_MCOUNT_RECORD
30 select HAVE_DYNAMIC_FTRACE 30 select HAVE_DYNAMIC_FTRACE
31 select HAVE_FUNCTION_TRACER 31 select HAVE_FUNCTION_TRACER
32 select HAVE_FUNCTION_GRAPH_TRACER
33 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
32 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 34 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
33 select HAVE_ARCH_KGDB if !X86_VOYAGER 35 select HAVE_ARCH_KGDB if !X86_VOYAGER
34 select HAVE_ARCH_TRACEHOOK 36 select HAVE_ARCH_TRACEHOOK
35 select HAVE_GENERIC_DMA_COHERENT if X86_32 37 select HAVE_GENERIC_DMA_COHERENT if X86_32
36 select HAVE_EFFICIENT_UNALIGNED_ACCESS 38 select HAVE_EFFICIENT_UNALIGNED_ACCESS
39 select USER_STACKTRACE_SUPPORT
37 40
38config ARCH_DEFCONFIG 41config ARCH_DEFCONFIG
39 string 42 string
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
515config X86_DS 515config X86_DS
516 def_bool X86_PTRACE_BTS 516 def_bool X86_PTRACE_BTS
517 depends on X86_DEBUGCTLMSR 517 depends on X86_DEBUGCTLMSR
518 select HAVE_HW_BRANCH_TRACER
518 519
519config X86_PTRACE_BTS 520config X86_PTRACE_BTS
520 bool "Branch Trace Store" 521 bool "Branch Trace Store"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..fa013f529b74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
186 Add a simple leak tracer to the IOMMU code. This is useful when you 186 Add a simple leak tracer to the IOMMU code. This is useful when you
187 are debugging a buggy device driver that leaks IOMMU mappings. 187 are debugging a buggy device driver that leaks IOMMU mappings.
188 188
189config MMIOTRACE_HOOKS
190 bool
191
192config MMIOTRACE 189config MMIOTRACE
193 bool "Memory mapped IO tracing" 190 bool "Memory mapped IO tracing"
194 depends on DEBUG_KERNEL && PCI 191 depends on DEBUG_KERNEL && PCI
195 select TRACING 192 select TRACING
196 select MMIOTRACE_HOOKS
197 help 193 help
198 Mmiotrace traces Memory Mapped I/O access and is meant for 194 Mmiotrace traces Memory Mapped I/O access and is meant for
199 debugging and reverse engineering. It is called from the ioremap 195 debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a95008457ea4..99b6c39774a4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It assumes:
15 * - get_task_struct on all parameter tasks 14 * - get_task_struct on all traced tasks
16 * - current is allowed to trace parameter tasks 15 * - current is allowed to trace tasks
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -26,11 +25,18 @@
26 25
27#include <linux/types.h> 26#include <linux/types.h>
28#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/err.h>
29 29
30 30
31#ifdef CONFIG_X86_DS 31#ifdef CONFIG_X86_DS
32 32
33struct task_struct; 33struct task_struct;
34struct ds_tracer;
35struct bts_tracer;
36struct pebs_tracer;
37
38typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
39typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
34 40
35/* 41/*
36 * Request BTS or PEBS 42 * Request BTS or PEBS
@@ -38,60 +44,62 @@ struct task_struct;
38 * Due to alignement constraints, the actual buffer may be slightly 44 * Due to alignement constraints, the actual buffer may be slightly
39 * smaller than the requested or provided buffer. 45 * smaller than the requested or provided buffer.
40 * 46 *
41 * Returns 0 on success; -Eerrno otherwise 47 * Returns a pointer to a tracer structure on success, or
48 * ERR_PTR(errcode) on failure.
49 *
50 * The interrupt threshold is independent from the overflow callback
51 * to allow users to use their own overflow interrupt handling mechanism.
42 * 52 *
43 * task: the task to request recording for; 53 * task: the task to request recording for;
44 * NULL for per-cpu recording on the current cpu 54 * NULL for per-cpu recording on the current cpu
45 * base: the base pointer for the (non-pageable) buffer; 55 * base: the base pointer for the (non-pageable) buffer;
46 * NULL if buffer allocation requested 56 * size: the size of the provided buffer in bytes
47 * size: the size of the requested or provided buffer
48 * ovfl: pointer to a function to be called on buffer overflow; 57 * ovfl: pointer to a function to be called on buffer overflow;
49 * NULL if cyclic buffer requested 58 * NULL if cyclic buffer requested
59 * th: the interrupt threshold in records from the end of the buffer;
60 * -1 if no interrupt threshold is requested.
50 */ 61 */
51typedef void (*ds_ovfl_callback_t)(struct task_struct *); 62extern struct bts_tracer *ds_request_bts(struct task_struct *task,
52extern int ds_request_bts(struct task_struct *task, void *base, size_t size, 63 void *base, size_t size,
53 ds_ovfl_callback_t ovfl); 64 bts_ovfl_callback_t ovfl, size_t th);
54extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, 65extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
55 ds_ovfl_callback_t ovfl); 66 void *base, size_t size,
67 pebs_ovfl_callback_t ovfl,
68 size_t th);
56 69
57/* 70/*
58 * Release BTS or PEBS resources 71 * Release BTS or PEBS resources
59 * 72 *
60 * Frees buffers allocated on ds_request.
61 *
62 * Returns 0 on success; -Eerrno otherwise 73 * Returns 0 on success; -Eerrno otherwise
63 * 74 *
64 * task: the task to release resources for; 75 * tracer: the tracer handle returned from ds_request_~()
65 * NULL to release resources for the current cpu
66 */ 76 */
67extern int ds_release_bts(struct task_struct *task); 77extern int ds_release_bts(struct bts_tracer *tracer);
68extern int ds_release_pebs(struct task_struct *task); 78extern int ds_release_pebs(struct pebs_tracer *tracer);
69 79
70/* 80/*
71 * Return the (array) index of the write pointer. 81 * Get the (array) index of the write pointer.
72 * (assuming an array of BTS/PEBS records) 82 * (assuming an array of BTS/PEBS records)
73 * 83 *
74 * Returns -Eerrno on error 84 * Returns 0 on success; -Eerrno on error
75 * 85 *
76 * task: the task to access; 86 * tracer: the tracer handle returned from ds_request_~()
77 * NULL to access the current cpu 87 * pos (out): will hold the result
78 * pos (out): if not NULL, will hold the result
79 */ 88 */
80extern int ds_get_bts_index(struct task_struct *task, size_t *pos); 89extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
81extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); 90extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
82 91
83/* 92/*
84 * Return the (array) index one record beyond the end of the array. 93 * Get the (array) index one record beyond the end of the array.
85 * (assuming an array of BTS/PEBS records) 94 * (assuming an array of BTS/PEBS records)
86 * 95 *
87 * Returns -Eerrno on error 96 * Returns 0 on success; -Eerrno on error
88 * 97 *
89 * task: the task to access; 98 * tracer: the tracer handle returned from ds_request_~()
90 * NULL to access the current cpu 99 * pos (out): will hold the result
91 * pos (out): if not NULL, will hold the result
92 */ 100 */
93extern int ds_get_bts_end(struct task_struct *task, size_t *pos); 101extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
94extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); 102extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
95 103
96/* 104/*
97 * Provide a pointer to the BTS/PEBS record at parameter index. 105 * Provide a pointer to the BTS/PEBS record at parameter index.
@@ -102,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
102 * 110 *
103 * Returns the size of a single record on success; -Eerrno on error 111 * Returns the size of a single record on success; -Eerrno on error
104 * 112 *
105 * task: the task to access; 113 * tracer: the tracer handle returned from ds_request_~()
106 * NULL to access the current cpu
107 * index: the index of the requested record 114 * index: the index of the requested record
108 * record (out): pointer to the requested record 115 * record (out): pointer to the requested record
109 */ 116 */
110extern int ds_access_bts(struct task_struct *task, 117extern int ds_access_bts(struct bts_tracer *tracer,
111 size_t index, const void **record); 118 size_t index, const void **record);
112extern int ds_access_pebs(struct task_struct *task, 119extern int ds_access_pebs(struct pebs_tracer *tracer,
113 size_t index, const void **record); 120 size_t index, const void **record);
114 121
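A sketch of reading the buffer back with the helpers above, assuming 'tracer' came from ds_request_bts(); the index is the write position in records, and for a cyclic buffer older records at and beyond it may still be valid, which this sketch ignores:

	size_t idx, i;
	const void *rec;
	int size;

	if (ds_get_bts_index(tracer, &idx) < 0)
		return;

	for (i = 0; i < idx; i++) {
		size = ds_access_bts(tracer, i, &rec);
		if (size < 0)
			break;
		/* 'rec' points at one raw BTS record of 'size' bytes */
	}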
115/* 122/*
@@ -129,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
129 * 136 *
130 * Returns the number of bytes written or -Eerrno. 137 * Returns the number of bytes written or -Eerrno.
131 * 138 *
132 * task: the task to access; 139 * tracer: the tracer handle returned from ds_request_~()
133 * NULL to access the current cpu
134 * buffer: the buffer to write 140 * buffer: the buffer to write
135 * size: the size of the buffer 141 * size: the size of the buffer
136 */ 142 */
137extern int ds_write_bts(struct task_struct *task, 143extern int ds_write_bts(struct bts_tracer *tracer,
138 const void *buffer, size_t size); 144 const void *buffer, size_t size);
139extern int ds_write_pebs(struct task_struct *task, 145extern int ds_write_pebs(struct pebs_tracer *tracer,
140 const void *buffer, size_t size); 146 const void *buffer, size_t size);
141 147
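A sketch of injecting one record through this interface; 'rec' and 'rec_size' are placeholders for a record already laid out in the hardware format:

	int written = ds_write_bts(tracer, rec, rec_size);

	if (written < 0)
		return written;			/* -Eerrno */
	if (written < rec_size)
		pr_debug("bts buffer full\n");	/* short write: non-cyclic buffer ran out of space */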
142/* 148/*
143 * Same as ds_write_bts/pebs, but omit ownership checks.
144 *
145 * This is needed to have some other task than the owner of the
146 * BTS/PEBS buffer or the parameter task itself write into the
147 * respective buffer.
148 */
149extern int ds_unchecked_write_bts(struct task_struct *task,
150 const void *buffer, size_t size);
151extern int ds_unchecked_write_pebs(struct task_struct *task,
152 const void *buffer, size_t size);
153
154/*
155 * Reset the write pointer of the BTS/PEBS buffer. 149 * Reset the write pointer of the BTS/PEBS buffer.
156 * 150 *
157 * Returns 0 on success; -Eerrno on error 151 * Returns 0 on success; -Eerrno on error
158 * 152 *
159 * task: the task to access; 153 * tracer: the tracer handle returned from ds_request_~()
160 * NULL to access the current cpu
161 */ 154 */
162extern int ds_reset_bts(struct task_struct *task); 155extern int ds_reset_bts(struct bts_tracer *tracer);
163extern int ds_reset_pebs(struct task_struct *task); 156extern int ds_reset_pebs(struct pebs_tracer *tracer);
164 157
165/* 158/*
166 * Clear the BTS/PEBS buffer and reset the write pointer. 159 * Clear the BTS/PEBS buffer and reset the write pointer.
@@ -168,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
168 * 161 *
169 * Returns 0 on success; -Eerrno on error 162 * Returns 0 on success; -Eerrno on error
170 * 163 *
171 * task: the task to access; 164 * tracer: the tracer handle returned from ds_request_~()
172 * NULL to access the current cpu
173 */ 165 */
174extern int ds_clear_bts(struct task_struct *task); 166extern int ds_clear_bts(struct bts_tracer *tracer);
175extern int ds_clear_pebs(struct task_struct *task); 167extern int ds_clear_pebs(struct pebs_tracer *tracer);
176 168
177/* 169/*
178 * Provide the PEBS counter reset value. 170 * Provide the PEBS counter reset value.
179 * 171 *
180 * Returns 0 on success; -Eerrno on error 172 * Returns 0 on success; -Eerrno on error
181 * 173 *
182 * task: the task to access; 174 * tracer: the tracer handle returned from ds_request_pebs()
183 * NULL to access the current cpu
184 * value (out): the counter reset value 175 * value (out): the counter reset value
185 */ 176 */
186extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); 177extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
187 178
188/* 179/*
189 * Set the PEBS counter reset value. 180 * Set the PEBS counter reset value.
190 * 181 *
191 * Returns 0 on success; -Eerrno on error 182 * Returns 0 on success; -Eerrno on error
192 * 183 *
193 * task: the task to access; 184 * tracer: the tracer handle returned from ds_request_pebs()
194 * NULL to access the current cpu
195 * value: the new counter reset value 185 * value: the new counter reset value
196 */ 186 */
197extern int ds_set_pebs_reset(struct task_struct *task, u64 value); 187extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
198 188
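As an illustration of the reset value: the hardware reloads the PEBS counter with it after each record, so a profiler that wants one record every 'period' events would usually program the negative of the period; treat that convention (an up-counting counter that triggers on overflow) as an assumption here rather than something this header states:

	u64 period = 100000;			/* arbitrary sampling period in events */
	int err = ds_set_pebs_reset(tracer, (u64)-period);

	if (err)
		return err;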
199/* 189/*
200 * Initialization 190 * Initialization
@@ -207,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
207/* 197/*
208 * The DS context - part of struct thread_struct. 198 * The DS context - part of struct thread_struct.
209 */ 199 */
200#define MAX_SIZEOF_DS (12 * 8)
201
210struct ds_context { 202struct ds_context {
211 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ 203 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
212 unsigned char *ds; 204 unsigned char ds[MAX_SIZEOF_DS];
213 /* the owner of the BTS and PEBS configuration, respectively */ 205 /* the owner of the BTS and PEBS configuration, respectively */
214 struct task_struct *owner[2]; 206 struct ds_tracer *owner[2];
215 /* buffer overflow notification function for BTS and PEBS */
216 ds_ovfl_callback_t callback[2];
217 /* the original buffer address */
218 void *buffer[2];
219 /* the number of allocated pages for on-request allocated buffers */
220 unsigned int pages[2];
221 /* use count */ 207 /* use count */
222 unsigned long count; 208 unsigned long count;
223 /* a pointer to the context location inside the thread_struct 209 /* a pointer to the context location inside the thread_struct
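A note on the new sizing, with the field count taken as an assumption rather than from this hunk: the 64-bit DS layout needs nine 8-byte fields (buffer base, index, absolute maximum and interrupt threshold for each of BTS and PEBS, plus the PEBS counter reset), and ds_configure() later guards the embedded array with BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds):

	MAX_SIZEOF_DS   = 12 * 8 bytes = 96 bytes
	64-bit DS area >=  9 * 8 bytes = 72 bytes	(fits, with room to spare)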
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..7e61b4ceb9a4 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
17 */ 17 */
18 return addr - 1; 18 return addr - 1;
19} 19}
20#endif
21 20
21#ifdef CONFIG_DYNAMIC_FTRACE
22
23struct dyn_arch_ftrace {
24 /* No extra data needed for x86 */
25};
26
27#endif /* CONFIG_DYNAMIC_FTRACE */
28#endif /* __ASSEMBLY__ */
22#endif /* CONFIG_FUNCTION_TRACER */ 29#endif /* CONFIG_FUNCTION_TRACER */
23 30
31#ifdef CONFIG_FUNCTION_GRAPH_TRACER
32
33#ifndef __ASSEMBLY__
34
35/*
36 * Stack of return addresses for functions
37 * of a thread.
38 * Used in struct thread_info
39 */
40struct ftrace_ret_stack {
41 unsigned long ret;
42 unsigned long func;
43 unsigned long long calltime;
44};
45
46/*
47 * Primary handler of a function return.
48 * It relies on ftrace_return_to_handler.
49 * Defined in entry_32.S
50 */
51extern void return_to_handler(void);
52
53#endif /* __ASSEMBLY__ */
54#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
55
24#endif /* _ASM_X86_FTRACE_H */ 56#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,8 @@
20struct task_struct; 20struct task_struct;
21struct exec_domain; 21struct exec_domain;
22#include <asm/processor.h> 22#include <asm/processor.h>
23#include <asm/ftrace.h>
24#include <asm/atomic.h>
23 25
24struct thread_info { 26struct thread_info {
25 struct task_struct *task; /* main task structure */ 27 struct task_struct *task; /* main task structure */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b62a7667828e..d274425fb076 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,6 +14,12 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
14CFLAGS_REMOVE_ftrace.o = -pg 14CFLAGS_REMOVE_ftrace.o = -pg
15endif 15endif
16 16
17ifdef CONFIG_FUNCTION_GRAPH_TRACER
18# Don't trace __switch_to(), but leave it for the function tracer
19CFLAGS_REMOVE_process_32.o = -pg
20CFLAGS_REMOVE_process_64.o = -pg
21endif
22
17# 23#
18# vsyscalls (which work on the user stack) should have 24# vsyscalls (which work on the user stack) should have
19# no stack-protector checks: 25# no stack-protector checks:
@@ -65,6 +71,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
65obj-$(CONFIG_X86_IO_APIC) += io_apic.o 71obj-$(CONFIG_X86_IO_APIC) += io_apic.o
66obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 72obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
67obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 73obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
74obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
68obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 75obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 76obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 77obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h>
36 37
37#include <linux/acpi.h> 38#include <linux/acpi.h>
38#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
391 unsigned int next_perf_state = 0; /* Index into perf table */ 392 unsigned int next_perf_state = 0; /* Index into perf table */
392 unsigned int i; 393 unsigned int i;
393 int result = 0; 394 int result = 0;
395 struct power_trace it;
394 396
395 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 397 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
396 398
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
427 } 429 }
428 } 430 }
429 431
432 trace_power_mark(&it, POWER_PSTATE, next_perf_state);
433
430 switch (data->cpu_feature) { 434 switch (data->cpu_feature) {
431 case SYSTEM_INTEL_MSR_CAPABLE: 435 case SYSTEM_INTEL_MSR_CAPABLE:
432 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 436 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
307 set_cpu_cap(c, X86_FEATURE_P4); 307 set_cpu_cap(c, X86_FEATURE_P4);
308 if (c->x86 == 6) 308 if (c->x86 == 6)
309 set_cpu_cap(c, X86_FEATURE_P3); 309 set_cpu_cap(c, X86_FEATURE_P3);
310#endif
310 311
311 if (cpu_has_bts) 312 if (cpu_has_bts)
312 ptrace_bts_init_intel(c); 313 ptrace_bts_init_intel(c);
313 314
314#endif
315
316 detect_extended_topology(c); 315 detect_extended_topology(c);
317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 316 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /* 317 /*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index a2d1176c38ee..19a8c2c0389f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It assumes:
15 * - get_task_struct on all parameter tasks 14 * - get_task_struct on all traced tasks
16 * - current is allowed to trace parameter tasks 15 * - current is allowed to trace tasks
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -28,6 +27,7 @@
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/sched.h> 28#include <linux/sched.h>
30#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/kernel.h>
31 31
32 32
33/* 33/*
@@ -44,6 +44,33 @@ struct ds_configuration {
44}; 44};
45static struct ds_configuration ds_cfg; 45static struct ds_configuration ds_cfg;
46 46
47/*
48 * A BTS or PEBS tracer.
49 *
50 * This holds the configuration of the tracer and serves as a handle
51 * to identify tracers.
52 */
53struct ds_tracer {
54 /* the DS context (partially) owned by this tracer */
55 struct ds_context *context;
56 /* the buffer provided on ds_request() and its size in bytes */
57 void *buffer;
58 size_t size;
59};
60
61struct bts_tracer {
62 /* the common DS part */
63 struct ds_tracer ds;
64 /* buffer overflow notification function */
65 bts_ovfl_callback_t ovfl;
66};
67
68struct pebs_tracer {
69 /* the common DS part */
70 struct ds_tracer ds;
71 /* buffer overflow notification function */
72 pebs_ovfl_callback_t ovfl;
73};
47 74
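Because each tracer type embeds struct ds_tracer, the type-specific structure is recovered from the generic pointer stored in context->owner[] with container_of(); this is exactly what ds_overflow() below does, shown here in isolation:

	struct ds_tracer *ds = context->owner[ds_bts];
	struct bts_tracer *bts = container_of(ds, struct bts_tracer, ds);

	if (bts->ovfl)
		bts->ovfl(bts);		/* BTS-specific overflow callback */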
48/* 75/*
49 * Debug Store (DS) save area configuration (see Intel64 and IA32 76 * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -107,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
107 (*(unsigned long *)base) = value; 134 (*(unsigned long *)base) = value;
108} 135}
109 136
137#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
110 138
111/*
112 * Locking is done only for allocating BTS or PEBS resources and for
113 * guarding context and buffer memory allocation.
114 *
115 * Most functions require the current task to own the ds context part
116 * they are going to access. All the locking is done when validating
117 * access to the context.
118 */
119static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
120 139
121/* 140/*
122 * Validate that the current task is allowed to access the BTS/PEBS 141 * Locking is done only for allocating BTS or PEBS resources.
123 * buffer of the parameter task.
124 *
125 * Returns 0, if access is granted; -Eerrno, otherwise.
126 */ 142 */
127static inline int ds_validate_access(struct ds_context *context, 143static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
128 enum ds_qualifier qual)
129{
130 if (!context)
131 return -EPERM;
132
133 if (context->owner[qual] == current)
134 return 0;
135
136 return -EPERM;
137}
138 144
139 145
140/* 146/*
@@ -183,51 +189,13 @@ static inline int check_tracer(struct task_struct *task)
183 * 189 *
184 * Contexts are use-counted. They are allocated on first access and 190 * Contexts are use-counted. They are allocated on first access and
185 * deallocated when the last user puts the context. 191 * deallocated when the last user puts the context.
186 *
187 * We distinguish between an allocating and a non-allocating get of a
188 * context:
189 * - the allocating get is used for requesting BTS/PEBS resources. It
190 * requires the caller to hold the global ds_lock.
191 * - the non-allocating get is used for all other cases. A
192 * non-existing context indicates an error. It acquires and releases
193 * the ds_lock itself for obtaining the context.
194 *
195 * A context and its DS configuration are allocated and deallocated
196 * together. A context always has a DS configuration of the
197 * appropriate size.
198 */ 192 */
199static DEFINE_PER_CPU(struct ds_context *, system_context); 193static DEFINE_PER_CPU(struct ds_context *, system_context);
200 194
201#define this_system_context per_cpu(system_context, smp_processor_id()) 195#define this_system_context per_cpu(system_context, smp_processor_id())
202 196
203/*
204 * Returns the pointer to the parameter task's context or to the
205 * system-wide context, if task is NULL.
206 *
207 * Increases the use count of the returned context, if not NULL.
208 */
209static inline struct ds_context *ds_get_context(struct task_struct *task) 197static inline struct ds_context *ds_get_context(struct task_struct *task)
210{ 198{
211 struct ds_context *context;
212 unsigned long irq;
213
214 spin_lock_irqsave(&ds_lock, irq);
215
216 context = (task ? task->thread.ds_ctx : this_system_context);
217 if (context)
218 context->count++;
219
220 spin_unlock_irqrestore(&ds_lock, irq);
221
222 return context;
223}
224
225/*
226 * Same as ds_get_context, but allocates the context and it's DS
227 * structure, if necessary; returns NULL; if out of memory.
228 */
229static inline struct ds_context *ds_alloc_context(struct task_struct *task)
230{
231 struct ds_context **p_context = 199 struct ds_context **p_context =
232 (task ? &task->thread.ds_ctx : &this_system_context); 200 (task ? &task->thread.ds_ctx : &this_system_context);
233 struct ds_context *context = *p_context; 201 struct ds_context *context = *p_context;
@@ -238,16 +206,9 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
238 if (!context) 206 if (!context)
239 return NULL; 207 return NULL;
240 208
241 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
242 if (!context->ds) {
243 kfree(context);
244 return NULL;
245 }
246
247 spin_lock_irqsave(&ds_lock, irq); 209 spin_lock_irqsave(&ds_lock, irq);
248 210
249 if (*p_context) { 211 if (*p_context) {
250 kfree(context->ds);
251 kfree(context); 212 kfree(context);
252 213
253 context = *p_context; 214 context = *p_context;
@@ -272,10 +233,6 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
272 return context; 233 return context;
273} 234}
274 235
275/*
276 * Decreases the use count of the parameter context, if not NULL.
277 * Deallocates the context, if the use count reaches zero.
278 */
279static inline void ds_put_context(struct ds_context *context) 236static inline void ds_put_context(struct ds_context *context)
280{ 237{
281 unsigned long irq; 238 unsigned long irq;
@@ -296,13 +253,6 @@ static inline void ds_put_context(struct ds_context *context)
296 if (!context->task || (context->task == current)) 253 if (!context->task || (context->task == current))
297 wrmsrl(MSR_IA32_DS_AREA, 0); 254 wrmsrl(MSR_IA32_DS_AREA, 0);
298 255
299 put_tracer(context->task);
300
301 /* free any leftover buffers from tracers that did not
302 * deallocate them properly. */
303 kfree(context->buffer[ds_bts]);
304 kfree(context->buffer[ds_pebs]);
305 kfree(context->ds);
306 kfree(context); 256 kfree(context);
307 out: 257 out:
308 spin_unlock_irqrestore(&ds_lock, irq); 258 spin_unlock_irqrestore(&ds_lock, irq);
@@ -312,345 +262,342 @@ static inline void ds_put_context(struct ds_context *context)
312/* 262/*
313 * Handle a buffer overflow 263 * Handle a buffer overflow
314 * 264 *
315 * task: the task whose buffers are overflowing;
316 * NULL for a buffer overflow on the current cpu
317 * context: the ds context 265 * context: the ds context
318 * qual: the buffer type 266 * qual: the buffer type
319 */ 267 */
320static void ds_overflow(struct task_struct *task, struct ds_context *context, 268static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
321 enum ds_qualifier qual) 269{
322{ 270 switch (qual) {
323 if (!context) 271 case ds_bts: {
324 return; 272 struct bts_tracer *tracer =
325 273 container_of(context->owner[qual],
326 if (context->callback[qual]) 274 struct bts_tracer, ds);
327 (*context->callback[qual])(task); 275 if (tracer->ovfl)
328 276 tracer->ovfl(tracer);
329 /* todo: do some more overflow handling */ 277 }
278 break;
279 case ds_pebs: {
280 struct pebs_tracer *tracer =
281 container_of(context->owner[qual],
282 struct pebs_tracer, ds);
283 if (tracer->ovfl)
284 tracer->ovfl(tracer);
285 }
286 break;
287 }
330} 288}
331 289
332 290
333/* 291static void ds_install_ds_config(struct ds_context *context,
334 * Allocate a non-pageable buffer of the parameter size. 292 enum ds_qualifier qual,
335 * Checks the memory and the locked memory rlimit. 293 void *base, size_t size, size_t ith)
336 *
337 * Returns the buffer, if successful;
338 * NULL, if out of memory or rlimit exceeded.
339 *
340 * size: the requested buffer size in bytes
341 * pages (out): if not NULL, contains the number of pages reserved
342 */
343static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
344{ 294{
345 unsigned long rlim, vm, pgsz; 295 unsigned long buffer, adj;
346 void *buffer;
347
348 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
349
350 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
351 vm = current->mm->total_vm + pgsz;
352 if (rlim < vm)
353 return NULL;
354 296
355 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 297 /* adjust the buffer address and size to meet alignment
356 vm = current->mm->locked_vm + pgsz; 298 * constraints:
357 if (rlim < vm) 299 * - buffer is double-word aligned
358 return NULL; 300 * - size is multiple of record size
301 *
302 * We checked the size at the very beginning; we have enough
303 * space to do the adjustment.
304 */
305 buffer = (unsigned long)base;
359 306
360 buffer = kzalloc(size, GFP_KERNEL); 307 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
361 if (!buffer) 308 buffer += adj;
362 return NULL; 309 size -= adj;
363 310
364 current->mm->total_vm += pgsz; 311 size /= ds_cfg.sizeof_rec[qual];
365 current->mm->locked_vm += pgsz; 312 size *= ds_cfg.sizeof_rec[qual];
366 313
367 if (pages) 314 ds_set(context->ds, qual, ds_buffer_base, buffer);
368 *pages = pgsz; 315 ds_set(context->ds, qual, ds_index, buffer);
316 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
369 317
370 return buffer; 318 /* The value for 'no threshold' is -1, which will set the
319 * threshold outside of the buffer, just like we want it.
320 */
321 ds_set(context->ds, qual,
322 ds_interrupt_threshold, buffer + size - ith);
371} 323}
372 324
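A worked example of the adjustment done in ds_install_ds_config() above, with arbitrary numbers (8-byte alignment, 24-byte records):

	base      = 0x1004, size = 1000
	adj       = ALIGN(0x1004, 8) - 0x1004 = 4
	buffer    = 0x1008, size = 996
	size      = (996 / 24) * 24 = 984	/* 41 whole records */
	threshold = buffer + size - ith		/* ith = (size_t)-1 puts it at buffer + size + 1, never reached */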
373static int ds_request(struct task_struct *task, void *base, size_t size, 325static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
374 ds_ovfl_callback_t ovfl, enum ds_qualifier qual) 326 struct task_struct *task,
327 void *base, size_t size, size_t th)
375{ 328{
376 struct ds_context *context; 329 struct ds_context *context;
377 unsigned long buffer, adj;
378 const unsigned long alignment = (1 << 3);
379 unsigned long irq; 330 unsigned long irq;
380 int error = 0; 331 int error;
381 332
333 error = -EOPNOTSUPP;
382 if (!ds_cfg.sizeof_ds) 334 if (!ds_cfg.sizeof_ds)
383 return -EOPNOTSUPP; 335 goto out;
336
337 error = -EINVAL;
338 if (!base)
339 goto out;
384 340
385 /* we require some space to do alignment adjustments below */ 341 /* we require some space to do alignment adjustments below */
386 if (size < (alignment + ds_cfg.sizeof_rec[qual])) 342 error = -EINVAL;
387 return -EINVAL; 343 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
344 goto out;
388 345
389 /* buffer overflow notification is not yet implemented */ 346 if (th != (size_t)-1) {
390 if (ovfl) 347 th *= ds_cfg.sizeof_rec[qual];
391 return -EOPNOTSUPP; 348
349 error = -EINVAL;
350 if (size <= th)
351 goto out;
352 }
392 353
354 tracer->buffer = base;
355 tracer->size = size;
393 356
394 context = ds_alloc_context(task); 357 error = -ENOMEM;
358 context = ds_get_context(task);
395 if (!context) 359 if (!context)
396 return -ENOMEM; 360 goto out;
361 tracer->context = context;
362
397 363
398 spin_lock_irqsave(&ds_lock, irq); 364 spin_lock_irqsave(&ds_lock, irq);
399 365
400 error = -EPERM; 366 error = -EPERM;
401 if (!check_tracer(task)) 367 if (!check_tracer(task))
402 goto out_unlock; 368 goto out_unlock;
403
404 get_tracer(task); 369 get_tracer(task);
405 370
406 error = -EALREADY;
407 if (context->owner[qual] == current)
408 goto out_put_tracer;
409 error = -EPERM; 371 error = -EPERM;
410 if (context->owner[qual] != NULL) 372 if (context->owner[qual])
411 goto out_put_tracer; 373 goto out_put_tracer;
412 context->owner[qual] = current; 374 context->owner[qual] = tracer;
413 375
414 spin_unlock_irqrestore(&ds_lock, irq); 376 spin_unlock_irqrestore(&ds_lock, irq);
415 377
416 378
417 error = -ENOMEM; 379 ds_install_ds_config(context, qual, base, size, th);
418 if (!base) {
419 base = ds_allocate_buffer(size, &context->pages[qual]);
420 if (!base)
421 goto out_release;
422
423 context->buffer[qual] = base;
424 }
425 error = 0;
426 380
427 context->callback[qual] = ovfl; 381 return 0;
428
429 /* adjust the buffer address and size to meet alignment
430 * constraints:
431 * - buffer is double-word aligned
432 * - size is multiple of record size
433 *
434 * We checked the size at the very beginning; we have enough
435 * space to do the adjustment.
436 */
437 buffer = (unsigned long)base;
438
439 adj = ALIGN(buffer, alignment) - buffer;
440 buffer += adj;
441 size -= adj;
442
443 size /= ds_cfg.sizeof_rec[qual];
444 size *= ds_cfg.sizeof_rec[qual];
445
446 ds_set(context->ds, qual, ds_buffer_base, buffer);
447 ds_set(context->ds, qual, ds_index, buffer);
448 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
449
450 if (ovfl) {
451 /* todo: select a suitable interrupt threshold */
452 } else
453 ds_set(context->ds, qual,
454 ds_interrupt_threshold, buffer + size + 1);
455
456 /* we keep the context until ds_release */
457 return error;
458
459 out_release:
460 context->owner[qual] = NULL;
461 ds_put_context(context);
462 put_tracer(task);
463 return error;
464 382
465 out_put_tracer: 383 out_put_tracer:
466 spin_unlock_irqrestore(&ds_lock, irq);
467 ds_put_context(context);
468 put_tracer(task); 384 put_tracer(task);
469 return error;
470
471 out_unlock: 385 out_unlock:
472 spin_unlock_irqrestore(&ds_lock, irq); 386 spin_unlock_irqrestore(&ds_lock, irq);
473 ds_put_context(context); 387 ds_put_context(context);
388 tracer->context = NULL;
389 out:
474 return error; 390 return error;
475} 391}
476 392
477int ds_request_bts(struct task_struct *task, void *base, size_t size, 393struct bts_tracer *ds_request_bts(struct task_struct *task,
478 ds_ovfl_callback_t ovfl) 394 void *base, size_t size,
395 bts_ovfl_callback_t ovfl, size_t th)
479{ 396{
480 return ds_request(task, base, size, ovfl, ds_bts); 397 struct bts_tracer *tracer;
481} 398 int error;
482 399
483int ds_request_pebs(struct task_struct *task, void *base, size_t size, 400 /* buffer overflow notification is not yet implemented */
484 ds_ovfl_callback_t ovfl) 401 error = -EOPNOTSUPP;
485{ 402 if (ovfl)
486 return ds_request(task, base, size, ovfl, ds_pebs); 403 goto out;
404
405 error = -ENOMEM;
406 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
407 if (!tracer)
408 goto out;
409 tracer->ovfl = ovfl;
410
411 error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
412 if (error < 0)
413 goto out_tracer;
414
415 return tracer;
416
417 out_tracer:
418 kfree(tracer);
419 out:
420 return ERR_PTR(error);
487} 421}
488 422
489static int ds_release(struct task_struct *task, enum ds_qualifier qual) 423struct pebs_tracer *ds_request_pebs(struct task_struct *task,
424 void *base, size_t size,
425 pebs_ovfl_callback_t ovfl, size_t th)
490{ 426{
491 struct ds_context *context; 427 struct pebs_tracer *tracer;
492 int error; 428 int error;
493 429
494 context = ds_get_context(task); 430 /* buffer overflow notification is not yet implemented */
495 error = ds_validate_access(context, qual); 431 error = -EOPNOTSUPP;
496 if (error < 0) 432 if (ovfl)
497 goto out; 433 goto out;
498 434
499 kfree(context->buffer[qual]); 435 error = -ENOMEM;
500 context->buffer[qual] = NULL; 436 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
437 if (!tracer)
438 goto out;
439 tracer->ovfl = ovfl;
501 440
502 current->mm->total_vm -= context->pages[qual]; 441 error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
503 current->mm->locked_vm -= context->pages[qual]; 442 if (error < 0)
504 context->pages[qual] = 0; 443 goto out_tracer;
505 context->owner[qual] = NULL;
506 444
507 /* 445 return tracer;
508 * we put the context twice: 446
509 * once for the ds_get_context 447 out_tracer:
510 * once for the corresponding ds_request 448 kfree(tracer);
511 */
512 ds_put_context(context);
513 out: 449 out:
514 ds_put_context(context); 450 return ERR_PTR(error);
515 return error;
516} 451}
517 452
518int ds_release_bts(struct task_struct *task) 453static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
519{ 454{
520 return ds_release(task, ds_bts); 455 BUG_ON(tracer->context->owner[qual] != tracer);
456 tracer->context->owner[qual] = NULL;
457
458 put_tracer(tracer->context->task);
459 ds_put_context(tracer->context);
521} 460}
522 461
523int ds_release_pebs(struct task_struct *task) 462int ds_release_bts(struct bts_tracer *tracer)
524{ 463{
525 return ds_release(task, ds_pebs); 464 if (!tracer)
465 return -EINVAL;
466
467 ds_release(&tracer->ds, ds_bts);
468 kfree(tracer);
469
470 return 0;
526} 471}
527 472
528static int ds_get_index(struct task_struct *task, size_t *pos, 473int ds_release_pebs(struct pebs_tracer *tracer)
529 enum ds_qualifier qual)
530{ 474{
531 struct ds_context *context; 475 if (!tracer)
532 unsigned long base, index; 476 return -EINVAL;
533 int error;
534 477
535 context = ds_get_context(task); 478 ds_release(&tracer->ds, ds_pebs);
536 error = ds_validate_access(context, qual); 479 kfree(tracer);
537 if (error < 0) 480
538 goto out; 481 return 0;
482}
483
484static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
485{
486 unsigned long base, index;
539 487
540 base = ds_get(context->ds, qual, ds_buffer_base); 488 base = ds_get(context->ds, qual, ds_buffer_base);
541 index = ds_get(context->ds, qual, ds_index); 489 index = ds_get(context->ds, qual, ds_index);
542 490
543 error = ((index - base) / ds_cfg.sizeof_rec[qual]); 491 return (index - base) / ds_cfg.sizeof_rec[qual];
544 if (pos)
545 *pos = error;
546 out:
547 ds_put_context(context);
548 return error;
549} 492}
550 493
551int ds_get_bts_index(struct task_struct *task, size_t *pos) 494int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
552{ 495{
553 return ds_get_index(task, pos, ds_bts); 496 if (!tracer)
497 return -EINVAL;
498
499 if (!pos)
500 return -EINVAL;
501
502 *pos = ds_get_index(tracer->ds.context, ds_bts);
503
504 return 0;
554} 505}
555 506
556int ds_get_pebs_index(struct task_struct *task, size_t *pos) 507int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
557{ 508{
558 return ds_get_index(task, pos, ds_pebs); 509 if (!tracer)
510 return -EINVAL;
511
512 if (!pos)
513 return -EINVAL;
514
515 *pos = ds_get_index(tracer->ds.context, ds_pebs);
516
517 return 0;
559} 518}
560 519
561static int ds_get_end(struct task_struct *task, size_t *pos, 520static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
562 enum ds_qualifier qual)
563{ 521{
564 struct ds_context *context; 522 unsigned long base, max;
565 unsigned long base, end;
566 int error;
567
568 context = ds_get_context(task);
569 error = ds_validate_access(context, qual);
570 if (error < 0)
571 goto out;
572 523
573 base = ds_get(context->ds, qual, ds_buffer_base); 524 base = ds_get(context->ds, qual, ds_buffer_base);
574 end = ds_get(context->ds, qual, ds_absolute_maximum); 525 max = ds_get(context->ds, qual, ds_absolute_maximum);
575 526
576 error = ((end - base) / ds_cfg.sizeof_rec[qual]); 527 return (max - base) / ds_cfg.sizeof_rec[qual];
577 if (pos)
578 *pos = error;
579 out:
580 ds_put_context(context);
581 return error;
582} 528}
583 529
584int ds_get_bts_end(struct task_struct *task, size_t *pos) 530int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
585{ 531{
586 return ds_get_end(task, pos, ds_bts); 532 if (!tracer)
533 return -EINVAL;
534
535 if (!pos)
536 return -EINVAL;
537
538 *pos = ds_get_end(tracer->ds.context, ds_bts);
539
540 return 0;
587} 541}
588 542
589int ds_get_pebs_end(struct task_struct *task, size_t *pos) 543int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
590{ 544{
591 return ds_get_end(task, pos, ds_pebs); 545 if (!tracer)
546 return -EINVAL;
547
548 if (!pos)
549 return -EINVAL;
550
551 *pos = ds_get_end(tracer->ds.context, ds_pebs);
552
553 return 0;
592} 554}
593 555
594static int ds_access(struct task_struct *task, size_t index, 556static int ds_access(struct ds_context *context, enum ds_qualifier qual,
595 const void **record, enum ds_qualifier qual) 557 size_t index, const void **record)
596{ 558{
597 struct ds_context *context;
598 unsigned long base, idx; 559 unsigned long base, idx;
599 int error;
600 560
601 if (!record) 561 if (!record)
602 return -EINVAL; 562 return -EINVAL;
603 563
604 context = ds_get_context(task);
605 error = ds_validate_access(context, qual);
606 if (error < 0)
607 goto out;
608
609 base = ds_get(context->ds, qual, ds_buffer_base); 564 base = ds_get(context->ds, qual, ds_buffer_base);
610 idx = base + (index * ds_cfg.sizeof_rec[qual]); 565 idx = base + (index * ds_cfg.sizeof_rec[qual]);
611 566
612 error = -EINVAL;
613 if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) 567 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
614 goto out; 568 return -EINVAL;
615 569
616 *record = (const void *)idx; 570 *record = (const void *)idx;
617 error = ds_cfg.sizeof_rec[qual]; 571
618 out: 572 return ds_cfg.sizeof_rec[qual];
619 ds_put_context(context);
620 return error;
621} 573}
622 574
623int ds_access_bts(struct task_struct *task, size_t index, const void **record) 575int ds_access_bts(struct bts_tracer *tracer, size_t index,
576 const void **record)
624{ 577{
625 return ds_access(task, index, record, ds_bts); 578 if (!tracer)
579 return -EINVAL;
580
581 return ds_access(tracer->ds.context, ds_bts, index, record);
626} 582}
627 583
628int ds_access_pebs(struct task_struct *task, size_t index, const void **record) 584int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
585 const void **record)
629{ 586{
630 return ds_access(task, index, record, ds_pebs); 587 if (!tracer)
588 return -EINVAL;
589
590 return ds_access(tracer->ds.context, ds_pebs, index, record);
631} 591}
632 592
633static int ds_write(struct task_struct *task, const void *record, size_t size, 593static int ds_write(struct ds_context *context, enum ds_qualifier qual,
634 enum ds_qualifier qual, int force) 594 const void *record, size_t size)
635{ 595{
636 struct ds_context *context; 596 int bytes_written = 0;
637 int error;
638 597
639 if (!record) 598 if (!record)
640 return -EINVAL; 599 return -EINVAL;
641 600
642 error = -EPERM;
643 context = ds_get_context(task);
644 if (!context)
645 goto out;
646
647 if (!force) {
648 error = ds_validate_access(context, qual);
649 if (error < 0)
650 goto out;
651 }
652
653 error = 0;
654 while (size) { 601 while (size) {
655 unsigned long base, index, end, write_end, int_th; 602 unsigned long base, index, end, write_end, int_th;
656 unsigned long write_size, adj_write_size; 603 unsigned long write_size, adj_write_size;
@@ -678,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
678 write_end = end; 625 write_end = end;
679 626
680 if (write_end <= index) 627 if (write_end <= index)
681 goto out; 628 break;
682 629
683 write_size = min((unsigned long) size, write_end - index); 630 write_size = min((unsigned long) size, write_end - index);
684 memcpy((void *)index, record, write_size); 631 memcpy((void *)index, record, write_size);
685 632
686 record = (const char *)record + write_size; 633 record = (const char *)record + write_size;
687 size -= write_size; 634 size -= write_size;
688 error += write_size; 635 bytes_written += write_size;
689 636
690 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 637 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
691 adj_write_size *= ds_cfg.sizeof_rec[qual]; 638 adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -700,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
700 ds_set(context->ds, qual, ds_index, index); 647 ds_set(context->ds, qual, ds_index, index);
701 648
702 if (index >= int_th) 649 if (index >= int_th)
703 ds_overflow(task, context, qual); 650 ds_overflow(context, qual);
704 } 651 }
705 652
706 out: 653 return bytes_written;
707 ds_put_context(context);
708 return error;
709} 654}
710 655
711int ds_write_bts(struct task_struct *task, const void *record, size_t size) 656int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
712{ 657{
713 return ds_write(task, record, size, ds_bts, /* force = */ 0); 658 if (!tracer)
714} 659 return -EINVAL;
715 660
716int ds_write_pebs(struct task_struct *task, const void *record, size_t size) 661 return ds_write(tracer->ds.context, ds_bts, record, size);
717{
718 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
719} 662}
720 663
721int ds_unchecked_write_bts(struct task_struct *task, 664int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
722 const void *record, size_t size)
723{ 665{
724 return ds_write(task, record, size, ds_bts, /* force = */ 1); 666 if (!tracer)
725} 667 return -EINVAL;
726 668
727int ds_unchecked_write_pebs(struct task_struct *task, 669 return ds_write(tracer->ds.context, ds_pebs, record, size);
728 const void *record, size_t size)
729{
730 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
731} 670}
732 671
733static int ds_reset_or_clear(struct task_struct *task, 672static void ds_reset_or_clear(struct ds_context *context,
734 enum ds_qualifier qual, int clear) 673 enum ds_qualifier qual, int clear)
735{ 674{
736 struct ds_context *context;
737 unsigned long base, end; 675 unsigned long base, end;
738 int error;
739
740 context = ds_get_context(task);
741 error = ds_validate_access(context, qual);
742 if (error < 0)
743 goto out;
744 676
745 base = ds_get(context->ds, qual, ds_buffer_base); 677 base = ds_get(context->ds, qual, ds_buffer_base);
746 end = ds_get(context->ds, qual, ds_absolute_maximum); 678 end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -749,70 +681,69 @@ static int ds_reset_or_clear(struct task_struct *task,
749 memset((void *)base, 0, end - base); 681 memset((void *)base, 0, end - base);
750 682
751 ds_set(context->ds, qual, ds_index, base); 683 ds_set(context->ds, qual, ds_index, base);
752
753 error = 0;
754 out:
755 ds_put_context(context);
756 return error;
757} 684}
758 685
759int ds_reset_bts(struct task_struct *task) 686int ds_reset_bts(struct bts_tracer *tracer)
760{ 687{
761 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); 688 if (!tracer)
689 return -EINVAL;
690
691 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
692
693 return 0;
762} 694}
763 695
764int ds_reset_pebs(struct task_struct *task) 696int ds_reset_pebs(struct pebs_tracer *tracer)
765{ 697{
766 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); 698 if (!tracer)
699 return -EINVAL;
700
701 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
702
703 return 0;
767} 704}
768 705
769int ds_clear_bts(struct task_struct *task) 706int ds_clear_bts(struct bts_tracer *tracer)
770{ 707{
771 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); 708 if (!tracer)
709 return -EINVAL;
710
711 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
712
713 return 0;
772} 714}
773 715
774int ds_clear_pebs(struct task_struct *task) 716int ds_clear_pebs(struct pebs_tracer *tracer)
775{ 717{
776 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); 718 if (!tracer)
719 return -EINVAL;
720
721 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
722
723 return 0;
777} 724}
778 725
779int ds_get_pebs_reset(struct task_struct *task, u64 *value) 726int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
780{ 727{
781 struct ds_context *context; 728 if (!tracer)
782 int error; 729 return -EINVAL;
783 730
784 if (!value) 731 if (!value)
785 return -EINVAL; 732 return -EINVAL;
786 733
787 context = ds_get_context(task); 734 *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
788 error = ds_validate_access(context, ds_pebs);
789 if (error < 0)
790 goto out;
791 735
792 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); 736 return 0;
793
794 error = 0;
795 out:
796 ds_put_context(context);
797 return error;
798} 737}
799 738
800int ds_set_pebs_reset(struct task_struct *task, u64 value) 739int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
801{ 740{
802 struct ds_context *context; 741 if (!tracer)
803 int error; 742 return -EINVAL;
804
805 context = ds_get_context(task);
806 error = ds_validate_access(context, ds_pebs);
807 if (error < 0)
808 goto out;
809 743
810 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; 744 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
811 745
812 error = 0; 746 return 0;
813 out:
814 ds_put_context(context);
815 return error;
816} 747}
817 748
818static const struct ds_configuration ds_cfg_var = { 749static const struct ds_configuration ds_cfg_var = {
@@ -840,6 +771,10 @@ static inline void
840ds_configure(const struct ds_configuration *cfg) 771ds_configure(const struct ds_configuration *cfg)
841{ 772{
842 ds_cfg = *cfg; 773 ds_cfg = *cfg;
774
775 printk(KERN_INFO "DS available\n");
776
777 BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
843} 778}
844 779
845void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 780void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -847,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
847 switch (c->x86) { 782 switch (c->x86) {
848 case 0x6: 783 case 0x6:
849 switch (c->x86_model) { 784 switch (c->x86_model) {
785 case 0 ... 0xC:
786 /* sorry, don't know about them */
787 break;
850 case 0xD: 788 case 0xD:
851 case 0xE: /* Pentium M */ 789 case 0xE: /* Pentium M */
852 ds_configure(&ds_cfg_var); 790 ds_configure(&ds_cfg_var);
853 break; 791 break;
854 case 0xF: /* Core2 */ 792 default: /* Core2, Atom, ... */
855 case 0x1C: /* Atom */
856 ds_configure(&ds_cfg_64); 793 ds_configure(&ds_cfg_64);
857 break; 794 break;
858 default:
859 /* sorry, don't know about them */
860 break;
861 } 795 }
862 break; 796 break;
863 case 0xF: 797 case 0xF:
@@ -884,6 +818,8 @@ void ds_free(struct ds_context *context)
884 * is dying. There should not be any user of that context left 818 * is dying. There should not be any user of that context left
885 * to disturb us, anymore. */ 819 * to disturb us, anymore. */
886 unsigned long leftovers = context->count; 820 unsigned long leftovers = context->count;
887 while (leftovers--) 821 while (leftovers--) {
822 put_tracer(context->task);
888 ds_put_context(context); 823 ds_put_context(context);
824 }
889} 825}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..958af86186c4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
1157END(mcount) 1157END(mcount)
1158 1158
1159ENTRY(ftrace_caller) 1159ENTRY(ftrace_caller)
1160 cmpl $0, function_trace_stop
1161 jne ftrace_stub
1162
1160 pushl %eax 1163 pushl %eax
1161 pushl %ecx 1164 pushl %ecx
1162 pushl %edx 1165 pushl %edx
@@ -1171,6 +1174,11 @@ ftrace_call:
1171 popl %edx 1174 popl %edx
1172 popl %ecx 1175 popl %ecx
1173 popl %eax 1176 popl %eax
1177#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1178.globl ftrace_graph_call
1179ftrace_graph_call:
1180 jmp ftrace_stub
1181#endif
1174 1182
1175.globl ftrace_stub 1183.globl ftrace_stub
1176ftrace_stub: 1184ftrace_stub:
@@ -1180,8 +1188,15 @@ END(ftrace_caller)
1180#else /* ! CONFIG_DYNAMIC_FTRACE */ 1188#else /* ! CONFIG_DYNAMIC_FTRACE */
1181 1189
1182ENTRY(mcount) 1190ENTRY(mcount)
1191 cmpl $0, function_trace_stop
1192 jne ftrace_stub
1193
1183 cmpl $ftrace_stub, ftrace_trace_function 1194 cmpl $ftrace_stub, ftrace_trace_function
1184 jnz trace 1195 jnz trace
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197 cmpl $ftrace_stub, ftrace_graph_return
1198 jnz ftrace_graph_caller
1199#endif
1185.globl ftrace_stub 1200.globl ftrace_stub
1186ftrace_stub: 1201ftrace_stub:
1187 ret 1202 ret
@@ -1200,12 +1215,42 @@ trace:
1200 popl %edx 1215 popl %edx
1201 popl %ecx 1216 popl %ecx
1202 popl %eax 1217 popl %eax
1203
1204 jmp ftrace_stub 1218 jmp ftrace_stub
1205END(mcount) 1219END(mcount)
1206#endif /* CONFIG_DYNAMIC_FTRACE */ 1220#endif /* CONFIG_DYNAMIC_FTRACE */
1207#endif /* CONFIG_FUNCTION_TRACER */ 1221#endif /* CONFIG_FUNCTION_TRACER */
1208 1222
1223#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1224ENTRY(ftrace_graph_caller)
1225 cmpl $0, function_trace_stop
1226 jne ftrace_stub
1227
1228 pushl %eax
1229 pushl %ecx
1230 pushl %edx
1231 movl 0xc(%esp), %edx
1232 lea 0x4(%ebp), %eax
1233 call prepare_ftrace_return
1234 popl %edx
1235 popl %ecx
1236 popl %eax
1237 ret
1238END(ftrace_graph_caller)
1239
1240.globl return_to_handler
1241return_to_handler:
1242 pushl $0
1243 pushl %eax
1244 pushl %ecx
1245 pushl %edx
1246 call ftrace_return_to_handler
1247 movl %eax, 0xc(%esp)
1248 popl %edx
1249 popl %ecx
1250 popl %eax
1251 ret
1252#endif
1253
1209.section .rodata,"a" 1254.section .rodata,"a"
1210#include "syscall_table_32.S" 1255#include "syscall_table_32.S"
1211 1256
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..2aa0526ac30e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
68END(mcount) 68END(mcount)
69 69
70ENTRY(ftrace_caller) 70ENTRY(ftrace_caller)
71 cmpl $0, function_trace_stop
72 jne ftrace_stub
71 73
72 /* taken from glibc */ 74 /* taken from glibc */
73 subq $0x38, %rsp 75 subq $0x38, %rsp
@@ -96,6 +98,12 @@ ftrace_call:
96 movq (%rsp), %rax 98 movq (%rsp), %rax
97 addq $0x38, %rsp 99 addq $0x38, %rsp
98 100
101#ifdef CONFIG_FUNCTION_GRAPH_TRACER
102.globl ftrace_graph_call
103ftrace_graph_call:
104 jmp ftrace_stub
105#endif
106
99.globl ftrace_stub 107.globl ftrace_stub
100ftrace_stub: 108ftrace_stub:
101 retq 109 retq
@@ -103,8 +111,17 @@ END(ftrace_caller)
103 111
104#else /* ! CONFIG_DYNAMIC_FTRACE */ 112#else /* ! CONFIG_DYNAMIC_FTRACE */
105ENTRY(mcount) 113ENTRY(mcount)
114 cmpl $0, function_trace_stop
115 jne ftrace_stub
116
106 cmpq $ftrace_stub, ftrace_trace_function 117 cmpq $ftrace_stub, ftrace_trace_function
107 jnz trace 118 jnz trace
119
120#ifdef CONFIG_FUNCTION_GRAPH_TRACER
121 cmpq $ftrace_stub, ftrace_graph_return
122 jnz ftrace_graph_caller
123#endif
124
108.globl ftrace_stub 125.globl ftrace_stub
109ftrace_stub: 126ftrace_stub:
110 retq 127 retq
@@ -140,6 +157,68 @@ END(mcount)
140#endif /* CONFIG_DYNAMIC_FTRACE */ 157#endif /* CONFIG_DYNAMIC_FTRACE */
141#endif /* CONFIG_FUNCTION_TRACER */ 158#endif /* CONFIG_FUNCTION_TRACER */
142 159
160#ifdef CONFIG_FUNCTION_GRAPH_TRACER
161ENTRY(ftrace_graph_caller)
162 cmpl $0, function_trace_stop
163 jne ftrace_stub
164
165 subq $0x38, %rsp
166 movq %rax, (%rsp)
167 movq %rcx, 8(%rsp)
168 movq %rdx, 16(%rsp)
169 movq %rsi, 24(%rsp)
170 movq %rdi, 32(%rsp)
171 movq %r8, 40(%rsp)
172 movq %r9, 48(%rsp)
173
174 leaq 8(%rbp), %rdi
175 movq 0x38(%rsp), %rsi
176
177 call prepare_ftrace_return
178
179 movq 48(%rsp), %r9
180 movq 40(%rsp), %r8
181 movq 32(%rsp), %rdi
182 movq 24(%rsp), %rsi
183 movq 16(%rsp), %rdx
184 movq 8(%rsp), %rcx
185 movq (%rsp), %rax
186 addq $0x38, %rsp
187 retq
188END(ftrace_graph_caller)
189
190
191.globl return_to_handler
192return_to_handler:
193 subq $80, %rsp
194
195 movq %rax, (%rsp)
196 movq %rcx, 8(%rsp)
197 movq %rdx, 16(%rsp)
198 movq %rsi, 24(%rsp)
199 movq %rdi, 32(%rsp)
200 movq %r8, 40(%rsp)
201 movq %r9, 48(%rsp)
202 movq %r10, 56(%rsp)
203 movq %r11, 64(%rsp)
204
205 call ftrace_return_to_handler
206
207 movq %rax, 72(%rsp)
208 movq 64(%rsp), %r11
209 movq 56(%rsp), %r10
210 movq 48(%rsp), %r9
211 movq 40(%rsp), %r8
212 movq 32(%rsp), %rdi
213 movq 24(%rsp), %rsi
214 movq 16(%rsp), %rdx
215 movq 8(%rsp), %rcx
216 movq (%rsp), %rax
217 addq $72, %rsp
218 retq
219#endif
220
221
143#ifndef CONFIG_PREEMPT 222#ifndef CONFIG_PREEMPT
144#define retint_kernel retint_restore_args 223#define retint_kernel retint_restore_args
145#endif 224#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..58832478b94e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/ftrace.h> 15#include <linux/ftrace.h>
16#include <linux/percpu.h> 16#include <linux/percpu.h>
17#include <linux/sched.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/list.h> 19#include <linux/list.h>
19 20
20#include <asm/ftrace.h> 21#include <asm/ftrace.h>
22#include <linux/ftrace.h>
21#include <asm/nops.h> 23#include <asm/nops.h>
24#include <asm/nmi.h>
22 25
23 26
24static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; 27#ifdef CONFIG_DYNAMIC_FTRACE
25 28
26union ftrace_code_union { 29union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE]; 30 char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
31 } __attribute__((packed)); 34 } __attribute__((packed));
32}; 35};
33 36
34
35static int ftrace_calc_offset(long ip, long addr) 37static int ftrace_calc_offset(long ip, long addr)
36{ 38{
37 return (int)(addr - ip); 39 return (int)(addr - ip);
38} 40}
39 41
40unsigned char *ftrace_nop_replace(void) 42static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
41{
42 return ftrace_nop;
43}
44
45unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{ 43{
47 static union ftrace_code_union calc; 44 static union ftrace_code_union calc;
48 45
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
56 return calc.code; 53 return calc.code;
57} 54}
58 55
59int 56/*
57 * Modifying code must take extra care. On an SMP machine, if
58 * the code being modified is also being executed on another CPU
59 * that CPU will have undefined results and possibly take a GPF.
60 * We use kstop_machine to stop other CPUs from executing code.
61 * But this does not stop NMIs from happening. We still need
62 * to protect against that. We separate out the modification of
63 * the code to take care of this.
64 *
65 * Two buffers are added: An IP buffer and a "code" buffer.
66 *
67 * 1) Put the instruction pointer into the IP buffer
68 * and the new code into the "code" buffer.
69 * 2) Set a flag that says we are modifying code
70 * 3) Wait for any running NMIs to finish.
71 * 4) Write the code
72 * 5) clear the flag.
73 * 6) Wait for any running NMIs to finish.
74 *
75 * If an NMI is executed, the first thing it does is to call
76 * "ftrace_nmi_enter". This will check if the flag is set to write
77 * and if it is, it will write what is in the IP and "code" buffers.
78 *
79 * The trick is, it does not matter if everyone is writing the same
80 * content to the code location. Also, if a CPU is executing code
81 * it is OK to write to that code location if the contents being written
82 * are the same as what exists.
83 */
84
85static atomic_t in_nmi = ATOMIC_INIT(0);
86static int mod_code_status; /* holds return value of text write */
87static int mod_code_write; /* set when NMI should do the write */
88static void *mod_code_ip; /* holds the IP to write to */
89static void *mod_code_newcode; /* holds the text to write to the IP */
90
91static unsigned nmi_wait_count;
92static atomic_t nmi_update_count = ATOMIC_INIT(0);
93
94int ftrace_arch_read_dyn_info(char *buf, int size)
95{
96 int r;
97
98 r = snprintf(buf, size, "%u %u",
99 nmi_wait_count,
100 atomic_read(&nmi_update_count));
101 return r;
102}
103
104static void ftrace_mod_code(void)
105{
106 /*
107 * Yes, more than one CPU can be writing to mod_code_status.
108 * (and the code itself)
109 * But if one were to fail, then they all should, and if one were
110 * to succeed, then they all should.
111 */
112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
113 MCOUNT_INSN_SIZE);
114}
115
116void ftrace_nmi_enter(void)
117{
118 atomic_inc(&in_nmi);
119 /* Must have in_nmi seen before reading write flag */
120 smp_mb();
121 if (mod_code_write) {
122 ftrace_mod_code();
123 atomic_inc(&nmi_update_count);
124 }
125}
126
127void ftrace_nmi_exit(void)
128{
129 /* Finish all executions before clearing in_nmi */
130 smp_wmb();
131 atomic_dec(&in_nmi);
132}
133
134static void wait_for_nmi(void)
135{
136 int waited = 0;
137
138 while (atomic_read(&in_nmi)) {
139 waited = 1;
140 cpu_relax();
141 }
142
143 if (waited)
144 nmi_wait_count++;
145}
146
147static int
148do_ftrace_mod_code(unsigned long ip, void *new_code)
149{
150 mod_code_ip = (void *)ip;
151 mod_code_newcode = new_code;
152
153 /* The buffers need to be visible before we let NMIs write them */
154 smp_wmb();
155
156 mod_code_write = 1;
157
158 /* Make sure write bit is visible before we wait on NMIs */
159 smp_mb();
160
161 wait_for_nmi();
162
163 /* Make sure all running NMIs have finished before we write the code */
164 smp_mb();
165
166 ftrace_mod_code();
167
168 /* Make sure the write happens before clearing the bit */
169 smp_wmb();
170
171 mod_code_write = 0;
172
173 /* make sure NMIs see the cleared bit */
174 smp_mb();
175
176 wait_for_nmi();
177
178 return mod_code_status;
179}
180
181
182
183
184static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
185
186static unsigned char *ftrace_nop_replace(void)
187{
188 return ftrace_nop;
189}
190
191static int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 192ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 193 unsigned char *new_code)
62{ 194{
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
81 return -EINVAL; 213 return -EINVAL;
82 214
83 /* replace the text with the new text */ 215 /* replace the text with the new text */
84 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 216 if (do_ftrace_mod_code(ip, new_code))
85 return -EPERM; 217 return -EPERM;
86 218
87 sync_core(); 219 sync_core();
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
89 return 0; 221 return 0;
90} 222}
91 223
224int ftrace_make_nop(struct module *mod,
225 struct dyn_ftrace *rec, unsigned long addr)
226{
227 unsigned char *new, *old;
228 unsigned long ip = rec->ip;
229
230 old = ftrace_call_replace(ip, addr);
231 new = ftrace_nop_replace();
232
233 return ftrace_modify_code(rec->ip, old, new);
234}
235
236int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
237{
238 unsigned char *new, *old;
239 unsigned long ip = rec->ip;
240
241 old = ftrace_nop_replace();
242 new = ftrace_call_replace(ip, addr);
243
244 return ftrace_modify_code(rec->ip, old, new);
245}
246
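For context, a sketch of the caller side, which lives in the generic ftrace code rather than in this hunk: the core passes the address of the tracing trampoline as 'addr' and a NULL module pointer for core-kernel call sites, roughly:

	/* enable this call site: replace the 5-byte NOP with a call to the trampoline */
	ftrace_make_call(rec, (unsigned long)ftrace_caller);

	/* disable it again: patch the call back into the NOP */
	ftrace_make_nop(NULL, rec, (unsigned long)ftrace_caller);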
92int ftrace_update_ftrace_func(ftrace_func_t func) 247int ftrace_update_ftrace_func(ftrace_func_t func)
93{ 248{
94 unsigned long ip = (unsigned long)(&ftrace_call); 249 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +320,203 @@ int __init ftrace_dyn_arch_init(void *data)
165 320
166 return 0; 321 return 0;
167} 322}
323#endif
324
325#ifdef CONFIG_FUNCTION_GRAPH_TRACER
326
327#ifdef CONFIG_DYNAMIC_FTRACE
328extern void ftrace_graph_call(void);
329
330static int ftrace_mod_jmp(unsigned long ip,
331 int old_offset, int new_offset)
332{
333 unsigned char code[MCOUNT_INSN_SIZE];
334
335 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
336 return -EFAULT;
337
338 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
339 return -EINVAL;
340
341 *(int *)(&code[1]) = new_offset;
342
343 if (do_ftrace_mod_code(ip, &code))
344 return -EPERM;
345
346 return 0;
347}
348
349int ftrace_enable_ftrace_graph_caller(void)
350{
351 unsigned long ip = (unsigned long)(&ftrace_graph_call);
352 int old_offset, new_offset;
353
354 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
355 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
356
357 return ftrace_mod_jmp(ip, old_offset, new_offset);
358}
359
360int ftrace_disable_ftrace_graph_caller(void)
361{
362 unsigned long ip = (unsigned long)(&ftrace_graph_call);
363 int old_offset, new_offset;
364
365 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
366 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
367
368 return ftrace_mod_jmp(ip, old_offset, new_offset);
369}
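
ftrace_mod_jmp() only rewrites the 32-bit displacement of an existing jmp (opcode 0xe9), and the enable/disable helpers compute that displacement as target - (ip + MCOUNT_INSN_SIZE), i.e. relative to the end of the 5-byte instruction. The standalone sketch below just shows that encoding; the addresses are invented and MCOUNT_INSN_SIZE is the x86 value of 5.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MCOUNT_INSN_SIZE 5	/* size of a call/jmp rel32 on x86 */

/* Build "jmp rel32" at ip targeting dest: 0xe9 followed by the
 * little-endian displacement from the end of the instruction. */
static void encode_jmp(unsigned char insn[MCOUNT_INSN_SIZE],
		       unsigned long ip, unsigned long dest)
{
	int32_t offset = (int32_t)(dest - (ip + MCOUNT_INSN_SIZE));

	insn[0] = 0xe9;
	memcpy(&insn[1], &offset, sizeof(offset));
}

int main(void)
{
	unsigned char insn[MCOUNT_INSN_SIZE];
	unsigned long ip = 0x401000UL;		/* fake ftrace_graph_call site */
	unsigned long stub = 0x402000UL;	/* fake ftrace_stub */

	encode_jmp(insn, ip, stub);
	printf("jmp at %#lx -> %#lx: e9 %02x %02x %02x %02x\n",
	       ip, stub, insn[1], insn[2], insn[3], insn[4]);
	return 0;
}
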
370
371#else /* CONFIG_DYNAMIC_FTRACE */
372
373/*
374 * These functions are picked from those used in
375 * this file for dynamic ftrace. They have been
376 * simplified to ignore all traces in NMI context.
377 */
378static atomic_t in_nmi;
379
380void ftrace_nmi_enter(void)
381{
382 atomic_inc(&in_nmi);
383}
384
385void ftrace_nmi_exit(void)
386{
387 atomic_dec(&in_nmi);
388}
389
390#endif /* !CONFIG_DYNAMIC_FTRACE */
391
392/* Add a function return address to the trace stack on thread info. */
393static int push_return_trace(unsigned long ret, unsigned long long time,
394 unsigned long func, int *depth)
395{
396 int index;
397
398 if (!current->ret_stack)
399 return -EBUSY;
400
401 /* The return trace stack is full */
402 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
403 atomic_inc(&current->trace_overrun);
404 return -EBUSY;
405 }
406
407 index = ++current->curr_ret_stack;
408 barrier();
409 current->ret_stack[index].ret = ret;
410 current->ret_stack[index].func = func;
411 current->ret_stack[index].calltime = time;
412 *depth = index;
413
414 return 0;
415}
416
417/* Retrieve a function return address from the trace stack on thread info. */
418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
419{
420 int index;
421
422 index = current->curr_ret_stack;
423 *ret = current->ret_stack[index].ret;
424 trace->func = current->ret_stack[index].func;
425 trace->calltime = current->ret_stack[index].calltime;
426 trace->overrun = atomic_read(&current->trace_overrun);
427 trace->depth = index;
428 current->curr_ret_stack--;
429}
430
431/*
432 * Send the trace to the ring-buffer.
433 * @return the original return address.
434 */
435unsigned long ftrace_return_to_handler(void)
436{
437 struct ftrace_graph_ret trace;
438 unsigned long ret;
439
440 pop_return_trace(&trace, &ret);
441 trace.rettime = cpu_clock(raw_smp_processor_id());
442 ftrace_graph_return(&trace);
443
444 return ret;
445}
446
447/*
448 * Hook the return address and push it in the stack of return addrs
449 * in current thread info.
450 */
451void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
452{
453 unsigned long old;
454 unsigned long long calltime;
455 int faulted;
456 struct ftrace_graph_ent trace;
457 unsigned long return_hooker = (unsigned long)
458 &return_to_handler;
459
460	/* NMIs are currently unsupported */
461 if (atomic_read(&in_nmi))
462 return;
463
464	/*
465	 * Protect against a fault, even though it shouldn't
466	 * happen. This tracer is too intrusive to skip
467	 * such protection.
468	 */
469 asm volatile(
470#ifdef CONFIG_X86_64
471 "1: movq (%[parent_old]), %[old]\n"
472 "2: movq %[return_hooker], (%[parent_replaced])\n"
473#else
474 "1: movl (%[parent_old]), %[old]\n"
475 "2: movl %[return_hooker], (%[parent_replaced])\n"
476#endif
477 " movl $0, %[faulted]\n"
478
479 ".section .fixup, \"ax\"\n"
480 "3: movl $1, %[faulted]\n"
481 ".previous\n"
482
483 ".section __ex_table, \"a\"\n"
484#ifdef CONFIG_X86_64
485 " .quad 1b, 3b\n"
486 " .quad 2b, 3b\n"
487#else
488 " .long 1b, 3b\n"
489 " .long 2b, 3b\n"
490#endif
491 ".previous\n"
492
493 : [parent_replaced] "=r" (parent), [old] "=r" (old),
494 [faulted] "=r" (faulted)
495 : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
496 : "memory"
497 );
498
499 if (WARN_ON(faulted)) {
500 unregister_ftrace_graph();
501 return;
502 }
503
504 if (WARN_ON(!__kernel_text_address(old))) {
505 unregister_ftrace_graph();
506 *parent = old;
507 return;
508 }
509
510 calltime = cpu_clock(raw_smp_processor_id());
511
512 if (push_return_trace(old, calltime,
513 self_addr, &trace.depth) == -EBUSY) {
514 *parent = old;
515 return;
516 }
517
518 trace.func = self_addr;
519 ftrace_graph_entry(&trace);
520
521}
522#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
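
The function-graph half of this file hinges on the per-task ret_stack: prepare_ftrace_return() swaps the real return address for return_to_handler and records it via push_return_trace(), and ftrace_return_to_handler() later pops it, timestamps the exit and hands control back to the original caller. The hypothetical userspace model below captures only that shadow-stack bookkeeping; addresses and timestamps are made up and there is no overrun accounting.

#include <stdio.h>

#define RET_STACK_DEPTH 50	/* stands in for FTRACE_RETFUNC_DEPTH */

struct ret_entry {
	unsigned long ret;		/* original return address */
	unsigned long func;		/* traced function */
	unsigned long long calltime;
};

static struct ret_entry ret_stack[RET_STACK_DEPTH];
static int curr_ret = -1;

static int push_return(unsigned long ret, unsigned long func,
		       unsigned long long time)
{
	if (curr_ret == RET_STACK_DEPTH - 1)
		return -1;		/* full: caller keeps the real return address */
	curr_ret++;
	ret_stack[curr_ret].ret = ret;
	ret_stack[curr_ret].func = func;
	ret_stack[curr_ret].calltime = time;
	return 0;
}

static unsigned long pop_return(unsigned long long now)
{
	struct ret_entry *e = &ret_stack[curr_ret--];

	printf("func %#lx took %llu time units (depth %d)\n",
	       e->func, now - e->calltime, curr_ret + 1);
	return e->ret;			/* where execution really continues */
}

int main(void)
{
	push_return(0x1000, 0x2000, 100);	/* outer call */
	push_return(0x1100, 0x2100, 150);	/* nested call */
	printf("return to %#lx\n", pop_return(300));
	printf("return to %#lx\n", pop_return(400));
	return 0;
}
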
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..c27af49a4ede 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <linux/ftrace.h>
10#include <asm/system.h> 11#include <asm/system.h>
11 12
12unsigned long idle_halt; 13unsigned long idle_halt;
@@ -100,6 +101,9 @@ static inline int hlt_use_halt(void)
100void default_idle(void) 101void default_idle(void)
101{ 102{
102 if (hlt_use_halt()) { 103 if (hlt_use_halt()) {
104 struct power_trace it;
105
106 trace_power_start(&it, POWER_CSTATE, 1);
103 current_thread_info()->status &= ~TS_POLLING; 107 current_thread_info()->status &= ~TS_POLLING;
104 /* 108 /*
105 * TS_POLLING-cleared state must be visible before we 109 * TS_POLLING-cleared state must be visible before we
@@ -112,6 +116,7 @@ void default_idle(void)
112 else 116 else
113 local_irq_enable(); 117 local_irq_enable();
114 current_thread_info()->status |= TS_POLLING; 118 current_thread_info()->status |= TS_POLLING;
119 trace_power_end(&it);
115 } else { 120 } else {
116 local_irq_enable(); 121 local_irq_enable();
117 /* loop is done by the caller */ 122 /* loop is done by the caller */
@@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
154 */ 159 */
155void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 160void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
156{ 161{
162 struct power_trace it;
163
164 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
157 if (!need_resched()) { 165 if (!need_resched()) {
158 __monitor((void *)&current_thread_info()->flags, 0, 0); 166 __monitor((void *)&current_thread_info()->flags, 0, 0);
159 smp_mb(); 167 smp_mb();
160 if (!need_resched()) 168 if (!need_resched())
161 __mwait(ax, cx); 169 __mwait(ax, cx);
162 } 170 }
171 trace_power_end(&it);
163} 172}
164 173
165/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 174/* Default MONITOR/MWAIT with no hints, used for default C1 state */
166static void mwait_idle(void) 175static void mwait_idle(void)
167{ 176{
177 struct power_trace it;
168 if (!need_resched()) { 178 if (!need_resched()) {
179 trace_power_start(&it, POWER_CSTATE, 1);
169 __monitor((void *)&current_thread_info()->flags, 0, 0); 180 __monitor((void *)&current_thread_info()->flags, 0, 0);
170 smp_mb(); 181 smp_mb();
171 if (!need_resched()) 182 if (!need_resched())
172 __sti_mwait(0, 0); 183 __sti_mwait(0, 0);
173 else 184 else
174 local_irq_enable(); 185 local_irq_enable();
186 trace_power_end(&it);
175 } else 187 } else
176 local_irq_enable(); 188 local_irq_enable();
177} 189}
@@ -183,9 +195,13 @@ static void mwait_idle(void)
183 */ 195 */
184static void poll_idle(void) 196static void poll_idle(void)
185{ 197{
198 struct power_trace it;
199
200 trace_power_start(&it, POWER_CSTATE, 0);
186 local_irq_enable(); 201 local_irq_enable();
187 while (!need_resched()) 202 while (!need_resched())
188 cpu_relax(); 203 cpu_relax();
204 trace_power_end(&it);
189} 205}
190 206
191/* 207/*
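
Each idle routine above is now bracketed by trace_power_start()/trace_power_end() so the power tracer can attribute how long the CPU spent in a given C-state. The toy userspace version below only mimics that start/end shape; struct power_trace, the clock and the printed record are stand-ins, not the kernel API.

#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct power_trace {
	struct timespec start;
	int type;
	int state;
};

static void trace_power_start(struct power_trace *it, int type, int state)
{
	it->type = type;
	it->state = state;
	clock_gettime(CLOCK_MONOTONIC, &it->start);
}

static void trace_power_end(struct power_trace *it)
{
	struct timespec end;

	clock_gettime(CLOCK_MONOTONIC, &end);
	printf("power event type=%d state=%d lasted %ld us\n",
	       it->type, it->state,
	       (long)((end.tv_sec - it->start.tv_sec) * 1000000L +
		      (end.tv_nsec - it->start.tv_nsec) / 1000L));
}

int main(void)
{
	struct power_trace it;

	trace_power_start(&it, /* POWER_CSTATE */ 1, 1);
	usleep(1000);			/* stands in for hlt/mwait idling */
	trace_power_end(&it);
	return 0;
}
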
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..2c8ec1ba75e6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
668 size_t bts_index, bts_end; 668 size_t bts_index, bts_end;
669 int error; 669 int error;
670 670
671 error = ds_get_bts_end(child, &bts_end); 671 error = ds_get_bts_end(child->bts, &bts_end);
672 if (error < 0) 672 if (error < 0)
673 return error; 673 return error;
674 674
675 if (bts_end <= index) 675 if (bts_end <= index)
676 return -EINVAL; 676 return -EINVAL;
677 677
678 error = ds_get_bts_index(child, &bts_index); 678 error = ds_get_bts_index(child->bts, &bts_index);
679 if (error < 0) 679 if (error < 0)
680 return error; 680 return error;
681 681
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
684 if (bts_end <= bts_index) 684 if (bts_end <= bts_index)
685 bts_index -= bts_end; 685 bts_index -= bts_end;
686 686
687 error = ds_access_bts(child, bts_index, &bts_record); 687 error = ds_access_bts(child->bts, bts_index, &bts_record);
688 if (error < 0) 688 if (error < 0)
689 return error; 689 return error;
690 690
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
705 size_t end, i; 705 size_t end, i;
706 int error; 706 int error;
707 707
708 error = ds_get_bts_index(child, &end); 708 error = ds_get_bts_index(child->bts, &end);
709 if (error < 0) 709 if (error < 0)
710 return error; 710 return error;
711 711
712 if (size < (end * sizeof(struct bts_struct))) 712 if (size < (end * sizeof(struct bts_struct)))
713 return -EIO; 713 return -EIO;
714 714
715 error = ds_access_bts(child, 0, (const void **)&raw); 715 error = ds_access_bts(child->bts, 0, (const void **)&raw);
716 if (error < 0) 716 if (error < 0)
717 return error; 717 return error;
718 718
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
723 return -EFAULT; 723 return -EFAULT;
724 } 724 }
725 725
726 error = ds_clear_bts(child); 726 error = ds_clear_bts(child->bts);
727 if (error < 0) 727 if (error < 0)
728 return error; 728 return error;
729 729
730 return end; 730 return end;
731} 731}
732 732
733static void ptrace_bts_ovfl(struct task_struct *child)
734{
735 send_sig(child->thread.bts_ovfl_signal, child, 0);
736}
737
738static int ptrace_bts_config(struct task_struct *child, 733static int ptrace_bts_config(struct task_struct *child,
739 long cfg_size, 734 long cfg_size,
740 const struct ptrace_bts_config __user *ucfg) 735 const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child,
760 goto errout; 755 goto errout;
761 756
762 if (cfg.flags & PTRACE_BTS_O_ALLOC) { 757 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
763 ds_ovfl_callback_t ovfl = NULL; 758 bts_ovfl_callback_t ovfl = NULL;
764 unsigned int sig = 0; 759 unsigned int sig = 0;
765 760
766 /* we ignore the error in case we were not tracing child */ 761 error = -EINVAL;
767 (void)ds_release_bts(child); 762 if (cfg.size < (10 * bts_cfg.sizeof_bts))
763 goto errout;
768 764
769 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 765 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
770 if (!cfg.signal) 766 if (!cfg.signal)
771 goto errout; 767 goto errout;
772 768
769 error = -EOPNOTSUPP;
770 goto errout;
771
773 sig = cfg.signal; 772 sig = cfg.signal;
774 ovfl = ptrace_bts_ovfl;
775 } 773 }
776 774
777 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); 775 if (child->bts) {
778 if (error < 0) 776 (void)ds_release_bts(child->bts);
777 kfree(child->bts_buffer);
778
779 child->bts = NULL;
780 child->bts_buffer = NULL;
781 }
782
783 error = -ENOMEM;
784 child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
785 if (!child->bts_buffer)
786 goto errout;
787
788 child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
789 ovfl, /* th = */ (size_t)-1);
790 if (IS_ERR(child->bts)) {
791 error = PTR_ERR(child->bts);
792 kfree(child->bts_buffer);
793 child->bts = NULL;
794 child->bts_buffer = NULL;
779 goto errout; 795 goto errout;
796 }
780 797
781 child->thread.bts_ovfl_signal = sig; 798 child->thread.bts_ovfl_signal = sig;
782 } 799 }
@@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
823 if (cfg_size < sizeof(cfg)) 840 if (cfg_size < sizeof(cfg))
824 return -EIO; 841 return -EIO;
825 842
826 error = ds_get_bts_end(child, &end); 843 error = ds_get_bts_end(child->bts, &end);
827 if (error < 0) 844 if (error < 0)
828 return error; 845 return error;
829 846
830 error = ds_access_bts(child, /* index = */ 0, &base); 847 error = ds_access_bts(child->bts, /* index = */ 0, &base);
831 if (error < 0) 848 if (error < 0)
832 return error; 849 return error;
833 850
834 error = ds_access_bts(child, /* index = */ end, &max); 851 error = ds_access_bts(child->bts, /* index = */ end, &max);
835 if (error < 0) 852 if (error < 0)
836 return error; 853 return error;
837 854
@@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
884 return -EINVAL; 901 return -EINVAL;
885 } 902 }
886 903
887 /* The writing task will be the switched-to task on a context 904 return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
888 * switch. It needs to write into the switched-from task's BTS
889 * buffer. */
890 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
891} 905}
892 906
893void ptrace_bts_take_timestamp(struct task_struct *tsk, 907void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
929 switch (c->x86) { 943 switch (c->x86) {
930 case 0x6: 944 case 0x6:
931 switch (c->x86_model) { 945 switch (c->x86_model) {
946 case 0 ... 0xC:
947 /* sorry, don't know about them */
948 break;
932 case 0xD: 949 case 0xD:
933 case 0xE: /* Pentium M */ 950 case 0xE: /* Pentium M */
934 bts_configure(&bts_cfg_pentium_m); 951 bts_configure(&bts_cfg_pentium_m);
935 break; 952 break;
936 case 0xF: /* Core2 */ 953 default: /* Core2, Atom, ... */
937 case 0x1C: /* Atom */
938 bts_configure(&bts_cfg_core2); 954 bts_configure(&bts_cfg_core2);
939 break; 955 break;
940 default:
941 /* sorry, don't know about them */
942 break;
943 } 956 }
944 break; 957 break;
945 case 0xF: 958 case 0xF:
@@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
973 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 986 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
974#endif 987#endif
975#ifdef CONFIG_X86_PTRACE_BTS 988#ifdef CONFIG_X86_PTRACE_BTS
976 (void)ds_release_bts(child); 989 if (child->bts) {
990 (void)ds_release_bts(child->bts);
991 kfree(child->bts_buffer);
992 child->bts_buffer = NULL;
977 993
978 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; 994 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
979 if (!child->thread.debugctlmsr) 995 if (!child->thread.debugctlmsr)
980 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 996 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
981 997
982 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 998 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
999 }
983#endif /* CONFIG_X86_PTRACE_BTS */ 1000#endif /* CONFIG_X86_PTRACE_BTS */
984} 1001}
985 1002
@@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1111 (child, data, (struct ptrace_bts_config __user *)addr); 1128 (child, data, (struct ptrace_bts_config __user *)addr);
1112 break; 1129 break;
1113 1130
1114 case PTRACE_BTS_SIZE: 1131 case PTRACE_BTS_SIZE: {
1115 ret = ds_get_bts_index(child, /* pos = */ NULL); 1132 size_t size;
1133
1134 ret = ds_get_bts_index(child->bts, &size);
1135 if (ret == 0) {
1136 BUG_ON(size != (int) size);
1137 ret = (int) size;
1138 }
1116 break; 1139 break;
1140 }
1117 1141
1118 case PTRACE_BTS_GET: 1142 case PTRACE_BTS_GET:
1119 ret = ptrace_bts_read_record 1143 ret = ptrace_bts_read_record
@@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1121 break; 1145 break;
1122 1146
1123 case PTRACE_BTS_CLEAR: 1147 case PTRACE_BTS_CLEAR:
1124 ret = ds_clear_bts(child); 1148 ret = ds_clear_bts(child->bts);
1125 break; 1149 break;
1126 1150
1127 case PTRACE_BTS_DRAIN: 1151 case PTRACE_BTS_DRAIN:
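
The BTS rework above turns ds_request_bts() into a call that hands back a tracer handle or an encoded error pointer, with the caller (ptrace) allocating and freeing the buffer itself. The sketch below is a userspace illustration of that error-pointer convention only; ERR_PTR/IS_ERR/PTR_ERR are re-implemented locally, and request_bts() plus its minimum-size constant are invented stand-ins.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

struct bts_tracer {
	void *buffer;
	size_t size;
};

static struct bts_tracer *request_bts(void *buffer, size_t size)
{
	struct bts_tracer *tracer;

	if (size < 10 * 24)		/* 24 stands in for bts_cfg.sizeof_bts */
		return ERR_PTR(-EINVAL);
	tracer = malloc(sizeof(*tracer));
	if (!tracer)
		return ERR_PTR(-ENOMEM);
	tracer->buffer = buffer;
	tracer->size = size;
	return tracer;
}

int main(void)
{
	void *buf = calloc(1, 4096);
	struct bts_tracer *tracer = request_bts(buf, 4096);

	if (IS_ERR(tracer)) {
		fprintf(stderr, "request failed: %ld\n", PTR_ERR(tracer));
		free(buf);
		return 1;
	}
	printf("tracer set up with %zu-byte buffer\n", tracer->size);
	free(tracer);
	free(buf);
	return 0;
}
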
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/uaccess.h>
9#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
10 11
11static void save_stack_warning(void *data, char *msg) 12static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
83 trace->entries[trace->nr_entries++] = ULONG_MAX; 84 trace->entries[trace->nr_entries++] = ULONG_MAX;
84} 85}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk); 86EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
87
88/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
89
90struct stack_frame {
91 const void __user *next_fp;
92 unsigned long ret_addr;
93};
94
95static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
96{
97 int ret;
98
99 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
100 return 0;
101
102 ret = 1;
103 pagefault_disable();
104 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
105 ret = 0;
106 pagefault_enable();
107
108 return ret;
109}
110
111static inline void __save_stack_trace_user(struct stack_trace *trace)
112{
113 const struct pt_regs *regs = task_pt_regs(current);
114 const void __user *fp = (const void __user *)regs->bp;
115
116 if (trace->nr_entries < trace->max_entries)
117 trace->entries[trace->nr_entries++] = regs->ip;
118
119 while (trace->nr_entries < trace->max_entries) {
120 struct stack_frame frame;
121
122 frame.next_fp = NULL;
123 frame.ret_addr = 0;
124 if (!copy_stack_frame(fp, &frame))
125 break;
126 if ((unsigned long)fp < regs->sp)
127 break;
128 if (frame.ret_addr) {
129 trace->entries[trace->nr_entries++] =
130 frame.ret_addr;
131 }
132 if (fp == frame.next_fp)
133 break;
134 fp = frame.next_fp;
135 }
136}
137
138void save_stack_trace_user(struct stack_trace *trace)
139{
140 /*
141 * Trace user stack if we are not a kernel thread
142 */
143 if (current->mm) {
144 __save_stack_trace_user(trace);
145 }
146 if (trace->nr_entries < trace->max_entries)
147 trace->entries[trace->nr_entries++] = ULONG_MAX;
148}
149
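
__save_stack_trace_user() walks the interrupted task's saved frame pointers: each frame is a (next_fp, ret_addr) pair, and the walk stops on a copy fault, a frame below the stack pointer, or a frame that points back at itself. The standalone program below performs the same walk on its own user stack; it assumes x86-style frames and a build with frame pointers kept (-O0 or -fno-omit-frame-pointer) and is purely illustrative.

#include <stdio.h>

struct stack_frame {
	struct stack_frame *next_fp;	/* saved caller frame pointer */
	unsigned long ret_addr;		/* saved return address */
};

static __attribute__((noinline)) void dump_stack_user(void)
{
	struct stack_frame *fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth < 16) {
		if (!fp->ret_addr)
			break;
		printf("#%d  %#lx\n", depth++, fp->ret_addr);
		if (fp->next_fp <= fp)	/* caller frames live at higher addresses */
			break;
		fp = fp->next_fp;
	}
}

static __attribute__((noinline)) void level2(void) { dump_stack_user(); }
static __attribute__((noinline)) void level1(void) { level2(); }

int main(void)
{
	level1();
	return 0;
}
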
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..6f3d3d4cd973 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
17 * want per guest time just set the kernel.vsyscall64 sysctl to 0. 17 * want per guest time just set the kernel.vsyscall64 sysctl to 0.
18 */ 18 */
19 19
20/* Disable profiling for userspace code: */
21#define DISABLE_BRANCH_PROFILING
22
20#include <linux/time.h> 23#include <linux/time.h>
21#include <linux/init.h> 24#include <linux/init.h>
22#include <linux/kernel.h> 25#include <linux/kernel.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 11obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o 12mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 13obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15 14
16obj-$(CONFIG_NUMA) += numa_$(BITS).o 15obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..4152d3c3b138 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
53 53
54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
55{ 55{
56#ifdef CONFIG_MMIOTRACE_HOOKS 56#ifdef CONFIG_MMIOTRACE
57 if (unlikely(is_kmmio_active())) 57 if (unlikely(is_kmmio_active()))
58 if (kmmio_handler(regs, addr) == 1) 58 if (kmmio_handler(regs, addr) == 1)
59 return -1; 59 return -1;
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
9 * Also alternative() doesn't work. 9 * Also alternative() doesn't work.
10 */ 10 */
11 11
12/* Disable profiling for userspace code: */
13#define DISABLE_BRANCH_PROFILING
14
12#include <linux/kernel.h> 15#include <linux/kernel.h>
13#include <linux/posix-timers.h> 16#include <linux/posix-timers.h>
14#include <linux/time.h> 17#include <linux/time.h>
diff --git a/block/Kconfig b/block/Kconfig
index 1ab7c15c8d7a..290b219fad9c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE
47 depends on SYSFS 47 depends on SYSFS
48 select RELAY 48 select RELAY
49 select DEBUG_FS 49 select DEBUG_FS
50 select TRACEPOINTS
50 help 51 help
51 Say Y here if you want to be able to trace the block layer actions 52 Say Y here if you want to be able to trace the block layer actions
52 on a given queue. Tracing allows you to see any traffic happening 53 on a given queue. Tracing allows you to see any traffic happening
diff --git a/block/blk-core.c b/block/blk-core.c
index 10e8a64a5a5b..0c06cf5aaaf8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,9 +28,23 @@
28#include <linux/task_io_accounting_ops.h> 28#include <linux/task_io_accounting_ops.h>
29#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
30#include <linux/fault-inject.h> 30#include <linux/fault-inject.h>
31#include <trace/block.h>
31 32
32#include "blk.h" 33#include "blk.h"
33 34
35DEFINE_TRACE(block_plug);
36DEFINE_TRACE(block_unplug_io);
37DEFINE_TRACE(block_unplug_timer);
38DEFINE_TRACE(block_getrq);
39DEFINE_TRACE(block_sleeprq);
40DEFINE_TRACE(block_rq_requeue);
41DEFINE_TRACE(block_bio_backmerge);
42DEFINE_TRACE(block_bio_frontmerge);
43DEFINE_TRACE(block_bio_queue);
44DEFINE_TRACE(block_rq_complete);
45DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
46EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
47
34static int __make_request(struct request_queue *q, struct bio *bio); 48static int __make_request(struct request_queue *q, struct bio *bio);
35 49
36/* 50/*
@@ -205,7 +219,7 @@ void blk_plug_device(struct request_queue *q)
205 219
206 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { 220 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
207 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 221 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
208 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 222 trace_block_plug(q);
209 } 223 }
210} 224}
211EXPORT_SYMBOL(blk_plug_device); 225EXPORT_SYMBOL(blk_plug_device);
@@ -292,9 +306,7 @@ void blk_unplug_work(struct work_struct *work)
292 struct request_queue *q = 306 struct request_queue *q =
293 container_of(work, struct request_queue, unplug_work); 307 container_of(work, struct request_queue, unplug_work);
294 308
295 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 309 trace_block_unplug_io(q);
296 q->rq.count[READ] + q->rq.count[WRITE]);
297
298 q->unplug_fn(q); 310 q->unplug_fn(q);
299} 311}
300 312
@@ -302,9 +314,7 @@ void blk_unplug_timeout(unsigned long data)
302{ 314{
303 struct request_queue *q = (struct request_queue *)data; 315 struct request_queue *q = (struct request_queue *)data;
304 316
305 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 317 trace_block_unplug_timer(q);
306 q->rq.count[READ] + q->rq.count[WRITE]);
307
308 kblockd_schedule_work(q, &q->unplug_work); 318 kblockd_schedule_work(q, &q->unplug_work);
309} 319}
310 320
@@ -314,9 +324,7 @@ void blk_unplug(struct request_queue *q)
314 * devices don't necessarily have an ->unplug_fn defined 324 * devices don't necessarily have an ->unplug_fn defined
315 */ 325 */
316 if (q->unplug_fn) { 326 if (q->unplug_fn) {
317 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 327 trace_block_unplug_io(q);
318 q->rq.count[READ] + q->rq.count[WRITE]);
319
320 q->unplug_fn(q); 328 q->unplug_fn(q);
321 } 329 }
322} 330}
@@ -822,7 +830,7 @@ rq_starved:
822 if (ioc_batching(q, ioc)) 830 if (ioc_batching(q, ioc))
823 ioc->nr_batch_requests--; 831 ioc->nr_batch_requests--;
824 832
825 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 833 trace_block_getrq(q, bio, rw);
826out: 834out:
827 return rq; 835 return rq;
828} 836}
@@ -848,7 +856,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
848 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 856 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
849 TASK_UNINTERRUPTIBLE); 857 TASK_UNINTERRUPTIBLE);
850 858
851 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); 859 trace_block_sleeprq(q, bio, rw);
852 860
853 __generic_unplug_device(q); 861 __generic_unplug_device(q);
854 spin_unlock_irq(q->queue_lock); 862 spin_unlock_irq(q->queue_lock);
@@ -928,7 +936,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
928{ 936{
929 blk_delete_timer(rq); 937 blk_delete_timer(rq);
930 blk_clear_rq_complete(rq); 938 blk_clear_rq_complete(rq);
931 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 939 trace_block_rq_requeue(q, rq);
932 940
933 if (blk_rq_tagged(rq)) 941 if (blk_rq_tagged(rq))
934 blk_queue_end_tag(q, rq); 942 blk_queue_end_tag(q, rq);
@@ -1167,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1167 if (!ll_back_merge_fn(q, req, bio)) 1175 if (!ll_back_merge_fn(q, req, bio))
1168 break; 1176 break;
1169 1177
1170 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 1178 trace_block_bio_backmerge(q, bio);
1171 1179
1172 req->biotail->bi_next = bio; 1180 req->biotail->bi_next = bio;
1173 req->biotail = bio; 1181 req->biotail = bio;
@@ -1186,7 +1194,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1186 if (!ll_front_merge_fn(q, req, bio)) 1194 if (!ll_front_merge_fn(q, req, bio))
1187 break; 1195 break;
1188 1196
1189 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 1197 trace_block_bio_frontmerge(q, bio);
1190 1198
1191 bio->bi_next = req->bio; 1199 bio->bi_next = req->bio;
1192 req->bio = bio; 1200 req->bio = bio;
@@ -1269,7 +1277,7 @@ static inline void blk_partition_remap(struct bio *bio)
1269 bio->bi_sector += p->start_sect; 1277 bio->bi_sector += p->start_sect;
1270 bio->bi_bdev = bdev->bd_contains; 1278 bio->bi_bdev = bdev->bd_contains;
1271 1279
1272 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, 1280 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1273 bdev->bd_dev, bio->bi_sector, 1281 bdev->bd_dev, bio->bi_sector,
1274 bio->bi_sector - p->start_sect); 1282 bio->bi_sector - p->start_sect);
1275 } 1283 }
@@ -1441,10 +1449,10 @@ end_io:
1441 goto end_io; 1449 goto end_io;
1442 1450
1443 if (old_sector != -1) 1451 if (old_sector != -1)
1444 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 1452 trace_block_remap(q, bio, old_dev, bio->bi_sector,
1445 old_sector); 1453 old_sector);
1446 1454
1447 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 1455 trace_block_bio_queue(q, bio);
1448 1456
1449 old_sector = bio->bi_sector; 1457 old_sector = bio->bi_sector;
1450 old_dev = bio->bi_bdev->bd_dev; 1458 old_dev = bio->bi_bdev->bd_dev;
@@ -1656,7 +1664,7 @@ static int __end_that_request_first(struct request *req, int error,
1656 int total_bytes, bio_nbytes, next_idx = 0; 1664 int total_bytes, bio_nbytes, next_idx = 0;
1657 struct bio *bio; 1665 struct bio *bio;
1658 1666
1659 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 1667 trace_block_rq_complete(req->q, req);
1660 1668
1661 /* 1669 /*
1662 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual 1670 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
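
blk-core.c now declares the block tracepoints with DEFINE_TRACE() and fires them through trace_block_*() calls that cost almost nothing until a probe is attached. The toy model below shows only that attach/fire/detach shape with a plain function pointer; real tracepoints live in a dedicated ELF section and are updated under RCU, so treat this as a conceptual sketch, not the kernel mechanism.

#include <stddef.h>
#include <stdio.h>

typedef void (*block_plug_probe_t)(const char *qname);
static block_plug_probe_t block_plug_probe;	/* NULL means tracing is off */

static int register_trace_block_plug(block_plug_probe_t probe)
{
	if (block_plug_probe)
		return -1;		/* single slot in this toy model */
	block_plug_probe = probe;
	return 0;
}

static void unregister_trace_block_plug(void)
{
	block_plug_probe = NULL;
}

/* The call site: a cheap test when nobody listens. */
static void trace_block_plug(const char *qname)
{
	if (block_plug_probe)
		block_plug_probe(qname);
}

static void my_probe(const char *qname)
{
	printf("plug on %s\n", qname);
}

int main(void)
{
	trace_block_plug("sda");	/* no probe attached: nothing happens */
	register_trace_block_plug(my_probe);
	trace_block_plug("sda");	/* probe fires */
	unregister_trace_block_plug();
	return 0;
}
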
diff --git a/block/blktrace.c b/block/blktrace.c
index 85049a7e7a17..b0a2cae886db 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -23,10 +23,18 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <trace/block.h>
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27 28
28static unsigned int blktrace_seq __read_mostly = 1; 29static unsigned int blktrace_seq __read_mostly = 1;
29 30
31/* Global reference count of probes */
32static DEFINE_MUTEX(blk_probe_mutex);
33static atomic_t blk_probes_ref = ATOMIC_INIT(0);
34
35static int blk_register_tracepoints(void);
36static void blk_unregister_tracepoints(void);
37
30/* 38/*
31 * Send out a notify message. 39 * Send out a notify message.
32 */ 40 */
@@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
119 * The worker for the various blk_add_trace*() types. Fills out a 127 * The worker for the various blk_add_trace*() types. Fills out a
120 * blk_io_trace structure and places it in a per-cpu subbuffer. 128 * blk_io_trace structure and places it in a per-cpu subbuffer.
121 */ 129 */
122void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, 130static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
123 int rw, u32 what, int error, int pdu_len, void *pdu_data) 131 int rw, u32 what, int error, int pdu_len, void *pdu_data)
124{ 132{
125 struct task_struct *tsk = current; 133 struct task_struct *tsk = current;
@@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
177 local_irq_restore(flags); 185 local_irq_restore(flags);
178} 186}
179 187
180EXPORT_SYMBOL_GPL(__blk_add_trace);
181
182static struct dentry *blk_tree_root; 188static struct dentry *blk_tree_root;
183static DEFINE_MUTEX(blk_tree_mutex); 189static DEFINE_MUTEX(blk_tree_mutex);
184static unsigned int root_users; 190static unsigned int root_users;
@@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt)
237 free_percpu(bt->sequence); 243 free_percpu(bt->sequence);
238 free_percpu(bt->msg_data); 244 free_percpu(bt->msg_data);
239 kfree(bt); 245 kfree(bt);
246 mutex_lock(&blk_probe_mutex);
247 if (atomic_dec_and_test(&blk_probes_ref))
248 blk_unregister_tracepoints();
249 mutex_unlock(&blk_probe_mutex);
240} 250}
241 251
242int blk_trace_remove(struct request_queue *q) 252int blk_trace_remove(struct request_queue *q)
@@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
428 bt->pid = buts->pid; 438 bt->pid = buts->pid;
429 bt->trace_state = Blktrace_setup; 439 bt->trace_state = Blktrace_setup;
430 440
441 mutex_lock(&blk_probe_mutex);
442 if (atomic_add_return(1, &blk_probes_ref) == 1) {
443 ret = blk_register_tracepoints();
444 if (ret)
445 goto probe_err;
446 }
447 mutex_unlock(&blk_probe_mutex);
448
431 ret = -EBUSY; 449 ret = -EBUSY;
432 old_bt = xchg(&q->blk_trace, bt); 450 old_bt = xchg(&q->blk_trace, bt);
433 if (old_bt) { 451 if (old_bt) {
@@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
436 } 454 }
437 455
438 return 0; 456 return 0;
457probe_err:
458 atomic_dec(&blk_probes_ref);
459 mutex_unlock(&blk_probe_mutex);
439err: 460err:
440 if (dir) 461 if (dir)
441 blk_remove_tree(dir); 462 blk_remove_tree(dir);
@@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q)
562 blk_trace_remove(q); 583 blk_trace_remove(q);
563 } 584 }
564} 585}
586
587/*
588 * blktrace probes
589 */
590
591/**
592 * blk_add_trace_rq - Add a trace for a request oriented action
593 * @q: queue the io is for
594 * @rq: the source request
595 * @what: the action
596 *
597 * Description:
598 * Records an action against a request. Will log the bio offset + size.
599 *
600 **/
601static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
602 u32 what)
603{
604 struct blk_trace *bt = q->blk_trace;
605 int rw = rq->cmd_flags & 0x03;
606
607 if (likely(!bt))
608 return;
609
610 if (blk_discard_rq(rq))
611 rw |= (1 << BIO_RW_DISCARD);
612
613 if (blk_pc_request(rq)) {
614 what |= BLK_TC_ACT(BLK_TC_PC);
615 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
616 sizeof(rq->cmd), rq->cmd);
617 } else {
618 what |= BLK_TC_ACT(BLK_TC_FS);
619 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
620 rw, what, rq->errors, 0, NULL);
621 }
622}
623
624static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
625{
626 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
627}
628
629static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
630{
631 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
632}
633
634static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
635{
636 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
637}
638
639static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
640{
641 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
642}
643
644static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
645{
646 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
647}
648
649/**
650 * blk_add_trace_bio - Add a trace for a bio oriented action
651 * @q: queue the io is for
652 * @bio: the source bio
653 * @what: the action
654 *
655 * Description:
656 * Records an action against a bio. Will log the bio offset + size.
657 *
658 **/
659static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
660 u32 what)
661{
662 struct blk_trace *bt = q->blk_trace;
663
664 if (likely(!bt))
665 return;
666
667 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
668 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
669}
670
671static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
672{
673 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
674}
675
676static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
677{
678 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
679}
680
681static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
682{
683 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
684}
685
686static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
687{
688 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
689}
690
691static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
692{
693 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
694}
695
696static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
697{
698 if (bio)
699 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
700 else {
701 struct blk_trace *bt = q->blk_trace;
702
703 if (bt)
704 __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
705 }
706}
707
708
709static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
710{
711 if (bio)
712 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
713 else {
714 struct blk_trace *bt = q->blk_trace;
715
716 if (bt)
717 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
718 }
719}
720
721static void blk_add_trace_plug(struct request_queue *q)
722{
723 struct blk_trace *bt = q->blk_trace;
724
725 if (bt)
726 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
727}
728
729static void blk_add_trace_unplug_io(struct request_queue *q)
730{
731 struct blk_trace *bt = q->blk_trace;
732
733 if (bt) {
734 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
735 __be64 rpdu = cpu_to_be64(pdu);
736
737 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
738 sizeof(rpdu), &rpdu);
739 }
740}
741
742static void blk_add_trace_unplug_timer(struct request_queue *q)
743{
744 struct blk_trace *bt = q->blk_trace;
745
746 if (bt) {
747 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
748 __be64 rpdu = cpu_to_be64(pdu);
749
750 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
751 sizeof(rpdu), &rpdu);
752 }
753}
754
755static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
756 unsigned int pdu)
757{
758 struct blk_trace *bt = q->blk_trace;
759
760 if (bt) {
761 __be64 rpdu = cpu_to_be64(pdu);
762
763 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
764 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
765 sizeof(rpdu), &rpdu);
766 }
767}
768
769/**
770 * blk_add_trace_remap - Add a trace for a remap operation
771 * @q: queue the io is for
772 * @bio: the source bio
773 * @dev: target device
774 * @from: source sector
775 * @to: target sector
776 *
777 * Description:
778 * Device mapper or RAID targets sometimes need to split a bio because
779 * it spans a stripe (or similar). Add a trace for that action.
780 *
781 **/
782static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
783 dev_t dev, sector_t from, sector_t to)
784{
785 struct blk_trace *bt = q->blk_trace;
786 struct blk_io_trace_remap r;
787
788 if (likely(!bt))
789 return;
790
791 r.device = cpu_to_be32(dev);
792 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
793 r.sector = cpu_to_be64(to);
794
795 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
796 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
797}
798
799/**
800 * blk_add_driver_data - Add binary message with driver-specific data
801 * @q: queue the io is for
802 * @rq: io request
803 * @data: driver-specific data
804 * @len: length of driver-specific data
805 *
806 * Description:
807 * Some drivers might want to write driver-specific data per request.
808 *
809 **/
810void blk_add_driver_data(struct request_queue *q,
811 struct request *rq,
812 void *data, size_t len)
813{
814 struct blk_trace *bt = q->blk_trace;
815
816 if (likely(!bt))
817 return;
818
819 if (blk_pc_request(rq))
820 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
821 rq->errors, len, data);
822 else
823 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
824 0, BLK_TA_DRV_DATA, rq->errors, len, data);
825}
826EXPORT_SYMBOL_GPL(blk_add_driver_data);
827
828static int blk_register_tracepoints(void)
829{
830 int ret;
831
832 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
833 WARN_ON(ret);
834 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
835 WARN_ON(ret);
836 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
837 WARN_ON(ret);
838 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
839 WARN_ON(ret);
840 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
841 WARN_ON(ret);
842 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
843 WARN_ON(ret);
844 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
845 WARN_ON(ret);
846 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
847 WARN_ON(ret);
848 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
849 WARN_ON(ret);
850 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
851 WARN_ON(ret);
852 ret = register_trace_block_getrq(blk_add_trace_getrq);
853 WARN_ON(ret);
854 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
855 WARN_ON(ret);
856 ret = register_trace_block_plug(blk_add_trace_plug);
857 WARN_ON(ret);
858 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
859 WARN_ON(ret);
860 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
861 WARN_ON(ret);
862 ret = register_trace_block_split(blk_add_trace_split);
863 WARN_ON(ret);
864 ret = register_trace_block_remap(blk_add_trace_remap);
865 WARN_ON(ret);
866 return 0;
867}
868
869static void blk_unregister_tracepoints(void)
870{
871 unregister_trace_block_remap(blk_add_trace_remap);
872 unregister_trace_block_split(blk_add_trace_split);
873 unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
874 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
875 unregister_trace_block_plug(blk_add_trace_plug);
876 unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
877 unregister_trace_block_getrq(blk_add_trace_getrq);
878 unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
879 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
880 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
881 unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
882 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
883 unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
884 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
885 unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
886 unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
887 unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
888
889 tracepoint_synchronize_unregister();
890}
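
blk_register_tracepoints()/blk_unregister_tracepoints() are driven by blk_probes_ref under blk_probe_mutex: the first queue that sets up tracing attaches every probe, and the last one to tear down detaches them again. Below is a minimal userspace model of that first-user/last-user pattern, with a pthread mutex standing in for blk_probe_mutex.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t probe_mutex = PTHREAD_MUTEX_INITIALIZER;
static int probes_ref;

static void register_probes(void)   { printf("probes registered\n"); }
static void unregister_probes(void) { printf("probes unregistered\n"); }

static void trace_setup(void)
{
	pthread_mutex_lock(&probe_mutex);
	if (++probes_ref == 1)		/* first user */
		register_probes();
	pthread_mutex_unlock(&probe_mutex);
}

static void trace_cleanup(void)
{
	pthread_mutex_lock(&probe_mutex);
	if (--probes_ref == 0)		/* last user */
		unregister_probes();
	pthread_mutex_unlock(&probe_mutex);
}

int main(void)
{
	trace_setup();		/* registers */
	trace_setup();		/* no-op, just bumps the count */
	trace_cleanup();
	trace_cleanup();	/* unregisters */
	return 0;
}
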
diff --git a/block/elevator.c b/block/elevator.c
index 9ac82dde99dd..e5677fe4f412 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,6 +33,7 @@
33#include <linux/compiler.h> 33#include <linux/compiler.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/blktrace_api.h> 35#include <linux/blktrace_api.h>
36#include <trace/block.h>
36#include <linux/hash.h> 37#include <linux/hash.h>
37#include <linux/uaccess.h> 38#include <linux/uaccess.h>
38 39
@@ -41,6 +42,8 @@
41static DEFINE_SPINLOCK(elv_list_lock); 42static DEFINE_SPINLOCK(elv_list_lock);
42static LIST_HEAD(elv_list); 43static LIST_HEAD(elv_list);
43 44
45DEFINE_TRACE(block_rq_abort);
46
44/* 47/*
45 * Merge hash stuff. 48 * Merge hash stuff.
46 */ 49 */
@@ -52,6 +55,9 @@ static const int elv_hash_shift = 6;
52#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 55#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
53#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) 56#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
54 57
58DEFINE_TRACE(block_rq_insert);
59DEFINE_TRACE(block_rq_issue);
60
55/* 61/*
56 * Query io scheduler to see if the current process issuing bio may be 62 * Query io scheduler to see if the current process issuing bio may be
57 * merged with rq. 63 * merged with rq.
@@ -586,7 +592,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
586 unsigned ordseq; 592 unsigned ordseq;
587 int unplug_it = 1; 593 int unplug_it = 1;
588 594
589 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 595 trace_block_rq_insert(q, rq);
590 596
591 rq->q = q; 597 rq->q = q;
592 598
@@ -772,7 +778,7 @@ struct request *elv_next_request(struct request_queue *q)
772 * not be passed by new incoming requests 778 * not be passed by new incoming requests
773 */ 779 */
774 rq->cmd_flags |= REQ_STARTED; 780 rq->cmd_flags |= REQ_STARTED;
775 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 781 trace_block_rq_issue(q, rq);
776 } 782 }
777 783
778 if (!q->boundary_rq || q->boundary_rq == rq) { 784 if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -921,7 +927,7 @@ void elv_abort_queue(struct request_queue *q)
921 while (!list_empty(&q->queue_head)) { 927 while (!list_empty(&q->queue_head)) {
922 rq = list_entry_rq(q->queue_head.next); 928 rq = list_entry_rq(q->queue_head.next);
923 rq->cmd_flags |= REQ_QUIET; 929 rq->cmd_flags |= REQ_QUIET;
924 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 930 trace_block_rq_abort(q, rq);
925 __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); 931 __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
926 } 932 }
927} 933}
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index ce0d9da52a8a..94966edfb44d 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -274,6 +274,22 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = {
274 .enable_mask = SYSRQ_ENABLE_DUMP, 274 .enable_mask = SYSRQ_ENABLE_DUMP,
275}; 275};
276 276
277#ifdef CONFIG_TRACING
278#include <linux/ftrace.h>
279
280static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
281{
282 ftrace_dump();
283}
284static struct sysrq_key_op sysrq_ftrace_dump_op = {
285 .handler = sysrq_ftrace_dump,
286 .help_msg = "dumpZ-ftrace-buffer",
287 .action_msg = "Dump ftrace buffer",
288 .enable_mask = SYSRQ_ENABLE_DUMP,
289};
290#else
291#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)0)
292#endif
277 293
278static void sysrq_handle_showmem(int key, struct tty_struct *tty) 294static void sysrq_handle_showmem(int key, struct tty_struct *tty)
279{ 295{
@@ -406,7 +422,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
406 NULL, /* x */ 422 NULL, /* x */
407 /* y: May be registered on sparc64 for global register dump */ 423 /* y: May be registered on sparc64 for global register dump */
408 NULL, /* y */ 424 NULL, /* y */
409 NULL /* z */ 425 &sysrq_ftrace_dump_op, /* z */
410}; 426};
411 427
412/* key2index calculation, -1 on invalid index */ 428/* key2index calculation, -1 on invalid index */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c99e4728ff41..343094c3feeb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -21,6 +21,7 @@
21#include <linux/idr.h> 21#include <linux/idr.h>
22#include <linux/hdreg.h> 22#include <linux/hdreg.h>
23#include <linux/blktrace_api.h> 23#include <linux/blktrace_api.h>
24#include <trace/block.h>
24 25
25#define DM_MSG_PREFIX "core" 26#define DM_MSG_PREFIX "core"
26 27
@@ -51,6 +52,8 @@ struct dm_target_io {
51 union map_info info; 52 union map_info info;
52}; 53};
53 54
55DEFINE_TRACE(block_bio_complete);
56
54union map_info *dm_get_mapinfo(struct bio *bio) 57union map_info *dm_get_mapinfo(struct bio *bio)
55{ 58{
56 if (bio && bio->bi_private) 59 if (bio && bio->bi_private)
@@ -504,8 +507,7 @@ static void dec_pending(struct dm_io *io, int error)
504 end_io_acct(io); 507 end_io_acct(io);
505 508
506 if (io->error != DM_ENDIO_REQUEUE) { 509 if (io->error != DM_ENDIO_REQUEUE) {
507 blk_add_trace_bio(io->md->queue, io->bio, 510 trace_block_bio_complete(io->md->queue, io->bio);
508 BLK_TA_COMPLETE);
509 511
510 bio_endio(io->bio, io->error); 512 bio_endio(io->bio, io->error);
511 } 513 }
@@ -598,7 +600,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
598 if (r == DM_MAPIO_REMAPPED) { 600 if (r == DM_MAPIO_REMAPPED) {
599 /* the bio has been remapped so dispatch it */ 601 /* the bio has been remapped so dispatch it */
600 602
601 blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, 603 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
602 tio->io->bio->bi_bdev->bd_dev, 604 tio->io->bio->bi_bdev->bd_dev,
603 clone->bi_sector, sector); 605 clone->bi_sector, sector);
604 606
diff --git a/fs/bio.c b/fs/bio.c
index 77a55bcceedb..df99c882b807 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,8 +26,11 @@
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <trace/block.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 30#include <scsi/sg.h> /* for struct sg_iovec */
30 31
32DEFINE_TRACE(block_split);
33
31static struct kmem_cache *bio_slab __read_mostly; 34static struct kmem_cache *bio_slab __read_mostly;
32 35
33static mempool_t *bio_split_pool __read_mostly; 36static mempool_t *bio_split_pool __read_mostly;
@@ -1263,7 +1266,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1263 if (!bp) 1266 if (!bp)
1264 return bp; 1267 return bp;
1265 1268
1266 blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, 1269 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1267 bi->bi_sector + first_sectors); 1270 bi->bi_sector + first_sectors);
1268 1271
1269 BUG_ON(bi->bi_vcnt != 1); 1272 BUG_ON(bi->bi_vcnt != 1);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eba2eabcd2b8..16c211558c22 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...)
357} 357}
358EXPORT_SYMBOL(seq_printf); 358EXPORT_SYMBOL(seq_printf);
359 359
360static char *mangle_path(char *s, char *p, char *esc) 360/**
361 * mangle_path - mangle and copy path to buffer beginning
362 * @s: buffer start
363 * @p: beginning of path in above buffer
364 * @esc: set of characters that need escaping
365 *
366 * Copy the path from @p to @s, replacing each occurrence of a character from
367 * @esc with the usual octal escape.
368 * Returns pointer past last written character in @s, or NULL in case of
369 * failure.
370 */
371char *mangle_path(char *s, char *p, char *esc)
361{ 372{
362 while (s <= p) { 373 while (s <= p) {
363 char c = *p++; 374 char c = *p++;
@@ -376,6 +387,7 @@ static char *mangle_path(char *s, char *p, char *esc)
376 } 387 }
377 return NULL; 388 return NULL;
378} 389}
390EXPORT_SYMBOL(mangle_path);
379 391
380/* 392/*
381 * return the absolute path of 'dentry' residing in mount 'mnt'. 393 * return the absolute path of 'dentry' residing in mount 'mnt'.
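
mangle_path() is exported here so the tracer's seq_file output can reuse it; per the new kernel-doc it copies a path while replacing characters from the escape set with three-digit octal escapes and returns a pointer past the last byte written. The function below is a simplified userspace take on that documented behaviour, not the in-place kernel algorithm; the name and the buffer-length handling are this sketch's own.

#include <stdio.h>
#include <string.h>

static char *escape_path(char *dst, size_t dstlen, const char *src,
			 const char *esc)
{
	char *s = dst;

	while (*src) {
		if (strchr(esc, *src)) {
			if ((size_t)(s - dst) + 4 >= dstlen)
				return NULL;
			s += sprintf(s, "\\%03o", (unsigned char)*src++);
		} else {
			if ((size_t)(s - dst) + 1 >= dstlen)
				return NULL;
			*s++ = *src++;
		}
	}
	*s = '\0';
	return s;	/* past the last written character, like mangle_path() */
}

int main(void)
{
	char buf[64];

	if (escape_path(buf, sizeof(buf), "/dev/disk with space", " \t\n"))
		printf("%s\n", buf);
	return 0;
}
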
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 80744606bad1..eba835a2c2cd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -45,6 +45,22 @@
45#define MCOUNT_REC() 45#define MCOUNT_REC()
46#endif 46#endif
47 47
48#ifdef CONFIG_TRACE_BRANCH_PROFILING
49#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
50 *(_ftrace_annotated_branch) \
51 VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .;
52#else
53#define LIKELY_PROFILE()
54#endif
55
56#ifdef CONFIG_PROFILE_ALL_BRANCHES
57#define BRANCH_PROFILE() VMLINUX_SYMBOL(__start_branch_profile) = .; \
58 *(_ftrace_branch) \
59 VMLINUX_SYMBOL(__stop_branch_profile) = .;
60#else
61#define BRANCH_PROFILE()
62#endif
63
48/* .data section */ 64/* .data section */
49#define DATA_DATA \ 65#define DATA_DATA \
50 *(.data) \ 66 *(.data) \
@@ -60,9 +76,12 @@
60 VMLINUX_SYMBOL(__start___markers) = .; \ 76 VMLINUX_SYMBOL(__start___markers) = .; \
61 *(__markers) \ 77 *(__markers) \
62 VMLINUX_SYMBOL(__stop___markers) = .; \ 78 VMLINUX_SYMBOL(__stop___markers) = .; \
79 . = ALIGN(32); \
63 VMLINUX_SYMBOL(__start___tracepoints) = .; \ 80 VMLINUX_SYMBOL(__start___tracepoints) = .; \
64 *(__tracepoints) \ 81 *(__tracepoints) \
65 VMLINUX_SYMBOL(__stop___tracepoints) = .; 82 VMLINUX_SYMBOL(__stop___tracepoints) = .; \
83 LIKELY_PROFILE() \
84 BRANCH_PROFILE()
66 85
67#define RO_DATA(align) \ 86#define RO_DATA(align) \
68 . = ALIGN((align)); \ 87 . = ALIGN((align)); \
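
LIKELY_PROFILE() and BRANCH_PROFILE() bracket the per-call-site records with __start_*/__stop_* symbols so the tracer can later walk them as a plain array. The userspace program below shows the same bookend trick; it leans on GNU ld's automatic __start_<section>/__stop_<section> symbols for sections whose names are valid C identifiers, and the section name and struct are invented for the example.

#include <stdio.h>

struct branch_data {
	const char *func;
	unsigned int line;
};

#define RECORD_BRANCH()						\
	do {							\
		static struct branch_data			\
		__attribute__((used, aligned(4),		\
			       section("my_branch_profile")))	\
		______rec = { __func__, __LINE__ };		\
		(void)______rec;				\
	} while (0)

/* Provided automatically by GNU ld for the section above. */
extern struct branch_data __start_my_branch_profile[];
extern struct branch_data __stop_my_branch_profile[];

static void f(void) { RECORD_BRANCH(); }
static void g(void) { RECORD_BRANCH(); }

int main(void)
{
	struct branch_data *p;

	f();
	g();
	for (p = __start_my_branch_profile; p < __stop_my_branch_profile; p++)
		printf("record from %s(), line %u\n", p->func, p->line);
	return 0;
}
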
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index bdf505d33e77..1dba3493d520 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -160,7 +160,6 @@ struct blk_trace {
160 160
161extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 161extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
162extern void blk_trace_shutdown(struct request_queue *); 162extern void blk_trace_shutdown(struct request_queue *);
163extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
164extern int do_blk_trace_setup(struct request_queue *q, 163extern int do_blk_trace_setup(struct request_queue *q,
165 char *name, dev_t dev, struct blk_user_trace_setup *buts); 164 char *name, dev_t dev, struct blk_user_trace_setup *buts);
166extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); 165extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
@@ -186,168 +185,8 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
186 } while (0) 185 } while (0)
187#define BLK_TN_MAX_MSG 128 186#define BLK_TN_MAX_MSG 128
188 187
189/** 188extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
190 * blk_add_trace_rq - Add a trace for a request oriented action 189 void *data, size_t len);
191 * @q: queue the io is for
192 * @rq: the source request
193 * @what: the action
194 *
195 * Description:
196 * Records an action against a request. Will log the bio offset + size.
197 *
198 **/
199static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
200 u32 what)
201{
202 struct blk_trace *bt = q->blk_trace;
203 int rw = rq->cmd_flags & 0x03;
204
205 if (likely(!bt))
206 return;
207
208 if (blk_discard_rq(rq))
209 rw |= (1 << BIO_RW_DISCARD);
210
211 if (blk_pc_request(rq)) {
212 what |= BLK_TC_ACT(BLK_TC_PC);
213 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
214 } else {
215 what |= BLK_TC_ACT(BLK_TC_FS);
216 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
217 }
218}
219
220/**
221 * blk_add_trace_bio - Add a trace for a bio oriented action
222 * @q: queue the io is for
223 * @bio: the source bio
224 * @what: the action
225 *
226 * Description:
227 * Records an action against a bio. Will log the bio offset + size.
228 *
229 **/
230static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
231 u32 what)
232{
233 struct blk_trace *bt = q->blk_trace;
234
235 if (likely(!bt))
236 return;
237
238 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
239}
240
241/**
242 * blk_add_trace_generic - Add a trace for a generic action
243 * @q: queue the io is for
244 * @bio: the source bio
245 * @rw: the data direction
246 * @what: the action
247 *
248 * Description:
249 * Records a simple trace
250 *
251 **/
252static inline void blk_add_trace_generic(struct request_queue *q,
253 struct bio *bio, int rw, u32 what)
254{
255 struct blk_trace *bt = q->blk_trace;
256
257 if (likely(!bt))
258 return;
259
260 if (bio)
261 blk_add_trace_bio(q, bio, what);
262 else
263 __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
264}
265
266/**
267 * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
268 * @q: queue the io is for
269 * @what: the action
270 * @bio: the source bio
271 * @pdu: the integer payload
272 *
273 * Description:
274 * Adds a trace with some integer payload. This might be an unplug
275 * option given as the action, with the depth at unplug time given
276 * as the payload
277 *
278 **/
279static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
280 struct bio *bio, unsigned int pdu)
281{
282 struct blk_trace *bt = q->blk_trace;
283 __be64 rpdu = cpu_to_be64(pdu);
284
285 if (likely(!bt))
286 return;
287
288 if (bio)
289 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
290 else
291 __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
292}
293
294/**
295 * blk_add_trace_remap - Add a trace for a remap operation
296 * @q: queue the io is for
297 * @bio: the source bio
298 * @dev: target device
299 * @from: source sector
300 * @to: target sector
301 *
302 * Description:
303 * Device mapper or raid target sometimes need to split a bio because
304 * it spans a stripe (or similar). Add a trace for that action.
305 *
306 **/
307static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
308 dev_t dev, sector_t from, sector_t to)
309{
310 struct blk_trace *bt = q->blk_trace;
311 struct blk_io_trace_remap r;
312
313 if (likely(!bt))
314 return;
315
316 r.device = cpu_to_be32(dev);
317 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
318 r.sector = cpu_to_be64(to);
319
320 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
321}
322
323/**
324 * blk_add_driver_data - Add binary message with driver-specific data
325 * @q: queue the io is for
326 * @rq: io request
327 * @data: driver-specific data
328 * @len: length of driver-specific data
329 *
330 * Description:
331 * Some drivers might want to write driver-specific data per request.
332 *
333 **/
334static inline void blk_add_driver_data(struct request_queue *q,
335 struct request *rq,
336 void *data, size_t len)
337{
338 struct blk_trace *bt = q->blk_trace;
339
340 if (likely(!bt))
341 return;
342
343 if (blk_pc_request(rq))
344 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
345 rq->errors, len, data);
346 else
347 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
348 0, BLK_TA_DRV_DATA, rq->errors, len, data);
349}
350
351extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 190extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
352 char __user *arg); 191 char __user *arg);
353extern int blk_trace_startstop(struct request_queue *q, int start); 192extern int blk_trace_startstop(struct request_queue *q, int start);
@@ -356,13 +195,8 @@ extern int blk_trace_remove(struct request_queue *q);
356#else /* !CONFIG_BLK_DEV_IO_TRACE */ 195#else /* !CONFIG_BLK_DEV_IO_TRACE */
357#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 196#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
358#define blk_trace_shutdown(q) do { } while (0) 197#define blk_trace_shutdown(q) do { } while (0)
359#define blk_add_trace_rq(q, rq, what) do { } while (0)
360#define blk_add_trace_bio(q, rq, what) do { } while (0)
361#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
362#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0)
363#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
364#define blk_add_driver_data(q, rq, data, len) do {} while (0)
365#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) 198#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY)
199#define blk_add_driver_data(q, rq, data, len) do {} while (0)
366#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) 200#define blk_trace_setup(q, name, dev, arg) (-ENOTTY)
367#define blk_trace_startstop(q, start) (-ENOTTY) 201#define blk_trace_startstop(q, start) (-ENOTTY)
368#define blk_trace_remove(q) (-ENOTTY) 202#define blk_trace_remove(q) (-ENOTTY)
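
The blk_add_trace_pdu_int() helper removed above converts its integer payload to big-endian with cpu_to_be64() before handing it to __blk_add_trace(), so the recorded pdu can be decoded on any host. A rough user-space sketch of that byte-order step, with glibc's htobe64()/be64toh() standing in for the kernel's cpu_to_be64()/be64_to_cpu() (the buffer layout is only illustrative):

/* pdu_endian.c - sketch of the big-endian payload encoding used by
 * blk_add_trace_pdu_int(); htobe64()/be64toh() stand in for the
 * kernel's cpu_to_be64()/be64_to_cpu(). */
#define _DEFAULT_SOURCE
#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        unsigned int depth = 42;                /* e.g. queue depth at unplug time */
        uint64_t wire = htobe64(depth);         /* what gets appended as the pdu   */
        unsigned char record[sizeof(wire)];

        memcpy(record, &wire, sizeof(wire));    /* payload bytes after the trace header */
        printf("last payload byte %u, decoded pdu %llu\n",
               record[sizeof(record) - 1], (unsigned long long)be64toh(wire));
        return 0;
}
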
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 98115d9d04da..ea7c6be354b7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -59,8 +59,88 @@ extern void __chk_io_ptr(const volatile void __iomem *);
59 * specific implementations come from the above header files 59 * specific implementations come from the above header files
60 */ 60 */
61 61
62#define likely(x) __builtin_expect(!!(x), 1) 62struct ftrace_branch_data {
63#define unlikely(x) __builtin_expect(!!(x), 0) 63 const char *func;
64 const char *file;
65 unsigned line;
66 union {
67 struct {
68 unsigned long correct;
69 unsigned long incorrect;
70 };
71 struct {
72 unsigned long miss;
73 unsigned long hit;
74 };
75 };
76};
77
78/*
79 * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
80 * to disable branch tracing on a per file basis.
81 */
82#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
83void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
84
85#define likely_notrace(x) __builtin_expect(!!(x), 1)
86#define unlikely_notrace(x) __builtin_expect(!!(x), 0)
87
88#define __branch_check__(x, expect) ({ \
89 int ______r; \
90 static struct ftrace_branch_data \
91 __attribute__((__aligned__(4))) \
92 __attribute__((section("_ftrace_annotated_branch"))) \
93 ______f = { \
94 .func = __func__, \
95 .file = __FILE__, \
96 .line = __LINE__, \
97 }; \
98 ______r = likely_notrace(x); \
99 ftrace_likely_update(&______f, ______r, expect); \
100 ______r; \
101 })
102
103/*
104 * Using __builtin_constant_p(x) to ignore cases where the return
105 * value is always the same. This idea is taken from a similar patch
106 * written by Daniel Walker.
107 */
108# ifndef likely
109# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1))
110# endif
111# ifndef unlikely
112# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
113# endif
114
115#ifdef CONFIG_PROFILE_ALL_BRANCHES
116/*
117 * "Define 'is'", Bill Clinton
118 * "Define 'if'", Steven Rostedt
119 */
120#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) : \
121 ({ \
122 int ______r; \
123 static struct ftrace_branch_data \
124 __attribute__((__aligned__(4))) \
125 __attribute__((section("_ftrace_branch"))) \
126 ______f = { \
127 .func = __func__, \
128 .file = __FILE__, \
129 .line = __LINE__, \
130 }; \
131 ______r = !!(cond); \
132 if (______r) \
133 ______f.hit++; \
134 else \
135 ______f.miss++; \
136 ______r; \
137 }))
138#endif /* CONFIG_PROFILE_ALL_BRANCHES */
139
140#else
141# define likely(x) __builtin_expect(!!(x), 1)
142# define unlikely(x) __builtin_expect(!!(x), 0)
143#endif
64 144
65/* Optimization barrier */ 145/* Optimization barrier */
66#ifndef barrier 146#ifndef barrier
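
The instrumented likely()/unlikely() added above keep one static ftrace_branch_data record per call site and count how often the annotated prediction was correct. A small user-space model of that accounting, using the same statement-expression and __builtin_expect() idioms; the macro and variable names here are made up, not the kernel's:

/* branch_profile.c - sketch of the per-call-site accounting done by
 * __branch_check__(): a static record identifies the site, a counter
 * pair tracks correct vs. incorrect predictions. */
#include <stdio.h>

struct branch_data {
        const char *func, *file;
        unsigned line;
        unsigned long correct, incorrect;
};

static struct branch_data *last_site;   /* lets main() find the record */

#define likely_profiled(x) ({                                           \
        static struct branch_data __d = { __func__, __FILE__, __LINE__ }; \
        int __r = !!(x);                                                \
        if (__r)                                                        \
                __d.correct++;                                          \
        else                                                            \
                __d.incorrect++;                                        \
        last_site = &__d;                                               \
        __builtin_expect(__r, 1);                                       \
})

int main(void)
{
        int hits = 0;

        for (int i = 0; i < 100; i++)
                if (likely_profiled(i != 50))   /* the prediction is wrong exactly once */
                        hits++;
        printf("%s:%u correct=%lu incorrect=%lu\n",
               last_site->file, last_site->line,
               last_site->correct, last_site->incorrect);
        return hits ? 0 : 1;
}

Built with gcc, the single call site reports 99 correct and 1 incorrect prediction, which is the kind of data the annotated-branch profiler exposes per likely()/unlikely() use.
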
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 703eb53cfa2b..afba918c623c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -23,6 +23,45 @@ struct ftrace_ops {
23 struct ftrace_ops *next; 23 struct ftrace_ops *next;
24}; 24};
25 25
26extern int function_trace_stop;
27
28/*
29 * Type of the current tracing.
30 */
31enum ftrace_tracing_type_t {
32 FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */
33 FTRACE_TYPE_RETURN, /* Hook the return of the function */
34};
35
36/* Current tracing type, default is FTRACE_TYPE_ENTER */
37extern enum ftrace_tracing_type_t ftrace_tracing_type;
38
39/**
40 * ftrace_stop - stop function tracer.
41 *
 42 * A quick way to stop the function tracer. Note that this is an on/off switch,
43 * it is not something that is recursive like preempt_disable.
44 * This does not disable the calling of mcount, it only stops the
45 * calling of functions from mcount.
46 */
47static inline void ftrace_stop(void)
48{
49 function_trace_stop = 1;
50}
51
52/**
53 * ftrace_start - start the function tracer.
54 *
55 * This function is the inverse of ftrace_stop. This does not enable
 56 * function tracing if the function tracer is disabled. This only
57 * sets the function tracer flag to continue calling the functions
58 * from mcount.
59 */
60static inline void ftrace_start(void)
61{
62 function_trace_stop = 0;
63}
64
26/* 65/*
27 * The ftrace_ops must be a static and should also 66 * The ftrace_ops must be a static and should also
28 * be read_mostly. These functions do modify read_mostly variables 67 * be read_mostly. These functions do modify read_mostly variables
@@ -41,9 +80,13 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
41# define unregister_ftrace_function(ops) do { } while (0) 80# define unregister_ftrace_function(ops) do { } while (0)
42# define clear_ftrace_function(ops) do { } while (0) 81# define clear_ftrace_function(ops) do { } while (0)
43static inline void ftrace_kill(void) { } 82static inline void ftrace_kill(void) { }
83static inline void ftrace_stop(void) { }
84static inline void ftrace_start(void) { }
44#endif /* CONFIG_FUNCTION_TRACER */ 85#endif /* CONFIG_FUNCTION_TRACER */
45 86
46#ifdef CONFIG_DYNAMIC_FTRACE 87#ifdef CONFIG_DYNAMIC_FTRACE
88/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */
89#include <asm/ftrace.h>
47 90
48enum { 91enum {
49 FTRACE_FL_FREE = (1 << 0), 92 FTRACE_FL_FREE = (1 << 0),
@@ -59,6 +102,7 @@ struct dyn_ftrace {
59 struct list_head list; 102 struct list_head list;
60 unsigned long ip; /* address of mcount call-site */ 103 unsigned long ip; /* address of mcount call-site */
61 unsigned long flags; 104 unsigned long flags;
105 struct dyn_arch_ftrace arch;
62}; 106};
63 107
64int ftrace_force_update(void); 108int ftrace_force_update(void);
@@ -66,19 +110,48 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset);
66 110
67/* defined in arch */ 111/* defined in arch */
68extern int ftrace_ip_converted(unsigned long ip); 112extern int ftrace_ip_converted(unsigned long ip);
69extern unsigned char *ftrace_nop_replace(void);
70extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
71extern int ftrace_dyn_arch_init(void *data); 113extern int ftrace_dyn_arch_init(void *data);
72extern int ftrace_update_ftrace_func(ftrace_func_t func); 114extern int ftrace_update_ftrace_func(ftrace_func_t func);
73extern void ftrace_caller(void); 115extern void ftrace_caller(void);
74extern void ftrace_call(void); 116extern void ftrace_call(void);
75extern void mcount_call(void); 117extern void mcount_call(void);
118#ifdef CONFIG_FUNCTION_GRAPH_TRACER
119extern void ftrace_graph_caller(void);
120extern int ftrace_enable_ftrace_graph_caller(void);
121extern int ftrace_disable_ftrace_graph_caller(void);
122#else
123static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; }
124static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
125#endif
126
127/**
 128 * ftrace_make_nop - convert code into nop
129 * @mod: module structure if called by module load initialization
130 * @rec: the mcount call site record
131 * @addr: the address that the call site should be calling
132 *
133 * This is a very sensitive operation and great care needs
134 * to be taken by the arch. The operation should carefully
135 * read the location, check to see if what is read is indeed
136 * what we expect it to be, and then on success of the compare,
137 * it should write to the location.
138 *
139 * The code segment at @rec->ip should be a caller to @addr
140 *
141 * Return must be:
142 * 0 on success
143 * -EFAULT on error reading the location
144 * -EINVAL on a failed compare of the contents
145 * -EPERM on error writing to the location
146 * Any other value will be considered a failure.
147 */
148extern int ftrace_make_nop(struct module *mod,
149 struct dyn_ftrace *rec, unsigned long addr);
76 150
77/** 151/**
78 * ftrace_modify_code - modify code segment 152 * ftrace_make_call - convert a nop call site into a call to addr
79 * @ip: the address of the code segment 153 * @rec: the mcount call site record
80 * @old_code: the contents of what is expected to be there 154 * @addr: the address that the call site should call
81 * @new_code: the code to patch in
82 * 155 *
83 * This is a very sensitive operation and great care needs 156 * This is a very sensitive operation and great care needs
84 * to be taken by the arch. The operation should carefully 157 * to be taken by the arch. The operation should carefully
@@ -86,6 +159,8 @@ extern void mcount_call(void);
86 * what we expect it to be, and then on success of the compare, 159 * what we expect it to be, and then on success of the compare,
87 * it should write to the location. 160 * it should write to the location.
88 * 161 *
162 * The code segment at @rec->ip should be a nop
163 *
89 * Return must be: 164 * Return must be:
90 * 0 on success 165 * 0 on success
91 * -EFAULT on error reading the location 166 * -EFAULT on error reading the location
@@ -93,8 +168,11 @@ extern void mcount_call(void);
93 * -EPERM on error writing to the location 168 * -EPERM on error writing to the location
94 * Any other value will be considered a failure. 169 * Any other value will be considered a failure.
95 */ 170 */
96extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, 171extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
97 unsigned char *new_code); 172
173
174/* May be defined in arch */
175extern int ftrace_arch_read_dyn_info(char *buf, int size);
98 176
99extern int skip_trace(unsigned long ip); 177extern int skip_trace(unsigned long ip);
100 178
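
ftrace_make_nop() and ftrace_make_call() are specified above as a careful read, compare, then write sequence, with a distinct error code for each failure mode. A user-space sketch of that contract applied to an ordinary byte buffer; the 5-byte call/nop patterns are an assumption for illustration (roughly what an x86 mcount call site looks like), not part of the interface:

/* patch_site.c - sketch of the compare-then-write contract documented
 * for ftrace_make_nop()/ftrace_make_call(). */
#include <errno.h>
#include <stdio.h>
#include <string.h>

#define PATCH_SIZE 5    /* assumed instruction size for the example */

/* Replace the bytes at @site with @new only if they currently equal @old. */
static int modify_code(unsigned char *site, const unsigned char *old,
                       const unsigned char *new)
{
        unsigned char cur[PATCH_SIZE];

        if (!site)
                return -EFAULT;                 /* could not read the location        */
        memcpy(cur, site, PATCH_SIZE);          /* "carefully read the location"      */
        if (memcmp(cur, old, PATCH_SIZE))
                return -EINVAL;                 /* contents were not what we expected */
        memcpy(site, new, PATCH_SIZE);          /* a real text write may fail: -EPERM */
        return 0;
}

int main(void)
{
        unsigned char text[PATCH_SIZE] = { 0xe8, 1, 2, 3, 4 };         /* fake "call" */
        const unsigned char call[PATCH_SIZE] = { 0xe8, 1, 2, 3, 4 };
        const unsigned char nop[PATCH_SIZE] = { 0x90, 0x90, 0x90, 0x90, 0x90 };

        printf("make_nop: %d\n", modify_code(text, call, nop));        /* 0       */
        printf("again:    %d\n", modify_code(text, call, nop));        /* -EINVAL */
        return 0;
}
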
@@ -102,7 +180,6 @@ extern void ftrace_release(void *start, unsigned long size);
102 180
103extern void ftrace_disable_daemon(void); 181extern void ftrace_disable_daemon(void);
104extern void ftrace_enable_daemon(void); 182extern void ftrace_enable_daemon(void);
105
106#else 183#else
107# define skip_trace(ip) ({ 0; }) 184# define skip_trace(ip) ({ 0; })
108# define ftrace_force_update() ({ 0; }) 185# define ftrace_force_update() ({ 0; })
@@ -181,6 +258,12 @@ static inline void __ftrace_enabled_restore(int enabled)
181#endif 258#endif
182 259
183#ifdef CONFIG_TRACING 260#ifdef CONFIG_TRACING
261extern int ftrace_dump_on_oops;
262
263extern void tracing_start(void);
264extern void tracing_stop(void);
265extern void ftrace_off_permanent(void);
266
184extern void 267extern void
185ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); 268ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
186 269
@@ -211,6 +294,9 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
211static inline int 294static inline int
212ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); 295ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
213 296
297static inline void tracing_start(void) { }
298static inline void tracing_stop(void) { }
299static inline void ftrace_off_permanent(void) { }
214static inline int 300static inline int
215ftrace_printk(const char *fmt, ...) 301ftrace_printk(const char *fmt, ...)
216{ 302{
@@ -221,33 +307,86 @@ static inline void ftrace_dump(void) { }
221 307
222#ifdef CONFIG_FTRACE_MCOUNT_RECORD 308#ifdef CONFIG_FTRACE_MCOUNT_RECORD
223extern void ftrace_init(void); 309extern void ftrace_init(void);
224extern void ftrace_init_module(unsigned long *start, unsigned long *end); 310extern void ftrace_init_module(struct module *mod,
311 unsigned long *start, unsigned long *end);
225#else 312#else
226static inline void ftrace_init(void) { } 313static inline void ftrace_init(void) { }
227static inline void 314static inline void
228ftrace_init_module(unsigned long *start, unsigned long *end) { } 315ftrace_init_module(struct module *mod,
316 unsigned long *start, unsigned long *end) { }
229#endif 317#endif
230 318
319enum {
320 POWER_NONE = 0,
321 POWER_CSTATE = 1,
322 POWER_PSTATE = 2,
323};
231 324
232struct boot_trace { 325struct power_trace {
233 pid_t caller; 326#ifdef CONFIG_POWER_TRACER
234 char func[KSYM_NAME_LEN]; 327 ktime_t stamp;
235 int result; 328 ktime_t end;
236 unsigned long long duration; /* usecs */ 329 int type;
237 ktime_t calltime; 330 int state;
238 ktime_t rettime; 331#endif
239}; 332};
240 333
241#ifdef CONFIG_BOOT_TRACER 334#ifdef CONFIG_POWER_TRACER
242extern void trace_boot(struct boot_trace *it, initcall_t fn); 335extern void trace_power_start(struct power_trace *it, unsigned int type,
243extern void start_boot_trace(void); 336 unsigned int state);
244extern void stop_boot_trace(void); 337extern void trace_power_mark(struct power_trace *it, unsigned int type,
338 unsigned int state);
339extern void trace_power_end(struct power_trace *it);
245#else 340#else
246static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } 341static inline void trace_power_start(struct power_trace *it, unsigned int type,
247static inline void start_boot_trace(void) { } 342 unsigned int state) { }
248static inline void stop_boot_trace(void) { } 343static inline void trace_power_mark(struct power_trace *it, unsigned int type,
344 unsigned int state) { }
345static inline void trace_power_end(struct power_trace *it) { }
249#endif 346#endif
250 347
251 348
349/*
350 * Structure that defines an entry function trace.
351 */
352struct ftrace_graph_ent {
353 unsigned long func; /* Current function */
354 int depth;
355};
356
357/*
358 * Structure that defines a return function trace.
359 */
360struct ftrace_graph_ret {
361 unsigned long func; /* Current function */
362 unsigned long long calltime;
363 unsigned long long rettime;
364 /* Number of functions that overran the depth limit for current task */
365 unsigned long overrun;
366 int depth;
367};
368
369#ifdef CONFIG_FUNCTION_GRAPH_TRACER
370#define FTRACE_RETFUNC_DEPTH 50
371#define FTRACE_RETSTACK_ALLOC_SIZE 32
372/* Type of the callback handlers for tracing function graph */
373typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
374typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
375
376extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
377 trace_func_graph_ent_t entryfunc);
378
379/* The current handlers in use */
380extern trace_func_graph_ret_t ftrace_graph_return;
381extern trace_func_graph_ent_t ftrace_graph_entry;
382
383extern void unregister_ftrace_graph(void);
384
385extern void ftrace_graph_init_task(struct task_struct *t);
386extern void ftrace_graph_exit_task(struct task_struct *t);
387#else
388static inline void ftrace_graph_init_task(struct task_struct *t) { }
389static inline void ftrace_graph_exit_task(struct task_struct *t) { }
390#endif
252 391
253#endif /* _LINUX_FTRACE_H */ 392#endif /* _LINUX_FTRACE_H */
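
The ftrace_graph_ent/ftrace_graph_ret pair and the register_ftrace_graph() callbacks above let a tracer hook both the entry and the return of a function and derive its duration from calltime and rettime. A plain C sketch of that callback shape, with clock_gettime() standing in for the kernel's timestamps and hand-placed hooks instead of mcount patching (all names here are illustrative):

/* graph_hooks.c - sketch of entry/return hooks in the style of
 * register_ftrace_graph(): the return hook reports the time spent
 * between the two events. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <time.h>

struct graph_ent { const char *func; int depth; };
struct graph_ret { const char *func; unsigned long long calltime, rettime; int depth; };

static unsigned long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* entry handler, in the role of trace_func_graph_ent_t */
static void entry_hook(struct graph_ent *ent)
{
        printf("-> %s (depth %d)\n", ent->func, ent->depth);
}

/* return handler, in the role of trace_func_graph_ret_t */
static void return_hook(struct graph_ret *ret)
{
        printf("<- %s after %llu ns\n", ret->func, ret->rettime - ret->calltime);
}

static void traced_work(void)
{
        struct graph_ent ent = { __func__, 0 };
        struct graph_ret ret = { __func__, now_ns(), 0, 0 };

        entry_hook(&ent);                       /* what the entry stub would do    */
        for (volatile int i = 0; i < 1000000; i++)
                ;                               /* the function body               */
        ret.rettime = now_ns();
        return_hook(&ret);                      /* what the patched return would do */
}

int main(void)
{
        traced_work();
        return 0;
}
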
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
new file mode 100644
index 000000000000..366a054d0b05
--- /dev/null
+++ b/include/linux/ftrace_irq.h
@@ -0,0 +1,13 @@
1#ifndef _LINUX_FTRACE_IRQ_H
2#define _LINUX_FTRACE_IRQ_H
3
4
5#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
6extern void ftrace_nmi_enter(void);
7extern void ftrace_nmi_exit(void);
8#else
9static inline void ftrace_nmi_enter(void) { }
10static inline void ftrace_nmi_exit(void) { }
11#endif
12
13#endif /* _LINUX_FTRACE_IRQ_H */
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..89a56d79e4c6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
4#include <linux/preempt.h> 4#include <linux/preempt.h>
5#include <linux/smp_lock.h> 5#include <linux/smp_lock.h>
6#include <linux/lockdep.h> 6#include <linux/lockdep.h>
7#include <linux/ftrace_irq.h>
7#include <asm/hardirq.h> 8#include <asm/hardirq.h>
8#include <asm/system.h> 9#include <asm/system.h>
9 10
@@ -161,7 +162,17 @@ extern void irq_enter(void);
161 */ 162 */
162extern void irq_exit(void); 163extern void irq_exit(void);
163 164
164#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) 165#define nmi_enter() \
165#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) 166 do { \
167 ftrace_nmi_enter(); \
168 lockdep_off(); \
169 __irq_enter(); \
170 } while (0)
171#define nmi_exit() \
172 do { \
173 __irq_exit(); \
174 lockdep_on(); \
175 ftrace_nmi_exit(); \
176 } while (0)
166 177
167#endif /* LINUX_HARDIRQ_H */ 178#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..b85e74ca782f 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -12,6 +12,7 @@
12 * See the file COPYING for more details. 12 * See the file COPYING for more details.
13 */ 13 */
14 14
15#include <stdarg.h>
15#include <linux/types.h> 16#include <linux/types.h>
16 17
17struct module; 18struct module;
@@ -48,10 +49,28 @@ struct marker {
48 void (*call)(const struct marker *mdata, void *call_private, ...); 49 void (*call)(const struct marker *mdata, void *call_private, ...);
49 struct marker_probe_closure single; 50 struct marker_probe_closure single;
50 struct marker_probe_closure *multi; 51 struct marker_probe_closure *multi;
52 const char *tp_name; /* Optional tracepoint name */
53 void *tp_cb; /* Optional tracepoint callback */
51} __attribute__((aligned(8))); 54} __attribute__((aligned(8)));
52 55
53#ifdef CONFIG_MARKERS 56#ifdef CONFIG_MARKERS
54 57
58#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \
59 static const char __mstrtab_##name[] \
60 __attribute__((section("__markers_strings"))) \
61 = #name "\0" format; \
62 static struct marker __mark_##name \
63 __attribute__((section("__markers"), aligned(8))) = \
64 { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
65 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\
66 NULL, tp_name_str, tp_cb }
67
68#define DEFINE_MARKER(name, format) \
69 _DEFINE_MARKER(name, NULL, NULL, format)
70
71#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \
72 _DEFINE_MARKER(name, #tp_name, tp_cb, format)
73
55/* 74/*
56 * Note : the empty asm volatile with read constraint is used here instead of a 75 * Note : the empty asm volatile with read constraint is used here instead of a
57 * "used" attribute to fix a gcc 4.1.x bug. 76 * "used" attribute to fix a gcc 4.1.x bug.
@@ -65,14 +84,7 @@ struct marker {
65 */ 84 */
66#define __trace_mark(generic, name, call_private, format, args...) \ 85#define __trace_mark(generic, name, call_private, format, args...) \
67 do { \ 86 do { \
68 static const char __mstrtab_##name[] \ 87 DEFINE_MARKER(name, format); \
69 __attribute__((section("__markers_strings"))) \
70 = #name "\0" format; \
71 static struct marker __mark_##name \
72 __attribute__((section("__markers"), aligned(8))) = \
73 { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
74 0, 0, marker_probe_cb, \
75 { __mark_empty_function, NULL}, NULL }; \
76 __mark_check_format(format, ## args); \ 88 __mark_check_format(format, ## args); \
77 if (unlikely(__mark_##name.state)) { \ 89 if (unlikely(__mark_##name.state)) { \
78 (*__mark_##name.call) \ 90 (*__mark_##name.call) \
@@ -80,14 +92,39 @@ struct marker {
80 } \ 92 } \
81 } while (0) 93 } while (0)
82 94
95#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
96 do { \
97 void __check_tp_type(void) \
98 { \
99 register_trace_##tp_name(tp_cb); \
100 } \
101 DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \
102 __mark_check_format(format, ## args); \
103 (*__mark_##name.call)(&__mark_##name, call_private, \
104 ## args); \
105 } while (0)
106
83extern void marker_update_probe_range(struct marker *begin, 107extern void marker_update_probe_range(struct marker *begin,
84 struct marker *end); 108 struct marker *end);
109
110#define GET_MARKER(name) (__mark_##name)
111
85#else /* !CONFIG_MARKERS */ 112#else /* !CONFIG_MARKERS */
113#define DEFINE_MARKER(name, tp_name, tp_cb, format)
86#define __trace_mark(generic, name, call_private, format, args...) \ 114#define __trace_mark(generic, name, call_private, format, args...) \
87 __mark_check_format(format, ## args) 115 __mark_check_format(format, ## args)
116#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
117 do { \
118 void __check_tp_type(void) \
119 { \
120 register_trace_##tp_name(tp_cb); \
121 } \
122 __mark_check_format(format, ## args); \
123 } while (0)
88static inline void marker_update_probe_range(struct marker *begin, 124static inline void marker_update_probe_range(struct marker *begin,
89 struct marker *end) 125 struct marker *end)
90{ } 126{ }
127#define GET_MARKER(name)
91#endif /* CONFIG_MARKERS */ 128#endif /* CONFIG_MARKERS */
92 129
93/** 130/**
@@ -117,6 +154,20 @@ static inline void marker_update_probe_range(struct marker *begin,
117 __trace_mark(1, name, NULL, format, ## args) 154 __trace_mark(1, name, NULL, format, ## args)
118 155
119/** 156/**
157 * trace_mark_tp - Marker in a tracepoint callback
158 * @name: marker name, not quoted.
159 * @tp_name: tracepoint name, not quoted.
160 * @tp_cb: tracepoint callback. Should have an associated global symbol so it
161 * is not optimized away by the compiler (should not be static).
162 * @format: format string
163 * @args...: variable argument list
164 *
165 * Places a marker in a tracepoint callback.
166 */
167#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \
168 __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args)
169
170/**
120 * MARK_NOARGS - Format string for a marker with no argument. 171 * MARK_NOARGS - Format string for a marker with no argument.
121 */ 172 */
122#define MARK_NOARGS " " 173#define MARK_NOARGS " "
@@ -136,8 +187,6 @@ extern marker_probe_func __mark_empty_function;
136 187
137extern void marker_probe_cb(const struct marker *mdata, 188extern void marker_probe_cb(const struct marker *mdata,
138 void *call_private, ...); 189 void *call_private, ...);
139extern void marker_probe_cb_noarg(const struct marker *mdata,
140 void *call_private, ...);
141 190
142/* 191/*
143 * Connect a probe to a marker. 192 * Connect a probe to a marker.
@@ -162,8 +211,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
162 211
163/* 212/*
164 * marker_synchronize_unregister must be called between the last marker probe 213 * marker_synchronize_unregister must be called between the last marker probe
165 * unregistration and the end of module exit to make sure there is no caller 214 * unregistration and the first one of
166 * executing a probe when it is freed. 215 * - the end of module exit function
216 * - the free of any resource used by the probes
217 * to ensure the code and data are valid for any possibly running probes.
167 */ 218 */
168#define marker_synchronize_unregister() synchronize_sched() 219#define marker_synchronize_unregister() synchronize_sched()
169 220
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e43e33..895dc9c1088c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -142,6 +142,7 @@ struct rcu_head {
142 * on the write-side to insure proper synchronization. 142 * on the write-side to insure proper synchronization.
143 */ 143 */
144#define rcu_read_lock_sched() preempt_disable() 144#define rcu_read_lock_sched() preempt_disable()
145#define rcu_read_lock_sched_notrace() preempt_disable_notrace()
145 146
146/* 147/*
147 * rcu_read_unlock_sched - marks the end of a RCU-classic critical section 148 * rcu_read_unlock_sched - marks the end of a RCU-classic critical section
@@ -149,6 +150,7 @@ struct rcu_head {
149 * See rcu_read_lock_sched for more information. 150 * See rcu_read_lock_sched for more information.
150 */ 151 */
151#define rcu_read_unlock_sched() preempt_enable() 152#define rcu_read_unlock_sched() preempt_enable()
153#define rcu_read_unlock_sched_notrace() preempt_enable_notrace()
152 154
153 155
154 156
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e097c2e6b6dc..3bb87a753fa3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -122,6 +122,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
122 122
123void tracing_on(void); 123void tracing_on(void);
124void tracing_off(void); 124void tracing_off(void);
125void tracing_off_permanent(void);
125 126
126enum ring_buffer_flags { 127enum ring_buffer_flags {
127 RB_FL_OVERWRITE = 1 << 0, 128 RB_FL_OVERWRITE = 1 << 0,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55e30d114477..2d0a93c31228 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,6 +96,7 @@ struct exec_domain;
96struct futex_pi_state; 96struct futex_pi_state;
97struct robust_list_head; 97struct robust_list_head;
98struct bio; 98struct bio;
99struct bts_tracer;
99 100
100/* 101/*
101 * List of flags we want to share for kernel threads, 102 * List of flags we want to share for kernel threads,
@@ -1165,6 +1166,18 @@ struct task_struct {
1165 struct list_head ptraced; 1166 struct list_head ptraced;
1166 struct list_head ptrace_entry; 1167 struct list_head ptrace_entry;
1167 1168
1169#ifdef CONFIG_X86_PTRACE_BTS
1170 /*
1171 * This is the tracer handle for the ptrace BTS extension.
1172 * This field actually belongs to the ptracer task.
1173 */
1174 struct bts_tracer *bts;
1175 /*
1176 * The buffer to hold the BTS data.
1177 */
1178 void *bts_buffer;
1179#endif /* CONFIG_X86_PTRACE_BTS */
1180
1168 /* PID/PID hash table linkage. */ 1181 /* PID/PID hash table linkage. */
1169 struct pid_link pids[PIDTYPE_MAX]; 1182 struct pid_link pids[PIDTYPE_MAX];
1170 struct list_head thread_group; 1183 struct list_head thread_group;
@@ -1356,6 +1369,17 @@ struct task_struct {
1356 unsigned long default_timer_slack_ns; 1369 unsigned long default_timer_slack_ns;
1357 1370
1358 struct list_head *scm_work_list; 1371 struct list_head *scm_work_list;
1372#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 1373 /* Index of the currently stored address in ret_stack */
1374 int curr_ret_stack;
1375 /* Stack of return addresses for return function tracing */
1376 struct ftrace_ret_stack *ret_stack;
1377 /*
1378 * Number of functions that haven't been traced
1379 * because of depth overrun.
1380 */
1381 atomic_t trace_overrun;
1382#endif
1359}; 1383};
1360 1384
1361/* 1385/*
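
The new task_struct fields, curr_ret_stack, ret_stack and trace_overrun, give the function graph tracer a small per-task stack of pending returns plus a counter for calls that exceed the depth limit. A standalone sketch of that bookkeeping; the depth of 50 matches FTRACE_RETFUNC_DEPTH above, but the entry layout here is simplified and assumed:

/* ret_stack.c - sketch of the per-task return-address stack kept for
 * the function graph tracer: push on entry, pop on return, count
 * overruns past the depth limit. */
#include <stdio.h>

#define RETFUNC_DEPTH 50        /* FTRACE_RETFUNC_DEPTH in the patch */

struct ret_entry { unsigned long ret_addr; unsigned long long calltime; };

static struct ret_entry ret_stack[RETFUNC_DEPTH];
static int curr_ret_stack = -1;         /* index of the current entry */
static unsigned long trace_overrun;

static int push_return(unsigned long ret_addr, unsigned long long now)
{
        if (curr_ret_stack == RETFUNC_DEPTH - 1) {
                trace_overrun++;        /* too deep: note it and skip tracing */
                return -1;
        }
        curr_ret_stack++;
        ret_stack[curr_ret_stack] = (struct ret_entry){ ret_addr, now };
        return 0;
}

static unsigned long pop_return(unsigned long long *calltime)
{
        struct ret_entry *e = &ret_stack[curr_ret_stack--];

        *calltime = e->calltime;
        return e->ret_addr;     /* where the traced function must return to */
}

int main(void)
{
        unsigned long long t;

        for (unsigned long d = 0; d < 60; d++)  /* 60 nested "calls", 10 too many */
                push_return(0x1000 + d, d);
        while (curr_ret_stack >= 0)
                printf("return to %#lx (calltime %llu)\n", pop_return(&t), t);
        printf("overruns: %lu\n", trace_overrun);
        return 0;
}
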
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dc50bcc282a8..b3dfa72f13b9 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -34,6 +34,7 @@ struct seq_operations {
34 34
35#define SEQ_SKIP 1 35#define SEQ_SKIP 1
36 36
37char *mangle_path(char *s, char *p, char *esc);
37int seq_open(struct file *, const struct seq_operations *); 38int seq_open(struct file *, const struct seq_operations *);
38ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); 39ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
39loff_t seq_lseek(struct file *, loff_t, int); 40loff_t seq_lseek(struct file *, loff_t, int);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index b106fd8e0d5c..1a8cecc4f38c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
15 struct stack_trace *trace); 15 struct stack_trace *trace);
16 16
17extern void print_stack_trace(struct stack_trace *trace, int spaces); 17extern void print_stack_trace(struct stack_trace *trace, int spaces);
18
19#ifdef CONFIG_USER_STACKTRACE_SUPPORT
20extern void save_stack_trace_user(struct stack_trace *trace);
21#else
22# define save_stack_trace_user(trace) do { } while (0)
23#endif
24
18#else 25#else
19# define save_stack_trace(trace) do { } while (0) 26# define save_stack_trace(trace) do { } while (0)
20# define save_stack_trace_tsk(tsk, trace) do { } while (0) 27# define save_stack_trace_tsk(tsk, trace) do { } while (0)
28# define save_stack_trace_user(trace) do { } while (0)
21# define print_stack_trace(trace, spaces) do { } while (0) 29# define print_stack_trace(trace, spaces) do { } while (0)
22#endif 30#endif
23 31
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c5bb39c7a770..757005458366 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -24,8 +24,12 @@ struct tracepoint {
24 const char *name; /* Tracepoint name */ 24 const char *name; /* Tracepoint name */
25 int state; /* State. */ 25 int state; /* State. */
26 void **funcs; 26 void **funcs;
27} __attribute__((aligned(8))); 27} __attribute__((aligned(32))); /*
28 28 * Aligned on 32 bytes because it is
29 * globally visible and gcc happily
 30 * aligns these on the structure size.
31 * Keep in sync with vmlinux.lds.h.
32 */
29 33
30#define TPPROTO(args...) args 34#define TPPROTO(args...) args
31#define TPARGS(args...) args 35#define TPARGS(args...) args
@@ -40,14 +44,14 @@ struct tracepoint {
40 do { \ 44 do { \
41 void **it_func; \ 45 void **it_func; \
42 \ 46 \
43 rcu_read_lock_sched(); \ 47 rcu_read_lock_sched_notrace(); \
44 it_func = rcu_dereference((tp)->funcs); \ 48 it_func = rcu_dereference((tp)->funcs); \
45 if (it_func) { \ 49 if (it_func) { \
46 do { \ 50 do { \
47 ((void(*)(proto))(*it_func))(args); \ 51 ((void(*)(proto))(*it_func))(args); \
48 } while (*(++it_func)); \ 52 } while (*(++it_func)); \
49 } \ 53 } \
50 rcu_read_unlock_sched(); \ 54 rcu_read_unlock_sched_notrace(); \
51 } while (0) 55 } while (0)
52 56
53/* 57/*
@@ -55,35 +59,40 @@ struct tracepoint {
55 * not add unwanted padding between the beginning of the section and the 59 * not add unwanted padding between the beginning of the section and the
56 * structure. Force alignment to the same alignment as the section start. 60 * structure. Force alignment to the same alignment as the section start.
57 */ 61 */
58#define DEFINE_TRACE(name, proto, args) \ 62#define DECLARE_TRACE(name, proto, args) \
63 extern struct tracepoint __tracepoint_##name; \
59 static inline void trace_##name(proto) \ 64 static inline void trace_##name(proto) \
60 { \ 65 { \
61 static const char __tpstrtab_##name[] \
62 __attribute__((section("__tracepoints_strings"))) \
63 = #name ":" #proto; \
64 static struct tracepoint __tracepoint_##name \
65 __attribute__((section("__tracepoints"), aligned(8))) = \
66 { __tpstrtab_##name, 0, NULL }; \
67 if (unlikely(__tracepoint_##name.state)) \ 66 if (unlikely(__tracepoint_##name.state)) \
68 __DO_TRACE(&__tracepoint_##name, \ 67 __DO_TRACE(&__tracepoint_##name, \
69 TPPROTO(proto), TPARGS(args)); \ 68 TPPROTO(proto), TPARGS(args)); \
70 } \ 69 } \
71 static inline int register_trace_##name(void (*probe)(proto)) \ 70 static inline int register_trace_##name(void (*probe)(proto)) \
72 { \ 71 { \
73 return tracepoint_probe_register(#name ":" #proto, \ 72 return tracepoint_probe_register(#name, (void *)probe); \
74 (void *)probe); \
75 } \ 73 } \
76 static inline void unregister_trace_##name(void (*probe)(proto))\ 74 static inline int unregister_trace_##name(void (*probe)(proto)) \
77 { \ 75 { \
78 tracepoint_probe_unregister(#name ":" #proto, \ 76 return tracepoint_probe_unregister(#name, (void *)probe);\
79 (void *)probe); \
80 } 77 }
81 78
79#define DEFINE_TRACE(name) \
80 static const char __tpstrtab_##name[] \
81 __attribute__((section("__tracepoints_strings"))) = #name; \
82 struct tracepoint __tracepoint_##name \
83 __attribute__((section("__tracepoints"), aligned(32))) = \
84 { __tpstrtab_##name, 0, NULL }
85
86#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \
87 EXPORT_SYMBOL_GPL(__tracepoint_##name)
88#define EXPORT_TRACEPOINT_SYMBOL(name) \
89 EXPORT_SYMBOL(__tracepoint_##name)
90
82extern void tracepoint_update_probe_range(struct tracepoint *begin, 91extern void tracepoint_update_probe_range(struct tracepoint *begin,
83 struct tracepoint *end); 92 struct tracepoint *end);
84 93
85#else /* !CONFIG_TRACEPOINTS */ 94#else /* !CONFIG_TRACEPOINTS */
86#define DEFINE_TRACE(name, proto, args) \ 95#define DECLARE_TRACE(name, proto, args) \
87 static inline void _do_trace_##name(struct tracepoint *tp, proto) \ 96 static inline void _do_trace_##name(struct tracepoint *tp, proto) \
88 { } \ 97 { } \
89 static inline void trace_##name(proto) \ 98 static inline void trace_##name(proto) \
@@ -92,8 +101,14 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
92 { \ 101 { \
93 return -ENOSYS; \ 102 return -ENOSYS; \
94 } \ 103 } \
95 static inline void unregister_trace_##name(void (*probe)(proto))\ 104 static inline int unregister_trace_##name(void (*probe)(proto)) \
96 { } 105 { \
106 return -ENOSYS; \
107 }
108
109#define DEFINE_TRACE(name)
110#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
111#define EXPORT_TRACEPOINT_SYMBOL(name)
97 112
98static inline void tracepoint_update_probe_range(struct tracepoint *begin, 113static inline void tracepoint_update_probe_range(struct tracepoint *begin,
99 struct tracepoint *end) 114 struct tracepoint *end)
@@ -112,6 +127,10 @@ extern int tracepoint_probe_register(const char *name, void *probe);
112 */ 127 */
113extern int tracepoint_probe_unregister(const char *name, void *probe); 128extern int tracepoint_probe_unregister(const char *name, void *probe);
114 129
130extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
131extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
132extern void tracepoint_probe_update_all(void);
133
115struct tracepoint_iter { 134struct tracepoint_iter {
116 struct module *module; 135 struct module *module;
117 struct tracepoint *tracepoint; 136 struct tracepoint *tracepoint;
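
__DO_TRACE() above walks a NULL-terminated array of probe pointers and calls each one with the tracepoint's arguments, under rcu_read_lock_sched_notrace(). A plain C sketch of just that dispatch loop, without the RCU protection or the state check (probe names and the argument type are made up):

/* tp_dispatch.c - sketch of the __DO_TRACE() loop: call every probe in
 * a NULL-terminated function-pointer array. */
#include <stdio.h>

typedef void (*probe_fn)(int value);

static void probe_a(int value) { printf("probe_a: %d\n", value); }
static void probe_b(int value) { printf("probe_b: %d\n", value); }

/* NULL-terminated, as tracepoint->funcs is when probes are registered */
static probe_fn funcs[] = { probe_a, probe_b, NULL };

static void do_trace(probe_fn *it_func, int value)
{
        if (!it_func)           /* no probes registered */
                return;
        do {
                (*it_func)(value);      /* ((void(*)(proto))(*it_func))(args) */
        } while (*(++it_func));
}

int main(void)
{
        do_trace(funcs, 42);
        return 0;
}
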
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 3b8121d4e36f..eaec37c9d83d 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -325,7 +325,7 @@ extern struct class *tty_class;
325 * go away 325 * go away
326 */ 326 */
327 327
328extern inline struct tty_struct *tty_kref_get(struct tty_struct *tty) 328static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
329{ 329{
330 if (tty) 330 if (tty)
331 kref_get(&tty->kref); 331 kref_get(&tty->kref);
diff --git a/include/trace/block.h b/include/trace/block.h
new file mode 100644
index 000000000000..25c6a1fd5b77
--- /dev/null
+++ b/include/trace/block.h
@@ -0,0 +1,76 @@
1#ifndef _TRACE_BLOCK_H
2#define _TRACE_BLOCK_H
3
4#include <linux/blkdev.h>
5#include <linux/tracepoint.h>
6
7DECLARE_TRACE(block_rq_abort,
8 TPPROTO(struct request_queue *q, struct request *rq),
9 TPARGS(q, rq));
10
11DECLARE_TRACE(block_rq_insert,
12 TPPROTO(struct request_queue *q, struct request *rq),
13 TPARGS(q, rq));
14
15DECLARE_TRACE(block_rq_issue,
16 TPPROTO(struct request_queue *q, struct request *rq),
17 TPARGS(q, rq));
18
19DECLARE_TRACE(block_rq_requeue,
20 TPPROTO(struct request_queue *q, struct request *rq),
21 TPARGS(q, rq));
22
23DECLARE_TRACE(block_rq_complete,
24 TPPROTO(struct request_queue *q, struct request *rq),
25 TPARGS(q, rq));
26
27DECLARE_TRACE(block_bio_bounce,
28 TPPROTO(struct request_queue *q, struct bio *bio),
29 TPARGS(q, bio));
30
31DECLARE_TRACE(block_bio_complete,
32 TPPROTO(struct request_queue *q, struct bio *bio),
33 TPARGS(q, bio));
34
35DECLARE_TRACE(block_bio_backmerge,
36 TPPROTO(struct request_queue *q, struct bio *bio),
37 TPARGS(q, bio));
38
39DECLARE_TRACE(block_bio_frontmerge,
40 TPPROTO(struct request_queue *q, struct bio *bio),
41 TPARGS(q, bio));
42
43DECLARE_TRACE(block_bio_queue,
44 TPPROTO(struct request_queue *q, struct bio *bio),
45 TPARGS(q, bio));
46
47DECLARE_TRACE(block_getrq,
48 TPPROTO(struct request_queue *q, struct bio *bio, int rw),
49 TPARGS(q, bio, rw));
50
51DECLARE_TRACE(block_sleeprq,
52 TPPROTO(struct request_queue *q, struct bio *bio, int rw),
53 TPARGS(q, bio, rw));
54
55DECLARE_TRACE(block_plug,
56 TPPROTO(struct request_queue *q),
57 TPARGS(q));
58
59DECLARE_TRACE(block_unplug_timer,
60 TPPROTO(struct request_queue *q),
61 TPARGS(q));
62
63DECLARE_TRACE(block_unplug_io,
64 TPPROTO(struct request_queue *q),
65 TPARGS(q));
66
67DECLARE_TRACE(block_split,
68 TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
69 TPARGS(q, bio, pdu));
70
71DECLARE_TRACE(block_remap,
72 TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev,
73 sector_t from, sector_t to),
74 TPARGS(q, bio, dev, from, to));
75
76#endif
diff --git a/include/trace/boot.h b/include/trace/boot.h
new file mode 100644
index 000000000000..6b54537eab02
--- /dev/null
+++ b/include/trace/boot.h
@@ -0,0 +1,56 @@
1#ifndef _LINUX_TRACE_BOOT_H
2#define _LINUX_TRACE_BOOT_H
3
4/*
5 * Structure which defines the trace of an initcall
6 * while it is called.
7 * You don't have to fill the func field since it is
8 * only used internally by the tracer.
9 */
10struct boot_trace_call {
11 pid_t caller;
12 char func[KSYM_NAME_LEN];
13};
14
15/*
16 * Structure which defines the trace of an initcall
17 * while it returns.
18 */
19struct boot_trace_ret {
20 char func[KSYM_NAME_LEN];
21 int result;
22 unsigned long long duration; /* nsecs */
23};
24
25#ifdef CONFIG_BOOT_TRACER
26/* Append the traces on the ring-buffer */
27extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
28extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
29
30/* Tells the tracer that smp_pre_initcall is finished,
 31 * so tracing can start.
32 */
33extern void start_boot_trace(void);
34
35/* Resume the tracing of other necessary events
36 * such as sched switches
37 */
38extern void enable_boot_trace(void);
39
40/* Suspend this tracing. Actually, only the sched_switches tracing has
 41 * to be suspended; initcalls don't need it.
42 */
43extern void disable_boot_trace(void);
44#else
45static inline
46void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
47
48static inline
49void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
50
51static inline void start_boot_trace(void) { }
52static inline void enable_boot_trace(void) { }
53static inline void disable_boot_trace(void) { }
54#endif /* CONFIG_BOOT_TRACER */
55
56#endif /* __LINUX_TRACE_BOOT_H */
diff --git a/include/trace/sched.h b/include/trace/sched.h
index ad47369d01b5..9b2854abf7e2 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -4,52 +4,52 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6 6
7DEFINE_TRACE(sched_kthread_stop, 7DECLARE_TRACE(sched_kthread_stop,
8 TPPROTO(struct task_struct *t), 8 TPPROTO(struct task_struct *t),
9 TPARGS(t)); 9 TPARGS(t));
10 10
11DEFINE_TRACE(sched_kthread_stop_ret, 11DECLARE_TRACE(sched_kthread_stop_ret,
12 TPPROTO(int ret), 12 TPPROTO(int ret),
13 TPARGS(ret)); 13 TPARGS(ret));
14 14
15DEFINE_TRACE(sched_wait_task, 15DECLARE_TRACE(sched_wait_task,
16 TPPROTO(struct rq *rq, struct task_struct *p), 16 TPPROTO(struct rq *rq, struct task_struct *p),
17 TPARGS(rq, p)); 17 TPARGS(rq, p));
18 18
19DEFINE_TRACE(sched_wakeup, 19DECLARE_TRACE(sched_wakeup,
20 TPPROTO(struct rq *rq, struct task_struct *p), 20 TPPROTO(struct rq *rq, struct task_struct *p),
21 TPARGS(rq, p)); 21 TPARGS(rq, p));
22 22
23DEFINE_TRACE(sched_wakeup_new, 23DECLARE_TRACE(sched_wakeup_new,
24 TPPROTO(struct rq *rq, struct task_struct *p), 24 TPPROTO(struct rq *rq, struct task_struct *p),
25 TPARGS(rq, p)); 25 TPARGS(rq, p));
26 26
27DEFINE_TRACE(sched_switch, 27DECLARE_TRACE(sched_switch,
28 TPPROTO(struct rq *rq, struct task_struct *prev, 28 TPPROTO(struct rq *rq, struct task_struct *prev,
29 struct task_struct *next), 29 struct task_struct *next),
30 TPARGS(rq, prev, next)); 30 TPARGS(rq, prev, next));
31 31
32DEFINE_TRACE(sched_migrate_task, 32DECLARE_TRACE(sched_migrate_task,
33 TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), 33 TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
34 TPARGS(rq, p, dest_cpu)); 34 TPARGS(rq, p, dest_cpu));
35 35
36DEFINE_TRACE(sched_process_free, 36DECLARE_TRACE(sched_process_free,
37 TPPROTO(struct task_struct *p), 37 TPPROTO(struct task_struct *p),
38 TPARGS(p)); 38 TPARGS(p));
39 39
40DEFINE_TRACE(sched_process_exit, 40DECLARE_TRACE(sched_process_exit,
41 TPPROTO(struct task_struct *p), 41 TPPROTO(struct task_struct *p),
42 TPARGS(p)); 42 TPARGS(p));
43 43
44DEFINE_TRACE(sched_process_wait, 44DECLARE_TRACE(sched_process_wait,
45 TPPROTO(struct pid *pid), 45 TPPROTO(struct pid *pid),
46 TPARGS(pid)); 46 TPARGS(pid));
47 47
48DEFINE_TRACE(sched_process_fork, 48DECLARE_TRACE(sched_process_fork,
49 TPPROTO(struct task_struct *parent, struct task_struct *child), 49 TPPROTO(struct task_struct *parent, struct task_struct *child),
50 TPARGS(parent, child)); 50 TPARGS(parent, child));
51 51
52DEFINE_TRACE(sched_signal_send, 52DECLARE_TRACE(sched_signal_send,
53 TPPROTO(int sig, struct task_struct *p), 53 TPPROTO(int sig, struct task_struct *p),
54 TPARGS(sig, p)); 54 TPARGS(sig, p));
55 55
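
With the header now only declaring the wrappers (DECLARE_TRACE), each tracepoint object must be defined exactly once in the subsystem that owns it, which is what the DEFINE_TRACE() lines added to kernel/exit.c, kernel/fork.c and kernel/kthread.c below do. A compact single-file model of that declare-in-the-header, define-once pattern; the macro and event names are invented and the "probe" is reduced to a state flag:

/* declare_define.c - model of the DECLARE_TRACE()/DEFINE_TRACE() split:
 * the "header" part only declares an extern object plus an inline
 * helper, exactly one "C file" provides the definition. */
#include <stdio.h>

struct tracepoint { const char *name; int state; };

/* what a header in the style of include/trace/sched.h would carry */
#define DECLARE_TP(name)                                                \
        extern struct tracepoint __tracepoint_##name;                   \
        static inline void trace_##name(int arg)                        \
        {                                                               \
                if (__tracepoint_##name.state)                          \
                        printf("%s fired: %d\n",                        \
                               __tracepoint_##name.name, arg);          \
        }

/* what exactly one .c file in the style of kernel/exit.c would carry */
#define DEFINE_TP(name)                                                 \
        struct tracepoint __tracepoint_##name = { #name, 0 }

DECLARE_TP(sample_event)        /* every user includes the declaration */
DEFINE_TP(sample_event);        /* one translation unit owns the object */

int main(void)
{
        trace_sample_event(1);                  /* disabled: nothing printed */
        __tracepoint_sample_event.state = 1;    /* "a probe got registered"  */
        trace_sample_event(2);                  /* now it fires              */
        return 0;
}
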
diff --git a/init/Kconfig b/init/Kconfig
index f763762d544a..f291f086caa1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -808,6 +808,7 @@ config TRACEPOINTS
808 808
809config MARKERS 809config MARKERS
810 bool "Activate markers" 810 bool "Activate markers"
811 depends on TRACEPOINTS
811 help 812 help
812 Place an empty function call at each marker site. Can be 813 Place an empty function call at each marker site. Can be
813 dynamically changed for a probe function. 814 dynamically changed for a probe function.
diff --git a/init/main.c b/init/main.c
index 7e117a231af1..79213c0785d2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -63,6 +63,7 @@
63#include <linux/signal.h> 63#include <linux/signal.h>
64#include <linux/idr.h> 64#include <linux/idr.h>
65#include <linux/ftrace.h> 65#include <linux/ftrace.h>
66#include <trace/boot.h>
66 67
67#include <asm/io.h> 68#include <asm/io.h>
68#include <asm/bugs.h> 69#include <asm/bugs.h>
@@ -703,31 +704,35 @@ core_param(initcall_debug, initcall_debug, bool, 0644);
703int do_one_initcall(initcall_t fn) 704int do_one_initcall(initcall_t fn)
704{ 705{
705 int count = preempt_count(); 706 int count = preempt_count();
706 ktime_t delta; 707 ktime_t calltime, delta, rettime;
707 char msgbuf[64]; 708 char msgbuf[64];
708 struct boot_trace it; 709 struct boot_trace_call call;
710 struct boot_trace_ret ret;
709 711
710 if (initcall_debug) { 712 if (initcall_debug) {
711 it.caller = task_pid_nr(current); 713 call.caller = task_pid_nr(current);
712 printk("calling %pF @ %i\n", fn, it.caller); 714 printk("calling %pF @ %i\n", fn, call.caller);
713 it.calltime = ktime_get(); 715 calltime = ktime_get();
716 trace_boot_call(&call, fn);
717 enable_boot_trace();
714 } 718 }
715 719
716 it.result = fn(); 720 ret.result = fn();
717 721
718 if (initcall_debug) { 722 if (initcall_debug) {
719 it.rettime = ktime_get(); 723 disable_boot_trace();
720 delta = ktime_sub(it.rettime, it.calltime); 724 rettime = ktime_get();
721 it.duration = (unsigned long long) delta.tv64 >> 10; 725 delta = ktime_sub(rettime, calltime);
726 ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
727 trace_boot_ret(&ret, fn);
722 printk("initcall %pF returned %d after %Ld usecs\n", fn, 728 printk("initcall %pF returned %d after %Ld usecs\n", fn,
723 it.result, it.duration); 729 ret.result, ret.duration);
724 trace_boot(&it, fn);
725 } 730 }
726 731
727 msgbuf[0] = 0; 732 msgbuf[0] = 0;
728 733
729 if (it.result && it.result != -ENODEV && initcall_debug) 734 if (ret.result && ret.result != -ENODEV && initcall_debug)
730 sprintf(msgbuf, "error code %d ", it.result); 735 sprintf(msgbuf, "error code %d ", ret.result);
731 736
732 if (preempt_count() != count) { 737 if (preempt_count() != count) {
733 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); 738 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -741,7 +746,7 @@ int do_one_initcall(initcall_t fn)
741 printk("initcall %pF returned with %s\n", fn, msgbuf); 746 printk("initcall %pF returned with %s\n", fn, msgbuf);
742 } 747 }
743 748
744 return it.result; 749 return ret.result;
745} 750}
746 751
747 752
@@ -882,7 +887,7 @@ static int __init kernel_init(void * unused)
882 * we're essentially up and running. Get rid of the 887 * we're essentially up and running. Get rid of the
883 * initmem segments and start the user-mode stuff.. 888 * initmem segments and start the user-mode stuff..
884 */ 889 */
885 stop_boot_trace(); 890
886 init_post(); 891 init_post();
887 return 0; 892 return 0;
888} 893}
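
do_one_initcall() above keeps calltime/rettime local and converts the delta with ktime_to_ns(delta) >> 10, a cheap divide-by-1024 that approximates microseconds for the debug printout. A user-space sketch of the same measurement, with clock_gettime() and nanosleep() standing in for ktime_get() and real initcall work:

/* initcall_timing.c - sketch of the duration bookkeeping in
 * do_one_initcall(): timestamp around the call, ns >> 10 as an
 * approximate microsecond count. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <time.h>

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static int fake_initcall(void)
{
        struct timespec req = { 0, 2 * 1000 * 1000 };   /* pretend 2 ms of init work */

        nanosleep(&req, NULL);
        return 0;
}

int main(void)
{
        long long calltime = now_ns();
        int result = fake_initcall();
        long long delta = now_ns() - calltime;

        /* ">> 10" divides by 1024, close enough to /1000 for a debug line */
        printf("initcall returned %d after %lld usecs\n", result, delta >> 10);
        return 0;
}
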
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..703cf3b7389c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,6 +21,10 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
21CFLAGS_REMOVE_sched_clock.o = -pg 21CFLAGS_REMOVE_sched_clock.o = -pg
22CFLAGS_REMOVE_sched.o = -pg 22CFLAGS_REMOVE_sched.o = -pg
23endif 23endif
24ifdef CONFIG_FUNCTION_GRAPH_TRACER
25CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
26CFLAGS_REMOVE_module.o = -pg # For __module_text_address()
27endif
24 28
25obj-$(CONFIG_FREEZER) += freezer.o 29obj-$(CONFIG_FREEZER) += freezer.o
26obj-$(CONFIG_PROFILING) += profile.o 30obj-$(CONFIG_PROFILING) += profile.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d8be7ebb0f7..e5ae36ebe8af 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -53,6 +53,10 @@
53#include <asm/pgtable.h> 53#include <asm/pgtable.h>
54#include <asm/mmu_context.h> 54#include <asm/mmu_context.h>
55 55
56DEFINE_TRACE(sched_process_free);
57DEFINE_TRACE(sched_process_exit);
58DEFINE_TRACE(sched_process_wait);
59
56static void exit_mm(struct task_struct * tsk); 60static void exit_mm(struct task_struct * tsk);
57 61
58static inline int task_detached(struct task_struct *p) 62static inline int task_detached(struct task_struct *p)
@@ -1123,7 +1127,6 @@ NORET_TYPE void do_exit(long code)
1123 preempt_disable(); 1127 preempt_disable();
1124 /* causes final put_task_struct in finish_task_switch(). */ 1128 /* causes final put_task_struct in finish_task_switch(). */
1125 tsk->state = TASK_DEAD; 1129 tsk->state = TASK_DEAD;
1126
1127 schedule(); 1130 schedule();
1128 BUG(); 1131 BUG();
1129 /* Avoid "noreturn function does return". */ 1132 /* Avoid "noreturn function does return". */
diff --git a/kernel/fork.c b/kernel/fork.c
index 2a372a0e206f..5f82a999c032 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -47,6 +47,7 @@
47#include <linux/mount.h> 47#include <linux/mount.h>
48#include <linux/audit.h> 48#include <linux/audit.h>
49#include <linux/memcontrol.h> 49#include <linux/memcontrol.h>
50#include <linux/ftrace.h>
50#include <linux/profile.h> 51#include <linux/profile.h>
51#include <linux/rmap.h> 52#include <linux/rmap.h>
52#include <linux/acct.h> 53#include <linux/acct.h>
@@ -80,6 +81,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
80 81
81__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 82__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
82 83
84DEFINE_TRACE(sched_process_fork);
85
83int nr_processes(void) 86int nr_processes(void)
84{ 87{
85 int cpu; 88 int cpu;
@@ -137,6 +140,7 @@ void free_task(struct task_struct *tsk)
137 prop_local_destroy_single(&tsk->dirties); 140 prop_local_destroy_single(&tsk->dirties);
138 free_thread_info(tsk->stack); 141 free_thread_info(tsk->stack);
139 rt_mutex_debug_task_free(tsk); 142 rt_mutex_debug_task_free(tsk);
143 ftrace_graph_exit_task(tsk);
140 free_task_struct(tsk); 144 free_task_struct(tsk);
141} 145}
142EXPORT_SYMBOL(free_task); 146EXPORT_SYMBOL(free_task);
@@ -1267,6 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1267 total_forks++; 1271 total_forks++;
1268 spin_unlock(&current->sighand->siglock); 1272 spin_unlock(&current->sighand->siglock);
1269 write_unlock_irq(&tasklist_lock); 1273 write_unlock_irq(&tasklist_lock);
1274 ftrace_graph_init_task(p);
1270 proc_fork_connector(p); 1275 proc_fork_connector(p);
1271 cgroup_post_fork(p); 1276 cgroup_post_fork(p);
1272 return p; 1277 return p;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e7a7ce3ed0a..4fbc456f393d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
21static LIST_HEAD(kthread_create_list); 21static LIST_HEAD(kthread_create_list);
22struct task_struct *kthreadd_task; 22struct task_struct *kthreadd_task;
23 23
24DEFINE_TRACE(sched_kthread_stop);
25DEFINE_TRACE(sched_kthread_stop_ret);
26
24struct kthread_create_info 27struct kthread_create_info
25{ 28{
26 /* Information passed to kthread() from kthreadd. */ 29 /* Information passed to kthread() from kthreadd. */
diff --git a/kernel/marker.c b/kernel/marker.c
index e9c6b2bc9400..ea54f2647868 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex);
43 */ 43 */
44#define MARKER_HASH_BITS 6 44#define MARKER_HASH_BITS 6
45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) 45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
46static struct hlist_head marker_table[MARKER_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -64,11 +65,10 @@ struct marker_entry {
64 void *oldptr; 65 void *oldptr;
65 int rcu_pending; 66 int rcu_pending;
66 unsigned char ptype:1; 67 unsigned char ptype:1;
68 unsigned char format_allocated:1;
67 char name[0]; /* Contains name'\0'format'\0' */ 69 char name[0]; /* Contains name'\0'format'\0' */
68}; 70};
69 71
70static struct hlist_head marker_table[MARKER_TABLE_SIZE];
71
72/** 72/**
73 * __mark_empty_function - Empty probe callback 73 * __mark_empty_function - Empty probe callback
74 * @probe_private: probe private data 74 * @probe_private: probe private data
@@ -81,7 +81,7 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
81 * though the function pointer change and the marker enabling are two distinct 81 * though the function pointer change and the marker enabling are two distinct
82 * operations that modifies the execution flow of preemptible code. 82 * operations that modifies the execution flow of preemptible code.
83 */ 83 */
84void __mark_empty_function(void *probe_private, void *call_private, 84notrace void __mark_empty_function(void *probe_private, void *call_private,
85 const char *fmt, va_list *args) 85 const char *fmt, va_list *args)
86{ 86{
87} 87}
@@ -97,7 +97,8 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
97 * need to put a full smp_rmb() in this branch. This is why we do not use 97 * need to put a full smp_rmb() in this branch. This is why we do not use
98 * rcu_dereference() for the pointer read. 98 * rcu_dereference() for the pointer read.
99 */ 99 */
100void marker_probe_cb(const struct marker *mdata, void *call_private, ...) 100notrace void marker_probe_cb(const struct marker *mdata,
101 void *call_private, ...)
101{ 102{
102 va_list args; 103 va_list args;
103 char ptype; 104 char ptype;
@@ -107,7 +108,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
107 * sure the teardown of the callbacks can be done correctly when they 108 * sure the teardown of the callbacks can be done correctly when they
108 * are in modules and they insure RCU read coherency. 109 * are in modules and they insure RCU read coherency.
109 */ 110 */
110 rcu_read_lock_sched(); 111 rcu_read_lock_sched_notrace();
111 ptype = mdata->ptype; 112 ptype = mdata->ptype;
112 if (likely(!ptype)) { 113 if (likely(!ptype)) {
113 marker_probe_func *func; 114 marker_probe_func *func;
@@ -145,7 +146,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
145 va_end(args); 146 va_end(args);
146 } 147 }
147 } 148 }
148 rcu_read_unlock_sched(); 149 rcu_read_unlock_sched_notrace();
149} 150}
150EXPORT_SYMBOL_GPL(marker_probe_cb); 151EXPORT_SYMBOL_GPL(marker_probe_cb);
151 152
@@ -157,12 +158,13 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
157 * 158 *
158 * Should be connected to markers "MARK_NOARGS". 159 * Should be connected to markers "MARK_NOARGS".
159 */ 160 */
160void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) 161static notrace void marker_probe_cb_noarg(const struct marker *mdata,
162 void *call_private, ...)
161{ 163{
162 va_list args; /* not initialized */ 164 va_list args; /* not initialized */
163 char ptype; 165 char ptype;
164 166
165 rcu_read_lock_sched(); 167 rcu_read_lock_sched_notrace();
166 ptype = mdata->ptype; 168 ptype = mdata->ptype;
167 if (likely(!ptype)) { 169 if (likely(!ptype)) {
168 marker_probe_func *func; 170 marker_probe_func *func;
@@ -195,9 +197,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
195 multi[i].func(multi[i].probe_private, call_private, 197 multi[i].func(multi[i].probe_private, call_private,
196 mdata->format, &args); 198 mdata->format, &args);
197 } 199 }
198 rcu_read_unlock_sched(); 200 rcu_read_unlock_sched_notrace();
199} 201}
200EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
201 202
202static void free_old_closure(struct rcu_head *head) 203static void free_old_closure(struct rcu_head *head)
203{ 204{
@@ -416,6 +417,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
416 e->single.probe_private = NULL; 417 e->single.probe_private = NULL;
417 e->multi = NULL; 418 e->multi = NULL;
418 e->ptype = 0; 419 e->ptype = 0;
420 e->format_allocated = 0;
419 e->refcount = 0; 421 e->refcount = 0;
420 e->rcu_pending = 0; 422 e->rcu_pending = 0;
421 hlist_add_head(&e->hlist, head); 423 hlist_add_head(&e->hlist, head);
@@ -447,6 +449,8 @@ static int remove_marker(const char *name)
447 if (e->single.func != __mark_empty_function) 449 if (e->single.func != __mark_empty_function)
448 return -EBUSY; 450 return -EBUSY;
449 hlist_del(&e->hlist); 451 hlist_del(&e->hlist);
452 if (e->format_allocated)
453 kfree(e->format);
450 /* Make sure the call_rcu has been executed */ 454 /* Make sure the call_rcu has been executed */
451 if (e->rcu_pending) 455 if (e->rcu_pending)
452 rcu_barrier_sched(); 456 rcu_barrier_sched();
@@ -457,57 +461,34 @@ static int remove_marker(const char *name)
457/* 461/*
458 * Set the mark_entry format to the format found in the element. 462 * Set the mark_entry format to the format found in the element.
459 */ 463 */
460static int marker_set_format(struct marker_entry **entry, const char *format) 464static int marker_set_format(struct marker_entry *entry, const char *format)
461{ 465{
462 struct marker_entry *e; 466 entry->format = kstrdup(format, GFP_KERNEL);
463 size_t name_len = strlen((*entry)->name) + 1; 467 if (!entry->format)
464 size_t format_len = strlen(format) + 1;
465
466
467 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
468 GFP_KERNEL);
469 if (!e)
470 return -ENOMEM; 468 return -ENOMEM;
471 memcpy(&e->name[0], (*entry)->name, name_len); 469 entry->format_allocated = 1;
472 e->format = &e->name[name_len]; 470
473 memcpy(e->format, format, format_len);
474 if (strcmp(e->format, MARK_NOARGS) == 0)
475 e->call = marker_probe_cb_noarg;
476 else
477 e->call = marker_probe_cb;
478 e->single = (*entry)->single;
479 e->multi = (*entry)->multi;
480 e->ptype = (*entry)->ptype;
481 e->refcount = (*entry)->refcount;
482 e->rcu_pending = 0;
483 hlist_add_before(&e->hlist, &(*entry)->hlist);
484 hlist_del(&(*entry)->hlist);
485 /* Make sure the call_rcu has been executed */
486 if ((*entry)->rcu_pending)
487 rcu_barrier_sched();
488 kfree(*entry);
489 *entry = e;
490 trace_mark(core_marker_format, "name %s format %s", 471 trace_mark(core_marker_format, "name %s format %s",
491 e->name, e->format); 472 entry->name, entry->format);
492 return 0; 473 return 0;
493} 474}
494 475
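
marker_set_format() now simply kstrdup()s the format and records ownership in format_allocated, so remove_marker() knows whether to kfree() the string, instead of reallocating the whole marker_entry as before. A tiny user-space sketch of that ownership flag, with strdup()/free() in place of kstrdup()/kfree() and a much-reduced entry structure:

/* fmt_ownership.c - sketch of the format_allocated flag: only free the
 * format string when this entry actually owns it. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
        const char *name;
        char *format;
        unsigned char format_allocated:1;
};

static int entry_set_format(struct entry *e, const char *format)
{
        e->format = strdup(format);     /* kstrdup() in the kernel */
        if (!e->format)
                return -1;              /* -ENOMEM */
        e->format_allocated = 1;
        return 0;
}

static void entry_remove(struct entry *e)
{
        if (e->format_allocated)
                free(e->format);        /* free only what we allocated */
        e->format = NULL;
        e->format_allocated = 0;
}

int main(void)
{
        struct entry e = { "core_marker_format", NULL, 0 };

        if (entry_set_format(&e, "name %s format %s"))
                return 1;
        printf("%s: %s (allocated=%u)\n", e.name, e.format, e.format_allocated);
        entry_remove(&e);
        return 0;
}
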
495/* 476/*
496 * Sets the probe callback corresponding to one marker. 477 * Sets the probe callback corresponding to one marker.
497 */ 478 */
498static int set_marker(struct marker_entry **entry, struct marker *elem, 479static int set_marker(struct marker_entry *entry, struct marker *elem,
499 int active) 480 int active)
500{ 481{
501 int ret; 482 int ret = 0;
502 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 483 WARN_ON(strcmp(entry->name, elem->name) != 0);
503 484
504 if ((*entry)->format) { 485 if (entry->format) {
505 if (strcmp((*entry)->format, elem->format) != 0) { 486 if (strcmp(entry->format, elem->format) != 0) {
506 printk(KERN_NOTICE 487 printk(KERN_NOTICE
507 "Format mismatch for probe %s " 488 "Format mismatch for probe %s "
508 "(%s), marker (%s)\n", 489 "(%s), marker (%s)\n",
509 (*entry)->name, 490 entry->name,
510 (*entry)->format, 491 entry->format,
511 elem->format); 492 elem->format);
512 return -EPERM; 493 return -EPERM;
513 } 494 }
@@ -523,37 +504,67 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
523 * pass from a "safe" callback (with argument) to an "unsafe" 504 * pass from a "safe" callback (with argument) to an "unsafe"
524 * callback (does not set arguments). 505 * callback (does not set arguments).
525 */ 506 */
526 elem->call = (*entry)->call; 507 elem->call = entry->call;
527 /* 508 /*
528 * Sanity check : 509 * Sanity check :
529 * We only update the single probe private data when the ptr is 510 * We only update the single probe private data when the ptr is
530 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) 511 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
531 */ 512 */
532 WARN_ON(elem->single.func != __mark_empty_function 513 WARN_ON(elem->single.func != __mark_empty_function
533 && elem->single.probe_private 514 && elem->single.probe_private != entry->single.probe_private
534 != (*entry)->single.probe_private && 515 && !elem->ptype);
535 !elem->ptype); 516 elem->single.probe_private = entry->single.probe_private;
536 elem->single.probe_private = (*entry)->single.probe_private;
537 /* 517 /*
538 * Make sure the private data is valid when we update the 518 * Make sure the private data is valid when we update the
539 * single probe ptr. 519 * single probe ptr.
540 */ 520 */
541 smp_wmb(); 521 smp_wmb();
542 elem->single.func = (*entry)->single.func; 522 elem->single.func = entry->single.func;
543 /* 523 /*
544 * We also make sure that the new probe callbacks array is consistent 524 * We also make sure that the new probe callbacks array is consistent
545 * before setting a pointer to it. 525 * before setting a pointer to it.
546 */ 526 */
547 rcu_assign_pointer(elem->multi, (*entry)->multi); 527 rcu_assign_pointer(elem->multi, entry->multi);
548 /* 528 /*
549 * Update the function or multi probe array pointer before setting the 529 * Update the function or multi probe array pointer before setting the
550 * ptype. 530 * ptype.
551 */ 531 */
552 smp_wmb(); 532 smp_wmb();
553 elem->ptype = (*entry)->ptype; 533 elem->ptype = entry->ptype;
534
535 if (elem->tp_name && (active ^ elem->state)) {
536 WARN_ON(!elem->tp_cb);
537 /*
538 * It is ok to directly call the probe registration because type
539 * checking has been done in the __trace_mark_tp() macro.
540 */
541
542 if (active) {
543 /*
544 * try_module_get should always succeed because we hold
545 * lock_module() to get the tp_cb address.
546 */
547 ret = try_module_get(__module_text_address(
548 (unsigned long)elem->tp_cb));
549 BUG_ON(!ret);
550 ret = tracepoint_probe_register_noupdate(
551 elem->tp_name,
552 elem->tp_cb);
553 } else {
554 ret = tracepoint_probe_unregister_noupdate(
555 elem->tp_name,
556 elem->tp_cb);
557 /*
558 * tracepoint_probe_update_all() must be called
559 * before the module containing tp_cb is unloaded.
560 */
561 module_put(__module_text_address(
562 (unsigned long)elem->tp_cb));
563 }
564 }
554 elem->state = active; 565 elem->state = active;
555 566
556 return 0; 567 return ret;
557} 568}
558 569
559/* 570/*
@@ -564,7 +575,24 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
564 */ 575 */
565static void disable_marker(struct marker *elem) 576static void disable_marker(struct marker *elem)
566{ 577{
578 int ret;
579
567 /* leave "call" as is. It is known statically. */ 580 /* leave "call" as is. It is known statically. */
581 if (elem->tp_name && elem->state) {
582 WARN_ON(!elem->tp_cb);
583 /*
584 * It is ok to directly call the probe registration because type
585 * checking has been done in the __trace_mark_tp() macro.
586 */
587 ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
588 elem->tp_cb);
589 WARN_ON(ret);
590 /*
591 * tracepoint_probe_update_all() must be called
592 * before the module containing tp_cb is unloaded.
593 */
594 module_put(__module_text_address((unsigned long)elem->tp_cb));
595 }
568 elem->state = 0; 596 elem->state = 0;
569 elem->single.func = __mark_empty_function; 597 elem->single.func = __mark_empty_function;
570 /* Update the function before setting the ptype */ 598 /* Update the function before setting the ptype */
@@ -594,8 +622,7 @@ void marker_update_probe_range(struct marker *begin,
594 for (iter = begin; iter < end; iter++) { 622 for (iter = begin; iter < end; iter++) {
595 mark_entry = get_marker(iter->name); 623 mark_entry = get_marker(iter->name);
596 if (mark_entry) { 624 if (mark_entry) {
597 set_marker(&mark_entry, iter, 625 set_marker(mark_entry, iter, !!mark_entry->refcount);
598 !!mark_entry->refcount);
599 /* 626 /*
600 * ignore error, continue 627 * ignore error, continue
601 */ 628 */
@@ -629,6 +656,7 @@ static void marker_update_probes(void)
629 marker_update_probe_range(__start___markers, __stop___markers); 656 marker_update_probe_range(__start___markers, __stop___markers);
630 /* Markers in modules. */ 657 /* Markers in modules. */
631 module_update_markers(); 658 module_update_markers();
659 tracepoint_probe_update_all();
632} 660}
633 661
634/** 662/**
@@ -657,7 +685,7 @@ int marker_probe_register(const char *name, const char *format,
657 ret = PTR_ERR(entry); 685 ret = PTR_ERR(entry);
658 } else if (format) { 686 } else if (format) {
659 if (!entry->format) 687 if (!entry->format)
660 ret = marker_set_format(&entry, format); 688 ret = marker_set_format(entry, format);
661 else if (strcmp(entry->format, format)) 689 else if (strcmp(entry->format, format))
662 ret = -EPERM; 690 ret = -EPERM;
663 } 691 }
@@ -676,10 +704,11 @@ int marker_probe_register(const char *name, const char *format,
676 goto end; 704 goto end;
677 } 705 }
678 mutex_unlock(&markers_mutex); 706 mutex_unlock(&markers_mutex);
679 marker_update_probes(); /* may update entry */ 707 marker_update_probes();
680 mutex_lock(&markers_mutex); 708 mutex_lock(&markers_mutex);
681 entry = get_marker(name); 709 entry = get_marker(name);
682 WARN_ON(!entry); 710 if (!entry)
711 goto end;
683 if (entry->rcu_pending) 712 if (entry->rcu_pending)
684 rcu_barrier_sched(); 713 rcu_barrier_sched();
685 entry->oldptr = old; 714 entry->oldptr = old;
@@ -720,7 +749,7 @@ int marker_probe_unregister(const char *name,
720 rcu_barrier_sched(); 749 rcu_barrier_sched();
721 old = marker_entry_remove_probe(entry, probe, probe_private); 750 old = marker_entry_remove_probe(entry, probe, probe_private);
722 mutex_unlock(&markers_mutex); 751 mutex_unlock(&markers_mutex);
723 marker_update_probes(); /* may update entry */ 752 marker_update_probes();
724 mutex_lock(&markers_mutex); 753 mutex_lock(&markers_mutex);
725 entry = get_marker(name); 754 entry = get_marker(name);
726 if (!entry) 755 if (!entry)
@@ -801,10 +830,11 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
801 rcu_barrier_sched(); 830 rcu_barrier_sched();
802 old = marker_entry_remove_probe(entry, NULL, probe_private); 831 old = marker_entry_remove_probe(entry, NULL, probe_private);
803 mutex_unlock(&markers_mutex); 832 mutex_unlock(&markers_mutex);
804 marker_update_probes(); /* may update entry */ 833 marker_update_probes();
805 mutex_lock(&markers_mutex); 834 mutex_lock(&markers_mutex);
806 entry = get_marker_from_private_data(probe, probe_private); 835 entry = get_marker_from_private_data(probe, probe_private);
807 WARN_ON(!entry); 836 if (!entry)
837 goto end;
808 if (entry->rcu_pending) 838 if (entry->rcu_pending)
809 rcu_barrier_sched(); 839 rcu_barrier_sched();
810 entry->oldptr = old; 840 entry->oldptr = old;
@@ -848,8 +878,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
848 if (!e->ptype) { 878 if (!e->ptype) {
849 if (num == 0 && e->single.func == probe) 879 if (num == 0 && e->single.func == probe)
850 return e->single.probe_private; 880 return e->single.probe_private;
851 else
852 break;
853 } else { 881 } else {
854 struct marker_probe_closure *closure; 882 struct marker_probe_closure *closure;
855 int match = 0; 883 int match = 0;
@@ -861,8 +889,42 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
861 return closure[i].probe_private; 889 return closure[i].probe_private;
862 } 890 }
863 } 891 }
892 break;
864 } 893 }
865 } 894 }
866 return ERR_PTR(-ENOENT); 895 return ERR_PTR(-ENOENT);
867} 896}
868EXPORT_SYMBOL_GPL(marker_get_private_data); 897EXPORT_SYMBOL_GPL(marker_get_private_data);
898
899#ifdef CONFIG_MODULES
900
901int marker_module_notify(struct notifier_block *self,
902 unsigned long val, void *data)
903{
904 struct module *mod = data;
905
906 switch (val) {
907 case MODULE_STATE_COMING:
908 marker_update_probe_range(mod->markers,
909 mod->markers + mod->num_markers);
910 break;
911 case MODULE_STATE_GOING:
912 marker_update_probe_range(mod->markers,
913 mod->markers + mod->num_markers);
914 break;
915 }
916 return 0;
917}
918
919struct notifier_block marker_module_nb = {
920 .notifier_call = marker_module_notify,
921 .priority = 0,
922};
923
924static int init_markers(void)
925{
926 return register_module_notifier(&marker_module_nb);
927}
928__initcall(init_markers);
929
930#endif /* CONFIG_MODULES */
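As an illustration of the marker interface exercised above, a minimal consumer
module might look like the sketch below. This is an editor's illustration, not
part of the patch: the marker name "subsys_event" and its format string are
invented, and the probe prototype is the one declared by include/linux/marker.h
in this tree.

	#include <linux/module.h>
	#include <linux/marker.h>

	/* called on every hit of the (hypothetical) subsys_event marker */
	static void probe_subsys_event(void *probe_private, void *call_private,
				       const char *fmt, va_list *args)
	{
		/* arguments arrive in the order given by the format string */
		int value = va_arg(*args, int);

		printk(KERN_INFO "subsys_event: %d\n", value);
	}

	static int __init probe_init(void)
	{
		/* binds the probe to trace_mark(subsys_event, "value %d", ...) sites */
		return marker_probe_register("subsys_event", "value %d",
					     probe_subsys_event, NULL);
	}

	static void __exit probe_exit(void)
	{
		marker_probe_unregister("subsys_event", probe_subsys_event, NULL);
	}

	module_init(probe_init);
	module_exit(probe_exit);
	MODULE_LICENSE("GPL");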
diff --git a/kernel/module.c b/kernel/module.c
index 1f4cc00e0c20..89bcf7c1327d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2184,24 +2184,15 @@ static noinline struct module *load_module(void __user *umod,
2184 struct mod_debug *debug; 2184 struct mod_debug *debug;
2185 unsigned int num_debug; 2185 unsigned int num_debug;
2186 2186
2187#ifdef CONFIG_MARKERS
2188 marker_update_probe_range(mod->markers,
2189 mod->markers + mod->num_markers);
2190#endif
2191 debug = section_objs(hdr, sechdrs, secstrings, "__verbose", 2187 debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
2192 sizeof(*debug), &num_debug); 2188 sizeof(*debug), &num_debug);
2193 dynamic_printk_setup(debug, num_debug); 2189 dynamic_printk_setup(debug, num_debug);
2194
2195#ifdef CONFIG_TRACEPOINTS
2196 tracepoint_update_probe_range(mod->tracepoints,
2197 mod->tracepoints + mod->num_tracepoints);
2198#endif
2199 } 2190 }
2200 2191
2201 /* sechdrs[0].sh_size is always zero */ 2192 /* sechdrs[0].sh_size is always zero */
2202 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", 2193 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
2203 sizeof(*mseg), &num_mcount); 2194 sizeof(*mseg), &num_mcount);
2204 ftrace_init_module(mseg, mseg + num_mcount); 2195 ftrace_init_module(mod, mseg, mseg + num_mcount);
2205 2196
2206 err = module_finalize(hdr, sechdrs, mod); 2197 err = module_finalize(hdr, sechdrs, mod);
2207 if (err < 0) 2198 if (err < 0)
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index c9d74083746f..f77d3819ef57 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,7 +22,6 @@
22#include <linux/console.h> 22#include <linux/console.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -257,7 +256,7 @@ static int create_image(int platform_mode)
257 256
258int hibernation_snapshot(int platform_mode) 257int hibernation_snapshot(int platform_mode)
259{ 258{
260 int error, ftrace_save; 259 int error;
261 260
262 /* Free memory before shutting down devices. */ 261 /* Free memory before shutting down devices. */
263 error = swsusp_shrink_memory(); 262 error = swsusp_shrink_memory();
@@ -269,7 +268,6 @@ int hibernation_snapshot(int platform_mode)
269 goto Close; 268 goto Close;
270 269
271 suspend_console(); 270 suspend_console();
272 ftrace_save = __ftrace_enabled_save();
273 error = device_suspend(PMSG_FREEZE); 271 error = device_suspend(PMSG_FREEZE);
274 if (error) 272 if (error)
275 goto Recover_platform; 273 goto Recover_platform;
@@ -299,7 +297,6 @@ int hibernation_snapshot(int platform_mode)
299 Resume_devices: 297 Resume_devices:
300 device_resume(in_suspend ? 298 device_resume(in_suspend ?
301 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 299 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
302 __ftrace_enabled_restore(ftrace_save);
303 resume_console(); 300 resume_console();
304 Close: 301 Close:
305 platform_end(platform_mode); 302 platform_end(platform_mode);
@@ -370,11 +367,10 @@ static int resume_target_kernel(void)
370 367
371int hibernation_restore(int platform_mode) 368int hibernation_restore(int platform_mode)
372{ 369{
373 int error, ftrace_save; 370 int error;
374 371
375 pm_prepare_console(); 372 pm_prepare_console();
376 suspend_console(); 373 suspend_console();
377 ftrace_save = __ftrace_enabled_save();
378 error = device_suspend(PMSG_QUIESCE); 374 error = device_suspend(PMSG_QUIESCE);
379 if (error) 375 if (error)
380 goto Finish; 376 goto Finish;
@@ -389,7 +385,6 @@ int hibernation_restore(int platform_mode)
389 platform_restore_cleanup(platform_mode); 385 platform_restore_cleanup(platform_mode);
390 device_resume(PMSG_RECOVER); 386 device_resume(PMSG_RECOVER);
391 Finish: 387 Finish:
392 __ftrace_enabled_restore(ftrace_save);
393 resume_console(); 388 resume_console();
394 pm_restore_console(); 389 pm_restore_console();
395 return error; 390 return error;
@@ -402,7 +397,7 @@ int hibernation_restore(int platform_mode)
402 397
403int hibernation_platform_enter(void) 398int hibernation_platform_enter(void)
404{ 399{
405 int error, ftrace_save; 400 int error;
406 401
407 if (!hibernation_ops) 402 if (!hibernation_ops)
408 return -ENOSYS; 403 return -ENOSYS;
@@ -417,7 +412,6 @@ int hibernation_platform_enter(void)
417 goto Close; 412 goto Close;
418 413
419 suspend_console(); 414 suspend_console();
420 ftrace_save = __ftrace_enabled_save();
421 error = device_suspend(PMSG_HIBERNATE); 415 error = device_suspend(PMSG_HIBERNATE);
422 if (error) { 416 if (error) {
423 if (hibernation_ops->recover) 417 if (hibernation_ops->recover)
@@ -452,7 +446,6 @@ int hibernation_platform_enter(void)
452 hibernation_ops->finish(); 446 hibernation_ops->finish();
453 Resume_devices: 447 Resume_devices:
454 device_resume(PMSG_RESTORE); 448 device_resume(PMSG_RESTORE);
455 __ftrace_enabled_restore(ftrace_save);
456 resume_console(); 449 resume_console();
457 Close: 450 Close:
458 hibernation_ops->end(); 451 hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b8f7ce9473e8..613f16941b85 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -22,7 +22,6 @@
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/vmstat.h> 23#include <linux/vmstat.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -317,7 +316,7 @@ static int suspend_enter(suspend_state_t state)
317 */ 316 */
318int suspend_devices_and_enter(suspend_state_t state) 317int suspend_devices_and_enter(suspend_state_t state)
319{ 318{
320 int error, ftrace_save; 319 int error;
321 320
322 if (!suspend_ops) 321 if (!suspend_ops)
323 return -ENOSYS; 322 return -ENOSYS;
@@ -328,7 +327,6 @@ int suspend_devices_and_enter(suspend_state_t state)
328 goto Close; 327 goto Close;
329 } 328 }
330 suspend_console(); 329 suspend_console();
331 ftrace_save = __ftrace_enabled_save();
332 suspend_test_start(); 330 suspend_test_start();
333 error = device_suspend(PMSG_SUSPEND); 331 error = device_suspend(PMSG_SUSPEND);
334 if (error) { 332 if (error) {
@@ -360,7 +358,6 @@ int suspend_devices_and_enter(suspend_state_t state)
360 suspend_test_start(); 358 suspend_test_start();
361 device_resume(PMSG_RESUME); 359 device_resume(PMSG_RESUME);
362 suspend_test_finish("resume devices"); 360 suspend_test_finish("resume devices");
363 __ftrace_enabled_restore(ftrace_save);
364 resume_console(); 361 resume_console();
365 Close: 362 Close:
366 if (suspend_ops->end) 363 if (suspend_ops->end)
diff --git a/kernel/profile.c b/kernel/profile.c
index dc41827fbfee..60adefb59b5e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
544}; 544};
545 545
546#ifdef CONFIG_SMP 546#ifdef CONFIG_SMP
547static inline void profile_nop(void *unused) 547static void profile_nop(void *unused)
548{ 548{
549} 549}
550 550
diff --git a/kernel/sched.c b/kernel/sched.c
index b7480fb5c3dc..7729c4bbc8ba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -118,6 +118,12 @@
118 */ 118 */
119#define RUNTIME_INF ((u64)~0ULL) 119#define RUNTIME_INF ((u64)~0ULL)
120 120
121DEFINE_TRACE(sched_wait_task);
122DEFINE_TRACE(sched_wakeup);
123DEFINE_TRACE(sched_wakeup_new);
124DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task);
126
121#ifdef CONFIG_SMP 127#ifdef CONFIG_SMP
122/* 128/*
123 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) 129 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@ -5896,6 +5902,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5896 * The idle tasks have their own, simple scheduling class: 5902 * The idle tasks have their own, simple scheduling class:
5897 */ 5903 */
5898 idle->sched_class = &idle_sched_class; 5904 idle->sched_class = &idle_sched_class;
5905 ftrace_graph_init_task(idle);
5899} 5906}
5900 5907
5901/* 5908/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc654455..e9afe63da24b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,6 +41,8 @@
41 41
42static struct kmem_cache *sigqueue_cachep; 42static struct kmem_cache *sigqueue_cachep;
43 43
44DEFINE_TRACE(sched_signal_send);
45
44static void __user *sig_handler(struct task_struct *t, int sig) 46static void __user *sig_handler(struct task_struct *t, int sig)
45{ 47{
46 return t->sighand->action[sig - 1].sa.sa_handler; 48 return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3d56fe7570da..c83f566e940a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -487,6 +487,16 @@ static struct ctl_table kern_table[] = {
487 .proc_handler = &ftrace_enable_sysctl, 487 .proc_handler = &ftrace_enable_sysctl,
488 }, 488 },
489#endif 489#endif
490#ifdef CONFIG_TRACING
491 {
492 .ctl_name = CTL_UNNUMBERED,
493 .procname = "ftrace_dump_on_oops",
494 .data = &ftrace_dump_on_oops,
495 .maxlen = sizeof(int),
496 .mode = 0644,
497 .proc_handler = &proc_dointvec,
498 },
499#endif
490#ifdef CONFIG_MODULES 500#ifdef CONFIG_MODULES
491 { 501 {
492 .ctl_name = KERN_MODPROBE, 502 .ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..8b6b673b4d6c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -3,18 +3,34 @@
3# select HAVE_FUNCTION_TRACER: 3# select HAVE_FUNCTION_TRACER:
4# 4#
5 5
6config USER_STACKTRACE_SUPPORT
7 bool
8
6config NOP_TRACER 9config NOP_TRACER
7 bool 10 bool
8 11
9config HAVE_FUNCTION_TRACER 12config HAVE_FUNCTION_TRACER
10 bool 13 bool
11 14
15config HAVE_FUNCTION_GRAPH_TRACER
16 bool
17
18config HAVE_FUNCTION_TRACE_MCOUNT_TEST
19 bool
20 help
21 This gets selected when the arch tests the function_trace_stop
22 variable at the mcount call site. Otherwise, this variable
23 is tested by the called function.
24
12config HAVE_DYNAMIC_FTRACE 25config HAVE_DYNAMIC_FTRACE
13 bool 26 bool
14 27
15config HAVE_FTRACE_MCOUNT_RECORD 28config HAVE_FTRACE_MCOUNT_RECORD
16 bool 29 bool
17 30
31config HAVE_HW_BRANCH_TRACER
32 bool
33
18config TRACER_MAX_TRACE 34config TRACER_MAX_TRACE
19 bool 35 bool
20 36
@@ -47,6 +63,19 @@ config FUNCTION_TRACER
47 (the bootup default), then the overhead of the instructions is very 63 (the bootup default), then the overhead of the instructions is very
48 small and not measurable even in micro-benchmarks. 64 small and not measurable even in micro-benchmarks.
49 65
66config FUNCTION_GRAPH_TRACER
67 bool "Kernel Function Graph Tracer"
68 depends on HAVE_FUNCTION_GRAPH_TRACER
69 depends on FUNCTION_TRACER
70 help
71 Enable the kernel to trace a function at both its return
72 and its entry.
73 Its first purpose is to trace the duration of functions and
74 draw a call graph for each thread with some information like
75 the return value.
76 This is done by storing the return address of each traced
77 function in a stack of calls kept on the task structure.
78
50config IRQSOFF_TRACER 79config IRQSOFF_TRACER
51 bool "Interrupts-off Latency Tracer" 80 bool "Interrupts-off Latency Tracer"
52 default n 81 default n
@@ -138,6 +167,70 @@ config BOOT_TRACER
138 selected, because the self-tests are an initcall as well and that 167 selected, because the self-tests are an initcall as well and that
139 would invalidate the boot trace. ) 168 would invalidate the boot trace. )
140 169
170config TRACE_BRANCH_PROFILING
171 bool "Trace likely/unlikely profiler"
172 depends on DEBUG_KERNEL
173 select TRACING
174 help
175 This tracer profiles all the likely and unlikely macros
176 in the kernel. It will display the results in:
177
178 /debugfs/tracing/profile_annotated_branch
179
180 Note: this will add a significant overhead; only turn this
181 on if you need to profile the system's use of these macros.
182
183 Say N if unsure.
184
185config PROFILE_ALL_BRANCHES
186 bool "Profile all if conditionals"
187 depends on TRACE_BRANCH_PROFILING
188 help
189 This tracer profiles all branch conditions. Every if ()
190 taken in the kernel is recorded, along with whether it was hit or missed.
191 The results will be displayed in:
192
193 /debugfs/tracing/profile_branch
194
195 This configuration, when enabled, will impose a significant overhead
196 on the system. This should only be enabled when the system
197 is to be analyzed.
198
199 Say N if unsure.
200
201config TRACING_BRANCHES
202 bool
203 help
204 Selected by tracers that will trace the likely and unlikely
205 conditions. This prevents the tracers themselves from being
206 profiled. Profiling the tracing infrastructure can only happen
207 when the likelys and unlikelys are not being traced.
208
209config BRANCH_TRACER
210 bool "Trace likely/unlikely instances"
211 depends on TRACE_BRANCH_PROFILING
212 select TRACING_BRANCHES
213 help
214 This traces the events of likely and unlikely condition
215 calls in the kernel. The difference between this and the
216 "Trace likely/unlikely profiler" is that this is not a
217 histogram of the callers, but actually places the calling
218 events into a running trace buffer to see when and where the
219 events happened, as well as their results.
220
221 Say N if unsure.
222
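To make the branch-profiling options above (TRACE_BRANCH_PROFILING and
PROFILE_ALL_BRANCHES) concrete, here is an editor's sketch, not part of the
patch. All names are hypothetical; only likely()/unlikely() from
<linux/compiler.h> are real.

	#include <linux/compiler.h>
	#include <linux/errno.h>

	static int demo_branch(int err, const void *ptr)
	{
		/* TRACE_BRANCH_PROFILING counts how often this annotated hint is right */
		if (unlikely(err))
			return err;

		/* PROFILE_ALL_BRANCHES additionally counts plain, un-annotated tests */
		if (ptr == NULL)
			return -EINVAL;

		return 0;
	}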
223config POWER_TRACER
224 bool "Trace power consumption behavior"
225 depends on DEBUG_KERNEL
226 depends on X86
227 select TRACING
228 help
229 This tracer helps developers analyze and optimize the kernel's
230 power management decisions, specifically the C-state and P-state
231 behavior.
232
233
141config STACK_TRACER 234config STACK_TRACER
142 bool "Trace max stack" 235 bool "Trace max stack"
143 depends on HAVE_FUNCTION_TRACER 236 depends on HAVE_FUNCTION_TRACER
@@ -157,6 +250,14 @@ config STACK_TRACER
157 250
158 Say N if unsure. 251 Say N if unsure.
159 252
253config BTS_TRACER
254 depends on HAVE_HW_BRANCH_TRACER
255 bool "Trace branches"
256 select TRACING
257 help
258 This tracer records all branches on the system in a circular
259 buffer, giving access to the last N branches for each cpu.
260
160config DYNAMIC_FTRACE 261config DYNAMIC_FTRACE
161 bool "enable/disable ftrace tracepoints dynamically" 262 bool "enable/disable ftrace tracepoints dynamically"
162 depends on FUNCTION_TRACER 263 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c8228b1a49e9..62dc561b6676 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13# If unlikely tracing is enabled, do not trace these files
14ifdef CONFIG_TRACING_BRANCHES
15KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
16endif
17
13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 18obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
15 20
@@ -24,5 +29,9 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
24obj-$(CONFIG_STACK_TRACER) += trace_stack.o 29obj-$(CONFIG_STACK_TRACER) += trace_stack.o
25obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 30obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
26obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 31obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
32obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_BTS_TRACER) += trace_bts.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o
27 36
28libftrace-y := ftrace.o 37libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 78db083390f0..2e78628443e8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,12 @@
47int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
48static int last_ftrace_enabled; 48static int last_ftrace_enabled;
49 49
50/* ftrace_pid_trace >= 0 will only trace threads with this pid */
51static int ftrace_pid_trace = -1;
52
53/* Quick disabling of function tracer. */
54int function_trace_stop;
55
50/* 56/*
51 * ftrace_disabled is set when an anomaly is discovered. 57 * ftrace_disabled is set when an anomaly is discovered.
52 * ftrace_disabled is much stronger than ftrace_enabled. 58 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -55,6 +61,7 @@ static int ftrace_disabled __read_mostly;
55 61
56static DEFINE_SPINLOCK(ftrace_lock); 62static DEFINE_SPINLOCK(ftrace_lock);
57static DEFINE_MUTEX(ftrace_sysctl_lock); 63static DEFINE_MUTEX(ftrace_sysctl_lock);
64static DEFINE_MUTEX(ftrace_start_lock);
58 65
59static struct ftrace_ops ftrace_list_end __read_mostly = 66static struct ftrace_ops ftrace_list_end __read_mostly =
60{ 67{
@@ -63,6 +70,8 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
63 70
64static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 71static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
65ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 72ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
73ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
74ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
66 75
67static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 76static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68{ 77{
@@ -79,6 +88,21 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
79 }; 88 };
80} 89}
81 90
91static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
92{
93 if (current->pid != ftrace_pid_trace)
94 return;
95
96 ftrace_pid_function(ip, parent_ip);
97}
98
99static void set_ftrace_pid_function(ftrace_func_t func)
100{
101 /* do not set ftrace_pid_function to itself! */
102 if (func != ftrace_pid_func)
103 ftrace_pid_function = func;
104}
105
82/** 106/**
83 * clear_ftrace_function - reset the ftrace function 107 * clear_ftrace_function - reset the ftrace function
84 * 108 *
@@ -88,8 +112,24 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
88void clear_ftrace_function(void) 112void clear_ftrace_function(void)
89{ 113{
90 ftrace_trace_function = ftrace_stub; 114 ftrace_trace_function = ftrace_stub;
115 __ftrace_trace_function = ftrace_stub;
116 ftrace_pid_function = ftrace_stub;
91} 117}
92 118
119#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
120/*
121 * For those archs that do not test function_trace_stop in their
122 * mcount call site, we need to do it from C.
123 */
124static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
125{
126 if (function_trace_stop)
127 return;
128
129 __ftrace_trace_function(ip, parent_ip);
130}
131#endif
132
93static int __register_ftrace_function(struct ftrace_ops *ops) 133static int __register_ftrace_function(struct ftrace_ops *ops)
94{ 134{
95 /* should not be called from interrupt context */ 135 /* should not be called from interrupt context */
@@ -106,14 +146,28 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
106 ftrace_list = ops; 146 ftrace_list = ops;
107 147
108 if (ftrace_enabled) { 148 if (ftrace_enabled) {
149 ftrace_func_t func;
150
151 if (ops->next == &ftrace_list_end)
152 func = ops->func;
153 else
154 func = ftrace_list_func;
155
156 if (ftrace_pid_trace >= 0) {
157 set_ftrace_pid_function(func);
158 func = ftrace_pid_func;
159 }
160
109 /* 161 /*
110 * For one func, simply call it directly. 162 * For one func, simply call it directly.
111 * For more than one func, call the chain. 163 * For more than one func, call the chain.
112 */ 164 */
113 if (ops->next == &ftrace_list_end) 165#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
114 ftrace_trace_function = ops->func; 166 ftrace_trace_function = func;
115 else 167#else
116 ftrace_trace_function = ftrace_list_func; 168 __ftrace_trace_function = func;
169 ftrace_trace_function = ftrace_test_stop_func;
170#endif
117 } 171 }
118 172
119 spin_unlock(&ftrace_lock); 173 spin_unlock(&ftrace_lock);
@@ -152,9 +206,19 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
152 206
153 if (ftrace_enabled) { 207 if (ftrace_enabled) {
154 /* If we only have one func left, then call that directly */ 208 /* If we only have one func left, then call that directly */
155 if (ftrace_list == &ftrace_list_end || 209 if (ftrace_list->next == &ftrace_list_end) {
156 ftrace_list->next == &ftrace_list_end) 210 ftrace_func_t func = ftrace_list->func;
157 ftrace_trace_function = ftrace_list->func; 211
212 if (ftrace_pid_trace >= 0) {
213 set_ftrace_pid_function(func);
214 func = ftrace_pid_func;
215 }
216#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
217 ftrace_trace_function = func;
218#else
219 __ftrace_trace_function = func;
220#endif
221 }
158 } 222 }
159 223
160 out: 224 out:
@@ -163,6 +227,36 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
163 return ret; 227 return ret;
164} 228}
165 229
230static void ftrace_update_pid_func(void)
231{
232 ftrace_func_t func;
233
234 /* should not be called from interrupt context */
235 spin_lock(&ftrace_lock);
236
237 if (ftrace_trace_function == ftrace_stub)
238 goto out;
239
240 func = ftrace_trace_function;
241
242 if (ftrace_pid_trace >= 0) {
243 set_ftrace_pid_function(func);
244 func = ftrace_pid_func;
245 } else {
246 if (func == ftrace_pid_func)
247 func = ftrace_pid_function;
248 }
249
250#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
251 ftrace_trace_function = func;
252#else
253 __ftrace_trace_function = func;
254#endif
255
256 out:
257 spin_unlock(&ftrace_lock);
258}
259
166#ifdef CONFIG_DYNAMIC_FTRACE 260#ifdef CONFIG_DYNAMIC_FTRACE
167#ifndef CONFIG_FTRACE_MCOUNT_RECORD 261#ifndef CONFIG_FTRACE_MCOUNT_RECORD
168# error Dynamic ftrace depends on MCOUNT_RECORD 262# error Dynamic ftrace depends on MCOUNT_RECORD
@@ -182,6 +276,8 @@ enum {
182 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 276 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
183 FTRACE_ENABLE_MCOUNT = (1 << 3), 277 FTRACE_ENABLE_MCOUNT = (1 << 3),
184 FTRACE_DISABLE_MCOUNT = (1 << 4), 278 FTRACE_DISABLE_MCOUNT = (1 << 4),
279 FTRACE_START_FUNC_RET = (1 << 5),
280 FTRACE_STOP_FUNC_RET = (1 << 6),
185}; 281};
186 282
187static int ftrace_filtered; 283static int ftrace_filtered;
@@ -308,7 +404,7 @@ ftrace_record_ip(unsigned long ip)
308{ 404{
309 struct dyn_ftrace *rec; 405 struct dyn_ftrace *rec;
310 406
311 if (!ftrace_enabled || ftrace_disabled) 407 if (ftrace_disabled)
312 return NULL; 408 return NULL;
313 409
314 rec = ftrace_alloc_dyn_node(ip); 410 rec = ftrace_alloc_dyn_node(ip);
@@ -322,14 +418,51 @@ ftrace_record_ip(unsigned long ip)
322 return rec; 418 return rec;
323} 419}
324 420
325#define FTRACE_ADDR ((long)(ftrace_caller)) 421static void print_ip_ins(const char *fmt, unsigned char *p)
422{
423 int i;
424
425 printk(KERN_CONT "%s", fmt);
426
427 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
428 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
429}
430
431static void ftrace_bug(int failed, unsigned long ip)
432{
433 switch (failed) {
434 case -EFAULT:
435 FTRACE_WARN_ON_ONCE(1);
436 pr_info("ftrace faulted on modifying ");
437 print_ip_sym(ip);
438 break;
439 case -EINVAL:
440 FTRACE_WARN_ON_ONCE(1);
441 pr_info("ftrace failed to modify ");
442 print_ip_sym(ip);
443 print_ip_ins(" actual: ", (unsigned char *)ip);
444 printk(KERN_CONT "\n");
445 break;
446 case -EPERM:
447 FTRACE_WARN_ON_ONCE(1);
448 pr_info("ftrace faulted on writing ");
449 print_ip_sym(ip);
450 break;
451 default:
452 FTRACE_WARN_ON_ONCE(1);
453 pr_info("ftrace faulted on unknown error ");
454 print_ip_sym(ip);
455 }
456}
457
326 458
327static int 459static int
328__ftrace_replace_code(struct dyn_ftrace *rec, 460__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
329 unsigned char *nop, int enable)
330{ 461{
331 unsigned long ip, fl; 462 unsigned long ip, fl;
332 unsigned char *call, *old, *new; 463 unsigned long ftrace_addr;
464
465 ftrace_addr = (unsigned long)ftrace_caller;
333 466
334 ip = rec->ip; 467 ip = rec->ip;
335 468
@@ -388,34 +521,28 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
388 } 521 }
389 } 522 }
390 523
391 call = ftrace_call_replace(ip, FTRACE_ADDR); 524 if (rec->flags & FTRACE_FL_ENABLED)
392 525 return ftrace_make_call(rec, ftrace_addr);
393 if (rec->flags & FTRACE_FL_ENABLED) { 526 else
394 old = nop; 527 return ftrace_make_nop(NULL, rec, ftrace_addr);
395 new = call;
396 } else {
397 old = call;
398 new = nop;
399 }
400
401 return ftrace_modify_code(ip, old, new);
402} 528}
403 529
404static void ftrace_replace_code(int enable) 530static void ftrace_replace_code(int enable)
405{ 531{
406 int i, failed; 532 int i, failed;
407 unsigned char *nop = NULL;
408 struct dyn_ftrace *rec; 533 struct dyn_ftrace *rec;
409 struct ftrace_page *pg; 534 struct ftrace_page *pg;
410 535
411 nop = ftrace_nop_replace();
412
413 for (pg = ftrace_pages_start; pg; pg = pg->next) { 536 for (pg = ftrace_pages_start; pg; pg = pg->next) {
414 for (i = 0; i < pg->index; i++) { 537 for (i = 0; i < pg->index; i++) {
415 rec = &pg->records[i]; 538 rec = &pg->records[i];
416 539
417 /* don't modify code that has already faulted */ 540 /*
418 if (rec->flags & FTRACE_FL_FAILED) 541 * Skip over free records and records that have
542 * failed.
543 */
544 if (rec->flags & FTRACE_FL_FREE ||
545 rec->flags & FTRACE_FL_FAILED)
419 continue; 546 continue;
420 547
421 /* ignore updates to this record's mcount site */ 548 /* ignore updates to this record's mcount site */
@@ -426,68 +553,30 @@ static void ftrace_replace_code(int enable)
426 unfreeze_record(rec); 553 unfreeze_record(rec);
427 } 554 }
428 555
429 failed = __ftrace_replace_code(rec, nop, enable); 556 failed = __ftrace_replace_code(rec, enable);
430 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 557 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
431 rec->flags |= FTRACE_FL_FAILED; 558 rec->flags |= FTRACE_FL_FAILED;
432 if ((system_state == SYSTEM_BOOTING) || 559 if ((system_state == SYSTEM_BOOTING) ||
433 !core_kernel_text(rec->ip)) { 560 !core_kernel_text(rec->ip)) {
434 ftrace_free_rec(rec); 561 ftrace_free_rec(rec);
435 } 562 } else
563 ftrace_bug(failed, rec->ip);
436 } 564 }
437 } 565 }
438 } 566 }
439} 567}
440 568
441static void print_ip_ins(const char *fmt, unsigned char *p)
442{
443 int i;
444
445 printk(KERN_CONT "%s", fmt);
446
447 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
448 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
449}
450
451static int 569static int
452ftrace_code_disable(struct dyn_ftrace *rec) 570ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
453{ 571{
454 unsigned long ip; 572 unsigned long ip;
455 unsigned char *nop, *call;
456 int ret; 573 int ret;
457 574
458 ip = rec->ip; 575 ip = rec->ip;
459 576
460 nop = ftrace_nop_replace(); 577 ret = ftrace_make_nop(mod, rec, mcount_addr);
461 call = ftrace_call_replace(ip, mcount_addr);
462
463 ret = ftrace_modify_code(ip, call, nop);
464 if (ret) { 578 if (ret) {
465 switch (ret) { 579 ftrace_bug(ret, ip);
466 case -EFAULT:
467 FTRACE_WARN_ON_ONCE(1);
468 pr_info("ftrace faulted on modifying ");
469 print_ip_sym(ip);
470 break;
471 case -EINVAL:
472 FTRACE_WARN_ON_ONCE(1);
473 pr_info("ftrace failed to modify ");
474 print_ip_sym(ip);
475 print_ip_ins(" expected: ", call);
476 print_ip_ins(" actual: ", (unsigned char *)ip);
477 print_ip_ins(" replace: ", nop);
478 printk(KERN_CONT "\n");
479 break;
480 case -EPERM:
481 FTRACE_WARN_ON_ONCE(1);
482 pr_info("ftrace faulted on writing ");
483 print_ip_sym(ip);
484 break;
485 default:
486 FTRACE_WARN_ON_ONCE(1);
487 pr_info("ftrace faulted on unknown error ");
488 print_ip_sym(ip);
489 }
490
491 rec->flags |= FTRACE_FL_FAILED; 580 rec->flags |= FTRACE_FL_FAILED;
492 return 0; 581 return 0;
493 } 582 }
@@ -506,6 +595,11 @@ static int __ftrace_modify_code(void *data)
506 if (*command & FTRACE_UPDATE_TRACE_FUNC) 595 if (*command & FTRACE_UPDATE_TRACE_FUNC)
507 ftrace_update_ftrace_func(ftrace_trace_function); 596 ftrace_update_ftrace_func(ftrace_trace_function);
508 597
598 if (*command & FTRACE_START_FUNC_RET)
599 ftrace_enable_ftrace_graph_caller();
600 else if (*command & FTRACE_STOP_FUNC_RET)
601 ftrace_disable_ftrace_graph_caller();
602
509 return 0; 603 return 0;
510} 604}
511 605
@@ -515,43 +609,43 @@ static void ftrace_run_update_code(int command)
515} 609}
516 610
517static ftrace_func_t saved_ftrace_func; 611static ftrace_func_t saved_ftrace_func;
518static int ftrace_start; 612static int ftrace_start_up;
519static DEFINE_MUTEX(ftrace_start_lock);
520 613
521static void ftrace_startup(void) 614static void ftrace_startup_enable(int command)
522{ 615{
523 int command = 0;
524
525 if (unlikely(ftrace_disabled))
526 return;
527
528 mutex_lock(&ftrace_start_lock);
529 ftrace_start++;
530 command |= FTRACE_ENABLE_CALLS;
531
532 if (saved_ftrace_func != ftrace_trace_function) { 616 if (saved_ftrace_func != ftrace_trace_function) {
533 saved_ftrace_func = ftrace_trace_function; 617 saved_ftrace_func = ftrace_trace_function;
534 command |= FTRACE_UPDATE_TRACE_FUNC; 618 command |= FTRACE_UPDATE_TRACE_FUNC;
535 } 619 }
536 620
537 if (!command || !ftrace_enabled) 621 if (!command || !ftrace_enabled)
538 goto out; 622 return;
539 623
540 ftrace_run_update_code(command); 624 ftrace_run_update_code(command);
541 out:
542 mutex_unlock(&ftrace_start_lock);
543} 625}
544 626
545static void ftrace_shutdown(void) 627static void ftrace_startup(int command)
546{ 628{
547 int command = 0; 629 if (unlikely(ftrace_disabled))
630 return;
631
632 mutex_lock(&ftrace_start_lock);
633 ftrace_start_up++;
634 command |= FTRACE_ENABLE_CALLS;
635
636 ftrace_startup_enable(command);
637
638 mutex_unlock(&ftrace_start_lock);
639}
548 640
641static void ftrace_shutdown(int command)
642{
549 if (unlikely(ftrace_disabled)) 643 if (unlikely(ftrace_disabled))
550 return; 644 return;
551 645
552 mutex_lock(&ftrace_start_lock); 646 mutex_lock(&ftrace_start_lock);
553 ftrace_start--; 647 ftrace_start_up--;
554 if (!ftrace_start) 648 if (!ftrace_start_up)
555 command |= FTRACE_DISABLE_CALLS; 649 command |= FTRACE_DISABLE_CALLS;
556 650
557 if (saved_ftrace_func != ftrace_trace_function) { 651 if (saved_ftrace_func != ftrace_trace_function) {
@@ -577,8 +671,8 @@ static void ftrace_startup_sysctl(void)
577 mutex_lock(&ftrace_start_lock); 671 mutex_lock(&ftrace_start_lock);
578 /* Force update next time */ 672 /* Force update next time */
579 saved_ftrace_func = NULL; 673 saved_ftrace_func = NULL;
580 /* ftrace_start is true if we want ftrace running */ 674 /* ftrace_start_up is true if we want ftrace running */
581 if (ftrace_start) 675 if (ftrace_start_up)
582 command |= FTRACE_ENABLE_CALLS; 676 command |= FTRACE_ENABLE_CALLS;
583 677
584 ftrace_run_update_code(command); 678 ftrace_run_update_code(command);
@@ -593,8 +687,8 @@ static void ftrace_shutdown_sysctl(void)
593 return; 687 return;
594 688
595 mutex_lock(&ftrace_start_lock); 689 mutex_lock(&ftrace_start_lock);
596 /* ftrace_start is true if ftrace is running */ 690 /* ftrace_start_up is true if ftrace is running */
597 if (ftrace_start) 691 if (ftrace_start_up)
598 command |= FTRACE_DISABLE_CALLS; 692 command |= FTRACE_DISABLE_CALLS;
599 693
600 ftrace_run_update_code(command); 694 ftrace_run_update_code(command);
@@ -605,7 +699,7 @@ static cycle_t ftrace_update_time;
605static unsigned long ftrace_update_cnt; 699static unsigned long ftrace_update_cnt;
606unsigned long ftrace_update_tot_cnt; 700unsigned long ftrace_update_tot_cnt;
607 701
608static int ftrace_update_code(void) 702static int ftrace_update_code(struct module *mod)
609{ 703{
610 struct dyn_ftrace *p, *t; 704 struct dyn_ftrace *p, *t;
611 cycle_t start, stop; 705 cycle_t start, stop;
@@ -622,7 +716,7 @@ static int ftrace_update_code(void)
622 list_del_init(&p->list); 716 list_del_init(&p->list);
623 717
624 /* convert record (i.e, patch mcount-call with NOP) */ 718 /* convert record (i.e, patch mcount-call with NOP) */
625 if (ftrace_code_disable(p)) { 719 if (ftrace_code_disable(mod, p)) {
626 p->flags |= FTRACE_FL_CONVERTED; 720 p->flags |= FTRACE_FL_CONVERTED;
627 ftrace_update_cnt++; 721 ftrace_update_cnt++;
628 } else 722 } else
@@ -690,7 +784,6 @@ enum {
690#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 784#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
691 785
692struct ftrace_iterator { 786struct ftrace_iterator {
693 loff_t pos;
694 struct ftrace_page *pg; 787 struct ftrace_page *pg;
695 unsigned idx; 788 unsigned idx;
696 unsigned flags; 789 unsigned flags;
@@ -715,6 +808,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
715 iter->pg = iter->pg->next; 808 iter->pg = iter->pg->next;
716 iter->idx = 0; 809 iter->idx = 0;
717 goto retry; 810 goto retry;
811 } else {
812 iter->idx = -1;
718 } 813 }
719 } else { 814 } else {
720 rec = &iter->pg->records[iter->idx++]; 815 rec = &iter->pg->records[iter->idx++];
@@ -737,8 +832,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
737 } 832 }
738 spin_unlock(&ftrace_lock); 833 spin_unlock(&ftrace_lock);
739 834
740 iter->pos = *pos;
741
742 return rec; 835 return rec;
743} 836}
744 837
@@ -746,13 +839,15 @@ static void *t_start(struct seq_file *m, loff_t *pos)
746{ 839{
747 struct ftrace_iterator *iter = m->private; 840 struct ftrace_iterator *iter = m->private;
748 void *p = NULL; 841 void *p = NULL;
749 loff_t l = -1;
750 842
751 if (*pos > iter->pos) 843 if (*pos > 0) {
752 *pos = iter->pos; 844 if (iter->idx < 0)
845 return p;
846 (*pos)--;
847 iter->idx--;
848 }
753 849
754 l = *pos; 850 p = t_next(m, p, pos);
755 p = t_next(m, p, &l);
756 851
757 return p; 852 return p;
758} 853}
@@ -763,21 +858,15 @@ static void t_stop(struct seq_file *m, void *p)
763 858
764static int t_show(struct seq_file *m, void *v) 859static int t_show(struct seq_file *m, void *v)
765{ 860{
766 struct ftrace_iterator *iter = m->private;
767 struct dyn_ftrace *rec = v; 861 struct dyn_ftrace *rec = v;
768 char str[KSYM_SYMBOL_LEN]; 862 char str[KSYM_SYMBOL_LEN];
769 int ret = 0;
770 863
771 if (!rec) 864 if (!rec)
772 return 0; 865 return 0;
773 866
774 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 867 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
775 868
776 ret = seq_printf(m, "%s\n", str); 869 seq_printf(m, "%s\n", str);
777 if (ret < 0) {
778 iter->pos--;
779 iter->idx--;
780 }
781 870
782 return 0; 871 return 0;
783} 872}
@@ -803,7 +892,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
803 return -ENOMEM; 892 return -ENOMEM;
804 893
805 iter->pg = ftrace_pages_start; 894 iter->pg = ftrace_pages_start;
806 iter->pos = 0;
807 895
808 ret = seq_open(file, &show_ftrace_seq_ops); 896 ret = seq_open(file, &show_ftrace_seq_ops);
809 if (!ret) { 897 if (!ret) {
@@ -890,7 +978,6 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
890 978
891 if (file->f_mode & FMODE_READ) { 979 if (file->f_mode & FMODE_READ) {
892 iter->pg = ftrace_pages_start; 980 iter->pg = ftrace_pages_start;
893 iter->pos = 0;
894 iter->flags = enable ? FTRACE_ITER_FILTER : 981 iter->flags = enable ? FTRACE_ITER_FILTER :
895 FTRACE_ITER_NOTRACE; 982 FTRACE_ITER_NOTRACE;
896 983
@@ -1181,7 +1268,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1181 1268
1182 mutex_lock(&ftrace_sysctl_lock); 1269 mutex_lock(&ftrace_sysctl_lock);
1183 mutex_lock(&ftrace_start_lock); 1270 mutex_lock(&ftrace_start_lock);
1184 if (ftrace_start && ftrace_enabled) 1271 if (ftrace_start_up && ftrace_enabled)
1185 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1272 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1186 mutex_unlock(&ftrace_start_lock); 1273 mutex_unlock(&ftrace_start_lock);
1187 mutex_unlock(&ftrace_sysctl_lock); 1274 mutex_unlock(&ftrace_sysctl_lock);
@@ -1233,13 +1320,10 @@ static struct file_operations ftrace_notrace_fops = {
1233 .release = ftrace_notrace_release, 1320 .release = ftrace_notrace_release,
1234}; 1321};
1235 1322
1236static __init int ftrace_init_debugfs(void) 1323static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
1237{ 1324{
1238 struct dentry *d_tracer;
1239 struct dentry *entry; 1325 struct dentry *entry;
1240 1326
1241 d_tracer = tracing_init_dentry();
1242
1243 entry = debugfs_create_file("available_filter_functions", 0444, 1327 entry = debugfs_create_file("available_filter_functions", 0444,
1244 d_tracer, NULL, &ftrace_avail_fops); 1328 d_tracer, NULL, &ftrace_avail_fops);
1245 if (!entry) 1329 if (!entry)
@@ -1266,9 +1350,8 @@ static __init int ftrace_init_debugfs(void)
1266 return 0; 1350 return 0;
1267} 1351}
1268 1352
1269fs_initcall(ftrace_init_debugfs); 1353static int ftrace_convert_nops(struct module *mod,
1270 1354 unsigned long *start,
1271static int ftrace_convert_nops(unsigned long *start,
1272 unsigned long *end) 1355 unsigned long *end)
1273{ 1356{
1274 unsigned long *p; 1357 unsigned long *p;
@@ -1279,23 +1362,32 @@ static int ftrace_convert_nops(unsigned long *start,
1279 p = start; 1362 p = start;
1280 while (p < end) { 1363 while (p < end) {
1281 addr = ftrace_call_adjust(*p++); 1364 addr = ftrace_call_adjust(*p++);
1365 /*
1366 * Some architecture linkers will pad between
1367 * the different mcount_loc sections of different
1368 * object files to satisfy alignments.
1369 * Skip any NULL pointers.
1370 */
1371 if (!addr)
1372 continue;
1282 ftrace_record_ip(addr); 1373 ftrace_record_ip(addr);
1283 } 1374 }
1284 1375
1285 /* disable interrupts to prevent kstop machine */ 1376 /* disable interrupts to prevent kstop machine */
1286 local_irq_save(flags); 1377 local_irq_save(flags);
1287 ftrace_update_code(); 1378 ftrace_update_code(mod);
1288 local_irq_restore(flags); 1379 local_irq_restore(flags);
1289 mutex_unlock(&ftrace_start_lock); 1380 mutex_unlock(&ftrace_start_lock);
1290 1381
1291 return 0; 1382 return 0;
1292} 1383}
1293 1384
1294void ftrace_init_module(unsigned long *start, unsigned long *end) 1385void ftrace_init_module(struct module *mod,
1386 unsigned long *start, unsigned long *end)
1295{ 1387{
1296 if (ftrace_disabled || start == end) 1388 if (ftrace_disabled || start == end)
1297 return; 1389 return;
1298 ftrace_convert_nops(start, end); 1390 ftrace_convert_nops(mod, start, end);
1299} 1391}
1300 1392
1301extern unsigned long __start_mcount_loc[]; 1393extern unsigned long __start_mcount_loc[];
@@ -1325,7 +1417,8 @@ void __init ftrace_init(void)
1325 1417
1326 last_ftrace_enabled = ftrace_enabled = 1; 1418 last_ftrace_enabled = ftrace_enabled = 1;
1327 1419
1328 ret = ftrace_convert_nops(__start_mcount_loc, 1420 ret = ftrace_convert_nops(NULL,
1421 __start_mcount_loc,
1329 __stop_mcount_loc); 1422 __stop_mcount_loc);
1330 1423
1331 return; 1424 return;
@@ -1342,12 +1435,101 @@ static int __init ftrace_nodyn_init(void)
1342} 1435}
1343device_initcall(ftrace_nodyn_init); 1436device_initcall(ftrace_nodyn_init);
1344 1437
1345# define ftrace_startup() do { } while (0) 1438static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
1346# define ftrace_shutdown() do { } while (0) 1439static inline void ftrace_startup_enable(int command) { }
1440/* Keep as macros so we do not need to define the commands */
1441# define ftrace_startup(command) do { } while (0)
1442# define ftrace_shutdown(command) do { } while (0)
1347# define ftrace_startup_sysctl() do { } while (0) 1443# define ftrace_startup_sysctl() do { } while (0)
1348# define ftrace_shutdown_sysctl() do { } while (0) 1444# define ftrace_shutdown_sysctl() do { } while (0)
1349#endif /* CONFIG_DYNAMIC_FTRACE */ 1445#endif /* CONFIG_DYNAMIC_FTRACE */
1350 1446
1447static ssize_t
1448ftrace_pid_read(struct file *file, char __user *ubuf,
1449 size_t cnt, loff_t *ppos)
1450{
1451 char buf[64];
1452 int r;
1453
1454 if (ftrace_pid_trace >= 0)
1455 r = sprintf(buf, "%u\n", ftrace_pid_trace);
1456 else
1457 r = sprintf(buf, "no pid\n");
1458
1459 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1460}
1461
1462static ssize_t
1463ftrace_pid_write(struct file *filp, const char __user *ubuf,
1464 size_t cnt, loff_t *ppos)
1465{
1466 char buf[64];
1467 long val;
1468 int ret;
1469
1470 if (cnt >= sizeof(buf))
1471 return -EINVAL;
1472
1473 if (copy_from_user(&buf, ubuf, cnt))
1474 return -EFAULT;
1475
1476 buf[cnt] = 0;
1477
1478 ret = strict_strtol(buf, 10, &val);
1479 if (ret < 0)
1480 return ret;
1481
1482 mutex_lock(&ftrace_start_lock);
1483 if (val < 0) {
1484 /* disable pid tracing */
1485 if (ftrace_pid_trace < 0)
1486 goto out;
1487 ftrace_pid_trace = -1;
1488
1489 } else {
1490
1491 if (ftrace_pid_trace == val)
1492 goto out;
1493
1494 ftrace_pid_trace = val;
1495 }
1496
1497 /* update the function call */
1498 ftrace_update_pid_func();
1499 ftrace_startup_enable(0);
1500
1501 out:
1502 mutex_unlock(&ftrace_start_lock);
1503
1504 return cnt;
1505}
1506
1507static struct file_operations ftrace_pid_fops = {
1508 .read = ftrace_pid_read,
1509 .write = ftrace_pid_write,
1510};
1511
1512static __init int ftrace_init_debugfs(void)
1513{
1514 struct dentry *d_tracer;
1515 struct dentry *entry;
1516
1517 d_tracer = tracing_init_dentry();
1518 if (!d_tracer)
1519 return 0;
1520
1521 ftrace_init_dyn_debugfs(d_tracer);
1522
1523 entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
1524 NULL, &ftrace_pid_fops);
1525 if (!entry)
1526 pr_warning("Could not create debugfs "
1527 "'set_ftrace_pid' entry\n");
1528 return 0;
1529}
1530
1531fs_initcall(ftrace_init_debugfs);
1532
1351/** 1533/**
1352 * ftrace_kill - kill ftrace 1534 * ftrace_kill - kill ftrace
1353 * 1535 *
@@ -1381,10 +1563,11 @@ int register_ftrace_function(struct ftrace_ops *ops)
1381 return -1; 1563 return -1;
1382 1564
1383 mutex_lock(&ftrace_sysctl_lock); 1565 mutex_lock(&ftrace_sysctl_lock);
1566
1384 ret = __register_ftrace_function(ops); 1567 ret = __register_ftrace_function(ops);
1385 ftrace_startup(); 1568 ftrace_startup(0);
1386 mutex_unlock(&ftrace_sysctl_lock);
1387 1569
1570 mutex_unlock(&ftrace_sysctl_lock);
1388 return ret; 1571 return ret;
1389} 1572}
1390 1573
@@ -1400,7 +1583,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1400 1583
1401 mutex_lock(&ftrace_sysctl_lock); 1584 mutex_lock(&ftrace_sysctl_lock);
1402 ret = __unregister_ftrace_function(ops); 1585 ret = __unregister_ftrace_function(ops);
1403 ftrace_shutdown(); 1586 ftrace_shutdown(0);
1404 mutex_unlock(&ftrace_sysctl_lock); 1587 mutex_unlock(&ftrace_sysctl_lock);
1405 1588
1406 return ret; 1589 return ret;
@@ -1449,3 +1632,142 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1449 return ret; 1632 return ret;
1450} 1633}
1451 1634
1635#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1636
1637static atomic_t ftrace_graph_active;
1638
1639/* The callbacks that hook a function */
1640trace_func_graph_ret_t ftrace_graph_return =
1641 (trace_func_graph_ret_t)ftrace_stub;
1642trace_func_graph_ent_t ftrace_graph_entry =
1643 (trace_func_graph_ent_t)ftrace_stub;
1644
1645/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
1646static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
1647{
1648 int i;
1649 int ret = 0;
1650 unsigned long flags;
1651 int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
1652 struct task_struct *g, *t;
1653
1654 for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
1655 ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
1656 * sizeof(struct ftrace_ret_stack),
1657 GFP_KERNEL);
1658 if (!ret_stack_list[i]) {
1659 start = 0;
1660 end = i;
1661 ret = -ENOMEM;
1662 goto free;
1663 }
1664 }
1665
1666 read_lock_irqsave(&tasklist_lock, flags);
1667 do_each_thread(g, t) {
1668 if (start == end) {
1669 ret = -EAGAIN;
1670 goto unlock;
1671 }
1672
1673 if (t->ret_stack == NULL) {
1674 t->curr_ret_stack = -1;
1675 /* Make sure IRQs see the -1 first: */
1676 barrier();
1677 t->ret_stack = ret_stack_list[start++];
1678 atomic_set(&t->trace_overrun, 0);
1679 }
1680 } while_each_thread(g, t);
1681
1682unlock:
1683 read_unlock_irqrestore(&tasklist_lock, flags);
1684free:
1685 for (i = start; i < end; i++)
1686 kfree(ret_stack_list[i]);
1687 return ret;
1688}
1689
1690/* Allocate a return stack for each task */
1691static int start_graph_tracing(void)
1692{
1693 struct ftrace_ret_stack **ret_stack_list;
1694 int ret;
1695
1696 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
1697 sizeof(struct ftrace_ret_stack *),
1698 GFP_KERNEL);
1699
1700 if (!ret_stack_list)
1701 return -ENOMEM;
1702
1703 do {
1704 ret = alloc_retstack_tasklist(ret_stack_list);
1705 } while (ret == -EAGAIN);
1706
1707 kfree(ret_stack_list);
1708 return ret;
1709}
1710
1711int register_ftrace_graph(trace_func_graph_ret_t retfunc,
1712 trace_func_graph_ent_t entryfunc)
1713{
1714 int ret = 0;
1715
1716 mutex_lock(&ftrace_sysctl_lock);
1717
1718 atomic_inc(&ftrace_graph_active);
1719 ret = start_graph_tracing();
1720 if (ret) {
1721 atomic_dec(&ftrace_graph_active);
1722 goto out;
1723 }
1724
1725 ftrace_graph_return = retfunc;
1726 ftrace_graph_entry = entryfunc;
1727
1728 ftrace_startup(FTRACE_START_FUNC_RET);
1729
1730out:
1731 mutex_unlock(&ftrace_sysctl_lock);
1732 return ret;
1733}
1734
1735void unregister_ftrace_graph(void)
1736{
1737 mutex_lock(&ftrace_sysctl_lock);
1738
1739 atomic_dec(&ftrace_graph_active);
1740 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
1741 ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub;
1742 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
1743
1744 mutex_unlock(&ftrace_sysctl_lock);
1745}
1746
1747/* Allocate a return stack for newly created task */
1748void ftrace_graph_init_task(struct task_struct *t)
1749{
1750 if (atomic_read(&ftrace_graph_active)) {
1751 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
1752 * sizeof(struct ftrace_ret_stack),
1753 GFP_KERNEL);
1754 if (!t->ret_stack)
1755 return;
1756 t->curr_ret_stack = -1;
1757 atomic_set(&t->trace_overrun, 0);
1758 } else
1759 t->ret_stack = NULL;
1760}
1761
1762void ftrace_graph_exit_task(struct task_struct *t)
1763{
1764 struct ftrace_ret_stack *ret_stack = t->ret_stack;
1765
1766 t->ret_stack = NULL;
1767 /* NULL must become visible to IRQs before we free it: */
1768 barrier();
1769
1770 kfree(ret_stack);
1771}
1772#endif
1773
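For readers of the function graph additions above, the following editor's
sketch (not part of the patch) shows what the architecture entry hook
conceptually does with the per-task return stack set up in
ftrace_graph_init_task(). Only ret_stack, curr_ret_stack, trace_overrun and
FTRACE_RETFUNC_DEPTH come from this patch; the field names 'ret' and 'func'
and the helper name are assumptions made for illustration.

	/* push one call onto a task's return stack; -EBUSY on overflow */
	static int push_return_trace_sketch(struct task_struct *t,
					    unsigned long ret, unsigned long func)
	{
		int index;

		/* task was not given a return stack (graph tracing inactive) */
		if (!t->ret_stack)
			return -EBUSY;

		/* stack full: remember that entries were lost */
		if (t->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
			atomic_inc(&t->trace_overrun);
			return -EBUSY;
		}

		index = ++t->curr_ret_stack;
		t->ret_stack[index].ret = ret;		/* original return address */
		t->ret_stack[index].func = func;	/* function being entered */

		return 0;
	}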
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 668bbb5ef2bd..e206951603c1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
25 * Turning this switch on makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
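Editor's note, not part of the patch: with the two-bit scheme above, writing
is allowed only when the ON bit is set and the DISABLED bit is clear, i.e.
when the flags word equals RB_BUFFERS_ON. The helper name below is
hypothetical; it is a sketch of how these bits combine.

	static inline int rb_buffers_writable(void)
	{
		/* once tracing_off_permanent() sets DISABLED, this fails forever */
		return ring_buffer_flags == RB_BUFFERS_ON;
	}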
23 61
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,9 +80,22 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
84}
85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
 90 * permanently.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
46} 95}
47 96
97#include "trace.h"
98
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 99/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 100#define DEBUG_SHIFT 0
50 101
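As a reading aid (not part of the patch), the three-layer gate described in the comment above amounts to the check sketched below; ring_buffer_lock_reserve() and ring_buffer_write() later in this diff perform the equivalent tests inline. The my_can_write() name is hypothetical.

static int my_can_write(struct ring_buffer *buffer,
			struct ring_buffer_per_cpu *cpu_buffer)
{
	/* layer 1: global switch on, and not permanently disabled */
	if (ring_buffer_flags != RB_BUFFERS_ON)
		return 0;
	/* layer 2: this ring buffer is enabled for recording */
	if (atomic_read(&buffer->record_disabled))
		return 0;
	/* layer 3: the per-cpu buffer is enabled for recording */
	if (atomic_read(&cpu_buffer->record_disabled))
		return 0;
	return 1;
}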
@@ -187,7 +238,8 @@ static inline int test_time_stamp(u64 delta)
187struct ring_buffer_per_cpu { 238struct ring_buffer_per_cpu {
188 int cpu; 239 int cpu;
189 struct ring_buffer *buffer; 240 struct ring_buffer *buffer;
190 spinlock_t lock; 241 spinlock_t reader_lock; /* serialize readers */
242 raw_spinlock_t lock;
191 struct lock_class_key lock_key; 243 struct lock_class_key lock_key;
192 struct list_head pages; 244 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */ 245 struct buffer_page *head_page; /* read from head */
@@ -221,32 +273,16 @@ struct ring_buffer_iter {
221 u64 read_stamp; 273 u64 read_stamp;
222}; 274};
223 275
276/* buffer may be either ring_buffer or ring_buffer_per_cpu */
224#define RB_WARN_ON(buffer, cond) \ 277#define RB_WARN_ON(buffer, cond) \
225 do { \ 278 ({ \
226 if (unlikely(cond)) { \ 279 int _____ret = unlikely(cond); \
227 atomic_inc(&buffer->record_disabled); \ 280 if (_____ret) { \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \ 281 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \ 282 WARN_ON(1); \
248 } \ 283 } \
249 } while (0) 284 _____ret; \
285 })
250 286
251/** 287/**
252 * check_pages - integrity check of buffer pages 288 * check_pages - integrity check of buffer pages
@@ -260,14 +296,18 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
260 struct list_head *head = &cpu_buffer->pages; 296 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp; 297 struct buffer_page *page, *tmp;
262 298
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 299 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 300 return -1;
301 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
302 return -1;
265 303
266 list_for_each_entry_safe(page, tmp, head, list) { 304 list_for_each_entry_safe(page, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer, 305 if (RB_WARN_ON(cpu_buffer,
268 page->list.next->prev != &page->list); 306 page->list.next->prev != &page->list))
269 RB_WARN_ON_RET(cpu_buffer, 307 return -1;
270 page->list.prev->next != &page->list); 308 if (RB_WARN_ON(cpu_buffer,
309 page->list.prev->next != &page->list))
310 return -1;
271 } 311 }
272 312
273 return 0; 313 return 0;
@@ -324,7 +364,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
324 364
325 cpu_buffer->cpu = cpu; 365 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer; 366 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock); 367 spin_lock_init(&cpu_buffer->reader_lock);
368 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
328 INIT_LIST_HEAD(&cpu_buffer->pages); 369 INIT_LIST_HEAD(&cpu_buffer->pages);
329 370
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 371 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
@@ -473,13 +514,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
473 synchronize_sched(); 514 synchronize_sched();
474 515
475 for (i = 0; i < nr_pages; i++) { 516 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages)); 517 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
518 return;
477 p = cpu_buffer->pages.next; 519 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list); 520 page = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list); 521 list_del_init(&page->list);
480 free_buffer_page(page); 522 free_buffer_page(page);
481 } 523 }
482 BUG_ON(list_empty(&cpu_buffer->pages)); 524 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
525 return;
483 526
484 rb_reset_cpu(cpu_buffer); 527 rb_reset_cpu(cpu_buffer);
485 528
@@ -501,7 +544,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
501 synchronize_sched(); 544 synchronize_sched();
502 545
503 for (i = 0; i < nr_pages; i++) { 546 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages)); 547 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
548 return;
505 p = pages->next; 549 p = pages->next;
506 page = list_entry(p, struct buffer_page, list); 550 page = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list); 551 list_del_init(&page->list);
@@ -562,7 +606,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
562 if (size < buffer_size) { 606 if (size < buffer_size) {
563 607
564 /* easy case, just free pages */ 608 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages); 609 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
610 mutex_unlock(&buffer->mutex);
611 return -1;
612 }
566 613
567 rm_pages = buffer->pages - nr_pages; 614 rm_pages = buffer->pages - nr_pages;
568 615
@@ -581,7 +628,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
581 * add these pages to the cpu_buffers. Otherwise we just free 628 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM; 629 * them all and return -ENOMEM;
583 */ 630 */
584 BUG_ON(nr_pages <= buffer->pages); 631 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
632 mutex_unlock(&buffer->mutex);
633 return -1;
634 }
635
585 new_pages = nr_pages - buffer->pages; 636 new_pages = nr_pages - buffer->pages;
586 637
587 for_each_buffer_cpu(buffer, cpu) { 638 for_each_buffer_cpu(buffer, cpu) {
@@ -604,7 +655,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
604 rb_insert_pages(cpu_buffer, &pages, new_pages); 655 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 } 656 }
606 657
607 BUG_ON(!list_empty(&pages)); 658 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
659 mutex_unlock(&buffer->mutex);
660 return -1;
661 }
608 662
609 out: 663 out:
610 buffer->pages = nr_pages; 664 buffer->pages = nr_pages;
@@ -693,7 +747,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 head += rb_event_length(event)) { 747 head += rb_event_length(event)) {
694 748
695 event = __rb_page_index(cpu_buffer->head_page, head); 749 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event)); 750 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
751 return;
697 /* Only count data entries */ 752 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA) 753 if (event->type != RINGBUF_TYPE_DATA)
699 continue; 754 continue;
@@ -746,8 +801,9 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746 addr &= PAGE_MASK; 801 addr &= PAGE_MASK;
747 802
748 while (cpu_buffer->commit_page->page != (void *)addr) { 803 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer, 804 if (RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page); 805 cpu_buffer->commit_page == cpu_buffer->tail_page))
806 return;
751 cpu_buffer->commit_page->commit = 807 cpu_buffer->commit_page->commit =
752 cpu_buffer->commit_page->write; 808 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 809 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
@@ -894,7 +950,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
894 if (write > BUF_PAGE_SIZE) { 950 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page; 951 struct buffer_page *next_page = tail_page;
896 952
897 spin_lock_irqsave(&cpu_buffer->lock, flags); 953 local_irq_save(flags);
954 __raw_spin_lock(&cpu_buffer->lock);
898 955
899 rb_inc_page(cpu_buffer, &next_page); 956 rb_inc_page(cpu_buffer, &next_page);
900 957
@@ -902,7 +959,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
902 reader_page = cpu_buffer->reader_page; 959 reader_page = cpu_buffer->reader_page;
903 960
904 /* we grabbed the lock before incrementing */ 961 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page); 962 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
963 goto out_unlock;
906 964
907 /* 965 /*
908 * If for some reason, we had an interrupt storm that made 966 * If for some reason, we had an interrupt storm that made
@@ -970,7 +1028,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
970 rb_set_commit_to_write(cpu_buffer); 1028 rb_set_commit_to_write(cpu_buffer);
971 } 1029 }
972 1030
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1031 __raw_spin_unlock(&cpu_buffer->lock);
1032 local_irq_restore(flags);
974 1033
975 /* fail and let the caller try again */ 1034 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN); 1035 return ERR_PTR(-EAGAIN);
@@ -978,7 +1037,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
978 1037
979 /* We reserved something on the buffer */ 1038 /* We reserved something on the buffer */
980 1039
981 BUG_ON(write > BUF_PAGE_SIZE); 1040 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1041 return NULL;
982 1042
983 event = __rb_page_index(tail_page, tail); 1043 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length); 1044 rb_update_event(event, type, length);
@@ -993,7 +1053,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
993 return event; 1053 return event;
994 1054
995 out_unlock: 1055 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1056 __raw_spin_unlock(&cpu_buffer->lock);
1057 local_irq_restore(flags);
997 return NULL; 1058 return NULL;
998} 1059}
999 1060
@@ -1076,10 +1137,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1076 * storm or we have something buggy. 1137 * storm or we have something buggy.
1077 * Bail! 1138 * Bail!
1078 */ 1139 */
1079 if (unlikely(++nr_loops > 1000)) { 1140 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL; 1141 return NULL;
1082 }
1083 1142
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1143 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085 1144
@@ -1175,15 +1234,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1175 struct ring_buffer_event *event; 1234 struct ring_buffer_event *event;
1176 int cpu, resched; 1235 int cpu, resched;
1177 1236
1178 if (ring_buffers_off) 1237 if (ring_buffer_flags != RB_BUFFERS_ON)
1179 return NULL; 1238 return NULL;
1180 1239
1181 if (atomic_read(&buffer->record_disabled)) 1240 if (atomic_read(&buffer->record_disabled))
1182 return NULL; 1241 return NULL;
1183 1242
1184 /* If we are tracing schedule, we don't want to recurse */ 1243 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched(); 1244 resched = ftrace_preempt_disable();
1186 preempt_disable_notrace();
1187 1245
1188 cpu = raw_smp_processor_id(); 1246 cpu = raw_smp_processor_id();
1189 1247
@@ -1214,10 +1272,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1214 return event; 1272 return event;
1215 1273
1216 out: 1274 out:
1217 if (resched) 1275 ftrace_preempt_enable(resched);
1218 preempt_enable_no_resched_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL; 1276 return NULL;
1222} 1277}
1223 1278
@@ -1259,12 +1314,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1259 /* 1314 /*
1260 * Only the last preempt count needs to restore preemption. 1315 * Only the last preempt count needs to restore preemption.
1261 */ 1316 */
1262 if (preempt_count() == 1) { 1317 if (preempt_count() == 1)
1263 if (per_cpu(rb_need_resched, cpu)) 1318 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1264 preempt_enable_no_resched_notrace(); 1319 else
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace(); 1320 preempt_enable_no_resched_notrace();
1269 1321
1270 return 0; 1322 return 0;
@@ -1294,14 +1346,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1294 int ret = -EBUSY; 1346 int ret = -EBUSY;
1295 int cpu, resched; 1347 int cpu, resched;
1296 1348
1297 if (ring_buffers_off) 1349 if (ring_buffer_flags != RB_BUFFERS_ON)
1298 return -EBUSY; 1350 return -EBUSY;
1299 1351
1300 if (atomic_read(&buffer->record_disabled)) 1352 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY; 1353 return -EBUSY;
1302 1354
1303 resched = need_resched(); 1355 resched = ftrace_preempt_disable();
1304 preempt_disable_notrace();
1305 1356
1306 cpu = raw_smp_processor_id(); 1357 cpu = raw_smp_processor_id();
1307 1358
@@ -1327,10 +1378,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1327 1378
1328 ret = 0; 1379 ret = 0;
1329 out: 1380 out:
1330 if (resched) 1381 ftrace_preempt_enable(resched);
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334 1382
1335 return ret; 1383 return ret;
1336} 1384}
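The ftrace_preempt_disable()/ftrace_preempt_enable() pair used above is not shown in this hunk (presumably it is introduced in kernel/trace/trace.h by this series); a sketch consistent with the open-coded sequence it replaces:

static inline int ftrace_preempt_disable(void)
{
	int resched;

	/* remember whether a reschedule was pending before we disable */
	resched = need_resched();
	preempt_disable_notrace();

	return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}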
@@ -1489,14 +1537,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1489 return overruns; 1537 return overruns;
1490} 1538}
1491 1539
1492/** 1540static void rb_iter_reset(struct ring_buffer_iter *iter)
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{ 1541{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1542 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502 1543
@@ -1515,6 +1556,23 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1515} 1556}
1516 1557
1517/** 1558/**
1559 * ring_buffer_iter_reset - reset an iterator
1560 * @iter: The iterator to reset
1561 *
1562 * Resets the iterator, so that it will start from the beginning
1563 * again.
1564 */
1565void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1566{
1567 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1568 unsigned long flags;
1569
1570 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1571 rb_iter_reset(iter);
1572 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1573}
1574
1575/**
1518 * ring_buffer_iter_empty - check if an iterator has no more to read 1576 * ring_buffer_iter_empty - check if an iterator has no more to read
1519 * @iter: The iterator to check 1577 * @iter: The iterator to check
1520 */ 1578 */
@@ -1597,7 +1655,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1597 unsigned long flags; 1655 unsigned long flags;
1598 int nr_loops = 0; 1656 int nr_loops = 0;
1599 1657
1600 spin_lock_irqsave(&cpu_buffer->lock, flags); 1658 local_irq_save(flags);
1659 __raw_spin_lock(&cpu_buffer->lock);
1601 1660
1602 again: 1661 again:
1603 /* 1662 /*
@@ -1606,8 +1665,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 * a case where we will loop three times. There should be no 1665 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of). 1666 * reason to loop four times (that I know of).
1608 */ 1667 */
1609 if (unlikely(++nr_loops > 3)) { 1668 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL; 1669 reader = NULL;
1612 goto out; 1670 goto out;
1613 } 1671 }
@@ -1619,8 +1677,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 goto out; 1677 goto out;
1620 1678
1621 /* Never should we have an index greater than the size */ 1679 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer, 1680 if (RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader)); 1681 cpu_buffer->reader_page->read > rb_page_size(reader)))
1682 goto out;
1624 1683
1625 /* check if we caught up to the tail */ 1684 /* check if we caught up to the tail */
1626 reader = NULL; 1685 reader = NULL;
@@ -1659,7 +1718,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 goto again; 1718 goto again;
1660 1719
1661 out: 1720 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1721 __raw_spin_unlock(&cpu_buffer->lock);
1722 local_irq_restore(flags);
1663 1723
1664 return reader; 1724 return reader;
1665} 1725}
@@ -1673,7 +1733,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1673 reader = rb_get_reader_page(cpu_buffer); 1733 reader = rb_get_reader_page(cpu_buffer);
1674 1734
1675 /* This function should not be called when buffer is empty */ 1735 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader); 1736 if (RB_WARN_ON(cpu_buffer, !reader))
1737 return;
1677 1738
1678 event = rb_reader_event(cpu_buffer); 1739 event = rb_reader_event(cpu_buffer);
1679 1740
@@ -1700,7 +1761,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * Check if we are at the end of the buffer. 1761 * Check if we are at the end of the buffer.
1701 */ 1762 */
1702 if (iter->head >= rb_page_size(iter->head_page)) { 1763 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1764 if (RB_WARN_ON(buffer,
1765 iter->head_page == cpu_buffer->commit_page))
1766 return;
1704 rb_inc_iter(iter); 1767 rb_inc_iter(iter);
1705 return; 1768 return;
1706 } 1769 }
@@ -1713,8 +1776,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 * This should not be called to advance the header if we are 1776 * This should not be called to advance the header if we are
1714 * at the tail of the buffer. 1777 * at the tail of the buffer.
1715 */ 1778 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1779 if (RB_WARN_ON(cpu_buffer,
1717 (iter->head + length > rb_commit_index(cpu_buffer))); 1780 (iter->head_page == cpu_buffer->commit_page) &&
1781 (iter->head + length > rb_commit_index(cpu_buffer))))
1782 return;
1718 1783
1719 rb_update_iter_read_stamp(iter, event); 1784 rb_update_iter_read_stamp(iter, event);
1720 1785
@@ -1726,17 +1791,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1726 rb_advance_iter(iter); 1791 rb_advance_iter(iter);
1727} 1792}
1728 1793
1729/** 1794static struct ring_buffer_event *
1730 * ring_buffer_peek - peek at the next event to be read 1795rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1731 * @buffer: The ring buffer to read
1732 * @cpu: The cpu to peak at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{ 1796{
1741 struct ring_buffer_per_cpu *cpu_buffer; 1797 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event; 1798 struct ring_buffer_event *event;
@@ -1757,10 +1813,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1757 * can have. Nesting 10 deep of interrupts is clearly 1813 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly. 1814 * an anomaly.
1759 */ 1815 */
1760 if (unlikely(++nr_loops > 10)) { 1816 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL; 1817 return NULL;
1763 }
1764 1818
1765 reader = rb_get_reader_page(cpu_buffer); 1819 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader) 1820 if (!reader)
@@ -1798,16 +1852,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1798 return NULL; 1852 return NULL;
1799} 1853}
1800 1854
1801/** 1855static struct ring_buffer_event *
1802 * ring_buffer_iter_peek - peek at the next event to be read 1856rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{ 1857{
1812 struct ring_buffer *buffer; 1858 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer; 1859 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1875,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1829 * can have. Nesting 10 deep of interrupts is clearly 1875 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly. 1876 * an anomaly.
1831 */ 1877 */
1832 if (unlikely(++nr_loops > 10)) { 1878 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL; 1879 return NULL;
1835 }
1836 1880
1837 if (rb_per_cpu_empty(cpu_buffer)) 1881 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL; 1882 return NULL;
@@ -1869,6 +1913,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1869} 1913}
1870 1914
1871/** 1915/**
1916 * ring_buffer_peek - peek at the next event to be read
1917 * @buffer: The ring buffer to read
 1918 * @cpu: The cpu to peek at
1919 * @ts: The timestamp counter of this event.
1920 *
1921 * This will return the event that will be read next, but does
1922 * not consume the data.
1923 */
1924struct ring_buffer_event *
1925ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1926{
1927 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1928 struct ring_buffer_event *event;
1929 unsigned long flags;
1930
1931 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 event = rb_buffer_peek(buffer, cpu, ts);
1933 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934
1935 return event;
1936}
1937
1938/**
1939 * ring_buffer_iter_peek - peek at the next event to be read
1940 * @iter: The ring buffer iterator
1941 * @ts: The timestamp counter of this event.
1942 *
1943 * This will return the event that will be read next, but does
1944 * not increment the iterator.
1945 */
1946struct ring_buffer_event *
1947ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1948{
1949 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1950 struct ring_buffer_event *event;
1951 unsigned long flags;
1952
1953 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1954 event = rb_iter_peek(iter, ts);
1955 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1956
1957 return event;
1958}
1959
1960/**
1872 * ring_buffer_consume - return an event and consume it 1961 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from 1962 * @buffer: The ring buffer to get the next event from
1874 * 1963 *
@@ -1879,19 +1968,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1879struct ring_buffer_event * 1968struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1969ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{ 1970{
1882 struct ring_buffer_per_cpu *cpu_buffer; 1971 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1883 struct ring_buffer_event *event; 1972 struct ring_buffer_event *event;
1973 unsigned long flags;
1884 1974
1885 if (!cpu_isset(cpu, buffer->cpumask)) 1975 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL; 1976 return NULL;
1887 1977
1888 event = ring_buffer_peek(buffer, cpu, ts); 1978 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1979
1980 event = rb_buffer_peek(buffer, cpu, ts);
1889 if (!event) 1981 if (!event)
1890 return NULL; 1982 goto out;
1891 1983
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer); 1984 rb_advance_reader(cpu_buffer);
1894 1985
1986 out:
1987 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1988
1895 return event; 1989 return event;
1896} 1990}
1897 1991
@@ -1928,9 +2022,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1928 atomic_inc(&cpu_buffer->record_disabled); 2022 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched(); 2023 synchronize_sched();
1930 2024
1931 spin_lock_irqsave(&cpu_buffer->lock, flags); 2025 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 ring_buffer_iter_reset(iter); 2026 __raw_spin_lock(&cpu_buffer->lock);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2027 rb_iter_reset(iter);
2028 __raw_spin_unlock(&cpu_buffer->lock);
2029 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934 2030
1935 return iter; 2031 return iter;
1936} 2032}
@@ -1962,12 +2058,17 @@ struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2058ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{ 2059{
1964 struct ring_buffer_event *event; 2060 struct ring_buffer_event *event;
2061 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2062 unsigned long flags;
1965 2063
1966 event = ring_buffer_iter_peek(iter, ts); 2064 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2065 event = rb_iter_peek(iter, ts);
1967 if (!event) 2066 if (!event)
1968 return NULL; 2067 goto out;
1969 2068
1970 rb_advance_iter(iter); 2069 rb_advance_iter(iter);
2070 out:
2071 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971 2072
1972 return event; 2073 return event;
1973} 2074}
@@ -2016,11 +2117,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2016 if (!cpu_isset(cpu, buffer->cpumask)) 2117 if (!cpu_isset(cpu, buffer->cpumask))
2017 return; 2118 return;
2018 2119
2019 spin_lock_irqsave(&cpu_buffer->lock, flags); 2120 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2121
2122 __raw_spin_lock(&cpu_buffer->lock);
2020 2123
2021 rb_reset_cpu(cpu_buffer); 2124 rb_reset_cpu(cpu_buffer);
2022 2125
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2126 __raw_spin_unlock(&cpu_buffer->lock);
2127
2128 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2024} 2129}
2025 2130
2026/** 2131/**
@@ -2122,12 +2227,14 @@ static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf, 2227rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos) 2228 size_t cnt, loff_t *ppos)
2124{ 2229{
2125 int *p = filp->private_data; 2230 long *p = filp->private_data;
2126 char buf[64]; 2231 char buf[64];
2127 int r; 2232 int r;
2128 2233
2129 /* !ring_buffers_off == tracing_on */ 2234 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2130 r = sprintf(buf, "%d\n", !*p); 2235 r = sprintf(buf, "permanently disabled\n");
2236 else
2237 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2131 2238
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2239 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133} 2240}
@@ -2136,7 +2243,7 @@ static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf, 2243rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos) 2244 size_t cnt, loff_t *ppos)
2138{ 2245{
2139 int *p = filp->private_data; 2246 long *p = filp->private_data;
2140 char buf[64]; 2247 char buf[64];
2141 long val; 2248 long val;
2142 int ret; 2249 int ret;
@@ -2153,8 +2260,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2153 if (ret < 0) 2260 if (ret < 0)
2154 return ret; 2261 return ret;
2155 2262
2156 /* !ring_buffers_off == tracing_on */ 2263 if (val)
2157 *p = !val; 2264 set_bit(RB_BUFFERS_ON_BIT, p);
2265 else
2266 clear_bit(RB_BUFFERS_ON_BIT, p);
2158 2267
2159 (*ppos)++; 2268 (*ppos)++;
2160 2269
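Net effect on the debugfs 'tracing_on' file created in the next hunk: reading it now reports either the ON bit or the string "permanently disabled", and writing 0 or 1 (for example, echo 0 > tracing_on under the tracing debugfs directory) only toggles RB_BUFFERS_ON_BIT, so a buffer shut down by tracing_off_permanent() stays off no matter what user space writes.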
@@ -2176,7 +2285,7 @@ static __init int rb_init_debugfs(void)
2176 d_tracer = tracing_init_dentry(); 2285 d_tracer = tracing_init_dentry();
2177 2286
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2287 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops); 2288 &ring_buffer_flags, &rb_simple_fops);
2180 if (!entry) 2289 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2290 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182 2291
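A hypothetical consumer built on the interfaces touched above (not part of the patch; my_drain_cpu() is an invented name). With the reader_lock split introduced here, concurrent readers are serialized against each other instead of contending with writers on the raw per-cpu lock:

static void my_drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	/* ring_buffer_consume() takes reader_lock internally now */
	while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
		unsigned int len = ring_buffer_event_length(event);
		void *body = ring_buffer_event_data(event);

		printk(KERN_DEBUG "cpu %d: %u bytes at %p, ts %llu\n",
		       cpu, len, body, (unsigned long long)ts);
	}
}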
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d86e3252f300..91887a280ab9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/seq_file.h>
33#include <linux/writeback.h> 34#include <linux/writeback.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
@@ -43,6 +44,29 @@
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 44unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
44unsigned long __read_mostly tracing_thresh; 45unsigned long __read_mostly tracing_thresh;
45 46
47/* For tracers that don't implement custom flags */
48static struct tracer_opt dummy_tracer_opt[] = {
49 { }
50};
51
52static struct tracer_flags dummy_tracer_flags = {
53 .val = 0,
54 .opts = dummy_tracer_opt
55};
56
57static int dummy_set_flag(u32 old_flags, u32 bit, int set)
58{
59 return 0;
60}
61
62/*
63 * Kill all tracing for good (never come back).
64 * It is initialized to 1 but will turn to zero if the initialization
65 * of the tracer is successful. But that is the only place that sets
66 * this back to zero.
67 */
68int tracing_disabled = 1;
69
46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 70static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47 71
48static inline void ftrace_disable_cpu(void) 72static inline void ftrace_disable_cpu(void)
@@ -62,7 +86,36 @@ static cpumask_t __read_mostly tracing_buffer_mask;
62#define for_each_tracing_cpu(cpu) \ 86#define for_each_tracing_cpu(cpu) \
63 for_each_cpu_mask(cpu, tracing_buffer_mask) 87 for_each_cpu_mask(cpu, tracing_buffer_mask)
64 88
65static int tracing_disabled = 1; 89/*
90 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
91 *
92 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
93 * is set, then ftrace_dump is called. This will output the contents
94 * of the ftrace buffers to the console. This is very useful for
 95 * capturing traces that lead to crashes and outputting it to a
96 * serial console.
97 *
 98 * It is off by default, but you can enable it by either specifying
99 * "ftrace_dump_on_oops" in the kernel command line, or setting
100 * /proc/sys/kernel/ftrace_dump_on_oops to true.
101 */
102int ftrace_dump_on_oops;
103
104static int tracing_set_tracer(char *buf);
105
106static int __init set_ftrace(char *str)
107{
108 tracing_set_tracer(str);
109 return 1;
110}
111__setup("ftrace", set_ftrace);
112
113static int __init set_ftrace_dump_on_oops(char *str)
114{
115 ftrace_dump_on_oops = 1;
116 return 1;
117}
118__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
66 119
67long 120long
68ns2usecs(cycle_t nsec) 121ns2usecs(cycle_t nsec)
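Taken together, the two __setup() hooks above let the tracer be chosen and crash dumping armed straight from the boot line (for example ftrace=<tracer> ftrace_dump_on_oops), and ftrace_dump_on_oops can also be enabled later through the /proc/sys/kernel/ftrace_dump_on_oops knob mentioned in the comment.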
@@ -112,6 +165,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
112/* tracer_enabled is used to toggle activation of a tracer */ 165/* tracer_enabled is used to toggle activation of a tracer */
113static int tracer_enabled = 1; 166static int tracer_enabled = 1;
114 167
168/**
169 * tracing_is_enabled - return tracer_enabled status
170 *
171 * This function is used by other tracers to know the status
172 * of the tracer_enabled flag. Tracers may use this function
173 * to know if it should enable their features when starting
174 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
175 */
176int tracing_is_enabled(void)
177{
178 return tracer_enabled;
179}
180
115/* function tracing enabled */ 181/* function tracing enabled */
116int ftrace_function_enabled; 182int ftrace_function_enabled;
117 183
@@ -153,8 +219,9 @@ static DEFINE_MUTEX(trace_types_lock);
153/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 219/* trace_wait is a waitqueue for tasks blocked on trace_poll */
154static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 220static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
155 221
156/* trace_flags holds iter_ctrl options */ 222/* trace_flags holds trace_options default values */
157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 223unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
224 TRACE_ITER_ANNOTATE;
158 225
159/** 226/**
160 * trace_wake_up - wake up tasks waiting for trace input 227 * trace_wake_up - wake up tasks waiting for trace input
@@ -193,13 +260,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
193 return nsecs / 1000; 260 return nsecs / 1000;
194} 261}
195 262
196/*
197 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
198 * control the output of kernel symbols.
199 */
200#define TRACE_ITER_SYM_MASK \
201 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
202
 203/* These must match the bit positions in trace_iterator_flags */ 263/* These must match the bit positions in trace_iterator_flags */
204static const char *trace_options[] = { 264static const char *trace_options[] = {
205 "print-parent", 265 "print-parent",
@@ -213,6 +273,11 @@ static const char *trace_options[] = {
213 "stacktrace", 273 "stacktrace",
214 "sched-tree", 274 "sched-tree",
215 "ftrace_printk", 275 "ftrace_printk",
276 "ftrace_preempt",
277 "branch",
278 "annotate",
279 "userstacktrace",
280 "sym-userobj",
216 NULL 281 NULL
217}; 282};
218 283
@@ -359,6 +424,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
359 return trace_seq_putmem(s, hex, j); 424 return trace_seq_putmem(s, hex, j);
360} 425}
361 426
427static int
428trace_seq_path(struct trace_seq *s, struct path *path)
429{
430 unsigned char *p;
431
432 if (s->len >= (PAGE_SIZE - 1))
433 return 0;
434 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
435 if (!IS_ERR(p)) {
436 p = mangle_path(s->buffer + s->len, p, "\n");
437 if (p) {
438 s->len = p - s->buffer;
439 return 1;
440 }
441 } else {
442 s->buffer[s->len++] = '?';
443 return 1;
444 }
445
446 return 0;
447}
448
362static void 449static void
363trace_seq_reset(struct trace_seq *s) 450trace_seq_reset(struct trace_seq *s)
364{ 451{
@@ -470,7 +557,15 @@ int register_tracer(struct tracer *type)
470 return -1; 557 return -1;
471 } 558 }
472 559
560 /*
561 * When this gets called we hold the BKL which means that
562 * preemption is disabled. Various trace selftests however
563 * need to disable and enable preemption for successful tests.
564 * So we drop the BKL here and grab it after the tests again.
565 */
566 unlock_kernel();
473 mutex_lock(&trace_types_lock); 567 mutex_lock(&trace_types_lock);
568
474 for (t = trace_types; t; t = t->next) { 569 for (t = trace_types; t; t = t->next) {
475 if (strcmp(type->name, t->name) == 0) { 570 if (strcmp(type->name, t->name) == 0) {
476 /* already found */ 571 /* already found */
@@ -481,11 +576,18 @@ int register_tracer(struct tracer *type)
481 } 576 }
482 } 577 }
483 578
579 if (!type->set_flag)
580 type->set_flag = &dummy_set_flag;
581 if (!type->flags)
582 type->flags = &dummy_tracer_flags;
583 else
584 if (!type->flags->opts)
585 type->flags->opts = dummy_tracer_opt;
586
484#ifdef CONFIG_FTRACE_STARTUP_TEST 587#ifdef CONFIG_FTRACE_STARTUP_TEST
485 if (type->selftest) { 588 if (type->selftest) {
486 struct tracer *saved_tracer = current_trace; 589 struct tracer *saved_tracer = current_trace;
487 struct trace_array *tr = &global_trace; 590 struct trace_array *tr = &global_trace;
488 int saved_ctrl = tr->ctrl;
489 int i; 591 int i;
490 /* 592 /*
491 * Run a selftest on this tracer. 593 * Run a selftest on this tracer.
@@ -494,25 +596,23 @@ int register_tracer(struct tracer *type)
494 * internal tracing to verify that everything is in order. 596 * internal tracing to verify that everything is in order.
495 * If we fail, we do not register this tracer. 597 * If we fail, we do not register this tracer.
496 */ 598 */
497 for_each_tracing_cpu(i) { 599 for_each_tracing_cpu(i)
498 tracing_reset(tr, i); 600 tracing_reset(tr, i);
499 } 601
500 current_trace = type; 602 current_trace = type;
501 tr->ctrl = 0;
502 /* the test is responsible for initializing and enabling */ 603 /* the test is responsible for initializing and enabling */
503 pr_info("Testing tracer %s: ", type->name); 604 pr_info("Testing tracer %s: ", type->name);
504 ret = type->selftest(type, tr); 605 ret = type->selftest(type, tr);
505 /* the test is responsible for resetting too */ 606 /* the test is responsible for resetting too */
506 current_trace = saved_tracer; 607 current_trace = saved_tracer;
507 tr->ctrl = saved_ctrl;
508 if (ret) { 608 if (ret) {
509 printk(KERN_CONT "FAILED!\n"); 609 printk(KERN_CONT "FAILED!\n");
510 goto out; 610 goto out;
511 } 611 }
512 /* Only reset on passing, to avoid touching corrupted buffers */ 612 /* Only reset on passing, to avoid touching corrupted buffers */
513 for_each_tracing_cpu(i) { 613 for_each_tracing_cpu(i)
514 tracing_reset(tr, i); 614 tracing_reset(tr, i);
515 } 615
516 printk(KERN_CONT "PASSED\n"); 616 printk(KERN_CONT "PASSED\n");
517 } 617 }
518#endif 618#endif
@@ -525,6 +625,7 @@ int register_tracer(struct tracer *type)
525 625
526 out: 626 out:
527 mutex_unlock(&trace_types_lock); 627 mutex_unlock(&trace_types_lock);
628 lock_kernel();
528 629
529 return ret; 630 return ret;
530} 631}
@@ -581,6 +682,91 @@ static void trace_init_cmdlines(void)
581 cmdline_idx = 0; 682 cmdline_idx = 0;
582} 683}
583 684
685static int trace_stop_count;
686static DEFINE_SPINLOCK(tracing_start_lock);
687
688/**
689 * ftrace_off_permanent - disable all ftrace code permanently
690 *
 691 * This should only be called when a serious anomaly has
692 * been detected. This will turn off the function tracing,
 693 * ring buffers, and other tracing utilities. It takes no
694 * locks and can be called from any context.
695 */
696void ftrace_off_permanent(void)
697{
698 tracing_disabled = 1;
699 ftrace_stop();
700 tracing_off_permanent();
701}
702
703/**
704 * tracing_start - quick start of the tracer
705 *
706 * If tracing is enabled but was stopped by tracing_stop,
707 * this will start the tracer back up.
708 */
709void tracing_start(void)
710{
711 struct ring_buffer *buffer;
712 unsigned long flags;
713
714 if (tracing_disabled)
715 return;
716
717 spin_lock_irqsave(&tracing_start_lock, flags);
718 if (--trace_stop_count)
719 goto out;
720
721 if (trace_stop_count < 0) {
722 /* Someone screwed up their debugging */
723 WARN_ON_ONCE(1);
724 trace_stop_count = 0;
725 goto out;
726 }
727
728
729 buffer = global_trace.buffer;
730 if (buffer)
731 ring_buffer_record_enable(buffer);
732
733 buffer = max_tr.buffer;
734 if (buffer)
735 ring_buffer_record_enable(buffer);
736
737 ftrace_start();
738 out:
739 spin_unlock_irqrestore(&tracing_start_lock, flags);
740}
741
742/**
743 * tracing_stop - quick stop of the tracer
744 *
 745 * Lightweight way to stop tracing. Use in conjunction with
746 * tracing_start.
747 */
748void tracing_stop(void)
749{
750 struct ring_buffer *buffer;
751 unsigned long flags;
752
753 ftrace_stop();
754 spin_lock_irqsave(&tracing_start_lock, flags);
755 if (trace_stop_count++)
756 goto out;
757
758 buffer = global_trace.buffer;
759 if (buffer)
760 ring_buffer_record_disable(buffer);
761
762 buffer = max_tr.buffer;
763 if (buffer)
764 ring_buffer_record_disable(buffer);
765
766 out:
767 spin_unlock_irqrestore(&tracing_start_lock, flags);
768}
769
584void trace_stop_cmdline_recording(void); 770void trace_stop_cmdline_recording(void);
585 771
586static void trace_save_cmdline(struct task_struct *tsk) 772static void trace_save_cmdline(struct task_struct *tsk)
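Because trace_stop_count makes the calls above nest, a debugging helper can bracket a section it does not want traced without disturbing other stoppers; a minimal sketch (my_quiet_section() is an invented name):

static void my_quiet_section(void)
{
	tracing_stop();		/* first stopper disables the ring buffers */

	/* ... work that should not show up in the trace ... */

	tracing_start();	/* last starter re-enables them */
}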
@@ -618,7 +804,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
618 spin_unlock(&trace_cmdline_lock); 804 spin_unlock(&trace_cmdline_lock);
619} 805}
620 806
621static char *trace_find_cmdline(int pid) 807char *trace_find_cmdline(int pid)
622{ 808{
623 char *cmdline = "<...>"; 809 char *cmdline = "<...>";
624 unsigned map; 810 unsigned map;
@@ -655,6 +841,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
655 841
656 entry->preempt_count = pc & 0xff; 842 entry->preempt_count = pc & 0xff;
657 entry->pid = (tsk) ? tsk->pid : 0; 843 entry->pid = (tsk) ? tsk->pid : 0;
844 entry->tgid = (tsk) ? tsk->tgid : 0;
658 entry->flags = 845 entry->flags =
659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 846#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 847 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -691,6 +878,56 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
691 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 878 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
692} 879}
693 880
881#ifdef CONFIG_FUNCTION_GRAPH_TRACER
882static void __trace_graph_entry(struct trace_array *tr,
883 struct trace_array_cpu *data,
884 struct ftrace_graph_ent *trace,
885 unsigned long flags,
886 int pc)
887{
888 struct ring_buffer_event *event;
889 struct ftrace_graph_ent_entry *entry;
890 unsigned long irq_flags;
891
892 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
893 return;
894
895 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
896 &irq_flags);
897 if (!event)
898 return;
899 entry = ring_buffer_event_data(event);
900 tracing_generic_entry_update(&entry->ent, flags, pc);
901 entry->ent.type = TRACE_GRAPH_ENT;
902 entry->graph_ent = *trace;
903 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
904}
905
906static void __trace_graph_return(struct trace_array *tr,
907 struct trace_array_cpu *data,
908 struct ftrace_graph_ret *trace,
909 unsigned long flags,
910 int pc)
911{
912 struct ring_buffer_event *event;
913 struct ftrace_graph_ret_entry *entry;
914 unsigned long irq_flags;
915
916 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
917 return;
918
919 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
920 &irq_flags);
921 if (!event)
922 return;
923 entry = ring_buffer_event_data(event);
924 tracing_generic_entry_update(&entry->ent, flags, pc);
925 entry->ent.type = TRACE_GRAPH_RET;
926 entry->ret = *trace;
927 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
928}
929#endif
930
694void 931void
695ftrace(struct trace_array *tr, struct trace_array_cpu *data, 932ftrace(struct trace_array *tr, struct trace_array_cpu *data,
696 unsigned long ip, unsigned long parent_ip, unsigned long flags, 933 unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -742,6 +979,46 @@ void __trace_stack(struct trace_array *tr,
742 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 979 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
743} 980}
744 981
982static void ftrace_trace_userstack(struct trace_array *tr,
983 struct trace_array_cpu *data,
984 unsigned long flags, int pc)
985{
986#ifdef CONFIG_STACKTRACE
987 struct ring_buffer_event *event;
988 struct userstack_entry *entry;
989 struct stack_trace trace;
990 unsigned long irq_flags;
991
992 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
993 return;
994
995 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
996 &irq_flags);
997 if (!event)
998 return;
999 entry = ring_buffer_event_data(event);
1000 tracing_generic_entry_update(&entry->ent, flags, pc);
1001 entry->ent.type = TRACE_USER_STACK;
1002
1003 memset(&entry->caller, 0, sizeof(entry->caller));
1004
1005 trace.nr_entries = 0;
1006 trace.max_entries = FTRACE_STACK_ENTRIES;
1007 trace.skip = 0;
1008 trace.entries = entry->caller;
1009
1010 save_stack_trace_user(&trace);
1011 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1012#endif
1013}
1014
1015void __trace_userstack(struct trace_array *tr,
1016 struct trace_array_cpu *data,
1017 unsigned long flags)
1018{
1019 ftrace_trace_userstack(tr, data, flags, preempt_count());
1020}
1021
745static void 1022static void
746ftrace_trace_special(void *__tr, void *__data, 1023ftrace_trace_special(void *__tr, void *__data,
747 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1024 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -765,6 +1042,7 @@ ftrace_trace_special(void *__tr, void *__data,
765 entry->arg3 = arg3; 1042 entry->arg3 = arg3;
766 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1043 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
767 ftrace_trace_stack(tr, data, irq_flags, 4, pc); 1044 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1045 ftrace_trace_userstack(tr, data, irq_flags, pc);
768 1046
769 trace_wake_up(); 1047 trace_wake_up();
770} 1048}
@@ -803,6 +1081,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
803 entry->next_cpu = task_cpu(next); 1081 entry->next_cpu = task_cpu(next);
804 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1082 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
805 ftrace_trace_stack(tr, data, flags, 5, pc); 1083 ftrace_trace_stack(tr, data, flags, 5, pc);
1084 ftrace_trace_userstack(tr, data, flags, pc);
806} 1085}
807 1086
808void 1087void
@@ -832,6 +1111,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
832 entry->next_cpu = task_cpu(wakee); 1111 entry->next_cpu = task_cpu(wakee);
833 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1112 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
834 ftrace_trace_stack(tr, data, flags, 6, pc); 1113 ftrace_trace_stack(tr, data, flags, 6, pc);
1114 ftrace_trace_userstack(tr, data, flags, pc);
835 1115
836 trace_wake_up(); 1116 trace_wake_up();
837} 1117}
@@ -841,26 +1121,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
841{ 1121{
842 struct trace_array *tr = &global_trace; 1122 struct trace_array *tr = &global_trace;
843 struct trace_array_cpu *data; 1123 struct trace_array_cpu *data;
1124 unsigned long flags;
844 int cpu; 1125 int cpu;
845 int pc; 1126 int pc;
846 1127
847 if (tracing_disabled || !tr->ctrl) 1128 if (tracing_disabled)
848 return; 1129 return;
849 1130
850 pc = preempt_count(); 1131 pc = preempt_count();
851 preempt_disable_notrace(); 1132 local_irq_save(flags);
852 cpu = raw_smp_processor_id(); 1133 cpu = raw_smp_processor_id();
853 data = tr->data[cpu]; 1134 data = tr->data[cpu];
854 1135
855 if (likely(!atomic_read(&data->disabled))) 1136 if (likely(atomic_inc_return(&data->disabled) == 1))
856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1137 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
857 1138
858 preempt_enable_notrace(); 1139 atomic_dec(&data->disabled);
1140 local_irq_restore(flags);
859} 1141}
860 1142
861#ifdef CONFIG_FUNCTION_TRACER 1143#ifdef CONFIG_FUNCTION_TRACER
862static void 1144static void
863function_trace_call(unsigned long ip, unsigned long parent_ip) 1145function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
864{ 1146{
865 struct trace_array *tr = &global_trace; 1147 struct trace_array *tr = &global_trace;
866 struct trace_array_cpu *data; 1148 struct trace_array_cpu *data;
@@ -873,8 +1155,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
873 return; 1155 return;
874 1156
875 pc = preempt_count(); 1157 pc = preempt_count();
876 resched = need_resched(); 1158 resched = ftrace_preempt_disable();
877 preempt_disable_notrace();
878 local_save_flags(flags); 1159 local_save_flags(flags);
879 cpu = raw_smp_processor_id(); 1160 cpu = raw_smp_processor_id();
880 data = tr->data[cpu]; 1161 data = tr->data[cpu];
@@ -884,12 +1165,84 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
884 trace_function(tr, data, ip, parent_ip, flags, pc); 1165 trace_function(tr, data, ip, parent_ip, flags, pc);
885 1166
886 atomic_dec(&data->disabled); 1167 atomic_dec(&data->disabled);
887 if (resched) 1168 ftrace_preempt_enable(resched);
888 preempt_enable_no_resched_notrace();
889 else
890 preempt_enable_notrace();
891} 1169}
892 1170
1171static void
1172function_trace_call(unsigned long ip, unsigned long parent_ip)
1173{
1174 struct trace_array *tr = &global_trace;
1175 struct trace_array_cpu *data;
1176 unsigned long flags;
1177 long disabled;
1178 int cpu;
1179 int pc;
1180
1181 if (unlikely(!ftrace_function_enabled))
1182 return;
1183
1184 /*
1185 * Need to use raw, since this must be called before the
1186 * recursive protection is performed.
1187 */
1188 local_irq_save(flags);
1189 cpu = raw_smp_processor_id();
1190 data = tr->data[cpu];
1191 disabled = atomic_inc_return(&data->disabled);
1192
1193 if (likely(disabled == 1)) {
1194 pc = preempt_count();
1195 trace_function(tr, data, ip, parent_ip, flags, pc);
1196 }
1197
1198 atomic_dec(&data->disabled);
1199 local_irq_restore(flags);
1200}
1201
1202#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1203void trace_graph_entry(struct ftrace_graph_ent *trace)
1204{
1205 struct trace_array *tr = &global_trace;
1206 struct trace_array_cpu *data;
1207 unsigned long flags;
1208 long disabled;
1209 int cpu;
1210 int pc;
1211
1212 raw_local_irq_save(flags);
1213 cpu = raw_smp_processor_id();
1214 data = tr->data[cpu];
1215 disabled = atomic_inc_return(&data->disabled);
1216 if (likely(disabled == 1)) {
1217 pc = preempt_count();
1218 __trace_graph_entry(tr, data, trace, flags, pc);
1219 }
1220 atomic_dec(&data->disabled);
1221 raw_local_irq_restore(flags);
1222}
1223
1224void trace_graph_return(struct ftrace_graph_ret *trace)
1225{
1226 struct trace_array *tr = &global_trace;
1227 struct trace_array_cpu *data;
1228 unsigned long flags;
1229 long disabled;
1230 int cpu;
1231 int pc;
1232
1233 raw_local_irq_save(flags);
1234 cpu = raw_smp_processor_id();
1235 data = tr->data[cpu];
1236 disabled = atomic_inc_return(&data->disabled);
1237 if (likely(disabled == 1)) {
1238 pc = preempt_count();
1239 __trace_graph_return(tr, data, trace, flags, pc);
1240 }
1241 atomic_dec(&data->disabled);
1242 raw_local_irq_restore(flags);
1243}
1244#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1245
893static struct ftrace_ops trace_ops __read_mostly = 1246static struct ftrace_ops trace_ops __read_mostly =
894{ 1247{
895 .func = function_trace_call, 1248 .func = function_trace_call,
@@ -898,9 +1251,14 @@ static struct ftrace_ops trace_ops __read_mostly =
898void tracing_start_function_trace(void) 1251void tracing_start_function_trace(void)
899{ 1252{
900 ftrace_function_enabled = 0; 1253 ftrace_function_enabled = 0;
1254
1255 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1256 trace_ops.func = function_trace_call_preempt_only;
1257 else
1258 trace_ops.func = function_trace_call;
1259
901 register_ftrace_function(&trace_ops); 1260 register_ftrace_function(&trace_ops);
902 if (tracer_enabled) 1261 ftrace_function_enabled = 1;
903 ftrace_function_enabled = 1;
904} 1262}
905 1263
906void tracing_stop_function_trace(void) 1264void tracing_stop_function_trace(void)
@@ -912,6 +1270,7 @@ void tracing_stop_function_trace(void)
912 1270
913enum trace_file_type { 1271enum trace_file_type {
914 TRACE_FILE_LAT_FMT = 1, 1272 TRACE_FILE_LAT_FMT = 1,
1273 TRACE_FILE_ANNOTATE = 2,
915}; 1274};
916 1275
917static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 1276static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
@@ -1047,10 +1406,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1047 1406
1048 atomic_inc(&trace_record_cmdline_disabled); 1407 atomic_inc(&trace_record_cmdline_disabled);
1049 1408
1050 /* let the tracer grab locks here if needed */
1051 if (current_trace->start)
1052 current_trace->start(iter);
1053
1054 if (*pos != iter->pos) { 1409 if (*pos != iter->pos) {
1055 iter->ent = NULL; 1410 iter->ent = NULL;
1056 iter->cpu = 0; 1411 iter->cpu = 0;
@@ -1077,14 +1432,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1077 1432
1078static void s_stop(struct seq_file *m, void *p) 1433static void s_stop(struct seq_file *m, void *p)
1079{ 1434{
1080 struct trace_iterator *iter = m->private;
1081
1082 atomic_dec(&trace_record_cmdline_disabled); 1435 atomic_dec(&trace_record_cmdline_disabled);
1083
1084 /* let the tracer release locks here if needed */
1085 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1086 iter->trace->stop(iter);
1087
1088 mutex_unlock(&trace_types_lock); 1436 mutex_unlock(&trace_types_lock);
1089} 1437}
1090 1438
@@ -1143,7 +1491,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1143# define IP_FMT "%016lx" 1491# define IP_FMT "%016lx"
1144#endif 1492#endif
1145 1493
1146static int 1494int
1147seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) 1495seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1148{ 1496{
1149 int ret; 1497 int ret;
@@ -1164,6 +1512,78 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1164 return ret; 1512 return ret;
1165} 1513}
1166 1514
1515static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1516 unsigned long ip, unsigned long sym_flags)
1517{
1518 struct file *file = NULL;
1519 unsigned long vmstart = 0;
1520 int ret = 1;
1521
1522 if (mm) {
1523 const struct vm_area_struct *vma;
1524
1525 down_read(&mm->mmap_sem);
1526 vma = find_vma(mm, ip);
1527 if (vma) {
1528 file = vma->vm_file;
1529 vmstart = vma->vm_start;
1530 }
1531 if (file) {
1532 ret = trace_seq_path(s, &file->f_path);
1533 if (ret)
1534 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1535 }
1536 up_read(&mm->mmap_sem);
1537 }
1538 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1539 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1540 return ret;
1541}
1542
1543static int
1544seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1545 unsigned long sym_flags)
1546{
1547 struct mm_struct *mm = NULL;
1548 int ret = 1;
1549 unsigned int i;
1550
1551 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1552 struct task_struct *task;
1553 /*
1554 * we do the lookup on the thread group leader,
1555 * since individual threads might have already quit!
1556 */
1557 rcu_read_lock();
1558 task = find_task_by_vpid(entry->ent.tgid);
1559 if (task)
1560 mm = get_task_mm(task);
1561 rcu_read_unlock();
1562 }
1563
1564 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1565 unsigned long ip = entry->caller[i];
1566
1567 if (ip == ULONG_MAX || !ret)
1568 break;
1569 if (i && ret)
1570 ret = trace_seq_puts(s, " <- ");
1571 if (!ip) {
1572 if (ret)
1573 ret = trace_seq_puts(s, "??");
1574 continue;
1575 }
1576 if (!ret)
1577 break;
1578 if (ret)
1579 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1580 }
1581
1582 if (mm)
1583 mmput(mm);
1584 return ret;
1585}
1586
1167static void print_lat_help_header(struct seq_file *m) 1587static void print_lat_help_header(struct seq_file *m)
1168{ 1588{
1169 seq_puts(m, "# _------=> CPU# \n"); 1589 seq_puts(m, "# _------=> CPU# \n");
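The user-stack output added above is gated by the new "userstacktrace" option (TRACE_ITER_USERSTACKTRACE), and the file-plus-offset form produced by seq_print_user_ip() additionally requires "sym-userobj" (TRACE_ITER_SYM_USEROBJ); both names were added to the trace_options table earlier in this diff.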
@@ -1338,6 +1758,23 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1338 trace_seq_putc(s, '\n'); 1758 trace_seq_putc(s, '\n');
1339} 1759}
1340 1760
1761static void test_cpu_buff_start(struct trace_iterator *iter)
1762{
1763 struct trace_seq *s = &iter->seq;
1764
1765 if (!(trace_flags & TRACE_ITER_ANNOTATE))
1766 return;
1767
1768 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1769 return;
1770
1771 if (cpu_isset(iter->cpu, iter->started))
1772 return;
1773
1774 cpu_set(iter->cpu, iter->started);
1775 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1776}
1777
1341static enum print_line_t 1778static enum print_line_t
1342print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1779print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1343{ 1780{
@@ -1357,6 +1794,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1357 if (entry->type == TRACE_CONT) 1794 if (entry->type == TRACE_CONT)
1358 return TRACE_TYPE_HANDLED; 1795 return TRACE_TYPE_HANDLED;
1359 1796
1797 test_cpu_buff_start(iter);
1798
1360 next_entry = find_next_entry(iter, NULL, &next_ts); 1799 next_entry = find_next_entry(iter, NULL, &next_ts);
1361 if (!next_entry) 1800 if (!next_entry)
1362 next_ts = iter->ts; 1801 next_ts = iter->ts;
@@ -1448,6 +1887,27 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1448 trace_seq_print_cont(s, iter); 1887 trace_seq_print_cont(s, iter);
1449 break; 1888 break;
1450 } 1889 }
1890 case TRACE_BRANCH: {
1891 struct trace_branch *field;
1892
1893 trace_assign_type(field, entry);
1894
1895 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1896 field->correct ? " ok " : " MISS ",
1897 field->func,
1898 field->file,
1899 field->line);
1900 break;
1901 }
1902 case TRACE_USER_STACK: {
1903 struct userstack_entry *field;
1904
1905 trace_assign_type(field, entry);
1906
1907 seq_print_userip_objs(field, s, sym_flags);
1908 trace_seq_putc(s, '\n');
1909 break;
1910 }
1451 default: 1911 default:
1452 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1912 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1453 } 1913 }
@@ -1472,6 +1932,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1472 if (entry->type == TRACE_CONT) 1932 if (entry->type == TRACE_CONT)
1473 return TRACE_TYPE_HANDLED; 1933 return TRACE_TYPE_HANDLED;
1474 1934
1935 test_cpu_buff_start(iter);
1936
1475 comm = trace_find_cmdline(iter->ent->pid); 1937 comm = trace_find_cmdline(iter->ent->pid);
1476 1938
1477 t = ns2usecs(iter->ts); 1939 t = ns2usecs(iter->ts);
@@ -1581,6 +2043,37 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1581 trace_seq_print_cont(s, iter); 2043 trace_seq_print_cont(s, iter);
1582 break; 2044 break;
1583 } 2045 }
2046 case TRACE_GRAPH_RET: {
2047 return print_graph_function(iter);
2048 }
2049 case TRACE_GRAPH_ENT: {
2050 return print_graph_function(iter);
2051 }
2052 case TRACE_BRANCH: {
2053 struct trace_branch *field;
2054
2055 trace_assign_type(field, entry);
2056
2057 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2058 field->correct ? " ok " : " MISS ",
2059 field->func,
2060 field->file,
2061 field->line);
2062 break;
2063 }
2064 case TRACE_USER_STACK: {
2065 struct userstack_entry *field;
2066
2067 trace_assign_type(field, entry);
2068
2069 ret = seq_print_userip_objs(field, s, sym_flags);
2070 if (!ret)
2071 return TRACE_TYPE_PARTIAL_LINE;
2072 ret = trace_seq_putc(s, '\n');
2073 if (!ret)
2074 return TRACE_TYPE_PARTIAL_LINE;
2075 break;
2076 }
1584 } 2077 }
1585 return TRACE_TYPE_HANDLED; 2078 return TRACE_TYPE_HANDLED;
1586} 2079}
@@ -1640,6 +2133,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1640 break; 2133 break;
1641 } 2134 }
1642 case TRACE_SPECIAL: 2135 case TRACE_SPECIAL:
2136 case TRACE_USER_STACK:
1643 case TRACE_STACK: { 2137 case TRACE_STACK: {
1644 struct special_entry *field; 2138 struct special_entry *field;
1645 2139
@@ -1728,6 +2222,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1728 break; 2222 break;
1729 } 2223 }
1730 case TRACE_SPECIAL: 2224 case TRACE_SPECIAL:
2225 case TRACE_USER_STACK:
1731 case TRACE_STACK: { 2226 case TRACE_STACK: {
1732 struct special_entry *field; 2227 struct special_entry *field;
1733 2228
@@ -1782,6 +2277,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1782 break; 2277 break;
1783 } 2278 }
1784 case TRACE_SPECIAL: 2279 case TRACE_SPECIAL:
2280 case TRACE_USER_STACK:
1785 case TRACE_STACK: { 2281 case TRACE_STACK: {
1786 struct special_entry *field; 2282 struct special_entry *field;
1787 2283
@@ -1847,7 +2343,9 @@ static int s_show(struct seq_file *m, void *v)
1847 seq_printf(m, "# tracer: %s\n", iter->trace->name); 2343 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1848 seq_puts(m, "#\n"); 2344 seq_puts(m, "#\n");
1849 } 2345 }
1850 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2346 if (iter->trace && iter->trace->print_header)
2347 iter->trace->print_header(m);
2348 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1851 /* print nothing if the buffers are empty */ 2349 /* print nothing if the buffers are empty */
1852 if (trace_empty(iter)) 2350 if (trace_empty(iter))
1853 return 0; 2351 return 0;
@@ -1899,6 +2397,15 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1899 iter->trace = current_trace; 2397 iter->trace = current_trace;
1900 iter->pos = -1; 2398 iter->pos = -1;
1901 2399
2400 /* Notify the tracer early; before we stop tracing. */
2401 if (iter->trace && iter->trace->open)
2402 iter->trace->open(iter);
2403
2404 /* Annotate start of buffers if we had overruns */
2405 if (ring_buffer_overruns(iter->tr->buffer))
2406 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2407
2408
1902 for_each_tracing_cpu(cpu) { 2409 for_each_tracing_cpu(cpu) {
1903 2410
1904 iter->buffer_iter[cpu] = 2411 iter->buffer_iter[cpu] =
@@ -1917,13 +2424,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1917 m->private = iter; 2424 m->private = iter;
1918 2425
1919 /* stop the trace while dumping */ 2426 /* stop the trace while dumping */
1920 if (iter->tr->ctrl) { 2427 tracing_stop();
1921 tracer_enabled = 0;
1922 ftrace_function_enabled = 0;
1923 }
1924
1925 if (iter->trace && iter->trace->open)
1926 iter->trace->open(iter);
1927 2428
1928 mutex_unlock(&trace_types_lock); 2429 mutex_unlock(&trace_types_lock);
1929 2430
@@ -1966,14 +2467,7 @@ int tracing_release(struct inode *inode, struct file *file)
1966 iter->trace->close(iter); 2467 iter->trace->close(iter);
1967 2468
1968 /* reenable tracing if it was previously enabled */ 2469 /* reenable tracing if it was previously enabled */
1969 if (iter->tr->ctrl) { 2470 tracing_start();
1970 tracer_enabled = 1;
1971 /*
1972 * It is safe to enable function tracing even if it
1973 * isn't used
1974 */
1975 ftrace_function_enabled = 1;
1976 }
1977 mutex_unlock(&trace_types_lock); 2471 mutex_unlock(&trace_types_lock);
1978 2472
1979 seq_release(inode, file); 2473 seq_release(inode, file);
@@ -2189,13 +2683,16 @@ static struct file_operations tracing_cpumask_fops = {
2189}; 2683};
2190 2684
2191static ssize_t 2685static ssize_t
2192tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, 2686tracing_trace_options_read(struct file *filp, char __user *ubuf,
2193 size_t cnt, loff_t *ppos) 2687 size_t cnt, loff_t *ppos)
2194{ 2688{
2689 int i;
2195 char *buf; 2690 char *buf;
2196 int r = 0; 2691 int r = 0;
2197 int len = 0; 2692 int len = 0;
2198 int i; 2693 u32 tracer_flags = current_trace->flags->val;
2694 struct tracer_opt *trace_opts = current_trace->flags->opts;
2695
2199 2696
2200 /* calculate max size */ 2697 /* calculate max size */
2201 for (i = 0; trace_options[i]; i++) { 2698 for (i = 0; trace_options[i]; i++) {
@@ -2203,6 +2700,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2203 len += 3; /* "no" and space */ 2700 len += 3; /* "no" and space */
2204 } 2701 }
2205 2702
2703 /*
2704 * Increase the size with the names of options specific
2705 * to the current tracer.
2706 */
2707 for (i = 0; trace_opts[i].name; i++) {
2708 len += strlen(trace_opts[i].name);
2709 len += 3; /* "no" and space */
2710 }
2711
2206 /* +2 for \n and \0 */ 2712 /* +2 for \n and \0 */
2207 buf = kmalloc(len + 2, GFP_KERNEL); 2713 buf = kmalloc(len + 2, GFP_KERNEL);
2208 if (!buf) 2714 if (!buf)
@@ -2215,6 +2721,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2215 r += sprintf(buf + r, "no%s ", trace_options[i]); 2721 r += sprintf(buf + r, "no%s ", trace_options[i]);
2216 } 2722 }
2217 2723
2724 for (i = 0; trace_opts[i].name; i++) {
2725 if (tracer_flags & trace_opts[i].bit)
2726 r += sprintf(buf + r, "%s ",
2727 trace_opts[i].name);
2728 else
2729 r += sprintf(buf + r, "no%s ",
2730 trace_opts[i].name);
2731 }
2732
2218 r += sprintf(buf + r, "\n"); 2733 r += sprintf(buf + r, "\n");
2219 WARN_ON(r >= len + 2); 2734 WARN_ON(r >= len + 2);
2220 2735
@@ -2225,13 +2740,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2225 return r; 2740 return r;
2226} 2741}
2227 2742
2743/* Try to assign a tracer specific option */
2744static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2745{
2746 struct tracer_flags *trace_flags = trace->flags;
2747 struct tracer_opt *opts = NULL;
2748 int ret = 0, i = 0;
2749 int len;
2750
2751 for (i = 0; trace_flags->opts[i].name; i++) {
2752 opts = &trace_flags->opts[i];
2753 len = strlen(opts->name);
2754
2755 if (strncmp(cmp, opts->name, len) == 0) {
2756 ret = trace->set_flag(trace_flags->val,
2757 opts->bit, !neg);
2758 break;
2759 }
2760 }
2761 /* Not found */
2762 if (!trace_flags->opts[i].name)
2763 return -EINVAL;
2764
2765 /* Refused to handle */
2766 if (ret)
2767 return ret;
2768
2769 if (neg)
2770 trace_flags->val &= ~opts->bit;
2771 else
2772 trace_flags->val |= opts->bit;
2773
2774 return 0;
2775}
2776
2228static ssize_t 2777static ssize_t
2229tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, 2778tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2230 size_t cnt, loff_t *ppos) 2779 size_t cnt, loff_t *ppos)
2231{ 2780{
2232 char buf[64]; 2781 char buf[64];
2233 char *cmp = buf; 2782 char *cmp = buf;
2234 int neg = 0; 2783 int neg = 0;
2784 int ret;
2235 int i; 2785 int i;
2236 2786
2237 if (cnt >= sizeof(buf)) 2787 if (cnt >= sizeof(buf))
@@ -2258,11 +2808,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2258 break; 2808 break;
2259 } 2809 }
2260 } 2810 }
2261 /* 2811
2262 * If no option could be set, return an error: 2812 /* If no option could be set, test the specific tracer options */
2263 */ 2813 if (!trace_options[i]) {
2264 if (!trace_options[i]) 2814 ret = set_tracer_option(current_trace, cmp, neg);
2265 return -EINVAL; 2815 if (ret)
2816 return ret;
2817 }
2266 2818
2267 filp->f_pos += cnt; 2819 filp->f_pos += cnt;
2268 2820
@@ -2271,8 +2823,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2271 2823
2272static struct file_operations tracing_iter_fops = { 2824static struct file_operations tracing_iter_fops = {
2273 .open = tracing_open_generic, 2825 .open = tracing_open_generic,
2274 .read = tracing_iter_ctrl_read, 2826 .read = tracing_trace_options_read,
2275 .write = tracing_iter_ctrl_write, 2827 .write = tracing_trace_options_write,
2276}; 2828};
2277 2829
2278static const char readme_msg[] = 2830static const char readme_msg[] =
@@ -2286,9 +2838,9 @@ static const char readme_msg[] =
2286 "# echo sched_switch > /debug/tracing/current_tracer\n" 2838 "# echo sched_switch > /debug/tracing/current_tracer\n"
2287 "# cat /debug/tracing/current_tracer\n" 2839 "# cat /debug/tracing/current_tracer\n"
2288 "sched_switch\n" 2840 "sched_switch\n"
2289 "# cat /debug/tracing/iter_ctrl\n" 2841 "# cat /debug/tracing/trace_options\n"
2290 "noprint-parent nosym-offset nosym-addr noverbose\n" 2842 "noprint-parent nosym-offset nosym-addr noverbose\n"
2291 "# echo print-parent > /debug/tracing/iter_ctrl\n" 2843 "# echo print-parent > /debug/tracing/trace_options\n"
2292 "# echo 1 > /debug/tracing/tracing_enabled\n" 2844 "# echo 1 > /debug/tracing/tracing_enabled\n"
2293 "# cat /debug/tracing/trace > /tmp/trace.txt\n" 2845 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2294 "echo 0 > /debug/tracing/tracing_enabled\n" 2846 "echo 0 > /debug/tracing/tracing_enabled\n"
@@ -2311,11 +2863,10 @@ static ssize_t
2311tracing_ctrl_read(struct file *filp, char __user *ubuf, 2863tracing_ctrl_read(struct file *filp, char __user *ubuf,
2312 size_t cnt, loff_t *ppos) 2864 size_t cnt, loff_t *ppos)
2313{ 2865{
2314 struct trace_array *tr = filp->private_data;
2315 char buf[64]; 2866 char buf[64];
2316 int r; 2867 int r;
2317 2868
2318 r = sprintf(buf, "%ld\n", tr->ctrl); 2869 r = sprintf(buf, "%u\n", tracer_enabled);
2319 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2870 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2320} 2871}
2321 2872
@@ -2343,16 +2894,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2343 val = !!val; 2894 val = !!val;
2344 2895
2345 mutex_lock(&trace_types_lock); 2896 mutex_lock(&trace_types_lock);
2346 if (tr->ctrl ^ val) { 2897 if (tracer_enabled ^ val) {
2347 if (val) 2898 if (val) {
2348 tracer_enabled = 1; 2899 tracer_enabled = 1;
2349 else 2900 if (current_trace->start)
2901 current_trace->start(tr);
2902 tracing_start();
2903 } else {
2350 tracer_enabled = 0; 2904 tracer_enabled = 0;
2351 2905 tracing_stop();
2352 tr->ctrl = val; 2906 if (current_trace->stop)
2353 2907 current_trace->stop(tr);
2354 if (current_trace && current_trace->ctrl_update) 2908 }
2355 current_trace->ctrl_update(tr);
2356 } 2909 }
2357 mutex_unlock(&trace_types_lock); 2910 mutex_unlock(&trace_types_lock);
2358 2911
@@ -2378,29 +2931,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2378 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2931 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2379} 2932}
2380 2933
2381static ssize_t 2934static int tracing_set_tracer(char *buf)
2382tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2383 size_t cnt, loff_t *ppos)
2384{ 2935{
2385 struct trace_array *tr = &global_trace; 2936 struct trace_array *tr = &global_trace;
2386 struct tracer *t; 2937 struct tracer *t;
2387 char buf[max_tracer_type_len+1]; 2938 int ret = 0;
2388 int i;
2389 size_t ret;
2390
2391 ret = cnt;
2392
2393 if (cnt > max_tracer_type_len)
2394 cnt = max_tracer_type_len;
2395
2396 if (copy_from_user(&buf, ubuf, cnt))
2397 return -EFAULT;
2398
2399 buf[cnt] = 0;
2400
2401 /* strip ending whitespace. */
2402 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2403 buf[i] = 0;
2404 2939
2405 mutex_lock(&trace_types_lock); 2940 mutex_lock(&trace_types_lock);
2406 for (t = trace_types; t; t = t->next) { 2941 for (t = trace_types; t; t = t->next) {
@@ -2414,18 +2949,52 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2414 if (t == current_trace) 2949 if (t == current_trace)
2415 goto out; 2950 goto out;
2416 2951
2952 trace_branch_disable();
2417 if (current_trace && current_trace->reset) 2953 if (current_trace && current_trace->reset)
2418 current_trace->reset(tr); 2954 current_trace->reset(tr);
2419 2955
2420 current_trace = t; 2956 current_trace = t;
2421 if (t->init) 2957 if (t->init) {
2422 t->init(tr); 2958 ret = t->init(tr);
2959 if (ret)
2960 goto out;
2961 }
2423 2962
2963 trace_branch_enable(tr);
2424 out: 2964 out:
2425 mutex_unlock(&trace_types_lock); 2965 mutex_unlock(&trace_types_lock);
2426 2966
2427 if (ret > 0) 2967 return ret;
2428 filp->f_pos += ret; 2968}
2969
2970static ssize_t
2971tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2972 size_t cnt, loff_t *ppos)
2973{
2974 char buf[max_tracer_type_len+1];
2975 int i;
2976 size_t ret;
2977 int err;
2978
2979 ret = cnt;
2980
2981 if (cnt > max_tracer_type_len)
2982 cnt = max_tracer_type_len;
2983
2984 if (copy_from_user(&buf, ubuf, cnt))
2985 return -EFAULT;
2986
2987 buf[cnt] = 0;
2988
2989 /* strip ending whitespace. */
2990 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2991 buf[i] = 0;
2992
2993 err = tracing_set_tracer(buf);
2994 if (err)
2995 return err;
2996
2997 filp->f_pos += ret;
2429 2998
2430 return ret; 2999 return ret;
2431} 3000}
@@ -2492,6 +3061,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2492 return -ENOMEM; 3061 return -ENOMEM;
2493 3062
2494 mutex_lock(&trace_types_lock); 3063 mutex_lock(&trace_types_lock);
3064
3065 /* trace pipe does not show start of buffer */
3066 cpus_setall(iter->started);
3067
2495 iter->tr = &global_trace; 3068 iter->tr = &global_trace;
2496 iter->trace = current_trace; 3069 iter->trace = current_trace;
2497 filp->private_data = iter; 3070 filp->private_data = iter;
@@ -2667,7 +3240,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
2667 char buf[64]; 3240 char buf[64];
2668 int r; 3241 int r;
2669 3242
2670 r = sprintf(buf, "%lu\n", tr->entries); 3243 r = sprintf(buf, "%lu\n", tr->entries >> 10);
2671 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3244 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2672} 3245}
2673 3246
@@ -2678,7 +3251,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2678 unsigned long val; 3251 unsigned long val;
2679 char buf[64]; 3252 char buf[64];
2680 int ret, cpu; 3253 int ret, cpu;
2681 struct trace_array *tr = filp->private_data;
2682 3254
2683 if (cnt >= sizeof(buf)) 3255 if (cnt >= sizeof(buf))
2684 return -EINVAL; 3256 return -EINVAL;
@@ -2698,12 +3270,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2698 3270
2699 mutex_lock(&trace_types_lock); 3271 mutex_lock(&trace_types_lock);
2700 3272
2701 if (tr->ctrl) { 3273 tracing_stop();
2702 cnt = -EBUSY;
2703 pr_info("ftrace: please disable tracing"
2704 " before modifying buffer size\n");
2705 goto out;
2706 }
2707 3274
2708 /* disable all cpu buffers */ 3275 /* disable all cpu buffers */
2709 for_each_tracing_cpu(cpu) { 3276 for_each_tracing_cpu(cpu) {
@@ -2713,6 +3280,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2713 atomic_inc(&max_tr.data[cpu]->disabled); 3280 atomic_inc(&max_tr.data[cpu]->disabled);
2714 } 3281 }
2715 3282
3283 /* value is in KB */
3284 val <<= 10;
3285
2716 if (val != global_trace.entries) { 3286 if (val != global_trace.entries) {
2717 ret = ring_buffer_resize(global_trace.buffer, val); 3287 ret = ring_buffer_resize(global_trace.buffer, val);
2718 if (ret < 0) { 3288 if (ret < 0) {
@@ -2751,6 +3321,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2751 atomic_dec(&max_tr.data[cpu]->disabled); 3321 atomic_dec(&max_tr.data[cpu]->disabled);
2752 } 3322 }
2753 3323
3324 tracing_start();
2754 max_tr.entries = global_trace.entries; 3325 max_tr.entries = global_trace.entries;
2755 mutex_unlock(&trace_types_lock); 3326 mutex_unlock(&trace_types_lock);
2756 3327
@@ -2773,9 +3344,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
2773{ 3344{
2774 char *buf; 3345 char *buf;
2775 char *end; 3346 char *end;
2776 struct trace_array *tr = &global_trace;
2777 3347
2778 if (!tr->ctrl || tracing_disabled) 3348 if (tracing_disabled)
2779 return -EINVAL; 3349 return -EINVAL;
2780 3350
2781 if (cnt > TRACE_BUF_SIZE) 3351 if (cnt > TRACE_BUF_SIZE)
@@ -2841,22 +3411,38 @@ static struct file_operations tracing_mark_fops = {
2841 3411
2842#ifdef CONFIG_DYNAMIC_FTRACE 3412#ifdef CONFIG_DYNAMIC_FTRACE
2843 3413
3414int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3415{
3416 return 0;
3417}
3418
2844static ssize_t 3419static ssize_t
2845tracing_read_long(struct file *filp, char __user *ubuf, 3420tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2846 size_t cnt, loff_t *ppos) 3421 size_t cnt, loff_t *ppos)
2847{ 3422{
3423 static char ftrace_dyn_info_buffer[1024];
3424 static DEFINE_MUTEX(dyn_info_mutex);
2848 unsigned long *p = filp->private_data; 3425 unsigned long *p = filp->private_data;
2849 char buf[64]; 3426 char *buf = ftrace_dyn_info_buffer;
3427 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2850 int r; 3428 int r;
2851 3429
2852 r = sprintf(buf, "%ld\n", *p); 3430 mutex_lock(&dyn_info_mutex);
3431 r = sprintf(buf, "%ld ", *p);
2853 3432
2854 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3433 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3434 buf[r++] = '\n';
3435
3436 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3437
3438 mutex_unlock(&dyn_info_mutex);
3439
3440 return r;
2855} 3441}
2856 3442
2857static struct file_operations tracing_read_long_fops = { 3443static struct file_operations tracing_dyn_info_fops = {
2858 .open = tracing_open_generic, 3444 .open = tracing_open_generic,
2859 .read = tracing_read_long, 3445 .read = tracing_read_dyn_info,
2860}; 3446};
2861#endif 3447#endif
2862 3448
@@ -2897,10 +3483,10 @@ static __init int tracer_init_debugfs(void)
2897 if (!entry) 3483 if (!entry)
2898 pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); 3484 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2899 3485
2900 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, 3486 entry = debugfs_create_file("trace_options", 0644, d_tracer,
2901 NULL, &tracing_iter_fops); 3487 NULL, &tracing_iter_fops);
2902 if (!entry) 3488 if (!entry)
2903 pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); 3489 pr_warning("Could not create debugfs 'trace_options' entry\n");
2904 3490
2905 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3491 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2906 NULL, &tracing_cpumask_fops); 3492 NULL, &tracing_cpumask_fops);
@@ -2950,11 +3536,11 @@ static __init int tracer_init_debugfs(void)
2950 pr_warning("Could not create debugfs " 3536 pr_warning("Could not create debugfs "
2951 "'trace_pipe' entry\n"); 3537 "'trace_pipe' entry\n");
2952 3538
2953 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 3539 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
2954 &global_trace, &tracing_entries_fops); 3540 &global_trace, &tracing_entries_fops);
2955 if (!entry) 3541 if (!entry)
2956 pr_warning("Could not create debugfs " 3542 pr_warning("Could not create debugfs "
2957 "'trace_entries' entry\n"); 3543 "'buffer_size_kb' entry\n");
2958 3544
2959 entry = debugfs_create_file("trace_marker", 0220, d_tracer, 3545 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2960 NULL, &tracing_mark_fops); 3546 NULL, &tracing_mark_fops);
@@ -2965,7 +3551,7 @@ static __init int tracer_init_debugfs(void)
2965#ifdef CONFIG_DYNAMIC_FTRACE 3551#ifdef CONFIG_DYNAMIC_FTRACE
2966 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3552 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2967 &ftrace_update_tot_cnt, 3553 &ftrace_update_tot_cnt,
2968 &tracing_read_long_fops); 3554 &tracing_dyn_info_fops);
2969 if (!entry) 3555 if (!entry)
2970 pr_warning("Could not create debugfs " 3556 pr_warning("Could not create debugfs "
2971 "'dyn_ftrace_total_info' entry\n"); 3557 "'dyn_ftrace_total_info' entry\n");
@@ -2988,7 +3574,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2988 unsigned long flags, irq_flags; 3574 unsigned long flags, irq_flags;
2989 int cpu, len = 0, size, pc; 3575 int cpu, len = 0, size, pc;
2990 3576
2991 if (!tr->ctrl || tracing_disabled) 3577 if (tracing_disabled)
2992 return 0; 3578 return 0;
2993 3579
2994 pc = preempt_count(); 3580 pc = preempt_count();
@@ -3046,7 +3632,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
3046static int trace_panic_handler(struct notifier_block *this, 3632static int trace_panic_handler(struct notifier_block *this,
3047 unsigned long event, void *unused) 3633 unsigned long event, void *unused)
3048{ 3634{
3049 ftrace_dump(); 3635 if (ftrace_dump_on_oops)
3636 ftrace_dump();
3050 return NOTIFY_OK; 3637 return NOTIFY_OK;
3051} 3638}
3052 3639
@@ -3062,7 +3649,8 @@ static int trace_die_handler(struct notifier_block *self,
3062{ 3649{
3063 switch (val) { 3650 switch (val) {
3064 case DIE_OOPS: 3651 case DIE_OOPS:
3065 ftrace_dump(); 3652 if (ftrace_dump_on_oops)
3653 ftrace_dump();
3066 break; 3654 break;
3067 default: 3655 default:
3068 break; 3656 break;
@@ -3103,7 +3691,6 @@ trace_printk_seq(struct trace_seq *s)
3103 trace_seq_reset(s); 3691 trace_seq_reset(s);
3104} 3692}
3105 3693
3106
3107void ftrace_dump(void) 3694void ftrace_dump(void)
3108{ 3695{
3109 static DEFINE_SPINLOCK(ftrace_dump_lock); 3696 static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3128,6 +3715,9 @@ void ftrace_dump(void)
3128 atomic_inc(&global_trace.data[cpu]->disabled); 3715 atomic_inc(&global_trace.data[cpu]->disabled);
3129 } 3716 }
3130 3717
3718 /* don't look at user memory in panic mode */
3719 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3720
3131 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3721 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3132 3722
3133 iter.tr = &global_trace; 3723 iter.tr = &global_trace;
@@ -3221,7 +3811,6 @@ __init static int tracer_alloc_buffers(void)
3221#endif 3811#endif
3222 3812
3223 /* All seems OK, enable tracing */ 3813 /* All seems OK, enable tracing */
3224 global_trace.ctrl = tracer_enabled;
3225 tracing_disabled = 0; 3814 tracing_disabled = 0;
3226 3815
3227 atomic_notifier_chain_register(&panic_notifier_list, 3816 atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad052707..f96f4e787ff3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h>
11 12
12enum trace_type { 13enum trace_type {
13 __TRACE_FIRST_TYPE = 0, 14 __TRACE_FIRST_TYPE = 0,
@@ -21,7 +22,14 @@ enum trace_type {
21 TRACE_SPECIAL, 22 TRACE_SPECIAL,
22 TRACE_MMIO_RW, 23 TRACE_MMIO_RW,
23 TRACE_MMIO_MAP, 24 TRACE_MMIO_MAP,
24 TRACE_BOOT, 25 TRACE_BRANCH,
26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET,
28 TRACE_GRAPH_RET,
29 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK,
31 TRACE_BTS,
32 TRACE_POWER,
25 33
26 __TRACE_LAST_TYPE 34 __TRACE_LAST_TYPE
27}; 35};
@@ -38,6 +46,7 @@ struct trace_entry {
38 unsigned char flags; 46 unsigned char flags;
39 unsigned char preempt_count; 47 unsigned char preempt_count;
40 int pid; 48 int pid;
49 int tgid;
41}; 50};
42 51
43/* 52/*
@@ -48,6 +57,18 @@ struct ftrace_entry {
48 unsigned long ip; 57 unsigned long ip;
49 unsigned long parent_ip; 58 unsigned long parent_ip;
50}; 59};
60
61/* Function call entry */
62struct ftrace_graph_ent_entry {
63 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent;
65};
66
67/* Function return entry */
68struct ftrace_graph_ret_entry {
69 struct trace_entry ent;
70 struct ftrace_graph_ret ret;
71};
51extern struct tracer boot_tracer; 72extern struct tracer boot_tracer;
52 73
53/* 74/*
@@ -85,6 +106,11 @@ struct stack_entry {
85 unsigned long caller[FTRACE_STACK_ENTRIES]; 106 unsigned long caller[FTRACE_STACK_ENTRIES];
86}; 107};
87 108
109struct userstack_entry {
110 struct trace_entry ent;
111 unsigned long caller[FTRACE_STACK_ENTRIES];
112};
113
88/* 114/*
89 * ftrace_printk entry: 115 * ftrace_printk entry:
90 */ 116 */
@@ -112,9 +138,35 @@ struct trace_mmiotrace_map {
112 struct mmiotrace_map map; 138 struct mmiotrace_map map;
113}; 139};
114 140
115struct trace_boot { 141struct trace_boot_call {
142 struct trace_entry ent;
143 struct boot_trace_call boot_call;
144};
145
146struct trace_boot_ret {
147 struct trace_entry ent;
148 struct boot_trace_ret boot_ret;
149};
150
151#define TRACE_FUNC_SIZE 30
152#define TRACE_FILE_SIZE 20
153struct trace_branch {
154 struct trace_entry ent;
155 unsigned line;
156 char func[TRACE_FUNC_SIZE+1];
157 char file[TRACE_FILE_SIZE+1];
158 char correct;
159};
160
161struct bts_entry {
162 struct trace_entry ent;
163 unsigned long from;
164 unsigned long to;
165};
166
167struct trace_power {
116 struct trace_entry ent; 168 struct trace_entry ent;
117 struct boot_trace initcall; 169 struct power_trace state_data;
118}; 170};
119 171
120/* 172/*
@@ -172,7 +224,6 @@ struct trace_iterator;
172struct trace_array { 224struct trace_array {
173 struct ring_buffer *buffer; 225 struct ring_buffer *buffer;
174 unsigned long entries; 226 unsigned long entries;
175 long ctrl;
176 int cpu; 227 int cpu;
177 cycle_t time_start; 228 cycle_t time_start;
178 struct task_struct *waiter; 229 struct task_struct *waiter;
@@ -212,13 +263,22 @@ extern void __ftrace_bad_type(void);
212 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 263 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
213 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ 264 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
214 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 265 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
266 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
215 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 267 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
216 IF_ASSIGN(var, ent, struct special_entry, 0); \ 268 IF_ASSIGN(var, ent, struct special_entry, 0); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 269 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
218 TRACE_MMIO_RW); \ 270 TRACE_MMIO_RW); \
219 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 271 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
220 TRACE_MMIO_MAP); \ 272 TRACE_MMIO_MAP); \
221 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ 273 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
274 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
275 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
276 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
277 TRACE_GRAPH_ENT); \
278 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
279 TRACE_GRAPH_RET); \
280 IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\
281 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
222 __ftrace_bad_type(); \ 282 __ftrace_bad_type(); \
223 } while (0) 283 } while (0)
224 284
@@ -229,29 +289,56 @@ enum print_line_t {
229 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 289 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
230}; 290};
231 291
292
293/*
294 * An option specific to a tracer. This is a boolean value.
295 * The bit is the bit index that sets its value on the
296 * flags value in struct tracer_flags.
297 */
298struct tracer_opt {
299 const char *name; /* Will appear on the trace_options file */
300 u32 bit; /* Mask assigned in val field in tracer_flags */
301};
302
303/*
304 * The set of specific options for a tracer. Your tracer
305 * have to set the initial value of the flags val.
306 */
307struct tracer_flags {
308 u32 val;
309 struct tracer_opt *opts;
310};
311
312/* Makes it easier to define a tracer opt */
313#define TRACER_OPT(s, b) .name = #s, .bit = b
314
232/* 315/*
233 * A specific tracer, represented by methods that operate on a trace array: 316 * A specific tracer, represented by methods that operate on a trace array:
234 */ 317 */
235struct tracer { 318struct tracer {
236 const char *name; 319 const char *name;
237 void (*init)(struct trace_array *tr); 320 /* Your tracer should raise a warning if init fails */
321 int (*init)(struct trace_array *tr);
238 void (*reset)(struct trace_array *tr); 322 void (*reset)(struct trace_array *tr);
323 void (*start)(struct trace_array *tr);
324 void (*stop)(struct trace_array *tr);
239 void (*open)(struct trace_iterator *iter); 325 void (*open)(struct trace_iterator *iter);
240 void (*pipe_open)(struct trace_iterator *iter); 326 void (*pipe_open)(struct trace_iterator *iter);
241 void (*close)(struct trace_iterator *iter); 327 void (*close)(struct trace_iterator *iter);
242 void (*start)(struct trace_iterator *iter);
243 void (*stop)(struct trace_iterator *iter);
244 ssize_t (*read)(struct trace_iterator *iter, 328 ssize_t (*read)(struct trace_iterator *iter,
245 struct file *filp, char __user *ubuf, 329 struct file *filp, char __user *ubuf,
246 size_t cnt, loff_t *ppos); 330 size_t cnt, loff_t *ppos);
247 void (*ctrl_update)(struct trace_array *tr);
248#ifdef CONFIG_FTRACE_STARTUP_TEST 331#ifdef CONFIG_FTRACE_STARTUP_TEST
249 int (*selftest)(struct tracer *trace, 332 int (*selftest)(struct tracer *trace,
250 struct trace_array *tr); 333 struct trace_array *tr);
251#endif 334#endif
335 void (*print_header)(struct seq_file *m);
252 enum print_line_t (*print_line)(struct trace_iterator *iter); 336 enum print_line_t (*print_line)(struct trace_iterator *iter);
337 /* If you handled the flag setting, return 0 */
338 int (*set_flag)(u32 old_flags, u32 bit, int set);
253 struct tracer *next; 339 struct tracer *next;
254 int print_max; 340 int print_max;
341 struct tracer_flags *flags;
255}; 342};
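The tracer_flags/tracer_opt pair, together with the new set_flag callback, is what set_tracer_option() and the trace_options file walk when a per-tracer option is toggled. Below is a minimal sketch of how a tracer could wire this up; the tracer name, option name and bit value are hypothetical, only the struct fields and the TRACER_OPT macro come from this patch.

/* Sketch only: assumes it lives under kernel/trace/ and includes "trace.h". */
#include "trace.h"

#define MY_OPT_VERBOSE	0x1	/* hypothetical mask stored in tracer_flags.val */

static struct tracer_opt my_tracer_opts[] = {
	/* shows up in trace_options as "verbose" / "noverbose" */
	{ TRACER_OPT(verbose, MY_OPT_VERBOSE) },
	{ }	/* NULL name terminates the walk in tracing_trace_options_read() */
};

static struct tracer_flags my_tracer_flags = {
	.val	= 0,			/* initial value of all option bits */
	.opts	= my_tracer_opts,
};

/* Called from set_tracer_option(); returning 0 lets trace.c update flags->val. */
static int my_set_flag(u32 old_flags, u32 bit, int set)
{
	return 0;
}

static struct tracer my_tracer __read_mostly = {
	.name		= "my_tracer",		/* hypothetical */
	.flags		= &my_tracer_flags,
	.set_flag	= my_set_flag,
};

A real tracer would also fill in .init and register itself with register_tracer(), as the branch and boot tracers further down do.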
256 343
257struct trace_seq { 344struct trace_seq {
@@ -279,8 +366,11 @@ struct trace_iterator {
279 unsigned long iter_flags; 366 unsigned long iter_flags;
280 loff_t pos; 367 loff_t pos;
281 long idx; 368 long idx;
369
370 cpumask_t started;
282}; 371};
283 372
373int tracing_is_enabled(void);
284void trace_wake_up(void); 374void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu); 375void tracing_reset(struct trace_array *tr, int cpu);
286int tracing_open_generic(struct inode *inode, struct file *filp); 376int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -321,8 +411,17 @@ void trace_function(struct trace_array *tr,
321 unsigned long parent_ip, 411 unsigned long parent_ip,
322 unsigned long flags, int pc); 412 unsigned long flags, int pc);
323 413
414void trace_graph_return(struct ftrace_graph_ret *trace);
415void trace_graph_entry(struct ftrace_graph_ent *trace);
416void trace_bts(struct trace_array *tr,
417 unsigned long from,
418 unsigned long to);
419
324void tracing_start_cmdline_record(void); 420void tracing_start_cmdline_record(void);
325void tracing_stop_cmdline_record(void); 421void tracing_stop_cmdline_record(void);
422void tracing_sched_switch_assign_trace(struct trace_array *tr);
423void tracing_stop_sched_switch_record(void);
424void tracing_start_sched_switch_record(void);
326int register_tracer(struct tracer *type); 425int register_tracer(struct tracer *type);
327void unregister_tracer(struct tracer *type); 426void unregister_tracer(struct tracer *type);
328 427
@@ -358,6 +457,7 @@ struct tracer_switch_ops {
358 struct tracer_switch_ops *next; 457 struct tracer_switch_ops *next;
359}; 458};
360 459
460char *trace_find_cmdline(int pid);
361#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
362 462
363#ifdef CONFIG_DYNAMIC_FTRACE 463#ifdef CONFIG_DYNAMIC_FTRACE
@@ -383,12 +483,18 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
383 struct trace_array *tr); 483 struct trace_array *tr);
384extern int trace_selftest_startup_sysprof(struct tracer *trace, 484extern int trace_selftest_startup_sysprof(struct tracer *trace,
385 struct trace_array *tr); 485 struct trace_array *tr);
486extern int trace_selftest_startup_branch(struct tracer *trace,
487 struct trace_array *tr);
386#endif /* CONFIG_FTRACE_STARTUP_TEST */ 488#endif /* CONFIG_FTRACE_STARTUP_TEST */
387 489
388extern void *head_page(struct trace_array_cpu *data); 490extern void *head_page(struct trace_array_cpu *data);
389extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
390extern void trace_seq_print_cont(struct trace_seq *s, 492extern void trace_seq_print_cont(struct trace_seq *s,
391 struct trace_iterator *iter); 493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
392extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
393 size_t cnt); 499 size_t cnt);
394extern long ns2usecs(cycle_t nsec); 500extern long ns2usecs(cycle_t nsec);
@@ -396,6 +502,17 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
396 502
397extern unsigned long trace_flags; 503extern unsigned long trace_flags;
398 504
505/* Standard output formatting function used for function return traces */
506#ifdef CONFIG_FUNCTION_GRAPH_TRACER
507extern enum print_line_t print_graph_function(struct trace_iterator *iter);
508#else
509static inline enum print_line_t
510print_graph_function(struct trace_iterator *iter)
511{
512 return TRACE_TYPE_UNHANDLED;
513}
514#endif
515
399/* 516/*
400 * trace_iterator_flags is an enumeration that defines bit 517 * trace_iterator_flags is an enumeration that defines bit
401 * positions into trace_flags that controls the output. 518 * positions into trace_flags that controls the output.
@@ -415,8 +532,92 @@ enum trace_iterator_flags {
415 TRACE_ITER_STACKTRACE = 0x100, 532 TRACE_ITER_STACKTRACE = 0x100,
416 TRACE_ITER_SCHED_TREE = 0x200, 533 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400, 534 TRACE_ITER_PRINTK = 0x400,
535 TRACE_ITER_PREEMPTONLY = 0x800,
536 TRACE_ITER_BRANCH = 0x1000,
537 TRACE_ITER_ANNOTATE = 0x2000,
538 TRACE_ITER_USERSTACKTRACE = 0x4000,
539 TRACE_ITER_SYM_USEROBJ = 0x8000
418}; 540};
419 541
542/*
543 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
544 * control the output of kernel symbols.
545 */
546#define TRACE_ITER_SYM_MASK \
547 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
548
420extern struct tracer nop_trace; 549extern struct tracer nop_trace;
421 550
551/**
552 * ftrace_preempt_disable - disable preemption scheduler safe
553 *
554 * When tracing can happen inside the scheduler, there exist
555 * cases where the tracing might happen before the need_resched
556 * flag is checked. If this happens and the tracer calls
557 * preempt_enable (after a disable), a schedule might take place
558 * causing an infinite recursion.
559 *
560 * To prevent this, we read the need_resched flag before
561 * disabling preemption. When we want to enable preemption we
562 * check the flag; if it is set, we call preempt_enable_no_resched.
563 * Otherwise, we call preempt_enable.
564 *
565 * The rationale for doing the above is that if need_resched is set
566 * and we have yet to reschedule, we are either in an atomic location
567 * (where we do not need to check for scheduling) or we are inside
568 * the scheduler and do not want to resched.
569 */
570static inline int ftrace_preempt_disable(void)
571{
572 int resched;
573
574 resched = need_resched();
575 preempt_disable_notrace();
576
577 return resched;
578}
579
580/**
581 * ftrace_preempt_enable - enable preemption scheduler safe
582 * @resched: the return value from ftrace_preempt_disable
583 *
584 * This is a scheduler safe way to enable preemption and not miss
585 * any preemption checks. The earlier disable saved the state of preemption.
586 * If resched is set, then we are either inside an atomic section or
587 * inside the scheduler (we would have already scheduled
588 * otherwise). In this case, we do not want to call normal
589 * preempt_enable, but preempt_enable_no_resched instead.
590 */
591static inline void ftrace_preempt_enable(int resched)
592{
593 if (resched)
594 preempt_enable_no_resched_notrace();
595 else
596 preempt_enable_notrace();
597}
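As a usage note for the two helpers above, here is a hedged sketch of the pattern a tracing callback is expected to follow; the hook name and its arguments are illustrative only, not part of this patch.

/*
 * Illustrative only: a tracer hook that must not reschedule from its own
 * preempt_enable(). Assumes the ftrace_preempt_*() helpers above are
 * visible via "trace.h".
 */
static void my_trace_hook(unsigned long ip, unsigned long parent_ip)
{
	int resched;

	/* remember need_resched, then disable preemption without tracing */
	resched = ftrace_preempt_disable();

	/* ... reserve and commit a ring buffer event here ... */

	/* re-enable; skips the resched path if need_resched was already set */
	ftrace_preempt_enable(resched);
}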
598
599#ifdef CONFIG_BRANCH_TRACER
600extern int enable_branch_tracing(struct trace_array *tr);
601extern void disable_branch_tracing(void);
602static inline int trace_branch_enable(struct trace_array *tr)
603{
604 if (trace_flags & TRACE_ITER_BRANCH)
605 return enable_branch_tracing(tr);
606 return 0;
607}
608static inline void trace_branch_disable(void)
609{
610 /* due to races, always disable */
611 disable_branch_tracing();
612}
613#else
614static inline int trace_branch_enable(struct trace_array *tr)
615{
616 return 0;
617}
618static inline void trace_branch_disable(void)
619{
620}
621#endif /* CONFIG_BRANCH_TRACER */
622
422#endif /* _LINUX_KERNEL_TRACE_H */ 623#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..a4fa2c57e34e 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,73 +13,117 @@
13#include "trace.h" 13#include "trace.h"
14 14
15static struct trace_array *boot_trace; 15static struct trace_array *boot_trace;
16static int trace_boot_enabled; 16static bool pre_initcalls_finished;
17 17
18 18/* Tells the boot tracer that the pre_smp_initcalls are finished.
19/* Should be started after do_pre_smp_initcalls() in init/main.c */ 19 * So we are ready.
 20 * It doesn't enable sched events tracing, however.
21 * You have to call enable_boot_trace to do so.
22 */
20void start_boot_trace(void) 23void start_boot_trace(void)
21{ 24{
22 trace_boot_enabled = 1; 25 pre_initcalls_finished = true;
23} 26}
24 27
25void stop_boot_trace(void) 28void enable_boot_trace(void)
26{ 29{
27 trace_boot_enabled = 0; 30 if (pre_initcalls_finished)
31 tracing_start_sched_switch_record();
28} 32}
29 33
30void reset_boot_trace(struct trace_array *tr) 34void disable_boot_trace(void)
31{ 35{
32 stop_boot_trace(); 36 if (pre_initcalls_finished)
37 tracing_stop_sched_switch_record();
33} 38}
34 39
35static void boot_trace_init(struct trace_array *tr) 40static void reset_boot_trace(struct trace_array *tr)
36{ 41{
37 int cpu; 42 int cpu;
38 boot_trace = tr;
39 43
40 trace_boot_enabled = 0; 44 tr->time_start = ftrace_now(tr->cpu);
45
46 for_each_online_cpu(cpu)
47 tracing_reset(tr, cpu);
48}
49
50static int boot_trace_init(struct trace_array *tr)
51{
52 int cpu;
53 boot_trace = tr;
41 54
42 for_each_cpu_mask(cpu, cpu_possible_map) 55 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu); 56 tracing_reset(tr, cpu);
57
58 tracing_sched_switch_assign_trace(tr);
59 return 0;
44} 60}
45 61
46static void boot_trace_ctrl_update(struct trace_array *tr) 62static enum print_line_t
63initcall_call_print_line(struct trace_iterator *iter)
47{ 64{
48 if (tr->ctrl) 65 struct trace_entry *entry = iter->ent;
49 start_boot_trace(); 66 struct trace_seq *s = &iter->seq;
67 struct trace_boot_call *field;
68 struct boot_trace_call *call;
69 u64 ts;
70 unsigned long nsec_rem;
71 int ret;
72
73 trace_assign_type(field, entry);
74 call = &field->boot_call;
75 ts = iter->ts;
76 nsec_rem = do_div(ts, 1000000000);
77
78 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
79 (unsigned long)ts, nsec_rem, call->func, call->caller);
80
81 if (!ret)
82 return TRACE_TYPE_PARTIAL_LINE;
50 else 83 else
51 stop_boot_trace(); 84 return TRACE_TYPE_HANDLED;
52} 85}
53 86
54static enum print_line_t initcall_print_line(struct trace_iterator *iter) 87static enum print_line_t
88initcall_ret_print_line(struct trace_iterator *iter)
55{ 89{
56 int ret;
57 struct trace_entry *entry = iter->ent; 90 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq; 91 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime); 92 struct trace_boot_ret *field;
62 struct timespec rettime = ktime_to_timespec(it->rettime); 93 struct boot_trace_ret *init_ret;
63 94 u64 ts;
64 if (entry->type == TRACE_BOOT) { 95 unsigned long nsec_rem;
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", 96 int ret;
66 calltime.tv_sec, 97
67 calltime.tv_nsec, 98 trace_assign_type(field, entry);
68 it->func, it->caller); 99 init_ret = &field->boot_ret;
69 if (!ret) 100 ts = iter->ts;
70 return TRACE_TYPE_PARTIAL_LINE; 101 nsec_rem = do_div(ts, 1000000000);
71 102
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " 103 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n", 104 "returned %d after %llu msecs\n",
74 rettime.tv_sec, 105 (unsigned long) ts,
75 rettime.tv_nsec, 106 nsec_rem,
76 it->func, it->result, it->duration); 107 init_ret->func, init_ret->result, init_ret->duration);
77 108
78 if (!ret) 109 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE; 110 return TRACE_TYPE_PARTIAL_LINE;
111 else
80 return TRACE_TYPE_HANDLED; 112 return TRACE_TYPE_HANDLED;
113}
114
115static enum print_line_t initcall_print_line(struct trace_iterator *iter)
116{
117 struct trace_entry *entry = iter->ent;
118
119 switch (entry->type) {
120 case TRACE_BOOT_CALL:
121 return initcall_call_print_line(iter);
122 case TRACE_BOOT_RET:
123 return initcall_ret_print_line(iter);
124 default:
125 return TRACE_TYPE_UNHANDLED;
81 } 126 }
82 return TRACE_TYPE_UNHANDLED;
83} 127}
84 128
85struct tracer boot_tracer __read_mostly = 129struct tracer boot_tracer __read_mostly =
@@ -87,27 +131,53 @@ struct tracer boot_tracer __read_mostly =
87 .name = "initcall", 131 .name = "initcall",
88 .init = boot_trace_init, 132 .init = boot_trace_init,
89 .reset = reset_boot_trace, 133 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line, 134 .print_line = initcall_print_line,
92}; 135};
93 136
94void trace_boot(struct boot_trace *it, initcall_t fn) 137void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
95{ 138{
96 struct ring_buffer_event *event; 139 struct ring_buffer_event *event;
97 struct trace_boot *entry; 140 struct trace_boot_call *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags; 141 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace; 142 struct trace_array *tr = boot_trace;
101 143
102 if (!trace_boot_enabled) 144 if (!pre_initcalls_finished)
103 return; 145 return;
104 146
105 /* Get its name now since this function could 147 /* Get its name now since this function could
106 * disappear because it is in the .init section. 148 * disappear because it is in the .init section.
107 */ 149 */
108 sprint_symbol(it->func, (unsigned long)fn); 150 sprint_symbol(bt->func, (unsigned long)fn);
151 preempt_disable();
152
153 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
154 &irq_flags);
155 if (!event)
156 goto out;
157 entry = ring_buffer_event_data(event);
158 tracing_generic_entry_update(&entry->ent, 0, 0);
159 entry->ent.type = TRACE_BOOT_CALL;
160 entry->boot_call = *bt;
161 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
162
163 trace_wake_up();
164
165 out:
166 preempt_enable();
167}
168
169void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
170{
171 struct ring_buffer_event *event;
172 struct trace_boot_ret *entry;
173 unsigned long irq_flags;
174 struct trace_array *tr = boot_trace;
175
176 if (!pre_initcalls_finished)
177 return;
178
179 sprint_symbol(bt->func, (unsigned long)fn);
109 preempt_disable(); 180 preempt_disable();
110 data = tr->data[smp_processor_id()];
111 181
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 182 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags); 183 &irq_flags);
@@ -115,8 +185,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
115 goto out; 185 goto out;
116 entry = ring_buffer_event_data(event); 186 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0); 187 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT; 188 entry->ent.type = TRACE_BOOT_RET;
119 entry->initcall = *it; 189 entry->boot_ret = *bt;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 190 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121 191
122 trace_wake_up(); 192 trace_wake_up();
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 000000000000..bc972753568d
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,342 @@
1/*
2 * unlikely profiler
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/irqflags.h>
10#include <linux/debugfs.h>
11#include <linux/uaccess.h>
12#include <linux/module.h>
13#include <linux/ftrace.h>
14#include <linux/hash.h>
15#include <linux/fs.h>
16#include <asm/local.h>
17#include "trace.h"
18
19#ifdef CONFIG_BRANCH_TRACER
20
21static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex);
23static struct trace_array *branch_tracer;
24
25static void
26probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
27{
28 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event;
30 struct trace_branch *entry;
31 unsigned long flags, irq_flags;
32 int cpu, pc;
33 const char *p;
34
35 /*
36 * I would love to save just the ftrace_branch_data pointer, but
37 * this code can also be used by modules. Ugly things can happen
38 * if the module is unloaded, and then we go and read the
39 * pointer. This is slower, but much safer.
40 */
41
42 if (unlikely(!tr))
43 return;
44
45 raw_local_irq_save(flags);
46 cpu = raw_smp_processor_id();
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
51 &irq_flags);
52 if (!event)
53 goto out;
54
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59
60 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file);
62 while (p >= f->file && *p != '/')
63 p--;
64 p++;
65
66 strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
67 strncpy(entry->file, p, TRACE_FILE_SIZE);
68 entry->func[TRACE_FUNC_SIZE] = 0;
69 entry->file[TRACE_FILE_SIZE] = 0;
70 entry->line = f->line;
71 entry->correct = val == expect;
72
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
74
75 out:
76 atomic_dec(&tr->data[cpu]->disabled);
77 raw_local_irq_restore(flags);
78}
79
80static inline
81void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
82{
83 if (!branch_tracing_enabled)
84 return;
85
86 probe_likely_condition(f, val, expect);
87}
88
89int enable_branch_tracing(struct trace_array *tr)
90{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr;
95 /*
96 * Must be seen before enabling. The reader is a condition
97 * check, so we do not need a matching rmb()
98 */
99 smp_wmb();
100 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex);
102
103 return ret;
104}
105
106void disable_branch_tracing(void)
107{
108 mutex_lock(&branch_tracing_mutex);
109
110 if (!branch_tracing_enabled)
111 goto out_unlock;
112
113 branch_tracing_enabled--;
114
115 out_unlock:
116 mutex_unlock(&branch_tracing_mutex);
117}
118
119static void start_branch_trace(struct trace_array *tr)
120{
121 enable_branch_tracing(tr);
122}
123
124static void stop_branch_trace(struct trace_array *tr)
125{
126 disable_branch_tracing();
127}
128
129static int branch_trace_init(struct trace_array *tr)
130{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr);
137 return 0;
138}
139
140static void branch_trace_reset(struct trace_array *tr)
141{
142 stop_branch_trace(tr);
143}
144
145struct tracer branch_trace __read_mostly =
146{
147 .name = "branch",
148 .init = branch_trace_init,
149 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch,
152#endif
153};
154
155__init static int init_branch_trace(void)
156{
157 return register_tracer(&branch_trace);
158}
159
160device_initcall(init_branch_trace);
161#else
162static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
164{
165}
166#endif /* CONFIG_BRANCH_TRACER */
167
168void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
169{
170 /*
171 * I would love to have a trace point here instead, but the
172 * trace point code is so inundated with unlikely and likely
173 * conditions that the recursive nightmare that exists is too
174 * much to try to get working. At least for now.
175 */
176 trace_likely_condition(f, val, expect);
177
178 /* FIXME: Make this atomic! */
179 if (val == expect)
180 f->correct++;
181 else
182 f->incorrect++;
183}
184EXPORT_SYMBOL(ftrace_likely_update);
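For orientation, ftrace_likely_update() is fed by the instrumented likely()/unlikely() annotations in include/linux/compiler.h (changed elsewhere in this series but not shown in this hunk). The following is a rough sketch of that caller side, assuming ftrace_branch_data carries the func/file/line fields used above; the real macro additionally places the data in a dedicated section so the profile_*_branch files can walk it.

/* Sketch of an annotated-branch check; names and details are illustrative. */
#define my_branch_check(cond, expect) ({				\
	static struct ftrace_branch_data ______f = {			\
		.func	= __func__,					\
		.file	= __FILE__,					\
		.line	= __LINE__,					\
	};								\
	int ______r = !!(cond);						\
	ftrace_likely_update(&______f, ______r, expect);		\
	______r;							\
})

/* e.g. likely(x) would expand to something along the lines of my_branch_check(x, 1) */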
185
186struct ftrace_pointer {
187 void *start;
188 void *stop;
189 int hit;
190};
191
192static void *
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{
195 const struct ftrace_pointer *f = m->private;
196 struct ftrace_branch_data *p = v;
197
198 (*pos)++;
199
200 if (v == (void *)1)
201 return f->start;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209}
210
211static void *t_start(struct seq_file *m, loff_t *pos)
212{
213 void *t = (void *)1;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218
219 return t;
220}
221
222static void t_stop(struct seq_file *m, void *p)
223{
224}
225
226static int t_show(struct seq_file *m, void *v)
227{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v;
230 const char *f;
231 long percent;
232
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */
247 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/')
249 f--;
250 f++;
251
252 /*
253 * The miss is overlayed on correct, and hit on incorrect.
254 */
255 if (p->correct) {
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0)
263 seq_printf(m, " X ");
264 else
265 seq_printf(m, "%3ld ", percent);
266 seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
267 return 0;
268}
269
270static struct seq_operations tracing_likely_seq_ops = {
271 .start = t_start,
272 .next = t_next,
273 .stop = t_stop,
274 .show = t_show,
275};
276
277static int tracing_branch_open(struct inode *inode, struct file *file)
278{
279 int ret;
280
281 ret = seq_open(file, &tracing_likely_seq_ops);
282 if (!ret) {
283 struct seq_file *m = file->private_data;
284 m->private = (void *)inode->i_private;
285 }
286
287 return ret;
288}
289
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295
296#ifdef CONFIG_PROFILE_ALL_BRANCHES
297extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[];
299
300static const struct ftrace_pointer ftrace_branch_pos = {
301 .start = __start_branch_profile,
302 .stop = __stop_branch_profile,
303 .hit = 1,
304};
305
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */
307
308extern unsigned long __start_annotated_branch_profile[];
309extern unsigned long __stop_annotated_branch_profile[];
310
311static const struct ftrace_pointer ftrace_annotated_branch_pos = {
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315
316static __init int ftrace_branch_init(void)
317{
318 struct dentry *d_tracer;
319 struct dentry *entry;
320
321 d_tracer = tracing_init_dentry();
322
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
324 (void *)&ftrace_annotated_branch_pos,
325 &tracing_branch_fops);
326 if (!entry)
327 pr_warning("Could not create debugfs "
328 "'profile_annotatet_branch' entry\n");
329
330#ifdef CONFIG_PROFILE_ALL_BRANCHES
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer,
332 (void *)&ftrace_branch_pos,
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338
339 return 0;
340}
341
342device_initcall(ftrace_branch_init);
diff --git a/kernel/trace/trace_bts.c b/kernel/trace/trace_bts.c
new file mode 100644
index 000000000000..23b76e4690ef
--- /dev/null
+++ b/kernel/trace/trace_bts.c
@@ -0,0 +1,276 @@
1/*
2 * BTS tracer
3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/kallsyms.h>
13
14#include <asm/ds.h>
15
16#include "trace.h"
17
18
19#define SIZEOF_BTS (1 << 13)
20
21static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23
24#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id())
26
27
28/*
29 * Information to interpret a BTS record.
30 * This will go into an in-kernel BTS interface.
31 */
32static unsigned char sizeof_field;
33static unsigned long debugctl_mask;
34
35#define sizeof_bts (3 * sizeof_field)
36
37static void bts_trace_cpuinit(struct cpuinfo_x86 *c)
38{
39 switch (c->x86) {
40 case 0x6:
41 switch (c->x86_model) {
42 case 0x0 ... 0xC:
43 break;
44 case 0xD:
45 case 0xE: /* Pentium M */
46 sizeof_field = sizeof(long);
47 debugctl_mask = (1<<6)|(1<<7);
48 break;
49 default:
50 sizeof_field = 8;
51 debugctl_mask = (1<<6)|(1<<7);
52 break;
53 }
54 break;
55 case 0xF:
56 switch (c->x86_model) {
57 case 0x0:
58 case 0x1:
59 case 0x2: /* Netburst */
60 sizeof_field = sizeof(long);
61 debugctl_mask = (1<<2)|(1<<3);
62 break;
63 default:
64 /* sorry, don't know about them */
65 break;
66 }
67 break;
68 default:
69 /* sorry, don't know about them */
70 break;
71 }
72}
73
74static inline void bts_enable(void)
75{
76 unsigned long debugctl;
77
78 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
79 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | debugctl_mask);
80}
81
82static inline void bts_disable(void)
83{
84 unsigned long debugctl;
85
86 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
87 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl & ~debugctl_mask);
88}
89
90static void bts_trace_reset(struct trace_array *tr)
91{
92 int cpu;
93
94 tr->time_start = ftrace_now(tr->cpu);
95
96 for_each_online_cpu(cpu)
97 tracing_reset(tr, cpu);
98}
99
100static void bts_trace_start_cpu(void *arg)
101{
102 this_tracer =
103 ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
104 /* ovfl = */ NULL, /* th = */ (size_t)-1);
105 if (IS_ERR(this_tracer)) {
106 this_tracer = NULL;
107 return;
108 }
109
110 bts_enable();
111}
112
113static void bts_trace_start(struct trace_array *tr)
114{
115 int cpu;
116
117 bts_trace_reset(tr);
118
119 for_each_cpu_mask(cpu, cpu_possible_map)
120 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
121}
122
123static void bts_trace_stop_cpu(void *arg)
124{
125 if (this_tracer) {
126 bts_disable();
127
128 ds_release_bts(this_tracer);
129 this_tracer = NULL;
130 }
131}
132
133static void bts_trace_stop(struct trace_array *tr)
134{
135 int cpu;
136
137 for_each_cpu_mask(cpu, cpu_possible_map)
138 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
139}
140
141static int bts_trace_init(struct trace_array *tr)
142{
143 bts_trace_cpuinit(&boot_cpu_data);
144 bts_trace_reset(tr);
145 bts_trace_start(tr);
146
147 return 0;
148}
149
150static void bts_trace_print_header(struct seq_file *m)
151{
152#ifdef __i386__
153 seq_puts(m, "# CPU# FROM TO FUNCTION\n");
154 seq_puts(m, "# | | | |\n");
155#else
156 seq_puts(m,
157 "# CPU# FROM TO FUNCTION\n");
158 seq_puts(m,
159 "# | | | |\n");
160#endif
161}
162
163static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
164{
165 struct trace_entry *entry = iter->ent;
166 struct trace_seq *seq = &iter->seq;
167 struct bts_entry *it;
168
169 trace_assign_type(it, entry);
170
171 if (entry->type == TRACE_BTS) {
172 int ret;
173#ifdef CONFIG_KALLSYMS
174 char function[KSYM_SYMBOL_LEN];
175 sprint_symbol(function, it->from);
176#else
177 char *function = "<unknown>";
178#endif
179
180 ret = trace_seq_printf(seq, "%4d 0x%lx -> 0x%lx [%s]\n",
181 entry->cpu, it->from, it->to, function);
182 if (!ret)
183 return TRACE_TYPE_PARTIAL_LINE;
184 return TRACE_TYPE_HANDLED;
185 }
186 return TRACE_TYPE_UNHANDLED;
187}
188
189void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to)
190{
191 struct ring_buffer_event *event;
192 struct bts_entry *entry;
193 unsigned long irq;
194
195 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq);
196 if (!event)
197 return;
198 entry = ring_buffer_event_data(event);
199 tracing_generic_entry_update(&entry->ent, 0, from);
200 entry->ent.type = TRACE_BTS;
201 entry->ent.cpu = smp_processor_id();
202 entry->from = from;
203 entry->to = to;
204 ring_buffer_unlock_commit(tr->buffer, event, irq);
205}
206
207static void trace_bts_at(struct trace_array *tr, size_t index)
208{
209 const void *raw = NULL;
210 unsigned long from, to;
211 int err;
212
213 err = ds_access_bts(this_tracer, index, &raw);
214 if (err < 0)
215 return;
216
217 from = *(const unsigned long *)raw;
218 to = *(const unsigned long *)((const char *)raw + sizeof_field);
219
220 trace_bts(tr, from, to);
221}
222
223static void trace_bts_cpu(void *arg)
224{
225 struct trace_array *tr = (struct trace_array *) arg;
226 size_t index = 0, end = 0, i;
227 int err;
228
229 if (!this_tracer)
230 return;
231
232 bts_disable();
233
234 err = ds_get_bts_index(this_tracer, &index);
235 if (err < 0)
236 goto out;
237
238 err = ds_get_bts_end(this_tracer, &end);
239 if (err < 0)
240 goto out;
241
242 for (i = index; i < end; i++)
243 trace_bts_at(tr, i);
244
245 for (i = 0; i < index; i++)
246 trace_bts_at(tr, i);
247
248out:
249 bts_enable();
250}
251
252static void trace_bts_prepare(struct trace_iterator *iter)
253{
254 int cpu;
255
256 for_each_cpu_mask(cpu, cpu_possible_map)
257 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
258}
259
260struct tracer bts_tracer __read_mostly =
261{
262 .name = "bts",
263 .init = bts_trace_init,
264 .reset = bts_trace_stop,
265 .print_header = bts_trace_print_header,
266 .print_line = bts_trace_print_line,
267 .start = bts_trace_start,
268 .stop = bts_trace_stop,
269 .open = trace_bts_prepare
270};
271
272__init static int init_bts_trace(void)
273{
274 return register_tracer(&bts_tracer);
275}
276device_initcall(init_bts_trace);
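A note on the raw record layout this tracer consumes: bts_trace_cpuinit() only establishes sizeof_field and the DEBUGCTL bits per CPU family, sizeof_bts is fixed at 3 * sizeof_field, and trace_bts_at() reads the "from" slot at offset 0 and the "to" slot at offset sizeof_field. A sketch of the implied record, purely for illustration -- the struct name and the meaning of the third slot are assumptions, not part of this patch:

/*
 * Illustrative only -- reconstructed from sizeof_bts and trace_bts_at().
 * Valid only when sizeof_field == sizeof(long); that is why the tracer
 * indexes the buffer by raw byte offsets instead of using a struct.
 */
struct bts_record_sketch {
	unsigned long from;	/* branch source; read at offset 0 */
	unsigned long to;	/* branch target; read at offset sizeof_field */
	unsigned long extra;	/* third slot; ignored by trace_bts_at() */
};

On the Pentium M and Netburst cases above sizeof_field is sizeof(long), so each record is three machine words; the default family-6 case assumes 8-byte fields regardless of word size.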
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d3..e74f6d0a3216 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,24 +42,20 @@ static void stop_function_trace(struct trace_array *tr)
42 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
43} 43}
44 44
45static void function_trace_init(struct trace_array *tr) 45static int function_trace_init(struct trace_array *tr)
46{ 46{
47 if (tr->ctrl) 47 start_function_trace(tr);
48 start_function_trace(tr); 48 return 0;
49} 49}
50 50
51static void function_trace_reset(struct trace_array *tr) 51static void function_trace_reset(struct trace_array *tr)
52{ 52{
53 if (tr->ctrl) 53 stop_function_trace(tr);
54 stop_function_trace(tr);
55} 54}
56 55
57static void function_trace_ctrl_update(struct trace_array *tr) 56static void function_trace_start(struct trace_array *tr)
58{ 57{
59 if (tr->ctrl) 58 function_reset(tr);
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63} 59}
64 60
65static struct tracer function_trace __read_mostly = 61static struct tracer function_trace __read_mostly =
@@ -67,7 +63,7 @@ static struct tracer function_trace __read_mostly =
67 .name = "function", 63 .name = "function",
68 .init = function_trace_init, 64 .init = function_trace_init,
69 .reset = function_trace_reset, 65 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 66 .start = function_trace_start,
71#ifdef CONFIG_FTRACE_SELFTEST 67#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function, 68 .selftest = trace_selftest_startup_function,
73#endif 69#endif
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
new file mode 100644
index 000000000000..894b50bca313
--- /dev/null
+++ b/kernel/trace/trace_functions_graph.c
@@ -0,0 +1,400 @@
1/*
2 *
3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 *
8 */
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/fs.h>
13
14#include "trace.h"
15
16#define TRACE_GRAPH_INDENT 2
17
18/* Flag options */
19#define TRACE_GRAPH_PRINT_OVERRUN 0x1
20#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22
23static struct tracer_opt trace_opts[] = {
24 /* Display overruns? */
25 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
26 /* Display CPU? */
27 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
28 /* Display overhead? */
29 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
30 { } /* Empty entry */
31};
32
33static struct tracer_flags tracer_flags = {
34 /* Don't display overruns by default */
35 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
36 .opts = trace_opts
37};
38
39/* pid on the last trace processed */
40static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
41
42static int graph_trace_init(struct trace_array *tr)
43{
44 int cpu, ret;
45
46 for_each_online_cpu(cpu)
47 tracing_reset(tr, cpu);
48
49 ret = register_ftrace_graph(&trace_graph_return,
50 &trace_graph_entry);
51 if (ret)
52 return ret;
53 tracing_start_cmdline_record();
54
55 return 0;
56}
57
58static void graph_trace_reset(struct trace_array *tr)
59{
60 tracing_stop_cmdline_record();
61 unregister_ftrace_graph();
62}
63
64static inline int log10_cpu(int nb)
65{
66 if (nb / 100)
67 return 3;
68 if (nb / 10)
69 return 2;
70 return 1;
71}
72
73static enum print_line_t
74print_graph_cpu(struct trace_seq *s, int cpu)
75{
76 int i;
77 int ret;
78 int log10_this = log10_cpu(cpu);
79 int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
80
81
82 /*
83 * Start with a space character - to make it stand out
84 * to the right a bit when trace output is pasted into
85 * email:
86 */
87 ret = trace_seq_printf(s, " ");
88
89 /*
90 * Tricky - we space the CPU field according to the max
91 * number of online CPUs. On a 2-cpu system it would take
92 * a maximum of 1 digit - on a 128 cpu system it would
93 * take up to 3 digits:
94 */
95 for (i = 0; i < log10_all - log10_this; i++) {
96 ret = trace_seq_printf(s, " ");
97 if (!ret)
98 return TRACE_TYPE_PARTIAL_LINE;
99 }
100 ret = trace_seq_printf(s, "%d) ", cpu);
101 if (!ret)
102 return TRACE_TYPE_PARTIAL_LINE;
103
104 return TRACE_TYPE_HANDLED;
105}
106
107
108/* If the pid changed since the last trace, output this event */
109static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
110{
111 char *comm, *prev_comm;
112 pid_t prev_pid;
113 int ret;
114
115 if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
116 return 1;
117
118 prev_pid = last_pid[cpu];
119 last_pid[cpu] = pid;
120
121 comm = trace_find_cmdline(pid);
122 prev_comm = trace_find_cmdline(prev_pid);
123
124/*
125 * Context-switch trace line:
126
127 ------------------------------------------
128 | 1) migration/0--1 => sshd-1755
129 ------------------------------------------
130
131 */
132 ret = trace_seq_printf(s,
133 " ------------------------------------------\n");
134 ret += trace_seq_printf(s, " | %d) %s-%d => %s-%d\n",
135 cpu, prev_comm, prev_pid, comm, pid);
136 ret += trace_seq_printf(s,
137 " ------------------------------------------\n\n");
138 return ret;
139}
140
141static bool
142trace_branch_is_leaf(struct trace_iterator *iter,
143 struct ftrace_graph_ent_entry *curr)
144{
145 struct ring_buffer_iter *ring_iter;
146 struct ring_buffer_event *event;
147 struct ftrace_graph_ret_entry *next;
148
149 ring_iter = iter->buffer_iter[iter->cpu];
150
151 if (!ring_iter)
152 return false;
153
154 event = ring_buffer_iter_peek(ring_iter, NULL);
155
156 if (!event)
157 return false;
158
159 next = ring_buffer_event_data(event);
160
161 if (next->ent.type != TRACE_GRAPH_RET)
162 return false;
163
164 if (curr->ent.pid != next->ent.pid ||
165 curr->graph_ent.func != next->ret.func)
166 return false;
167
168 return true;
169}
170
171
172static inline int
173print_graph_duration(unsigned long long duration, struct trace_seq *s)
174{
175 unsigned long nsecs_rem = do_div(duration, 1000);
176 return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
177}
178
179/* Signal an execution time overhead to the output */
180static int
181print_graph_overhead(unsigned long long duration, struct trace_seq *s)
182{
183 /* Duration exceeded 100 usecs */
184 if (duration > 100000ULL)
185 return trace_seq_printf(s, "! ");
186
187 /* Duration exceeded 10 usecs */
188 if (duration > 10000ULL)
189 return trace_seq_printf(s, "+ ");
190
191 return trace_seq_printf(s, " ");
192}
193
194/* Case of a leaf function on its call entry */
195static enum print_line_t
196print_graph_entry_leaf(struct trace_iterator *iter,
197 struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
198{
199 struct ftrace_graph_ret_entry *ret_entry;
200 struct ftrace_graph_ret *graph_ret;
201 struct ring_buffer_event *event;
202 struct ftrace_graph_ent *call;
203 unsigned long long duration;
204 int ret;
205 int i;
206
207 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
208 ret_entry = ring_buffer_event_data(event);
209 graph_ret = &ret_entry->ret;
210 call = &entry->graph_ent;
211 duration = graph_ret->rettime - graph_ret->calltime;
212
213 /* Must not exceed 8 characters: 9999.999 us */
214 if (duration > 10000000ULL)
215 duration = 9999999ULL;
216
217 /* Overhead */
218 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
219 ret = print_graph_overhead(duration, s);
220 if (!ret)
221 return TRACE_TYPE_PARTIAL_LINE;
222 }
223
224 /* Duration */
225 ret = print_graph_duration(duration, s);
226 if (!ret)
227 return TRACE_TYPE_PARTIAL_LINE;
228
229 /* Function */
230 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
231 ret = trace_seq_printf(s, " ");
232 if (!ret)
233 return TRACE_TYPE_PARTIAL_LINE;
234 }
235
236 ret = seq_print_ip_sym(s, call->func, 0);
237 if (!ret)
238 return TRACE_TYPE_PARTIAL_LINE;
239
240 ret = trace_seq_printf(s, "();\n");
241 if (!ret)
242 return TRACE_TYPE_PARTIAL_LINE;
243
244 return TRACE_TYPE_HANDLED;
245}
246
247static enum print_line_t
248print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
249 struct trace_seq *s)
250{
251 int i;
252 int ret;
253 struct ftrace_graph_ent *call = &entry->graph_ent;
254
255 /* No overhead */
256 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
257 ret = trace_seq_printf(s, " ");
258 if (!ret)
259 return TRACE_TYPE_PARTIAL_LINE;
260 }
261
262 /* No time */
263 ret = trace_seq_printf(s, " | ");
264
265 /* Function */
266 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
267 ret = trace_seq_printf(s, " ");
268 if (!ret)
269 return TRACE_TYPE_PARTIAL_LINE;
270 }
271
272 ret = seq_print_ip_sym(s, call->func, 0);
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275
276 ret = trace_seq_printf(s, "() {\n");
277 if (!ret)
278 return TRACE_TYPE_PARTIAL_LINE;
279
280 return TRACE_TYPE_HANDLED;
281}
282
283static enum print_line_t
284print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
285 struct trace_iterator *iter, int cpu)
286{
287 int ret;
288 struct trace_entry *ent = iter->ent;
289
290 /* Pid */
291 if (!verif_pid(s, ent->pid, cpu))
292 return TRACE_TYPE_PARTIAL_LINE;
293
294 /* Cpu */
295 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
296 ret = print_graph_cpu(s, cpu);
297 if (!ret)
298 return TRACE_TYPE_PARTIAL_LINE;
299 }
300
301 if (trace_branch_is_leaf(iter, field))
302 return print_graph_entry_leaf(iter, field, s);
303 else
304 return print_graph_entry_nested(field, s);
305
306}
307
308static enum print_line_t
309print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
310 struct trace_entry *ent, int cpu)
311{
312 int i;
313 int ret;
314 unsigned long long duration = trace->rettime - trace->calltime;
315
316 /* Must not exceed 8 characters: xxxx.yyy us */
317 if (duration > 10000000ULL)
318 duration = 9999999ULL;
319
320 /* Pid */
321 if (!verif_pid(s, ent->pid, cpu))
322 return TRACE_TYPE_PARTIAL_LINE;
323
324 /* Cpu */
325 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
326 ret = print_graph_cpu(s, cpu);
327 if (!ret)
328 return TRACE_TYPE_PARTIAL_LINE;
329 }
330
331 /* Overhead */
332 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
333 ret = print_graph_overhead(duration, s);
334 if (!ret)
335 return TRACE_TYPE_PARTIAL_LINE;
336 }
337
338 /* Duration */
339 ret = print_graph_duration(duration, s);
340 if (!ret)
341 return TRACE_TYPE_PARTIAL_LINE;
342
343 /* Closing brace */
344 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
345 ret = trace_seq_printf(s, " ");
346 if (!ret)
347 return TRACE_TYPE_PARTIAL_LINE;
348 }
349
350 ret = trace_seq_printf(s, "}\n");
351 if (!ret)
352 return TRACE_TYPE_PARTIAL_LINE;
353
354 /* Overrun */
355 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
356 ret = trace_seq_printf(s, " (Overruns: %lu)\n",
357 trace->overrun);
358 if (!ret)
359 return TRACE_TYPE_PARTIAL_LINE;
360 }
361 return TRACE_TYPE_HANDLED;
362}
363
364enum print_line_t
365print_graph_function(struct trace_iterator *iter)
366{
367 struct trace_seq *s = &iter->seq;
368 struct trace_entry *entry = iter->ent;
369
370 switch (entry->type) {
371 case TRACE_GRAPH_ENT: {
372 struct ftrace_graph_ent_entry *field;
373 trace_assign_type(field, entry);
374 return print_graph_entry(field, s, iter,
375 iter->cpu);
376 }
377 case TRACE_GRAPH_RET: {
378 struct ftrace_graph_ret_entry *field;
379 trace_assign_type(field, entry);
380 return print_graph_return(&field->ret, s, entry, iter->cpu);
381 }
382 default:
383 return TRACE_TYPE_UNHANDLED;
384 }
385}
386
387static struct tracer graph_trace __read_mostly = {
388 .name = "function_graph",
389 .init = graph_trace_init,
390 .reset = graph_trace_reset,
391 .print_line = print_graph_function,
392 .flags = &tracer_flags,
393};
394
395static __init int init_graph_trace(void)
396{
397 return register_tracer(&graph_trace);
398}
399
400device_initcall(init_graph_trace);
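Putting the print helpers above together, the trace output is roughly of the following shape. This is an approximate illustration assembled from print_graph_cpu(), print_graph_overhead(), print_graph_duration() and the entry/return printers; exact column widths may differ:

/*
 *  1)               |  sys_read() {
 *  1)   0.521 us    |    fget_light();
 *  1) + 12.045 us   |    vfs_read();
 *  1) + 15.384 us   |  }
 *
 * Leaf calls are folded onto one line as "func();" with their duration,
 * nested calls open a "func() {" scope whose duration is printed on the
 * closing "}", and the one-character overhead column shows "+" above
 * 10 usecs and "!" above 100 usecs.
 */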
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e0..7c2e326bbc8b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
356static void start_irqsoff_tracer(struct trace_array *tr) 362static void start_irqsoff_tracer(struct trace_array *tr)
357{ 363{
358 register_ftrace_function(&trace_ops); 364 register_ftrace_function(&trace_ops);
359 tracer_enabled = 1; 365 if (tracing_is_enabled()) {
366 tracer_enabled = 1;
367 save_tracer_enabled = 1;
368 } else {
369 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
360} 372}
361 373
362static void stop_irqsoff_tracer(struct trace_array *tr) 374static void stop_irqsoff_tracer(struct trace_array *tr)
363{ 375{
364 tracer_enabled = 0; 376 tracer_enabled = 0;
377 save_tracer_enabled = 0;
365 unregister_ftrace_function(&trace_ops); 378 unregister_ftrace_function(&trace_ops);
366} 379}
367 380
@@ -370,53 +383,55 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
370 irqsoff_trace = tr; 383 irqsoff_trace = tr;
371 /* make sure that the tracer is visible */ 384 /* make sure that the tracer is visible */
372 smp_wmb(); 385 smp_wmb();
373 386 start_irqsoff_tracer(tr);
374 if (tr->ctrl)
375 start_irqsoff_tracer(tr);
376} 387}
377 388
378static void irqsoff_tracer_reset(struct trace_array *tr) 389static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 390{
380 if (tr->ctrl) 391 stop_irqsoff_tracer(tr);
381 stop_irqsoff_tracer(tr);
382} 392}
383 393
384static void irqsoff_tracer_ctrl_update(struct trace_array *tr) 394static void irqsoff_tracer_start(struct trace_array *tr)
385{ 395{
386 if (tr->ctrl) 396 tracer_enabled = 1;
387 start_irqsoff_tracer(tr); 397 save_tracer_enabled = 1;
388 else 398}
389 stop_irqsoff_tracer(tr); 399
400static void irqsoff_tracer_stop(struct trace_array *tr)
401{
402 tracer_enabled = 0;
403 save_tracer_enabled = 0;
390} 404}
391 405
392static void irqsoff_tracer_open(struct trace_iterator *iter) 406static void irqsoff_tracer_open(struct trace_iterator *iter)
393{ 407{
394 /* stop the trace while dumping */ 408 /* stop the trace while dumping */
395 if (iter->tr->ctrl) 409 tracer_enabled = 0;
396 stop_irqsoff_tracer(iter->tr);
397} 410}
398 411
399static void irqsoff_tracer_close(struct trace_iterator *iter) 412static void irqsoff_tracer_close(struct trace_iterator *iter)
400{ 413{
401 if (iter->tr->ctrl) 414 /* restart tracing */
402 start_irqsoff_tracer(iter->tr); 415 tracer_enabled = save_tracer_enabled;
403} 416}
404 417
405#ifdef CONFIG_IRQSOFF_TRACER 418#ifdef CONFIG_IRQSOFF_TRACER
406static void irqsoff_tracer_init(struct trace_array *tr) 419static int irqsoff_tracer_init(struct trace_array *tr)
407{ 420{
408 trace_type = TRACER_IRQS_OFF; 421 trace_type = TRACER_IRQS_OFF;
409 422
410 __irqsoff_tracer_init(tr); 423 __irqsoff_tracer_init(tr);
424 return 0;
411} 425}
412static struct tracer irqsoff_tracer __read_mostly = 426static struct tracer irqsoff_tracer __read_mostly =
413{ 427{
414 .name = "irqsoff", 428 .name = "irqsoff",
415 .init = irqsoff_tracer_init, 429 .init = irqsoff_tracer_init,
416 .reset = irqsoff_tracer_reset, 430 .reset = irqsoff_tracer_reset,
431 .start = irqsoff_tracer_start,
432 .stop = irqsoff_tracer_stop,
417 .open = irqsoff_tracer_open, 433 .open = irqsoff_tracer_open,
418 .close = irqsoff_tracer_close, 434 .close = irqsoff_tracer_close,
419 .ctrl_update = irqsoff_tracer_ctrl_update,
420 .print_max = 1, 435 .print_max = 1,
421#ifdef CONFIG_FTRACE_SELFTEST 436#ifdef CONFIG_FTRACE_SELFTEST
422 .selftest = trace_selftest_startup_irqsoff, 437 .selftest = trace_selftest_startup_irqsoff,
@@ -428,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
428#endif 443#endif
429 444
430#ifdef CONFIG_PREEMPT_TRACER 445#ifdef CONFIG_PREEMPT_TRACER
431static void preemptoff_tracer_init(struct trace_array *tr) 446static int preemptoff_tracer_init(struct trace_array *tr)
432{ 447{
433 trace_type = TRACER_PREEMPT_OFF; 448 trace_type = TRACER_PREEMPT_OFF;
434 449
435 __irqsoff_tracer_init(tr); 450 __irqsoff_tracer_init(tr);
451 return 0;
436} 452}
437 453
438static struct tracer preemptoff_tracer __read_mostly = 454static struct tracer preemptoff_tracer __read_mostly =
@@ -440,9 +456,10 @@ static struct tracer preemptoff_tracer __read_mostly =
440 .name = "preemptoff", 456 .name = "preemptoff",
441 .init = preemptoff_tracer_init, 457 .init = preemptoff_tracer_init,
442 .reset = irqsoff_tracer_reset, 458 .reset = irqsoff_tracer_reset,
459 .start = irqsoff_tracer_start,
460 .stop = irqsoff_tracer_stop,
443 .open = irqsoff_tracer_open, 461 .open = irqsoff_tracer_open,
444 .close = irqsoff_tracer_close, 462 .close = irqsoff_tracer_close,
445 .ctrl_update = irqsoff_tracer_ctrl_update,
446 .print_max = 1, 463 .print_max = 1,
447#ifdef CONFIG_FTRACE_SELFTEST 464#ifdef CONFIG_FTRACE_SELFTEST
448 .selftest = trace_selftest_startup_preemptoff, 465 .selftest = trace_selftest_startup_preemptoff,
@@ -456,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
456#if defined(CONFIG_IRQSOFF_TRACER) && \ 473#if defined(CONFIG_IRQSOFF_TRACER) && \
457 defined(CONFIG_PREEMPT_TRACER) 474 defined(CONFIG_PREEMPT_TRACER)
458 475
459static void preemptirqsoff_tracer_init(struct trace_array *tr) 476static int preemptirqsoff_tracer_init(struct trace_array *tr)
460{ 477{
461 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; 478 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
462 479
463 __irqsoff_tracer_init(tr); 480 __irqsoff_tracer_init(tr);
481 return 0;
464} 482}
465 483
466static struct tracer preemptirqsoff_tracer __read_mostly = 484static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -468,9 +486,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
468 .name = "preemptirqsoff", 486 .name = "preemptirqsoff",
469 .init = preemptirqsoff_tracer_init, 487 .init = preemptirqsoff_tracer_init,
470 .reset = irqsoff_tracer_reset, 488 .reset = irqsoff_tracer_reset,
489 .start = irqsoff_tracer_start,
490 .stop = irqsoff_tracer_stop,
471 .open = irqsoff_tracer_open, 491 .open = irqsoff_tracer_open,
472 .close = irqsoff_tracer_close, 492 .close = irqsoff_tracer_close,
473 .ctrl_update = irqsoff_tracer_ctrl_update,
474 .print_max = 1, 493 .print_max = 1,
475#ifdef CONFIG_FTRACE_SELFTEST 494#ifdef CONFIG_FTRACE_SELFTEST
476 .selftest = trace_selftest_startup_preemptirqsoff, 495 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index e62cbf78eab6..2a98a206acc2 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -32,34 +32,29 @@ static void mmio_reset_data(struct trace_array *tr)
32 tracing_reset(tr, cpu); 32 tracing_reset(tr, cpu);
33} 33}
34 34
35static void mmio_trace_init(struct trace_array *tr) 35static int mmio_trace_init(struct trace_array *tr)
36{ 36{
37 pr_debug("in %s\n", __func__); 37 pr_debug("in %s\n", __func__);
38 mmio_trace_array = tr; 38 mmio_trace_array = tr;
39 if (tr->ctrl) { 39
40 mmio_reset_data(tr); 40 mmio_reset_data(tr);
41 enable_mmiotrace(); 41 enable_mmiotrace();
42 } 42 return 0;
43} 43}
44 44
45static void mmio_trace_reset(struct trace_array *tr) 45static void mmio_trace_reset(struct trace_array *tr)
46{ 46{
47 pr_debug("in %s\n", __func__); 47 pr_debug("in %s\n", __func__);
48 if (tr->ctrl) 48
49 disable_mmiotrace(); 49 disable_mmiotrace();
50 mmio_reset_data(tr); 50 mmio_reset_data(tr);
51 mmio_trace_array = NULL; 51 mmio_trace_array = NULL;
52} 52}
53 53
54static void mmio_trace_ctrl_update(struct trace_array *tr) 54static void mmio_trace_start(struct trace_array *tr)
55{ 55{
56 pr_debug("in %s\n", __func__); 56 pr_debug("in %s\n", __func__);
57 if (tr->ctrl) { 57 mmio_reset_data(tr);
58 mmio_reset_data(tr);
59 enable_mmiotrace();
60 } else {
61 disable_mmiotrace();
62 }
63} 58}
64 59
65static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) 60static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -296,10 +291,10 @@ static struct tracer mmio_tracer __read_mostly =
296 .name = "mmiotrace", 291 .name = "mmiotrace",
297 .init = mmio_trace_init, 292 .init = mmio_trace_init,
298 .reset = mmio_trace_reset, 293 .reset = mmio_trace_reset,
294 .start = mmio_trace_start,
299 .pipe_open = mmio_pipe_open, 295 .pipe_open = mmio_pipe_open,
300 .close = mmio_close, 296 .close = mmio_close,
301 .read = mmio_read, 297 .read = mmio_read,
302 .ctrl_update = mmio_trace_ctrl_update,
303 .print_line = mmio_print_line, 298 .print_line = mmio_print_line,
304}; 299};
305 300
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 4592b4862515..b9767acd30ac 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -12,6 +12,27 @@
12 12
13#include "trace.h" 13#include "trace.h"
14 14
15/* Our two options */
16enum {
17 TRACE_NOP_OPT_ACCEPT = 0x1,
18 TRACE_NOP_OPT_REFUSE = 0x2
19};
20
21/* Options for the tracer (see trace_options file) */
22static struct tracer_opt nop_opts[] = {
23 /* Option that will be accepted by set_flag callback */
24 { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
25 /* Option that will be refused by set_flag callback */
26 { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
27 { } /* Always set a last empty entry */
28};
29
30static struct tracer_flags nop_flags = {
31 /* You can check your flags value here when you want. */
32 .val = 0, /* By default: all flags disabled */
33 .opts = nop_opts
34};
35
15static struct trace_array *ctx_trace; 36static struct trace_array *ctx_trace;
16 37
17static void start_nop_trace(struct trace_array *tr) 38static void start_nop_trace(struct trace_array *tr)
@@ -24,7 +45,7 @@ static void stop_nop_trace(struct trace_array *tr)
24 /* Nothing to do! */ 45 /* Nothing to do! */
25} 46}
26 47
27static void nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
28{ 49{
29 int cpu; 50 int cpu;
30 ctx_trace = tr; 51 ctx_trace = tr;
@@ -32,33 +53,53 @@ static void nop_trace_init(struct trace_array *tr)
32 for_each_online_cpu(cpu) 53 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu); 54 tracing_reset(tr, cpu);
34 55
35 if (tr->ctrl) 56 start_nop_trace(tr);
36 start_nop_trace(tr); 57 return 0;
37} 58}
38 59
39static void nop_trace_reset(struct trace_array *tr) 60static void nop_trace_reset(struct trace_array *tr)
40{ 61{
41 if (tr->ctrl) 62 stop_nop_trace(tr);
42 stop_nop_trace(tr);
43} 63}
44 64
45static void nop_trace_ctrl_update(struct trace_array *tr) 65/* It only serves as an example of a callback to
 66 * accept or refuse the setting of a flag.
67 * If you don't implement it, then the flag setting will be
68 * automatically accepted.
69 */
70static int nop_set_flag(u32 old_flags, u32 bit, int set)
46{ 71{
47 /* When starting a new trace, reset the buffers */ 72 /*
48 if (tr->ctrl) 73 * Note that you don't need to update nop_flags.val yourself.
49 start_nop_trace(tr); 74 * The tracing API will do it automatically if you return 0.
50 else 75 */
51 stop_nop_trace(tr); 76 if (bit == TRACE_NOP_OPT_ACCEPT) {
77 printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
78 " Now cat trace_options to see the result\n",
79 set);
80 return 0;
81 }
82
83 if (bit == TRACE_NOP_OPT_REFUSE) {
84 printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
85 "Now cat trace_options to see the result\n",
86 set);
87 return -EINVAL;
88 }
89
90 return 0;
52} 91}
53 92
93
54struct tracer nop_trace __read_mostly = 94struct tracer nop_trace __read_mostly =
55{ 95{
56 .name = "nop", 96 .name = "nop",
57 .init = nop_trace_init, 97 .init = nop_trace_init,
58 .reset = nop_trace_reset, 98 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST 99#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop, 100 .selftest = trace_selftest_startup_nop,
62#endif 101#endif
102 .flags = &nop_flags,
103 .set_flag = nop_set_flag
63}; 104};
64 105
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
new file mode 100644
index 000000000000..a7172a352f62
--- /dev/null
+++ b/kernel/trace/trace_power.c
@@ -0,0 +1,179 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <linux/ftrace.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19
20static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled;
22
23
24static void start_power_trace(struct trace_array *tr)
25{
26 trace_power_enabled = 1;
27}
28
29static void stop_power_trace(struct trace_array *tr)
30{
31 trace_power_enabled = 0;
32}
33
34
35static int power_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 power_trace = tr;
39
40 trace_power_enabled = 1;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44 return 0;
45}
46
47static enum print_line_t power_print_line(struct trace_iterator *iter)
48{
49 int ret = 0;
50 struct trace_entry *entry = iter->ent;
51 struct trace_power *field;
52 struct power_trace *it;
53 struct trace_seq *s = &iter->seq;
54 struct timespec stamp;
55 struct timespec duration;
56
57 trace_assign_type(field, entry);
58 it = &field->state_data;
59 stamp = ktime_to_timespec(it->stamp);
60 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
61
62 if (entry->type == TRACE_POWER) {
63 if (it->type == POWER_CSTATE)
64 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
65 stamp.tv_sec,
66 stamp.tv_nsec,
67 it->state, iter->cpu,
68 duration.tv_sec,
69 duration.tv_nsec);
70 if (it->type == POWER_PSTATE)
71 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
72 stamp.tv_sec,
73 stamp.tv_nsec,
74 it->state, iter->cpu);
75 if (!ret)
76 return TRACE_TYPE_PARTIAL_LINE;
77 return TRACE_TYPE_HANDLED;
78 }
79 return TRACE_TYPE_UNHANDLED;
80}
81
82static struct tracer power_tracer __read_mostly =
83{
84 .name = "power",
85 .init = power_trace_init,
86 .start = start_power_trace,
87 .stop = stop_power_trace,
88 .reset = stop_power_trace,
89 .print_line = power_print_line,
90};
91
92static int init_power_trace(void)
93{
94 return register_tracer(&power_tracer);
95}
96device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
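For context, a minimal sketch of how an instrumentation site is expected to drive this API, based only on the trace_power_start()/trace_power_end() definitions above; the caller name and the C-state number are made up for illustration:

/* Hypothetical caller -- not part of this patch. */
static void enter_idle_state_sketch(int cstate)
{
	struct power_trace it;

	trace_power_start(&it, POWER_CSTATE, cstate);	/* stamps start time, type and level */
	/* ... architecture-specific idle entry would run here ... */
	trace_power_end(&it);		/* stamps the end time and commits a TRACE_POWER entry */
}

trace_power_mark() covers the instantaneous case, such as a P-state change, where there is no interval to measure.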
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a62..863390557b44 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
16 16
17static struct trace_array *ctx_trace; 17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex);
20 21
21static void 22static void
22probe_sched_switch(struct rq *__rq, struct task_struct *prev, 23probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
27 int cpu; 28 int cpu;
28 int pc; 29 int pc;
29 30
30 if (!atomic_read(&sched_ref)) 31 if (!sched_ref)
31 return; 32 return;
32 33
33 tracing_record_cmdline(prev); 34 tracing_record_cmdline(prev);
@@ -123,20 +124,18 @@ static void tracing_sched_unregister(void)
123 124
124static void tracing_start_sched_switch(void) 125static void tracing_start_sched_switch(void)
125{ 126{
126 long ref; 127 mutex_lock(&sched_register_mutex);
127 128 if (!(sched_ref++))
128 ref = atomic_inc_return(&sched_ref);
129 if (ref == 1)
130 tracing_sched_register(); 129 tracing_sched_register();
130 mutex_unlock(&sched_register_mutex);
131} 131}
132 132
133static void tracing_stop_sched_switch(void) 133static void tracing_stop_sched_switch(void)
134{ 134{
135 long ref; 135 mutex_lock(&sched_register_mutex);
136 136 if (!(--sched_ref))
137 ref = atomic_dec_and_test(&sched_ref);
138 if (ref)
139 tracing_sched_unregister(); 137 tracing_sched_unregister();
138 mutex_unlock(&sched_register_mutex);
140} 139}
141 140
142void tracing_start_cmdline_record(void) 141void tracing_start_cmdline_record(void)
@@ -149,40 +148,86 @@ void tracing_stop_cmdline_record(void)
149 tracing_stop_sched_switch(); 148 tracing_stop_sched_switch();
150} 149}
151 150
151/**
152 * tracing_start_sched_switch_record - start tracing context switches
153 *
154 * Turns on context switch tracing for a tracer.
155 */
156void tracing_start_sched_switch_record(void)
157{
158 if (unlikely(!ctx_trace)) {
159 WARN_ON(1);
160 return;
161 }
162
163 tracing_start_sched_switch();
164
165 mutex_lock(&sched_register_mutex);
166 tracer_enabled++;
167 mutex_unlock(&sched_register_mutex);
168}
169
170/**
171 * tracing_stop_sched_switch_record - stop tracing context switches
172 *
173 * Turns off context switch tracing for a tracer.
174 */
175void tracing_stop_sched_switch_record(void)
176{
177 mutex_lock(&sched_register_mutex);
178 tracer_enabled--;
179 WARN_ON(tracer_enabled < 0);
180 mutex_unlock(&sched_register_mutex);
181
182 tracing_stop_sched_switch();
183}
184
185/**
186 * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
187 * @tr: trace array pointer to assign
188 *
189 * Some tracers might want to record the context switches in their
190 * trace. This function lets those tracers assign the trace array
191 * to use.
192 */
193void tracing_sched_switch_assign_trace(struct trace_array *tr)
194{
195 ctx_trace = tr;
196}
197
152static void start_sched_trace(struct trace_array *tr) 198static void start_sched_trace(struct trace_array *tr)
153{ 199{
154 sched_switch_reset(tr); 200 sched_switch_reset(tr);
155 tracing_start_cmdline_record(); 201 tracing_start_sched_switch_record();
156 tracer_enabled = 1;
157} 202}
158 203
159static void stop_sched_trace(struct trace_array *tr) 204static void stop_sched_trace(struct trace_array *tr)
160{ 205{
161 tracer_enabled = 0; 206 tracing_stop_sched_switch_record();
162 tracing_stop_cmdline_record();
163} 207}
164 208
165static void sched_switch_trace_init(struct trace_array *tr) 209static int sched_switch_trace_init(struct trace_array *tr)
166{ 210{
167 ctx_trace = tr; 211 ctx_trace = tr;
168 212 start_sched_trace(tr);
169 if (tr->ctrl) 213 return 0;
170 start_sched_trace(tr);
171} 214}
172 215
173static void sched_switch_trace_reset(struct trace_array *tr) 216static void sched_switch_trace_reset(struct trace_array *tr)
174{ 217{
175 if (tr->ctrl) 218 if (sched_ref)
176 stop_sched_trace(tr); 219 stop_sched_trace(tr);
177} 220}
178 221
179static void sched_switch_trace_ctrl_update(struct trace_array *tr) 222static void sched_switch_trace_start(struct trace_array *tr)
180{ 223{
181 /* When starting a new trace, reset the buffers */ 224 sched_switch_reset(tr);
182 if (tr->ctrl) 225 tracing_start_sched_switch();
183 start_sched_trace(tr); 226}
184 else 227
185 stop_sched_trace(tr); 228static void sched_switch_trace_stop(struct trace_array *tr)
229{
230 tracing_stop_sched_switch();
186} 231}
187 232
188static struct tracer sched_switch_trace __read_mostly = 233static struct tracer sched_switch_trace __read_mostly =
@@ -190,7 +235,8 @@ static struct tracer sched_switch_trace __read_mostly =
190 .name = "sched_switch", 235 .name = "sched_switch",
191 .init = sched_switch_trace_init, 236 .init = sched_switch_trace_init,
192 .reset = sched_switch_trace_reset, 237 .reset = sched_switch_trace_reset,
193 .ctrl_update = sched_switch_trace_ctrl_update, 238 .start = sched_switch_trace_start,
239 .stop = sched_switch_trace_stop,
194#ifdef CONFIG_FTRACE_SELFTEST 240#ifdef CONFIG_FTRACE_SELFTEST
195 .selftest = trace_selftest_startup_sched_switch, 241 .selftest = trace_selftest_startup_sched_switch,
196#endif 242#endif
@@ -198,14 +244,6 @@ static struct tracer sched_switch_trace __read_mostly =
198 244
199__init static int init_sched_switch_trace(void) 245__init static int init_sched_switch_trace(void)
200{ 246{
201 int ret = 0;
202
203 if (atomic_read(&sched_ref))
204 ret = tracing_sched_register();
205 if (ret) {
206 pr_info("error registering scheduler trace\n");
207 return ret;
208 }
209 return register_tracer(&sched_switch_trace); 247 return register_tracer(&sched_switch_trace);
210} 248}
211device_initcall(init_sched_switch_trace); 249device_initcall(init_sched_switch_trace);
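A rough sketch of how another tracer is expected to consume the new helpers, pieced together from the kernel-doc comments above; the tracer callbacks here are hypothetical and only show the calling convention:

/* Hypothetical tracer glue -- not part of this patch. */
static int example_tracer_init(struct trace_array *tr)
{
	tracing_sched_switch_assign_trace(tr);	/* context-switch events go into tr */
	tracing_start_sched_switch_record();	/* register the sched tracepoint probes */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	tracing_stop_sched_switch_record();	/* drop this tracer's reference */
}

The sched_ref/tracer_enabled counting under sched_register_mutex lets several tracers request context-switch recording at once; the probes are only unregistered when the last user calls the stop helper.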
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b565..0067b49746c1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
50 return; 50 return;
51 51
52 pc = preempt_count(); 52 pc = preempt_count();
53 resched = need_resched(); 53 resched = ftrace_preempt_disable();
54 preempt_disable_notrace();
55 54
56 cpu = raw_smp_processor_id(); 55 cpu = raw_smp_processor_id();
57 data = tr->data[cpu]; 56 data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
81 out: 80 out:
82 atomic_dec(&data->disabled); 81 atomic_dec(&data->disabled);
83 82
84 /* 83 ftrace_preempt_enable(resched);
85 * To prevent recursion from the scheduler, if the
86 * resched flag was set before we entered, then
87 * don't reschedule.
88 */
89 if (resched)
90 preempt_enable_no_resched_notrace();
91 else
92 preempt_enable_notrace();
93} 84}
94 85
95static struct ftrace_ops trace_ops __read_mostly = 86static struct ftrace_ops trace_ops __read_mostly =
@@ -271,6 +262,12 @@ out:
271 atomic_dec(&wakeup_trace->data[cpu]->disabled); 262 atomic_dec(&wakeup_trace->data[cpu]->disabled);
272} 263}
273 264
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
274static void start_wakeup_tracer(struct trace_array *tr) 271static void start_wakeup_tracer(struct trace_array *tr)
275{ 272{
276 int ret; 273 int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
309 306
310 register_ftrace_function(&trace_ops); 307 register_ftrace_function(&trace_ops);
311 308
312 tracer_enabled = 1; 309 if (tracing_is_enabled()) {
310 tracer_enabled = 1;
311 save_tracer_enabled = 1;
312 } else {
313 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
313 316
314 return; 317 return;
315fail_deprobe_wake_new: 318fail_deprobe_wake_new:
@@ -321,49 +324,53 @@ fail_deprobe:
321static void stop_wakeup_tracer(struct trace_array *tr) 324static void stop_wakeup_tracer(struct trace_array *tr)
322{ 325{
323 tracer_enabled = 0; 326 tracer_enabled = 0;
327 save_tracer_enabled = 0;
324 unregister_ftrace_function(&trace_ops); 328 unregister_ftrace_function(&trace_ops);
325 unregister_trace_sched_switch(probe_wakeup_sched_switch); 329 unregister_trace_sched_switch(probe_wakeup_sched_switch);
326 unregister_trace_sched_wakeup_new(probe_wakeup); 330 unregister_trace_sched_wakeup_new(probe_wakeup);
327 unregister_trace_sched_wakeup(probe_wakeup); 331 unregister_trace_sched_wakeup(probe_wakeup);
328} 332}
329 333
330static void wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
331{ 335{
332 wakeup_trace = tr; 336 wakeup_trace = tr;
333 337 start_wakeup_tracer(tr);
334 if (tr->ctrl) 338 return 0;
335 start_wakeup_tracer(tr);
336} 339}
337 340
338static void wakeup_tracer_reset(struct trace_array *tr) 341static void wakeup_tracer_reset(struct trace_array *tr)
339{ 342{
340 if (tr->ctrl) { 343 stop_wakeup_tracer(tr);
341 stop_wakeup_tracer(tr); 344 /* make sure we put back any tasks we are tracing */
342 /* make sure we put back any tasks we are tracing */ 345 wakeup_reset(tr);
343 wakeup_reset(tr); 346}
344 } 347
348static void wakeup_tracer_start(struct trace_array *tr)
349{
350 wakeup_reset(tr);
351 tracer_enabled = 1;
352 save_tracer_enabled = 1;
345} 353}
346 354
347static void wakeup_tracer_ctrl_update(struct trace_array *tr) 355static void wakeup_tracer_stop(struct trace_array *tr)
348{ 356{
349 if (tr->ctrl) 357 tracer_enabled = 0;
350 start_wakeup_tracer(tr); 358 save_tracer_enabled = 0;
351 else
352 stop_wakeup_tracer(tr);
353} 359}
354 360
355static void wakeup_tracer_open(struct trace_iterator *iter) 361static void wakeup_tracer_open(struct trace_iterator *iter)
356{ 362{
357 /* stop the trace while dumping */ 363 /* stop the trace while dumping */
358 if (iter->tr->ctrl) 364 tracer_enabled = 0;
359 stop_wakeup_tracer(iter->tr);
360} 365}
361 366
362static void wakeup_tracer_close(struct trace_iterator *iter) 367static void wakeup_tracer_close(struct trace_iterator *iter)
363{ 368{
364 /* forget about any processes we were recording */ 369 /* forget about any processes we were recording */
365 if (iter->tr->ctrl) 370 if (save_tracer_enabled) {
366 start_wakeup_tracer(iter->tr); 371 wakeup_reset(iter->tr);
372 tracer_enabled = 1;
373 }
367} 374}
368 375
369static struct tracer wakeup_tracer __read_mostly = 376static struct tracer wakeup_tracer __read_mostly =
@@ -371,9 +378,10 @@ static struct tracer wakeup_tracer __read_mostly =
371 .name = "wakeup", 378 .name = "wakeup",
372 .init = wakeup_tracer_init, 379 .init = wakeup_tracer_init,
373 .reset = wakeup_tracer_reset, 380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
374 .open = wakeup_tracer_open, 383 .open = wakeup_tracer_open,
375 .close = wakeup_tracer_close, 384 .close = wakeup_tracer_close,
376 .ctrl_update = wakeup_tracer_ctrl_update,
377 .print_max = 1, 385 .print_max = 1,
378#ifdef CONFIG_FTRACE_SELFTEST 386#ifdef CONFIG_FTRACE_SELFTEST
379 .selftest = trace_selftest_startup_wakeup, 387 .selftest = trace_selftest_startup_wakeup,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 90bc752a7580..88c8eb70f54a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,6 +13,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
13 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT: 14 case TRACE_PRINT:
15 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH:
16 return 1; 17 return 1;
17 } 18 }
18 return 0; 19 return 0;
@@ -51,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
51 int cpu, ret = 0; 52 int cpu, ret = 0;
52 53
53 /* Don't allow flipping of max traces now */ 54 /* Don't allow flipping of max traces now */
54 raw_local_irq_save(flags); 55 local_irq_save(flags);
55 __raw_spin_lock(&ftrace_max_lock); 56 __raw_spin_lock(&ftrace_max_lock);
56 57
57 cnt = ring_buffer_entries(tr->buffer); 58 cnt = ring_buffer_entries(tr->buffer);
@@ -62,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
62 break; 63 break;
63 } 64 }
64 __raw_spin_unlock(&ftrace_max_lock); 65 __raw_spin_unlock(&ftrace_max_lock);
65 raw_local_irq_restore(flags); 66 local_irq_restore(flags);
66 67
67 if (count) 68 if (count)
68 *count = cnt; 69 *count = cnt;
@@ -70,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 71 return ret;
71} 72}
72 73
74static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
75{
76 printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
77 trace->name, init_ret);
78}
73#ifdef CONFIG_FUNCTION_TRACER 79#ifdef CONFIG_FUNCTION_TRACER
74 80
75#ifdef CONFIG_DYNAMIC_FTRACE 81#ifdef CONFIG_DYNAMIC_FTRACE
@@ -110,8 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
110 ftrace_set_filter(func_name, strlen(func_name), 1); 116 ftrace_set_filter(func_name, strlen(func_name), 1);
111 117
112 /* enable tracing */ 118 /* enable tracing */
113 tr->ctrl = 1; 119 ret = trace->init(tr);
114 trace->init(tr); 120 if (ret) {
121 warn_failed_init_tracer(trace, ret);
122 goto out;
123 }
115 124
116 /* Sleep for a 1/10 of a second */ 125 /* Sleep for a 1/10 of a second */
117 msleep(100); 126 msleep(100);
@@ -134,13 +143,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
134 msleep(100); 143 msleep(100);
135 144
136 /* stop the tracing. */ 145 /* stop the tracing. */
137 tr->ctrl = 0; 146 tracing_stop();
138 trace->ctrl_update(tr);
139 ftrace_enabled = 0; 147 ftrace_enabled = 0;
140 148
141 /* check the trace buffer */ 149 /* check the trace buffer */
142 ret = trace_test_buffer(tr, &count); 150 ret = trace_test_buffer(tr, &count);
143 trace->reset(tr); 151 trace->reset(tr);
152 tracing_start();
144 153
145 /* we should only have one item */ 154 /* we should only have one item */
146 if (!ret && count != 1) { 155 if (!ret && count != 1) {
@@ -148,6 +157,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
148 ret = -1; 157 ret = -1;
149 goto out; 158 goto out;
150 } 159 }
160
151 out: 161 out:
152 ftrace_enabled = save_ftrace_enabled; 162 ftrace_enabled = save_ftrace_enabled;
153 tracer_enabled = save_tracer_enabled; 163 tracer_enabled = save_tracer_enabled;
@@ -180,18 +190,22 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
180 ftrace_enabled = 1; 190 ftrace_enabled = 1;
181 tracer_enabled = 1; 191 tracer_enabled = 1;
182 192
183 tr->ctrl = 1; 193 ret = trace->init(tr);
184 trace->init(tr); 194 if (ret) {
195 warn_failed_init_tracer(trace, ret);
196 goto out;
197 }
198
185 /* Sleep for a 1/10 of a second */ 199 /* Sleep for a 1/10 of a second */
186 msleep(100); 200 msleep(100);
187 /* stop the tracing. */ 201 /* stop the tracing. */
188 tr->ctrl = 0; 202 tracing_stop();
189 trace->ctrl_update(tr);
190 ftrace_enabled = 0; 203 ftrace_enabled = 0;
191 204
192 /* check the trace buffer */ 205 /* check the trace buffer */
193 ret = trace_test_buffer(tr, &count); 206 ret = trace_test_buffer(tr, &count);
194 trace->reset(tr); 207 trace->reset(tr);
208 tracing_start();
195 209
196 if (!ret && !count) { 210 if (!ret && !count) {
197 printk(KERN_CONT ".. no entries found .."); 211 printk(KERN_CONT ".. no entries found ..");
@@ -223,8 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
223 int ret; 237 int ret;
224 238
225 /* start the tracing */ 239 /* start the tracing */
226 tr->ctrl = 1; 240 ret = trace->init(tr);
227 trace->init(tr); 241 if (ret) {
242 warn_failed_init_tracer(trace, ret);
243 return ret;
244 }
245
228 /* reset the max latency */ 246 /* reset the max latency */
229 tracing_max_latency = 0; 247 tracing_max_latency = 0;
230 /* disable interrupts for a bit */ 248 /* disable interrupts for a bit */
@@ -232,13 +250,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
232 udelay(100); 250 udelay(100);
233 local_irq_enable(); 251 local_irq_enable();
234 /* stop the tracing. */ 252 /* stop the tracing. */
235 tr->ctrl = 0; 253 tracing_stop();
236 trace->ctrl_update(tr);
237 /* check both trace buffers */ 254 /* check both trace buffers */
238 ret = trace_test_buffer(tr, NULL); 255 ret = trace_test_buffer(tr, NULL);
239 if (!ret) 256 if (!ret)
240 ret = trace_test_buffer(&max_tr, &count); 257 ret = trace_test_buffer(&max_tr, &count);
241 trace->reset(tr); 258 trace->reset(tr);
259 tracing_start();
242 260
243 if (!ret && !count) { 261 if (!ret && !count) {
244 printk(KERN_CONT ".. no entries found .."); 262 printk(KERN_CONT ".. no entries found ..");
@@ -259,9 +277,26 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
259 unsigned long count; 277 unsigned long count;
260 int ret; 278 int ret;
261 279
280 /*
281 * Now that the big kernel lock is no longer preemptible,
282 * and this is called with the BKL held, it will always
283 * fail. If preemption is already disabled, simply
284 * pass the test. When the BKL is removed, or becomes
285 * preemptible again, we will once again test this,
286 * so keep it in.
287 */
288 if (preempt_count()) {
289 printk(KERN_CONT "can not test ... force ");
290 return 0;
291 }
292
262 /* start the tracing */ 293 /* start the tracing */
263 tr->ctrl = 1; 294 ret = trace->init(tr);
264 trace->init(tr); 295 if (ret) {
296 warn_failed_init_tracer(trace, ret);
297 return ret;
298 }
299
265 /* reset the max latency */ 300 /* reset the max latency */
266 tracing_max_latency = 0; 301 tracing_max_latency = 0;
267 /* disable preemption for a bit */ 302 /* disable preemption for a bit */
@@ -269,13 +304,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
269 udelay(100); 304 udelay(100);
270 preempt_enable(); 305 preempt_enable();
271 /* stop the tracing. */ 306 /* stop the tracing. */
272 tr->ctrl = 0; 307 tracing_stop();
273 trace->ctrl_update(tr);
274 /* check both trace buffers */ 308 /* check both trace buffers */
275 ret = trace_test_buffer(tr, NULL); 309 ret = trace_test_buffer(tr, NULL);
276 if (!ret) 310 if (!ret)
277 ret = trace_test_buffer(&max_tr, &count); 311 ret = trace_test_buffer(&max_tr, &count);
278 trace->reset(tr); 312 trace->reset(tr);
313 tracing_start();
279 314
280 if (!ret && !count) { 315 if (!ret && !count) {
281 printk(KERN_CONT ".. no entries found .."); 316 printk(KERN_CONT ".. no entries found ..");
@@ -296,9 +331,25 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
296 unsigned long count; 331 unsigned long count;
297 int ret; 332 int ret;
298 333
334 /*
334 * Now that the big kernel lock is no longer preemptible,
336 * and this is called with the BKL held, it will always
337 * fail. If preemption is already disabled, simply
338 * pass the test. When the BKL is removed, or becomes
339 * preemptible again, we will once again test this,
340 * so keep it in.
341 */
342 if (preempt_count()) {
343 printk(KERN_CONT "can not test ... force ");
344 return 0;
345 }
346
299 /* start the tracing */ 347 /* start the tracing */
300 tr->ctrl = 1; 348 ret = trace->init(tr);
301 trace->init(tr); 349 if (ret) {
350 warn_failed_init_tracer(trace, ret);
351 goto out;
352 }
302 353
303 /* reset the max latency */ 354 /* reset the max latency */
304 tracing_max_latency = 0; 355 tracing_max_latency = 0;
@@ -312,27 +363,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
312 local_irq_enable(); 363 local_irq_enable();
313 364
314 /* stop the tracing. */ 365 /* stop the tracing. */
315 tr->ctrl = 0; 366 tracing_stop();
316 trace->ctrl_update(tr);
317 /* check both trace buffers */ 367 /* check both trace buffers */
318 ret = trace_test_buffer(tr, NULL); 368 ret = trace_test_buffer(tr, NULL);
319 if (ret) 369 if (ret) {
370 tracing_start();
320 goto out; 371 goto out;
372 }
321 373
322 ret = trace_test_buffer(&max_tr, &count); 374 ret = trace_test_buffer(&max_tr, &count);
323 if (ret) 375 if (ret) {
376 tracing_start();
324 goto out; 377 goto out;
378 }
325 379
326 if (!ret && !count) { 380 if (!ret && !count) {
327 printk(KERN_CONT ".. no entries found .."); 381 printk(KERN_CONT ".. no entries found ..");
328 ret = -1; 382 ret = -1;
383 tracing_start();
329 goto out; 384 goto out;
330 } 385 }
331 386
332 /* do the test by disabling interrupts first this time */ 387 /* do the test by disabling interrupts first this time */
333 tracing_max_latency = 0; 388 tracing_max_latency = 0;
334 tr->ctrl = 1; 389 tracing_start();
335 trace->ctrl_update(tr);
336 preempt_disable(); 390 preempt_disable();
337 local_irq_disable(); 391 local_irq_disable();
338 udelay(100); 392 udelay(100);
@@ -341,8 +395,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
341 local_irq_enable(); 395 local_irq_enable();
342 396
343 /* stop the tracing. */ 397 /* stop the tracing. */
344 tr->ctrl = 0; 398 tracing_stop();
345 trace->ctrl_update(tr);
346 /* check both trace buffers */ 399 /* check both trace buffers */
347 ret = trace_test_buffer(tr, NULL); 400 ret = trace_test_buffer(tr, NULL);
348 if (ret) 401 if (ret)
@@ -358,6 +411,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
358 411
359 out: 412 out:
360 trace->reset(tr); 413 trace->reset(tr);
414 tracing_start();
361 tracing_max_latency = save_max; 415 tracing_max_latency = save_max;
362 416
363 return ret; 417 return ret;
@@ -423,8 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
423 wait_for_completion(&isrt); 477 wait_for_completion(&isrt);
424 478
425 /* start the tracing */ 479 /* start the tracing */
426 tr->ctrl = 1; 480 ret = trace->init(tr);
427 trace->init(tr); 481 if (ret) {
482 warn_failed_init_tracer(trace, ret);
483 return ret;
484 }
485
428 /* reset the max latency */ 486 /* reset the max latency */
429 tracing_max_latency = 0; 487 tracing_max_latency = 0;
430 488
@@ -448,8 +506,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448 msleep(100); 506 msleep(100);
449 507
450 /* stop the tracing. */ 508 /* stop the tracing. */
451 tr->ctrl = 0; 509 tracing_stop();
452 trace->ctrl_update(tr);
453 /* check both trace buffers */ 510 /* check both trace buffers */
454 ret = trace_test_buffer(tr, NULL); 511 ret = trace_test_buffer(tr, NULL);
455 if (!ret) 512 if (!ret)
@@ -457,6 +514,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
457 514
458 515
459 trace->reset(tr); 516 trace->reset(tr);
517 tracing_start();
460 518
461 tracing_max_latency = save_max; 519 tracing_max_latency = save_max;
462 520
@@ -480,16 +538,20 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
480 int ret; 538 int ret;
481 539
482 /* start the tracing */ 540 /* start the tracing */
483 tr->ctrl = 1; 541 ret = trace->init(tr);
484 trace->init(tr); 542 if (ret) {
543 warn_failed_init_tracer(trace, ret);
544 return ret;
545 }
546
485 /* Sleep for a 1/10 of a second */ 547 /* Sleep for a 1/10 of a second */
486 msleep(100); 548 msleep(100);
487 /* stop the tracing. */ 549 /* stop the tracing. */
488 tr->ctrl = 0; 550 tracing_stop();
489 trace->ctrl_update(tr);
490 /* check the trace buffer */ 551 /* check the trace buffer */
491 ret = trace_test_buffer(tr, &count); 552 ret = trace_test_buffer(tr, &count);
492 trace->reset(tr); 553 trace->reset(tr);
554 tracing_start();
493 555
494 if (!ret && !count) { 556 if (!ret && !count) {
495 printk(KERN_CONT ".. no entries found .."); 557 printk(KERN_CONT ".. no entries found ..");
@@ -508,17 +570,48 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
508 int ret; 570 int ret;
509 571
510 /* start the tracing */ 572 /* start the tracing */
511 tr->ctrl = 1; 573 ret = trace->init(tr);
512 trace->init(tr); 574 if (ret) {
575 warn_failed_init_tracer(trace, ret);
576 return 0;
577 }
578
513 /* Sleep for a 1/10 of a second */ 579 /* Sleep for a 1/10 of a second */
514 msleep(100); 580 msleep(100);
515 /* stop the tracing. */ 581 /* stop the tracing. */
516 tr->ctrl = 0; 582 tracing_stop();
517 trace->ctrl_update(tr);
518 /* check the trace buffer */ 583 /* check the trace buffer */
519 ret = trace_test_buffer(tr, &count); 584 ret = trace_test_buffer(tr, &count);
520 trace->reset(tr); 585 trace->reset(tr);
586 tracing_start();
521 587
522 return ret; 588 return ret;
523} 589}
524#endif /* CONFIG_SYSPROF_TRACER */ 590#endif /* CONFIG_SYSPROF_TRACER */
591
592#ifdef CONFIG_BRANCH_TRACER
593int
594trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
595{
596 unsigned long count;
597 int ret;
598
599 /* start the tracing */
600 ret = trace->init(tr);
601 if (ret) {
602 warn_failed_init_tracer(trace, ret);
603 return ret;
604 }
605
606 /* Sleep for a 1/10 of a second */
607 msleep(100);
608 /* stop the tracing. */
609 tracing_stop();
610 /* check the trace buffer */
611 ret = trace_test_buffer(tr, &count);
612 trace->reset(tr);
613 tracing_start();
614
615 return ret;
616}
617#endif /* CONFIG_BRANCH_TRACER */
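Taken together, the trace_selftest.c hunks converge on a single shape: trace->init() now returns an error code that each selftest checks via warn_failed_init_tracer(), the old tr->ctrl / trace->ctrl_update() toggling becomes tracing_stop()/tracing_start(), and global tracing is restarted after trace->reset(). A condensed sketch of that shape; the function name and the exercised workload are illustrative, not lines from the patch:

	/* illustrative skeleton of a converted startup selftest */
	int
	trace_selftest_startup_example(struct tracer *trace, struct trace_array *tr)
	{
		unsigned long count;
		int ret;

		ret = trace->init(tr);		/* init now reports failure */
		if (ret) {
			warn_failed_init_tracer(trace, ret);
			return ret;
		}

		/* ... exercise the tracer: delays, wakeups, irq/preempt toggling ... */

		tracing_stop();			/* was: tr->ctrl = 0; trace->ctrl_update(tr); */
		ret = trace_test_buffer(tr, &count);
		trace->reset(tr);
		tracing_start();		/* leave global tracing running again */

		return ret;
	}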
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3bdb44bde4b7..fde3be15c642 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
107 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 107 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return; 108 return;
109 109
110 resched = need_resched(); 110 resched = ftrace_preempt_disable();
111 preempt_disable_notrace();
112 111
113 cpu = raw_smp_processor_id(); 112 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */ 113 /* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
120 out: 119 out:
121 per_cpu(trace_active, cpu)--; 120 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */ 121 /* prevent recursion in schedule */
123 if (resched) 122 ftrace_preempt_enable(resched);
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127} 123}
128 124
129static struct ftrace_ops trace_ops __read_mostly = 125static struct ftrace_ops trace_ops __read_mostly =
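The stack-tracer hunk swaps the open-coded need_resched() + preempt_disable_notrace() pair for ftrace_preempt_disable()/ftrace_preempt_enable(). The helpers themselves are defined elsewhere in the series; given the code they replace, they amount to the following (a sketch, not the literal header):

	/* sketch of the helper pair implied by the trace_stack.c hunk */
	static inline int ftrace_preempt_disable(void)
	{
		int resched = need_resched();

		preempt_disable_notrace();
		return resched;
	}

	static inline void ftrace_preempt_enable(int resched)
	{
		if (resched)
			preempt_enable_no_resched_notrace();
		else
			preempt_enable_notrace();
	}

Factoring this out keeps every tracing callback from re-implementing the "don't trigger a reschedule from inside the tracer" dance by hand.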
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..54960edb96d0 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -261,27 +261,17 @@ static void stop_stack_trace(struct trace_array *tr)
261 mutex_unlock(&sample_timer_lock); 261 mutex_unlock(&sample_timer_lock);
262} 262}
263 263
264static void stack_trace_init(struct trace_array *tr) 264static int stack_trace_init(struct trace_array *tr)
265{ 265{
266 sysprof_trace = tr; 266 sysprof_trace = tr;
267 267
268 if (tr->ctrl) 268 start_stack_trace(tr);
269 start_stack_trace(tr); 269 return 0;
270} 270}
271 271
272static void stack_trace_reset(struct trace_array *tr) 272static void stack_trace_reset(struct trace_array *tr)
273{ 273{
274 if (tr->ctrl) 274 stop_stack_trace(tr);
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285} 275}
286 276
287static struct tracer stack_trace __read_mostly = 277static struct tracer stack_trace __read_mostly =
@@ -289,7 +279,6 @@ static struct tracer stack_trace __read_mostly =
289 .name = "sysprof", 279 .name = "sysprof",
290 .init = stack_trace_init, 280 .init = stack_trace_init,
291 .reset = stack_trace_reset, 281 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST 282#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof, 283 .selftest = trace_selftest_startup_sysprof,
295#endif 284#endif
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index af8c85664882..79602740bbb5 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
43 */ 43 */
44#define TRACEPOINT_HASH_BITS 6 44#define TRACEPOINT_HASH_BITS 6
45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
54 struct hlist_node hlist; 55 struct hlist_node hlist;
55 void **funcs; 56 void **funcs;
56 int refcount; /* Number of times armed. 0 if disarmed. */ 57 int refcount; /* Number of times armed. 0 if disarmed. */
57 struct rcu_head rcu;
58 void *oldptr;
59 unsigned char rcu_pending:1;
60 char name[0]; 58 char name[0];
61}; 59};
62 60
63static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 61struct tp_probes {
62 union {
63 struct rcu_head rcu;
64 struct list_head list;
65 } u;
66 void *probes[0];
67};
64 68
65static void free_old_closure(struct rcu_head *head) 69static inline void *allocate_probes(int count)
66{ 70{
67 struct tracepoint_entry *entry = container_of(head, 71 struct tp_probes *p = kmalloc(count * sizeof(void *)
68 struct tracepoint_entry, rcu); 72 + sizeof(struct tp_probes), GFP_KERNEL);
69 kfree(entry->oldptr); 73 return p == NULL ? NULL : p->probes;
70 /* Make sure we free the data before setting the pending flag to 0 */
71 smp_wmb();
72 entry->rcu_pending = 0;
73} 74}
74 75
75static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) 76static void rcu_free_old_probes(struct rcu_head *head)
76{ 77{
77 if (!old) 78 kfree(container_of(head, struct tp_probes, u.rcu));
78 return; 79}
79 entry->oldptr = old; 80
80 entry->rcu_pending = 1; 81static inline void release_probes(void *old)
81 /* write rcu_pending before calling the RCU callback */ 82{
82 smp_wmb(); 83 if (old) {
83 call_rcu_sched(&entry->rcu, free_old_closure); 84 struct tp_probes *tp_probes = container_of(old,
85 struct tp_probes, probes[0]);
86 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
87 }
84} 88}
85 89
86static void debug_print_probes(struct tracepoint_entry *entry) 90static void debug_print_probes(struct tracepoint_entry *entry)
87{ 91{
88 int i; 92 int i;
89 93
90 if (!tracepoint_debug) 94 if (!tracepoint_debug || !entry->funcs)
91 return; 95 return;
92 96
93 for (i = 0; entry->funcs[i]; i++) 97 for (i = 0; entry->funcs[i]; i++)
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
111 return ERR_PTR(-EEXIST); 115 return ERR_PTR(-EEXIST);
112 } 116 }
113 /* + 2 : one for new probe, one for NULL func */ 117 /* + 2 : one for new probe, one for NULL func */
114 new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); 118 new = allocate_probes(nr_probes + 2);
115 if (new == NULL) 119 if (new == NULL)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
117 if (old) 121 if (old)
118 memcpy(new, old, nr_probes * sizeof(void *)); 122 memcpy(new, old, nr_probes * sizeof(void *));
119 new[nr_probes] = probe; 123 new[nr_probes] = probe;
124 new[nr_probes + 1] = NULL;
120 entry->refcount = nr_probes + 1; 125 entry->refcount = nr_probes + 1;
121 entry->funcs = new; 126 entry->funcs = new;
122 debug_print_probes(entry); 127 debug_print_probes(entry);
@@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
132 old = entry->funcs; 137 old = entry->funcs;
133 138
134 if (!old) 139 if (!old)
135 return NULL; 140 return ERR_PTR(-ENOENT);
136 141
137 debug_print_probes(entry); 142 debug_print_probes(entry);
138 /* (N -> M), (N > 1, M >= 0) probes */ 143 /* (N -> M), (N > 1, M >= 0) probes */
@@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
151 int j = 0; 156 int j = 0;
152 /* N -> M, (N > 1, M > 0) */ 157 /* N -> M, (N > 1, M > 0) */
153 /* + 1 for NULL */ 158 /* + 1 for NULL */
154 new = kzalloc((nr_probes - nr_del + 1) 159 new = allocate_probes(nr_probes - nr_del + 1);
155 * sizeof(void *), GFP_KERNEL);
156 if (new == NULL) 160 if (new == NULL)
157 return ERR_PTR(-ENOMEM); 161 return ERR_PTR(-ENOMEM);
158 for (i = 0; old[i]; i++) 162 for (i = 0; old[i]; i++)
159 if ((probe && old[i] != probe)) 163 if ((probe && old[i] != probe))
160 new[j++] = old[i]; 164 new[j++] = old[i];
165 new[nr_probes - nr_del] = NULL;
161 entry->refcount = nr_probes - nr_del; 166 entry->refcount = nr_probes - nr_del;
162 entry->funcs = new; 167 entry->funcs = new;
163 } 168 }
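The tp_probes rework above replaces the per-entry oldptr/rcu_pending bookkeeping with a self-describing allocation: allocate_probes() hands callers the probes[] array embedded in a struct tp_probes, so release_probes() can recover the container with container_of() and defer the kfree() to an RCU-sched grace period. The same idiom, pulled out of the hunks and annotated (all names are the ones introduced above):

	struct tp_probes {
		union {
			struct rcu_head rcu;	/* used when freeing a replaced array */
			struct list_head list;	/* used by the batched-update path */
		} u;
		void *probes[0];		/* what callers actually see */
	};

	/* callers get &p->probes[0]; the header stays hidden in front of it */
	static inline void *allocate_probes(int count)
	{
		struct tp_probes *p = kmalloc(count * sizeof(void *)
				+ sizeof(struct tp_probes), GFP_KERNEL);
		return p == NULL ? NULL : p->probes;
	}

	static inline void release_probes(void *old)
	{
		if (old) {
			/* map the array address back to its container ... */
			struct tp_probes *tp_probes = container_of(old,
					struct tp_probes, probes[0]);
			/* ... and free it only after a sched-RCU grace period */
			call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
		}
	}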
@@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
215 memcpy(&e->name[0], name, name_len); 220 memcpy(&e->name[0], name, name_len);
216 e->funcs = NULL; 221 e->funcs = NULL;
217 e->refcount = 0; 222 e->refcount = 0;
218 e->rcu_pending = 0;
219 hlist_add_head(&e->hlist, head); 223 hlist_add_head(&e->hlist, head);
220 return e; 224 return e;
221} 225}
@@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
224 * Remove the tracepoint from the tracepoint hash table. Must be called with 228 * Remove the tracepoint from the tracepoint hash table. Must be called with
225 * mutex_lock held. 229 * mutex_lock held.
226 */ 230 */
227static int remove_tracepoint(const char *name) 231static inline void remove_tracepoint(struct tracepoint_entry *e)
228{ 232{
229 struct hlist_head *head;
230 struct hlist_node *node;
231 struct tracepoint_entry *e;
232 int found = 0;
233 size_t len = strlen(name) + 1;
234 u32 hash = jhash(name, len-1, 0);
235
236 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
237 hlist_for_each_entry(e, node, head, hlist) {
238 if (!strcmp(name, e->name)) {
239 found = 1;
240 break;
241 }
242 }
243 if (!found)
244 return -ENOENT;
245 if (e->refcount)
246 return -EBUSY;
247 hlist_del(&e->hlist); 233 hlist_del(&e->hlist);
248 /* Make sure the call_rcu_sched has been executed */
249 if (e->rcu_pending)
250 rcu_barrier_sched();
251 kfree(e); 234 kfree(e);
252 return 0;
253} 235}
254 236
255/* 237/*
@@ -280,6 +262,7 @@ static void set_tracepoint(struct tracepoint_entry **entry,
280static void disable_tracepoint(struct tracepoint *elem) 262static void disable_tracepoint(struct tracepoint *elem)
281{ 263{
282 elem->state = 0; 264 elem->state = 0;
265 rcu_assign_pointer(elem->funcs, NULL);
283} 266}
284 267
285/** 268/**
@@ -320,6 +303,23 @@ static void tracepoint_update_probes(void)
320 module_update_tracepoints(); 303 module_update_tracepoints();
321} 304}
322 305
306static void *tracepoint_add_probe(const char *name, void *probe)
307{
308 struct tracepoint_entry *entry;
309 void *old;
310
311 entry = get_tracepoint(name);
312 if (!entry) {
313 entry = add_tracepoint(name);
314 if (IS_ERR(entry))
315 return entry;
316 }
317 old = tracepoint_entry_add_probe(entry, probe);
318 if (IS_ERR(old) && !entry->refcount)
319 remove_tracepoint(entry);
320 return old;
321}
322
323/** 323/**
324 * tracepoint_probe_register - Connect a probe to a tracepoint 324 * tracepoint_probe_register - Connect a probe to a tracepoint
325 * @name: tracepoint name 325 * @name: tracepoint name
@@ -330,44 +330,36 @@ static void tracepoint_update_probes(void)
330 */ 330 */
331int tracepoint_probe_register(const char *name, void *probe) 331int tracepoint_probe_register(const char *name, void *probe)
332{ 332{
333 struct tracepoint_entry *entry;
334 int ret = 0;
335 void *old; 333 void *old;
336 334
337 mutex_lock(&tracepoints_mutex); 335 mutex_lock(&tracepoints_mutex);
338 entry = get_tracepoint(name); 336 old = tracepoint_add_probe(name, probe);
339 if (!entry) {
340 entry = add_tracepoint(name);
341 if (IS_ERR(entry)) {
342 ret = PTR_ERR(entry);
343 goto end;
344 }
345 }
346 /*
347 * If we detect that a call_rcu_sched is pending for this tracepoint,
348 * make sure it's executed now.
349 */
350 if (entry->rcu_pending)
351 rcu_barrier_sched();
352 old = tracepoint_entry_add_probe(entry, probe);
353 if (IS_ERR(old)) {
354 ret = PTR_ERR(old);
355 goto end;
356 }
357 mutex_unlock(&tracepoints_mutex); 337 mutex_unlock(&tracepoints_mutex);
338 if (IS_ERR(old))
339 return PTR_ERR(old);
340
358 tracepoint_update_probes(); /* may update entry */ 341 tracepoint_update_probes(); /* may update entry */
359 mutex_lock(&tracepoints_mutex); 342 release_probes(old);
360 entry = get_tracepoint(name); 343 return 0;
361 WARN_ON(!entry);
362 if (entry->rcu_pending)
363 rcu_barrier_sched();
364 tracepoint_entry_free_old(entry, old);
365end:
366 mutex_unlock(&tracepoints_mutex);
367 return ret;
368} 344}
369EXPORT_SYMBOL_GPL(tracepoint_probe_register); 345EXPORT_SYMBOL_GPL(tracepoint_probe_register);
370 346
347static void *tracepoint_remove_probe(const char *name, void *probe)
348{
349 struct tracepoint_entry *entry;
350 void *old;
351
352 entry = get_tracepoint(name);
353 if (!entry)
354 return ERR_PTR(-ENOENT);
355 old = tracepoint_entry_remove_probe(entry, probe);
356 if (IS_ERR(old))
357 return old;
358 if (!entry->refcount)
359 remove_tracepoint(entry);
360 return old;
361}
362
371/** 363/**
372 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 364 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
373 * @name: tracepoint name 365 * @name: tracepoint name
@@ -380,38 +372,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
380 */ 372 */
381int tracepoint_probe_unregister(const char *name, void *probe) 373int tracepoint_probe_unregister(const char *name, void *probe)
382{ 374{
383 struct tracepoint_entry *entry;
384 void *old; 375 void *old;
385 int ret = -ENOENT;
386 376
387 mutex_lock(&tracepoints_mutex); 377 mutex_lock(&tracepoints_mutex);
388 entry = get_tracepoint(name); 378 old = tracepoint_remove_probe(name, probe);
389 if (!entry)
390 goto end;
391 if (entry->rcu_pending)
392 rcu_barrier_sched();
393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
395 printk(KERN_WARNING "Warning: Trying to unregister a probe"
396 "that doesn't exist\n");
397 goto end;
398 }
399 mutex_unlock(&tracepoints_mutex); 379 mutex_unlock(&tracepoints_mutex);
380 if (IS_ERR(old))
381 return PTR_ERR(old);
382
400 tracepoint_update_probes(); /* may update entry */ 383 tracepoint_update_probes(); /* may update entry */
384 release_probes(old);
385 return 0;
386}
387EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
388
389static LIST_HEAD(old_probes);
390static int need_update;
391
392static void tracepoint_add_old_probes(void *old)
393{
394 need_update = 1;
395 if (old) {
396 struct tp_probes *tp_probes = container_of(old,
397 struct tp_probes, probes[0]);
398 list_add(&tp_probes->u.list, &old_probes);
399 }
400}
401
402/**
403 * tracepoint_probe_register_noupdate - register a probe but not connect
404 * @name: tracepoint name
405 * @probe: probe handler
406 *
407 * caller must call tracepoint_probe_update_all()
408 */
409int tracepoint_probe_register_noupdate(const char *name, void *probe)
410{
411 void *old;
412
401 mutex_lock(&tracepoints_mutex); 413 mutex_lock(&tracepoints_mutex);
402 entry = get_tracepoint(name); 414 old = tracepoint_add_probe(name, probe);
403 if (!entry) 415 if (IS_ERR(old)) {
404 goto end; 416 mutex_unlock(&tracepoints_mutex);
405 if (entry->rcu_pending) 417 return PTR_ERR(old);
406 rcu_barrier_sched(); 418 }
407 tracepoint_entry_free_old(entry, old); 419 tracepoint_add_old_probes(old);
408 remove_tracepoint(name); /* Ignore busy error message */
409 ret = 0;
410end:
411 mutex_unlock(&tracepoints_mutex); 420 mutex_unlock(&tracepoints_mutex);
412 return ret; 421 return 0;
413} 422}
414EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 423EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
424
425/**
426 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
427 * @name: tracepoint name
428 * @probe: probe function pointer
429 *
430 * caller must call tracepoint_probe_update_all()
431 */
432int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
433{
434 void *old;
435
436 mutex_lock(&tracepoints_mutex);
437 old = tracepoint_remove_probe(name, probe);
438 if (IS_ERR(old)) {
439 mutex_unlock(&tracepoints_mutex);
440 return PTR_ERR(old);
441 }
442 tracepoint_add_old_probes(old);
443 mutex_unlock(&tracepoints_mutex);
444 return 0;
445}
446EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
447
448/**
449 * tracepoint_probe_update_all - update tracepoints
450 */
451void tracepoint_probe_update_all(void)
452{
453 LIST_HEAD(release_probes);
454 struct tp_probes *pos, *next;
455
456 mutex_lock(&tracepoints_mutex);
457 if (!need_update) {
458 mutex_unlock(&tracepoints_mutex);
459 return;
460 }
461 if (!list_empty(&old_probes))
462 list_replace_init(&old_probes, &release_probes);
463 need_update = 0;
464 mutex_unlock(&tracepoints_mutex);
465
466 tracepoint_update_probes();
467 list_for_each_entry_safe(pos, next, &release_probes, u.list) {
468 list_del(&pos->u.list);
469 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
470 }
471}
472EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
415 473
416/** 474/**
417 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 475 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
@@ -483,3 +541,36 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter)
483 iter->tracepoint = NULL; 541 iter->tracepoint = NULL;
484} 542}
485EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 543EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
544
545#ifdef CONFIG_MODULES
546
547int tracepoint_module_notify(struct notifier_block *self,
548 unsigned long val, void *data)
549{
550 struct module *mod = data;
551
552 switch (val) {
553 case MODULE_STATE_COMING:
554 tracepoint_update_probe_range(mod->tracepoints,
555 mod->tracepoints + mod->num_tracepoints);
556 break;
557 case MODULE_STATE_GOING:
558 tracepoint_update_probe_range(mod->tracepoints,
559 mod->tracepoints + mod->num_tracepoints);
560 break;
561 }
562 return 0;
563}
564
565struct notifier_block tracepoint_module_nb = {
566 .notifier_call = tracepoint_module_notify,
567 .priority = 0,
568};
569
570static int init_tracepoints(void)
571{
572 return register_module_notifier(&tracepoint_module_nb);
573}
574__initcall(init_tracepoints);
575
576#endif /* CONFIG_MODULES */
diff --git a/mm/bounce.c b/mm/bounce.c
index 06722c403058..bf0cf7c8387b 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,6 +14,7 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/highmem.h> 15#include <linux/highmem.h>
16#include <linux/blktrace_api.h> 16#include <linux/blktrace_api.h>
17#include <trace/block.h>
17#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
18 19
19#define POOL_SIZE 64 20#define POOL_SIZE 64
@@ -21,6 +22,8 @@
21 22
22static mempool_t *page_pool, *isa_page_pool; 23static mempool_t *page_pool, *isa_page_pool;
23 24
25DEFINE_TRACE(block_bio_bounce);
26
24#ifdef CONFIG_HIGHMEM 27#ifdef CONFIG_HIGHMEM
25static __init int init_emergency_pool(void) 28static __init int init_emergency_pool(void)
26{ 29{
@@ -222,7 +225,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
222 if (!bio) 225 if (!bio)
223 return; 226 return;
224 227
225 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); 228 trace_block_bio_bounce(q, *bio_orig);
226 229
227 /* 230 /*
228 * at least one page was bounced, fill in possible non-highmem 231 * at least one page was bounced, fill in possible non-highmem
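In __blk_queue_bounce() the blktrace hook blk_add_trace_bio(..., BLK_TA_BOUNCE) becomes a first-class tracepoint: DEFINE_TRACE(block_bio_bounce) provides the definition here and trace_block_bio_bounce(q, *bio_orig) fires it. For that call to compile, a matching declaration must exist in the shared header; a sketch of what it presumably looks like, together with how a consumer such as blktrace could attach to it (the header contents are not part of the hunks shown, so treat the prototype as an assumption):

	/* assumed declaration in <trace/block.h> */
	DECLARE_TRACE(block_bio_bounce,
		TPPROTO(struct request_queue *q, struct bio *bio),
		TPARGS(q, bio));

	/* hypothetical consumer probe with the matching signature */
	static void my_bounce_probe(struct request_queue *q, struct bio *bio)
	{
		/* record the bounce event, e.g. emit a BLK_TA_BOUNCE record */
	}

	static int __init my_bounce_trace_init(void)
	{
		/* register_trace_<name>() is generated by DECLARE_TRACE */
		return register_trace_block_bio_bounce(my_bounce_probe);
	}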
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
index 0216b55bd640..01724e04c556 100644
--- a/samples/tracepoints/tp-samples-trace.h
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -4,10 +4,10 @@
4#include <linux/proc_fs.h> /* for struct inode and struct file */ 4#include <linux/proc_fs.h> /* for struct inode and struct file */
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6 6
7DEFINE_TRACE(subsys_event, 7DECLARE_TRACE(subsys_event,
8 TPPROTO(struct inode *inode, struct file *file), 8 TPPROTO(struct inode *inode, struct file *file),
9 TPARGS(inode, file)); 9 TPARGS(inode, file));
10DEFINE_TRACE(subsys_eventb, 10DECLARE_TRACE(subsys_eventb,
11 TPPROTO(void), 11 TPPROTO(void),
12 TPARGS()); 12 TPARGS());
13#endif 13#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
index 55abfdda4bd4..e3a964889dc7 100644
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -46,6 +46,7 @@ void __exit tp_sample_trace_exit(void)
46{ 46{
47 unregister_trace_subsys_eventb(probe_subsys_eventb); 47 unregister_trace_subsys_eventb(probe_subsys_eventb);
48 unregister_trace_subsys_event(probe_subsys_event); 48 unregister_trace_subsys_event(probe_subsys_event);
49 tracepoint_synchronize_unregister();
49} 50}
50 51
51module_exit(tp_sample_trace_exit); 52module_exit(tp_sample_trace_exit);
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
index 5e9fcf4afffe..685a5acb4562 100644
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -33,6 +33,7 @@ module_init(tp_sample_trace_init);
33void __exit tp_sample_trace_exit(void) 33void __exit tp_sample_trace_exit(void)
34{ 34{
35 unregister_trace_subsys_event(probe_subsys_event); 35 unregister_trace_subsys_event(probe_subsys_event);
36 tracepoint_synchronize_unregister();
36} 37}
37 38
38module_exit(tp_sample_trace_exit); 39module_exit(tp_sample_trace_exit);
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
index 4ae4b7fcc043..00d169792a3e 100644
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -13,6 +13,9 @@
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include "tp-samples-trace.h" 14#include "tp-samples-trace.h"
15 15
16DEFINE_TRACE(subsys_event);
17DEFINE_TRACE(subsys_eventb);
18
16struct proc_dir_entry *pentry_example; 19struct proc_dir_entry *pentry_example;
17 20
18static int my_open(struct inode *inode, struct file *file) 21static int my_open(struct inode *inode, struct file *file)
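The sample updates spell out the new tracepoint split: DECLARE_TRACE() in the shared header only declares the event, each translation unit that fires it adds a DEFINE_TRACE(), and instrumented code calls the trace_<name>() wrapper generated by the declaration. A condensed view of the pattern, using the sample's subsys_event (the call-site body is illustrative):

	/* tp-samples-trace.h: declaration only, no storage */
	DECLARE_TRACE(subsys_event,
		TPPROTO(struct inode *inode, struct file *file),
		TPARGS(inode, file));

	/* tracepoint-sample.c: the one definition for this event */
	DEFINE_TRACE(subsys_event);

	static int my_open(struct inode *inode, struct file *file)
	{
		trace_subsys_event(inode, file);	/* a no-op until a probe is armed */
		return -EPERM;
	}

On the probe side, the samples now also call tracepoint_synchronize_unregister() on module exit, which waits for every in-flight tracepoint caller to finish before the probe code is allowed to disappear.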
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 468fbc9016c7..7a176773af85 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,16 +198,10 @@ cmd_modversions = \
198 fi; 198 fi;
199endif 199endif
200 200
201ifdef CONFIG_64BIT
202arch_bits = 64
203else
204arch_bits = 32
205endif
206
207ifdef CONFIG_FTRACE_MCOUNT_RECORD 201ifdef CONFIG_FTRACE_MCOUNT_RECORD
208cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \ 202cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
209 "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \ 203 "$(if $(CONFIG_64BIT),64,32)" \
210 "$(NM)" "$(RM)" "$(MV)" "$(@)"; 204 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
211endif 205endif
212 206
213define rule_cc_o_c 207define rule_cc_o_c
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index d2c61efc216f..f0af9aa9b243 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -78,11 +78,13 @@ while (<>) {
78} 78}
79 79
80if ($count == 0) { 80if ($count == 0) {
81 print "No data found in the dmesg. Make sure that 'printk.time=1' and\n"; 81 print STDERR <<END;
82 print "'initcall_debug' are passed on the kernel command line.\n\n"; 82No data found in the dmesg. Make sure that 'printk.time=1' and
83 print "Usage: \n"; 83'initcall_debug' are passed on the kernel command line.
84 print " dmesg | perl scripts/bootgraph.pl > output.svg\n\n"; 84Usage:
85 exit; 85 dmesg | perl scripts/bootgraph.pl > output.svg
86END
87 exit 1;
86} 88}
87 89
88print "<?xml version=\"1.0\" standalone=\"no\"?> \n"; 90print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
@@ -109,8 +111,8 @@ my $stylecounter = 0;
109my %rows; 111my %rows;
110my $rowscount = 1; 112my $rowscount = 1;
111my @initcalls = sort { $start{$a} <=> $start{$b} } keys(%start); 113my @initcalls = sort { $start{$a} <=> $start{$b} } keys(%start);
112my $key; 114
113foreach $key (@initcalls) { 115foreach my $key (@initcalls) {
114 my $duration = $end{$key} - $start{$key}; 116 my $duration = $end{$key} - $start{$key};
115 117
116 if ($duration >= $threshold) { 118 if ($duration >= $threshold) {
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 6b9fe3eb8360..0b1dc9f9bb06 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -112,6 +112,8 @@ my ($arch, $bits, $objdump, $objcopy, $cc,
112# Acceptable sections to record. 112# Acceptable sections to record.
113my %text_sections = ( 113my %text_sections = (
114 ".text" => 1, 114 ".text" => 1,
115 ".sched.text" => 1,
116 ".spinlock.text" => 1,
115); 117);
116 118
117$objdump = "objdump" if ((length $objdump) == 0); 119$objdump = "objdump" if ((length $objdump) == 0);
@@ -130,10 +132,13 @@ my %weak; # List of weak functions
130my %convert; # List of local functions used that needs conversion 132my %convert; # List of local functions used that needs conversion
131 133
132my $type; 134my $type;
135my $nm_regex; # Find the local functions (return function)
133my $section_regex; # Find the start of a section 136my $section_regex; # Find the start of a section
134my $function_regex; # Find the name of a function 137my $function_regex; # Find the name of a function
135 # (return offset and func name) 138 # (return offset and func name)
136my $mcount_regex; # Find the call site to mcount (return offset) 139my $mcount_regex; # Find the call site to mcount (return offset)
140my $alignment; # The .align value to use for $mcount_section
141my $section_type; # Section header plus possible alignment command
137 142
138if ($arch eq "x86") { 143if ($arch eq "x86") {
139 if ($bits == 64) { 144 if ($bits == 64) {
@@ -143,11 +148,21 @@ if ($arch eq "x86") {
143 } 148 }
144} 149}
145 150
151#
 152# We base the defaults on i386; the other archs are free to
 153# override them in the if statements below.
154#
155$nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)";
156$section_regex = "Disassembly of section\\s+(\\S+):";
157$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
158$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
159$section_type = '@progbits';
160$type = ".long";
161
146if ($arch eq "x86_64") { 162if ($arch eq "x86_64") {
147 $section_regex = "Disassembly of section\\s+(\\S+):";
148 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
149 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; 163 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
150 $type = ".quad"; 164 $type = ".quad";
165 $alignment = 8;
151 166
152 # force flags for this arch 167 # force flags for this arch
153 $ld .= " -m elf_x86_64"; 168 $ld .= " -m elf_x86_64";
@@ -156,10 +171,7 @@ if ($arch eq "x86_64") {
156 $cc .= " -m64"; 171 $cc .= " -m64";
157 172
158} elsif ($arch eq "i386") { 173} elsif ($arch eq "i386") {
159 $section_regex = "Disassembly of section\\s+(\\S+):"; 174 $alignment = 4;
160 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
161 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
162 $type = ".long";
163 175
164 # force flags for this arch 176 # force flags for this arch
165 $ld .= " -m elf_i386"; 177 $ld .= " -m elf_i386";
@@ -167,6 +179,27 @@ if ($arch eq "x86_64") {
167 $objcopy .= " -O elf32-i386"; 179 $objcopy .= " -O elf32-i386";
168 $cc .= " -m32"; 180 $cc .= " -m32";
169 181
182} elsif ($arch eq "sh") {
183 $alignment = 2;
184
185 # force flags for this arch
186 $ld .= " -m shlelf_linux";
187 $objcopy .= " -O elf32-sh-linux";
188 $cc .= " -m32";
189
190} elsif ($arch eq "powerpc") {
191 $nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)";
192 $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:";
193 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$";
194
195 if ($bits == 64) {
196 $type = ".quad";
197 }
198
199} elsif ($arch eq "arm") {
200 $alignment = 2;
201 $section_type = '%progbits';
202
170} else { 203} else {
171 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; 204 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
172} 205}
@@ -236,7 +269,7 @@ if (!$found_version) {
236# 269#
237open (IN, "$nm $inputfile|") || die "error running $nm"; 270open (IN, "$nm $inputfile|") || die "error running $nm";
238while (<IN>) { 271while (<IN>) {
239 if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) { 272 if (/$nm_regex/) {
240 $locals{$1} = 1; 273 $locals{$1} = 1;
241 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) { 274 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
242 $weak{$2} = $1; 275 $weak{$2} = $1;
@@ -287,7 +320,8 @@ sub update_funcs
287 if (!$opened) { 320 if (!$opened) {
288 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n"; 321 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
289 $opened = 1; 322 $opened = 1;
290 print FILE "\t.section $mcount_section,\"a\",\@progbits\n"; 323 print FILE "\t.section $mcount_section,\"a\",$section_type\n";
324 print FILE "\t.align $alignment\n" if (defined($alignment));
291 } 325 }
292 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset; 326 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
293 } 327 }
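recordmcount.pl disassembles each object file, collects the offset of every mcount call site ($mcount_regex), and writes a table of those addresses into a dedicated section using the per-arch $type, $alignment and $section_type chosen above. At boot the kernel walks that table to convert the recorded call sites into patchable NOPs. A heavily hedged sketch of that consumer, assuming the conventional __mcount_loc section with linker-provided start/stop symbols; neither the section name nor the per-site hook appears in the hunks shown here:

	/* sketch: walk the call-site table emitted by recordmcount.pl */
	extern unsigned long __start_mcount_loc[];	/* assumed linker symbols */
	extern unsigned long __stop_mcount_loc[];

	static void __init convert_recorded_call_sites(void)
	{
		unsigned long *p;

		for (p = __start_mcount_loc; p < __stop_mcount_loc; p++)
			handle_mcount_site(*p);		/* hypothetical per-site hook */
	}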
diff --git a/scripts/trace/power.pl b/scripts/trace/power.pl
new file mode 100644
index 000000000000..4f729b3501e0
--- /dev/null
+++ b/scripts/trace/power.pl
@@ -0,0 +1,108 @@
1#!/usr/bin/perl
2
3# Copyright 2008, Intel Corporation
4#
5# This file is part of the Linux kernel
6#
7# This program file is free software; you can redistribute it and/or modify it
8# under the terms of the GNU General Public License as published by the
9# Free Software Foundation; version 2 of the License.
10#
11# This program is distributed in the hope that it will be useful, but WITHOUT
12# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14# for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program in a file named COPYING; if not, write to the
18# Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301 USA
21#
22# Authors:
23# Arjan van de Ven <arjan@linux.intel.com>
24
25
26#
 27# This script turns C-state ftrace output into an SVG graphic that shows
28# historic C-state information
29#
30#
31# cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg
32#
33
34my @styles;
35my $base = 0;
36
37my @pstate_last;
 38my @pstate_state;
39
40$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
41$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
42$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
43$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
44$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
45$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
46$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
47$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
48$styles[8] = "fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
49
50
51print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
52print "<svg width=\"10000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
53
54my $scale = 30000.0;
55while (<>) {
56 my $line = $_;
57 if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) {
58 if ($base == 0) {
59 $base = $1;
60 }
61 my $time = $1 - $base;
62 $time = $time * $scale;
63 my $C = $2;
64 my $cpu = $3;
65 my $y = 400 * $cpu;
66 my $duration = $4 * $scale;
67 my $msec = int($4 * 100000)/100.0;
68 my $height = $C * 20;
69 $style = $styles[$C];
70
71 $y = $y + 140 - $height;
72
73 $x2 = $time + 4;
74 $y2 = $y + 4;
75
76
77 print "<rect x=\"$time\" width=\"$duration\" y=\"$y\" height=\"$height\" style=\"$style\"/>\n";
78 print "<text transform=\"translate($x2,$y2) rotate(90)\">C$C $msec</text>\n";
79 }
80 if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) {
81 my $time = $1 - $base;
82 my $state = $2;
83 my $cpu = $3;
84
85 if (defined($pstate_last[$cpu])) {
86 my $from = $pstate_last[$cpu];
87 my $oldstate = $pstate_state[$cpu];
88 my $duration = ($time-$from) * $scale;
89
90 $from = $from * $scale;
91 my $to = $from + $duration;
92 my $height = 140 - ($oldstate * (140/8));
93
94 my $y = 400 * $cpu + 200 + $height;
95 my $y2 = $y+4;
96 my $style = $styles[8];
97
98 print "<rect x=\"$from\" y=\"$y\" width=\"$duration\" height=\"5\" style=\"$style\"/>\n";
99 print "<text transform=\"translate($from,$y2)\">P$oldstate (cpu $cpu)</text>\n";
100 };
101
102 $pstate_last[$cpu] = $time;
103 $pstate_state[$cpu] = $state;
104 }
105}
106
107
108print "</svg>\n";
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
new file mode 100644
index 000000000000..902f9a992620
--- /dev/null
+++ b/scripts/tracing/draw_functrace.py
@@ -0,0 +1,130 @@
1#!/usr/bin/python
2
3"""
4Copyright 2008 (c) Frederic Weisbecker <fweisbec@gmail.com>
5Licensed under the terms of the GNU GPL License version 2
6
7This script parses a trace provided by the function tracer in
8kernel/trace/trace_functions.c
 9The resulting trace is processed into a tree to produce a more
 10human-readable view of the call stack, drawn as a textual, hierarchical
 11tree of calls. Only the function names and the call times are provided.
12
13Usage:
14 Be sure that you have CONFIG_FUNCTION_TRACER
 15 # mkdir /debug
 16 # mount -t debugfs debugfs /debug
17 # echo function > /debug/tracing/current_tracer
18 $ cat /debug/tracing/trace_pipe > ~/raw_trace_func
 19 Wait a while, but not too long; the script is a bit slow.
 20 Break the pipe (Ctrl + C)
21 $ scripts/draw_functrace.py < raw_trace_func > draw_functrace
22 Then you have your drawn trace in draw_functrace
23"""
24
25
26import sys, re
27
28class CallTree:
29 """ This class provides a tree representation of the functions
30 call stack. If a function has no parent in the kernel (interrupt,
31 syscall, kernel thread...) then it is attached to a virtual parent
32 called ROOT.
33 """
34 ROOT = None
35
36 def __init__(self, func, time = None, parent = None):
37 self._func = func
38 self._time = time
39 if parent is None:
40 self._parent = CallTree.ROOT
41 else:
42 self._parent = parent
43 self._children = []
44
45 def calls(self, func, calltime):
46 """ If a function calls another one, call this method to insert it
47 into the tree at the appropriate place.
48 @return: A reference to the newly created child node.
49 """
50 child = CallTree(func, calltime, self)
51 self._children.append(child)
52 return child
53
54 def getParent(self, func):
55 """ Retrieve the last parent of the current node that
56 has the name given by func. If this function is not
57 on a parent, then create it as new child of root
58 @return: A reference to the parent.
59 """
60 tree = self
61 while tree != CallTree.ROOT and tree._func != func:
62 tree = tree._parent
63 if tree == CallTree.ROOT:
64 child = CallTree.ROOT.calls(func, None)
65 return child
66 return tree
67
68 def __repr__(self):
69 return self.__toString("", True)
70
71 def __toString(self, branch, lastChild):
72 if self._time is not None:
73 s = "%s----%s (%s)\n" % (branch, self._func, self._time)
74 else:
75 s = "%s----%s\n" % (branch, self._func)
76
77 i = 0
78 if lastChild:
79 branch = branch[:-1] + " "
80 while i < len(self._children):
81 if i != len(self._children) - 1:
82 s += "%s" % self._children[i].__toString(branch +\
83 " |", False)
84 else:
85 s += "%s" % self._children[i].__toString(branch +\
86 " |", True)
87 i += 1
88 return s
89
90class BrokenLineException(Exception):
91 """If the last line is not complete because of the pipe breakage,
92 we want to stop the processing and ignore this line.
93 """
94 pass
95
96class CommentLineException(Exception):
97 """ If the line is a comment (as in the beginning of the trace file),
98 just ignore it.
99 """
100 pass
101
102
103def parseLine(line):
104 line = line.strip()
105 if line.startswith("#"):
106 raise CommentLineException
107 m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
108 if m is None:
109 raise BrokenLineException
110 return (m.group(1), m.group(2), m.group(3))
111
112
113def main():
114 CallTree.ROOT = CallTree("Root (Nowhere)", None, None)
115 tree = CallTree.ROOT
116
117 for line in sys.stdin:
118 try:
119 calltime, callee, caller = parseLine(line)
120 except BrokenLineException:
121 break
122 except CommentLineException:
123 continue
124 tree = tree.getParent(caller)
125 tree = tree.calls(callee, calltime)
126
127 print CallTree.ROOT
128
129if __name__ == "__main__":
130 main()