aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ftrace.txt149
-rw-r--r--Documentation/kernel-parameters.txt12
-rw-r--r--Documentation/markers.txt29
-rw-r--r--Documentation/tracepoints.txt94
-rw-r--r--arch/powerpc/include/asm/ftrace.h14
-rw-r--r--arch/powerpc/include/asm/module.h16
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/entry_32.S40
-rw-r--r--arch/powerpc/kernel/entry_64.S12
-rw-r--r--arch/powerpc/kernel/ftrace.c461
-rw-r--r--arch/powerpc/kernel/idle.c5
-rw-r--r--arch/powerpc/kernel/module_32.c10
-rw-r--r--arch/powerpc/kernel/module_64.c13
-rw-r--r--arch/powerpc/lib/Makefile3
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/include/asm/ds.h312
-rw-r--r--arch/x86/include/asm/ftrace.h61
-rw-r--r--arch/x86/include/asm/msr.h3
-rw-r--r--arch/x86/include/asm/processor.h13
-rw-r--r--arch/x86/include/asm/ptrace.h43
-rw-r--r--arch/x86/include/asm/thread_info.h7
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic.c3
-rw-r--r--arch/x86/kernel/cpu/Makefile5
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/ds.c1138
-rw-r--r--arch/x86/kernel/dumpstack.c34
-rw-r--r--arch/x86/kernel/dumpstack.h2
-rw-r--r--arch/x86/kernel/dumpstack_32.c5
-rw-r--r--arch/x86/kernel/dumpstack_64.c7
-rw-r--r--arch/x86/kernel/entry_32.S51
-rw-r--r--arch/x86/kernel/entry_64.S98
-rw-r--r--arch/x86/kernel/ftrace.c390
-rw-r--r--arch/x86/kernel/irq_64.c3
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/process_32.c67
-rw-r--r--arch/x86/kernel/process_64.c58
-rw-r--r--arch/x86/kernel/ptrace.c431
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/stacktrace.c64
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S1
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S1
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--block/Kconfig1
-rw-r--r--block/blk-core.c46
-rw-r--r--block/blktrace.c332
-rw-r--r--block/elevator.c12
-rw-r--r--drivers/char/sysrq.c18
-rw-r--r--drivers/md/dm.c8
-rw-r--r--fs/bio.c5
-rw-r--r--fs/seq_file.c14
-rw-r--r--include/asm-generic/vmlinux.lds.h31
-rw-r--r--include/linux/blktrace_api.h172
-rw-r--r--include/linux/compiler.h84
-rw-r--r--include/linux/ftrace.h293
-rw-r--r--include/linux/ftrace_irq.h13
-rw-r--r--include/linux/hardirq.h15
-rw-r--r--include/linux/marker.h75
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/pid.h4
-rw-r--r--include/linux/ptrace.h22
-rw-r--r--include/linux/rcupdate.h2
-rw-r--r--include/linux/ring_buffer.h16
-rw-r--r--include/linux/sched.h31
-rw-r--r--include/linux/seq_file.h1
-rw-r--r--include/linux/stacktrace.h8
-rw-r--r--include/linux/tracepoint.h57
-rw-r--r--include/linux/tty.h2
-rw-r--r--include/trace/block.h76
-rw-r--r--include/trace/boot.h60
-rw-r--r--include/trace/sched.h36
-rw-r--r--init/Kconfig1
-rw-r--r--init/main.c35
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/extable.c5
-rw-r--r--kernel/fork.c14
-rw-r--r--kernel/kthread.c3
-rw-r--r--kernel/lockdep.c1
-rw-r--r--kernel/marker.c192
-rw-r--r--kernel/module.c13
-rw-r--r--kernel/power/disk.c13
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/ptrace.c12
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sysctl.c20
-rw-r--r--kernel/trace/Kconfig115
-rw-r--r--kernel/trace/Makefile9
-rw-r--r--kernel/trace/ftrace.c929
-rw-r--r--kernel/trace/ring_buffer.c709
-rw-r--r--kernel/trace/trace.c976
-rw-r--r--kernel/trace/trace.h265
-rw-r--r--kernel/trace/trace_boot.c158
-rw-r--r--kernel/trace/trace_branch.c342
-rw-r--r--kernel/trace/trace_functions.c30
-rw-r--r--kernel/trace/trace_functions_graph.c669
-rw-r--r--kernel/trace/trace_hw_branches.c195
-rw-r--r--kernel/trace/trace_irqsoff.c61
-rw-r--r--kernel/trace/trace_mmiotrace.c33
-rw-r--r--kernel/trace/trace_nop.c65
-rw-r--r--kernel/trace/trace_power.c179
-rw-r--r--kernel/trace/trace_sched_switch.c121
-rw-r--r--kernel/trace/trace_sched_wakeup.c72
-rw-r--r--kernel/trace/trace_selftest.c173
-rw-r--r--kernel/trace/trace_stack.c70
-rw-r--r--kernel/trace/trace_sysprof.c31
-rw-r--r--kernel/tracepoint.c295
-rw-r--r--mm/bounce.c5
-rw-r--r--mm/mlock.c45
-rw-r--r--samples/tracepoints/tp-samples-trace.h4
-rw-r--r--samples/tracepoints/tracepoint-probe-sample.c1
-rw-r--r--samples/tracepoints/tracepoint-probe-sample2.c1
-rw-r--r--samples/tracepoints/tracepoint-sample.c3
-rw-r--r--scripts/Makefile.build12
-rw-r--r--scripts/bootgraph.pl16
-rwxr-xr-xscripts/recordmcount.pl48
-rw-r--r--scripts/trace/power.pl108
-rw-r--r--scripts/tracing/draw_functrace.py130
125 files changed, 8735 insertions, 2611 deletions
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 9cc4d685dde5..803b1318b13d 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -82,7 +82,7 @@ of ftrace. Here is a list of some of the key files:
82 tracer is not adding more data, they will display 82 tracer is not adding more data, they will display
83 the same information every time they are read. 83 the same information every time they are read.
84 84
85 iter_ctrl: This file lets the user control the amount of data 85 trace_options: This file lets the user control the amount of data
86 that is displayed in one of the above output 86 that is displayed in one of the above output
87 files. 87 files.
88 88
@@ -94,10 +94,10 @@ of ftrace. Here is a list of some of the key files:
94 only be recorded if the latency is greater than 94 only be recorded if the latency is greater than
95 the value in this file. (in microseconds) 95 the value in this file. (in microseconds)
96 96
97 trace_entries: This sets or displays the number of bytes each CPU 97 buffer_size_kb: This sets or displays the number of kilobytes each CPU
98 buffer can hold. The tracer buffers are the same size 98 buffer can hold. The tracer buffers are the same size
99 for each CPU. The displayed number is the size of the 99 for each CPU. The displayed number is the size of the
100 CPU buffer and not total size of all buffers. The 100 CPU buffer and not total size of all buffers. The
101 trace buffers are allocated in pages (blocks of memory 101 trace buffers are allocated in pages (blocks of memory
102 that the kernel uses for allocation, usually 4 KB in size). 102 that the kernel uses for allocation, usually 4 KB in size).
103 If the last page allocated has room for more bytes 103 If the last page allocated has room for more bytes
@@ -127,6 +127,8 @@ of ftrace. Here is a list of some of the key files:
127 be traced. If a function exists in both set_ftrace_filter 127 be traced. If a function exists in both set_ftrace_filter
128 and set_ftrace_notrace, the function will _not_ be traced. 128 and set_ftrace_notrace, the function will _not_ be traced.
129 129
130 set_ftrace_pid: Have the function tracer only trace a single thread.
131
130 available_filter_functions: This lists the functions that ftrace 132 available_filter_functions: This lists the functions that ftrace
131 has processed and can trace. These are the function 133 has processed and can trace. These are the function
132 names that you can pass to "set_ftrace_filter" or 134 names that you can pass to "set_ftrace_filter" or
@@ -316,23 +318,23 @@ The above is mostly meaningful for kernel developers.
316 The rest is the same as the 'trace' file. 318 The rest is the same as the 'trace' file.
317 319
318 320
319iter_ctrl 321trace_options
320--------- 322-------------
321 323
322The iter_ctrl file is used to control what gets printed in the trace 324The trace_options file is used to control what gets printed in the trace
323output. To see what is available, simply cat the file: 325output. To see what is available, simply cat the file:
324 326
325 cat /debug/tracing/iter_ctrl 327 cat /debug/tracing/trace_options
326 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ 328 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
327 noblock nostacktrace nosched-tree 329 noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
328 330
329To disable one of the options, echo in the option prepended with "no". 331To disable one of the options, echo in the option prepended with "no".
330 332
331 echo noprint-parent > /debug/tracing/iter_ctrl 333 echo noprint-parent > /debug/tracing/trace_options
332 334
333To enable an option, leave off the "no". 335To enable an option, leave off the "no".
334 336
335 echo sym-offset > /debug/tracing/iter_ctrl 337 echo sym-offset > /debug/tracing/trace_options
336 338
337Here are the available options: 339Here are the available options:
338 340
@@ -378,6 +380,20 @@ Here are the available options:
378 When a trace is recorded, so is the stack of functions. 380 When a trace is recorded, so is the stack of functions.
379 This allows for back traces of trace sites. 381 This allows for back traces of trace sites.
380 382
383 userstacktrace - This option changes the trace.
384 It records a stacktrace of the current userspace thread.
385
386 sym-userobj - when user stacktraces are enabled, look up which object the
387 address belongs to, and print a relative address.
388 This is especially useful when ASLR is on; otherwise you don't
389 get a chance to resolve the address to object/file/line after the app is no
390 longer running.
391
392 The lookup is performed when you read trace, trace_pipe, or latency_trace. Example:
393
394 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
395x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
396
381 sched-tree - TBD (any users??) 397 sched-tree - TBD (any users??)
382 398
383 399
@@ -1059,6 +1075,83 @@ For simple one time traces, the above is sufficent. For anything else,
1059a search through /proc/mounts may be needed to find where the debugfs 1075a search through /proc/mounts may be needed to find where the debugfs
1060file-system is mounted. 1076file-system is mounted.
1061 1077
1078
1079Single thread tracing
1080---------------------
1081
1082By writing into /debug/tracing/set_ftrace_pid you can trace a
1083single thread. For example:
1084
1085# cat /debug/tracing/set_ftrace_pid
1086no pid
1087# echo 3111 > /debug/tracing/set_ftrace_pid
1088# cat /debug/tracing/set_ftrace_pid
10893111
1090# echo function > /debug/tracing/current_tracer
1091# cat /debug/tracing/trace | head
1092 # tracer: function
1093 #
1094 # TASK-PID CPU# TIMESTAMP FUNCTION
1095 # | | | | |
1096 yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return
1097 yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range
1098 yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel
1099 yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel
1100 yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll
1101 yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll
1102# echo -1 > /debug/tracing/set_ftrace_pid
1103# cat /debug/tracing/trace |head
1104 # tracer: function
1105 #
1106 # TASK-PID CPU# TIMESTAMP FUNCTION
1107 # | | | | |
1108 ##### CPU 3 buffer started ####
1109 yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait
1110 yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry
1111 yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry
1112 yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit
1113 yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit
1114
1115If you want to trace a function when executing, you could use
1116something like this simple program:
1117
1118#include <stdio.h>
1119#include <stdlib.h>
1120#include <sys/types.h>
1121#include <sys/stat.h>
1122#include <fcntl.h>
1123#include <unistd.h>
1124
1125int main (int argc, char **argv)
1126{
1127 if (argc < 1)
1128 exit(-1);
1129
1130 if (fork() > 0) {
1131 int fd, ffd;
1132 char line[64];
1133 int s;
1134
1135 ffd = open("/debug/tracing/current_tracer", O_WRONLY);
1136 if (ffd < 0)
1137 exit(-1);
1138 write(ffd, "nop", 3);
1139
1140 fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY);
1141 s = sprintf(line, "%d\n", getpid());
1142 write(fd, line, s);
1143
1144 write(ffd, "function", 8);
1145
1146 close(fd);
1147 close(ffd);
1148
1149 execvp(argv[1], argv+1);
1150 }
1151
1152 return 0;
1153}
1154
1062dynamic ftrace 1155dynamic ftrace
1063-------------- 1156--------------
1064 1157
@@ -1158,7 +1251,11 @@ These are the only wild cards which are supported.
1158 1251
1159 <match>*<match> will not work. 1252 <match>*<match> will not work.
1160 1253
1161 # echo hrtimer_* > /debug/tracing/set_ftrace_filter 1254Note: It is better to use quotes to enclose the wild cards, otherwise
1255 the shell may expand the parameters into names of files in the local
1256 directory.
1257
1258 # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
1162 1259
1163Produces: 1260Produces:
1164 1261
@@ -1213,7 +1310,7 @@ Again, now we want to append.
1213 # echo sys_nanosleep > /debug/tracing/set_ftrace_filter 1310 # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
1214 # cat /debug/tracing/set_ftrace_filter 1311 # cat /debug/tracing/set_ftrace_filter
1215sys_nanosleep 1312sys_nanosleep
1216 # echo hrtimer_* >> /debug/tracing/set_ftrace_filter 1313 # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter
1217 # cat /debug/tracing/set_ftrace_filter 1314 # cat /debug/tracing/set_ftrace_filter
1218hrtimer_run_queues 1315hrtimer_run_queues
1219hrtimer_run_pending 1316hrtimer_run_pending
@@ -1299,41 +1396,29 @@ trace entries
1299------------- 1396-------------
1300 1397
1301Having too much or not enough data can be troublesome in diagnosing 1398Having too much or not enough data can be troublesome in diagnosing
1302an issue in the kernel. The file trace_entries is used to modify 1399an issue in the kernel. The file buffer_size_kb is used to modify
1303the size of the internal trace buffers. The number listed 1400the size of the internal trace buffers. The number listed
1304is the number of entries that can be recorded per CPU. To know 1401is the number of entries that can be recorded per CPU. To know
1305the full size, multiply the number of possible CPUS with the 1402the full size, multiply the number of possible CPUS with the
1306number of entries. 1403number of entries.
1307 1404
1308 # cat /debug/tracing/trace_entries 1405 # cat /debug/tracing/buffer_size_kb
130965620 14061408 (units kilobytes)
1310 1407
1311Note, to modify this, you must have tracing completely disabled. To do that, 1408Note, to modify this, you must have tracing completely disabled. To do that,
1312echo "nop" into the current_tracer. If the current_tracer is not set 1409echo "nop" into the current_tracer. If the current_tracer is not set
1313to "nop", an EINVAL error will be returned. 1410to "nop", an EINVAL error will be returned.
1314 1411
1315 # echo nop > /debug/tracing/current_tracer 1412 # echo nop > /debug/tracing/current_tracer
1316 # echo 100000 > /debug/tracing/trace_entries 1413 # echo 10000 > /debug/tracing/buffer_size_kb
1317 # cat /debug/tracing/trace_entries 1414 # cat /debug/tracing/buffer_size_kb
1318100045 141510000 (units kilobytes)
1319
1320
1321Notice that we echoed in 100,000 but the size is 100,045. The entries
1322are held in individual pages. It allocates the number of pages it takes
1323to fulfill the request. If more entries may fit on the last page
1324then they will be added.
1325
1326 # echo 1 > /debug/tracing/trace_entries
1327 # cat /debug/tracing/trace_entries
132885
1329
1330This shows us that 85 entries can fit in a single page.
1331 1416
1332The number of pages which will be allocated is limited to a percentage 1417The number of pages which will be allocated is limited to a percentage
1333of available memory. Allocating too much will produce an error. 1418of available memory. Allocating too much will produce an error.
1334 1419
1335 # echo 1000000000000 > /debug/tracing/trace_entries 1420 # echo 1000000000000 > /debug/tracing/buffer_size_kb
1336-bash: echo: write error: Cannot allocate memory 1421-bash: echo: write error: Cannot allocate memory
1337 # cat /debug/tracing/trace_entries 1422 # cat /debug/tracing/buffer_size_kb
133885 142385
1339 1424
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2c95cae8302b..68e7694c0ac7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -89,6 +89,7 @@ parameter is applicable:
89 SPARC Sparc architecture is enabled. 89 SPARC Sparc architecture is enabled.
90 SWSUSP Software suspend (hibernation) is enabled. 90 SWSUSP Software suspend (hibernation) is enabled.
91 SUSPEND System suspend states are enabled. 91 SUSPEND System suspend states are enabled.
92 FTRACE Function tracing enabled.
92 TS Appropriate touchscreen support is enabled. 93 TS Appropriate touchscreen support is enabled.
93 USB USB support is enabled. 94 USB USB support is enabled.
94 USBHID USB Human Interface Device support is enabled. 95 USBHID USB Human Interface Device support is enabled.
@@ -753,6 +754,14 @@ and is between 256 and 4096 characters. It is defined in the file
753 parameter will force ia64_sal_cache_flush to call 754 parameter will force ia64_sal_cache_flush to call
754 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. 755 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
755 756
757 ftrace=[tracer]
758 [ftrace] will set and start the specified tracer
759 as early as possible in order to facilitate early
760 boot debugging.
761
762 ftrace_dump_on_oops
763 [ftrace] will dump the trace buffers on oops.
764
756 gamecon.map[2|3]= 765 gamecon.map[2|3]=
757 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad 766 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
758 support via parallel port (up to 5 devices per port) 767 support via parallel port (up to 5 devices per port)
@@ -2196,6 +2205,9 @@ and is between 256 and 4096 characters. It is defined in the file
2196 st= [HW,SCSI] SCSI tape parameters (buffers, etc.) 2205 st= [HW,SCSI] SCSI tape parameters (buffers, etc.)
2197 See Documentation/scsi/st.txt. 2206 See Documentation/scsi/st.txt.
2198 2207
2208 stacktrace [FTRACE]
2209 Enable the stack tracer on boot up.
2210
2199 sti= [PARISC,HW] 2211 sti= [PARISC,HW]
2200 Format: <num> 2212 Format: <num>
2201 Set the STI (builtin display/keyboard on the HP-PARISC 2213 Set the STI (builtin display/keyboard on the HP-PARISC
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 089f6138fcd9..d2b3d0e91b26 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -51,11 +51,16 @@ to call) for the specific marker through marker_probe_register() and can be
51activated by calling marker_arm(). Marker deactivation can be done by calling 51activated by calling marker_arm(). Marker deactivation can be done by calling
52marker_disarm() as many times as marker_arm() has been called. Removing a probe 52marker_disarm() as many times as marker_arm() has been called. Removing a probe
53is done through marker_probe_unregister(); it will disarm the probe. 53is done through marker_probe_unregister(); it will disarm the probe.
54marker_synchronize_unregister() must be called before the end of the module exit 54
55function to make sure there is no caller left using the probe. This, and the 55marker_synchronize_unregister() must be called between probe unregistration and
56fact that preemption is disabled around the probe call, make sure that probe 56the first occurrence of
57removal and module unload are safe. See the "Probe example" section below for a 57- the end of module exit function,
58sample probe module. 58 to make sure there is no caller left using the probe;
59- the free of any resource used by the probes,
60 to make sure the probes won't be accessing invalid data.
61This, and the fact that preemption is disabled around the probe call, make sure
62that probe removal and module unload are safe. See the "Probe example" section
63below for a sample probe module.
59 64
60The marker mechanism supports inserting multiple instances of the same marker. 65The marker mechanism supports inserting multiple instances of the same marker.
61Markers can be put in inline functions, inlined static functions, and 66Markers can be put in inline functions, inlined static functions, and
@@ -70,6 +75,20 @@ a printk warning which identifies the inconsistency:
70 75
71"Format mismatch for probe probe_name (format), marker (format)" 76"Format mismatch for probe probe_name (format), marker (format)"
72 77
78Another way to use markers is to simply define the marker without generating any
79function call to actually call into the marker. This is useful in combination
80with tracepoint probes in a scheme like this :
81
82void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk);
83
84DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name,
85 "arg1 %u pid %d");
86
87notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk)
88{
89 struct marker *marker = &GET_MARKER(kernel_irq_entry);
90 /* write data to trace buffers ... */
91}
73 92
74* Probe / marker example 93* Probe / marker example
75 94
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 5d354e167494..6f0a044f5b5e 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -3,28 +3,30 @@
3 Mathieu Desnoyers 3 Mathieu Desnoyers
4 4
5 5
6This document introduces Linux Kernel Tracepoints and their use. It provides 6This document introduces Linux Kernel Tracepoints and their use. It
7examples of how to insert tracepoints in the kernel and connect probe functions 7provides examples of how to insert tracepoints in the kernel and
8to them and provides some examples of probe functions. 8connect probe functions to them and provides some examples of probe
9functions.
9 10
10 11
11* Purpose of tracepoints 12* Purpose of tracepoints
12 13
13A tracepoint placed in code provides a hook to call a function (probe) that you 14A tracepoint placed in code provides a hook to call a function (probe)
14can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or 15that you can provide at runtime. A tracepoint can be "on" (a probe is
15"off" (no probe is attached). When a tracepoint is "off" it has no effect, 16connected to it) or "off" (no probe is attached). When a tracepoint is
16except for adding a tiny time penalty (checking a condition for a branch) and 17"off" it has no effect, except for adding a tiny time penalty
17space penalty (adding a few bytes for the function call at the end of the 18(checking a condition for a branch) and space penalty (adding a few
18instrumented function and adds a data structure in a separate section). When a 19bytes for the function call at the end of the instrumented function
19tracepoint is "on", the function you provide is called each time the tracepoint 20and adds a data structure in a separate section). When a tracepoint
20is executed, in the execution context of the caller. When the function provided 21is "on", the function you provide is called each time the tracepoint
21ends its execution, it returns to the caller (continuing from the tracepoint 22is executed, in the execution context of the caller. When the function
22site). 23provided ends its execution, it returns to the caller (continuing from
24the tracepoint site).
23 25
24You can put tracepoints at important locations in the code. They are 26You can put tracepoints at important locations in the code. They are
25lightweight hooks that can pass an arbitrary number of parameters, 27lightweight hooks that can pass an arbitrary number of parameters,
26which prototypes are described in a tracepoint declaration placed in a header 28which prototypes are described in a tracepoint declaration placed in a
27file. 29header file.
28 30
29They can be used for tracing and performance accounting. 31They can be used for tracing and performance accounting.
30 32
@@ -42,14 +44,16 @@ In include/trace/subsys.h :
42 44
43#include <linux/tracepoint.h> 45#include <linux/tracepoint.h>
44 46
45DEFINE_TRACE(subsys_eventname, 47DECLARE_TRACE(subsys_eventname,
46 TPPTOTO(int firstarg, struct task_struct *p), 48 TPPROTO(int firstarg, struct task_struct *p),
47 TPARGS(firstarg, p)); 49 TPARGS(firstarg, p));
48 50
49In subsys/file.c (where the tracing statement must be added) : 51In subsys/file.c (where the tracing statement must be added) :
50 52
51#include <trace/subsys.h> 53#include <trace/subsys.h>
52 54
55DEFINE_TRACE(subsys_eventname);
56
53void somefct(void) 57void somefct(void)
54{ 58{
55 ... 59 ...
@@ -61,31 +65,41 @@ Where :
61- subsys_eventname is an identifier unique to your event 65- subsys_eventname is an identifier unique to your event
62 - subsys is the name of your subsystem. 66 - subsys is the name of your subsystem.
63 - eventname is the name of the event to trace. 67 - eventname is the name of the event to trace.
64- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
65 called by this tracepoint.
66- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
67 68
68Connecting a function (probe) to a tracepoint is done by providing a probe 69- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
69(function to call) for the specific tracepoint through 70 function called by this tracepoint.
70register_trace_subsys_eventname(). Removing a probe is done through
71unregister_trace_subsys_eventname(); it will remove the probe sure there is no
72caller left using the probe when it returns. Probe removal is preempt-safe
73because preemption is disabled around the probe call. See the "Probe example"
74section below for a sample probe module.
75
76The tracepoint mechanism supports inserting multiple instances of the same
77tracepoint, but a single definition must be made of a given tracepoint name over
78all the kernel to make sure no type conflict will occur. Name mangling of the
79tracepoints is done using the prototypes to make sure typing is correct.
80Verification of probe type correctness is done at the registration site by the
81compiler. Tracepoints can be put in inline functions, inlined static functions,
82and unrolled loops as well as regular functions.
83
84The naming scheme "subsys_event" is suggested here as a convention intended
85to limit collisions. Tracepoint names are global to the kernel: they are
86considered as being the same whether they are in the core kernel image or in
87modules.
88 71
72- TPARGS(firstarg, p) are the parameters names, same as found in the
73 prototype.
74
75Connecting a function (probe) to a tracepoint is done by providing a
76probe (function to call) for the specific tracepoint through
77register_trace_subsys_eventname(). Removing a probe is done through
78unregister_trace_subsys_eventname(); it will remove the probe.
79
80tracepoint_synchronize_unregister() must be called before the end of
81the module exit function to make sure there is no caller left using
82the probe. This, and the fact that preemption is disabled around the
83probe call, make sure that probe removal and module unload are safe.
84See the "Probe example" section below for a sample probe module.
85
86The tracepoint mechanism supports inserting multiple instances of the
87same tracepoint, but a single definition must be made of a given
88tracepoint name over all the kernel to make sure no type conflict will
89occur. Name mangling of the tracepoints is done using the prototypes
90to make sure typing is correct. Verification of probe type correctness
91is done at the registration site by the compiler. Tracepoints can be
92put in inline functions, inlined static functions, and unrolled loops
93as well as regular functions.
94
95The naming scheme "subsys_event" is suggested here as a convention
96intended to limit collisions. Tracepoint names are global to the
97kernel: they are considered as being the same whether they are in the
98core kernel image or in modules.
99
100If the tracepoint has to be used in kernel modules, an
101EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
102used to export the defined tracepoints.
89 103
90* Probe / tracepoint example 104* Probe / tracepoint example
91 105
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index b298f7a631e6..e5f2ae8362f7 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -7,7 +7,19 @@
7 7
8#ifndef __ASSEMBLY__ 8#ifndef __ASSEMBLY__
9extern void _mcount(void); 9extern void _mcount(void);
10#endif 10
11#ifdef CONFIG_DYNAMIC_FTRACE
12static inline unsigned long ftrace_call_adjust(unsigned long addr)
13{
14 /* relocation of mcount call site is the same as the address */
15 return addr;
16}
17
18struct dyn_arch_ftrace {
19 struct module *mod;
20};
21#endif /* CONFIG_DYNAMIC_FTRACE */
22#endif /* __ASSEMBLY__ */
11 23
12#endif 24#endif
13 25
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index e5f14b13ccf0..08454880a2c0 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -34,11 +34,19 @@ struct mod_arch_specific {
34#ifdef __powerpc64__ 34#ifdef __powerpc64__
35 unsigned int stubs_section; /* Index of stubs section in module */ 35 unsigned int stubs_section; /* Index of stubs section in module */
36 unsigned int toc_section; /* What section is the TOC? */ 36 unsigned int toc_section; /* What section is the TOC? */
37#else 37#ifdef CONFIG_DYNAMIC_FTRACE
38 unsigned long toc;
39 unsigned long tramp;
40#endif
41
42#else /* powerpc64 */
38 /* Indices of PLT sections within module. */ 43 /* Indices of PLT sections within module. */
39 unsigned int core_plt_section; 44 unsigned int core_plt_section;
40 unsigned int init_plt_section; 45 unsigned int init_plt_section;
46#ifdef CONFIG_DYNAMIC_FTRACE
47 unsigned long tramp;
41#endif 48#endif
49#endif /* powerpc64 */
42 50
43 /* List of BUG addresses, source line numbers and filenames */ 51 /* List of BUG addresses, source line numbers and filenames */
44 struct list_head bug_list; 52 struct list_head bug_list;
@@ -68,6 +76,12 @@ struct mod_arch_specific {
68# endif /* MODULE */ 76# endif /* MODULE */
69#endif 77#endif
70 78
79#ifdef CONFIG_DYNAMIC_FTRACE
80# ifdef MODULE
81 asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
82# endif /* MODULE */
83#endif
84
71 85
72struct exception_table_entry; 86struct exception_table_entry;
73void sort_ex_table(struct exception_table_entry *start, 87void sort_ex_table(struct exception_table_entry *start,
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 92673b43858d..d17edb4a2f9d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -17,6 +17,7 @@ ifdef CONFIG_FUNCTION_TRACER
17CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog 17CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
18CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog 18CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
19CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog 19CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
20CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
20 21
21ifdef CONFIG_DYNAMIC_FTRACE 22ifdef CONFIG_DYNAMIC_FTRACE
22# dynamic ftrace setup. 23# dynamic ftrace setup.
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 7ecc0d1855c3..6f7eb7e00c79 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1162,39 +1162,17 @@ machine_check_in_rtas:
1162#ifdef CONFIG_DYNAMIC_FTRACE 1162#ifdef CONFIG_DYNAMIC_FTRACE
1163_GLOBAL(mcount) 1163_GLOBAL(mcount)
1164_GLOBAL(_mcount) 1164_GLOBAL(_mcount)
1165 stwu r1,-48(r1) 1165 /*
1166 stw r3, 12(r1) 1166 * It is required that _mcount on PPC32 must preserve the
1167 stw r4, 16(r1) 1167 * link register. But we have r0 to play with. We use r0
1168 stw r5, 20(r1) 1168 * to push the return address back to the caller of mcount
1169 stw r6, 24(r1) 1169 * into the ctr register, restore the link register and
1170 mflr r3 1170 * then jump back using the ctr register.
1171 stw r7, 28(r1) 1171 */
1172 mfcr r5 1172 mflr r0
1173 stw r8, 32(r1)
1174 stw r9, 36(r1)
1175 stw r10,40(r1)
1176 stw r3, 44(r1)
1177 stw r5, 8(r1)
1178 subi r3, r3, MCOUNT_INSN_SIZE
1179 .globl mcount_call
1180mcount_call:
1181 bl ftrace_stub
1182 nop
1183 lwz r6, 8(r1)
1184 lwz r0, 44(r1)
1185 lwz r3, 12(r1)
1186 mtctr r0 1173 mtctr r0
1187 lwz r4, 16(r1) 1174 lwz r0, 4(r1)
1188 mtcr r6
1189 lwz r5, 20(r1)
1190 lwz r6, 24(r1)
1191 lwz r0, 52(r1)
1192 lwz r7, 28(r1)
1193 lwz r8, 32(r1)
1194 mtlr r0 1175 mtlr r0
1195 lwz r9, 36(r1)
1196 lwz r10,40(r1)
1197 addi r1, r1, 48
1198 bctr 1176 bctr
1199 1177
1200_GLOBAL(ftrace_caller) 1178_GLOBAL(ftrace_caller)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e0bcf9354286..383ed6eb0085 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -894,18 +894,6 @@ _GLOBAL(enter_prom)
894#ifdef CONFIG_DYNAMIC_FTRACE 894#ifdef CONFIG_DYNAMIC_FTRACE
895_GLOBAL(mcount) 895_GLOBAL(mcount)
896_GLOBAL(_mcount) 896_GLOBAL(_mcount)
897 /* Taken from output of objdump from lib64/glibc */
898 mflr r3
899 stdu r1, -112(r1)
900 std r3, 128(r1)
901 subi r3, r3, MCOUNT_INSN_SIZE
902 .globl mcount_call
903mcount_call:
904 bl ftrace_stub
905 nop
906 ld r0, 128(r1)
907 mtlr r0
908 addi r1, r1, 112
909 blr 897 blr
910 898
911_GLOBAL(ftrace_caller) 899_GLOBAL(ftrace_caller)
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index f4b006ed0ab1..5355244c99ff 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -9,22 +9,30 @@
9 9
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/uaccess.h>
13#include <linux/module.h>
12#include <linux/ftrace.h> 14#include <linux/ftrace.h>
13#include <linux/percpu.h> 15#include <linux/percpu.h>
14#include <linux/init.h> 16#include <linux/init.h>
15#include <linux/list.h> 17#include <linux/list.h>
16 18
17#include <asm/cacheflush.h> 19#include <asm/cacheflush.h>
20#include <asm/code-patching.h>
18#include <asm/ftrace.h> 21#include <asm/ftrace.h>
19 22
23#if 0
24#define DEBUGP printk
25#else
26#define DEBUGP(fmt , ...) do { } while (0)
27#endif
20 28
21static unsigned int ftrace_nop = 0x60000000; 29static unsigned int ftrace_nop = PPC_NOP_INSTR;
22 30
23#ifdef CONFIG_PPC32 31#ifdef CONFIG_PPC32
24# define GET_ADDR(addr) addr 32# define GET_ADDR(addr) addr
25#else 33#else
26/* PowerPC64's functions are data that points to the functions */ 34/* PowerPC64's functions are data that points to the functions */
27# define GET_ADDR(addr) *(unsigned long *)addr 35# define GET_ADDR(addr) (*(unsigned long *)addr)
28#endif 36#endif
29 37
30 38
@@ -33,12 +41,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr)
33 return (int)(addr - ip); 41 return (int)(addr - ip);
34} 42}
35 43
36unsigned char *ftrace_nop_replace(void) 44static unsigned char *ftrace_nop_replace(void)
37{ 45{
38 return (char *)&ftrace_nop; 46 return (char *)&ftrace_nop;
39} 47}
40 48
41unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) 49static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
42{ 50{
43 static unsigned int op; 51 static unsigned int op;
44 52
@@ -68,49 +76,422 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
68# define _ASM_PTR " .long " 76# define _ASM_PTR " .long "
69#endif 77#endif
70 78
71int 79static int
72ftrace_modify_code(unsigned long ip, unsigned char *old_code, 80ftrace_modify_code(unsigned long ip, unsigned char *old_code,
73 unsigned char *new_code) 81 unsigned char *new_code)
74{ 82{
75 unsigned replaced; 83 unsigned char replaced[MCOUNT_INSN_SIZE];
76 unsigned old = *(unsigned *)old_code;
77 unsigned new = *(unsigned *)new_code;
78 int faulted = 0;
79 84
80 /* 85 /*
81 * Note: Due to modules and __init, code can 86 * Note: Due to modules and __init, code can
82 * disappear and change, we need to protect against faulting 87 * disappear and change, we need to protect against faulting
83 * as well as code changing. 88 * as well as code changing. We do this by using the
89 * probe_kernel_* functions.
84 * 90 *
85 * No real locking needed, this code is run through 91 * No real locking needed, this code is run through
86 * kstop_machine. 92 * kstop_machine, or before SMP starts.
87 */ 93 */
88 asm volatile ( 94
89 "1: lwz %1, 0(%2)\n" 95 /* read the text we want to modify */
90 " cmpw %1, %5\n" 96 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
91 " bne 2f\n" 97 return -EFAULT;
92 " stwu %3, 0(%2)\n" 98
93 "2:\n" 99 /* Make sure it is what we expect it to be */
94 ".section .fixup, \"ax\"\n" 100 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
95 "3: li %0, 1\n" 101 return -EINVAL;
96 " b 2b\n" 102
97 ".previous\n" 103 /* replace the text with the new text */
98 ".section __ex_table,\"a\"\n" 104 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
99 _ASM_ALIGN "\n" 105 return -EPERM;
100 _ASM_PTR "1b, 3b\n" 106
101 ".previous" 107 flush_icache_range(ip, ip + 8);
102 : "=r"(faulted), "=r"(replaced) 108
103 : "r"(ip), "r"(new), 109 return 0;
104 "0"(faulted), "r"(old) 110}
105 : "memory"); 111
106 112/*
107 if (replaced != old && replaced != new) 113 * Helper functions that are the same for both PPC64 and PPC32.
108 faulted = 2; 114 */
109 115static int test_24bit_addr(unsigned long ip, unsigned long addr)
110 if (!faulted) 116{
111 flush_icache_range(ip, ip + 8); 117
112 118 /* use the create_branch to verify that this offset can be branched */
113 return faulted; 119 return create_branch((unsigned int *)ip, addr, 0);
120}
121
122static int is_bl_op(unsigned int op)
123{
124 return (op & 0xfc000003) == 0x48000001;
125}
126
127static unsigned long find_bl_target(unsigned long ip, unsigned int op)
128{
129 static int offset;
130
131 offset = (op & 0x03fffffc);
132 /* make it signed */
133 if (offset & 0x02000000)
134 offset |= 0xfe000000;
135
136 return ip + (long)offset;
137}
138
139#ifdef CONFIG_PPC64
140static int
141__ftrace_make_nop(struct module *mod,
142 struct dyn_ftrace *rec, unsigned long addr)
143{
144 unsigned int op;
145 unsigned int jmp[5];
146 unsigned long ptr;
147 unsigned long ip = rec->ip;
148 unsigned long tramp;
149 int offset;
150
151 /* read where this goes */
152 if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
153 return -EFAULT;
154
155 /* Make sure that this is still a 24bit jump */
156 if (!is_bl_op(op)) {
157 printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
158 return -EINVAL;
159 }
160
161 /* let's find where the pointer goes */
162 tramp = find_bl_target(ip, op);
163
164 /*
165 * On PPC64 the trampoline looks like:
166 * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, <high>
167 * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, <low>
168 * Where the bytes 2,3,6 and 7 make up the 32bit offset
169 * to the TOC that holds the pointer
170 * to jump to.
171 * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1)
172 * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12)
173 * The actual address is 32 bytes from the offset
174 * into the TOC.
175 * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12)
176 */
177
178 DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
179
180 /* Find where the trampoline jumps to */
181 if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
182 printk(KERN_ERR "Failed to read %lx\n", tramp);
183 return -EFAULT;
184 }
185
186 DEBUGP(" %08x %08x", jmp[0], jmp[1]);
187
188 /* verify that this is what we expect it to be */
189 if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
190 ((jmp[1] & 0xffff0000) != 0x398c0000) ||
191 (jmp[2] != 0xf8410028) ||
192 (jmp[3] != 0xe96c0020) ||
193 (jmp[4] != 0xe84c0028)) {
194 printk(KERN_ERR "Not a trampoline\n");
195 return -EINVAL;
196 }
197
198 offset = (unsigned)((unsigned short)jmp[0]) << 16 |
199 (unsigned)((unsigned short)jmp[1]);
200
201 DEBUGP(" %x ", offset);
202
203 /* get the address this jumps to */
204 tramp = mod->arch.toc + offset + 32;
205 DEBUGP("toc: %lx", tramp);
206
207 if (probe_kernel_read(jmp, (void *)tramp, 8)) {
208 printk(KERN_ERR "Failed to read %lx\n", tramp);
209 return -EFAULT;
210 }
211
212 DEBUGP(" %08x %08x\n", jmp[0], jmp[1]);
213
214 ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
215
216 /* This should match what was called */
217 if (ptr != GET_ADDR(addr)) {
218 printk(KERN_ERR "addr does not match %lx\n", ptr);
219 return -EINVAL;
220 }
221
222 /*
223 * We want to nop the line, but the next line is
224 * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1)
225 * This needs to be turned to a nop too.
226 */
227 if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
228 return -EFAULT;
229
230 if (op != 0xe8410028) {
231 printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
232 return -EINVAL;
233 }
234
235 /*
236 * Milton Miller pointed out that we can not blindly do nops.
237 * If a task was preempted when calling a trace function,
238 * the nops will remove the way to restore the TOC in r2
239 * and the r2 TOC will get corrupted.
240 */
241
242 /*
243 * Replace:
244 * bl <tramp> <==== will be replaced with "b 1f"
245 * ld r2,40(r1)
246 * 1:
247 */
248 op = 0x48000008; /* b +8 */
249
250 if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
251 return -EPERM;
252
253
254 flush_icache_range(ip, ip + 8);
255
256 return 0;
257}
258
259#else /* !PPC64 */
260static int
261__ftrace_make_nop(struct module *mod,
262 struct dyn_ftrace *rec, unsigned long addr)
263{
264 unsigned int op;
265 unsigned int jmp[4];
266 unsigned long ip = rec->ip;
267 unsigned long tramp;
268
269 if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
270 return -EFAULT;
271
272 /* Make sure that this is still a 24bit jump */
273 if (!is_bl_op(op)) {
274 printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
275 return -EINVAL;
276 }
277
278 /* let's find where the pointer goes */
279 tramp = find_bl_target(ip, op);
280
281 /*
282 * On PPC32 the trampoline looks like:
283 * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha
284 * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l
285 * 0x7d, 0x69, 0x03, 0xa6 mtctr r11
286 * 0x4e, 0x80, 0x04, 0x20 bctr
287 */
288
289 DEBUGP("ip:%lx jumps to %lx", ip, tramp);
290
291 /* Find where the trampoline jumps to */
292 if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
293 printk(KERN_ERR "Failed to read %lx\n", tramp);
294 return -EFAULT;
295 }
296
297 DEBUGP(" %08x %08x ", jmp[0], jmp[1]);
298
299 /* verify that this is what we expect it to be */
300 if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
301 ((jmp[1] & 0xffff0000) != 0x396b0000) ||
302 (jmp[2] != 0x7d6903a6) ||
303 (jmp[3] != 0x4e800420)) {
304 printk(KERN_ERR "Not a trampoline\n");
305 return -EINVAL;
306 }
307
308 tramp = (jmp[1] & 0xffff) |
309 ((jmp[0] & 0xffff) << 16);
310 if (tramp & 0x8000)
311 tramp -= 0x10000;
312
313 DEBUGP(" %x ", tramp);
314
315 if (tramp != addr) {
316 printk(KERN_ERR
317 "Trampoline location %08lx does not match addr\n",
318 tramp);
319 return -EINVAL;
320 }
321
322 op = PPC_NOP_INSTR;
323
324 if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
325 return -EPERM;
326
327 flush_icache_range(ip, ip + 8);
328
329 return 0;
330}
331#endif /* PPC64 */
332
333int ftrace_make_nop(struct module *mod,
334 struct dyn_ftrace *rec, unsigned long addr)
335{
336 unsigned char *old, *new;
337 unsigned long ip = rec->ip;
338
339 /*
340 * If the calling address is more than 24 bits away,
341 * then we had to use a trampoline to make the call.
342 * Otherwise just update the call site.
343 */
344 if (test_24bit_addr(ip, addr)) {
345 /* within range */
346 old = ftrace_call_replace(ip, addr);
347 new = ftrace_nop_replace();
348 return ftrace_modify_code(ip, old, new);
349 }
350
351 /*
352 * Out of range jumps are called from modules.
353 * We should either already have a pointer to the module
354 * or it has been passed in.
355 */
356 if (!rec->arch.mod) {
357 if (!mod) {
358 printk(KERN_ERR "No module loaded addr=%lx\n",
359 addr);
360 return -EFAULT;
361 }
362 rec->arch.mod = mod;
363 } else if (mod) {
364 if (mod != rec->arch.mod) {
365 printk(KERN_ERR
366 "Record mod %p not equal to passed in mod %p\n",
367 rec->arch.mod, mod);
368 return -EINVAL;
369 }
370 /* nothing to do if mod == rec->arch.mod */
371 } else
372 mod = rec->arch.mod;
373
374 return __ftrace_make_nop(mod, rec, addr);
375
376}
377
378#ifdef CONFIG_PPC64
379static int
380__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
381{
382 unsigned int op[2];
383 unsigned long ip = rec->ip;
384
385 /* read where this goes */
386 if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
387 return -EFAULT;
388
389 /*
390 * It should be pointing to two nops or
391 * b +8; ld r2,40(r1)
392 */
393 if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
394 ((op[0] != PPC_NOP_INSTR) || (op[1] != PPC_NOP_INSTR))) {
395 printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
396 return -EINVAL;
397 }
398
399 /* If we never set up a trampoline to ftrace_caller, then bail */
400 if (!rec->arch.mod->arch.tramp) {
401 printk(KERN_ERR "No ftrace trampoline\n");
402 return -EINVAL;
403 }
404
405 /* create the branch to the trampoline */
406 op[0] = create_branch((unsigned int *)ip,
407 rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
408 if (!op[0]) {
409 printk(KERN_ERR "REL24 out of range!\n");
410 return -EINVAL;
411 }
412
413 /* ld r2,40(r1) */
414 op[1] = 0xe8410028;
415
416 DEBUGP("write to %lx\n", rec->ip);
417
418 if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
419 return -EPERM;
420
421 flush_icache_range(ip, ip + 8);
422
423 return 0;
424}
425#else
426static int
427__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
428{
429 unsigned int op;
430 unsigned long ip = rec->ip;
431
432 /* read where this goes */
433 if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
434 return -EFAULT;
435
436 /* It should be pointing to a nop */
437 if (op != PPC_NOP_INSTR) {
438 printk(KERN_ERR "Expected NOP but have %x\n", op);
439 return -EINVAL;
440 }
441
442 /* If we never set up a trampoline to ftrace_caller, then bail */
443 if (!rec->arch.mod->arch.tramp) {
444 printk(KERN_ERR "No ftrace trampoline\n");
445 return -EINVAL;
446 }
447
448 /* create the branch to the trampoline */
449 op = create_branch((unsigned int *)ip,
450 rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
451 if (!op) {
452 printk(KERN_ERR "REL24 out of range!\n");
453 return -EINVAL;
454 }
455
456 DEBUGP("write to %lx\n", rec->ip);
457
458 if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
459 return -EPERM;
460
461 flush_icache_range(ip, ip + 8);
462
463 return 0;
464}
465#endif /* CONFIG_PPC64 */
466
467int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
468{
469 unsigned char *old, *new;
470 unsigned long ip = rec->ip;
471
472 /*
473 * If the calling address is more than 24 bits away,
474 * then we had to use a trampoline to make the call.
475 * Otherwise just update the call site.
476 */
477 if (test_24bit_addr(ip, addr)) {
478 /* within range */
479 old = ftrace_nop_replace();
480 new = ftrace_call_replace(ip, addr);
481 return ftrace_modify_code(ip, old, new);
482 }
483
484 /*
485 * Out of range jumps are called from modules.
486 * Being that we are converting from nop, it had better
487 * already have a module defined.
488 */
489 if (!rec->arch.mod) {
490 printk(KERN_ERR "No module loaded\n");
491 return -EINVAL;
492 }
493
494 return __ftrace_make_call(rec, addr);
114} 495}
115 496
116int ftrace_update_ftrace_func(ftrace_func_t func) 497int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -128,10 +509,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
128 509
129int __init ftrace_dyn_arch_init(void *data) 510int __init ftrace_dyn_arch_init(void *data)
130{ 511{
131 /* This is running in kstop_machine */ 512 /* caller expects data to be zero */
513 unsigned long *p = data;
132 514
133 ftrace_mcount_set(data); 515 *p = 0;
134 516
135 return 0; 517 return 0;
136} 518}
137
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 31982d05d81a..88d9c1d5e5fb 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -69,10 +69,15 @@ void cpu_idle(void)
69 smp_mb(); 69 smp_mb();
70 local_irq_disable(); 70 local_irq_disable();
71 71
72 /* Don't trace irqs off for idle */
73 stop_critical_timings();
74
72 /* check again after disabling irqs */ 75 /* check again after disabling irqs */
73 if (!need_resched() && !cpu_should_die()) 76 if (!need_resched() && !cpu_should_die())
74 ppc_md.power_save(); 77 ppc_md.power_save();
75 78
79 start_critical_timings();
80
76 local_irq_enable(); 81 local_irq_enable();
77 set_thread_flag(TIF_POLLING_NRFLAG); 82 set_thread_flag(TIF_POLLING_NRFLAG);
78 83
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 2df91a03462a..f832773fc28e 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -22,6 +22,7 @@
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/ftrace.h>
25#include <linux/cache.h> 26#include <linux/cache.h>
26#include <linux/bug.h> 27#include <linux/bug.h>
27#include <linux/sort.h> 28#include <linux/sort.h>
@@ -53,6 +54,9 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
53 r_addend = rela[i].r_addend; 54 r_addend = rela[i].r_addend;
54 } 55 }
55 56
57#ifdef CONFIG_DYNAMIC_FTRACE
58 _count_relocs++; /* add one for ftrace_caller */
59#endif
56 return _count_relocs; 60 return _count_relocs;
57} 61}
58 62
@@ -306,5 +310,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
306 return -ENOEXEC; 310 return -ENOEXEC;
307 } 311 }
308 } 312 }
313#ifdef CONFIG_DYNAMIC_FTRACE
314 module->arch.tramp =
315 do_plt_call(module->module_core,
316 (unsigned long)ftrace_caller,
317 sechdrs, module);
318#endif
309 return 0; 319 return 0;
310} 320}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 1af2377e4992..8992b031a7b6 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -20,6 +20,7 @@
20#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
21#include <linux/err.h> 21#include <linux/err.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/ftrace.h>
23#include <linux/bug.h> 24#include <linux/bug.h>
24#include <asm/module.h> 25#include <asm/module.h>
25#include <asm/firmware.h> 26#include <asm/firmware.h>
@@ -163,6 +164,11 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
163 } 164 }
164 } 165 }
165 166
167#ifdef CONFIG_DYNAMIC_FTRACE
168 /* make the trampoline to the ftrace_caller */
169 relocs++;
170#endif
171
166 DEBUGP("Looks like a total of %lu stubs, max\n", relocs); 172 DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
167 return relocs * sizeof(struct ppc64_stub_entry); 173 return relocs * sizeof(struct ppc64_stub_entry);
168} 174}
@@ -441,5 +447,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
441 } 447 }
442 } 448 }
443 449
450#ifdef CONFIG_DYNAMIC_FTRACE
451 me->arch.toc = my_r2(sechdrs, me);
452 me->arch.tramp = stub_for_addr(sechdrs,
453 (unsigned long)ftrace_caller,
454 me);
455#endif
456
444 return 0; 457 return 0;
445} 458}
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d69912c07ce7..8db35278a4b4 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -6,6 +6,9 @@ ifeq ($(CONFIG_PPC64),y)
6EXTRA_CFLAGS += -mno-minimal-toc 6EXTRA_CFLAGS += -mno-minimal-toc
7endif 7endif
8 8
9CFLAGS_REMOVE_code-patching.o = -pg
10CFLAGS_REMOVE_feature-fixups.o = -pg
11
9obj-y := string.o alloc.o \ 12obj-y := string.o alloc.o \
10 checksum_$(CONFIG_WORD_SIZE).o 13 checksum_$(CONFIG_WORD_SIZE).o
11obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o 14obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a2ae4c05f46f..7b17f9d72ba1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,11 +31,14 @@ config X86
31 select HAVE_FTRACE_MCOUNT_RECORD 31 select HAVE_FTRACE_MCOUNT_RECORD
32 select HAVE_DYNAMIC_FTRACE 32 select HAVE_DYNAMIC_FTRACE
33 select HAVE_FUNCTION_TRACER 33 select HAVE_FUNCTION_TRACER
34 select HAVE_FUNCTION_GRAPH_TRACER
35 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
34 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 36 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
35 select HAVE_ARCH_KGDB if !X86_VOYAGER 37 select HAVE_ARCH_KGDB if !X86_VOYAGER
36 select HAVE_ARCH_TRACEHOOK 38 select HAVE_ARCH_TRACEHOOK
37 select HAVE_GENERIC_DMA_COHERENT if X86_32 39 select HAVE_GENERIC_DMA_COHERENT if X86_32
38 select HAVE_EFFICIENT_UNALIGNED_ACCESS 40 select HAVE_EFFICIENT_UNALIGNED_ACCESS
41 select USER_STACKTRACE_SUPPORT
39 42
40config ARCH_DEFCONFIG 43config ARCH_DEFCONFIG
41 string 44 string
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8e99073b9e0f..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,12 +515,12 @@ config CPU_SUP_UMC_32
515config X86_DS 515config X86_DS
516 def_bool X86_PTRACE_BTS 516 def_bool X86_PTRACE_BTS
517 depends on X86_DEBUGCTLMSR 517 depends on X86_DEBUGCTLMSR
518 select HAVE_HW_BRANCH_TRACER
518 519
519config X86_PTRACE_BTS 520config X86_PTRACE_BTS
520 bool "Branch Trace Store" 521 bool "Branch Trace Store"
521 default y 522 default y
522 depends on X86_DEBUGCTLMSR 523 depends on X86_DEBUGCTLMSR
523 depends on BROKEN
524 help 524 help
525 This adds a ptrace interface to the hardware's branch trace store. 525 This adds a ptrace interface to the hardware's branch trace store.
526 526
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 4ee768660f75..10d6cc3fd052 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,14 +174,10 @@ config IOMMU_LEAK
174 Add a simple leak tracer to the IOMMU code. This is useful when you 174 Add a simple leak tracer to the IOMMU code. This is useful when you
175 are debugging a buggy device driver that leaks IOMMU mappings. 175 are debugging a buggy device driver that leaks IOMMU mappings.
176 176
177config MMIOTRACE_HOOKS
178 bool
179
180config MMIOTRACE 177config MMIOTRACE
181 bool "Memory mapped IO tracing" 178 bool "Memory mapped IO tracing"
182 depends on DEBUG_KERNEL && PCI 179 depends on DEBUG_KERNEL && PCI
183 select TRACING 180 select TRACING
184 select MMIOTRACE_HOOKS
185 help 181 help
186 Mmiotrace traces Memory Mapped I/O access and is meant for 182 Mmiotrace traces Memory Mapped I/O access and is meant for
187 debugging and reverse engineering. It is called from the ioremap 183 debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a95008457ea4..a8f672ba100c 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -6,14 +6,13 @@
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - DS and BTS hardware configuration
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It does not do:
15 * - get_task_struct on all parameter tasks 14 * - security checking (is the caller allowed to trace the task)
16 * - current is allowed to trace parameter tasks 15 * - buffer allocation (memory accounting)
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -26,11 +25,51 @@
26 25
27#include <linux/types.h> 26#include <linux/types.h>
28#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/err.h>
29 29
30 30
31#ifdef CONFIG_X86_DS 31#ifdef CONFIG_X86_DS
32 32
33struct task_struct; 33struct task_struct;
34struct ds_context;
35struct ds_tracer;
36struct bts_tracer;
37struct pebs_tracer;
38
39typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
40typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
41
42
43/*
44 * A list of features plus corresponding macros to talk about them in
45 * the ds_request function's flags parameter.
46 *
47 * We use the enum to index an array of corresponding control bits;
48 * we use the macro to index a flags bit-vector.
49 */
50enum ds_feature {
51 dsf_bts = 0,
52 dsf_bts_kernel,
53#define BTS_KERNEL (1 << dsf_bts_kernel)
54 /* trace kernel-mode branches */
55
56 dsf_bts_user,
57#define BTS_USER (1 << dsf_bts_user)
58 /* trace user-mode branches */
59
60 dsf_bts_overflow,
61 dsf_bts_max,
62 dsf_pebs = dsf_bts_max,
63
64 dsf_pebs_max,
65 dsf_ctl_max = dsf_pebs_max,
66 dsf_bts_timestamps = dsf_ctl_max,
67#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
68 /* add timestamps into BTS trace */
69
70#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
71};
72
34 73
35/* 74/*
36 * Request BTS or PEBS 75 * Request BTS or PEBS
@@ -38,163 +77,169 @@ struct task_struct;
38 * Due to alignment constraints, the actual buffer may be slightly 77 * Due to alignment constraints, the actual buffer may be slightly
39 * smaller than the requested or provided buffer. 78 * smaller than the requested or provided buffer.
40 * 79 *
41 * Returns 0 on success; -Eerrno otherwise 80 * Returns a pointer to a tracer structure on success, or
81 * ERR_PTR(errcode) on failure.
82 *
83 * The interrupt threshold is independent from the overflow callback
84 * to allow users to use their own overflow interrupt handling mechanism.
42 * 85 *
43 * task: the task to request recording for; 86 * task: the task to request recording for;
44 * NULL for per-cpu recording on the current cpu 87 * NULL for per-cpu recording on the current cpu
45 * base: the base pointer for the (non-pageable) buffer; 88 * base: the base pointer for the (non-pageable) buffer;
46 * NULL if buffer allocation requested 89 * size: the size of the provided buffer in bytes
47 * size: the size of the requested or provided buffer
48 * ovfl: pointer to a function to be called on buffer overflow; 90 * ovfl: pointer to a function to be called on buffer overflow;
49 * NULL if cyclic buffer requested 91 * NULL if cyclic buffer requested
92 * th: the interrupt threshold in records from the end of the buffer;
93 * -1 if no interrupt threshold is requested.
94 * flags: a bit-mask of the above flags
50 */ 95 */
51typedef void (*ds_ovfl_callback_t)(struct task_struct *); 96extern struct bts_tracer *ds_request_bts(struct task_struct *task,
52extern int ds_request_bts(struct task_struct *task, void *base, size_t size, 97 void *base, size_t size,
53 ds_ovfl_callback_t ovfl); 98 bts_ovfl_callback_t ovfl,
54extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, 99 size_t th, unsigned int flags);
55 ds_ovfl_callback_t ovfl); 100extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
101 void *base, size_t size,
102 pebs_ovfl_callback_t ovfl,
103 size_t th, unsigned int flags);
56 104
57/* 105/*
58 * Release BTS or PEBS resources 106 * Release BTS or PEBS resources
107 * Suspend and resume BTS or PEBS tracing
59 * 108 *
60 * Frees buffers allocated on ds_request. 109 * tracer: the tracer handle returned from ds_request_~()
61 *
62 * Returns 0 on success; -Eerrno otherwise
63 *
64 * task: the task to release resources for;
65 * NULL to release resources for the current cpu
66 */ 110 */
67extern int ds_release_bts(struct task_struct *task); 111extern void ds_release_bts(struct bts_tracer *tracer);
68extern int ds_release_pebs(struct task_struct *task); 112extern void ds_suspend_bts(struct bts_tracer *tracer);
113extern void ds_resume_bts(struct bts_tracer *tracer);
114extern void ds_release_pebs(struct pebs_tracer *tracer);
115extern void ds_suspend_pebs(struct pebs_tracer *tracer);
116extern void ds_resume_pebs(struct pebs_tracer *tracer);
69 117
70/*
71 * Return the (array) index of the write pointer.
72 * (assuming an array of BTS/PEBS records)
73 *
74 * Returns -Eerrno on error
75 *
76 * task: the task to access;
77 * NULL to access the current cpu
78 * pos (out): if not NULL, will hold the result
79 */
80extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
81extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
82 118
83/* 119/*
84 * Return the (array) index one record beyond the end of the array. 120 * The raw DS buffer state as it is used for BTS and PEBS recording.
85 * (assuming an array of BTS/PEBS records)
86 * 121 *
87 * Returns -Eerrno on error 122 * This is the low-level, arch-dependent interface for working
88 * 123 * directly on the raw trace data.
89 * task: the task to access;
90 * NULL to access the current cpu
91 * pos (out): if not NULL, will hold the result
92 */ 124 */
93extern int ds_get_bts_end(struct task_struct *task, size_t *pos); 125struct ds_trace {
94extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); 126 /* the number of bts/pebs records */
127 size_t n;
128 /* the size of a bts/pebs record in bytes */
129 size_t size;
130 /* pointers into the raw buffer:
131 - to the first entry */
132 void *begin;
133 /* - one beyond the last entry */
134 void *end;
135 /* - one beyond the newest entry */
136 void *top;
137 /* - the interrupt threshold */
138 void *ith;
139 /* flags given on ds_request() */
140 unsigned int flags;
141};
95 142
96/* 143/*
97 * Provide a pointer to the BTS/PEBS record at parameter index. 144 * An arch-independent view on branch trace data.
98 * (assuming an array of BTS/PEBS records)
99 *
100 * The pointer points directly into the buffer. The user is
101 * responsible for copying the record.
102 *
103 * Returns the size of a single record on success; -Eerrno on error
104 *
105 * task: the task to access;
106 * NULL to access the current cpu
107 * index: the index of the requested record
108 * record (out): pointer to the requested record
109 */ 145 */
110extern int ds_access_bts(struct task_struct *task, 146enum bts_qualifier {
111 size_t index, const void **record); 147 bts_invalid,
112extern int ds_access_pebs(struct task_struct *task, 148#define BTS_INVALID bts_invalid
113 size_t index, const void **record); 149
150 bts_branch,
151#define BTS_BRANCH bts_branch
152
153 bts_task_arrives,
154#define BTS_TASK_ARRIVES bts_task_arrives
155
156 bts_task_departs,
157#define BTS_TASK_DEPARTS bts_task_departs
158
159 bts_qual_bit_size = 4,
160 bts_qual_max = (1 << bts_qual_bit_size),
161};
162
163struct bts_struct {
164 __u64 qualifier;
165 union {
166 /* BTS_BRANCH */
167 struct {
168 __u64 from;
169 __u64 to;
170 } lbr;
171 /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
172 struct {
173 __u64 jiffies;
174 pid_t pid;
175 } timestamp;
176 } variant;
177};
114 178
115/*
116 * Write one or more BTS/PEBS records at the write pointer index and
117 * advance the write pointer.
118 *
119 * If size is not a multiple of the record size, trailing bytes are
120 * zeroed out.
121 *
122 * May result in one or more overflow notifications.
123 *
124 * If called during overflow handling, that is, with index >=
125 * interrupt threshold, the write will wrap around.
126 *
127 * An overflow notification is given if and when the interrupt
128 * threshold is reached during or after the write.
129 *
130 * Returns the number of bytes written or -Eerrno.
131 *
132 * task: the task to access;
133 * NULL to access the current cpu
134 * buffer: the buffer to write
135 * size: the size of the buffer
136 */
137extern int ds_write_bts(struct task_struct *task,
138 const void *buffer, size_t size);
139extern int ds_write_pebs(struct task_struct *task,
140 const void *buffer, size_t size);
141 179
142/* 180/*
143 * Same as ds_write_bts/pebs, but omit ownership checks. 181 * The BTS state.
144 * 182 *
145 * This is needed to have some other task than the owner of the 183 * This gives access to the raw DS state and adds functions to provide
146 * BTS/PEBS buffer or the parameter task itself write into the 184 * an arch-independent view of the BTS data.
147 * respective buffer.
148 */ 185 */
149extern int ds_unchecked_write_bts(struct task_struct *task, 186struct bts_trace {
150 const void *buffer, size_t size); 187 struct ds_trace ds;
151extern int ds_unchecked_write_pebs(struct task_struct *task, 188
152 const void *buffer, size_t size); 189 int (*read)(struct bts_tracer *tracer, const void *at,
190 struct bts_struct *out);
191 int (*write)(struct bts_tracer *tracer, const struct bts_struct *in);
192};
193
153 194
154/* 195/*
155 * Reset the write pointer of the BTS/PEBS buffer. 196 * The PEBS state.
156 * 197 *
157 * Returns 0 on success; -Eerrno on error 198 * This gives access to the raw DS state and the PEBS-specific counter
158 * 199 * reset value.
159 * task: the task to access;
160 * NULL to access the current cpu
161 */ 200 */
162extern int ds_reset_bts(struct task_struct *task); 201struct pebs_trace {
163extern int ds_reset_pebs(struct task_struct *task); 202 struct ds_trace ds;
203
204 /* the PEBS reset value */
205 unsigned long long reset_value;
206};
207
164 208
165/* 209/*
166 * Clear the BTS/PEBS buffer and reset the write pointer. 210 * Read the BTS or PEBS trace.
167 * The entire buffer will be zeroed out.
168 * 211 *
169 * Returns 0 on success; -Eerrno on error 212 * Returns a view on the trace collected for the parameter tracer.
213 *
214 * The view remains valid as long as the traced task is not running or
215 * the tracer is suspended.
216 * Writes into the trace buffer are not reflected.
170 * 217 *
171 * task: the task to access; 218 * tracer: the tracer handle returned from ds_request_~()
172 * NULL to access the current cpu
173 */ 219 */
174extern int ds_clear_bts(struct task_struct *task); 220extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer);
175extern int ds_clear_pebs(struct task_struct *task); 221extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer);
222
176 223
177/* 224/*
178 * Provide the PEBS counter reset value. 225 * Reset the write pointer of the BTS/PEBS buffer.
179 * 226 *
180 * Returns 0 on success; -Eerrno on error 227 * Returns 0 on success; -Eerrno on error
181 * 228 *
182 * task: the task to access; 229 * tracer: the tracer handle returned from ds_request_~()
183 * NULL to access the current cpu
184 * value (out): the counter reset value
185 */ 230 */
186extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); 231extern int ds_reset_bts(struct bts_tracer *tracer);
232extern int ds_reset_pebs(struct pebs_tracer *tracer);
187 233
188/* 234/*
189 * Set the PEBS counter reset value. 235 * Set the PEBS counter reset value.
190 * 236 *
191 * Returns 0 on success; -Eerrno on error 237 * Returns 0 on success; -Eerrno on error
192 * 238 *
193 * task: the task to access; 239 * tracer: the tracer handle returned from ds_request_pebs()
194 * NULL to access the current cpu
195 * value: the new counter reset value 240 * value: the new counter reset value
196 */ 241 */
197extern int ds_set_pebs_reset(struct task_struct *task, u64 value); 242extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
198 243
199/* 244/*
200 * Initialization 245 * Initialization
@@ -202,39 +247,26 @@ extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
202struct cpuinfo_x86; 247struct cpuinfo_x86;
203extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); 248extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
204 249
205
206
207/* 250/*
208 * The DS context - part of struct thread_struct. 251 * Context switch work
209 */ 252 */
210struct ds_context { 253extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
211 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
212 unsigned char *ds;
213 /* the owner of the BTS and PEBS configuration, respectively */
214 struct task_struct *owner[2];
215 /* buffer overflow notification function for BTS and PEBS */
216 ds_ovfl_callback_t callback[2];
217 /* the original buffer address */
218 void *buffer[2];
219 /* the number of allocated pages for on-request allocated buffers */
220 unsigned int pages[2];
221 /* use count */
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229};
230 254
231/* called by exit_thread() to free leftover contexts */ 255/*
232extern void ds_free(struct ds_context *context); 256 * Task clone/init and cleanup work
257 */
258extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
259extern void ds_exit_thread(struct task_struct *tsk);
233 260
234#else /* CONFIG_X86_DS */ 261#else /* CONFIG_X86_DS */
235 262
236struct cpuinfo_x86; 263struct cpuinfo_x86;
237static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} 264static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
265static inline void ds_switch_to(struct task_struct *prev,
266 struct task_struct *next) {}
267static inline void ds_copy_thread(struct task_struct *tsk,
268 struct task_struct *father) {}
269static inline void ds_exit_thread(struct task_struct *tsk) {}
238 270
239#endif /* CONFIG_X86_DS */ 271#endif /* CONFIG_X86_DS */
240#endif /* _ASM_X86_DS_H */ 272#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..b55b4a7fbefd 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -1,6 +1,33 @@
1#ifndef _ASM_X86_FTRACE_H 1#ifndef _ASM_X86_FTRACE_H
2#define _ASM_X86_FTRACE_H 2#define _ASM_X86_FTRACE_H
3 3
4#ifdef __ASSEMBLY__
5
6 .macro MCOUNT_SAVE_FRAME
7 /* taken from glibc */
8 subq $0x38, %rsp
9 movq %rax, (%rsp)
10 movq %rcx, 8(%rsp)
11 movq %rdx, 16(%rsp)
12 movq %rsi, 24(%rsp)
13 movq %rdi, 32(%rsp)
14 movq %r8, 40(%rsp)
15 movq %r9, 48(%rsp)
16 .endm
17
18 .macro MCOUNT_RESTORE_FRAME
19 movq 48(%rsp), %r9
20 movq 40(%rsp), %r8
21 movq 32(%rsp), %rdi
22 movq 24(%rsp), %rsi
23 movq 16(%rsp), %rdx
24 movq 8(%rsp), %rcx
25 movq (%rsp), %rax
26 addq $0x38, %rsp
27 .endm
28
29#endif
30
4#ifdef CONFIG_FUNCTION_TRACER 31#ifdef CONFIG_FUNCTION_TRACER
5#define MCOUNT_ADDR ((long)(mcount)) 32#define MCOUNT_ADDR ((long)(mcount))
6#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ 33#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
@@ -17,8 +44,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
17 */ 44 */
18 return addr - 1; 45 return addr - 1;
19} 46}
20#endif
21 47
48#ifdef CONFIG_DYNAMIC_FTRACE
49
50struct dyn_arch_ftrace {
51 /* No extra data needed for x86 */
52};
53
54#endif /* CONFIG_DYNAMIC_FTRACE */
55#endif /* __ASSEMBLY__ */
22#endif /* CONFIG_FUNCTION_TRACER */ 56#endif /* CONFIG_FUNCTION_TRACER */
23 57
58#ifdef CONFIG_FUNCTION_GRAPH_TRACER
59
60#ifndef __ASSEMBLY__
61
62/*
63 * Stack of return addresses for functions
64 * of a thread.
65 * Used in struct thread_info
66 */
67struct ftrace_ret_stack {
68 unsigned long ret;
69 unsigned long func;
70 unsigned long long calltime;
71};
72
73/*
74 * Primary handler of a function return.
75 * It relays on ftrace_return_to_handler.
76 * Defined in entry_32/64.S
77 */
78extern void return_to_handler(void);
79
80#endif /* __ASSEMBLY__ */
81#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
82
24#endif /* _ASM_X86_FTRACE_H */ 83#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 4640ddd58fb9..638bf6241807 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -85,7 +85,8 @@ static inline void native_write_msr(unsigned int msr,
85 asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); 85 asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
86} 86}
87 87
88static inline int native_write_msr_safe(unsigned int msr, 88/* Can be uninlined because referenced by paravirt */
89notrace static inline int native_write_msr_safe(unsigned int msr,
89 unsigned low, unsigned high) 90 unsigned low, unsigned high)
90{ 91{
91 int err; 92 int err;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a570eafa4755..091cd8855f2e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -756,6 +756,19 @@ extern void switch_to_new_gdt(void);
756extern void cpu_init(void); 756extern void cpu_init(void);
757extern void init_gdt(int cpu); 757extern void init_gdt(int cpu);
758 758
759static inline unsigned long get_debugctlmsr(void)
760{
761 unsigned long debugctlmsr = 0;
762
763#ifndef CONFIG_X86_DEBUGCTLMSR
764 if (boot_cpu_data.x86 < 6)
765 return 0;
766#endif
767 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
768
769 return debugctlmsr;
770}
771
759static inline void update_debugctlmsr(unsigned long debugctlmsr) 772static inline void update_debugctlmsr(unsigned long debugctlmsr)
760{ 773{
761#ifndef CONFIG_X86_DEBUGCTLMSR 774#ifndef CONFIG_X86_DEBUGCTLMSR
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index eefb0594b058..6d34d954c228 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -6,7 +6,6 @@
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7 7
8#ifdef __KERNEL__ 8#ifdef __KERNEL__
9#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */
10#include <asm/segment.h> 9#include <asm/segment.h>
11#endif 10#endif
12 11
@@ -128,34 +127,6 @@ struct pt_regs {
128#endif /* !__i386__ */ 127#endif /* !__i386__ */
129 128
130 129
131#ifdef CONFIG_X86_PTRACE_BTS
132/* a branch trace record entry
133 *
134 * In order to unify the interface between various processor versions,
135 * we use the below data structure for all processors.
136 */
137enum bts_qualifier {
138 BTS_INVALID = 0,
139 BTS_BRANCH,
140 BTS_TASK_ARRIVES,
141 BTS_TASK_DEPARTS
142};
143
144struct bts_struct {
145 __u64 qualifier;
146 union {
147 /* BTS_BRANCH */
148 struct {
149 __u64 from_ip;
150 __u64 to_ip;
151 } lbr;
152 /* BTS_TASK_ARRIVES or
153 BTS_TASK_DEPARTS */
154 __u64 jiffies;
155 } variant;
156};
157#endif /* CONFIG_X86_PTRACE_BTS */
158
159#ifdef __KERNEL__ 130#ifdef __KERNEL__
160 131
161#include <linux/init.h> 132#include <linux/init.h>
@@ -163,13 +134,6 @@ struct bts_struct {
163struct cpuinfo_x86; 134struct cpuinfo_x86;
164struct task_struct; 135struct task_struct;
165 136
166#ifdef CONFIG_X86_PTRACE_BTS
167extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
168extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
169#else
170#define ptrace_bts_init_intel(config) do {} while (0)
171#endif /* CONFIG_X86_PTRACE_BTS */
172
173extern unsigned long profile_pc(struct pt_regs *regs); 137extern unsigned long profile_pc(struct pt_regs *regs);
174 138
175extern unsigned long 139extern unsigned long
@@ -271,6 +235,13 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
271extern int do_set_thread_area(struct task_struct *p, int idx, 235extern int do_set_thread_area(struct task_struct *p, int idx,
272 struct user_desc __user *info, int can_allocate); 236 struct user_desc __user *info, int can_allocate);
273 237
238extern void x86_ptrace_untrace(struct task_struct *);
239extern void x86_ptrace_fork(struct task_struct *child,
240 unsigned long clone_flags);
241
242#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
243#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
244
274#endif /* __KERNEL__ */ 245#endif /* __KERNEL__ */
275 246
276#endif /* !__ASSEMBLY__ */ 247#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8dbc57390d25..98789647baa9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,8 @@
20struct task_struct; 20struct task_struct;
21struct exec_domain; 21struct exec_domain;
22#include <asm/processor.h> 22#include <asm/processor.h>
23#include <asm/ftrace.h>
24#include <asm/atomic.h>
23 25
24struct thread_info { 26struct thread_info {
25 struct task_struct *task; /* main task structure */ 27 struct task_struct *task; /* main task structure */
@@ -91,7 +93,6 @@ struct thread_info {
91#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 93#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
92#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ 94#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
93#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ 95#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
94#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
95 96
96#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 97#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
97#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) 98#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -113,7 +114,6 @@ struct thread_info {
113#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 114#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
114#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 115#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
115#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) 116#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
116#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
117 117
118/* work to do in syscall_trace_enter() */ 118/* work to do in syscall_trace_enter() */
119#define _TIF_WORK_SYSCALL_ENTRY \ 119#define _TIF_WORK_SYSCALL_ENTRY \
@@ -139,8 +139,7 @@ struct thread_info {
139 139
140/* flags to check in __switch_to() */ 140/* flags to check in __switch_to() */
141#define _TIF_WORK_CTXSW \ 141#define _TIF_WORK_CTXSW \
142 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ 142 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
143 _TIF_NOTSC)
144 143
145#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW 144#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
146#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) 145#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1f208aaee780..88dd768eab6d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
66obj-$(CONFIG_X86_IO_APIC) += io_apic.o 66obj-$(CONFIG_X86_IO_APIC) += io_apic.o
67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
69obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
69obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
70obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
71obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 7397911f8478..b5229affb953 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/dmi.h> 31#include <linux/dmi.h>
32#include <linux/dmar.h> 32#include <linux/dmar.h>
33#include <linux/ftrace.h>
33 34
34#include <asm/atomic.h> 35#include <asm/atomic.h>
35#include <asm/smp.h> 36#include <asm/smp.h>
@@ -790,7 +791,7 @@ static void local_apic_timer_interrupt(void)
790 * [ if a single-CPU system runs an SMP kernel then we call the local 791 * [ if a single-CPU system runs an SMP kernel then we call the local
791 * interrupt as well. Thus we cannot inline the local irq ... ] 792 * interrupt as well. Thus we cannot inline the local irq ... ]
792 */ 793 */
793void smp_apic_timer_interrupt(struct pt_regs *regs) 794void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
794{ 795{
795 struct pt_regs *old_regs = set_irq_regs(regs); 796 struct pt_regs *old_regs = set_irq_regs(regs);
796 797
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a5c04e88777e..82db7f45e2de 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -2,6 +2,11 @@
2# Makefile for x86-compatible CPU details and quirks 2# Makefile for x86-compatible CPU details and quirks
3# 3#
4 4
5# Don't trace early stages of a secondary CPU boot
6ifdef CONFIG_FUNCTION_TRACER
7CFLAGS_REMOVE_common.o = -pg
8endif
9
5obj-y := intel_cacheinfo.o addon_cpuid_features.o 10obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o capflags.o powerflags.o common.o 11obj-y += proc.o capflags.o powerflags.o common.o
7obj-y += vmware.o hypervisor.o 12obj-y += vmware.o hypervisor.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h>
36 37
37#include <linux/acpi.h> 38#include <linux/acpi.h>
38#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
391 unsigned int next_perf_state = 0; /* Index into perf table */ 392 unsigned int next_perf_state = 0; /* Index into perf table */
392 unsigned int i; 393 unsigned int i;
393 int result = 0; 394 int result = 0;
395 struct power_trace it;
394 396
395 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 397 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
396 398
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
427 } 429 }
428 } 430 }
429 431
432 trace_power_mark(&it, POWER_PSTATE, next_perf_state);
433
430 switch (data->cpu_feature) { 434 switch (data->cpu_feature) {
431 case SYSTEM_INTEL_MSR_CAPABLE: 435 case SYSTEM_INTEL_MSR_CAPABLE:
432 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 436 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ccfd2047630c..8ea6929e974c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,7 +11,6 @@
11#include <asm/pgtable.h> 11#include <asm/pgtable.h>
12#include <asm/msr.h> 12#include <asm/msr.h>
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/ptrace.h>
15#include <asm/ds.h> 14#include <asm/ds.h>
16#include <asm/bugs.h> 15#include <asm/bugs.h>
17 16
@@ -326,9 +325,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
326 set_cpu_cap(c, X86_FEATURE_P3); 325 set_cpu_cap(c, X86_FEATURE_P3);
327#endif 326#endif
328 327
329 if (cpu_has_bts)
330 ptrace_bts_init_intel(c);
331
332 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 328 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
333 /* 329 /*
334 * let's use the legacy cpuid vector 0x1 and 0x4 for topology 330 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d6938d9351cf..da91701a2348 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,14 +6,13 @@
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - DS and BTS hardware configuration
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It does not do:
15 * - get_task_struct on all parameter tasks 14 * - security checking (is the caller allowed to trace the task)
16 * - current is allowed to trace parameter tasks 15 * - buffer allocation (memory accounting)
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -28,22 +27,69 @@
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/sched.h> 28#include <linux/sched.h>
30#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/kernel.h>
31 31
32 32
33/* 33/*
34 * The configuration for a particular DS hardware implementation. 34 * The configuration for a particular DS hardware implementation.
35 */ 35 */
36struct ds_configuration { 36struct ds_configuration {
37 /* the size of the DS structure in bytes */ 37 /* the name of the configuration */
38 unsigned char sizeof_ds; 38 const char *name;
39 /* the size of one pointer-typed field in the DS structure in bytes; 39 /* the size of one pointer-typed field in the DS structure and
40 this covers the first 8 fields related to buffer management. */ 40 in the BTS and PEBS buffers in bytes;
41 this covers the first 8 DS fields related to buffer management. */
41 unsigned char sizeof_field; 42 unsigned char sizeof_field;
42 /* the size of a BTS/PEBS record in bytes */ 43 /* the size of a BTS/PEBS record in bytes */
43 unsigned char sizeof_rec[2]; 44 unsigned char sizeof_rec[2];
45 /* a series of bit-masks to control various features indexed
46 * by enum ds_feature */
47 unsigned long ctl[dsf_ctl_max];
44}; 48};
45static struct ds_configuration ds_cfg; 49static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
46 50
51#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
52
53#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
54#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
55#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
56
57#define BTS_CONTROL \
58 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
59 ds_cfg.ctl[dsf_bts_overflow])
60
61
62/*
63 * A BTS or PEBS tracer.
64 *
65 * This holds the configuration of the tracer and serves as a handle
66 * to identify tracers.
67 */
68struct ds_tracer {
69 /* the DS context (partially) owned by this tracer */
70 struct ds_context *context;
71 /* the buffer provided on ds_request() and its size in bytes */
72 void *buffer;
73 size_t size;
74};
75
76struct bts_tracer {
77 /* the common DS part */
78 struct ds_tracer ds;
79 /* the trace including the DS configuration */
80 struct bts_trace trace;
81 /* buffer overflow notification function */
82 bts_ovfl_callback_t ovfl;
83};
84
85struct pebs_tracer {
86 /* the common DS part */
87 struct ds_tracer ds;
88 /* the trace including the DS configuration */
89 struct pebs_trace trace;
90 /* buffer overflow notification function */
91 pebs_ovfl_callback_t ovfl;
92};
47 93
48/* 94/*
49 * Debug Store (DS) save area configuration (see Intel64 and IA32 95 * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
109 155
110 156
111/* 157/*
112 * Locking is done only for allocating BTS or PEBS resources and for 158 * Locking is done only for allocating BTS or PEBS resources.
113 * guarding context and buffer memory allocation.
114 *
115 * Most functions require the current task to own the ds context part
116 * they are going to access. All the locking is done when validating
117 * access to the context.
118 */ 159 */
119static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); 160static DEFINE_SPINLOCK(ds_lock);
120
121/*
122 * Validate that the current task is allowed to access the BTS/PEBS
123 * buffer of the parameter task.
124 *
125 * Returns 0, if access is granted; -Eerrno, otherwise.
126 */
127static inline int ds_validate_access(struct ds_context *context,
128 enum ds_qualifier qual)
129{
130 if (!context)
131 return -EPERM;
132
133 if (context->owner[qual] == current)
134 return 0;
135
136 return -EPERM;
137}
138 161
139 162
140/* 163/*
@@ -150,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context,
150 * >0 number of per-thread tracers 173 * >0 number of per-thread tracers
151 * <0 number of per-cpu tracers 174 * <0 number of per-cpu tracers
152 * 175 *
153 * The below functions to get and put tracers and to check the
154 * allocation type require the ds_lock to be held by the caller.
155 *
156 * Tracers essentially gives the number of ds contexts for a certain 176 * Tracers essentially gives the number of ds contexts for a certain
157 * type of allocation. 177 * type of allocation.
158 */ 178 */
159static long tracers; 179static atomic_t tracers = ATOMIC_INIT(0);
160 180
161static inline void get_tracer(struct task_struct *task) 181static inline void get_tracer(struct task_struct *task)
162{ 182{
163 tracers += (task ? 1 : -1); 183 if (task)
184 atomic_inc(&tracers);
185 else
186 atomic_dec(&tracers);
164} 187}
165 188
166static inline void put_tracer(struct task_struct *task) 189static inline void put_tracer(struct task_struct *task)
167{ 190{
168 tracers -= (task ? 1 : -1); 191 if (task)
192 atomic_dec(&tracers);
193 else
194 atomic_inc(&tracers);
169} 195}
170 196
171static inline int check_tracer(struct task_struct *task) 197static inline int check_tracer(struct task_struct *task)
172{ 198{
173 return (task ? (tracers >= 0) : (tracers <= 0)); 199 return task ?
200 (atomic_read(&tracers) >= 0) :
201 (atomic_read(&tracers) <= 0);
174} 202}
175 203
176 204
@@ -183,99 +211,70 @@ static inline int check_tracer(struct task_struct *task)
183 * 211 *
184 * Contexts are use-counted. They are allocated on first access and 212 * Contexts are use-counted. They are allocated on first access and
185 * deallocated when the last user puts the context. 213 * deallocated when the last user puts the context.
186 *
187 * We distinguish between an allocating and a non-allocating get of a
188 * context:
189 * - the allocating get is used for requesting BTS/PEBS resources. It
190 * requires the caller to hold the global ds_lock.
191 * - the non-allocating get is used for all other cases. A
192 * non-existing context indicates an error. It acquires and releases
193 * the ds_lock itself for obtaining the context.
194 *
195 * A context and its DS configuration are allocated and deallocated
196 * together. A context always has a DS configuration of the
197 * appropriate size.
198 */ 214 */
199static DEFINE_PER_CPU(struct ds_context *, system_context); 215struct ds_context {
200 216 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
201#define this_system_context per_cpu(system_context, smp_processor_id()) 217 unsigned char ds[MAX_SIZEOF_DS];
202 218 /* the owner of the BTS and PEBS configuration, respectively */
203/* 219 struct bts_tracer *bts_master;
204 * Returns the pointer to the parameter task's context or to the 220 struct pebs_tracer *pebs_master;
205 * system-wide context, if task is NULL. 221 /* use count */
206 * 222 unsigned long count;
207 * Increases the use count of the returned context, if not NULL. 223 /* a pointer to the context location inside the thread_struct
208 */ 224 * or the per_cpu context array */
209static inline struct ds_context *ds_get_context(struct task_struct *task) 225 struct ds_context **this;
210{ 226 /* a pointer to the task owning this context, or NULL, if the
211 struct ds_context *context; 227 * context is owned by a cpu */
212 unsigned long irq; 228 struct task_struct *task;
229};
213 230
214 spin_lock_irqsave(&ds_lock, irq); 231static DEFINE_PER_CPU(struct ds_context *, system_context_array);
215 232
216 context = (task ? task->thread.ds_ctx : this_system_context); 233#define system_context per_cpu(system_context_array, smp_processor_id())
217 if (context)
218 context->count++;
219 234
220 spin_unlock_irqrestore(&ds_lock, irq);
221
222 return context;
223}
224 235
225/* 236static inline struct ds_context *ds_get_context(struct task_struct *task)
226 * Same as ds_get_context, but allocates the context and it's DS
227 * structure, if necessary; returns NULL; if out of memory.
228 */
229static inline struct ds_context *ds_alloc_context(struct task_struct *task)
230{ 237{
231 struct ds_context **p_context = 238 struct ds_context **p_context =
232 (task ? &task->thread.ds_ctx : &this_system_context); 239 (task ? &task->thread.ds_ctx : &system_context);
233 struct ds_context *context = *p_context; 240 struct ds_context *context = NULL;
241 struct ds_context *new_context = NULL;
234 unsigned long irq; 242 unsigned long irq;
235 243
236 if (!context) { 244 /* Chances are small that we already have a context. */
237 context = kzalloc(sizeof(*context), GFP_KERNEL); 245 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
238 if (!context) 246 if (!new_context)
239 return NULL; 247 return NULL;
240
241 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
242 if (!context->ds) {
243 kfree(context);
244 return NULL;
245 }
246 248
247 spin_lock_irqsave(&ds_lock, irq); 249 spin_lock_irqsave(&ds_lock, irq);
248 250
249 if (*p_context) { 251 context = *p_context;
250 kfree(context->ds); 252 if (!context) {
251 kfree(context); 253 context = new_context;
252 254
253 context = *p_context; 255 context->this = p_context;
254 } else { 256 context->task = task;
255 *p_context = context; 257 context->count = 0;
256 258
257 context->this = p_context; 259 if (task)
258 context->task = task; 260 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
259 261
260 if (task) 262 if (!task || (task == current))
261 set_tsk_thread_flag(task, TIF_DS_AREA_MSR); 263 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
262 264
263 if (!task || (task == current)) 265 *p_context = context;
264 wrmsrl(MSR_IA32_DS_AREA,
265 (unsigned long)context->ds);
266 }
267 spin_unlock_irqrestore(&ds_lock, irq);
268 } 266 }
269 267
270 context->count++; 268 context->count++;
271 269
270 spin_unlock_irqrestore(&ds_lock, irq);
271
272 if (context != new_context)
273 kfree(new_context);
274
272 return context; 275 return context;
273} 276}
274 277
275/*
276 * Decreases the use count of the parameter context, if not NULL.
277 * Deallocates the context, if the use count reaches zero.
278 */
279static inline void ds_put_context(struct ds_context *context) 278static inline void ds_put_context(struct ds_context *context)
280{ 279{
281 unsigned long irq; 280 unsigned long irq;
@@ -285,8 +284,10 @@ static inline void ds_put_context(struct ds_context *context)
285 284
286 spin_lock_irqsave(&ds_lock, irq); 285 spin_lock_irqsave(&ds_lock, irq);
287 286
288 if (--context->count) 287 if (--context->count) {
289 goto out; 288 spin_unlock_irqrestore(&ds_lock, irq);
289 return;
290 }
290 291
291 *(context->this) = NULL; 292 *(context->this) = NULL;
292 293
@@ -296,135 +297,263 @@ static inline void ds_put_context(struct ds_context *context)
296 if (!context->task || (context->task == current)) 297 if (!context->task || (context->task == current))
297 wrmsrl(MSR_IA32_DS_AREA, 0); 298 wrmsrl(MSR_IA32_DS_AREA, 0);
298 299
299 put_tracer(context->task); 300 spin_unlock_irqrestore(&ds_lock, irq);
300 301
301 /* free any leftover buffers from tracers that did not
302 * deallocate them properly. */
303 kfree(context->buffer[ds_bts]);
304 kfree(context->buffer[ds_pebs]);
305 kfree(context->ds);
306 kfree(context); 302 kfree(context);
307 out:
308 spin_unlock_irqrestore(&ds_lock, irq);
309} 303}
310 304
311 305
312/* 306/*
313 * Handle a buffer overflow 307 * Call the tracer's callback on a buffer overflow.
314 * 308 *
315 * task: the task whose buffers are overflowing;
316 * NULL for a buffer overflow on the current cpu
317 * context: the ds context 309 * context: the ds context
318 * qual: the buffer type 310 * qual: the buffer type
319 */ 311 */
320static void ds_overflow(struct task_struct *task, struct ds_context *context, 312static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
321 enum ds_qualifier qual)
322{ 313{
323 if (!context) 314 switch (qual) {
324 return; 315 case ds_bts:
325 316 if (context->bts_master &&
326 if (context->callback[qual]) 317 context->bts_master->ovfl)
327 (*context->callback[qual])(task); 318 context->bts_master->ovfl(context->bts_master);
328 319 break;
329 /* todo: do some more overflow handling */ 320 case ds_pebs:
321 if (context->pebs_master &&
322 context->pebs_master->ovfl)
323 context->pebs_master->ovfl(context->pebs_master);
324 break;
325 }
330} 326}
331 327
332 328
333/* 329/*
334 * Allocate a non-pageable buffer of the parameter size. 330 * Write raw data into the BTS or PEBS buffer.
335 * Checks the memory and the locked memory rlimit.
336 * 331 *
337 * Returns the buffer, if successful; 332 * The remainder of any partially written record is zeroed out.
338 * NULL, if out of memory or rlimit exceeded.
339 * 333 *
340 * size: the requested buffer size in bytes 334 * context: the DS context
341 * pages (out): if not NULL, contains the number of pages reserved 335 * qual: the buffer type
336 * record: the data to write
337 * size: the size of the data
342 */ 338 */
343static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) 339static int ds_write(struct ds_context *context, enum ds_qualifier qual,
340 const void *record, size_t size)
344{ 341{
345 unsigned long rlim, vm, pgsz; 342 int bytes_written = 0;
346 void *buffer;
347 343
348 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; 344 if (!record)
345 return -EINVAL;
349 346
350 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; 347 while (size) {
351 vm = current->mm->total_vm + pgsz; 348 unsigned long base, index, end, write_end, int_th;
352 if (rlim < vm) 349 unsigned long write_size, adj_write_size;
353 return NULL;
354 350
355 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 351 /*
356 vm = current->mm->locked_vm + pgsz; 352 * write as much as possible without producing an
357 if (rlim < vm) 353 * overflow interrupt.
358 return NULL; 354 *
355 * interrupt_threshold must either be
356 * - bigger than absolute_maximum or
357 * - point to a record between buffer_base and absolute_maximum
358 *
359 * index points to a valid record.
360 */
361 base = ds_get(context->ds, qual, ds_buffer_base);
362 index = ds_get(context->ds, qual, ds_index);
363 end = ds_get(context->ds, qual, ds_absolute_maximum);
364 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
359 365
360 buffer = kzalloc(size, GFP_KERNEL); 366 write_end = min(end, int_th);
361 if (!buffer)
362 return NULL;
363 367
364 current->mm->total_vm += pgsz; 368 /* if we are already beyond the interrupt threshold,
365 current->mm->locked_vm += pgsz; 369 * we fill the entire buffer */
370 if (write_end <= index)
371 write_end = end;
366 372
367 if (pages) 373 if (write_end <= index)
368 *pages = pgsz; 374 break;
375
376 write_size = min((unsigned long) size, write_end - index);
377 memcpy((void *)index, record, write_size);
378
379 record = (const char *)record + write_size;
380 size -= write_size;
381 bytes_written += write_size;
382
383 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
384 adj_write_size *= ds_cfg.sizeof_rec[qual];
369 385
370 return buffer; 386 /* zero out trailing bytes */
387 memset((char *)index + write_size, 0,
388 adj_write_size - write_size);
389 index += adj_write_size;
390
391 if (index >= end)
392 index = base;
393 ds_set(context->ds, qual, ds_index, index);
394
395 if (index >= int_th)
396 ds_overflow(context, qual);
397 }
398
399 return bytes_written;
371} 400}
372 401
373static int ds_request(struct task_struct *task, void *base, size_t size, 402
374 ds_ovfl_callback_t ovfl, enum ds_qualifier qual) 403/*
404 * Branch Trace Store (BTS) uses the following format. Different
405 * architectures vary in the size of those fields.
406 * - source linear address
407 * - destination linear address
408 * - flags
409 *
410 * Later architectures use 64bit pointers throughout, whereas earlier
411 * architectures use 32bit pointers in 32bit mode.
412 *
413 * We compute the base address for the first 8 fields based on:
414 * - the field size stored in the DS configuration
415 * - the relative field position
416 *
417 * In order to store additional information in the BTS buffer, we use
418 * a special source address to indicate that the record requires
419 * special interpretation.
420 *
421 * Netburst indicated via a bit in the flags field whether the branch
422 * was predicted; this is ignored.
423 *
424 * We use two levels of abstraction:
425 * - the raw data level defined here
426 * - an arch-independent level defined in ds.h
427 */
428
429enum bts_field {
430 bts_from,
431 bts_to,
432 bts_flags,
433
434 bts_qual = bts_from,
435 bts_jiffies = bts_to,
436 bts_pid = bts_flags,
437
438 bts_qual_mask = (bts_qual_max - 1),
439 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
440};
441
442static inline unsigned long bts_get(const char *base, enum bts_field field)
375{ 443{
376 struct ds_context *context; 444 base += (ds_cfg.sizeof_field * field);
377 unsigned long buffer, adj; 445 return *(unsigned long *)base;
378 const unsigned long alignment = (1 << 3); 446}
379 unsigned long irq;
380 int error = 0;
381 447
382 if (!ds_cfg.sizeof_ds) 448static inline void bts_set(char *base, enum bts_field field, unsigned long val)
383 return -EOPNOTSUPP; 449{
450 base += (ds_cfg.sizeof_field * field);;
451 (*(unsigned long *)base) = val;
452}
384 453
385 /* we require some space to do alignment adjustments below */ 454
386 if (size < (alignment + ds_cfg.sizeof_rec[qual])) 455/*
456 * The raw BTS data is architecture dependent.
457 *
458 * For higher-level users, we give an arch-independent view.
459 * - ds.h defines struct bts_struct
460 * - bts_read translates one raw bts record into a bts_struct
461 * - bts_write translates one bts_struct into the raw format and
462 * writes it into the top of the parameter tracer's buffer.
463 *
464 * return: bytes read/written on success; -Eerrno, otherwise
465 */
466static int bts_read(struct bts_tracer *tracer, const void *at,
467 struct bts_struct *out)
468{
469 if (!tracer)
387 return -EINVAL; 470 return -EINVAL;
388 471
389 /* buffer overflow notification is not yet implemented */ 472 if (at < tracer->trace.ds.begin)
390 if (ovfl) 473 return -EINVAL;
391 return -EOPNOTSUPP;
392 474
475 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
476 return -EINVAL;
393 477
394 context = ds_alloc_context(task); 478 memset(out, 0, sizeof(*out));
395 if (!context) 479 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
396 return -ENOMEM; 480 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
481 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
482 out->variant.timestamp.pid = bts_get(at, bts_pid);
483 } else {
484 out->qualifier = bts_branch;
485 out->variant.lbr.from = bts_get(at, bts_from);
486 out->variant.lbr.to = bts_get(at, bts_to);
487
488 if (!out->variant.lbr.from && !out->variant.lbr.to)
489 out->qualifier = bts_invalid;
490 }
397 491
398 spin_lock_irqsave(&ds_lock, irq); 492 return ds_cfg.sizeof_rec[ds_bts];
493}
399 494
400 error = -EPERM; 495static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
401 if (!check_tracer(task)) 496{
402 goto out_unlock; 497 unsigned char raw[MAX_SIZEOF_BTS];
403 498
404 get_tracer(task); 499 if (!tracer)
500 return -EINVAL;
405 501
406 error = -EALREADY; 502 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
407 if (context->owner[qual] == current) 503 return -EOVERFLOW;
408 goto out_put_tracer;
409 error = -EPERM;
410 if (context->owner[qual] != NULL)
411 goto out_put_tracer;
412 context->owner[qual] = current;
413 504
414 spin_unlock_irqrestore(&ds_lock, irq); 505 switch (in->qualifier) {
506 case bts_invalid:
507 bts_set(raw, bts_from, 0);
508 bts_set(raw, bts_to, 0);
509 bts_set(raw, bts_flags, 0);
510 break;
511 case bts_branch:
512 bts_set(raw, bts_from, in->variant.lbr.from);
513 bts_set(raw, bts_to, in->variant.lbr.to);
514 bts_set(raw, bts_flags, 0);
515 break;
516 case bts_task_arrives:
517 case bts_task_departs:
518 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
519 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
520 bts_set(raw, bts_pid, in->variant.timestamp.pid);
521 break;
522 default:
523 return -EINVAL;
524 }
415 525
526 return ds_write(tracer->ds.context, ds_bts, raw,
527 ds_cfg.sizeof_rec[ds_bts]);
528}
416 529
417 error = -ENOMEM;
418 if (!base) {
419 base = ds_allocate_buffer(size, &context->pages[qual]);
420 if (!base)
421 goto out_release;
422 530
423 context->buffer[qual] = base; 531static void ds_write_config(struct ds_context *context,
424 } 532 struct ds_trace *cfg, enum ds_qualifier qual)
425 error = 0; 533{
534 unsigned char *ds = context->ds;
426 535
427 context->callback[qual] = ovfl; 536 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
537 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
538 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
539 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
540}
541
542static void ds_read_config(struct ds_context *context,
543 struct ds_trace *cfg, enum ds_qualifier qual)
544{
545 unsigned char *ds = context->ds;
546
547 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
548 cfg->top = (void *)ds_get(ds, qual, ds_index);
549 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
550 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
551}
552
553static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
554 void *base, size_t size, size_t ith,
555 unsigned int flags) {
556 unsigned long buffer, adj;
428 557
429 /* adjust the buffer address and size to meet alignment 558 /* adjust the buffer address and size to meet alignment
430 * constraints: 559 * constraints:
@@ -436,410 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
436 */ 565 */
437 buffer = (unsigned long)base; 566 buffer = (unsigned long)base;
438 567
439 adj = ALIGN(buffer, alignment) - buffer; 568 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
440 buffer += adj; 569 buffer += adj;
441 size -= adj; 570 size -= adj;
442 571
443 size /= ds_cfg.sizeof_rec[qual]; 572 trace->n = size / ds_cfg.sizeof_rec[qual];
444 size *= ds_cfg.sizeof_rec[qual]; 573 trace->size = ds_cfg.sizeof_rec[qual];
445
446 ds_set(context->ds, qual, ds_buffer_base, buffer);
447 ds_set(context->ds, qual, ds_index, buffer);
448 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
449
450 if (ovfl) {
451 /* todo: select a suitable interrupt threshold */
452 } else
453 ds_set(context->ds, qual,
454 ds_interrupt_threshold, buffer + size + 1);
455 574
456 /* we keep the context until ds_release */ 575 size = (trace->n * trace->size);
457 return error;
458
459 out_release:
460 context->owner[qual] = NULL;
461 ds_put_context(context);
462 put_tracer(task);
463 return error;
464 576
465 out_put_tracer: 577 trace->begin = (void *)buffer;
466 spin_unlock_irqrestore(&ds_lock, irq); 578 trace->top = trace->begin;
467 ds_put_context(context); 579 trace->end = (void *)(buffer + size);
468 put_tracer(task); 580 /* The value for 'no threshold' is -1, which will set the
469 return error; 581 * threshold outside of the buffer, just like we want it.
582 */
583 trace->ith = (void *)(buffer + size - ith);
470 584
471 out_unlock: 585 trace->flags = flags;
472 spin_unlock_irqrestore(&ds_lock, irq);
473 ds_put_context(context);
474 return error;
475} 586}
476 587
477int ds_request_bts(struct task_struct *task, void *base, size_t size,
478 ds_ovfl_callback_t ovfl)
479{
480 return ds_request(task, base, size, ovfl, ds_bts);
481}
482 588
483int ds_request_pebs(struct task_struct *task, void *base, size_t size, 589static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
484 ds_ovfl_callback_t ovfl) 590 enum ds_qualifier qual, struct task_struct *task,
485{ 591 void *base, size_t size, size_t th, unsigned int flags)
486 return ds_request(task, base, size, ovfl, ds_pebs);
487}
488
489static int ds_release(struct task_struct *task, enum ds_qualifier qual)
490{ 592{
491 struct ds_context *context; 593 struct ds_context *context;
492 int error; 594 int error;
493 595
494 context = ds_get_context(task); 596 error = -EINVAL;
495 error = ds_validate_access(context, qual); 597 if (!base)
496 if (error < 0)
497 goto out; 598 goto out;
498 599
499 kfree(context->buffer[qual]); 600 /* we require some space to do alignment adjustments below */
500 context->buffer[qual] = NULL; 601 error = -EINVAL;
501 602 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
502 current->mm->total_vm -= context->pages[qual]; 603 goto out;
503 current->mm->locked_vm -= context->pages[qual];
504 context->pages[qual] = 0;
505 context->owner[qual] = NULL;
506
507 /*
508 * we put the context twice:
509 * once for the ds_get_context
510 * once for the corresponding ds_request
511 */
512 ds_put_context(context);
513 out:
514 ds_put_context(context);
515 return error;
516}
517 604
518int ds_release_bts(struct task_struct *task) 605 if (th != (size_t)-1) {
519{ 606 th *= ds_cfg.sizeof_rec[qual];
520 return ds_release(task, ds_bts);
521}
522 607
523int ds_release_pebs(struct task_struct *task) 608 error = -EINVAL;
524{ 609 if (size <= th)
525 return ds_release(task, ds_pebs); 610 goto out;
526} 611 }
527 612
528static int ds_get_index(struct task_struct *task, size_t *pos, 613 tracer->buffer = base;
529 enum ds_qualifier qual) 614 tracer->size = size;
530{
531 struct ds_context *context;
532 unsigned long base, index;
533 int error;
534 615
616 error = -ENOMEM;
535 context = ds_get_context(task); 617 context = ds_get_context(task);
536 error = ds_validate_access(context, qual); 618 if (!context)
537 if (error < 0)
538 goto out; 619 goto out;
620 tracer->context = context;
539 621
540 base = ds_get(context->ds, qual, ds_buffer_base); 622 ds_init_ds_trace(trace, qual, base, size, th, flags);
541 index = ds_get(context->ds, qual, ds_index);
542 623
543 error = ((index - base) / ds_cfg.sizeof_rec[qual]); 624 error = 0;
544 if (pos)
545 *pos = error;
546 out: 625 out:
547 ds_put_context(context);
548 return error; 626 return error;
549} 627}
550 628
551int ds_get_bts_index(struct task_struct *task, size_t *pos) 629struct bts_tracer *ds_request_bts(struct task_struct *task,
552{ 630 void *base, size_t size,
553 return ds_get_index(task, pos, ds_bts); 631 bts_ovfl_callback_t ovfl, size_t th,
554} 632 unsigned int flags)
555
556int ds_get_pebs_index(struct task_struct *task, size_t *pos)
557{
558 return ds_get_index(task, pos, ds_pebs);
559}
560
561static int ds_get_end(struct task_struct *task, size_t *pos,
562 enum ds_qualifier qual)
563{ 633{
564 struct ds_context *context; 634 struct bts_tracer *tracer;
565 unsigned long base, end; 635 unsigned long irq;
566 int error; 636 int error;
567 637
568 context = ds_get_context(task); 638 error = -EOPNOTSUPP;
569 error = ds_validate_access(context, qual); 639 if (!ds_cfg.ctl[dsf_bts])
570 if (error < 0)
571 goto out; 640 goto out;
572 641
573 base = ds_get(context->ds, qual, ds_buffer_base); 642 /* buffer overflow notification is not yet implemented */
574 end = ds_get(context->ds, qual, ds_absolute_maximum); 643 error = -EOPNOTSUPP;
644 if (ovfl)
645 goto out;
575 646
576 error = ((end - base) / ds_cfg.sizeof_rec[qual]); 647 error = -ENOMEM;
577 if (pos) 648 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
578 *pos = error; 649 if (!tracer)
579 out: 650 goto out;
580 ds_put_context(context); 651 tracer->ovfl = ovfl;
581 return error;
582}
583 652
584int ds_get_bts_end(struct task_struct *task, size_t *pos) 653 error = ds_request(&tracer->ds, &tracer->trace.ds,
585{ 654 ds_bts, task, base, size, th, flags);
586 return ds_get_end(task, pos, ds_bts); 655 if (error < 0)
587} 656 goto out_tracer;
588 657
589int ds_get_pebs_end(struct task_struct *task, size_t *pos)
590{
591 return ds_get_end(task, pos, ds_pebs);
592}
593 658
594static int ds_access(struct task_struct *task, size_t index, 659 spin_lock_irqsave(&ds_lock, irq);
595 const void **record, enum ds_qualifier qual)
596{
597 struct ds_context *context;
598 unsigned long base, idx;
599 int error;
600 660
601 if (!record) 661 error = -EPERM;
602 return -EINVAL; 662 if (!check_tracer(task))
663 goto out_unlock;
664 get_tracer(task);
603 665
604 context = ds_get_context(task); 666 error = -EPERM;
605 error = ds_validate_access(context, qual); 667 if (tracer->ds.context->bts_master)
606 if (error < 0) 668 goto out_put_tracer;
607 goto out; 669 tracer->ds.context->bts_master = tracer;
608 670
609 base = ds_get(context->ds, qual, ds_buffer_base); 671 spin_unlock_irqrestore(&ds_lock, irq);
610 idx = base + (index * ds_cfg.sizeof_rec[qual]);
611 672
612 error = -EINVAL;
613 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
614 goto out;
615 673
616 *record = (const void *)idx; 674 tracer->trace.read = bts_read;
617 error = ds_cfg.sizeof_rec[qual]; 675 tracer->trace.write = bts_write;
618 out:
619 ds_put_context(context);
620 return error;
621}
622 676
623int ds_access_bts(struct task_struct *task, size_t index, const void **record) 677 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
624{ 678 ds_resume_bts(tracer);
625 return ds_access(task, index, record, ds_bts);
626}
627 679
628int ds_access_pebs(struct task_struct *task, size_t index, const void **record) 680 return tracer;
629{ 681
630 return ds_access(task, index, record, ds_pebs); 682 out_put_tracer:
683 put_tracer(task);
684 out_unlock:
685 spin_unlock_irqrestore(&ds_lock, irq);
686 ds_put_context(tracer->ds.context);
687 out_tracer:
688 kfree(tracer);
689 out:
690 return ERR_PTR(error);
631} 691}
632 692
633static int ds_write(struct task_struct *task, const void *record, size_t size, 693struct pebs_tracer *ds_request_pebs(struct task_struct *task,
634 enum ds_qualifier qual, int force) 694 void *base, size_t size,
695 pebs_ovfl_callback_t ovfl, size_t th,
696 unsigned int flags)
635{ 697{
636 struct ds_context *context; 698 struct pebs_tracer *tracer;
699 unsigned long irq;
637 int error; 700 int error;
638 701
639 if (!record) 702 /* buffer overflow notification is not yet implemented */
640 return -EINVAL; 703 error = -EOPNOTSUPP;
704 if (ovfl)
705 goto out;
641 706
642 error = -EPERM; 707 error = -ENOMEM;
643 context = ds_get_context(task); 708 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
644 if (!context) 709 if (!tracer)
645 goto out; 710 goto out;
711 tracer->ovfl = ovfl;
646 712
647 if (!force) { 713 error = ds_request(&tracer->ds, &tracer->trace.ds,
648 error = ds_validate_access(context, qual); 714 ds_pebs, task, base, size, th, flags);
649 if (error < 0) 715 if (error < 0)
650 goto out; 716 goto out_tracer;
651 }
652 717
653 error = 0; 718 spin_lock_irqsave(&ds_lock, irq);
654 while (size) {
655 unsigned long base, index, end, write_end, int_th;
656 unsigned long write_size, adj_write_size;
657 719
658 /* 720 error = -EPERM;
659 * write as much as possible without producing an 721 if (!check_tracer(task))
660 * overflow interrupt. 722 goto out_unlock;
661 * 723 get_tracer(task);
662 * interrupt_threshold must either be
663 * - bigger than absolute_maximum or
664 * - point to a record between buffer_base and absolute_maximum
665 *
666 * index points to a valid record.
667 */
668 base = ds_get(context->ds, qual, ds_buffer_base);
669 index = ds_get(context->ds, qual, ds_index);
670 end = ds_get(context->ds, qual, ds_absolute_maximum);
671 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
672 724
673 write_end = min(end, int_th); 725 error = -EPERM;
726 if (tracer->ds.context->pebs_master)
727 goto out_put_tracer;
728 tracer->ds.context->pebs_master = tracer;
674 729
675 /* if we are already beyond the interrupt threshold, 730 spin_unlock_irqrestore(&ds_lock, irq);
676 * we fill the entire buffer */
677 if (write_end <= index)
678 write_end = end;
679 731
680 if (write_end <= index) 732 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
681 goto out; 733 ds_resume_pebs(tracer);
682 734
683 write_size = min((unsigned long) size, write_end - index); 735 return tracer;
684 memcpy((void *)index, record, write_size);
685 736
686 record = (const char *)record + write_size; 737 out_put_tracer:
687 size -= write_size; 738 put_tracer(task);
688 error += write_size; 739 out_unlock:
740 spin_unlock_irqrestore(&ds_lock, irq);
741 ds_put_context(tracer->ds.context);
742 out_tracer:
743 kfree(tracer);
744 out:
745 return ERR_PTR(error);
746}
689 747
690 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 748void ds_release_bts(struct bts_tracer *tracer)
691 adj_write_size *= ds_cfg.sizeof_rec[qual]; 749{
750 if (!tracer)
751 return;
692 752
693 /* zero out trailing bytes */ 753 ds_suspend_bts(tracer);
694 memset((char *)index + write_size, 0,
695 adj_write_size - write_size);
696 index += adj_write_size;
697 754
698 if (index >= end) 755 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
699 index = base; 756 tracer->ds.context->bts_master = NULL;
700 ds_set(context->ds, qual, ds_index, index);
701 757
702 if (index >= int_th) 758 put_tracer(tracer->ds.context->task);
703 ds_overflow(task, context, qual); 759 ds_put_context(tracer->ds.context);
704 }
705 760
706 out: 761 kfree(tracer);
707 ds_put_context(context);
708 return error;
709} 762}
710 763
711int ds_write_bts(struct task_struct *task, const void *record, size_t size) 764void ds_suspend_bts(struct bts_tracer *tracer)
712{ 765{
713 return ds_write(task, record, size, ds_bts, /* force = */ 0); 766 struct task_struct *task;
714}
715 767
716int ds_write_pebs(struct task_struct *task, const void *record, size_t size) 768 if (!tracer)
717{ 769 return;
718 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
719}
720 770
721int ds_unchecked_write_bts(struct task_struct *task, 771 task = tracer->ds.context->task;
722 const void *record, size_t size)
723{
724 return ds_write(task, record, size, ds_bts, /* force = */ 1);
725}
726 772
727int ds_unchecked_write_pebs(struct task_struct *task, 773 if (!task || (task == current))
728 const void *record, size_t size) 774 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
729{ 775
730 return ds_write(task, record, size, ds_pebs, /* force = */ 1); 776 if (task) {
777 task->thread.debugctlmsr &= ~BTS_CONTROL;
778
779 if (!task->thread.debugctlmsr)
780 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
781 }
731} 782}
732 783
733static int ds_reset_or_clear(struct task_struct *task, 784void ds_resume_bts(struct bts_tracer *tracer)
734 enum ds_qualifier qual, int clear)
735{ 785{
736 struct ds_context *context; 786 struct task_struct *task;
737 unsigned long base, end; 787 unsigned long control;
738 int error;
739 788
740 context = ds_get_context(task); 789 if (!tracer)
741 error = ds_validate_access(context, qual); 790 return;
742 if (error < 0)
743 goto out;
744 791
745 base = ds_get(context->ds, qual, ds_buffer_base); 792 task = tracer->ds.context->task;
746 end = ds_get(context->ds, qual, ds_absolute_maximum);
747 793
748 if (clear) 794 control = ds_cfg.ctl[dsf_bts];
749 memset((void *)base, 0, end - base); 795 if (!(tracer->trace.ds.flags & BTS_KERNEL))
796 control |= ds_cfg.ctl[dsf_bts_kernel];
797 if (!(tracer->trace.ds.flags & BTS_USER))
798 control |= ds_cfg.ctl[dsf_bts_user];
750 799
751 ds_set(context->ds, qual, ds_index, base); 800 if (task) {
801 task->thread.debugctlmsr |= control;
802 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
803 }
752 804
753 error = 0; 805 if (!task || (task == current))
754 out: 806 update_debugctlmsr(get_debugctlmsr() | control);
755 ds_put_context(context);
756 return error;
757} 807}
758 808
759int ds_reset_bts(struct task_struct *task) 809void ds_release_pebs(struct pebs_tracer *tracer)
760{ 810{
761 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); 811 if (!tracer)
812 return;
813
814 ds_suspend_pebs(tracer);
815
816 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
817 tracer->ds.context->pebs_master = NULL;
818
819 put_tracer(tracer->ds.context->task);
820 ds_put_context(tracer->ds.context);
821
822 kfree(tracer);
762} 823}
763 824
764int ds_reset_pebs(struct task_struct *task) 825void ds_suspend_pebs(struct pebs_tracer *tracer)
765{ 826{
766 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); 827
767} 828}
768 829
769int ds_clear_bts(struct task_struct *task) 830void ds_resume_pebs(struct pebs_tracer *tracer)
770{ 831{
771 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); 832
772} 833}
773 834
774int ds_clear_pebs(struct task_struct *task) 835const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
775{ 836{
776 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); 837 if (!tracer)
838 return NULL;
839
840 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
841 return &tracer->trace;
777} 842}
778 843
779int ds_get_pebs_reset(struct task_struct *task, u64 *value) 844const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
780{ 845{
781 struct ds_context *context; 846 if (!tracer)
782 int error; 847 return NULL;
783 848
784 if (!value) 849 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
850 tracer->trace.reset_value =
851 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
852
853 return &tracer->trace;
854}
855
856int ds_reset_bts(struct bts_tracer *tracer)
857{
858 if (!tracer)
785 return -EINVAL; 859 return -EINVAL;
786 860
787 context = ds_get_context(task); 861 tracer->trace.ds.top = tracer->trace.ds.begin;
788 error = ds_validate_access(context, ds_pebs);
789 if (error < 0)
790 goto out;
791 862
792 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); 863 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
864 (unsigned long)tracer->trace.ds.top);
793 865
794 error = 0; 866 return 0;
795 out:
796 ds_put_context(context);
797 return error;
798} 867}
799 868
800int ds_set_pebs_reset(struct task_struct *task, u64 value) 869int ds_reset_pebs(struct pebs_tracer *tracer)
801{ 870{
802 struct ds_context *context; 871 if (!tracer)
803 int error; 872 return -EINVAL;
804 873
805 context = ds_get_context(task); 874 tracer->trace.ds.top = tracer->trace.ds.begin;
806 error = ds_validate_access(context, ds_pebs);
807 if (error < 0)
808 goto out;
809 875
810 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; 876 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
877 (unsigned long)tracer->trace.ds.top);
811 878
812 error = 0; 879 return 0;
813 out: 880}
814 ds_put_context(context); 881
815 return error; 882int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
883{
884 if (!tracer)
885 return -EINVAL;
886
887 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
888
889 return 0;
816} 890}
817 891
818static const struct ds_configuration ds_cfg_var = { 892static const struct ds_configuration ds_cfg_netburst = {
819 .sizeof_ds = sizeof(long) * 12, 893 .name = "netburst",
820 .sizeof_field = sizeof(long), 894 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
821 .sizeof_rec[ds_bts] = sizeof(long) * 3, 895 .ctl[dsf_bts_kernel] = (1 << 5),
896 .ctl[dsf_bts_user] = (1 << 6),
897
898 .sizeof_field = sizeof(long),
899 .sizeof_rec[ds_bts] = sizeof(long) * 3,
822#ifdef __i386__ 900#ifdef __i386__
823 .sizeof_rec[ds_pebs] = sizeof(long) * 10 901 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
824#else 902#else
825 .sizeof_rec[ds_pebs] = sizeof(long) * 18 903 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
826#endif 904#endif
827}; 905};
828static const struct ds_configuration ds_cfg_64 = { 906static const struct ds_configuration ds_cfg_pentium_m = {
829 .sizeof_ds = 8 * 12, 907 .name = "pentium m",
830 .sizeof_field = 8, 908 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
831 .sizeof_rec[ds_bts] = 8 * 3, 909
910 .sizeof_field = sizeof(long),
911 .sizeof_rec[ds_bts] = sizeof(long) * 3,
832#ifdef __i386__ 912#ifdef __i386__
833 .sizeof_rec[ds_pebs] = 8 * 10 913 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
834#else 914#else
835 .sizeof_rec[ds_pebs] = 8 * 18 915 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
836#endif 916#endif
837}; 917};
918static const struct ds_configuration ds_cfg_core2 = {
919 .name = "core 2",
920 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
921 .ctl[dsf_bts_kernel] = (1 << 9),
922 .ctl[dsf_bts_user] = (1 << 10),
923
924 .sizeof_field = 8,
925 .sizeof_rec[ds_bts] = 8 * 3,
926 .sizeof_rec[ds_pebs] = 8 * 18,
927};
838 928
839static inline void 929static void
840ds_configure(const struct ds_configuration *cfg) 930ds_configure(const struct ds_configuration *cfg)
841{ 931{
932 memset(&ds_cfg, 0, sizeof(ds_cfg));
842 ds_cfg = *cfg; 933 ds_cfg = *cfg;
934
935 printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
936
937 if (!cpu_has_bts) {
938 ds_cfg.ctl[dsf_bts] = 0;
939 printk(KERN_INFO "[ds] bts not available\n");
940 }
941 if (!cpu_has_pebs)
942 printk(KERN_INFO "[ds] pebs not available\n");
943
944 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
843} 945}
844 946
845void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 947void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -852,10 +954,10 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
852 break; 954 break;
853 case 0xD: 955 case 0xD:
854 case 0xE: /* Pentium M */ 956 case 0xE: /* Pentium M */
855 ds_configure(&ds_cfg_var); 957 ds_configure(&ds_cfg_pentium_m);
856 break; 958 break;
857 default: /* Core2, Atom, ... */ 959 default: /* Core2, Atom, ... */
858 ds_configure(&ds_cfg_64); 960 ds_configure(&ds_cfg_core2);
859 break; 961 break;
860 } 962 }
861 break; 963 break;
@@ -864,7 +966,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
864 case 0x0: 966 case 0x0:
865 case 0x1: 967 case 0x1:
866 case 0x2: /* Netburst */ 968 case 0x2: /* Netburst */
867 ds_configure(&ds_cfg_var); 969 ds_configure(&ds_cfg_netburst);
868 break; 970 break;
869 default: 971 default:
870 /* sorry, don't know about them */ 972 /* sorry, don't know about them */
@@ -877,12 +979,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
877 } 979 }
878} 980}
879 981
880void ds_free(struct ds_context *context) 982/*
983 * Change the DS configuration from tracing prev to tracing next.
984 */
985void ds_switch_to(struct task_struct *prev, struct task_struct *next)
986{
987 struct ds_context *prev_ctx = prev->thread.ds_ctx;
988 struct ds_context *next_ctx = next->thread.ds_ctx;
989
990 if (prev_ctx) {
991 update_debugctlmsr(0);
992
993 if (prev_ctx->bts_master &&
994 (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
995 struct bts_struct ts = {
996 .qualifier = bts_task_departs,
997 .variant.timestamp.jiffies = jiffies_64,
998 .variant.timestamp.pid = prev->pid
999 };
1000 bts_write(prev_ctx->bts_master, &ts);
1001 }
1002 }
1003
1004 if (next_ctx) {
1005 if (next_ctx->bts_master &&
1006 (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1007 struct bts_struct ts = {
1008 .qualifier = bts_task_arrives,
1009 .variant.timestamp.jiffies = jiffies_64,
1010 .variant.timestamp.pid = next->pid
1011 };
1012 bts_write(next_ctx->bts_master, &ts);
1013 }
1014
1015 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1016 }
1017
1018 update_debugctlmsr(next->thread.debugctlmsr);
1019}
1020
1021void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
1022{
1023 clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
1024 tsk->thread.ds_ctx = NULL;
1025}
1026
1027void ds_exit_thread(struct task_struct *tsk)
881{ 1028{
882 /* This is called when the task owning the parameter context 1029 WARN_ON(tsk->thread.ds_ctx);
883 * is dying. There should not be any user of that context left
884 * to disturb us, anymore. */
885 unsigned long leftovers = context->count;
886 while (leftovers--)
887 ds_put_context(context);
888} 1030}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 5962176dfabb..6b1f6f6f8661 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -30,6 +30,37 @@ void printk_address(unsigned long address, int reliable)
30 reliable ? "" : "? ", (void *) address); 30 reliable ? "" : "? ", (void *) address);
31} 31}
32 32
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * When a stack slot holds the address of return_to_handler, the real
 * return address was moved to the task's ret_stack.  Report that saved
 * address through @ops->address() as well.
 *
 * @graph counts how many such slots were already resolved during this
 * stack walk; it selects the ret_stack entry (counted down from
 * curr_ret_stack) and is incremented on every successful lookup.
 */
static void
print_ftrace_graph_addr(unsigned long addr, void *data,
			const struct stacktrace_ops *ops,
			struct thread_info *tinfo, int *graph)
{
	struct task_struct *task = tinfo->task;
	int slot = task->curr_ret_stack;

	/* Only hooked return addresses are of interest. */
	if (addr != (unsigned long)return_to_handler)
		return;

	/* No return stack allocated for this task. */
	if (!task->ret_stack)
		return;

	/* Already consumed every saved entry. */
	if (slot < *graph)
		return;

	slot -= *graph;
	ops->address(data, task->ret_stack[slot].ret, 1);

	(*graph)++;
}
#else
/* Graph tracer disabled: nothing to translate. */
static inline void
print_ftrace_graph_addr(unsigned long addr, void *data,
			const struct stacktrace_ops *ops,
			struct thread_info *tinfo, int *graph)
{
}
#endif
63
33/* 64/*
34 * x86-64 can have up to three kernel stacks: 65 * x86-64 can have up to three kernel stacks:
35 * process stack 66 * process stack
@@ -54,7 +85,7 @@ unsigned long
54print_context_stack(struct thread_info *tinfo, 85print_context_stack(struct thread_info *tinfo,
55 unsigned long *stack, unsigned long bp, 86 unsigned long *stack, unsigned long bp,
56 const struct stacktrace_ops *ops, void *data, 87 const struct stacktrace_ops *ops, void *data,
57 unsigned long *end) 88 unsigned long *end, int *graph)
58{ 89{
59 struct stack_frame *frame = (struct stack_frame *)bp; 90 struct stack_frame *frame = (struct stack_frame *)bp;
60 91
@@ -70,6 +101,7 @@ print_context_stack(struct thread_info *tinfo,
70 } else { 101 } else {
71 ops->address(data, addr, bp == 0); 102 ops->address(data, addr, bp == 0);
72 } 103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
73 } 105 }
74 stack++; 106 stack++;
75 } 107 }
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 3119a801c32b..da87590b8698 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -18,7 +18,7 @@ extern unsigned long
18print_context_stack(struct thread_info *tinfo, 18print_context_stack(struct thread_info *tinfo,
19 unsigned long *stack, unsigned long bp, 19 unsigned long *stack, unsigned long bp,
20 const struct stacktrace_ops *ops, void *data, 20 const struct stacktrace_ops *ops, void *data,
21 unsigned long *end); 21 unsigned long *end, int *graph);
22 22
23extern void 23extern void
24show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 24show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 7b031b106ec8..d593cd1f58dc 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -23,6 +23,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
23 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
24 const struct stacktrace_ops *ops, void *data) 24 const struct stacktrace_ops *ops, void *data)
25{ 25{
26 int graph = 0;
27
26 if (!task) 28 if (!task)
27 task = current; 29 task = current;
28 30
@@ -50,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
50 52
51 context = (struct thread_info *) 53 context = (struct thread_info *)
52 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 54 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
53 bp = print_context_stack(context, stack, bp, ops, data, NULL); 55 bp = print_context_stack(context, stack, bp, ops,
56 data, NULL, &graph);
54 57
55 stack = (unsigned long *)context->previous_esp; 58 stack = (unsigned long *)context->previous_esp;
56 if (!stack) 59 if (!stack)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 33ff10287a5d..c302d0707048 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -109,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
110 unsigned used = 0; 110 unsigned used = 0;
111 struct thread_info *tinfo; 111 struct thread_info *tinfo;
112 int graph = 0;
112 113
113 if (!task) 114 if (!task)
114 task = current; 115 task = current;
@@ -149,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 break; 150 break;
150 151
151 bp = print_context_stack(tinfo, stack, bp, ops, 152 bp = print_context_stack(tinfo, stack, bp, ops,
152 data, estack_end); 153 data, estack_end, &graph);
153 ops->stack(data, "<EOE>"); 154 ops->stack(data, "<EOE>");
154 /* 155 /*
155 * We link to the next stack via the 156 * We link to the next stack via the
@@ -168,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
168 if (ops->stack(data, "IRQ") < 0) 169 if (ops->stack(data, "IRQ") < 0)
169 break; 170 break;
170 bp = print_context_stack(tinfo, stack, bp, 171 bp = print_context_stack(tinfo, stack, bp,
171 ops, data, irqstack_end); 172 ops, data, irqstack_end, &graph);
172 /* 173 /*
173 * We link to the next stack (which would be 174 * We link to the next stack (which would be
174 * the process stack normally) the last 175 * the process stack normally) the last
@@ -186,7 +187,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
186 /* 187 /*
187 * This handles the process stack: 188 * This handles the process stack:
188 */ 189 */
189 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); 190 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
190 put_cpu(); 191 put_cpu();
191} 192}
192EXPORT_SYMBOL(dump_trace); 193EXPORT_SYMBOL(dump_trace);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index fe7014176eb0..d6f0490a7391 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -954,6 +954,9 @@ ENTRY(mcount)
954END(mcount) 954END(mcount)
955 955
956ENTRY(ftrace_caller) 956ENTRY(ftrace_caller)
957 cmpl $0, function_trace_stop
958 jne ftrace_stub
959
957 pushl %eax 960 pushl %eax
958 pushl %ecx 961 pushl %ecx
959 pushl %edx 962 pushl %edx
@@ -968,6 +971,11 @@ ftrace_call:
968 popl %edx 971 popl %edx
969 popl %ecx 972 popl %ecx
970 popl %eax 973 popl %eax
974#ifdef CONFIG_FUNCTION_GRAPH_TRACER
975.globl ftrace_graph_call
976ftrace_graph_call:
977 jmp ftrace_stub
978#endif
971 979
972.globl ftrace_stub 980.globl ftrace_stub
973ftrace_stub: 981ftrace_stub:
@@ -977,8 +985,18 @@ END(ftrace_caller)
977#else /* ! CONFIG_DYNAMIC_FTRACE */ 985#else /* ! CONFIG_DYNAMIC_FTRACE */
978 986
979ENTRY(mcount) 987ENTRY(mcount)
988 cmpl $0, function_trace_stop
989 jne ftrace_stub
990
980 cmpl $ftrace_stub, ftrace_trace_function 991 cmpl $ftrace_stub, ftrace_trace_function
981 jnz trace 992 jnz trace
993#ifdef CONFIG_FUNCTION_GRAPH_TRACER
994 cmpl $ftrace_stub, ftrace_graph_return
995 jnz ftrace_graph_caller
996
997 cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
998 jnz ftrace_graph_caller
999#endif
982.globl ftrace_stub 1000.globl ftrace_stub
983ftrace_stub: 1001ftrace_stub:
984 ret 1002 ret
@@ -997,12 +1015,43 @@ trace:
997 popl %edx 1015 popl %edx
998 popl %ecx 1016 popl %ecx
999 popl %eax 1017 popl %eax
1000
1001 jmp ftrace_stub 1018 jmp ftrace_stub
1002END(mcount) 1019END(mcount)
1003#endif /* CONFIG_DYNAMIC_FTRACE */ 1020#endif /* CONFIG_DYNAMIC_FTRACE */
1004#endif /* CONFIG_FUNCTION_TRACER */ 1021#endif /* CONFIG_FUNCTION_TRACER */
1005 1022
1023#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * Graph-tracer entry stub (32-bit).  Bails out through ftrace_stub while
 * function_trace_stop is non-zero; otherwise saves %eax/%ecx/%edx, calls
 * prepare_ftrace_return() and restores them.
 * NOTE(review): arguments are passed in %eax/%edx, which assumes the
 * kernel's register calling convention - confirm.
 */
1024ENTRY(ftrace_graph_caller)
1025 cmpl $0, function_trace_stop
1026 jne ftrace_stub
1027
1028 pushl %eax
1029 pushl %ecx
1030 pushl %edx
1031 movl 0xc(%esp), %edx /* word just above the three saved registers */
1032 lea 0x4(%ebp), %eax /* address of the slot above the saved %ebp */
1033 subl $MCOUNT_INSN_SIZE, %edx /* step back over the call instruction */
1034 call prepare_ftrace_return
1035 popl %edx
1036 popl %ecx
1037 popl %eax
1038 ret
1039END(ftrace_graph_caller)
1040
/*
 * Trampoline installed in place of a traced function's return address.
 * It reserves one stack slot, preserves %eax/%ecx/%edx around the call to
 * ftrace_return_to_handler(), stores that function's result in the
 * reserved slot and finally "returns" to it.
 */
1041.globl return_to_handler
1042return_to_handler:
1043 pushl $0 /* placeholder for the address to return to */
1044 pushl %eax
1045 pushl %ecx
1046 pushl %edx
1047 call ftrace_return_to_handler
1048 movl %eax, 0xc(%esp) /* fill the placeholder (3 saved regs = 0xc) */
1049 popl %edx
1050 popl %ecx
1051 popl %eax
1052 ret /* pops the placeholder and jumps there */
1053#endif
1054
1006.section .rodata,"a" 1055.section .rodata,"a"
1007#include "syscall_table_32.S" 1056#include "syscall_table_32.S"
1008 1057
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3194636a4293..e28c7a987793 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -67,16 +67,10 @@ ENTRY(mcount)
67END(mcount) 67END(mcount)
68 68
69ENTRY(ftrace_caller) 69ENTRY(ftrace_caller)
70 cmpl $0, function_trace_stop
71 jne ftrace_stub
70 72
71 /* taken from glibc */ 73 MCOUNT_SAVE_FRAME
72 subq $0x38, %rsp
73 movq %rax, (%rsp)
74 movq %rcx, 8(%rsp)
75 movq %rdx, 16(%rsp)
76 movq %rsi, 24(%rsp)
77 movq %rdi, 32(%rsp)
78 movq %r8, 40(%rsp)
79 movq %r9, 48(%rsp)
80 74
81 movq 0x38(%rsp), %rdi 75 movq 0x38(%rsp), %rdi
82 movq 8(%rbp), %rsi 76 movq 8(%rbp), %rsi
@@ -86,14 +80,13 @@ ENTRY(ftrace_caller)
86ftrace_call: 80ftrace_call:
87 call ftrace_stub 81 call ftrace_stub
88 82
89 movq 48(%rsp), %r9 83 MCOUNT_RESTORE_FRAME
90 movq 40(%rsp), %r8 84
91 movq 32(%rsp), %rdi 85#ifdef CONFIG_FUNCTION_GRAPH_TRACER
92 movq 24(%rsp), %rsi 86.globl ftrace_graph_call
93 movq 16(%rsp), %rdx 87ftrace_graph_call:
94 movq 8(%rsp), %rcx 88 jmp ftrace_stub
95 movq (%rsp), %rax 89#endif
96 addq $0x38, %rsp
97 90
98.globl ftrace_stub 91.globl ftrace_stub
99ftrace_stub: 92ftrace_stub:
@@ -102,15 +95,63 @@ END(ftrace_caller)
102 95
103#else /* ! CONFIG_DYNAMIC_FTRACE */ 96#else /* ! CONFIG_DYNAMIC_FTRACE */
104ENTRY(mcount) 97ENTRY(mcount)
98 cmpl $0, function_trace_stop
99 jne ftrace_stub
100
105 cmpq $ftrace_stub, ftrace_trace_function 101 cmpq $ftrace_stub, ftrace_trace_function
106 jnz trace 102 jnz trace
103
104#ifdef CONFIG_FUNCTION_GRAPH_TRACER
105 cmpq $ftrace_stub, ftrace_graph_return
106 jnz ftrace_graph_caller
107
108 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
109 jnz ftrace_graph_caller
110#endif
111
107.globl ftrace_stub 112.globl ftrace_stub
108ftrace_stub: 113ftrace_stub:
109 retq 114 retq
110 115
111trace: 116trace:
112 /* taken from glibc */ 117 MCOUNT_SAVE_FRAME
113 subq $0x38, %rsp 118
119 movq 0x38(%rsp), %rdi
120 movq 8(%rbp), %rsi
121 subq $MCOUNT_INSN_SIZE, %rdi
122
123 call *ftrace_trace_function
124
125 MCOUNT_RESTORE_FRAME
126
127 jmp ftrace_stub
128END(mcount)
129#endif /* CONFIG_DYNAMIC_FTRACE */
130#endif /* CONFIG_FUNCTION_TRACER */
131
132#ifdef CONFIG_FUNCTION_GRAPH_TRACER
133ENTRY(ftrace_graph_caller)
134 cmpl $0, function_trace_stop
135 jne ftrace_stub
136
137 MCOUNT_SAVE_FRAME
138
139 leaq 8(%rbp), %rdi
140 movq 0x38(%rsp), %rsi
141 subq $MCOUNT_INSN_SIZE, %rsi
142
143 call prepare_ftrace_return
144
145 MCOUNT_RESTORE_FRAME
146
147 retq
148END(ftrace_graph_caller)
149
150
151.globl return_to_handler
152return_to_handler:
153 subq $80, %rsp
154
114 movq %rax, (%rsp) 155 movq %rax, (%rsp)
115 movq %rcx, 8(%rsp) 156 movq %rcx, 8(%rsp)
116 movq %rdx, 16(%rsp) 157 movq %rdx, 16(%rsp)
@@ -118,13 +159,14 @@ trace:
118 movq %rdi, 32(%rsp) 159 movq %rdi, 32(%rsp)
119 movq %r8, 40(%rsp) 160 movq %r8, 40(%rsp)
120 movq %r9, 48(%rsp) 161 movq %r9, 48(%rsp)
162 movq %r10, 56(%rsp)
163 movq %r11, 64(%rsp)
121 164
122 movq 0x38(%rsp), %rdi 165 call ftrace_return_to_handler
123 movq 8(%rbp), %rsi
124 subq $MCOUNT_INSN_SIZE, %rdi
125
126 call *ftrace_trace_function
127 166
167 movq %rax, 72(%rsp)
168 movq 64(%rsp), %r11
169 movq 56(%rsp), %r10
128 movq 48(%rsp), %r9 170 movq 48(%rsp), %r9
129 movq 40(%rsp), %r8 171 movq 40(%rsp), %r8
130 movq 32(%rsp), %rdi 172 movq 32(%rsp), %rdi
@@ -132,12 +174,10 @@ trace:
132 movq 16(%rsp), %rdx 174 movq 16(%rsp), %rdx
133 movq 8(%rsp), %rcx 175 movq 8(%rsp), %rcx
134 movq (%rsp), %rax 176 movq (%rsp), %rax
135 addq $0x38, %rsp 177 addq $72, %rsp
178 retq
179#endif
136 180
137 jmp ftrace_stub
138END(mcount)
139#endif /* CONFIG_DYNAMIC_FTRACE */
140#endif /* CONFIG_FUNCTION_TRACER */
141 181
142#ifndef CONFIG_PREEMPT 182#ifndef CONFIG_PREEMPT
143#define retint_kernel retint_restore_args 183#define retint_kernel retint_restore_args
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..1b43086b097a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/ftrace.h> 15#include <linux/ftrace.h>
16#include <linux/percpu.h> 16#include <linux/percpu.h>
17#include <linux/sched.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/list.h> 19#include <linux/list.h>
19 20
20#include <asm/ftrace.h> 21#include <asm/ftrace.h>
22#include <linux/ftrace.h>
21#include <asm/nops.h> 23#include <asm/nops.h>
24#include <asm/nmi.h>
22 25
23 26
24static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; 27#ifdef CONFIG_DYNAMIC_FTRACE
25 28
26union ftrace_code_union { 29union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE]; 30 char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
31 } __attribute__((packed)); 34 } __attribute__((packed));
32}; 35};
33 36
34
35static int ftrace_calc_offset(long ip, long addr) 37static int ftrace_calc_offset(long ip, long addr)
36{ 38{
37 return (int)(addr - ip); 39 return (int)(addr - ip);
38} 40}
39 41
40unsigned char *ftrace_nop_replace(void) 42static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
41{
42 return ftrace_nop;
43}
44
45unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{ 43{
47 static union ftrace_code_union calc; 44 static union ftrace_code_union calc;
48 45
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
56 return calc.code; 53 return calc.code;
57} 54}
58 55
59int 56/*
57 * Modifying code must take extra care. On an SMP machine, if
58 * the code being modified is also being executed on another CPU
59 * that CPU will have undefined results and possibly take a GPF.
60 * We use kstop_machine to stop other CPUs from executing code.
61 * But this does not stop NMIs from happening. We still need
62 * to protect against that. We separate out the modification of
63 * the code to take care of this.
64 *
65 * Two buffers are added: An IP buffer and a "code" buffer.
66 *
67 * 1) Put the instruction pointer into the IP buffer
68 * and the new code into the "code" buffer.
69 * 2) Set a flag that says we are modifying code
70 * 3) Wait for any running NMIs to finish.
71 * 4) Write the code
72 * 5) clear the flag.
73 * 6) Wait for any running NMIs to finish.
74 *
75 * If an NMI is executed, the first thing it does is to call
76 * "ftrace_nmi_enter". This will check if the flag is set to write
77 * and if it is, it will write what is in the IP and "code" buffers.
78 *
79 * The trick is, it does not matter if everyone is writing the same
80 * content to the code location. Also, if a CPU is executing code
81 * it is OK to write to that code location if the contents being written
82 * are the same as what exists.
83 */
84
85static atomic_t in_nmi = ATOMIC_INIT(0); /* raised by ftrace_nmi_enter(), dropped by ftrace_nmi_exit() */
86static int mod_code_status; /* holds return value of text write */
87static int mod_code_write; /* set when NMI should do the write */
88static void *mod_code_ip; /* holds the IP to write to */
89static void *mod_code_newcode; /* holds the text to write to the IP */
90
91static unsigned nmi_wait_count; /* wait_for_nmi() calls that actually had to spin */
92static atomic_t nmi_update_count = ATOMIC_INIT(0); /* code writes performed from ftrace_nmi_enter() */
93
94int ftrace_arch_read_dyn_info(char *buf, int size)
95{
96 int r;
97
98 r = snprintf(buf, size, "%u %u",
99 nmi_wait_count,
100 atomic_read(&nmi_update_count));
101 return r;
102}
103
104static void ftrace_mod_code(void)
105{
106 /*
107 * Yes, more than one CPU process can be writing to mod_code_status.
108 * (and the code itself)
109 * But if one were to fail, then they all should, and if one were
110 * to succeed, then they all should.
111 */
112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
113 MCOUNT_INSN_SIZE);
114}
115
116void ftrace_nmi_enter(void)
117{
118 atomic_inc(&in_nmi);
119 /* Must have in_nmi seen before reading write flag */
120 smp_mb();
121 if (mod_code_write) {
122 ftrace_mod_code();
123 atomic_inc(&nmi_update_count);
124 }
125}
126
/*
 * NMI exit hook: drop the in_nmi count raised by ftrace_nmi_enter() so
 * that wait_for_nmi() can make progress.
 *
 * NOTE(review): smp_wmb() only orders stores, while the comment below
 * speaks of "all executions" - confirm whether a full smp_mb() is intended.
 */
127void ftrace_nmi_exit(void)
128{
129 /* Finish all executions before clearing in_nmi */
130 smp_wmb();
131 atomic_dec(&in_nmi);
132}
133
134static void wait_for_nmi(void)
135{
136 int waited = 0;
137
138 while (atomic_read(&in_nmi)) {
139 waited = 1;
140 cpu_relax();
141 }
142
143 if (waited)
144 nmi_wait_count++;
145}
146
/*
 * Perform one text modification while cooperating with NMI handlers.
 *
 * @ip:       address of the instruction to overwrite
 * @new_code: bytes to write there (MCOUNT_INSN_SIZE of them are copied by
 *            ftrace_mod_code())
 *
 * The target and the new bytes are published in mod_code_ip and
 * mod_code_newcode, then mod_code_write is raised so that any NMI passing
 * through ftrace_nmi_enter() performs the very same write.  The function
 * waits for running NMIs before writing and again after clearing the
 * flag, so no NMI can still be using the buffers when it returns.
 *
 * Returns mod_code_status, i.e. the result of the text write.
 */
147static int
148do_ftrace_mod_code(unsigned long ip, void *new_code)
149{
150 mod_code_ip = (void *)ip;
151 mod_code_newcode = new_code;
152
153 /* The buffers need to be visible before we let NMIs write them */
154 smp_wmb();
155
156 mod_code_write = 1;
157
158 /* Make sure write bit is visible before we wait on NMIs */
159 smp_mb();
160
161 wait_for_nmi();
162
163 /* Make sure all running NMIs have finished before we write the code */
164 smp_mb();
165
166 ftrace_mod_code();
167
168 /* Make sure the write happens before clearing the bit */
169 smp_wmb();
170
171 mod_code_write = 0;
172
173 /* make sure NMIs see the cleared bit */
174 smp_mb();
175
176 wait_for_nmi();
177
178 return mod_code_status;
179}
180
181
182
183
184static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
185
/* Return the MCOUNT_INSN_SIZE-byte buffer (ftrace_nop) used as the nop replacement. */
186static unsigned char *ftrace_nop_replace(void)
187{
188 return ftrace_nop;
189}
190
191static int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 192ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 193 unsigned char *new_code)
62{ 194{
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
81 return -EINVAL; 213 return -EINVAL;
82 214
83 /* replace the text with the new text */ 215 /* replace the text with the new text */
84 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 216 if (do_ftrace_mod_code(ip, new_code))
85 return -EPERM; 217 return -EPERM;
86 218
87 sync_core(); 219 sync_core();
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
89 return 0; 221 return 0;
90} 222}
91 223
224int ftrace_make_nop(struct module *mod,
225 struct dyn_ftrace *rec, unsigned long addr)
226{
227 unsigned char *new, *old;
228 unsigned long ip = rec->ip;
229
230 old = ftrace_call_replace(ip, addr);
231 new = ftrace_nop_replace();
232
233 return ftrace_modify_code(rec->ip, old, new);
234}
235
236int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
237{
238 unsigned char *new, *old;
239 unsigned long ip = rec->ip;
240
241 old = ftrace_nop_replace();
242 new = ftrace_call_replace(ip, addr);
243
244 return ftrace_modify_code(rec->ip, old, new);
245}
246
92int ftrace_update_ftrace_func(ftrace_func_t func) 247int ftrace_update_ftrace_func(ftrace_func_t func)
93{ 248{
94 unsigned long ip = (unsigned long)(&ftrace_call); 249 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data)
165 320
166 return 0; 321 return 0;
167} 322}
323#endif
324
325#ifdef CONFIG_FUNCTION_GRAPH_TRACER
326
327#ifdef CONFIG_DYNAMIC_FTRACE
328extern void ftrace_graph_call(void);
329
330static int ftrace_mod_jmp(unsigned long ip,
331 int old_offset, int new_offset)
332{
333 unsigned char code[MCOUNT_INSN_SIZE];
334
335 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
336 return -EFAULT;
337
338 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
339 return -EINVAL;
340
341 *(int *)(&code[1]) = new_offset;
342
343 if (do_ftrace_mod_code(ip, &code))
344 return -EPERM;
345
346 return 0;
347}
348
349int ftrace_enable_ftrace_graph_caller(void)
350{
351 unsigned long ip = (unsigned long)(&ftrace_graph_call);
352 int old_offset, new_offset;
353
354 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
355 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
356
357 return ftrace_mod_jmp(ip, old_offset, new_offset);
358}
359
360int ftrace_disable_ftrace_graph_caller(void)
361{
362 unsigned long ip = (unsigned long)(&ftrace_graph_call);
363 int old_offset, new_offset;
364
365 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
366 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
367
368 return ftrace_mod_jmp(ip, old_offset, new_offset);
369}
370
371#else /* CONFIG_DYNAMIC_FTRACE */
372
373/*
374 * These functions are picked from those used on
375 * this page for dynamic ftrace. They have been
376 * simplified to ignore all traces in NMI context.
377 */
378static atomic_t in_nmi;
379
/* NMI entry hook (no dynamic ftrace): only counts the NMI in in_nmi. */
380void ftrace_nmi_enter(void)
381{
382 atomic_inc(&in_nmi);
383}
384
/* NMI exit hook (no dynamic ftrace): undoes the in_nmi increment. */
385void ftrace_nmi_exit(void)
386{
387 atomic_dec(&in_nmi);
388}
389
390#endif /* !CONFIG_DYNAMIC_FTRACE */
391
392/* Add a function return address to the trace stack on thread info.*/
393static int push_return_trace(unsigned long ret, unsigned long long time,
394 unsigned long func, int *depth)
395{
396 int index;
397
398 if (!current->ret_stack)
399 return -EBUSY;
400
401 /* The return trace stack is full */
402 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
403 atomic_inc(&current->trace_overrun);
404 return -EBUSY;
405 }
406
407 index = ++current->curr_ret_stack;
408 barrier();
409 current->ret_stack[index].ret = ret;
410 current->ret_stack[index].func = func;
411 current->ret_stack[index].calltime = time;
412 *depth = index;
413
414 return 0;
415}
416
417/* Retrieve a function return address to the trace stack on thread info.*/
418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
419{
420 int index;
421
422 index = current->curr_ret_stack;
423
424 if (unlikely(index < 0)) {
425 ftrace_graph_stop();
426 WARN_ON(1);
427 /* Might as well panic, otherwise we have no where to go */
428 *ret = (unsigned long)panic;
429 return;
430 }
431
432 *ret = current->ret_stack[index].ret;
433 trace->func = current->ret_stack[index].func;
434 trace->calltime = current->ret_stack[index].calltime;
435 trace->overrun = atomic_read(&current->trace_overrun);
436 trace->depth = index;
437 barrier();
438 current->curr_ret_stack--;
439
440}
441
442/*
443 * Send the trace to the ring-buffer.
444 * @return the original return address.
445 */
446unsigned long ftrace_return_to_handler(void)
447{
448 struct ftrace_graph_ret trace;
449 unsigned long ret;
450
451 pop_return_trace(&trace, &ret);
452 trace.rettime = cpu_clock(raw_smp_processor_id());
453 ftrace_graph_return(&trace);
454
455 if (unlikely(!ret)) {
456 ftrace_graph_stop();
457 WARN_ON(1);
458 /* Might as well panic. What else to do? */
459 ret = (unsigned long)panic;
460 }
461
462 return ret;
463}
464
465/*
466 * Hook the return address and push it in the stack of return addrs
467 * in current thread info.
468 */
469void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
470{
471 unsigned long old;
472 unsigned long long calltime;
473 int faulted;
474 struct ftrace_graph_ent trace;
475 unsigned long return_hooker = (unsigned long)
476 &return_to_handler;
477
478 /* Nmi's are currently unsupported */
479 if (unlikely(atomic_read(&in_nmi)))
480 return;
481
482 if (unlikely(atomic_read(&current->tracing_graph_pause)))
483 return;
484
485 /*
486 * Protect against fault, even if it shouldn't
487 * happen. This tool is too much intrusive to
488 * ignore such a protection.
489 */
490 asm volatile(
491 "1: " _ASM_MOV " (%[parent_old]), %[old]\n"
492 "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
493 " movl $0, %[faulted]\n"
494
495 ".section .fixup, \"ax\"\n"
496 "3: movl $1, %[faulted]\n"
497 ".previous\n"
498
499 _ASM_EXTABLE(1b, 3b)
500 _ASM_EXTABLE(2b, 3b)
501
502 : [parent_replaced] "=r" (parent), [old] "=r" (old),
503 [faulted] "=r" (faulted)
504 : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
505 : "memory"
506 );
507
508 if (unlikely(faulted)) {
509 ftrace_graph_stop();
510 WARN_ON(1);
511 return;
512 }
513
514 if (unlikely(!__kernel_text_address(old))) {
515 ftrace_graph_stop();
516 *parent = old;
517 WARN_ON(1);
518 return;
519 }
520
521 calltime = cpu_clock(raw_smp_processor_id());
522
523 if (push_return_trace(old, calltime,
524 self_addr, &trace.depth) == -EBUSY) {
525 *parent = old;
526 return;
527 }
528
529 trace.func = self_addr;
530
531 /* Only trace if the calling function expects to */
532 if (!ftrace_graph_entry(&trace)) {
533 current->curr_ret_stack--;
534 *parent = old;
535 }
536}
537#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1d3d0e71b044..1df869e5bd0b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -13,6 +13,7 @@
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/ftrace.h>
16#include <asm/uaccess.h> 17#include <asm/uaccess.h>
17#include <asm/io_apic.h> 18#include <asm/io_apic.h>
18#include <asm/idle.h> 19#include <asm/idle.h>
@@ -45,7 +46,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
45 * SMP cross-CPU interrupts have their own specific 46 * SMP cross-CPU interrupts have their own specific
46 * handlers). 47 * handlers).
47 */ 48 */
48asmlinkage unsigned int do_IRQ(struct pt_regs *regs) 49asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
49{ 50{
50 struct pt_regs *old_regs = set_irq_regs(regs); 51 struct pt_regs *old_regs = set_irq_regs(regs);
51 struct irq_desc *desc; 52 struct irq_desc *desc;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b8f3e9dbabd7..e68bb9e30864 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,6 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pm.h> 9#include <linux/pm.h>
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/ftrace.h>
11#include <asm/system.h> 12#include <asm/system.h>
12#include <asm/apic.h> 13#include <asm/apic.h>
13 14
@@ -102,6 +103,9 @@ static inline int hlt_use_halt(void)
102void default_idle(void) 103void default_idle(void)
103{ 104{
104 if (hlt_use_halt()) { 105 if (hlt_use_halt()) {
106 struct power_trace it;
107
108 trace_power_start(&it, POWER_CSTATE, 1);
105 current_thread_info()->status &= ~TS_POLLING; 109 current_thread_info()->status &= ~TS_POLLING;
106 /* 110 /*
107 * TS_POLLING-cleared state must be visible before we 111 * TS_POLLING-cleared state must be visible before we
@@ -114,6 +118,7 @@ void default_idle(void)
114 else 118 else
115 local_irq_enable(); 119 local_irq_enable();
116 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
121 trace_power_end(&it);
117 } else { 122 } else {
118 local_irq_enable(); 123 local_irq_enable();
119 /* loop is done by the caller */ 124 /* loop is done by the caller */
@@ -171,24 +176,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
171 */ 176 */
172void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 177void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
173{ 178{
179 struct power_trace it;
180
181 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
174 if (!need_resched()) { 182 if (!need_resched()) {
175 __monitor((void *)&current_thread_info()->flags, 0, 0); 183 __monitor((void *)&current_thread_info()->flags, 0, 0);
176 smp_mb(); 184 smp_mb();
177 if (!need_resched()) 185 if (!need_resched())
178 __mwait(ax, cx); 186 __mwait(ax, cx);
179 } 187 }
188 trace_power_end(&it);
180} 189}
181 190
182/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 191/* Default MONITOR/MWAIT with no hints, used for default C1 state */
183static void mwait_idle(void) 192static void mwait_idle(void)
184{ 193{
194 struct power_trace it;
185 if (!need_resched()) { 195 if (!need_resched()) {
196 trace_power_start(&it, POWER_CSTATE, 1);
186 __monitor((void *)&current_thread_info()->flags, 0, 0); 197 __monitor((void *)&current_thread_info()->flags, 0, 0);
187 smp_mb(); 198 smp_mb();
188 if (!need_resched()) 199 if (!need_resched())
189 __sti_mwait(0, 0); 200 __sti_mwait(0, 0);
190 else 201 else
191 local_irq_enable(); 202 local_irq_enable();
203 trace_power_end(&it);
192 } else 204 } else
193 local_irq_enable(); 205 local_irq_enable();
194} 206}
@@ -200,9 +212,13 @@ static void mwait_idle(void)
200 */ 212 */
201static void poll_idle(void) 213static void poll_idle(void)
202{ 214{
215 struct power_trace it;
216
217 trace_power_start(&it, POWER_CSTATE, 0);
203 local_irq_enable(); 218 local_irq_enable();
204 while (!need_resched()) 219 while (!need_resched())
205 cpu_relax(); 220 cpu_relax();
221 trace_power_end(&it);
206} 222}
207 223
208/* 224/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302fe6d45..3ba155d24884 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/dmi.h> 40#include <linux/dmi.h>
41#include <linux/ftrace.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -59,6 +60,7 @@
59#include <asm/idle.h> 60#include <asm/idle.h>
60#include <asm/syscalls.h> 61#include <asm/syscalls.h>
61#include <asm/smp.h> 62#include <asm/smp.h>
63#include <asm/ds.h>
62 64
63asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 65asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
64 66
@@ -250,14 +252,8 @@ void exit_thread(void)
250 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 252 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
251 put_cpu(); 253 put_cpu();
252 } 254 }
253#ifdef CONFIG_X86_DS 255
254 /* Free any DS contexts that have not been properly released. */ 256 ds_exit_thread(current);
255 if (unlikely(current->thread.ds_ctx)) {
256 /* we clear debugctl to make sure DS is not used. */
257 update_debugctlmsr(0);
258 ds_free(current->thread.ds_ctx);
259 }
260#endif /* CONFIG_X86_DS */
261} 257}
262 258
263void flush_thread(void) 259void flush_thread(void)
@@ -339,6 +335,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
339 kfree(p->thread.io_bitmap_ptr); 335 kfree(p->thread.io_bitmap_ptr);
340 p->thread.io_bitmap_max = 0; 336 p->thread.io_bitmap_max = 0;
341 } 337 }
338
339 ds_copy_thread(p, current);
340
341 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
342 p->thread.debugctlmsr = 0;
343
342 return err; 344 return err;
343} 345}
344 346
@@ -419,48 +421,19 @@ int set_tsc_mode(unsigned int val)
419 return 0; 421 return 0;
420} 422}
421 423
422#ifdef CONFIG_X86_DS
423static int update_debugctl(struct thread_struct *prev,
424 struct thread_struct *next, unsigned long debugctl)
425{
426 unsigned long ds_prev = 0;
427 unsigned long ds_next = 0;
428
429 if (prev->ds_ctx)
430 ds_prev = (unsigned long)prev->ds_ctx->ds;
431 if (next->ds_ctx)
432 ds_next = (unsigned long)next->ds_ctx->ds;
433
434 if (ds_next != ds_prev) {
435 /* we clear debugctl to make sure DS
436 * is not in use when we change it */
437 debugctl = 0;
438 update_debugctlmsr(0);
439 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
440 }
441 return debugctl;
442}
443#else
444static int update_debugctl(struct thread_struct *prev,
445 struct thread_struct *next, unsigned long debugctl)
446{
447 return debugctl;
448}
449#endif /* CONFIG_X86_DS */
450
451static noinline void 424static noinline void
452__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 425__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
453 struct tss_struct *tss) 426 struct tss_struct *tss)
454{ 427{
455 struct thread_struct *prev, *next; 428 struct thread_struct *prev, *next;
456 unsigned long debugctl;
457 429
458 prev = &prev_p->thread; 430 prev = &prev_p->thread;
459 next = &next_p->thread; 431 next = &next_p->thread;
460 432
461 debugctl = update_debugctl(prev, next, prev->debugctlmsr); 433 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
462 434 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
463 if (next->debugctlmsr != debugctl) 435 ds_switch_to(prev_p, next_p);
436 else if (next->debugctlmsr != prev->debugctlmsr)
464 update_debugctlmsr(next->debugctlmsr); 437 update_debugctlmsr(next->debugctlmsr);
465 438
466 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 439 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -482,15 +455,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
482 hard_enable_TSC(); 455 hard_enable_TSC();
483 } 456 }
484 457
485#ifdef CONFIG_X86_PTRACE_BTS
486 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
487 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
488
489 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
490 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
491#endif /* CONFIG_X86_PTRACE_BTS */
492
493
494 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 458 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
495 /* 459 /*
496 * Disable the bitmap via an invalid offset. We still cache 460 * Disable the bitmap via an invalid offset. We still cache
@@ -548,7 +512,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
548 * the task-switch, and shows up in ret_from_fork in entry.S, 512 * the task-switch, and shows up in ret_from_fork in entry.S,
549 * for example. 513 * for example.
550 */ 514 */
551struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) 515__notrace_funcgraph struct task_struct *
516__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
552{ 517{
553 struct thread_struct *prev = &prev_p->thread, 518 struct thread_struct *prev = &prev_p->thread,
554 *next = &next_p->thread; 519 *next = &next_p->thread;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c958120fb1b6..416fb9282f4f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/uaccess.h> 40#include <linux/uaccess.h>
41#include <linux/io.h> 41#include <linux/io.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
44#include <asm/system.h> 45#include <asm/system.h>
@@ -52,6 +53,7 @@
52#include <asm/ia32.h> 53#include <asm/ia32.h>
53#include <asm/idle.h> 54#include <asm/idle.h>
54#include <asm/syscalls.h> 55#include <asm/syscalls.h>
56#include <asm/ds.h>
55 57
56asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
57 59
@@ -235,14 +237,8 @@ void exit_thread(void)
235 t->io_bitmap_max = 0; 237 t->io_bitmap_max = 0;
236 put_cpu(); 238 put_cpu();
237 } 239 }
238#ifdef CONFIG_X86_DS 240
239 /* Free any DS contexts that have not been properly released. */ 241 ds_exit_thread(current);
240 if (unlikely(t->ds_ctx)) {
241 /* we clear debugctl to make sure DS is not used. */
242 update_debugctlmsr(0);
243 ds_free(t->ds_ctx);
244 }
245#endif /* CONFIG_X86_DS */
246} 242}
247 243
248void flush_thread(void) 244void flush_thread(void)
@@ -372,6 +368,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
372 if (err) 368 if (err)
373 goto out; 369 goto out;
374 } 370 }
371
372 ds_copy_thread(p, me);
373
374 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
375 p->thread.debugctlmsr = 0;
376
375 err = 0; 377 err = 0;
376out: 378out:
377 if (err && p->thread.io_bitmap_ptr) { 379 if (err && p->thread.io_bitmap_ptr) {
@@ -470,35 +472,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
470 struct tss_struct *tss) 472 struct tss_struct *tss)
471{ 473{
472 struct thread_struct *prev, *next; 474 struct thread_struct *prev, *next;
473 unsigned long debugctl;
474 475
475 prev = &prev_p->thread, 476 prev = &prev_p->thread,
476 next = &next_p->thread; 477 next = &next_p->thread;
477 478
478 debugctl = prev->debugctlmsr; 479 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
479 480 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
480#ifdef CONFIG_X86_DS 481 ds_switch_to(prev_p, next_p);
481 { 482 else if (next->debugctlmsr != prev->debugctlmsr)
482 unsigned long ds_prev = 0, ds_next = 0;
483
484 if (prev->ds_ctx)
485 ds_prev = (unsigned long)prev->ds_ctx->ds;
486 if (next->ds_ctx)
487 ds_next = (unsigned long)next->ds_ctx->ds;
488
489 if (ds_next != ds_prev) {
490 /*
491 * We clear debugctl to make sure DS
492 * is not in use when we change it:
493 */
494 debugctl = 0;
495 update_debugctlmsr(0);
496 wrmsrl(MSR_IA32_DS_AREA, ds_next);
497 }
498 }
499#endif /* CONFIG_X86_DS */
500
501 if (next->debugctlmsr != debugctl)
502 update_debugctlmsr(next->debugctlmsr); 483 update_debugctlmsr(next->debugctlmsr);
503 484
504 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 485 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -533,14 +514,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
533 */ 514 */
534 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 515 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
535 } 516 }
536
537#ifdef CONFIG_X86_PTRACE_BTS
538 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
539 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
540
541 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
542 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
543#endif /* CONFIG_X86_PTRACE_BTS */
544} 517}
545 518
546/* 519/*
@@ -551,8 +524,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
551 * - could test fs/gs bitsliced 524 * - could test fs/gs bitsliced
552 * 525 *
553 * Kprobes not supported here. Set the probe on schedule instead. 526 * Kprobes not supported here. Set the probe on schedule instead.
527 * Function graph tracer not supported too.
554 */ 528 */
555struct task_struct * 529__notrace_funcgraph struct task_struct *
556__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 530__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
557{ 531{
558 struct thread_struct *prev = &prev_p->thread; 532 struct thread_struct *prev = &prev_p->thread;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 06180dff5b2e..0a5df5f82fb9 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -581,158 +581,91 @@ static int ioperm_get(struct task_struct *target,
581} 581}
582 582
583#ifdef CONFIG_X86_PTRACE_BTS 583#ifdef CONFIG_X86_PTRACE_BTS
584/*
585 * The configuration for a particular BTS hardware implementation.
586 */
587struct bts_configuration {
588 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
589 unsigned char sizeof_bts;
590 /* the size of a field in the BTS record in bytes */
591 unsigned char sizeof_field;
592 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
593 unsigned long debugctl_mask;
594};
595static struct bts_configuration bts_cfg;
596
597#define BTS_MAX_RECORD_SIZE (8 * 3)
598
599
600/*
601 * Branch Trace Store (BTS) uses the following format. Different
602 * architectures vary in the size of those fields.
603 * - source linear address
604 * - destination linear address
605 * - flags
606 *
607 * Later architectures use 64bit pointers throughout, whereas earlier
608 * architectures use 32bit pointers in 32bit mode.
609 *
610 * We compute the base address for the first 8 fields based on:
611 * - the field size stored in the DS configuration
612 * - the relative field position
613 *
614 * In order to store additional information in the BTS buffer, we use
615 * a special source address to indicate that the record requires
616 * special interpretation.
617 *
618 * Netburst indicated via a bit in the flags field whether the branch
619 * was predicted; this is ignored.
620 */
621
622enum bts_field {
623 bts_from = 0,
624 bts_to,
625 bts_flags,
626
627 bts_escape = (unsigned long)-1,
628 bts_qual = bts_to,
629 bts_jiffies = bts_flags
630};
631
632static inline unsigned long bts_get(const char *base, enum bts_field field)
633{
634 base += (bts_cfg.sizeof_field * field);
635 return *(unsigned long *)base;
636}
637
638static inline void bts_set(char *base, enum bts_field field, unsigned long val)
639{
640 base += (bts_cfg.sizeof_field * field);;
641 (*(unsigned long *)base) = val;
642}
643
644/*
645 * Translate a BTS record from the raw format into the bts_struct format
646 *
647 * out (out): bts_struct interpretation
648 * raw: raw BTS record
649 */
650static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
651{
652 memset(out, 0, sizeof(*out));
653 if (bts_get(raw, bts_from) == bts_escape) {
654 out->qualifier = bts_get(raw, bts_qual);
655 out->variant.jiffies = bts_get(raw, bts_jiffies);
656 } else {
657 out->qualifier = BTS_BRANCH;
658 out->variant.lbr.from_ip = bts_get(raw, bts_from);
659 out->variant.lbr.to_ip = bts_get(raw, bts_to);
660 }
661}
662
663static int ptrace_bts_read_record(struct task_struct *child, size_t index, 584static int ptrace_bts_read_record(struct task_struct *child, size_t index,
664 struct bts_struct __user *out) 585 struct bts_struct __user *out)
665{ 586{
666 struct bts_struct ret; 587 const struct bts_trace *trace;
667 const void *bts_record; 588 struct bts_struct bts;
668 size_t bts_index, bts_end; 589 const unsigned char *at;
669 int error; 590 int error;
670 591
671 error = ds_get_bts_end(child, &bts_end); 592 trace = ds_read_bts(child->bts);
672 if (error < 0) 593 if (!trace)
673 return error; 594 return -EPERM;
674
675 if (bts_end <= index)
676 return -EINVAL;
677 595
678 error = ds_get_bts_index(child, &bts_index); 596 at = trace->ds.top - ((index + 1) * trace->ds.size);
679 if (error < 0) 597 if ((void *)at < trace->ds.begin)
680 return error; 598 at += (trace->ds.n * trace->ds.size);
681 599
682 /* translate the ptrace bts index into the ds bts index */ 600 if (!trace->read)
683 bts_index += bts_end - (index + 1); 601 return -EOPNOTSUPP;
684 if (bts_end <= bts_index)
685 bts_index -= bts_end;
686 602
687 error = ds_access_bts(child, bts_index, &bts_record); 603 error = trace->read(child->bts, at, &bts);
688 if (error < 0) 604 if (error < 0)
689 return error; 605 return error;
690 606
691 ptrace_bts_translate_record(&ret, bts_record); 607 if (copy_to_user(out, &bts, sizeof(bts)))
692
693 if (copy_to_user(out, &ret, sizeof(ret)))
694 return -EFAULT; 608 return -EFAULT;
695 609
696 return sizeof(ret); 610 return sizeof(bts);
697} 611}
698 612
699static int ptrace_bts_drain(struct task_struct *child, 613static int ptrace_bts_drain(struct task_struct *child,
700 long size, 614 long size,
701 struct bts_struct __user *out) 615 struct bts_struct __user *out)
702{ 616{
703 struct bts_struct ret; 617 const struct bts_trace *trace;
704 const unsigned char *raw; 618 const unsigned char *at;
705 size_t end, i; 619 int error, drained = 0;
706 int error;
707 620
708 error = ds_get_bts_index(child, &end); 621 trace = ds_read_bts(child->bts);
709 if (error < 0) 622 if (!trace)
710 return error; 623 return -EPERM;
711 624
712 if (size < (end * sizeof(struct bts_struct))) 625 if (!trace->read)
626 return -EOPNOTSUPP;
627
628 if (size < (trace->ds.top - trace->ds.begin))
713 return -EIO; 629 return -EIO;
714 630
715 error = ds_access_bts(child, 0, (const void **)&raw); 631 for (at = trace->ds.begin; (void *)at < trace->ds.top;
716 if (error < 0) 632 out++, drained++, at += trace->ds.size) {
717 return error; 633 struct bts_struct bts;
634 int error;
718 635
719 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { 636 error = trace->read(child->bts, at, &bts);
720 ptrace_bts_translate_record(&ret, raw); 637 if (error < 0)
638 return error;
721 639
722 if (copy_to_user(out, &ret, sizeof(ret))) 640 if (copy_to_user(out, &bts, sizeof(bts)))
723 return -EFAULT; 641 return -EFAULT;
724 } 642 }
725 643
726 error = ds_clear_bts(child); 644 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
645
646 error = ds_reset_bts(child->bts);
727 if (error < 0) 647 if (error < 0)
728 return error; 648 return error;
729 649
730 return end; 650 return drained;
731} 651}
732 652
733static void ptrace_bts_ovfl(struct task_struct *child) 653static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
734{ 654{
735 send_sig(child->thread.bts_ovfl_signal, child, 0); 655 child->bts_buffer = alloc_locked_buffer(size);
656 if (!child->bts_buffer)
657 return -ENOMEM;
658
659 child->bts_size = size;
660
661 return 0;
662}
663
664static void ptrace_bts_free_buffer(struct task_struct *child)
665{
666 free_locked_buffer(child->bts_buffer, child->bts_size);
667 child->bts_buffer = NULL;
668 child->bts_size = 0;
736} 669}
737 670
738static int ptrace_bts_config(struct task_struct *child, 671static int ptrace_bts_config(struct task_struct *child,
@@ -740,114 +673,86 @@ static int ptrace_bts_config(struct task_struct *child,
740 const struct ptrace_bts_config __user *ucfg) 673 const struct ptrace_bts_config __user *ucfg)
741{ 674{
742 struct ptrace_bts_config cfg; 675 struct ptrace_bts_config cfg;
743 int error = 0; 676 unsigned int flags = 0;
744
745 error = -EOPNOTSUPP;
746 if (!bts_cfg.sizeof_bts)
747 goto errout;
748 677
749 error = -EIO;
750 if (cfg_size < sizeof(cfg)) 678 if (cfg_size < sizeof(cfg))
751 goto errout; 679 return -EIO;
752 680
753 error = -EFAULT;
754 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 681 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
755 goto errout; 682 return -EFAULT;
756 683
757 error = -EINVAL; 684 if (child->bts) {
758 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && 685 ds_release_bts(child->bts);
759 !(cfg.flags & PTRACE_BTS_O_ALLOC)) 686 child->bts = NULL;
760 goto errout; 687 }
761 688
762 if (cfg.flags & PTRACE_BTS_O_ALLOC) { 689 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
763 ds_ovfl_callback_t ovfl = NULL; 690 if (!cfg.signal)
764 unsigned int sig = 0; 691 return -EINVAL;
765 692
766 /* we ignore the error in case we were not tracing child */ 693 return -EOPNOTSUPP;
767 (void)ds_release_bts(child);
768 694
769 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 695 child->thread.bts_ovfl_signal = cfg.signal;
770 if (!cfg.signal) 696 }
771 goto errout;
772 697
773 sig = cfg.signal; 698 if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
774 ovfl = ptrace_bts_ovfl; 699 (cfg.size != child->bts_size)) {
775 } 700 int error;
776 701
777 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); 702 ptrace_bts_free_buffer(child);
778 if (error < 0)
779 goto errout;
780 703
781 child->thread.bts_ovfl_signal = sig; 704 error = ptrace_bts_allocate_buffer(child, cfg.size);
705 if (error < 0)
706 return error;
782 } 707 }
783 708
784 error = -EINVAL;
785 if (!child->thread.ds_ctx && cfg.flags)
786 goto errout;
787
788 if (cfg.flags & PTRACE_BTS_O_TRACE) 709 if (cfg.flags & PTRACE_BTS_O_TRACE)
789 child->thread.debugctlmsr |= bts_cfg.debugctl_mask; 710 flags |= BTS_USER;
790 else
791 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
792 711
793 if (cfg.flags & PTRACE_BTS_O_SCHED) 712 if (cfg.flags & PTRACE_BTS_O_SCHED)
794 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 713 flags |= BTS_TIMESTAMPS;
795 else
796 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
797 714
798 error = sizeof(cfg); 715 child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
716 /* ovfl = */ NULL, /* th = */ (size_t)-1,
717 flags);
718 if (IS_ERR(child->bts)) {
719 int error = PTR_ERR(child->bts);
799 720
800out: 721 ptrace_bts_free_buffer(child);
801 if (child->thread.debugctlmsr) 722 child->bts = NULL;
802 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
803 else
804 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
805 723
806 return error; 724 return error;
725 }
807 726
808errout: 727 return sizeof(cfg);
809 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
810 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
811 goto out;
812} 728}
813 729
814static int ptrace_bts_status(struct task_struct *child, 730static int ptrace_bts_status(struct task_struct *child,
815 long cfg_size, 731 long cfg_size,
816 struct ptrace_bts_config __user *ucfg) 732 struct ptrace_bts_config __user *ucfg)
817{ 733{
734 const struct bts_trace *trace;
818 struct ptrace_bts_config cfg; 735 struct ptrace_bts_config cfg;
819 size_t end;
820 const void *base, *max;
821 int error;
822 736
823 if (cfg_size < sizeof(cfg)) 737 if (cfg_size < sizeof(cfg))
824 return -EIO; 738 return -EIO;
825 739
826 error = ds_get_bts_end(child, &end); 740 trace = ds_read_bts(child->bts);
827 if (error < 0) 741 if (!trace)
828 return error; 742 return -EPERM;
829
830 error = ds_access_bts(child, /* index = */ 0, &base);
831 if (error < 0)
832 return error;
833
834 error = ds_access_bts(child, /* index = */ end, &max);
835 if (error < 0)
836 return error;
837 743
838 memset(&cfg, 0, sizeof(cfg)); 744 memset(&cfg, 0, sizeof(cfg));
839 cfg.size = (max - base); 745 cfg.size = trace->ds.end - trace->ds.begin;
840 cfg.signal = child->thread.bts_ovfl_signal; 746 cfg.signal = child->thread.bts_ovfl_signal;
841 cfg.bts_size = sizeof(struct bts_struct); 747 cfg.bts_size = sizeof(struct bts_struct);
842 748
843 if (cfg.signal) 749 if (cfg.signal)
844 cfg.flags |= PTRACE_BTS_O_SIGNAL; 750 cfg.flags |= PTRACE_BTS_O_SIGNAL;
845 751
846 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 752 if (trace->ds.flags & BTS_USER)
847 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
848 cfg.flags |= PTRACE_BTS_O_TRACE; 753 cfg.flags |= PTRACE_BTS_O_TRACE;
849 754
850 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 755 if (trace->ds.flags & BTS_TIMESTAMPS)
851 cfg.flags |= PTRACE_BTS_O_SCHED; 756 cfg.flags |= PTRACE_BTS_O_SCHED;
852 757
853 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 758 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
@@ -856,109 +761,77 @@ static int ptrace_bts_status(struct task_struct *child,
856 return sizeof(cfg); 761 return sizeof(cfg);
857} 762}
858 763
859static int ptrace_bts_write_record(struct task_struct *child, 764static int ptrace_bts_clear(struct task_struct *child)
860 const struct bts_struct *in)
861{ 765{
862 unsigned char bts_record[BTS_MAX_RECORD_SIZE]; 766 const struct bts_trace *trace;
863 767
864 BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); 768 trace = ds_read_bts(child->bts);
769 if (!trace)
770 return -EPERM;
865 771
866 memset(bts_record, 0, bts_cfg.sizeof_bts); 772 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
867 switch (in->qualifier) {
868 case BTS_INVALID:
869 break;
870 773
871 case BTS_BRANCH: 774 return ds_reset_bts(child->bts);
872 bts_set(bts_record, bts_from, in->variant.lbr.from_ip); 775}
873 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
874 break;
875 776
876 case BTS_TASK_ARRIVES: 777static int ptrace_bts_size(struct task_struct *child)
877 case BTS_TASK_DEPARTS: 778{
878 bts_set(bts_record, bts_from, bts_escape); 779 const struct bts_trace *trace;
879 bts_set(bts_record, bts_qual, in->qualifier);
880 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
881 break;
882 780
883 default: 781 trace = ds_read_bts(child->bts);
884 return -EINVAL; 782 if (!trace)
885 } 783 return -EPERM;
886 784
887 /* The writing task will be the switched-to task on a context 785 return (trace->ds.top - trace->ds.begin) / trace->ds.size;
888 * switch. It needs to write into the switched-from task's BTS
889 * buffer. */
890 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
891} 786}
892 787
893void ptrace_bts_take_timestamp(struct task_struct *tsk, 788static void ptrace_bts_fork(struct task_struct *tsk)
894 enum bts_qualifier qualifier)
895{ 789{
896 struct bts_struct rec = { 790 tsk->bts = NULL;
897 .qualifier = qualifier, 791 tsk->bts_buffer = NULL;
898 .variant.jiffies = jiffies_64 792 tsk->bts_size = 0;
899 }; 793 tsk->thread.bts_ovfl_signal = 0;
900
901 ptrace_bts_write_record(tsk, &rec);
902} 794}
903 795
904static const struct bts_configuration bts_cfg_netburst = { 796static void ptrace_bts_untrace(struct task_struct *child)
905 .sizeof_bts = sizeof(long) * 3, 797{
906 .sizeof_field = sizeof(long), 798 if (unlikely(child->bts)) {
907 .debugctl_mask = (1<<2)|(1<<3)|(1<<5) 799 ds_release_bts(child->bts);
908}; 800 child->bts = NULL;
801
802 /* We cannot update total_vm and locked_vm since
803 child's mm is already gone. But we can reclaim the
804 memory. */
805 kfree(child->bts_buffer);
806 child->bts_buffer = NULL;
807 child->bts_size = 0;
808 }
809}
909 810
910static const struct bts_configuration bts_cfg_pentium_m = { 811static void ptrace_bts_detach(struct task_struct *child)
911 .sizeof_bts = sizeof(long) * 3, 812{
912 .sizeof_field = sizeof(long), 813 if (unlikely(child->bts)) {
913 .debugctl_mask = (1<<6)|(1<<7) 814 ds_release_bts(child->bts);
914}; 815 child->bts = NULL;
915 816
916static const struct bts_configuration bts_cfg_core2 = { 817 ptrace_bts_free_buffer(child);
917 .sizeof_bts = 8 * 3, 818 }
918 .sizeof_field = 8, 819}
919 .debugctl_mask = (1<<6)|(1<<7)|(1<<9) 820#else
920}; 821static inline void ptrace_bts_fork(struct task_struct *tsk) {}
822static inline void ptrace_bts_detach(struct task_struct *child) {}
823static inline void ptrace_bts_untrace(struct task_struct *child) {}
824#endif /* CONFIG_X86_PTRACE_BTS */
921 825
922static inline void bts_configure(const struct bts_configuration *cfg) 826void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
923{ 827{
924 bts_cfg = *cfg; 828 ptrace_bts_fork(child);
925} 829}
926 830
927void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) 831void x86_ptrace_untrace(struct task_struct *child)
928{ 832{
929 switch (c->x86) { 833 ptrace_bts_untrace(child);
930 case 0x6:
931 switch (c->x86_model) {
932 case 0 ... 0xC:
933 /* sorry, don't know about them */
934 break;
935 case 0xD:
936 case 0xE: /* Pentium M */
937 bts_configure(&bts_cfg_pentium_m);
938 break;
939 default: /* Core2, Atom, ... */
940 bts_configure(&bts_cfg_core2);
941 break;
942 }
943 break;
944 case 0xF:
945 switch (c->x86_model) {
946 case 0x0:
947 case 0x1:
948 case 0x2: /* Netburst */
949 bts_configure(&bts_cfg_netburst);
950 break;
951 default:
952 /* sorry, don't know about them */
953 break;
954 }
955 break;
956 default:
957 /* sorry, don't know about them */
958 break;
959 }
960} 834}
961#endif /* CONFIG_X86_PTRACE_BTS */
962 835
963/* 836/*
964 * Called by kernel/ptrace.c when detaching.. 837 * Called by kernel/ptrace.c when detaching..
@@ -971,15 +844,7 @@ void ptrace_disable(struct task_struct *child)
971#ifdef TIF_SYSCALL_EMU 844#ifdef TIF_SYSCALL_EMU
972 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 845 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
973#endif 846#endif
974#ifdef CONFIG_X86_PTRACE_BTS 847 ptrace_bts_detach(child);
975 (void)ds_release_bts(child);
976
977 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
978 if (!child->thread.debugctlmsr)
979 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
980
981 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
982#endif /* CONFIG_X86_PTRACE_BTS */
983} 848}
984 849
985#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 850#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1111,7 +976,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1111 break; 976 break;
1112 977
1113 case PTRACE_BTS_SIZE: 978 case PTRACE_BTS_SIZE:
1114 ret = ds_get_bts_index(child, /* pos = */ NULL); 979 ret = ptrace_bts_size(child);
1115 break; 980 break;
1116 981
1117 case PTRACE_BTS_GET: 982 case PTRACE_BTS_GET:
@@ -1120,7 +985,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1120 break; 985 break;
1121 986
1122 case PTRACE_BTS_CLEAR: 987 case PTRACE_BTS_CLEAR:
1123 ret = ds_clear_bts(child); 988 ret = ptrace_bts_clear(child);
1124 break; 989 break;
1125 990
1126 case PTRACE_BTS_DRAIN: 991 case PTRACE_BTS_DRAIN:
@@ -1383,6 +1248,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1383 1248
1384 case PTRACE_GET_THREAD_AREA: 1249 case PTRACE_GET_THREAD_AREA:
1385 case PTRACE_SET_THREAD_AREA: 1250 case PTRACE_SET_THREAD_AREA:
1251#ifdef CONFIG_X86_PTRACE_BTS
1252 case PTRACE_BTS_CONFIG:
1253 case PTRACE_BTS_STATUS:
1254 case PTRACE_BTS_SIZE:
1255 case PTRACE_BTS_GET:
1256 case PTRACE_BTS_CLEAR:
1257 case PTRACE_BTS_DRAIN:
1258#endif /* CONFIG_X86_PTRACE_BTS */
1386 return arch_ptrace(child, request, addr, data); 1259 return arch_ptrace(child, request, addr, data);
1387 1260
1388 default: 1261 default:
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7a430c4d1551..f8500c969442 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -288,7 +288,7 @@ static int __cpuinitdata unsafe_smp;
288/* 288/*
289 * Activate a secondary processor. 289 * Activate a secondary processor.
290 */ 290 */
291static void __cpuinit start_secondary(void *unused) 291notrace static void __cpuinit start_secondary(void *unused)
292{ 292{
293 /* 293 /*
294 * Don't put *anything* before cpu_init(), SMP booting is too 294 * Don't put *anything* before cpu_init(), SMP booting is too
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/uaccess.h>
9#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
10 11
11static void save_stack_warning(void *data, char *msg) 12static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
83 trace->entries[trace->nr_entries++] = ULONG_MAX; 84 trace->entries[trace->nr_entries++] = ULONG_MAX;
84} 85}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk); 86EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
87
88/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
89
90struct stack_frame {
91 const void __user *next_fp;
92 unsigned long ret_addr;
93};
94
95static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
96{
97 int ret;
98
99 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
100 return 0;
101
102 ret = 1;
103 pagefault_disable();
104 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
105 ret = 0;
106 pagefault_enable();
107
108 return ret;
109}
110
111static inline void __save_stack_trace_user(struct stack_trace *trace)
112{
113 const struct pt_regs *regs = task_pt_regs(current);
114 const void __user *fp = (const void __user *)regs->bp;
115
116 if (trace->nr_entries < trace->max_entries)
117 trace->entries[trace->nr_entries++] = regs->ip;
118
119 while (trace->nr_entries < trace->max_entries) {
120 struct stack_frame frame;
121
122 frame.next_fp = NULL;
123 frame.ret_addr = 0;
124 if (!copy_stack_frame(fp, &frame))
125 break;
126 if ((unsigned long)fp < regs->sp)
127 break;
128 if (frame.ret_addr) {
129 trace->entries[trace->nr_entries++] =
130 frame.ret_addr;
131 }
132 if (fp == frame.next_fp)
133 break;
134 fp = frame.next_fp;
135 }
136}
137
138void save_stack_trace_user(struct stack_trace *trace)
139{
140 /*
141 * Trace user stack if we are not a kernel thread
142 */
143 if (current->mm) {
144 __save_stack_trace_user(trace);
145 }
146 if (trace->nr_entries < trace->max_entries)
147 trace->entries[trace->nr_entries++] = ULONG_MAX;
148}
149
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index a9b8560adbc2..82c67559dde7 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -44,6 +44,7 @@ SECTIONS
44 SCHED_TEXT 44 SCHED_TEXT
45 LOCK_TEXT 45 LOCK_TEXT
46 KPROBES_TEXT 46 KPROBES_TEXT
47 IRQENTRY_TEXT
47 *(.fixup) 48 *(.fixup)
48 *(.gnu.warning) 49 *(.gnu.warning)
49 _etext = .; /* End of text section */ 50 _etext = .; /* End of text section */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 46e05447405b..1a614c0e6bef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -35,6 +35,7 @@ SECTIONS
35 SCHED_TEXT 35 SCHED_TEXT
36 LOCK_TEXT 36 LOCK_TEXT
37 KPROBES_TEXT 37 KPROBES_TEXT
38 IRQENTRY_TEXT
38 *(.fixup) 39 *(.fixup)
39 *(.gnu.warning) 40 *(.gnu.warning)
40 _etext = .; /* End of text section */ 41 _etext = .; /* End of text section */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ebf2f12900f5..44153afc9067 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
17 * want per guest time just set the kernel.vsyscall64 sysctl to 0. 17 * want per guest time just set the kernel.vsyscall64 sysctl to 0.
18 */ 18 */
19 19
20/* Disable profiling for userspace code: */
21#define DISABLE_BRANCH_PROFILING
22
20#include <linux/time.h> 23#include <linux/time.h>
21#include <linux/init.h> 24#include <linux/init.h>
22#include <linux/kernel.h> 25#include <linux/kernel.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 11obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o 12mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 13obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15 14
16obj-$(CONFIG_NUMA) += numa_$(BITS).o 15obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46b5f753ff81..57ec8c86a877 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
53 53
54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
55{ 55{
56#ifdef CONFIG_MMIOTRACE_HOOKS 56#ifdef CONFIG_MMIOTRACE
57 if (unlikely(is_kmmio_active())) 57 if (unlikely(is_kmmio_active()))
58 if (kmmio_handler(regs, addr) == 1) 58 if (kmmio_handler(regs, addr) == 1)
59 return -1; 59 return -1;
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
9 * Also alternative() doesn't work. 9 * Also alternative() doesn't work.
10 */ 10 */
11 11
12/* Disable profiling for userspace code: */
13#define DISABLE_BRANCH_PROFILING
14
12#include <linux/kernel.h> 15#include <linux/kernel.h>
13#include <linux/posix-timers.h> 16#include <linux/posix-timers.h>
14#include <linux/time.h> 17#include <linux/time.h>
diff --git a/block/Kconfig b/block/Kconfig
index 1ab7c15c8d7a..290b219fad9c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE
47 depends on SYSFS 47 depends on SYSFS
48 select RELAY 48 select RELAY
49 select DEBUG_FS 49 select DEBUG_FS
50 select TRACEPOINTS
50 help 51 help
51 Say Y here if you want to be able to trace the block layer actions 52 Say Y here if you want to be able to trace the block layer actions
52 on a given queue. Tracing allows you to see any traffic happening 53 on a given queue. Tracing allows you to see any traffic happening
diff --git a/block/blk-core.c b/block/blk-core.c
index c36aa98fafa3..561e8a1b43a4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,9 +28,23 @@
28#include <linux/task_io_accounting_ops.h> 28#include <linux/task_io_accounting_ops.h>
29#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
30#include <linux/fault-inject.h> 30#include <linux/fault-inject.h>
31#include <trace/block.h>
31 32
32#include "blk.h" 33#include "blk.h"
33 34
35DEFINE_TRACE(block_plug);
36DEFINE_TRACE(block_unplug_io);
37DEFINE_TRACE(block_unplug_timer);
38DEFINE_TRACE(block_getrq);
39DEFINE_TRACE(block_sleeprq);
40DEFINE_TRACE(block_rq_requeue);
41DEFINE_TRACE(block_bio_backmerge);
42DEFINE_TRACE(block_bio_frontmerge);
43DEFINE_TRACE(block_bio_queue);
44DEFINE_TRACE(block_rq_complete);
45DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
46EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
47
34static int __make_request(struct request_queue *q, struct bio *bio); 48static int __make_request(struct request_queue *q, struct bio *bio);
35 49
36/* 50/*
@@ -205,7 +219,7 @@ void blk_plug_device(struct request_queue *q)
205 219
206 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { 220 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
207 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 221 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
208 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 222 trace_block_plug(q);
209 } 223 }
210} 224}
211EXPORT_SYMBOL(blk_plug_device); 225EXPORT_SYMBOL(blk_plug_device);
@@ -292,9 +306,7 @@ void blk_unplug_work(struct work_struct *work)
292 struct request_queue *q = 306 struct request_queue *q =
293 container_of(work, struct request_queue, unplug_work); 307 container_of(work, struct request_queue, unplug_work);
294 308
295 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 309 trace_block_unplug_io(q);
296 q->rq.count[READ] + q->rq.count[WRITE]);
297
298 q->unplug_fn(q); 310 q->unplug_fn(q);
299} 311}
300 312
@@ -302,9 +314,7 @@ void blk_unplug_timeout(unsigned long data)
302{ 314{
303 struct request_queue *q = (struct request_queue *)data; 315 struct request_queue *q = (struct request_queue *)data;
304 316
305 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 317 trace_block_unplug_timer(q);
306 q->rq.count[READ] + q->rq.count[WRITE]);
307
308 kblockd_schedule_work(q, &q->unplug_work); 318 kblockd_schedule_work(q, &q->unplug_work);
309} 319}
310 320
@@ -314,9 +324,7 @@ void blk_unplug(struct request_queue *q)
314 * devices don't necessarily have an ->unplug_fn defined 324 * devices don't necessarily have an ->unplug_fn defined
315 */ 325 */
316 if (q->unplug_fn) { 326 if (q->unplug_fn) {
317 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 327 trace_block_unplug_io(q);
318 q->rq.count[READ] + q->rq.count[WRITE]);
319
320 q->unplug_fn(q); 328 q->unplug_fn(q);
321 } 329 }
322} 330}
@@ -822,7 +830,7 @@ rq_starved:
822 if (ioc_batching(q, ioc)) 830 if (ioc_batching(q, ioc))
823 ioc->nr_batch_requests--; 831 ioc->nr_batch_requests--;
824 832
825 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 833 trace_block_getrq(q, bio, rw);
826out: 834out:
827 return rq; 835 return rq;
828} 836}
@@ -848,7 +856,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
848 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 856 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
849 TASK_UNINTERRUPTIBLE); 857 TASK_UNINTERRUPTIBLE);
850 858
851 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); 859 trace_block_sleeprq(q, bio, rw);
852 860
853 __generic_unplug_device(q); 861 __generic_unplug_device(q);
854 spin_unlock_irq(q->queue_lock); 862 spin_unlock_irq(q->queue_lock);
@@ -928,7 +936,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
928{ 936{
929 blk_delete_timer(rq); 937 blk_delete_timer(rq);
930 blk_clear_rq_complete(rq); 938 blk_clear_rq_complete(rq);
931 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 939 trace_block_rq_requeue(q, rq);
932 940
933 if (blk_rq_tagged(rq)) 941 if (blk_rq_tagged(rq))
934 blk_queue_end_tag(q, rq); 942 blk_queue_end_tag(q, rq);
@@ -1167,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1167 if (!ll_back_merge_fn(q, req, bio)) 1175 if (!ll_back_merge_fn(q, req, bio))
1168 break; 1176 break;
1169 1177
1170 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 1178 trace_block_bio_backmerge(q, bio);
1171 1179
1172 req->biotail->bi_next = bio; 1180 req->biotail->bi_next = bio;
1173 req->biotail = bio; 1181 req->biotail = bio;
@@ -1186,7 +1194,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1186 if (!ll_front_merge_fn(q, req, bio)) 1194 if (!ll_front_merge_fn(q, req, bio))
1187 break; 1195 break;
1188 1196
1189 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 1197 trace_block_bio_frontmerge(q, bio);
1190 1198
1191 bio->bi_next = req->bio; 1199 bio->bi_next = req->bio;
1192 req->bio = bio; 1200 req->bio = bio;
@@ -1269,7 +1277,7 @@ static inline void blk_partition_remap(struct bio *bio)
1269 bio->bi_sector += p->start_sect; 1277 bio->bi_sector += p->start_sect;
1270 bio->bi_bdev = bdev->bd_contains; 1278 bio->bi_bdev = bdev->bd_contains;
1271 1279
1272 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, 1280 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1273 bdev->bd_dev, bio->bi_sector, 1281 bdev->bd_dev, bio->bi_sector,
1274 bio->bi_sector - p->start_sect); 1282 bio->bi_sector - p->start_sect);
1275 } 1283 }
@@ -1441,10 +1449,10 @@ end_io:
1441 goto end_io; 1449 goto end_io;
1442 1450
1443 if (old_sector != -1) 1451 if (old_sector != -1)
1444 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 1452 trace_block_remap(q, bio, old_dev, bio->bi_sector,
1445 old_sector); 1453 old_sector);
1446 1454
1447 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 1455 trace_block_bio_queue(q, bio);
1448 1456
1449 old_sector = bio->bi_sector; 1457 old_sector = bio->bi_sector;
1450 old_dev = bio->bi_bdev->bd_dev; 1458 old_dev = bio->bi_bdev->bd_dev;
@@ -1678,7 +1686,7 @@ static int __end_that_request_first(struct request *req, int error,
1678 int total_bytes, bio_nbytes, next_idx = 0; 1686 int total_bytes, bio_nbytes, next_idx = 0;
1679 struct bio *bio; 1687 struct bio *bio;
1680 1688
1681 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 1689 trace_block_rq_complete(req->q, req);
1682 1690
1683 /* 1691 /*
1684 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual 1692 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
diff --git a/block/blktrace.c b/block/blktrace.c
index 85049a7e7a17..b0a2cae886db 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -23,10 +23,18 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <trace/block.h>
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27 28
28static unsigned int blktrace_seq __read_mostly = 1; 29static unsigned int blktrace_seq __read_mostly = 1;
29 30
31/* Global reference count of probes */
32static DEFINE_MUTEX(blk_probe_mutex);
33static atomic_t blk_probes_ref = ATOMIC_INIT(0);
34
35static int blk_register_tracepoints(void);
36static void blk_unregister_tracepoints(void);
37
30/* 38/*
31 * Send out a notify message. 39 * Send out a notify message.
32 */ 40 */
@@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
119 * The worker for the various blk_add_trace*() types. Fills out a 127 * The worker for the various blk_add_trace*() types. Fills out a
120 * blk_io_trace structure and places it in a per-cpu subbuffer. 128 * blk_io_trace structure and places it in a per-cpu subbuffer.
121 */ 129 */
122void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, 130static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
123 int rw, u32 what, int error, int pdu_len, void *pdu_data) 131 int rw, u32 what, int error, int pdu_len, void *pdu_data)
124{ 132{
125 struct task_struct *tsk = current; 133 struct task_struct *tsk = current;
@@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
177 local_irq_restore(flags); 185 local_irq_restore(flags);
178} 186}
179 187
180EXPORT_SYMBOL_GPL(__blk_add_trace);
181
182static struct dentry *blk_tree_root; 188static struct dentry *blk_tree_root;
183static DEFINE_MUTEX(blk_tree_mutex); 189static DEFINE_MUTEX(blk_tree_mutex);
184static unsigned int root_users; 190static unsigned int root_users;
@@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt)
237 free_percpu(bt->sequence); 243 free_percpu(bt->sequence);
238 free_percpu(bt->msg_data); 244 free_percpu(bt->msg_data);
239 kfree(bt); 245 kfree(bt);
246 mutex_lock(&blk_probe_mutex);
247 if (atomic_dec_and_test(&blk_probes_ref))
248 blk_unregister_tracepoints();
249 mutex_unlock(&blk_probe_mutex);
240} 250}
241 251
242int blk_trace_remove(struct request_queue *q) 252int blk_trace_remove(struct request_queue *q)
@@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
428 bt->pid = buts->pid; 438 bt->pid = buts->pid;
429 bt->trace_state = Blktrace_setup; 439 bt->trace_state = Blktrace_setup;
430 440
441 mutex_lock(&blk_probe_mutex);
442 if (atomic_add_return(1, &blk_probes_ref) == 1) {
443 ret = blk_register_tracepoints();
444 if (ret)
445 goto probe_err;
446 }
447 mutex_unlock(&blk_probe_mutex);
448
431 ret = -EBUSY; 449 ret = -EBUSY;
432 old_bt = xchg(&q->blk_trace, bt); 450 old_bt = xchg(&q->blk_trace, bt);
433 if (old_bt) { 451 if (old_bt) {
@@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
436 } 454 }
437 455
438 return 0; 456 return 0;
457probe_err:
458 atomic_dec(&blk_probes_ref);
459 mutex_unlock(&blk_probe_mutex);
439err: 460err:
440 if (dir) 461 if (dir)
441 blk_remove_tree(dir); 462 blk_remove_tree(dir);
@@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q)
562 blk_trace_remove(q); 583 blk_trace_remove(q);
563 } 584 }
564} 585}
586
587/*
588 * blktrace probes
589 */
590
591/**
592 * blk_add_trace_rq - Add a trace for a request oriented action
593 * @q: queue the io is for
594 * @rq: the source request
595 * @what: the action
596 *
597 * Description:
598 * Records an action against a request. Will log the bio offset + size.
599 *
600 **/
601static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
602 u32 what)
603{
604 struct blk_trace *bt = q->blk_trace;
605 int rw = rq->cmd_flags & 0x03;
606
607 if (likely(!bt))
608 return;
609
610 if (blk_discard_rq(rq))
611 rw |= (1 << BIO_RW_DISCARD);
612
613 if (blk_pc_request(rq)) {
614 what |= BLK_TC_ACT(BLK_TC_PC);
615 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
616 sizeof(rq->cmd), rq->cmd);
617 } else {
618 what |= BLK_TC_ACT(BLK_TC_FS);
619 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
620 rw, what, rq->errors, 0, NULL);
621 }
622}
623
624static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
625{
626 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
627}
628
629static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
630{
631 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
632}
633
634static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
635{
636 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
637}
638
639static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
640{
641 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
642}
643
644static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
645{
646 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
647}
648
649/**
650 * blk_add_trace_bio - Add a trace for a bio oriented action
651 * @q: queue the io is for
652 * @bio: the source bio
653 * @what: the action
654 *
655 * Description:
656 * Records an action against a bio. Will log the bio offset + size.
657 *
658 **/
659static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
660 u32 what)
661{
662 struct blk_trace *bt = q->blk_trace;
663
664 if (likely(!bt))
665 return;
666
667 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
668 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
669}
670
671static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
672{
673 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
674}
675
676static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
677{
678 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
679}
680
681static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
682{
683 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
684}
685
686static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
687{
688 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
689}
690
691static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
692{
693 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
694}
695
696static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
697{
698 if (bio)
699 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
700 else {
701 struct blk_trace *bt = q->blk_trace;
702
703 if (bt)
704 __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
705 }
706}
707
708
709static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
710{
711 if (bio)
712 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
713 else {
714 struct blk_trace *bt = q->blk_trace;
715
716 if (bt)
717 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
718 }
719}
720
721static void blk_add_trace_plug(struct request_queue *q)
722{
723 struct blk_trace *bt = q->blk_trace;
724
725 if (bt)
726 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
727}
728
729static void blk_add_trace_unplug_io(struct request_queue *q)
730{
731 struct blk_trace *bt = q->blk_trace;
732
733 if (bt) {
734 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
735 __be64 rpdu = cpu_to_be64(pdu);
736
737 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
738 sizeof(rpdu), &rpdu);
739 }
740}
741
742static void blk_add_trace_unplug_timer(struct request_queue *q)
743{
744 struct blk_trace *bt = q->blk_trace;
745
746 if (bt) {
747 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
748 __be64 rpdu = cpu_to_be64(pdu);
749
750 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
751 sizeof(rpdu), &rpdu);
752 }
753}
754
755static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
756 unsigned int pdu)
757{
758 struct blk_trace *bt = q->blk_trace;
759
760 if (bt) {
761 __be64 rpdu = cpu_to_be64(pdu);
762
763 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
764 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
765 sizeof(rpdu), &rpdu);
766 }
767}
768
769/**
770 * blk_add_trace_remap - Add a trace for a remap operation
771 * @q: queue the io is for
772 * @bio: the source bio
773 * @dev: target device
774 * @from: source sector
775 * @to: target sector
776 *
777 * Description:
778 * Device mapper or raid target sometimes need to split a bio because
779 * it spans a stripe (or similar). Add a trace for that action.
780 *
781 **/
782static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
783 dev_t dev, sector_t from, sector_t to)
784{
785 struct blk_trace *bt = q->blk_trace;
786 struct blk_io_trace_remap r;
787
788 if (likely(!bt))
789 return;
790
791 r.device = cpu_to_be32(dev);
792 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
793 r.sector = cpu_to_be64(to);
794
795 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
796 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
797}
798
799/**
800 * blk_add_driver_data - Add binary message with driver-specific data
801 * @q: queue the io is for
802 * @rq: io request
803 * @data: driver-specific data
804 * @len: length of driver-specific data
805 *
806 * Description:
807 * Some drivers might want to write driver-specific data per request.
808 *
809 **/
810void blk_add_driver_data(struct request_queue *q,
811 struct request *rq,
812 void *data, size_t len)
813{
814 struct blk_trace *bt = q->blk_trace;
815
816 if (likely(!bt))
817 return;
818
819 if (blk_pc_request(rq))
820 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
821 rq->errors, len, data);
822 else
823 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
824 0, BLK_TA_DRV_DATA, rq->errors, len, data);
825}
826EXPORT_SYMBOL_GPL(blk_add_driver_data);
827
828static int blk_register_tracepoints(void)
829{
830 int ret;
831
832 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
833 WARN_ON(ret);
834 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
835 WARN_ON(ret);
836 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
837 WARN_ON(ret);
838 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
839 WARN_ON(ret);
840 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
841 WARN_ON(ret);
842 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
843 WARN_ON(ret);
844 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
845 WARN_ON(ret);
846 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
847 WARN_ON(ret);
848 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
849 WARN_ON(ret);
850 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
851 WARN_ON(ret);
852 ret = register_trace_block_getrq(blk_add_trace_getrq);
853 WARN_ON(ret);
854 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
855 WARN_ON(ret);
856 ret = register_trace_block_plug(blk_add_trace_plug);
857 WARN_ON(ret);
858 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
859 WARN_ON(ret);
860 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
861 WARN_ON(ret);
862 ret = register_trace_block_split(blk_add_trace_split);
863 WARN_ON(ret);
864 ret = register_trace_block_remap(blk_add_trace_remap);
865 WARN_ON(ret);
866 return 0;
867}
868
869static void blk_unregister_tracepoints(void)
870{
871 unregister_trace_block_remap(blk_add_trace_remap);
872 unregister_trace_block_split(blk_add_trace_split);
873 unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
874 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
875 unregister_trace_block_plug(blk_add_trace_plug);
876 unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
877 unregister_trace_block_getrq(blk_add_trace_getrq);
878 unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
879 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
880 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
881 unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
882 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
883 unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
884 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
885 unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
886 unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
887 unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
888
889 tracepoint_synchronize_unregister();
890}
diff --git a/block/elevator.c b/block/elevator.c
index a6951f76ba0c..86836dd179c0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,6 +33,7 @@
33#include <linux/compiler.h> 33#include <linux/compiler.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/blktrace_api.h> 35#include <linux/blktrace_api.h>
36#include <trace/block.h>
36#include <linux/hash.h> 37#include <linux/hash.h>
37#include <linux/uaccess.h> 38#include <linux/uaccess.h>
38 39
@@ -41,6 +42,8 @@
41static DEFINE_SPINLOCK(elv_list_lock); 42static DEFINE_SPINLOCK(elv_list_lock);
42static LIST_HEAD(elv_list); 43static LIST_HEAD(elv_list);
43 44
45DEFINE_TRACE(block_rq_abort);
46
44/* 47/*
45 * Merge hash stuff. 48 * Merge hash stuff.
46 */ 49 */
@@ -52,6 +55,9 @@ static const int elv_hash_shift = 6;
52#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 55#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
53#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) 56#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
54 57
58DEFINE_TRACE(block_rq_insert);
59DEFINE_TRACE(block_rq_issue);
60
55/* 61/*
56 * Query io scheduler to see if the current process issuing bio may be 62 * Query io scheduler to see if the current process issuing bio may be
57 * merged with rq. 63 * merged with rq.
@@ -586,7 +592,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
586 unsigned ordseq; 592 unsigned ordseq;
587 int unplug_it = 1; 593 int unplug_it = 1;
588 594
589 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 595 trace_block_rq_insert(q, rq);
590 596
591 rq->q = q; 597 rq->q = q;
592 598
@@ -772,7 +778,7 @@ struct request *elv_next_request(struct request_queue *q)
772 * not be passed by new incoming requests 778 * not be passed by new incoming requests
773 */ 779 */
774 rq->cmd_flags |= REQ_STARTED; 780 rq->cmd_flags |= REQ_STARTED;
775 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 781 trace_block_rq_issue(q, rq);
776 } 782 }
777 783
778 if (!q->boundary_rq || q->boundary_rq == rq) { 784 if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -914,7 +920,7 @@ void elv_abort_queue(struct request_queue *q)
914 while (!list_empty(&q->queue_head)) { 920 while (!list_empty(&q->queue_head)) {
915 rq = list_entry_rq(q->queue_head.next); 921 rq = list_entry_rq(q->queue_head.next);
916 rq->cmd_flags |= REQ_QUIET; 922 rq->cmd_flags |= REQ_QUIET;
917 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 923 trace_block_rq_abort(q, rq);
918 __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); 924 __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
919 } 925 }
920} 926}
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index ce0d9da52a8a..94966edfb44d 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -274,6 +274,22 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = {
274 .enable_mask = SYSRQ_ENABLE_DUMP, 274 .enable_mask = SYSRQ_ENABLE_DUMP,
275}; 275};
276 276
277#ifdef CONFIG_TRACING
278#include <linux/ftrace.h>
279
280static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
281{
282 ftrace_dump();
283}
284static struct sysrq_key_op sysrq_ftrace_dump_op = {
285 .handler = sysrq_ftrace_dump,
286 .help_msg = "dumpZ-ftrace-buffer",
287 .action_msg = "Dump ftrace buffer",
288 .enable_mask = SYSRQ_ENABLE_DUMP,
289};
290#else
291#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)0)
292#endif
277 293
278static void sysrq_handle_showmem(int key, struct tty_struct *tty) 294static void sysrq_handle_showmem(int key, struct tty_struct *tty)
279{ 295{
@@ -406,7 +422,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
406 NULL, /* x */ 422 NULL, /* x */
407 /* y: May be registered on sparc64 for global register dump */ 423 /* y: May be registered on sparc64 for global register dump */
408 NULL, /* y */ 424 NULL, /* y */
409 NULL /* z */ 425 &sysrq_ftrace_dump_op, /* z */
410}; 426};
411 427
412/* key2index calculation, -1 on invalid index */ 428/* key2index calculation, -1 on invalid index */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c99e4728ff41..343094c3feeb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -21,6 +21,7 @@
21#include <linux/idr.h> 21#include <linux/idr.h>
22#include <linux/hdreg.h> 22#include <linux/hdreg.h>
23#include <linux/blktrace_api.h> 23#include <linux/blktrace_api.h>
24#include <trace/block.h>
24 25
25#define DM_MSG_PREFIX "core" 26#define DM_MSG_PREFIX "core"
26 27
@@ -51,6 +52,8 @@ struct dm_target_io {
51 union map_info info; 52 union map_info info;
52}; 53};
53 54
55DEFINE_TRACE(block_bio_complete);
56
54union map_info *dm_get_mapinfo(struct bio *bio) 57union map_info *dm_get_mapinfo(struct bio *bio)
55{ 58{
56 if (bio && bio->bi_private) 59 if (bio && bio->bi_private)
@@ -504,8 +507,7 @@ static void dec_pending(struct dm_io *io, int error)
504 end_io_acct(io); 507 end_io_acct(io);
505 508
506 if (io->error != DM_ENDIO_REQUEUE) { 509 if (io->error != DM_ENDIO_REQUEUE) {
507 blk_add_trace_bio(io->md->queue, io->bio, 510 trace_block_bio_complete(io->md->queue, io->bio);
508 BLK_TA_COMPLETE);
509 511
510 bio_endio(io->bio, io->error); 512 bio_endio(io->bio, io->error);
511 } 513 }
@@ -598,7 +600,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
598 if (r == DM_MAPIO_REMAPPED) { 600 if (r == DM_MAPIO_REMAPPED) {
599 /* the bio has been remapped so dispatch it */ 601 /* the bio has been remapped so dispatch it */
600 602
601 blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, 603 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
602 tio->io->bio->bi_bdev->bd_dev, 604 tio->io->bio->bi_bdev->bd_dev,
603 clone->bi_sector, sector); 605 clone->bi_sector, sector);
604 606
diff --git a/fs/bio.c b/fs/bio.c
index 77a55bcceedb..df99c882b807 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,8 +26,11 @@
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <trace/block.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 30#include <scsi/sg.h> /* for struct sg_iovec */
30 31
32DEFINE_TRACE(block_split);
33
31static struct kmem_cache *bio_slab __read_mostly; 34static struct kmem_cache *bio_slab __read_mostly;
32 35
33static mempool_t *bio_split_pool __read_mostly; 36static mempool_t *bio_split_pool __read_mostly;
@@ -1263,7 +1266,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1263 if (!bp) 1266 if (!bp)
1264 return bp; 1267 return bp;
1265 1268
1266 blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, 1269 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1267 bi->bi_sector + first_sectors); 1270 bi->bi_sector + first_sectors);
1268 1271
1269 BUG_ON(bi->bi_vcnt != 1); 1272 BUG_ON(bi->bi_vcnt != 1);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eba2eabcd2b8..16c211558c22 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...)
357} 357}
358EXPORT_SYMBOL(seq_printf); 358EXPORT_SYMBOL(seq_printf);
359 359
360static char *mangle_path(char *s, char *p, char *esc) 360/**
361 * mangle_path - mangle and copy path to buffer beginning
362 * @s: buffer start
363 * @p: beginning of path in above buffer
364 * @esc: set of characters that need escaping
365 *
366 * Copy the path from @p to @s, replacing each occurrence of character from
367 * @esc with usual octal escape.
368 * Returns pointer past last written character in @s, or NULL in case of
369 * failure.
370 */
371char *mangle_path(char *s, char *p, char *esc)
361{ 372{
362 while (s <= p) { 373 while (s <= p) {
363 char c = *p++; 374 char c = *p++;
@@ -376,6 +387,7 @@ static char *mangle_path(char *s, char *p, char *esc)
376 } 387 }
377 return NULL; 388 return NULL;
378} 389}
390EXPORT_SYMBOL(mangle_path);
379 391
380/* 392/*
381 * return the absolute path of 'dentry' residing in mount 'mnt'. 393 * return the absolute path of 'dentry' residing in mount 'mnt'.
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 80744606bad1..c61fab1dd2f8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -45,6 +45,22 @@
45#define MCOUNT_REC() 45#define MCOUNT_REC()
46#endif 46#endif
47 47
48#ifdef CONFIG_TRACE_BRANCH_PROFILING
49#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
50 *(_ftrace_annotated_branch) \
51 VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .;
52#else
53#define LIKELY_PROFILE()
54#endif
55
56#ifdef CONFIG_PROFILE_ALL_BRANCHES
57#define BRANCH_PROFILE() VMLINUX_SYMBOL(__start_branch_profile) = .; \
58 *(_ftrace_branch) \
59 VMLINUX_SYMBOL(__stop_branch_profile) = .;
60#else
61#define BRANCH_PROFILE()
62#endif
63
48/* .data section */ 64/* .data section */
49#define DATA_DATA \ 65#define DATA_DATA \
50 *(.data) \ 66 *(.data) \
@@ -60,9 +76,12 @@
60 VMLINUX_SYMBOL(__start___markers) = .; \ 76 VMLINUX_SYMBOL(__start___markers) = .; \
61 *(__markers) \ 77 *(__markers) \
62 VMLINUX_SYMBOL(__stop___markers) = .; \ 78 VMLINUX_SYMBOL(__stop___markers) = .; \
79 . = ALIGN(32); \
63 VMLINUX_SYMBOL(__start___tracepoints) = .; \ 80 VMLINUX_SYMBOL(__start___tracepoints) = .; \
64 *(__tracepoints) \ 81 *(__tracepoints) \
65 VMLINUX_SYMBOL(__stop___tracepoints) = .; 82 VMLINUX_SYMBOL(__stop___tracepoints) = .; \
83 LIKELY_PROFILE() \
84 BRANCH_PROFILE()
66 85
67#define RO_DATA(align) \ 86#define RO_DATA(align) \
68 . = ALIGN((align)); \ 87 . = ALIGN((align)); \
@@ -269,6 +288,16 @@
269 *(.kprobes.text) \ 288 *(.kprobes.text) \
270 VMLINUX_SYMBOL(__kprobes_text_end) = .; 289 VMLINUX_SYMBOL(__kprobes_text_end) = .;
271 290
291#ifdef CONFIG_FUNCTION_GRAPH_TRACER
292#define IRQENTRY_TEXT \
293 ALIGN_FUNCTION(); \
294 VMLINUX_SYMBOL(__irqentry_text_start) = .; \
295 *(.irqentry.text) \
296 VMLINUX_SYMBOL(__irqentry_text_end) = .;
297#else
298#define IRQENTRY_TEXT
299#endif
300
272/* Section used for early init (in .S files) */ 301/* Section used for early init (in .S files) */
273#define HEAD_TEXT *(.head.text) 302#define HEAD_TEXT *(.head.text)
274 303
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index bdf505d33e77..1dba3493d520 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -160,7 +160,6 @@ struct blk_trace {
160 160
161extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 161extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
162extern void blk_trace_shutdown(struct request_queue *); 162extern void blk_trace_shutdown(struct request_queue *);
163extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
164extern int do_blk_trace_setup(struct request_queue *q, 163extern int do_blk_trace_setup(struct request_queue *q,
165 char *name, dev_t dev, struct blk_user_trace_setup *buts); 164 char *name, dev_t dev, struct blk_user_trace_setup *buts);
166extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); 165extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
@@ -186,168 +185,8 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
186 } while (0) 185 } while (0)
187#define BLK_TN_MAX_MSG 128 186#define BLK_TN_MAX_MSG 128
188 187
189/** 188extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
190 * blk_add_trace_rq - Add a trace for a request oriented action 189 void *data, size_t len);
191 * @q: queue the io is for
192 * @rq: the source request
193 * @what: the action
194 *
195 * Description:
196 * Records an action against a request. Will log the bio offset + size.
197 *
198 **/
199static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
200 u32 what)
201{
202 struct blk_trace *bt = q->blk_trace;
203 int rw = rq->cmd_flags & 0x03;
204
205 if (likely(!bt))
206 return;
207
208 if (blk_discard_rq(rq))
209 rw |= (1 << BIO_RW_DISCARD);
210
211 if (blk_pc_request(rq)) {
212 what |= BLK_TC_ACT(BLK_TC_PC);
213 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
214 } else {
215 what |= BLK_TC_ACT(BLK_TC_FS);
216 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
217 }
218}
219
220/**
221 * blk_add_trace_bio - Add a trace for a bio oriented action
222 * @q: queue the io is for
223 * @bio: the source bio
224 * @what: the action
225 *
226 * Description:
227 * Records an action against a bio. Will log the bio offset + size.
228 *
229 **/
230static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
231 u32 what)
232{
233 struct blk_trace *bt = q->blk_trace;
234
235 if (likely(!bt))
236 return;
237
238 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
239}
240
241/**
242 * blk_add_trace_generic - Add a trace for a generic action
243 * @q: queue the io is for
244 * @bio: the source bio
245 * @rw: the data direction
246 * @what: the action
247 *
248 * Description:
249 * Records a simple trace
250 *
251 **/
252static inline void blk_add_trace_generic(struct request_queue *q,
253 struct bio *bio, int rw, u32 what)
254{
255 struct blk_trace *bt = q->blk_trace;
256
257 if (likely(!bt))
258 return;
259
260 if (bio)
261 blk_add_trace_bio(q, bio, what);
262 else
263 __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
264}
265
266/**
267 * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
268 * @q: queue the io is for
269 * @what: the action
270 * @bio: the source bio
271 * @pdu: the integer payload
272 *
273 * Description:
274 * Adds a trace with some integer payload. This might be an unplug
275 * option given as the action, with the depth at unplug time given
276 * as the payload
277 *
278 **/
279static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
280 struct bio *bio, unsigned int pdu)
281{
282 struct blk_trace *bt = q->blk_trace;
283 __be64 rpdu = cpu_to_be64(pdu);
284
285 if (likely(!bt))
286 return;
287
288 if (bio)
289 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
290 else
291 __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
292}
293
294/**
295 * blk_add_trace_remap - Add a trace for a remap operation
296 * @q: queue the io is for
297 * @bio: the source bio
298 * @dev: target device
299 * @from: source sector
300 * @to: target sector
301 *
302 * Description:
303 * Device mapper or raid target sometimes need to split a bio because
304 * it spans a stripe (or similar). Add a trace for that action.
305 *
306 **/
307static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
308 dev_t dev, sector_t from, sector_t to)
309{
310 struct blk_trace *bt = q->blk_trace;
311 struct blk_io_trace_remap r;
312
313 if (likely(!bt))
314 return;
315
316 r.device = cpu_to_be32(dev);
317 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
318 r.sector = cpu_to_be64(to);
319
320 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
321}
322
323/**
324 * blk_add_driver_data - Add binary message with driver-specific data
325 * @q: queue the io is for
326 * @rq: io request
327 * @data: driver-specific data
328 * @len: length of driver-specific data
329 *
330 * Description:
331 * Some drivers might want to write driver-specific data per request.
332 *
333 **/
334static inline void blk_add_driver_data(struct request_queue *q,
335 struct request *rq,
336 void *data, size_t len)
337{
338 struct blk_trace *bt = q->blk_trace;
339
340 if (likely(!bt))
341 return;
342
343 if (blk_pc_request(rq))
344 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
345 rq->errors, len, data);
346 else
347 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
348 0, BLK_TA_DRV_DATA, rq->errors, len, data);
349}
350
351extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 190extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
352 char __user *arg); 191 char __user *arg);
353extern int blk_trace_startstop(struct request_queue *q, int start); 192extern int blk_trace_startstop(struct request_queue *q, int start);
@@ -356,13 +195,8 @@ extern int blk_trace_remove(struct request_queue *q);
356#else /* !CONFIG_BLK_DEV_IO_TRACE */ 195#else /* !CONFIG_BLK_DEV_IO_TRACE */
357#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 196#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
358#define blk_trace_shutdown(q) do { } while (0) 197#define blk_trace_shutdown(q) do { } while (0)
359#define blk_add_trace_rq(q, rq, what) do { } while (0)
360#define blk_add_trace_bio(q, rq, what) do { } while (0)
361#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
362#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0)
363#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
364#define blk_add_driver_data(q, rq, data, len) do {} while (0)
365#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) 198#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY)
199#define blk_add_driver_data(q, rq, data, len) do {} while (0)
366#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) 200#define blk_trace_setup(q, name, dev, arg) (-ENOTTY)
367#define blk_trace_startstop(q, start) (-ENOTTY) 201#define blk_trace_startstop(q, start) (-ENOTTY)
368#define blk_trace_remove(q) (-ENOTTY) 202#define blk_trace_remove(q) (-ENOTTY)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 98115d9d04da..ea7c6be354b7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -59,8 +59,88 @@ extern void __chk_io_ptr(const volatile void __iomem *);
59 * specific implementations come from the above header files 59 * specific implementations come from the above header files
60 */ 60 */
61 61
62#define likely(x) __builtin_expect(!!(x), 1) 62struct ftrace_branch_data {
63#define unlikely(x) __builtin_expect(!!(x), 0) 63 const char *func;
64 const char *file;
65 unsigned line;
66 union {
67 struct {
68 unsigned long correct;
69 unsigned long incorrect;
70 };
71 struct {
72 unsigned long miss;
73 unsigned long hit;
74 };
75 };
76};
77
78/*
79 * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
80 * to disable branch tracing on a per file basis.
81 */
82#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
83void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
84
85#define likely_notrace(x) __builtin_expect(!!(x), 1)
86#define unlikely_notrace(x) __builtin_expect(!!(x), 0)
87
88#define __branch_check__(x, expect) ({ \
89 int ______r; \
90 static struct ftrace_branch_data \
91 __attribute__((__aligned__(4))) \
92 __attribute__((section("_ftrace_annotated_branch"))) \
93 ______f = { \
94 .func = __func__, \
95 .file = __FILE__, \
96 .line = __LINE__, \
97 }; \
98 ______r = likely_notrace(x); \
99 ftrace_likely_update(&______f, ______r, expect); \
100 ______r; \
101 })
102
103/*
104 * Using __builtin_constant_p(x) to ignore cases where the return
105 * value is always the same. This idea is taken from a similar patch
106 * written by Daniel Walker.
107 */
108# ifndef likely
109# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1))
110# endif
111# ifndef unlikely
112# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
113# endif
114
115#ifdef CONFIG_PROFILE_ALL_BRANCHES
116/*
117 * "Define 'is'", Bill Clinton
118 * "Define 'if'", Steven Rostedt
119 */
120#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) : \
121 ({ \
122 int ______r; \
123 static struct ftrace_branch_data \
124 __attribute__((__aligned__(4))) \
125 __attribute__((section("_ftrace_branch"))) \
126 ______f = { \
127 .func = __func__, \
128 .file = __FILE__, \
129 .line = __LINE__, \
130 }; \
131 ______r = !!(cond); \
132 if (______r) \
133 ______f.hit++; \
134 else \
135 ______f.miss++; \
136 ______r; \
137 }))
138#endif /* CONFIG_PROFILE_ALL_BRANCHES */
139
140#else
141# define likely(x) __builtin_expect(!!(x), 1)
142# define unlikely(x) __builtin_expect(!!(x), 0)
143#endif
64 144
65/* Optimization barrier */ 145/* Optimization barrier */
66#ifndef barrier 146#ifndef barrier
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9c5bc6be2b09..677432b9cb7e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -8,6 +8,8 @@
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/bitops.h>
12#include <linux/sched.h>
11 13
12#ifdef CONFIG_FUNCTION_TRACER 14#ifdef CONFIG_FUNCTION_TRACER
13 15
@@ -24,6 +26,45 @@ struct ftrace_ops {
24 struct ftrace_ops *next; 26 struct ftrace_ops *next;
25}; 27};
26 28
29extern int function_trace_stop;
30
31/*
32 * Type of the current tracing.
33 */
34enum ftrace_tracing_type_t {
35 FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */
36 FTRACE_TYPE_RETURN, /* Hook the return of the function */
37};
38
39/* Current tracing type, default is FTRACE_TYPE_ENTER */
40extern enum ftrace_tracing_type_t ftrace_tracing_type;
41
42/**
43 * ftrace_stop - stop function tracer.
44 *
45 * A quick way to stop the function tracer. Note this is an on/off switch,
46 * it is not something that is recursive like preempt_disable.
47 * This does not disable the calling of mcount, it only stops the
48 * calling of functions from mcount.
49 */
50static inline void ftrace_stop(void)
51{
52 function_trace_stop = 1;
53}
54
55/**
56 * ftrace_start - start the function tracer.
57 *
58 * This function is the inverse of ftrace_stop. This does not enable
59 * the function tracing if the function tracer is disabled. This only
60 * sets the function tracer flag to continue calling the functions
61 * from mcount.
62 */
63static inline void ftrace_start(void)
64{
65 function_trace_stop = 0;
66}
67
27/* 68/*
28 * The ftrace_ops must be a static and should also 69 * The ftrace_ops must be a static and should also
29 * be read_mostly. These functions do modify read_mostly variables 70 * be read_mostly. These functions do modify read_mostly variables
@@ -42,9 +83,21 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
42# define unregister_ftrace_function(ops) do { } while (0) 83# define unregister_ftrace_function(ops) do { } while (0)
43# define clear_ftrace_function(ops) do { } while (0) 84# define clear_ftrace_function(ops) do { } while (0)
44static inline void ftrace_kill(void) { } 85static inline void ftrace_kill(void) { }
86static inline void ftrace_stop(void) { }
87static inline void ftrace_start(void) { }
45#endif /* CONFIG_FUNCTION_TRACER */ 88#endif /* CONFIG_FUNCTION_TRACER */
46 89
90#ifdef CONFIG_STACK_TRACER
91extern int stack_tracer_enabled;
92int
93stack_trace_sysctl(struct ctl_table *table, int write,
94 struct file *file, void __user *buffer, size_t *lenp,
95 loff_t *ppos);
96#endif
97
47#ifdef CONFIG_DYNAMIC_FTRACE 98#ifdef CONFIG_DYNAMIC_FTRACE
99/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */
100#include <asm/ftrace.h>
48 101
49enum { 102enum {
50 FTRACE_FL_FREE = (1 << 0), 103 FTRACE_FL_FREE = (1 << 0),
@@ -60,6 +113,7 @@ struct dyn_ftrace {
60 struct list_head list; 113 struct list_head list;
61 unsigned long ip; /* address of mcount call-site */ 114 unsigned long ip; /* address of mcount call-site */
62 unsigned long flags; 115 unsigned long flags;
116 struct dyn_arch_ftrace arch;
63}; 117};
64 118
65int ftrace_force_update(void); 119int ftrace_force_update(void);
@@ -67,19 +121,48 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset);
67 121
68/* defined in arch */ 122/* defined in arch */
69extern int ftrace_ip_converted(unsigned long ip); 123extern int ftrace_ip_converted(unsigned long ip);
70extern unsigned char *ftrace_nop_replace(void);
71extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
72extern int ftrace_dyn_arch_init(void *data); 124extern int ftrace_dyn_arch_init(void *data);
73extern int ftrace_update_ftrace_func(ftrace_func_t func); 125extern int ftrace_update_ftrace_func(ftrace_func_t func);
74extern void ftrace_caller(void); 126extern void ftrace_caller(void);
75extern void ftrace_call(void); 127extern void ftrace_call(void);
76extern void mcount_call(void); 128extern void mcount_call(void);
129#ifdef CONFIG_FUNCTION_GRAPH_TRACER
130extern void ftrace_graph_caller(void);
131extern int ftrace_enable_ftrace_graph_caller(void);
132extern int ftrace_disable_ftrace_graph_caller(void);
133#else
134static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; }
135static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
136#endif
137
138/**
139 * ftrace_make_nop - convert code into nop
140 * @mod: module structure if called by module load initialization
141 * @rec: the mcount call site record
142 * @addr: the address that the call site should be calling
143 *
144 * This is a very sensitive operation and great care needs
145 * to be taken by the arch. The operation should carefully
146 * read the location, check to see if what is read is indeed
147 * what we expect it to be, and then on success of the compare,
148 * it should write to the location.
149 *
150 * The code segment at @rec->ip should be a caller to @addr
151 *
152 * Return must be:
153 * 0 on success
154 * -EFAULT on error reading the location
155 * -EINVAL on a failed compare of the contents
156 * -EPERM on error writing to the location
157 * Any other value will be considered a failure.
158 */
159extern int ftrace_make_nop(struct module *mod,
160 struct dyn_ftrace *rec, unsigned long addr);
77 161
78/** 162/**
79 * ftrace_modify_code - modify code segment 163 * ftrace_make_call - convert a nop call site into a call to addr
80 * @ip: the address of the code segment 164 * @rec: the mcount call site record
81 * @old_code: the contents of what is expected to be there 165 * @addr: the address that the call site should call
82 * @new_code: the code to patch in
83 * 166 *
84 * This is a very sensitive operation and great care needs 167 * This is a very sensitive operation and great care needs
85 * to be taken by the arch. The operation should carefully 168 * to be taken by the arch. The operation should carefully
@@ -87,6 +170,8 @@ extern void mcount_call(void);
87 * what we expect it to be, and then on success of the compare, 170 * what we expect it to be, and then on success of the compare,
88 * it should write to the location. 171 * it should write to the location.
89 * 172 *
173 * The code segment at @rec->ip should be a nop
174 *
90 * Return must be: 175 * Return must be:
91 * 0 on success 176 * 0 on success
92 * -EFAULT on error reading the location 177 * -EFAULT on error reading the location
@@ -94,8 +179,11 @@ extern void mcount_call(void);
94 * -EPERM on error writing to the location 179 * -EPERM on error writing to the location
95 * Any other value will be considered a failure. 180 * Any other value will be considered a failure.
96 */ 181 */
97extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, 182extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
98 unsigned char *new_code); 183
184
185/* May be defined in arch */
186extern int ftrace_arch_read_dyn_info(char *buf, int size);
99 187
100extern int skip_trace(unsigned long ip); 188extern int skip_trace(unsigned long ip);
101 189
@@ -103,7 +191,6 @@ extern void ftrace_release(void *start, unsigned long size);
103 191
104extern void ftrace_disable_daemon(void); 192extern void ftrace_disable_daemon(void);
105extern void ftrace_enable_daemon(void); 193extern void ftrace_enable_daemon(void);
106
107#else 194#else
108# define skip_trace(ip) ({ 0; }) 195# define skip_trace(ip) ({ 0; })
109# define ftrace_force_update() ({ 0; }) 196# define ftrace_force_update() ({ 0; })
@@ -182,6 +269,12 @@ static inline void __ftrace_enabled_restore(int enabled)
182#endif 269#endif
183 270
184#ifdef CONFIG_TRACING 271#ifdef CONFIG_TRACING
272extern int ftrace_dump_on_oops;
273
274extern void tracing_start(void);
275extern void tracing_stop(void);
276extern void ftrace_off_permanent(void);
277
185extern void 278extern void
186ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); 279ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
187 280
@@ -210,8 +303,11 @@ extern void ftrace_dump(void);
210static inline void 303static inline void
211ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } 304ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
212static inline int 305static inline int
213ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); 306ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
214 307
308static inline void tracing_start(void) { }
309static inline void tracing_stop(void) { }
310static inline void ftrace_off_permanent(void) { }
215static inline int 311static inline int
216ftrace_printk(const char *fmt, ...) 312ftrace_printk(const char *fmt, ...)
217{ 313{
@@ -222,33 +318,178 @@ static inline void ftrace_dump(void) { }
222 318
223#ifdef CONFIG_FTRACE_MCOUNT_RECORD 319#ifdef CONFIG_FTRACE_MCOUNT_RECORD
224extern void ftrace_init(void); 320extern void ftrace_init(void);
225extern void ftrace_init_module(unsigned long *start, unsigned long *end); 321extern void ftrace_init_module(struct module *mod,
322 unsigned long *start, unsigned long *end);
226#else 323#else
227static inline void ftrace_init(void) { } 324static inline void ftrace_init(void) { }
228static inline void 325static inline void
229ftrace_init_module(unsigned long *start, unsigned long *end) { } 326ftrace_init_module(struct module *mod,
327 unsigned long *start, unsigned long *end) { }
328#endif
329
330enum {
331 POWER_NONE = 0,
332 POWER_CSTATE = 1,
333 POWER_PSTATE = 2,
334};
335
336struct power_trace {
337#ifdef CONFIG_POWER_TRACER
338 ktime_t stamp;
339 ktime_t end;
340 int type;
341 int state;
230#endif 342#endif
343};
231 344
345#ifdef CONFIG_POWER_TRACER
346extern void trace_power_start(struct power_trace *it, unsigned int type,
347 unsigned int state);
348extern void trace_power_mark(struct power_trace *it, unsigned int type,
349 unsigned int state);
350extern void trace_power_end(struct power_trace *it);
351#else
352static inline void trace_power_start(struct power_trace *it, unsigned int type,
353 unsigned int state) { }
354static inline void trace_power_mark(struct power_trace *it, unsigned int type,
355 unsigned int state) { }
356static inline void trace_power_end(struct power_trace *it) { }
357#endif
358
359
360/*
361 * Structure that defines an entry function trace.
362 */
363struct ftrace_graph_ent {
364 unsigned long func; /* Current function */
365 int depth;
366};
232 367
233struct boot_trace { 368/*
234 pid_t caller; 369 * Structure that defines a return function trace.
235 char func[KSYM_SYMBOL_LEN]; 370 */
236 int result; 371struct ftrace_graph_ret {
237 unsigned long long duration; /* usecs */ 372 unsigned long func; /* Current function */
238 ktime_t calltime; 373 unsigned long long calltime;
239 ktime_t rettime; 374 unsigned long long rettime;
375 /* Number of functions that overran the depth limit for current task */
376 unsigned long overrun;
377 int depth;
240}; 378};
241 379
242#ifdef CONFIG_BOOT_TRACER 380#ifdef CONFIG_FUNCTION_GRAPH_TRACER
243extern void trace_boot(struct boot_trace *it, initcall_t fn); 381
244extern void start_boot_trace(void); 382/*
245extern void stop_boot_trace(void); 383 * Sometimes we don't want to trace a function with the function
384 * graph tracer but we want it to still be traced by the usual function
385 * tracer if the function graph tracer is not configured.
386 */
387#define __notrace_funcgraph notrace
388
389/*
390 * We want to know which function is an entrypoint of a hardirq.
391 * That will help us to put a signal on output.
392 */
393#define __irq_entry __attribute__((__section__(".irqentry.text")))
394
395/* Limits of hardirq entrypoints */
396extern char __irqentry_text_start[];
397extern char __irqentry_text_end[];
398
399#define FTRACE_RETFUNC_DEPTH 50
400#define FTRACE_RETSTACK_ALLOC_SIZE 32
401/* Type of the callback handlers for tracing function graph*/
402typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
403typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
404
405extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
406 trace_func_graph_ent_t entryfunc);
407
408extern void ftrace_graph_stop(void);
409
410/* The current handlers in use */
411extern trace_func_graph_ret_t ftrace_graph_return;
412extern trace_func_graph_ent_t ftrace_graph_entry;
413
414extern void unregister_ftrace_graph(void);
415
416extern void ftrace_graph_init_task(struct task_struct *t);
417extern void ftrace_graph_exit_task(struct task_struct *t);
418
419static inline int task_curr_ret_stack(struct task_struct *t)
420{
421 return t->curr_ret_stack;
422}
423
424static inline void pause_graph_tracing(void)
425{
426 atomic_inc(&current->tracing_graph_pause);
427}
428
429static inline void unpause_graph_tracing(void)
430{
431 atomic_dec(&current->tracing_graph_pause);
432}
246#else 433#else
247static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } 434
248static inline void start_boot_trace(void) { } 435#define __notrace_funcgraph
249static inline void stop_boot_trace(void) { } 436#define __irq_entry
437
438static inline void ftrace_graph_init_task(struct task_struct *t) { }
439static inline void ftrace_graph_exit_task(struct task_struct *t) { }
440
441static inline int task_curr_ret_stack(struct task_struct *tsk)
442{
443 return -1;
444}
445
446static inline void pause_graph_tracing(void) { }
447static inline void unpause_graph_tracing(void) { }
250#endif 448#endif
251 449
450#ifdef CONFIG_TRACING
451#include <linux/sched.h>
452
453/* flags for current->trace */
454enum {
455 TSK_TRACE_FL_TRACE_BIT = 0,
456 TSK_TRACE_FL_GRAPH_BIT = 1,
457};
458enum {
459 TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
460 TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
461};
462
463static inline void set_tsk_trace_trace(struct task_struct *tsk)
464{
465 set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
466}
467
468static inline void clear_tsk_trace_trace(struct task_struct *tsk)
469{
470 clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
471}
472
473static inline int test_tsk_trace_trace(struct task_struct *tsk)
474{
475 return tsk->trace & TSK_TRACE_FL_TRACE;
476}
477
478static inline void set_tsk_trace_graph(struct task_struct *tsk)
479{
480 set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
481}
482
483static inline void clear_tsk_trace_graph(struct task_struct *tsk)
484{
485 clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
486}
487
488static inline int test_tsk_trace_graph(struct task_struct *tsk)
489{
490 return tsk->trace & TSK_TRACE_FL_GRAPH;
491}
252 492
493#endif /* CONFIG_TRACING */
253 494
254#endif /* _LINUX_FTRACE_H */ 495#endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
new file mode 100644
index 000000000000..366a054d0b05
--- /dev/null
+++ b/include/linux/ftrace_irq.h
@@ -0,0 +1,13 @@
1#ifndef _LINUX_FTRACE_IRQ_H
2#define _LINUX_FTRACE_IRQ_H
3
4
5#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
6extern void ftrace_nmi_enter(void);
7extern void ftrace_nmi_exit(void);
8#else
9static inline void ftrace_nmi_enter(void) { }
10static inline void ftrace_nmi_exit(void) { }
11#endif
12
13#endif /* _LINUX_FTRACE_IRQ_H */
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..89a56d79e4c6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
4#include <linux/preempt.h> 4#include <linux/preempt.h>
5#include <linux/smp_lock.h> 5#include <linux/smp_lock.h>
6#include <linux/lockdep.h> 6#include <linux/lockdep.h>
7#include <linux/ftrace_irq.h>
7#include <asm/hardirq.h> 8#include <asm/hardirq.h>
8#include <asm/system.h> 9#include <asm/system.h>
9 10
@@ -161,7 +162,17 @@ extern void irq_enter(void);
161 */ 162 */
162extern void irq_exit(void); 163extern void irq_exit(void);
163 164
164#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) 165#define nmi_enter() \
165#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) 166 do { \
167 ftrace_nmi_enter(); \
168 lockdep_off(); \
169 __irq_enter(); \
170 } while (0)
171#define nmi_exit() \
172 do { \
173 __irq_exit(); \
174 lockdep_on(); \
175 ftrace_nmi_exit(); \
176 } while (0)
166 177
167#endif /* LINUX_HARDIRQ_H */ 178#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..b85e74ca782f 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -12,6 +12,7 @@
12 * See the file COPYING for more details. 12 * See the file COPYING for more details.
13 */ 13 */
14 14
15#include <stdarg.h>
15#include <linux/types.h> 16#include <linux/types.h>
16 17
17struct module; 18struct module;
@@ -48,10 +49,28 @@ struct marker {
48 void (*call)(const struct marker *mdata, void *call_private, ...); 49 void (*call)(const struct marker *mdata, void *call_private, ...);
49 struct marker_probe_closure single; 50 struct marker_probe_closure single;
50 struct marker_probe_closure *multi; 51 struct marker_probe_closure *multi;
52 const char *tp_name; /* Optional tracepoint name */
53 void *tp_cb; /* Optional tracepoint callback */
51} __attribute__((aligned(8))); 54} __attribute__((aligned(8)));
52 55
53#ifdef CONFIG_MARKERS 56#ifdef CONFIG_MARKERS
54 57
58#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \
59 static const char __mstrtab_##name[] \
60 __attribute__((section("__markers_strings"))) \
61 = #name "\0" format; \
62 static struct marker __mark_##name \
63 __attribute__((section("__markers"), aligned(8))) = \
64 { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
65 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\
66 NULL, tp_name_str, tp_cb }
67
68#define DEFINE_MARKER(name, format) \
69 _DEFINE_MARKER(name, NULL, NULL, format)
70
71#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \
72 _DEFINE_MARKER(name, #tp_name, tp_cb, format)
73
55/* 74/*
56 * Note : the empty asm volatile with read constraint is used here instead of a 75 * Note : the empty asm volatile with read constraint is used here instead of a
57 * "used" attribute to fix a gcc 4.1.x bug. 76 * "used" attribute to fix a gcc 4.1.x bug.
@@ -65,14 +84,7 @@ struct marker {
65 */ 84 */
66#define __trace_mark(generic, name, call_private, format, args...) \ 85#define __trace_mark(generic, name, call_private, format, args...) \
67 do { \ 86 do { \
68 static const char __mstrtab_##name[] \ 87 DEFINE_MARKER(name, format); \
69 __attribute__((section("__markers_strings"))) \
70 = #name "\0" format; \
71 static struct marker __mark_##name \
72 __attribute__((section("__markers"), aligned(8))) = \
73 { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
74 0, 0, marker_probe_cb, \
75 { __mark_empty_function, NULL}, NULL }; \
76 __mark_check_format(format, ## args); \ 88 __mark_check_format(format, ## args); \
77 if (unlikely(__mark_##name.state)) { \ 89 if (unlikely(__mark_##name.state)) { \
78 (*__mark_##name.call) \ 90 (*__mark_##name.call) \
@@ -80,14 +92,39 @@ struct marker {
80 } \ 92 } \
81 } while (0) 93 } while (0)
82 94
95#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
96 do { \
97 void __check_tp_type(void) \
98 { \
99 register_trace_##tp_name(tp_cb); \
100 } \
101 DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \
102 __mark_check_format(format, ## args); \
103 (*__mark_##name.call)(&__mark_##name, call_private, \
104 ## args); \
105 } while (0)
106
83extern void marker_update_probe_range(struct marker *begin, 107extern void marker_update_probe_range(struct marker *begin,
84 struct marker *end); 108 struct marker *end);
109
110#define GET_MARKER(name) (__mark_##name)
111
85#else /* !CONFIG_MARKERS */ 112#else /* !CONFIG_MARKERS */
113#define DEFINE_MARKER(name, tp_name, tp_cb, format)
86#define __trace_mark(generic, name, call_private, format, args...) \ 114#define __trace_mark(generic, name, call_private, format, args...) \
87 __mark_check_format(format, ## args) 115 __mark_check_format(format, ## args)
116#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
117 do { \
118 void __check_tp_type(void) \
119 { \
120 register_trace_##tp_name(tp_cb); \
121 } \
122 __mark_check_format(format, ## args); \
123 } while (0)
88static inline void marker_update_probe_range(struct marker *begin, 124static inline void marker_update_probe_range(struct marker *begin,
89 struct marker *end) 125 struct marker *end)
90{ } 126{ }
127#define GET_MARKER(name)
91#endif /* CONFIG_MARKERS */ 128#endif /* CONFIG_MARKERS */
92 129
93/** 130/**
@@ -117,6 +154,20 @@ static inline void marker_update_probe_range(struct marker *begin,
117 __trace_mark(1, name, NULL, format, ## args) 154 __trace_mark(1, name, NULL, format, ## args)
118 155
119/** 156/**
157 * trace_mark_tp - Marker in a tracepoint callback
158 * @name: marker name, not quoted.
159 * @tp_name: tracepoint name, not quoted.
160 * @tp_cb: tracepoint callback. Should have an associated global symbol so it
161 * is not optimized away by the compiler (should not be static).
162 * @format: format string
163 * @args...: variable argument list
164 *
165 * Places a marker in a tracepoint callback.
166 */
167#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \
168 __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args)
169
170/**
120 * MARK_NOARGS - Format string for a marker with no argument. 171 * MARK_NOARGS - Format string for a marker with no argument.
121 */ 172 */
122#define MARK_NOARGS " " 173#define MARK_NOARGS " "
@@ -136,8 +187,6 @@ extern marker_probe_func __mark_empty_function;
136 187
137extern void marker_probe_cb(const struct marker *mdata, 188extern void marker_probe_cb(const struct marker *mdata,
138 void *call_private, ...); 189 void *call_private, ...);
139extern void marker_probe_cb_noarg(const struct marker *mdata,
140 void *call_private, ...);
141 190
142/* 191/*
143 * Connect a probe to a marker. 192 * Connect a probe to a marker.
@@ -162,8 +211,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
162 211
163/* 212/*
164 * marker_synchronize_unregister must be called between the last marker probe 213 * marker_synchronize_unregister must be called between the last marker probe
165 * unregistration and the end of module exit to make sure there is no caller 214 * unregistration and the first one of
166 * executing a probe when it is freed. 215 * - the end of module exit function
216 * - the free of any resource used by the probes
217 * to ensure the code and data are valid for any possibly running probes.
167 */ 218 */
168#define marker_synchronize_unregister() synchronize_sched() 219#define marker_synchronize_unregister() synchronize_sched()
169 220
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d3ddd735e375..aaa8b843be28 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1305,5 +1305,7 @@ int vmemmap_populate_basepages(struct page *start_page,
1305int vmemmap_populate(struct page *start_page, unsigned long pages, int node); 1305int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
1306void vmemmap_populate_print_last(void); 1306void vmemmap_populate_print_last(void);
1307 1307
1308extern void *alloc_locked_buffer(size_t size);
1309extern void free_locked_buffer(void *buffer, size_t size);
1308#endif /* __KERNEL__ */ 1310#endif /* __KERNEL__ */
1309#endif /* _LINUX_MM_H */ 1311#endif /* _LINUX_MM_H */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index d7e98ff8021e..bb206c56d1f0 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -147,9 +147,9 @@ pid_t pid_vnr(struct pid *pid);
147#define do_each_pid_task(pid, type, task) \ 147#define do_each_pid_task(pid, type, task) \
148 do { \ 148 do { \
149 struct hlist_node *pos___; \ 149 struct hlist_node *pos___; \
150 if (pid != NULL) \ 150 if ((pid) != NULL) \
151 hlist_for_each_entry_rcu((task), pos___, \ 151 hlist_for_each_entry_rcu((task), pos___, \
152 &pid->tasks[type], pids[type].node) { 152 &(pid)->tasks[type], pids[type].node) {
153 153
154 /* 154 /*
155 * Both old and new leaders may be attached to 155 * Both old and new leaders may be attached to
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 22641d5d45df..98b93ca4db06 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -94,6 +94,7 @@ extern void ptrace_notify(int exit_code);
94extern void __ptrace_link(struct task_struct *child, 94extern void __ptrace_link(struct task_struct *child,
95 struct task_struct *new_parent); 95 struct task_struct *new_parent);
96extern void __ptrace_unlink(struct task_struct *child); 96extern void __ptrace_unlink(struct task_struct *child);
97extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
97#define PTRACE_MODE_READ 1 98#define PTRACE_MODE_READ 1
98#define PTRACE_MODE_ATTACH 2 99#define PTRACE_MODE_ATTACH 2
99/* Returns 0 on success, -errno on denial. */ 100/* Returns 0 on success, -errno on denial. */
@@ -313,6 +314,27 @@ static inline void user_enable_block_step(struct task_struct *task)
313#define arch_ptrace_stop(code, info) do { } while (0) 314#define arch_ptrace_stop(code, info) do { } while (0)
314#endif 315#endif
315 316
317#ifndef arch_ptrace_untrace
318/*
319 * Do machine-specific work before untracing child.
320 *
321 * This is called for a normal detach as well as from ptrace_exit()
322 * when the tracing task dies.
323 *
324 * Called with write_lock(&tasklist_lock) held.
325 */
326#define arch_ptrace_untrace(task) do { } while (0)
327#endif
328
329#ifndef arch_ptrace_fork
330/*
331 * Do machine-specific work to initialize a new task.
332 *
333 * This is called from copy_process().
334 */
335#define arch_ptrace_fork(child, clone_flags) do { } while (0)
336#endif
337
316extern int task_current_syscall(struct task_struct *target, long *callno, 338extern int task_current_syscall(struct task_struct *target, long *callno,
317 unsigned long args[6], unsigned int maxargs, 339 unsigned long args[6], unsigned int maxargs,
318 unsigned long *sp, unsigned long *pc); 340 unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e43e33..895dc9c1088c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -142,6 +142,7 @@ struct rcu_head {
142 * on the write-side to insure proper synchronization. 142 * on the write-side to insure proper synchronization.
143 */ 143 */
144#define rcu_read_lock_sched() preempt_disable() 144#define rcu_read_lock_sched() preempt_disable()
145#define rcu_read_lock_sched_notrace() preempt_disable_notrace()
145 146
146/* 147/*
147 * rcu_read_unlock_sched - marks the end of a RCU-classic critical section 148 * rcu_read_unlock_sched - marks the end of a RCU-classic critical section
@@ -149,6 +150,7 @@ struct rcu_head {
149 * See rcu_read_lock_sched for more information. 150 * See rcu_read_lock_sched for more information.
150 */ 151 */
151#define rcu_read_unlock_sched() preempt_enable() 152#define rcu_read_unlock_sched() preempt_enable()
153#define rcu_read_unlock_sched_notrace() preempt_enable_notrace()
152 154
153 155
154 156
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e097c2e6b6dc..d363467c8f13 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -28,17 +28,19 @@ struct ring_buffer_event {
28 * size = 8 bytes 28 * size = 8 bytes
29 * 29 *
30 * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock 30 * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
31 * array[0] = tv_nsec 31 * array[0] = tv_nsec
32 * array[1] = tv_sec 32 * array[1..2] = tv_sec
33 * size = 16 bytes 33 * size = 16 bytes
34 * 34 *
35 * @RINGBUF_TYPE_DATA: Data record 35 * @RINGBUF_TYPE_DATA: Data record
36 * If len is zero: 36 * If len is zero:
37 * array[0] holds the actual length 37 * array[0] holds the actual length
38 * array[1..(length+3)/4-1] holds data 38 * array[1..(length+3)/4] holds data
39 * size = 4 + 4 + length (bytes)
39 * else 40 * else
40 * length = len << 2 41 * length = len << 2
41 * array[0..(length+3)/4] holds data 42 * array[0..(length+3)/4-1] holds data
43 * size = 4 + length (bytes)
42 */ 44 */
43enum ring_buffer_type { 45enum ring_buffer_type {
44 RINGBUF_TYPE_PADDING, 46 RINGBUF_TYPE_PADDING,
@@ -122,6 +124,12 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
122 124
123void tracing_on(void); 125void tracing_on(void);
124void tracing_off(void); 126void tracing_off(void);
127void tracing_off_permanent(void);
128
129void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
130void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
131int ring_buffer_read_page(struct ring_buffer *buffer,
132 void **data_page, int cpu, int full);
125 133
126enum ring_buffer_flags { 134enum ring_buffer_flags {
127 RB_FL_OVERWRITE = 1 << 0, 135 RB_FL_OVERWRITE = 1 << 0,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9624e2cfc2dc..0a1094d84b77 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,6 +96,7 @@ struct exec_domain;
96struct futex_pi_state; 96struct futex_pi_state;
97struct robust_list_head; 97struct robust_list_head;
98struct bio; 98struct bio;
99struct bts_tracer;
99 100
100/* 101/*
101 * List of flags we want to share for kernel threads, 102 * List of flags we want to share for kernel threads,
@@ -1130,6 +1131,19 @@ struct task_struct {
1130 struct list_head ptraced; 1131 struct list_head ptraced;
1131 struct list_head ptrace_entry; 1132 struct list_head ptrace_entry;
1132 1133
1134#ifdef CONFIG_X86_PTRACE_BTS
1135 /*
1136 * This is the tracer handle for the ptrace BTS extension.
1137 * This field actually belongs to the ptracer task.
1138 */
1139 struct bts_tracer *bts;
1140 /*
1141 * The buffer to hold the BTS data.
1142 */
1143 void *bts_buffer;
1144 size_t bts_size;
1145#endif /* CONFIG_X86_PTRACE_BTS */
1146
1133 /* PID/PID hash table linkage. */ 1147 /* PID/PID hash table linkage. */
1134 struct pid_link pids[PIDTYPE_MAX]; 1148 struct pid_link pids[PIDTYPE_MAX];
1135 struct list_head thread_group; 1149 struct list_head thread_group;
@@ -1313,6 +1327,23 @@ struct task_struct {
1313 unsigned long default_timer_slack_ns; 1327 unsigned long default_timer_slack_ns;
1314 1328
1315 struct list_head *scm_work_list; 1329 struct list_head *scm_work_list;
1330#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1331 /* Index of current stored address in ret_stack */
1332 int curr_ret_stack;
1333 /* Stack of return addresses for return function tracing */
1334 struct ftrace_ret_stack *ret_stack;
1335 /*
1336 * Number of functions that haven't been traced
1337 * because of depth overrun.
1338 */
1339 atomic_t trace_overrun;
1340 /* Pause for the tracing */
1341 atomic_t tracing_graph_pause;
1342#endif
1343#ifdef CONFIG_TRACING
1344 /* state flags for use by tracers */
1345 unsigned long trace;
1346#endif
1316}; 1347};
1317 1348
1318/* 1349/*
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dc50bcc282a8..b3dfa72f13b9 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -34,6 +34,7 @@ struct seq_operations {
34 34
35#define SEQ_SKIP 1 35#define SEQ_SKIP 1
36 36
37char *mangle_path(char *s, char *p, char *esc);
37int seq_open(struct file *, const struct seq_operations *); 38int seq_open(struct file *, const struct seq_operations *);
38ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); 39ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
39loff_t seq_lseek(struct file *, loff_t, int); 40loff_t seq_lseek(struct file *, loff_t, int);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index b106fd8e0d5c..1a8cecc4f38c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
15 struct stack_trace *trace); 15 struct stack_trace *trace);
16 16
17extern void print_stack_trace(struct stack_trace *trace, int spaces); 17extern void print_stack_trace(struct stack_trace *trace, int spaces);
18
19#ifdef CONFIG_USER_STACKTRACE_SUPPORT
20extern void save_stack_trace_user(struct stack_trace *trace);
21#else
22# define save_stack_trace_user(trace) do { } while (0)
23#endif
24
18#else 25#else
19# define save_stack_trace(trace) do { } while (0) 26# define save_stack_trace(trace) do { } while (0)
20# define save_stack_trace_tsk(tsk, trace) do { } while (0) 27# define save_stack_trace_tsk(tsk, trace) do { } while (0)
28# define save_stack_trace_user(trace) do { } while (0)
21# define print_stack_trace(trace, spaces) do { } while (0) 29# define print_stack_trace(trace, spaces) do { } while (0)
22#endif 30#endif
23 31
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c5bb39c7a770..757005458366 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -24,8 +24,12 @@ struct tracepoint {
24 const char *name; /* Tracepoint name */ 24 const char *name; /* Tracepoint name */
25 int state; /* State. */ 25 int state; /* State. */
26 void **funcs; 26 void **funcs;
27} __attribute__((aligned(8))); 27} __attribute__((aligned(32))); /*
28 28 * Aligned on 32 bytes because it is
29 * globally visible and gcc happily
30 * aligns these on the structure size.
31 * Keep in sync with vmlinux.lds.h.
32 */
29 33
30#define TPPROTO(args...) args 34#define TPPROTO(args...) args
31#define TPARGS(args...) args 35#define TPARGS(args...) args
@@ -40,14 +44,14 @@ struct tracepoint {
40 do { \ 44 do { \
41 void **it_func; \ 45 void **it_func; \
42 \ 46 \
43 rcu_read_lock_sched(); \ 47 rcu_read_lock_sched_notrace(); \
44 it_func = rcu_dereference((tp)->funcs); \ 48 it_func = rcu_dereference((tp)->funcs); \
45 if (it_func) { \ 49 if (it_func) { \
46 do { \ 50 do { \
47 ((void(*)(proto))(*it_func))(args); \ 51 ((void(*)(proto))(*it_func))(args); \
48 } while (*(++it_func)); \ 52 } while (*(++it_func)); \
49 } \ 53 } \
50 rcu_read_unlock_sched(); \ 54 rcu_read_unlock_sched_notrace(); \
51 } while (0) 55 } while (0)
52 56
53/* 57/*
@@ -55,35 +59,40 @@ struct tracepoint {
55 * not add unwanted padding between the beginning of the section and the 59 * not add unwanted padding between the beginning of the section and the
56 * structure. Force alignment to the same alignment as the section start. 60 * structure. Force alignment to the same alignment as the section start.
57 */ 61 */
58#define DEFINE_TRACE(name, proto, args) \ 62#define DECLARE_TRACE(name, proto, args) \
63 extern struct tracepoint __tracepoint_##name; \
59 static inline void trace_##name(proto) \ 64 static inline void trace_##name(proto) \
60 { \ 65 { \
61 static const char __tpstrtab_##name[] \
62 __attribute__((section("__tracepoints_strings"))) \
63 = #name ":" #proto; \
64 static struct tracepoint __tracepoint_##name \
65 __attribute__((section("__tracepoints"), aligned(8))) = \
66 { __tpstrtab_##name, 0, NULL }; \
67 if (unlikely(__tracepoint_##name.state)) \ 66 if (unlikely(__tracepoint_##name.state)) \
68 __DO_TRACE(&__tracepoint_##name, \ 67 __DO_TRACE(&__tracepoint_##name, \
69 TPPROTO(proto), TPARGS(args)); \ 68 TPPROTO(proto), TPARGS(args)); \
70 } \ 69 } \
71 static inline int register_trace_##name(void (*probe)(proto)) \ 70 static inline int register_trace_##name(void (*probe)(proto)) \
72 { \ 71 { \
73 return tracepoint_probe_register(#name ":" #proto, \ 72 return tracepoint_probe_register(#name, (void *)probe); \
74 (void *)probe); \
75 } \ 73 } \
76 static inline void unregister_trace_##name(void (*probe)(proto))\ 74 static inline int unregister_trace_##name(void (*probe)(proto)) \
77 { \ 75 { \
78 tracepoint_probe_unregister(#name ":" #proto, \ 76 return tracepoint_probe_unregister(#name, (void *)probe);\
79 (void *)probe); \
80 } 77 }
81 78
79#define DEFINE_TRACE(name) \
80 static const char __tpstrtab_##name[] \
81 __attribute__((section("__tracepoints_strings"))) = #name; \
82 struct tracepoint __tracepoint_##name \
83 __attribute__((section("__tracepoints"), aligned(32))) = \
84 { __tpstrtab_##name, 0, NULL }
85
86#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \
87 EXPORT_SYMBOL_GPL(__tracepoint_##name)
88#define EXPORT_TRACEPOINT_SYMBOL(name) \
89 EXPORT_SYMBOL(__tracepoint_##name)
90
82extern void tracepoint_update_probe_range(struct tracepoint *begin, 91extern void tracepoint_update_probe_range(struct tracepoint *begin,
83 struct tracepoint *end); 92 struct tracepoint *end);
84 93
85#else /* !CONFIG_TRACEPOINTS */ 94#else /* !CONFIG_TRACEPOINTS */
86#define DEFINE_TRACE(name, proto, args) \ 95#define DECLARE_TRACE(name, proto, args) \
87 static inline void _do_trace_##name(struct tracepoint *tp, proto) \ 96 static inline void _do_trace_##name(struct tracepoint *tp, proto) \
88 { } \ 97 { } \
89 static inline void trace_##name(proto) \ 98 static inline void trace_##name(proto) \
@@ -92,8 +101,14 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
92 { \ 101 { \
93 return -ENOSYS; \ 102 return -ENOSYS; \
94 } \ 103 } \
95 static inline void unregister_trace_##name(void (*probe)(proto))\ 104 static inline int unregister_trace_##name(void (*probe)(proto)) \
96 { } 105 { \
106 return -ENOSYS; \
107 }
108
109#define DEFINE_TRACE(name)
110#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
111#define EXPORT_TRACEPOINT_SYMBOL(name)
97 112
98static inline void tracepoint_update_probe_range(struct tracepoint *begin, 113static inline void tracepoint_update_probe_range(struct tracepoint *begin,
99 struct tracepoint *end) 114 struct tracepoint *end)
@@ -112,6 +127,10 @@ extern int tracepoint_probe_register(const char *name, void *probe);
112 */ 127 */
113extern int tracepoint_probe_unregister(const char *name, void *probe); 128extern int tracepoint_probe_unregister(const char *name, void *probe);
114 129
130extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
131extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
132extern void tracepoint_probe_update_all(void);
133
115struct tracepoint_iter { 134struct tracepoint_iter {
116 struct module *module; 135 struct module *module;
117 struct tracepoint *tracepoint; 136 struct tracepoint *tracepoint;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 580700f20a1c..3f4954c55e53 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -325,7 +325,7 @@ extern struct class *tty_class;
325 * go away 325 * go away
326 */ 326 */
327 327
328extern inline struct tty_struct *tty_kref_get(struct tty_struct *tty) 328static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
329{ 329{
330 if (tty) 330 if (tty)
331 kref_get(&tty->kref); 331 kref_get(&tty->kref);
diff --git a/include/trace/block.h b/include/trace/block.h
new file mode 100644
index 000000000000..25c6a1fd5b77
--- /dev/null
+++ b/include/trace/block.h
@@ -0,0 +1,76 @@
1#ifndef _TRACE_BLOCK_H
2#define _TRACE_BLOCK_H
3
4#include <linux/blkdev.h>
5#include <linux/tracepoint.h>
6
7DECLARE_TRACE(block_rq_abort,
8 TPPROTO(struct request_queue *q, struct request *rq),
9 TPARGS(q, rq));
10
11DECLARE_TRACE(block_rq_insert,
12 TPPROTO(struct request_queue *q, struct request *rq),
13 TPARGS(q, rq));
14
15DECLARE_TRACE(block_rq_issue,
16 TPPROTO(struct request_queue *q, struct request *rq),
17 TPARGS(q, rq));
18
19DECLARE_TRACE(block_rq_requeue,
20 TPPROTO(struct request_queue *q, struct request *rq),
21 TPARGS(q, rq));
22
23DECLARE_TRACE(block_rq_complete,
24 TPPROTO(struct request_queue *q, struct request *rq),
25 TPARGS(q, rq));
26
27DECLARE_TRACE(block_bio_bounce,
28 TPPROTO(struct request_queue *q, struct bio *bio),
29 TPARGS(q, bio));
30
31DECLARE_TRACE(block_bio_complete,
32 TPPROTO(struct request_queue *q, struct bio *bio),
33 TPARGS(q, bio));
34
35DECLARE_TRACE(block_bio_backmerge,
36 TPPROTO(struct request_queue *q, struct bio *bio),
37 TPARGS(q, bio));
38
39DECLARE_TRACE(block_bio_frontmerge,
40 TPPROTO(struct request_queue *q, struct bio *bio),
41 TPARGS(q, bio));
42
43DECLARE_TRACE(block_bio_queue,
44 TPPROTO(struct request_queue *q, struct bio *bio),
45 TPARGS(q, bio));
46
47DECLARE_TRACE(block_getrq,
48 TPPROTO(struct request_queue *q, struct bio *bio, int rw),
49 TPARGS(q, bio, rw));
50
51DECLARE_TRACE(block_sleeprq,
52 TPPROTO(struct request_queue *q, struct bio *bio, int rw),
53 TPARGS(q, bio, rw));
54
55DECLARE_TRACE(block_plug,
56 TPPROTO(struct request_queue *q),
57 TPARGS(q));
58
59DECLARE_TRACE(block_unplug_timer,
60 TPPROTO(struct request_queue *q),
61 TPARGS(q));
62
63DECLARE_TRACE(block_unplug_io,
64 TPPROTO(struct request_queue *q),
65 TPARGS(q));
66
67DECLARE_TRACE(block_split,
68 TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
69 TPARGS(q, bio, pdu));
70
71DECLARE_TRACE(block_remap,
72 TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev,
73 sector_t from, sector_t to),
74 TPARGS(q, bio, dev, from, to));
75
76#endif
diff --git a/include/trace/boot.h b/include/trace/boot.h
new file mode 100644
index 000000000000..088ea089e31d
--- /dev/null
+++ b/include/trace/boot.h
@@ -0,0 +1,60 @@
1#ifndef _LINUX_TRACE_BOOT_H
2#define _LINUX_TRACE_BOOT_H
3
4#include <linux/module.h>
5#include <linux/kallsyms.h>
6#include <linux/init.h>
7
8/*
9 * Structure which defines the trace of an initcall
10 * while it is called.
11 * You don't have to fill the func field since it is
12 * only used internally by the tracer.
13 */
14struct boot_trace_call {
15 pid_t caller;
16 char func[KSYM_SYMBOL_LEN];
17};
18
19/*
20 * Structure which defines the trace of an initcall
21 * while it returns.
22 */
23struct boot_trace_ret {
24 char func[KSYM_SYMBOL_LEN];
25 int result;
26 unsigned long long duration; /* nsecs */
27};
28
29#ifdef CONFIG_BOOT_TRACER
30/* Append the traces on the ring-buffer */
31extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
32extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
33
34/* Tells the tracer that smp_pre_initcall is finished.
35 * So we can start the tracing
36 */
37extern void start_boot_trace(void);
38
39/* Resume the tracing of other necessary events
40 * such as sched switches
41 */
42extern void enable_boot_trace(void);
43
44/* Suspend this tracing. Actually, only sched_switches tracing has
45 * to be suspended. Initcalls don't need it.
46 */
47extern void disable_boot_trace(void);
48#else
49static inline
50void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
51
52static inline
53void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
54
55static inline void start_boot_trace(void) { }
56static inline void enable_boot_trace(void) { }
57static inline void disable_boot_trace(void) { }
58#endif /* CONFIG_BOOT_TRACER */
59
60#endif /* _LINUX_TRACE_BOOT_H */
diff --git a/include/trace/sched.h b/include/trace/sched.h
index ad47369d01b5..0d81098ee9fc 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -4,52 +4,52 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6 6
7DEFINE_TRACE(sched_kthread_stop, 7DECLARE_TRACE(sched_kthread_stop,
8 TPPROTO(struct task_struct *t), 8 TPPROTO(struct task_struct *t),
9 TPARGS(t)); 9 TPARGS(t));
10 10
11DEFINE_TRACE(sched_kthread_stop_ret, 11DECLARE_TRACE(sched_kthread_stop_ret,
12 TPPROTO(int ret), 12 TPPROTO(int ret),
13 TPARGS(ret)); 13 TPARGS(ret));
14 14
15DEFINE_TRACE(sched_wait_task, 15DECLARE_TRACE(sched_wait_task,
16 TPPROTO(struct rq *rq, struct task_struct *p), 16 TPPROTO(struct rq *rq, struct task_struct *p),
17 TPARGS(rq, p)); 17 TPARGS(rq, p));
18 18
19DEFINE_TRACE(sched_wakeup, 19DECLARE_TRACE(sched_wakeup,
20 TPPROTO(struct rq *rq, struct task_struct *p), 20 TPPROTO(struct rq *rq, struct task_struct *p, int success),
21 TPARGS(rq, p)); 21 TPARGS(rq, p, success));
22 22
23DEFINE_TRACE(sched_wakeup_new, 23DECLARE_TRACE(sched_wakeup_new,
24 TPPROTO(struct rq *rq, struct task_struct *p), 24 TPPROTO(struct rq *rq, struct task_struct *p, int success),
25 TPARGS(rq, p)); 25 TPARGS(rq, p, success));
26 26
27DEFINE_TRACE(sched_switch, 27DECLARE_TRACE(sched_switch,
28 TPPROTO(struct rq *rq, struct task_struct *prev, 28 TPPROTO(struct rq *rq, struct task_struct *prev,
29 struct task_struct *next), 29 struct task_struct *next),
30 TPARGS(rq, prev, next)); 30 TPARGS(rq, prev, next));
31 31
32DEFINE_TRACE(sched_migrate_task, 32DECLARE_TRACE(sched_migrate_task,
33 TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), 33 TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
34 TPARGS(rq, p, dest_cpu)); 34 TPARGS(p, orig_cpu, dest_cpu));
35 35
36DEFINE_TRACE(sched_process_free, 36DECLARE_TRACE(sched_process_free,
37 TPPROTO(struct task_struct *p), 37 TPPROTO(struct task_struct *p),
38 TPARGS(p)); 38 TPARGS(p));
39 39
40DEFINE_TRACE(sched_process_exit, 40DECLARE_TRACE(sched_process_exit,
41 TPPROTO(struct task_struct *p), 41 TPPROTO(struct task_struct *p),
42 TPARGS(p)); 42 TPARGS(p));
43 43
44DEFINE_TRACE(sched_process_wait, 44DECLARE_TRACE(sched_process_wait,
45 TPPROTO(struct pid *pid), 45 TPPROTO(struct pid *pid),
46 TPARGS(pid)); 46 TPARGS(pid));
47 47
48DEFINE_TRACE(sched_process_fork, 48DECLARE_TRACE(sched_process_fork,
49 TPPROTO(struct task_struct *parent, struct task_struct *child), 49 TPPROTO(struct task_struct *parent, struct task_struct *child),
50 TPARGS(parent, child)); 50 TPARGS(parent, child));
51 51
52DEFINE_TRACE(sched_signal_send, 52DECLARE_TRACE(sched_signal_send,
53 TPPROTO(int sig, struct task_struct *p), 53 TPPROTO(int sig, struct task_struct *p),
54 TPARGS(sig, p)); 54 TPARGS(sig, p));
55 55
diff --git a/init/Kconfig b/init/Kconfig
index f763762d544a..f291f086caa1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -808,6 +808,7 @@ config TRACEPOINTS
808 808
809config MARKERS 809config MARKERS
810 bool "Activate markers" 810 bool "Activate markers"
811 depends on TRACEPOINTS
811 help 812 help
812 Place an empty function call at each marker site. Can be 813 Place an empty function call at each marker site. Can be
813 dynamically changed for a probe function. 814 dynamically changed for a probe function.
diff --git a/init/main.c b/init/main.c
index db843bff5732..17e9757bfde2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -63,6 +63,7 @@
63#include <linux/signal.h> 63#include <linux/signal.h>
64#include <linux/idr.h> 64#include <linux/idr.h>
65#include <linux/ftrace.h> 65#include <linux/ftrace.h>
66#include <trace/boot.h>
66 67
67#include <asm/io.h> 68#include <asm/io.h>
68#include <asm/bugs.h> 69#include <asm/bugs.h>
@@ -704,31 +705,35 @@ core_param(initcall_debug, initcall_debug, bool, 0644);
704int do_one_initcall(initcall_t fn) 705int do_one_initcall(initcall_t fn)
705{ 706{
706 int count = preempt_count(); 707 int count = preempt_count();
707 ktime_t delta; 708 ktime_t calltime, delta, rettime;
708 char msgbuf[64]; 709 char msgbuf[64];
709 struct boot_trace it; 710 struct boot_trace_call call;
711 struct boot_trace_ret ret;
710 712
711 if (initcall_debug) { 713 if (initcall_debug) {
712 it.caller = task_pid_nr(current); 714 call.caller = task_pid_nr(current);
713 printk("calling %pF @ %i\n", fn, it.caller); 715 printk("calling %pF @ %i\n", fn, call.caller);
714 it.calltime = ktime_get(); 716 calltime = ktime_get();
717 trace_boot_call(&call, fn);
718 enable_boot_trace();
715 } 719 }
716 720
717 it.result = fn(); 721 ret.result = fn();
718 722
719 if (initcall_debug) { 723 if (initcall_debug) {
720 it.rettime = ktime_get(); 724 disable_boot_trace();
721 delta = ktime_sub(it.rettime, it.calltime); 725 rettime = ktime_get();
722 it.duration = (unsigned long long) delta.tv64 >> 10; 726 delta = ktime_sub(rettime, calltime);
727 ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
728 trace_boot_ret(&ret, fn);
723 printk("initcall %pF returned %d after %Ld usecs\n", fn, 729 printk("initcall %pF returned %d after %Ld usecs\n", fn,
724 it.result, it.duration); 730 ret.result, ret.duration);
725 trace_boot(&it, fn);
726 } 731 }
727 732
728 msgbuf[0] = 0; 733 msgbuf[0] = 0;
729 734
730 if (it.result && it.result != -ENODEV && initcall_debug) 735 if (ret.result && ret.result != -ENODEV && initcall_debug)
731 sprintf(msgbuf, "error code %d ", it.result); 736 sprintf(msgbuf, "error code %d ", ret.result);
732 737
733 if (preempt_count() != count) { 738 if (preempt_count() != count) {
734 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); 739 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -742,7 +747,7 @@ int do_one_initcall(initcall_t fn)
742 printk("initcall %pF returned with %s\n", fn, msgbuf); 747 printk("initcall %pF returned with %s\n", fn, msgbuf);
743 } 748 }
744 749
745 return it.result; 750 return ret.result;
746} 751}
747 752
748 753
@@ -883,7 +888,7 @@ static int __init kernel_init(void * unused)
883 * we're essentially up and running. Get rid of the 888 * we're essentially up and running. Get rid of the
884 * initmem segments and start the user-mode stuff.. 889 * initmem segments and start the user-mode stuff..
885 */ 890 */
886 stop_boot_trace(); 891
887 init_post(); 892 init_post();
888 return 0; 893 return 0;
889} 894}
diff --git a/kernel/exit.c b/kernel/exit.c
index ccb87162ff62..c7422ca92038 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -55,6 +55,10 @@
55#include <asm/mmu_context.h> 55#include <asm/mmu_context.h>
56#include "cred-internals.h" 56#include "cred-internals.h"
57 57
58DEFINE_TRACE(sched_process_free);
59DEFINE_TRACE(sched_process_exit);
60DEFINE_TRACE(sched_process_wait);
61
58static void exit_mm(struct task_struct * tsk); 62static void exit_mm(struct task_struct * tsk);
59 63
60static inline int task_detached(struct task_struct *p) 64static inline int task_detached(struct task_struct *p)
@@ -1127,7 +1131,6 @@ NORET_TYPE void do_exit(long code)
1127 preempt_disable(); 1131 preempt_disable();
1128 /* causes final put_task_struct in finish_task_switch(). */ 1132 /* causes final put_task_struct in finish_task_switch(). */
1129 tsk->state = TASK_DEAD; 1133 tsk->state = TASK_DEAD;
1130
1131 schedule(); 1134 schedule();
1132 BUG(); 1135 BUG();
1133 /* Avoid "noreturn function does return". */ 1136 /* Avoid "noreturn function does return". */
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..feb0317cf09a 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -17,6 +17,7 @@
17*/ 17*/
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/ftrace.h>
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include <asm/sections.h> 22#include <asm/sections.h>
22 23
@@ -40,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
40 return e; 41 return e;
41} 42}
42 43
43int core_kernel_text(unsigned long addr) 44__notrace_funcgraph int core_kernel_text(unsigned long addr)
44{ 45{
45 if (addr >= (unsigned long)_stext && 46 if (addr >= (unsigned long)_stext &&
46 addr <= (unsigned long)_etext) 47 addr <= (unsigned long)_etext)
@@ -53,7 +54,7 @@ int core_kernel_text(unsigned long addr)
53 return 0; 54 return 0;
54} 55}
55 56
56int __kernel_text_address(unsigned long addr) 57__notrace_funcgraph int __kernel_text_address(unsigned long addr)
57{ 58{
58 if (core_kernel_text(addr)) 59 if (core_kernel_text(addr))
59 return 1; 60 return 1;
diff --git a/kernel/fork.c b/kernel/fork.c
index 4e8ca23c0ede..6144b36cd897 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -47,6 +47,7 @@
47#include <linux/mount.h> 47#include <linux/mount.h>
48#include <linux/audit.h> 48#include <linux/audit.h>
49#include <linux/memcontrol.h> 49#include <linux/memcontrol.h>
50#include <linux/ftrace.h>
50#include <linux/profile.h> 51#include <linux/profile.h>
51#include <linux/rmap.h> 52#include <linux/rmap.h>
52#include <linux/acct.h> 53#include <linux/acct.h>
@@ -80,6 +81,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
80 81
81__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 82__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
82 83
84DEFINE_TRACE(sched_process_fork);
85
83int nr_processes(void) 86int nr_processes(void)
84{ 87{
85 int cpu; 88 int cpu;
@@ -137,6 +140,7 @@ void free_task(struct task_struct *tsk)
137 prop_local_destroy_single(&tsk->dirties); 140 prop_local_destroy_single(&tsk->dirties);
138 free_thread_info(tsk->stack); 141 free_thread_info(tsk->stack);
139 rt_mutex_debug_task_free(tsk); 142 rt_mutex_debug_task_free(tsk);
143 ftrace_graph_exit_task(tsk);
140 free_task_struct(tsk); 144 free_task_struct(tsk);
141} 145}
142EXPORT_SYMBOL(free_task); 146EXPORT_SYMBOL(free_task);
@@ -1080,6 +1084,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1080#ifdef CONFIG_DEBUG_MUTEXES 1084#ifdef CONFIG_DEBUG_MUTEXES
1081 p->blocked_on = NULL; /* not blocked yet */ 1085 p->blocked_on = NULL; /* not blocked yet */
1082#endif 1086#endif
1087 if (unlikely(ptrace_reparented(current)))
1088 ptrace_fork(p, clone_flags);
1083 1089
1084 /* Perform scheduler related setup. Assign this task to a CPU. */ 1090 /* Perform scheduler related setup. Assign this task to a CPU. */
1085 sched_fork(p, clone_flags); 1091 sched_fork(p, clone_flags);
@@ -1120,6 +1126,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1120 } 1126 }
1121 } 1127 }
1122 1128
1129 ftrace_graph_init_task(p);
1130
1123 p->pid = pid_nr(pid); 1131 p->pid = pid_nr(pid);
1124 p->tgid = p->pid; 1132 p->tgid = p->pid;
1125 if (clone_flags & CLONE_THREAD) 1133 if (clone_flags & CLONE_THREAD)
@@ -1128,7 +1136,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1128 if (current->nsproxy != p->nsproxy) { 1136 if (current->nsproxy != p->nsproxy) {
1129 retval = ns_cgroup_clone(p, pid); 1137 retval = ns_cgroup_clone(p, pid);
1130 if (retval) 1138 if (retval)
1131 goto bad_fork_free_pid; 1139 goto bad_fork_free_graph;
1132 } 1140 }
1133 1141
1134 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1142 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1221,7 +1229,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1221 spin_unlock(&current->sighand->siglock); 1229 spin_unlock(&current->sighand->siglock);
1222 write_unlock_irq(&tasklist_lock); 1230 write_unlock_irq(&tasklist_lock);
1223 retval = -ERESTARTNOINTR; 1231 retval = -ERESTARTNOINTR;
1224 goto bad_fork_free_pid; 1232 goto bad_fork_free_graph;
1225 } 1233 }
1226 1234
1227 if (clone_flags & CLONE_THREAD) { 1235 if (clone_flags & CLONE_THREAD) {
@@ -1258,6 +1266,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1258 cgroup_post_fork(p); 1266 cgroup_post_fork(p);
1259 return p; 1267 return p;
1260 1268
1269bad_fork_free_graph:
1270 ftrace_graph_exit_task(p);
1261bad_fork_free_pid: 1271bad_fork_free_pid:
1262 if (pid != &init_struct_pid) 1272 if (pid != &init_struct_pid)
1263 free_pid(pid); 1273 free_pid(pid);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e7a7ce3ed0a..4fbc456f393d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
21static LIST_HEAD(kthread_create_list); 21static LIST_HEAD(kthread_create_list);
22struct task_struct *kthreadd_task; 22struct task_struct *kthreadd_task;
23 23
24DEFINE_TRACE(sched_kthread_stop);
25DEFINE_TRACE(sched_kthread_stop_ret);
26
24struct kthread_create_info 27struct kthread_create_info
25{ 28{
26 /* Information passed to kthread() from kthreadd. */ 29 /* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 46a404173db2..74b1878b8bb8 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -25,6 +25,7 @@
25 * Thanks to Arjan van de Ven for coming up with the initial idea of 25 * Thanks to Arjan van de Ven for coming up with the initial idea of
26 * mapping lock dependencies runtime. 26 * mapping lock dependencies runtime.
27 */ 27 */
28#define DISABLE_BRANCH_PROFILING
28#include <linux/mutex.h> 29#include <linux/mutex.h>
29#include <linux/sched.h> 30#include <linux/sched.h>
30#include <linux/delay.h> 31#include <linux/delay.h>
diff --git a/kernel/marker.c b/kernel/marker.c
index e9c6b2bc9400..ea54f2647868 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex);
43 */ 43 */
44#define MARKER_HASH_BITS 6 44#define MARKER_HASH_BITS 6
45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) 45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
46static struct hlist_head marker_table[MARKER_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -64,11 +65,10 @@ struct marker_entry {
64 void *oldptr; 65 void *oldptr;
65 int rcu_pending; 66 int rcu_pending;
66 unsigned char ptype:1; 67 unsigned char ptype:1;
68 unsigned char format_allocated:1;
67 char name[0]; /* Contains name'\0'format'\0' */ 69 char name[0]; /* Contains name'\0'format'\0' */
68}; 70};
69 71
70static struct hlist_head marker_table[MARKER_TABLE_SIZE];
71
72/** 72/**
73 * __mark_empty_function - Empty probe callback 73 * __mark_empty_function - Empty probe callback
74 * @probe_private: probe private data 74 * @probe_private: probe private data
@@ -81,7 +81,7 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
81 * though the function pointer change and the marker enabling are two distinct 81 * though the function pointer change and the marker enabling are two distinct
82 * operations that modifies the execution flow of preemptible code. 82 * operations that modifies the execution flow of preemptible code.
83 */ 83 */
84void __mark_empty_function(void *probe_private, void *call_private, 84notrace void __mark_empty_function(void *probe_private, void *call_private,
85 const char *fmt, va_list *args) 85 const char *fmt, va_list *args)
86{ 86{
87} 87}
@@ -97,7 +97,8 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
97 * need to put a full smp_rmb() in this branch. This is why we do not use 97 * need to put a full smp_rmb() in this branch. This is why we do not use
98 * rcu_dereference() for the pointer read. 98 * rcu_dereference() for the pointer read.
99 */ 99 */
100void marker_probe_cb(const struct marker *mdata, void *call_private, ...) 100notrace void marker_probe_cb(const struct marker *mdata,
101 void *call_private, ...)
101{ 102{
102 va_list args; 103 va_list args;
103 char ptype; 104 char ptype;
@@ -107,7 +108,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
107 * sure the teardown of the callbacks can be done correctly when they 108 * sure the teardown of the callbacks can be done correctly when they
108 * are in modules and they insure RCU read coherency. 109 * are in modules and they insure RCU read coherency.
109 */ 110 */
110 rcu_read_lock_sched(); 111 rcu_read_lock_sched_notrace();
111 ptype = mdata->ptype; 112 ptype = mdata->ptype;
112 if (likely(!ptype)) { 113 if (likely(!ptype)) {
113 marker_probe_func *func; 114 marker_probe_func *func;
@@ -145,7 +146,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
145 va_end(args); 146 va_end(args);
146 } 147 }
147 } 148 }
148 rcu_read_unlock_sched(); 149 rcu_read_unlock_sched_notrace();
149} 150}
150EXPORT_SYMBOL_GPL(marker_probe_cb); 151EXPORT_SYMBOL_GPL(marker_probe_cb);
151 152
@@ -157,12 +158,13 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
157 * 158 *
158 * Should be connected to markers "MARK_NOARGS". 159 * Should be connected to markers "MARK_NOARGS".
159 */ 160 */
160void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) 161static notrace void marker_probe_cb_noarg(const struct marker *mdata,
162 void *call_private, ...)
161{ 163{
162 va_list args; /* not initialized */ 164 va_list args; /* not initialized */
163 char ptype; 165 char ptype;
164 166
165 rcu_read_lock_sched(); 167 rcu_read_lock_sched_notrace();
166 ptype = mdata->ptype; 168 ptype = mdata->ptype;
167 if (likely(!ptype)) { 169 if (likely(!ptype)) {
168 marker_probe_func *func; 170 marker_probe_func *func;
@@ -195,9 +197,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
195 multi[i].func(multi[i].probe_private, call_private, 197 multi[i].func(multi[i].probe_private, call_private,
196 mdata->format, &args); 198 mdata->format, &args);
197 } 199 }
198 rcu_read_unlock_sched(); 200 rcu_read_unlock_sched_notrace();
199} 201}
200EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
201 202
202static void free_old_closure(struct rcu_head *head) 203static void free_old_closure(struct rcu_head *head)
203{ 204{
@@ -416,6 +417,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
416 e->single.probe_private = NULL; 417 e->single.probe_private = NULL;
417 e->multi = NULL; 418 e->multi = NULL;
418 e->ptype = 0; 419 e->ptype = 0;
420 e->format_allocated = 0;
419 e->refcount = 0; 421 e->refcount = 0;
420 e->rcu_pending = 0; 422 e->rcu_pending = 0;
421 hlist_add_head(&e->hlist, head); 423 hlist_add_head(&e->hlist, head);
@@ -447,6 +449,8 @@ static int remove_marker(const char *name)
447 if (e->single.func != __mark_empty_function) 449 if (e->single.func != __mark_empty_function)
448 return -EBUSY; 450 return -EBUSY;
449 hlist_del(&e->hlist); 451 hlist_del(&e->hlist);
452 if (e->format_allocated)
453 kfree(e->format);
450 /* Make sure the call_rcu has been executed */ 454 /* Make sure the call_rcu has been executed */
451 if (e->rcu_pending) 455 if (e->rcu_pending)
452 rcu_barrier_sched(); 456 rcu_barrier_sched();
@@ -457,57 +461,34 @@ static int remove_marker(const char *name)
457/* 461/*
458 * Set the mark_entry format to the format found in the element. 462 * Set the mark_entry format to the format found in the element.
459 */ 463 */
460static int marker_set_format(struct marker_entry **entry, const char *format) 464static int marker_set_format(struct marker_entry *entry, const char *format)
461{ 465{
462 struct marker_entry *e; 466 entry->format = kstrdup(format, GFP_KERNEL);
463 size_t name_len = strlen((*entry)->name) + 1; 467 if (!entry->format)
464 size_t format_len = strlen(format) + 1;
465
466
467 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
468 GFP_KERNEL);
469 if (!e)
470 return -ENOMEM; 468 return -ENOMEM;
471 memcpy(&e->name[0], (*entry)->name, name_len); 469 entry->format_allocated = 1;
472 e->format = &e->name[name_len]; 470
473 memcpy(e->format, format, format_len);
474 if (strcmp(e->format, MARK_NOARGS) == 0)
475 e->call = marker_probe_cb_noarg;
476 else
477 e->call = marker_probe_cb;
478 e->single = (*entry)->single;
479 e->multi = (*entry)->multi;
480 e->ptype = (*entry)->ptype;
481 e->refcount = (*entry)->refcount;
482 e->rcu_pending = 0;
483 hlist_add_before(&e->hlist, &(*entry)->hlist);
484 hlist_del(&(*entry)->hlist);
485 /* Make sure the call_rcu has been executed */
486 if ((*entry)->rcu_pending)
487 rcu_barrier_sched();
488 kfree(*entry);
489 *entry = e;
490 trace_mark(core_marker_format, "name %s format %s", 471 trace_mark(core_marker_format, "name %s format %s",
491 e->name, e->format); 472 entry->name, entry->format);
492 return 0; 473 return 0;
493} 474}
494 475
495/* 476/*
496 * Sets the probe callback corresponding to one marker. 477 * Sets the probe callback corresponding to one marker.
497 */ 478 */
498static int set_marker(struct marker_entry **entry, struct marker *elem, 479static int set_marker(struct marker_entry *entry, struct marker *elem,
499 int active) 480 int active)
500{ 481{
501 int ret; 482 int ret = 0;
502 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 483 WARN_ON(strcmp(entry->name, elem->name) != 0);
503 484
504 if ((*entry)->format) { 485 if (entry->format) {
505 if (strcmp((*entry)->format, elem->format) != 0) { 486 if (strcmp(entry->format, elem->format) != 0) {
506 printk(KERN_NOTICE 487 printk(KERN_NOTICE
507 "Format mismatch for probe %s " 488 "Format mismatch for probe %s "
508 "(%s), marker (%s)\n", 489 "(%s), marker (%s)\n",
509 (*entry)->name, 490 entry->name,
510 (*entry)->format, 491 entry->format,
511 elem->format); 492 elem->format);
512 return -EPERM; 493 return -EPERM;
513 } 494 }
@@ -523,37 +504,67 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
523 * pass from a "safe" callback (with argument) to an "unsafe" 504 * pass from a "safe" callback (with argument) to an "unsafe"
524 * callback (does not set arguments). 505 * callback (does not set arguments).
525 */ 506 */
526 elem->call = (*entry)->call; 507 elem->call = entry->call;
527 /* 508 /*
528 * Sanity check : 509 * Sanity check :
529 * We only update the single probe private data when the ptr is 510 * We only update the single probe private data when the ptr is
530 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) 511 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
531 */ 512 */
532 WARN_ON(elem->single.func != __mark_empty_function 513 WARN_ON(elem->single.func != __mark_empty_function
533 && elem->single.probe_private 514 && elem->single.probe_private != entry->single.probe_private
534 != (*entry)->single.probe_private && 515 && !elem->ptype);
535 !elem->ptype); 516 elem->single.probe_private = entry->single.probe_private;
536 elem->single.probe_private = (*entry)->single.probe_private;
537 /* 517 /*
538 * Make sure the private data is valid when we update the 518 * Make sure the private data is valid when we update the
539 * single probe ptr. 519 * single probe ptr.
540 */ 520 */
541 smp_wmb(); 521 smp_wmb();
542 elem->single.func = (*entry)->single.func; 522 elem->single.func = entry->single.func;
543 /* 523 /*
544 * We also make sure that the new probe callbacks array is consistent 524 * We also make sure that the new probe callbacks array is consistent
545 * before setting a pointer to it. 525 * before setting a pointer to it.
546 */ 526 */
547 rcu_assign_pointer(elem->multi, (*entry)->multi); 527 rcu_assign_pointer(elem->multi, entry->multi);
548 /* 528 /*
549 * Update the function or multi probe array pointer before setting the 529 * Update the function or multi probe array pointer before setting the
550 * ptype. 530 * ptype.
551 */ 531 */
552 smp_wmb(); 532 smp_wmb();
553 elem->ptype = (*entry)->ptype; 533 elem->ptype = entry->ptype;
534
535 if (elem->tp_name && (active ^ elem->state)) {
536 WARN_ON(!elem->tp_cb);
537 /*
538 * It is ok to directly call the probe registration because type
539 * checking has been done in the __trace_mark_tp() macro.
540 */
541
542 if (active) {
543 /*
544 * try_module_get should always succeed because we hold
545 * lock_module() to get the tp_cb address.
546 */
547 ret = try_module_get(__module_text_address(
548 (unsigned long)elem->tp_cb));
549 BUG_ON(!ret);
550 ret = tracepoint_probe_register_noupdate(
551 elem->tp_name,
552 elem->tp_cb);
553 } else {
554 ret = tracepoint_probe_unregister_noupdate(
555 elem->tp_name,
556 elem->tp_cb);
557 /*
558 * tracepoint_probe_update_all() must be called
559 * before the module containing tp_cb is unloaded.
560 */
561 module_put(__module_text_address(
562 (unsigned long)elem->tp_cb));
563 }
564 }
554 elem->state = active; 565 elem->state = active;
555 566
556 return 0; 567 return ret;
557} 568}
558 569
559/* 570/*
@@ -564,7 +575,24 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
564 */ 575 */
565static void disable_marker(struct marker *elem) 576static void disable_marker(struct marker *elem)
566{ 577{
578 int ret;
579
567 /* leave "call" as is. It is known statically. */ 580 /* leave "call" as is. It is known statically. */
581 if (elem->tp_name && elem->state) {
582 WARN_ON(!elem->tp_cb);
583 /*
584 * It is ok to directly call the probe registration because type
585 * checking has been done in the __trace_mark_tp() macro.
586 */
587 ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
588 elem->tp_cb);
589 WARN_ON(ret);
590 /*
591 * tracepoint_probe_update_all() must be called
592 * before the module containing tp_cb is unloaded.
593 */
594 module_put(__module_text_address((unsigned long)elem->tp_cb));
595 }
568 elem->state = 0; 596 elem->state = 0;
569 elem->single.func = __mark_empty_function; 597 elem->single.func = __mark_empty_function;
570 /* Update the function before setting the ptype */ 598 /* Update the function before setting the ptype */
@@ -594,8 +622,7 @@ void marker_update_probe_range(struct marker *begin,
594 for (iter = begin; iter < end; iter++) { 622 for (iter = begin; iter < end; iter++) {
595 mark_entry = get_marker(iter->name); 623 mark_entry = get_marker(iter->name);
596 if (mark_entry) { 624 if (mark_entry) {
597 set_marker(&mark_entry, iter, 625 set_marker(mark_entry, iter, !!mark_entry->refcount);
598 !!mark_entry->refcount);
599 /* 626 /*
600 * ignore error, continue 627 * ignore error, continue
601 */ 628 */
@@ -629,6 +656,7 @@ static void marker_update_probes(void)
629 marker_update_probe_range(__start___markers, __stop___markers); 656 marker_update_probe_range(__start___markers, __stop___markers);
630 /* Markers in modules. */ 657 /* Markers in modules. */
631 module_update_markers(); 658 module_update_markers();
659 tracepoint_probe_update_all();
632} 660}
633 661
634/** 662/**
@@ -657,7 +685,7 @@ int marker_probe_register(const char *name, const char *format,
657 ret = PTR_ERR(entry); 685 ret = PTR_ERR(entry);
658 } else if (format) { 686 } else if (format) {
659 if (!entry->format) 687 if (!entry->format)
660 ret = marker_set_format(&entry, format); 688 ret = marker_set_format(entry, format);
661 else if (strcmp(entry->format, format)) 689 else if (strcmp(entry->format, format))
662 ret = -EPERM; 690 ret = -EPERM;
663 } 691 }
@@ -676,10 +704,11 @@ int marker_probe_register(const char *name, const char *format,
676 goto end; 704 goto end;
677 } 705 }
678 mutex_unlock(&markers_mutex); 706 mutex_unlock(&markers_mutex);
679 marker_update_probes(); /* may update entry */ 707 marker_update_probes();
680 mutex_lock(&markers_mutex); 708 mutex_lock(&markers_mutex);
681 entry = get_marker(name); 709 entry = get_marker(name);
682 WARN_ON(!entry); 710 if (!entry)
711 goto end;
683 if (entry->rcu_pending) 712 if (entry->rcu_pending)
684 rcu_barrier_sched(); 713 rcu_barrier_sched();
685 entry->oldptr = old; 714 entry->oldptr = old;
@@ -720,7 +749,7 @@ int marker_probe_unregister(const char *name,
720 rcu_barrier_sched(); 749 rcu_barrier_sched();
721 old = marker_entry_remove_probe(entry, probe, probe_private); 750 old = marker_entry_remove_probe(entry, probe, probe_private);
722 mutex_unlock(&markers_mutex); 751 mutex_unlock(&markers_mutex);
723 marker_update_probes(); /* may update entry */ 752 marker_update_probes();
724 mutex_lock(&markers_mutex); 753 mutex_lock(&markers_mutex);
725 entry = get_marker(name); 754 entry = get_marker(name);
726 if (!entry) 755 if (!entry)
@@ -801,10 +830,11 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
801 rcu_barrier_sched(); 830 rcu_barrier_sched();
802 old = marker_entry_remove_probe(entry, NULL, probe_private); 831 old = marker_entry_remove_probe(entry, NULL, probe_private);
803 mutex_unlock(&markers_mutex); 832 mutex_unlock(&markers_mutex);
804 marker_update_probes(); /* may update entry */ 833 marker_update_probes();
805 mutex_lock(&markers_mutex); 834 mutex_lock(&markers_mutex);
806 entry = get_marker_from_private_data(probe, probe_private); 835 entry = get_marker_from_private_data(probe, probe_private);
807 WARN_ON(!entry); 836 if (!entry)
837 goto end;
808 if (entry->rcu_pending) 838 if (entry->rcu_pending)
809 rcu_barrier_sched(); 839 rcu_barrier_sched();
810 entry->oldptr = old; 840 entry->oldptr = old;
@@ -848,8 +878,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
848 if (!e->ptype) { 878 if (!e->ptype) {
849 if (num == 0 && e->single.func == probe) 879 if (num == 0 && e->single.func == probe)
850 return e->single.probe_private; 880 return e->single.probe_private;
851 else
852 break;
853 } else { 881 } else {
854 struct marker_probe_closure *closure; 882 struct marker_probe_closure *closure;
855 int match = 0; 883 int match = 0;
@@ -861,8 +889,42 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
861 return closure[i].probe_private; 889 return closure[i].probe_private;
862 } 890 }
863 } 891 }
892 break;
864 } 893 }
865 } 894 }
866 return ERR_PTR(-ENOENT); 895 return ERR_PTR(-ENOENT);
867} 896}
868EXPORT_SYMBOL_GPL(marker_get_private_data); 897EXPORT_SYMBOL_GPL(marker_get_private_data);
898
899#ifdef CONFIG_MODULES
900
901int marker_module_notify(struct notifier_block *self,
902 unsigned long val, void *data)
903{
904 struct module *mod = data;
905
906 switch (val) {
907 case MODULE_STATE_COMING:
908 marker_update_probe_range(mod->markers,
909 mod->markers + mod->num_markers);
910 break;
911 case MODULE_STATE_GOING:
912 marker_update_probe_range(mod->markers,
913 mod->markers + mod->num_markers);
914 break;
915 }
916 return 0;
917}
918
919struct notifier_block marker_module_nb = {
920 .notifier_call = marker_module_notify,
921 .priority = 0,
922};
923
924static int init_markers(void)
925{
926 return register_module_notifier(&marker_module_nb);
927}
928__initcall(init_markers);
929
930#endif /* CONFIG_MODULES */
diff --git a/kernel/module.c b/kernel/module.c
index 1f4cc00e0c20..dd2a54155b54 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2184,24 +2184,15 @@ static noinline struct module *load_module(void __user *umod,
2184 struct mod_debug *debug; 2184 struct mod_debug *debug;
2185 unsigned int num_debug; 2185 unsigned int num_debug;
2186 2186
2187#ifdef CONFIG_MARKERS
2188 marker_update_probe_range(mod->markers,
2189 mod->markers + mod->num_markers);
2190#endif
2191 debug = section_objs(hdr, sechdrs, secstrings, "__verbose", 2187 debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
2192 sizeof(*debug), &num_debug); 2188 sizeof(*debug), &num_debug);
2193 dynamic_printk_setup(debug, num_debug); 2189 dynamic_printk_setup(debug, num_debug);
2194
2195#ifdef CONFIG_TRACEPOINTS
2196 tracepoint_update_probe_range(mod->tracepoints,
2197 mod->tracepoints + mod->num_tracepoints);
2198#endif
2199 } 2190 }
2200 2191
2201 /* sechdrs[0].sh_size is always zero */ 2192 /* sechdrs[0].sh_size is always zero */
2202 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", 2193 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
2203 sizeof(*mseg), &num_mcount); 2194 sizeof(*mseg), &num_mcount);
2204 ftrace_init_module(mseg, mseg + num_mcount); 2195 ftrace_init_module(mod, mseg, mseg + num_mcount);
2205 2196
2206 err = module_finalize(hdr, sechdrs, mod); 2197 err = module_finalize(hdr, sechdrs, mod);
2207 if (err < 0) 2198 if (err < 0)
@@ -2713,7 +2704,7 @@ int is_module_address(unsigned long addr)
2713 2704
2714 2705
2715/* Is this a valid kernel address? */ 2706/* Is this a valid kernel address? */
2716struct module *__module_text_address(unsigned long addr) 2707__notrace_funcgraph struct module *__module_text_address(unsigned long addr)
2717{ 2708{
2718 struct module *mod; 2709 struct module *mod;
2719 2710
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index c9d74083746f..f77d3819ef57 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,7 +22,6 @@
22#include <linux/console.h> 22#include <linux/console.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -257,7 +256,7 @@ static int create_image(int platform_mode)
257 256
258int hibernation_snapshot(int platform_mode) 257int hibernation_snapshot(int platform_mode)
259{ 258{
260 int error, ftrace_save; 259 int error;
261 260
262 /* Free memory before shutting down devices. */ 261 /* Free memory before shutting down devices. */
263 error = swsusp_shrink_memory(); 262 error = swsusp_shrink_memory();
@@ -269,7 +268,6 @@ int hibernation_snapshot(int platform_mode)
269 goto Close; 268 goto Close;
270 269
271 suspend_console(); 270 suspend_console();
272 ftrace_save = __ftrace_enabled_save();
273 error = device_suspend(PMSG_FREEZE); 271 error = device_suspend(PMSG_FREEZE);
274 if (error) 272 if (error)
275 goto Recover_platform; 273 goto Recover_platform;
@@ -299,7 +297,6 @@ int hibernation_snapshot(int platform_mode)
299 Resume_devices: 297 Resume_devices:
300 device_resume(in_suspend ? 298 device_resume(in_suspend ?
301 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 299 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
302 __ftrace_enabled_restore(ftrace_save);
303 resume_console(); 300 resume_console();
304 Close: 301 Close:
305 platform_end(platform_mode); 302 platform_end(platform_mode);
@@ -370,11 +367,10 @@ static int resume_target_kernel(void)
370 367
371int hibernation_restore(int platform_mode) 368int hibernation_restore(int platform_mode)
372{ 369{
373 int error, ftrace_save; 370 int error;
374 371
375 pm_prepare_console(); 372 pm_prepare_console();
376 suspend_console(); 373 suspend_console();
377 ftrace_save = __ftrace_enabled_save();
378 error = device_suspend(PMSG_QUIESCE); 374 error = device_suspend(PMSG_QUIESCE);
379 if (error) 375 if (error)
380 goto Finish; 376 goto Finish;
@@ -389,7 +385,6 @@ int hibernation_restore(int platform_mode)
389 platform_restore_cleanup(platform_mode); 385 platform_restore_cleanup(platform_mode);
390 device_resume(PMSG_RECOVER); 386 device_resume(PMSG_RECOVER);
391 Finish: 387 Finish:
392 __ftrace_enabled_restore(ftrace_save);
393 resume_console(); 388 resume_console();
394 pm_restore_console(); 389 pm_restore_console();
395 return error; 390 return error;
@@ -402,7 +397,7 @@ int hibernation_restore(int platform_mode)
402 397
403int hibernation_platform_enter(void) 398int hibernation_platform_enter(void)
404{ 399{
405 int error, ftrace_save; 400 int error;
406 401
407 if (!hibernation_ops) 402 if (!hibernation_ops)
408 return -ENOSYS; 403 return -ENOSYS;
@@ -417,7 +412,6 @@ int hibernation_platform_enter(void)
417 goto Close; 412 goto Close;
418 413
419 suspend_console(); 414 suspend_console();
420 ftrace_save = __ftrace_enabled_save();
421 error = device_suspend(PMSG_HIBERNATE); 415 error = device_suspend(PMSG_HIBERNATE);
422 if (error) { 416 if (error) {
423 if (hibernation_ops->recover) 417 if (hibernation_ops->recover)
@@ -452,7 +446,6 @@ int hibernation_platform_enter(void)
452 hibernation_ops->finish(); 446 hibernation_ops->finish();
453 Resume_devices: 447 Resume_devices:
454 device_resume(PMSG_RESTORE); 448 device_resume(PMSG_RESTORE);
455 __ftrace_enabled_restore(ftrace_save);
456 resume_console(); 449 resume_console();
457 Close: 450 Close:
458 hibernation_ops->end(); 451 hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b8f7ce9473e8..613f16941b85 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -22,7 +22,6 @@
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/vmstat.h> 23#include <linux/vmstat.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -317,7 +316,7 @@ static int suspend_enter(suspend_state_t state)
317 */ 316 */
318int suspend_devices_and_enter(suspend_state_t state) 317int suspend_devices_and_enter(suspend_state_t state)
319{ 318{
320 int error, ftrace_save; 319 int error;
321 320
322 if (!suspend_ops) 321 if (!suspend_ops)
323 return -ENOSYS; 322 return -ENOSYS;
@@ -328,7 +327,6 @@ int suspend_devices_and_enter(suspend_state_t state)
328 goto Close; 327 goto Close;
329 } 328 }
330 suspend_console(); 329 suspend_console();
331 ftrace_save = __ftrace_enabled_save();
332 suspend_test_start(); 330 suspend_test_start();
333 error = device_suspend(PMSG_SUSPEND); 331 error = device_suspend(PMSG_SUSPEND);
334 if (error) { 332 if (error) {
@@ -360,7 +358,6 @@ int suspend_devices_and_enter(suspend_state_t state)
360 suspend_test_start(); 358 suspend_test_start();
361 device_resume(PMSG_RESUME); 359 device_resume(PMSG_RESUME);
362 suspend_test_finish("resume devices"); 360 suspend_test_finish("resume devices");
363 __ftrace_enabled_restore(ftrace_save);
364 resume_console(); 361 resume_console();
365 Close: 362 Close:
366 if (suspend_ops->end) 363 if (suspend_ops->end)
diff --git a/kernel/profile.c b/kernel/profile.c
index dc41827fbfee..60adefb59b5e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
544}; 544};
545 545
546#ifdef CONFIG_SMP 546#ifdef CONFIG_SMP
547static inline void profile_nop(void *unused) 547static void profile_nop(void *unused)
548{ 548{
549} 549}
550 550
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ca2df68faf76..29dc700e198c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -25,6 +25,17 @@
25#include <asm/pgtable.h> 25#include <asm/pgtable.h>
26#include <asm/uaccess.h> 26#include <asm/uaccess.h>
27 27
28
29/*
30 * Initialize a new task whose father had been ptraced.
31 *
32 * Called from copy_process().
33 */
34void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
35{
36 arch_ptrace_fork(child, clone_flags);
37}
38
28/* 39/*
29 * ptrace a task: make the debugger its new parent and 40 * ptrace a task: make the debugger its new parent and
30 * move it to the ptrace list. 41 * move it to the ptrace list.
@@ -72,6 +83,7 @@ void __ptrace_unlink(struct task_struct *child)
72 child->parent = child->real_parent; 83 child->parent = child->real_parent;
73 list_del_init(&child->ptrace_entry); 84 list_del_init(&child->ptrace_entry);
74 85
86 arch_ptrace_untrace(child);
75 if (task_is_traced(child)) 87 if (task_is_traced(child))
76 ptrace_untrace(child); 88 ptrace_untrace(child);
77} 89}
diff --git a/kernel/sched.c b/kernel/sched.c
index 33cf4a1cbcd1..3798b954e6e8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -118,6 +118,12 @@
118 */ 118 */
119#define RUNTIME_INF ((u64)~0ULL) 119#define RUNTIME_INF ((u64)~0ULL)
120 120
121DEFINE_TRACE(sched_wait_task);
122DEFINE_TRACE(sched_wakeup);
123DEFINE_TRACE(sched_wakeup_new);
124DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task);
126
121#ifdef CONFIG_SMP 127#ifdef CONFIG_SMP
122/* 128/*
123 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) 129 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@ -1847,6 +1853,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1847 1853
1848 clock_offset = old_rq->clock - new_rq->clock; 1854 clock_offset = old_rq->clock - new_rq->clock;
1849 1855
1856 trace_sched_migrate_task(p, task_cpu(p), new_cpu);
1857
1850#ifdef CONFIG_SCHEDSTATS 1858#ifdef CONFIG_SCHEDSTATS
1851 if (p->se.wait_start) 1859 if (p->se.wait_start)
1852 p->se.wait_start -= clock_offset; 1860 p->se.wait_start -= clock_offset;
@@ -2318,7 +2326,7 @@ out_activate:
2318 success = 1; 2326 success = 1;
2319 2327
2320out_running: 2328out_running:
2321 trace_sched_wakeup(rq, p); 2329 trace_sched_wakeup(rq, p, success);
2322 check_preempt_curr(rq, p, sync); 2330 check_preempt_curr(rq, p, sync);
2323 2331
2324 p->state = TASK_RUNNING; 2332 p->state = TASK_RUNNING;
@@ -2451,7 +2459,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2451 p->sched_class->task_new(rq, p); 2459 p->sched_class->task_new(rq, p);
2452 inc_nr_running(rq); 2460 inc_nr_running(rq);
2453 } 2461 }
2454 trace_sched_wakeup_new(rq, p); 2462 trace_sched_wakeup_new(rq, p, 1);
2455 check_preempt_curr(rq, p, 0); 2463 check_preempt_curr(rq, p, 0);
2456#ifdef CONFIG_SMP 2464#ifdef CONFIG_SMP
2457 if (p->sched_class->task_wake_up) 2465 if (p->sched_class->task_wake_up)
@@ -2864,7 +2872,6 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2864 || unlikely(!cpu_active(dest_cpu))) 2872 || unlikely(!cpu_active(dest_cpu)))
2865 goto out; 2873 goto out;
2866 2874
2867 trace_sched_migrate_task(rq, p, dest_cpu);
2868 /* force the process onto the specified CPU */ 2875 /* force the process onto the specified CPU */
2869 if (migrate_task(p, dest_cpu, &req)) { 2876 if (migrate_task(p, dest_cpu, &req)) {
2870 /* Need to wait for migration thread (might exit: take ref). */ 2877 /* Need to wait for migration thread (might exit: take ref). */
@@ -5912,6 +5919,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5912 * The idle tasks have their own, simple scheduling class: 5919 * The idle tasks have their own, simple scheduling class:
5913 */ 5920 */
5914 idle->sched_class = &idle_sched_class; 5921 idle->sched_class = &idle_sched_class;
5922 ftrace_graph_init_task(idle);
5915} 5923}
5916 5924
5917/* 5925/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 2a64304ed54b..8e95855ff3cf 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,6 +41,8 @@
41 41
42static struct kmem_cache *sigqueue_cachep; 42static struct kmem_cache *sigqueue_cachep;
43 43
44DEFINE_TRACE(sched_signal_send);
45
44static void __user *sig_handler(struct task_struct *t, int sig) 46static void __user *sig_handler(struct task_struct *t, int sig)
45{ 47{
46 return t->sighand->action[sig - 1].sa.sa_handler; 48 return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d52b57310af..0b627d9c93d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -487,6 +487,26 @@ static struct ctl_table kern_table[] = {
487 .proc_handler = &ftrace_enable_sysctl, 487 .proc_handler = &ftrace_enable_sysctl,
488 }, 488 },
489#endif 489#endif
490#ifdef CONFIG_STACK_TRACER
491 {
492 .ctl_name = CTL_UNNUMBERED,
493 .procname = "stack_tracer_enabled",
494 .data = &stack_tracer_enabled,
495 .maxlen = sizeof(int),
496 .mode = 0644,
497 .proc_handler = &stack_trace_sysctl,
498 },
499#endif
500#ifdef CONFIG_TRACING
501 {
502 .ctl_name = CTL_UNNUMBERED,
503 .procname = "ftrace_dump_on_oops",
504 .data = &ftrace_dump_on_oops,
505 .maxlen = sizeof(int),
506 .mode = 0644,
507 .proc_handler = &proc_dointvec,
508 },
509#endif
490#ifdef CONFIG_MODULES 510#ifdef CONFIG_MODULES
491 { 511 {
492 .ctl_name = KERN_MODPROBE, 512 .ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..e2a4ff6fc3a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -3,18 +3,34 @@
3# select HAVE_FUNCTION_TRACER: 3# select HAVE_FUNCTION_TRACER:
4# 4#
5 5
6config USER_STACKTRACE_SUPPORT
7 bool
8
6config NOP_TRACER 9config NOP_TRACER
7 bool 10 bool
8 11
9config HAVE_FUNCTION_TRACER 12config HAVE_FUNCTION_TRACER
10 bool 13 bool
11 14
15config HAVE_FUNCTION_GRAPH_TRACER
16 bool
17
18config HAVE_FUNCTION_TRACE_MCOUNT_TEST
19 bool
20 help
21 This gets selected when the arch tests the function_trace_stop
22 variable at the mcount call site. Otherwise, this variable
23 is tested by the called function.
24
12config HAVE_DYNAMIC_FTRACE 25config HAVE_DYNAMIC_FTRACE
13 bool 26 bool
14 27
15config HAVE_FTRACE_MCOUNT_RECORD 28config HAVE_FTRACE_MCOUNT_RECORD
16 bool 29 bool
17 30
31config HAVE_HW_BRANCH_TRACER
32 bool
33
18config TRACER_MAX_TRACE 34config TRACER_MAX_TRACE
19 bool 35 bool
20 36
@@ -47,6 +63,20 @@ config FUNCTION_TRACER
47 (the bootup default), then the overhead of the instructions is very 63 (the bootup default), then the overhead of the instructions is very
48 small and not measurable even in micro-benchmarks. 64 small and not measurable even in micro-benchmarks.
49 65
66config FUNCTION_GRAPH_TRACER
67 bool "Kernel Function Graph Tracer"
68 depends on HAVE_FUNCTION_GRAPH_TRACER
69 depends on FUNCTION_TRACER
70 default y
71 help
72 Enable the kernel to trace a function at both its return
73 and its entry.
74 Its first purpose is to trace the duration of functions and
75 draw a call graph for each thread with some information like
76 the return value.
77 This is done by setting the current return address on the current
78 task structure into a stack of calls.
79
50config IRQSOFF_TRACER 80config IRQSOFF_TRACER
51 bool "Interrupts-off Latency Tracer" 81 bool "Interrupts-off Latency Tracer"
52 default n 82 default n
@@ -138,6 +168,70 @@ config BOOT_TRACER
138 selected, because the self-tests are an initcall as well and that 168 selected, because the self-tests are an initcall as well and that
139 would invalidate the boot trace. ) 169 would invalidate the boot trace. )
140 170
171config TRACE_BRANCH_PROFILING
172 bool "Trace likely/unlikely profiler"
173 depends on DEBUG_KERNEL
174 select TRACING
175 help
176 This tracer profiles all the likely and unlikely macros
177 in the kernel. It will display the results in:
178
179 /debugfs/tracing/profile_annotated_branch
180
181 Note: this will add a significant overhead, only turn this
182 on if you need to profile the system's use of these macros.
183
184 Say N if unsure.
185
186config PROFILE_ALL_BRANCHES
187 bool "Profile all if conditionals"
188 depends on TRACE_BRANCH_PROFILING
189 help
190 This tracer profiles all branch conditions. Every if ()
191 taken in the kernel is recorded whether it hit or miss.
192 The results will be displayed in:
193
194 /debugfs/tracing/profile_branch
195
196 This configuration, when enabled, will impose a great overhead
197 on the system. This should only be enabled when the system
198 is to be analyzed.
199
200 Say N if unsure.
201
202config TRACING_BRANCHES
203 bool
204 help
205 Selected by tracers that will trace the likely and unlikely
206 conditions. This prevents the tracers themselves from being
207 profiled. Profiling the tracing infrastructure can only happen
208 when the likelys and unlikelys are not being traced.
209
210config BRANCH_TRACER
211 bool "Trace likely/unlikely instances"
212 depends on TRACE_BRANCH_PROFILING
213 select TRACING_BRANCHES
214 help
215 This traces the events of likely and unlikely condition
216 calls in the kernel. The difference between this and the
217 "Trace likely/unlikely profiler" is that this is not a
218 histogram of the callers, but actually places the calling
219 events into a running trace buffer to see when and where the
220 events happened, as well as their results.
221
222 Say N if unsure.
223
224config POWER_TRACER
225 bool "Trace power consumption behavior"
226 depends on DEBUG_KERNEL
227 depends on X86
228 select TRACING
229 help
230 This tracer helps developers to analyze and optimize the kernel's
231 power management decisions, specifically the C-state and P-state
232 behavior.
233
234
141config STACK_TRACER 235config STACK_TRACER
142 bool "Trace max stack" 236 bool "Trace max stack"
143 depends on HAVE_FUNCTION_TRACER 237 depends on HAVE_FUNCTION_TRACER
@@ -150,13 +244,26 @@ config STACK_TRACER
150 244
151 This tracer works by hooking into every function call that the 245 This tracer works by hooking into every function call that the
152 kernel executes, and keeping a maximum stack depth value and 246 kernel executes, and keeping a maximum stack depth value and
153 stack-trace saved. Because this logic has to execute in every 247 stack-trace saved. If this is configured with DYNAMIC_FTRACE
154 kernel function, all the time, this option can slow down the 248 then it will not have any overhead while the stack tracer
155 kernel measurably and is generally intended for kernel 249 is disabled.
156 developers only. 250
251 To enable the stack tracer on bootup, pass in 'stacktrace'
252 on the kernel command line.
253
254 The stack tracer can also be enabled or disabled via the
255 sysctl kernel.stack_tracer_enabled
157 256
158 Say N if unsure. 257 Say N if unsure.
159 258
259config HW_BRANCH_TRACER
260 depends on HAVE_HW_BRANCH_TRACER
261 bool "Trace hw branches"
262 select TRACING
263 help
264 This tracer records all branches on the system in a circular
265 buffer giving access to the last N branches for each cpu.
266
160config DYNAMIC_FTRACE 267config DYNAMIC_FTRACE
161 bool "enable/disable ftrace tracepoints dynamically" 268 bool "enable/disable ftrace tracepoints dynamically"
162 depends on FUNCTION_TRACER 269 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c8228b1a49e9..349d5a93653f 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13# If unlikely tracing is enabled, do not trace these files
14ifdef CONFIG_TRACING_BRANCHES
15KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
16endif
17
13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 18obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
15 20
@@ -24,5 +29,9 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
24obj-$(CONFIG_STACK_TRACER) += trace_stack.o 29obj-$(CONFIG_STACK_TRACER) += trace_stack.o
25obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 30obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
26obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 31obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
32obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o
27 36
28libftrace-y := ftrace.o 37libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 78db083390f0..2f32969c09df 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,13 @@
47int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
48static int last_ftrace_enabled; 48static int last_ftrace_enabled;
49 49
50/* set when tracing only a pid */
51struct pid *ftrace_pid_trace;
52static struct pid * const ftrace_swapper_pid = &init_struct_pid;
53
54/* Quick disabling of function tracer. */
55int function_trace_stop;
56
50/* 57/*
51 * ftrace_disabled is set when an anomaly is discovered. 58 * ftrace_disabled is set when an anomaly is discovered.
52 * ftrace_disabled is much stronger than ftrace_enabled. 59 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -55,6 +62,7 @@ static int ftrace_disabled __read_mostly;
55 62
56static DEFINE_SPINLOCK(ftrace_lock); 63static DEFINE_SPINLOCK(ftrace_lock);
57static DEFINE_MUTEX(ftrace_sysctl_lock); 64static DEFINE_MUTEX(ftrace_sysctl_lock);
65static DEFINE_MUTEX(ftrace_start_lock);
58 66
59static struct ftrace_ops ftrace_list_end __read_mostly = 67static struct ftrace_ops ftrace_list_end __read_mostly =
60{ 68{
@@ -63,6 +71,8 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
63 71
64static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 72static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
65ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 73ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
74ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
75ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
66 76
67static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 77static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68{ 78{
@@ -79,6 +89,21 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
79 }; 89 };
80} 90}
81 91
92static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
93{
94 if (!test_tsk_trace_trace(current))
95 return;
96
97 ftrace_pid_function(ip, parent_ip);
98}
99
100static void set_ftrace_pid_function(ftrace_func_t func)
101{
102 /* do not set ftrace_pid_function to itself! */
103 if (func != ftrace_pid_func)
104 ftrace_pid_function = func;
105}
106
82/** 107/**
83 * clear_ftrace_function - reset the ftrace function 108 * clear_ftrace_function - reset the ftrace function
84 * 109 *
@@ -88,7 +113,23 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
88void clear_ftrace_function(void) 113void clear_ftrace_function(void)
89{ 114{
90 ftrace_trace_function = ftrace_stub; 115 ftrace_trace_function = ftrace_stub;
116 __ftrace_trace_function = ftrace_stub;
117 ftrace_pid_function = ftrace_stub;
118}
119
120#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
121/*
122 * For those archs that do not test function_trace_stop in their
123 * mcount call site, we need to do it from C.
124 */
125static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
126{
127 if (function_trace_stop)
128 return;
129
130 __ftrace_trace_function(ip, parent_ip);
91} 131}
132#endif
92 133
93static int __register_ftrace_function(struct ftrace_ops *ops) 134static int __register_ftrace_function(struct ftrace_ops *ops)
94{ 135{
@@ -106,14 +147,28 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
106 ftrace_list = ops; 147 ftrace_list = ops;
107 148
108 if (ftrace_enabled) { 149 if (ftrace_enabled) {
150 ftrace_func_t func;
151
152 if (ops->next == &ftrace_list_end)
153 func = ops->func;
154 else
155 func = ftrace_list_func;
156
157 if (ftrace_pid_trace) {
158 set_ftrace_pid_function(func);
159 func = ftrace_pid_func;
160 }
161
109 /* 162 /*
110 * For one func, simply call it directly. 163 * For one func, simply call it directly.
111 * For more than one func, call the chain. 164 * For more than one func, call the chain.
112 */ 165 */
113 if (ops->next == &ftrace_list_end) 166#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
114 ftrace_trace_function = ops->func; 167 ftrace_trace_function = func;
115 else 168#else
116 ftrace_trace_function = ftrace_list_func; 169 __ftrace_trace_function = func;
170 ftrace_trace_function = ftrace_test_stop_func;
171#endif
117 } 172 }
118 173
119 spin_unlock(&ftrace_lock); 174 spin_unlock(&ftrace_lock);
@@ -152,9 +207,19 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
152 207
153 if (ftrace_enabled) { 208 if (ftrace_enabled) {
154 /* If we only have one func left, then call that directly */ 209 /* If we only have one func left, then call that directly */
155 if (ftrace_list == &ftrace_list_end || 210 if (ftrace_list->next == &ftrace_list_end) {
156 ftrace_list->next == &ftrace_list_end) 211 ftrace_func_t func = ftrace_list->func;
157 ftrace_trace_function = ftrace_list->func; 212
213 if (ftrace_pid_trace) {
214 set_ftrace_pid_function(func);
215 func = ftrace_pid_func;
216 }
217#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
218 ftrace_trace_function = func;
219#else
220 __ftrace_trace_function = func;
221#endif
222 }
158 } 223 }
159 224
160 out: 225 out:
@@ -163,6 +228,36 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
163 return ret; 228 return ret;
164} 229}
165 230
231static void ftrace_update_pid_func(void)
232{
233 ftrace_func_t func;
234
235 /* should not be called from interrupt context */
236 spin_lock(&ftrace_lock);
237
238 if (ftrace_trace_function == ftrace_stub)
239 goto out;
240
241 func = ftrace_trace_function;
242
243 if (ftrace_pid_trace) {
244 set_ftrace_pid_function(func);
245 func = ftrace_pid_func;
246 } else {
247 if (func == ftrace_pid_func)
248 func = ftrace_pid_function;
249 }
250
251#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
252 ftrace_trace_function = func;
253#else
254 __ftrace_trace_function = func;
255#endif
256
257 out:
258 spin_unlock(&ftrace_lock);
259}
260
166#ifdef CONFIG_DYNAMIC_FTRACE 261#ifdef CONFIG_DYNAMIC_FTRACE
167#ifndef CONFIG_FTRACE_MCOUNT_RECORD 262#ifndef CONFIG_FTRACE_MCOUNT_RECORD
168# error Dynamic ftrace depends on MCOUNT_RECORD 263# error Dynamic ftrace depends on MCOUNT_RECORD
@@ -182,6 +277,8 @@ enum {
182 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 277 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
183 FTRACE_ENABLE_MCOUNT = (1 << 3), 278 FTRACE_ENABLE_MCOUNT = (1 << 3),
184 FTRACE_DISABLE_MCOUNT = (1 << 4), 279 FTRACE_DISABLE_MCOUNT = (1 << 4),
280 FTRACE_START_FUNC_RET = (1 << 5),
281 FTRACE_STOP_FUNC_RET = (1 << 6),
185}; 282};
186 283
187static int ftrace_filtered; 284static int ftrace_filtered;
@@ -308,7 +405,7 @@ ftrace_record_ip(unsigned long ip)
308{ 405{
309 struct dyn_ftrace *rec; 406 struct dyn_ftrace *rec;
310 407
311 if (!ftrace_enabled || ftrace_disabled) 408 if (ftrace_disabled)
312 return NULL; 409 return NULL;
313 410
314 rec = ftrace_alloc_dyn_node(ip); 411 rec = ftrace_alloc_dyn_node(ip);
@@ -322,14 +419,51 @@ ftrace_record_ip(unsigned long ip)
322 return rec; 419 return rec;
323} 420}
324 421
325#define FTRACE_ADDR ((long)(ftrace_caller)) 422static void print_ip_ins(const char *fmt, unsigned char *p)
423{
424 int i;
425
426 printk(KERN_CONT "%s", fmt);
427
428 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
429 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
430}
431
432static void ftrace_bug(int failed, unsigned long ip)
433{
434 switch (failed) {
435 case -EFAULT:
436 FTRACE_WARN_ON_ONCE(1);
437 pr_info("ftrace faulted on modifying ");
438 print_ip_sym(ip);
439 break;
440 case -EINVAL:
441 FTRACE_WARN_ON_ONCE(1);
442 pr_info("ftrace failed to modify ");
443 print_ip_sym(ip);
444 print_ip_ins(" actual: ", (unsigned char *)ip);
445 printk(KERN_CONT "\n");
446 break;
447 case -EPERM:
448 FTRACE_WARN_ON_ONCE(1);
449 pr_info("ftrace faulted on writing ");
450 print_ip_sym(ip);
451 break;
452 default:
453 FTRACE_WARN_ON_ONCE(1);
454 pr_info("ftrace faulted on unknown error ");
455 print_ip_sym(ip);
456 }
457}
458
326 459
327static int 460static int
328__ftrace_replace_code(struct dyn_ftrace *rec, 461__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
329 unsigned char *nop, int enable)
330{ 462{
331 unsigned long ip, fl; 463 unsigned long ip, fl;
332 unsigned char *call, *old, *new; 464 unsigned long ftrace_addr;
465
466 ftrace_addr = (unsigned long)ftrace_caller;
333 467
334 ip = rec->ip; 468 ip = rec->ip;
335 469
@@ -388,34 +522,28 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
388 } 522 }
389 } 523 }
390 524
391 call = ftrace_call_replace(ip, FTRACE_ADDR); 525 if (rec->flags & FTRACE_FL_ENABLED)
392 526 return ftrace_make_call(rec, ftrace_addr);
393 if (rec->flags & FTRACE_FL_ENABLED) { 527 else
394 old = nop; 528 return ftrace_make_nop(NULL, rec, ftrace_addr);
395 new = call;
396 } else {
397 old = call;
398 new = nop;
399 }
400
401 return ftrace_modify_code(ip, old, new);
402} 529}
403 530
404static void ftrace_replace_code(int enable) 531static void ftrace_replace_code(int enable)
405{ 532{
406 int i, failed; 533 int i, failed;
407 unsigned char *nop = NULL;
408 struct dyn_ftrace *rec; 534 struct dyn_ftrace *rec;
409 struct ftrace_page *pg; 535 struct ftrace_page *pg;
410 536
411 nop = ftrace_nop_replace();
412
413 for (pg = ftrace_pages_start; pg; pg = pg->next) { 537 for (pg = ftrace_pages_start; pg; pg = pg->next) {
414 for (i = 0; i < pg->index; i++) { 538 for (i = 0; i < pg->index; i++) {
415 rec = &pg->records[i]; 539 rec = &pg->records[i];
416 540
417 /* don't modify code that has already faulted */ 541 /*
418 if (rec->flags & FTRACE_FL_FAILED) 542 * Skip over free records and records that have
543 * failed.
544 */
545 if (rec->flags & FTRACE_FL_FREE ||
546 rec->flags & FTRACE_FL_FAILED)
419 continue; 547 continue;
420 548
421 /* ignore updates to this record's mcount site */ 549 /* ignore updates to this record's mcount site */
@@ -426,68 +554,30 @@ static void ftrace_replace_code(int enable)
426 unfreeze_record(rec); 554 unfreeze_record(rec);
427 } 555 }
428 556
429 failed = __ftrace_replace_code(rec, nop, enable); 557 failed = __ftrace_replace_code(rec, enable);
430 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 558 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
431 rec->flags |= FTRACE_FL_FAILED; 559 rec->flags |= FTRACE_FL_FAILED;
432 if ((system_state == SYSTEM_BOOTING) || 560 if ((system_state == SYSTEM_BOOTING) ||
433 !core_kernel_text(rec->ip)) { 561 !core_kernel_text(rec->ip)) {
434 ftrace_free_rec(rec); 562 ftrace_free_rec(rec);
435 } 563 } else
564 ftrace_bug(failed, rec->ip);
436 } 565 }
437 } 566 }
438 } 567 }
439} 568}
440 569
441static void print_ip_ins(const char *fmt, unsigned char *p)
442{
443 int i;
444
445 printk(KERN_CONT "%s", fmt);
446
447 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
448 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
449}
450
451static int 570static int
452ftrace_code_disable(struct dyn_ftrace *rec) 571ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
453{ 572{
454 unsigned long ip; 573 unsigned long ip;
455 unsigned char *nop, *call;
456 int ret; 574 int ret;
457 575
458 ip = rec->ip; 576 ip = rec->ip;
459 577
460 nop = ftrace_nop_replace(); 578 ret = ftrace_make_nop(mod, rec, mcount_addr);
461 call = ftrace_call_replace(ip, mcount_addr);
462
463 ret = ftrace_modify_code(ip, call, nop);
464 if (ret) { 579 if (ret) {
465 switch (ret) { 580 ftrace_bug(ret, ip);
466 case -EFAULT:
467 FTRACE_WARN_ON_ONCE(1);
468 pr_info("ftrace faulted on modifying ");
469 print_ip_sym(ip);
470 break;
471 case -EINVAL:
472 FTRACE_WARN_ON_ONCE(1);
473 pr_info("ftrace failed to modify ");
474 print_ip_sym(ip);
475 print_ip_ins(" expected: ", call);
476 print_ip_ins(" actual: ", (unsigned char *)ip);
477 print_ip_ins(" replace: ", nop);
478 printk(KERN_CONT "\n");
479 break;
480 case -EPERM:
481 FTRACE_WARN_ON_ONCE(1);
482 pr_info("ftrace faulted on writing ");
483 print_ip_sym(ip);
484 break;
485 default:
486 FTRACE_WARN_ON_ONCE(1);
487 pr_info("ftrace faulted on unknown error ");
488 print_ip_sym(ip);
489 }
490
491 rec->flags |= FTRACE_FL_FAILED; 581 rec->flags |= FTRACE_FL_FAILED;
492 return 0; 582 return 0;
493 } 583 }
@@ -506,6 +596,11 @@ static int __ftrace_modify_code(void *data)
506 if (*command & FTRACE_UPDATE_TRACE_FUNC) 596 if (*command & FTRACE_UPDATE_TRACE_FUNC)
507 ftrace_update_ftrace_func(ftrace_trace_function); 597 ftrace_update_ftrace_func(ftrace_trace_function);
508 598
599 if (*command & FTRACE_START_FUNC_RET)
600 ftrace_enable_ftrace_graph_caller();
601 else if (*command & FTRACE_STOP_FUNC_RET)
602 ftrace_disable_ftrace_graph_caller();
603
509 return 0; 604 return 0;
510} 605}
511 606
@@ -515,43 +610,43 @@ static void ftrace_run_update_code(int command)
515} 610}
516 611
517static ftrace_func_t saved_ftrace_func; 612static ftrace_func_t saved_ftrace_func;
518static int ftrace_start; 613static int ftrace_start_up;
519static DEFINE_MUTEX(ftrace_start_lock);
520 614
521static void ftrace_startup(void) 615static void ftrace_startup_enable(int command)
522{ 616{
523 int command = 0;
524
525 if (unlikely(ftrace_disabled))
526 return;
527
528 mutex_lock(&ftrace_start_lock);
529 ftrace_start++;
530 command |= FTRACE_ENABLE_CALLS;
531
532 if (saved_ftrace_func != ftrace_trace_function) { 617 if (saved_ftrace_func != ftrace_trace_function) {
533 saved_ftrace_func = ftrace_trace_function; 618 saved_ftrace_func = ftrace_trace_function;
534 command |= FTRACE_UPDATE_TRACE_FUNC; 619 command |= FTRACE_UPDATE_TRACE_FUNC;
535 } 620 }
536 621
537 if (!command || !ftrace_enabled) 622 if (!command || !ftrace_enabled)
538 goto out; 623 return;
539 624
540 ftrace_run_update_code(command); 625 ftrace_run_update_code(command);
541 out:
542 mutex_unlock(&ftrace_start_lock);
543} 626}
544 627
545static void ftrace_shutdown(void) 628static void ftrace_startup(int command)
546{ 629{
547 int command = 0; 630 if (unlikely(ftrace_disabled))
631 return;
632
633 mutex_lock(&ftrace_start_lock);
634 ftrace_start_up++;
635 command |= FTRACE_ENABLE_CALLS;
548 636
637 ftrace_startup_enable(command);
638
639 mutex_unlock(&ftrace_start_lock);
640}
641
642static void ftrace_shutdown(int command)
643{
549 if (unlikely(ftrace_disabled)) 644 if (unlikely(ftrace_disabled))
550 return; 645 return;
551 646
552 mutex_lock(&ftrace_start_lock); 647 mutex_lock(&ftrace_start_lock);
553 ftrace_start--; 648 ftrace_start_up--;
554 if (!ftrace_start) 649 if (!ftrace_start_up)
555 command |= FTRACE_DISABLE_CALLS; 650 command |= FTRACE_DISABLE_CALLS;
556 651
557 if (saved_ftrace_func != ftrace_trace_function) { 652 if (saved_ftrace_func != ftrace_trace_function) {
@@ -577,8 +672,8 @@ static void ftrace_startup_sysctl(void)
577 mutex_lock(&ftrace_start_lock); 672 mutex_lock(&ftrace_start_lock);
578 /* Force update next time */ 673 /* Force update next time */
579 saved_ftrace_func = NULL; 674 saved_ftrace_func = NULL;
580 /* ftrace_start is true if we want ftrace running */ 675 /* ftrace_start_up is true if we want ftrace running */
581 if (ftrace_start) 676 if (ftrace_start_up)
582 command |= FTRACE_ENABLE_CALLS; 677 command |= FTRACE_ENABLE_CALLS;
583 678
584 ftrace_run_update_code(command); 679 ftrace_run_update_code(command);
@@ -593,8 +688,8 @@ static void ftrace_shutdown_sysctl(void)
593 return; 688 return;
594 689
595 mutex_lock(&ftrace_start_lock); 690 mutex_lock(&ftrace_start_lock);
596 /* ftrace_start is true if ftrace is running */ 691 /* ftrace_start_up is true if ftrace is running */
597 if (ftrace_start) 692 if (ftrace_start_up)
598 command |= FTRACE_DISABLE_CALLS; 693 command |= FTRACE_DISABLE_CALLS;
599 694
600 ftrace_run_update_code(command); 695 ftrace_run_update_code(command);
@@ -605,7 +700,7 @@ static cycle_t ftrace_update_time;
605static unsigned long ftrace_update_cnt; 700static unsigned long ftrace_update_cnt;
606unsigned long ftrace_update_tot_cnt; 701unsigned long ftrace_update_tot_cnt;
607 702
608static int ftrace_update_code(void) 703static int ftrace_update_code(struct module *mod)
609{ 704{
610 struct dyn_ftrace *p, *t; 705 struct dyn_ftrace *p, *t;
611 cycle_t start, stop; 706 cycle_t start, stop;
@@ -622,7 +717,7 @@ static int ftrace_update_code(void)
622 list_del_init(&p->list); 717 list_del_init(&p->list);
623 718
624 /* convert record (i.e, patch mcount-call with NOP) */ 719 /* convert record (i.e, patch mcount-call with NOP) */
625 if (ftrace_code_disable(p)) { 720 if (ftrace_code_disable(mod, p)) {
626 p->flags |= FTRACE_FL_CONVERTED; 721 p->flags |= FTRACE_FL_CONVERTED;
627 ftrace_update_cnt++; 722 ftrace_update_cnt++;
628 } else 723 } else
@@ -690,7 +785,6 @@ enum {
690#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 785#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
691 786
692struct ftrace_iterator { 787struct ftrace_iterator {
693 loff_t pos;
694 struct ftrace_page *pg; 788 struct ftrace_page *pg;
695 unsigned idx; 789 unsigned idx;
696 unsigned flags; 790 unsigned flags;
@@ -715,6 +809,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
715 iter->pg = iter->pg->next; 809 iter->pg = iter->pg->next;
716 iter->idx = 0; 810 iter->idx = 0;
717 goto retry; 811 goto retry;
812 } else {
813 iter->idx = -1;
718 } 814 }
719 } else { 815 } else {
720 rec = &iter->pg->records[iter->idx++]; 816 rec = &iter->pg->records[iter->idx++];
@@ -737,8 +833,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
737 } 833 }
738 spin_unlock(&ftrace_lock); 834 spin_unlock(&ftrace_lock);
739 835
740 iter->pos = *pos;
741
742 return rec; 836 return rec;
743} 837}
744 838
@@ -746,13 +840,15 @@ static void *t_start(struct seq_file *m, loff_t *pos)
746{ 840{
747 struct ftrace_iterator *iter = m->private; 841 struct ftrace_iterator *iter = m->private;
748 void *p = NULL; 842 void *p = NULL;
749 loff_t l = -1;
750 843
751 if (*pos > iter->pos) 844 if (*pos > 0) {
752 *pos = iter->pos; 845 if (iter->idx < 0)
846 return p;
847 (*pos)--;
848 iter->idx--;
849 }
753 850
754 l = *pos; 851 p = t_next(m, p, pos);
755 p = t_next(m, p, &l);
756 852
757 return p; 853 return p;
758} 854}
@@ -763,21 +859,15 @@ static void t_stop(struct seq_file *m, void *p)
763 859
764static int t_show(struct seq_file *m, void *v) 860static int t_show(struct seq_file *m, void *v)
765{ 861{
766 struct ftrace_iterator *iter = m->private;
767 struct dyn_ftrace *rec = v; 862 struct dyn_ftrace *rec = v;
768 char str[KSYM_SYMBOL_LEN]; 863 char str[KSYM_SYMBOL_LEN];
769 int ret = 0;
770 864
771 if (!rec) 865 if (!rec)
772 return 0; 866 return 0;
773 867
774 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 868 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
775 869
776 ret = seq_printf(m, "%s\n", str); 870 seq_printf(m, "%s\n", str);
777 if (ret < 0) {
778 iter->pos--;
779 iter->idx--;
780 }
781 871
782 return 0; 872 return 0;
783} 873}
@@ -803,7 +893,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
803 return -ENOMEM; 893 return -ENOMEM;
804 894
805 iter->pg = ftrace_pages_start; 895 iter->pg = ftrace_pages_start;
806 iter->pos = 0;
807 896
808 ret = seq_open(file, &show_ftrace_seq_ops); 897 ret = seq_open(file, &show_ftrace_seq_ops);
809 if (!ret) { 898 if (!ret) {
@@ -890,7 +979,6 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
890 979
891 if (file->f_mode & FMODE_READ) { 980 if (file->f_mode & FMODE_READ) {
892 iter->pg = ftrace_pages_start; 981 iter->pg = ftrace_pages_start;
893 iter->pos = 0;
894 iter->flags = enable ? FTRACE_ITER_FILTER : 982 iter->flags = enable ? FTRACE_ITER_FILTER :
895 FTRACE_ITER_NOTRACE; 983 FTRACE_ITER_NOTRACE;
896 984
@@ -959,6 +1047,13 @@ ftrace_match(unsigned char *buff, int len, int enable)
959 int type = MATCH_FULL; 1047 int type = MATCH_FULL;
960 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1048 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
961 unsigned i, match = 0, search_len = 0; 1049 unsigned i, match = 0, search_len = 0;
1050 int not = 0;
1051
1052 if (buff[0] == '!') {
1053 not = 1;
1054 buff++;
1055 len--;
1056 }
962 1057
963 for (i = 0; i < len; i++) { 1058 for (i = 0; i < len; i++) {
964 if (buff[i] == '*') { 1059 if (buff[i] == '*') {
@@ -1012,8 +1107,12 @@ ftrace_match(unsigned char *buff, int len, int enable)
1012 matched = 1; 1107 matched = 1;
1013 break; 1108 break;
1014 } 1109 }
1015 if (matched) 1110 if (matched) {
1016 rec->flags |= flag; 1111 if (not)
1112 rec->flags &= ~flag;
1113 else
1114 rec->flags |= flag;
1115 }
1017 } 1116 }
1018 pg = pg->next; 1117 pg = pg->next;
1019 } 1118 }
@@ -1181,7 +1280,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1181 1280
1182 mutex_lock(&ftrace_sysctl_lock); 1281 mutex_lock(&ftrace_sysctl_lock);
1183 mutex_lock(&ftrace_start_lock); 1282 mutex_lock(&ftrace_start_lock);
1184 if (ftrace_start && ftrace_enabled) 1283 if (ftrace_start_up && ftrace_enabled)
1185 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1284 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1186 mutex_unlock(&ftrace_start_lock); 1285 mutex_unlock(&ftrace_start_lock);
1187 mutex_unlock(&ftrace_sysctl_lock); 1286 mutex_unlock(&ftrace_sysctl_lock);
@@ -1233,12 +1332,233 @@ static struct file_operations ftrace_notrace_fops = {
1233 .release = ftrace_notrace_release, 1332 .release = ftrace_notrace_release,
1234}; 1333};
1235 1334
1236static __init int ftrace_init_debugfs(void) 1335#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1336
1337static DEFINE_MUTEX(graph_lock);
1338
1339int ftrace_graph_count;
1340unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
1341
1342static void *
1343g_next(struct seq_file *m, void *v, loff_t *pos)
1237{ 1344{
1238 struct dentry *d_tracer; 1345 unsigned long *array = m->private;
1239 struct dentry *entry; 1346 int index = *pos;
1240 1347
1241 d_tracer = tracing_init_dentry(); 1348 (*pos)++;
1349
1350 if (index >= ftrace_graph_count)
1351 return NULL;
1352
1353 return &array[index];
1354}
1355
1356static void *g_start(struct seq_file *m, loff_t *pos)
1357{
1358 void *p = NULL;
1359
1360 mutex_lock(&graph_lock);
1361
1362 p = g_next(m, p, pos);
1363
1364 return p;
1365}
1366
1367static void g_stop(struct seq_file *m, void *p)
1368{
1369 mutex_unlock(&graph_lock);
1370}
1371
1372static int g_show(struct seq_file *m, void *v)
1373{
1374 unsigned long *ptr = v;
1375 char str[KSYM_SYMBOL_LEN];
1376
1377 if (!ptr)
1378 return 0;
1379
1380 kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
1381
1382 seq_printf(m, "%s\n", str);
1383
1384 return 0;
1385}
1386
1387static struct seq_operations ftrace_graph_seq_ops = {
1388 .start = g_start,
1389 .next = g_next,
1390 .stop = g_stop,
1391 .show = g_show,
1392};
1393
1394static int
1395ftrace_graph_open(struct inode *inode, struct file *file)
1396{
1397 int ret = 0;
1398
1399 if (unlikely(ftrace_disabled))
1400 return -ENODEV;
1401
1402 mutex_lock(&graph_lock);
1403 if ((file->f_mode & FMODE_WRITE) &&
1404 !(file->f_flags & O_APPEND)) {
1405 ftrace_graph_count = 0;
1406 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
1407 }
1408
1409 if (file->f_mode & FMODE_READ) {
1410 ret = seq_open(file, &ftrace_graph_seq_ops);
1411 if (!ret) {
1412 struct seq_file *m = file->private_data;
1413 m->private = ftrace_graph_funcs;
1414 }
1415 } else
1416 file->private_data = ftrace_graph_funcs;
1417 mutex_unlock(&graph_lock);
1418
1419 return ret;
1420}
1421
1422static ssize_t
1423ftrace_graph_read(struct file *file, char __user *ubuf,
1424 size_t cnt, loff_t *ppos)
1425{
1426 if (file->f_mode & FMODE_READ)
1427 return seq_read(file, ubuf, cnt, ppos);
1428 else
1429 return -EPERM;
1430}
1431
1432static int
1433ftrace_set_func(unsigned long *array, int idx, char *buffer)
1434{
1435 char str[KSYM_SYMBOL_LEN];
1436 struct dyn_ftrace *rec;
1437 struct ftrace_page *pg;
1438 int found = 0;
1439 int i, j;
1440
1441 if (ftrace_disabled)
1442 return -ENODEV;
1443
1444 /* should not be called from interrupt context */
1445 spin_lock(&ftrace_lock);
1446
1447 for (pg = ftrace_pages_start; pg; pg = pg->next) {
1448 for (i = 0; i < pg->index; i++) {
1449 rec = &pg->records[i];
1450
1451 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
1452 continue;
1453
1454 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1455 if (strcmp(str, buffer) == 0) {
1456 found = 1;
1457 for (j = 0; j < idx; j++)
1458 if (array[j] == rec->ip) {
1459 found = 0;
1460 break;
1461 }
1462 if (found)
1463 array[idx] = rec->ip;
1464 break;
1465 }
1466 }
1467 }
1468 spin_unlock(&ftrace_lock);
1469
1470 return found ? 0 : -EINVAL;
1471}
1472
1473static ssize_t
1474ftrace_graph_write(struct file *file, const char __user *ubuf,
1475 size_t cnt, loff_t *ppos)
1476{
1477 unsigned char buffer[FTRACE_BUFF_MAX+1];
1478 unsigned long *array;
1479 size_t read = 0;
1480 ssize_t ret;
1481 int index = 0;
1482 char ch;
1483
1484 if (!cnt || cnt < 0)
1485 return 0;
1486
1487 mutex_lock(&graph_lock);
1488
1489 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
1490 ret = -EBUSY;
1491 goto out;
1492 }
1493
1494 if (file->f_mode & FMODE_READ) {
1495 struct seq_file *m = file->private_data;
1496 array = m->private;
1497 } else
1498 array = file->private_data;
1499
1500 ret = get_user(ch, ubuf++);
1501 if (ret)
1502 goto out;
1503 read++;
1504 cnt--;
1505
1506 /* skip white space */
1507 while (cnt && isspace(ch)) {
1508 ret = get_user(ch, ubuf++);
1509 if (ret)
1510 goto out;
1511 read++;
1512 cnt--;
1513 }
1514
1515 if (isspace(ch)) {
1516 *ppos += read;
1517 ret = read;
1518 goto out;
1519 }
1520
1521 while (cnt && !isspace(ch)) {
1522 if (index < FTRACE_BUFF_MAX)
1523 buffer[index++] = ch;
1524 else {
1525 ret = -EINVAL;
1526 goto out;
1527 }
1528 ret = get_user(ch, ubuf++);
1529 if (ret)
1530 goto out;
1531 read++;
1532 cnt--;
1533 }
1534 buffer[index] = 0;
1535
1536 /* we allow only one at a time */
1537 ret = ftrace_set_func(array, ftrace_graph_count, buffer);
1538 if (ret)
1539 goto out;
1540
1541 ftrace_graph_count++;
1542
1543 file->f_pos += read;
1544
1545 ret = read;
1546 out:
1547 mutex_unlock(&graph_lock);
1548
1549 return ret;
1550}
1551
1552static const struct file_operations ftrace_graph_fops = {
1553 .open = ftrace_graph_open,
1554 .read = ftrace_graph_read,
1555 .write = ftrace_graph_write,
1556};
1557#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1558
1559static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
1560{
1561 struct dentry *entry;
1242 1562
1243 entry = debugfs_create_file("available_filter_functions", 0444, 1563 entry = debugfs_create_file("available_filter_functions", 0444,
1244 d_tracer, NULL, &ftrace_avail_fops); 1564 d_tracer, NULL, &ftrace_avail_fops);
@@ -1263,12 +1583,20 @@ static __init int ftrace_init_debugfs(void)
1263 pr_warning("Could not create debugfs " 1583 pr_warning("Could not create debugfs "
1264 "'set_ftrace_notrace' entry\n"); 1584 "'set_ftrace_notrace' entry\n");
1265 1585
1586#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1587 entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
1588 NULL,
1589 &ftrace_graph_fops);
1590 if (!entry)
1591 pr_warning("Could not create debugfs "
1592 "'set_graph_function' entry\n");
1593#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1594
1266 return 0; 1595 return 0;
1267} 1596}
1268 1597
1269fs_initcall(ftrace_init_debugfs); 1598static int ftrace_convert_nops(struct module *mod,
1270 1599 unsigned long *start,
1271static int ftrace_convert_nops(unsigned long *start,
1272 unsigned long *end) 1600 unsigned long *end)
1273{ 1601{
1274 unsigned long *p; 1602 unsigned long *p;
@@ -1279,23 +1607,32 @@ static int ftrace_convert_nops(unsigned long *start,
1279 p = start; 1607 p = start;
1280 while (p < end) { 1608 while (p < end) {
1281 addr = ftrace_call_adjust(*p++); 1609 addr = ftrace_call_adjust(*p++);
1610 /*
1611 * Some architecture linkers will pad between
1612 * the different mcount_loc sections of different
1613 * object files to satisfy alignments.
1614 * Skip any NULL pointers.
1615 */
1616 if (!addr)
1617 continue;
1282 ftrace_record_ip(addr); 1618 ftrace_record_ip(addr);
1283 } 1619 }
1284 1620
1285 /* disable interrupts to prevent kstop machine */ 1621 /* disable interrupts to prevent kstop machine */
1286 local_irq_save(flags); 1622 local_irq_save(flags);
1287 ftrace_update_code(); 1623 ftrace_update_code(mod);
1288 local_irq_restore(flags); 1624 local_irq_restore(flags);
1289 mutex_unlock(&ftrace_start_lock); 1625 mutex_unlock(&ftrace_start_lock);
1290 1626
1291 return 0; 1627 return 0;
1292} 1628}
1293 1629
1294void ftrace_init_module(unsigned long *start, unsigned long *end) 1630void ftrace_init_module(struct module *mod,
1631 unsigned long *start, unsigned long *end)
1295{ 1632{
1296 if (ftrace_disabled || start == end) 1633 if (ftrace_disabled || start == end)
1297 return; 1634 return;
1298 ftrace_convert_nops(start, end); 1635 ftrace_convert_nops(mod, start, end);
1299} 1636}
1300 1637
1301extern unsigned long __start_mcount_loc[]; 1638extern unsigned long __start_mcount_loc[];
@@ -1325,7 +1662,8 @@ void __init ftrace_init(void)
1325 1662
1326 last_ftrace_enabled = ftrace_enabled = 1; 1663 last_ftrace_enabled = ftrace_enabled = 1;
1327 1664
1328 ret = ftrace_convert_nops(__start_mcount_loc, 1665 ret = ftrace_convert_nops(NULL,
1666 __start_mcount_loc,
1329 __stop_mcount_loc); 1667 __stop_mcount_loc);
1330 1668
1331 return; 1669 return;
@@ -1342,12 +1680,186 @@ static int __init ftrace_nodyn_init(void)
1342} 1680}
1343device_initcall(ftrace_nodyn_init); 1681device_initcall(ftrace_nodyn_init);
1344 1682
1345# define ftrace_startup() do { } while (0) 1683static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
1346# define ftrace_shutdown() do { } while (0) 1684static inline void ftrace_startup_enable(int command) { }
1685/* Keep as macros so we do not need to define the commands */
1686# define ftrace_startup(command) do { } while (0)
1687# define ftrace_shutdown(command) do { } while (0)
1347# define ftrace_startup_sysctl() do { } while (0) 1688# define ftrace_startup_sysctl() do { } while (0)
1348# define ftrace_shutdown_sysctl() do { } while (0) 1689# define ftrace_shutdown_sysctl() do { } while (0)
1349#endif /* CONFIG_DYNAMIC_FTRACE */ 1690#endif /* CONFIG_DYNAMIC_FTRACE */
1350 1691
1692static ssize_t
1693ftrace_pid_read(struct file *file, char __user *ubuf,
1694 size_t cnt, loff_t *ppos)
1695{
1696 char buf[64];
1697 int r;
1698
1699 if (ftrace_pid_trace == ftrace_swapper_pid)
1700 r = sprintf(buf, "swapper tasks\n");
1701 else if (ftrace_pid_trace)
1702 r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace));
1703 else
1704 r = sprintf(buf, "no pid\n");
1705
1706 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1707}
1708
1709static void clear_ftrace_swapper(void)
1710{
1711 struct task_struct *p;
1712 int cpu;
1713
1714 get_online_cpus();
1715 for_each_online_cpu(cpu) {
1716 p = idle_task(cpu);
1717 clear_tsk_trace_trace(p);
1718 }
1719 put_online_cpus();
1720}
1721
1722static void set_ftrace_swapper(void)
1723{
1724 struct task_struct *p;
1725 int cpu;
1726
1727 get_online_cpus();
1728 for_each_online_cpu(cpu) {
1729 p = idle_task(cpu);
1730 set_tsk_trace_trace(p);
1731 }
1732 put_online_cpus();
1733}
1734
1735static void clear_ftrace_pid(struct pid *pid)
1736{
1737 struct task_struct *p;
1738
1739 do_each_pid_task(pid, PIDTYPE_PID, p) {
1740 clear_tsk_trace_trace(p);
1741 } while_each_pid_task(pid, PIDTYPE_PID, p);
1742 put_pid(pid);
1743}
1744
1745static void set_ftrace_pid(struct pid *pid)
1746{
1747 struct task_struct *p;
1748
1749 do_each_pid_task(pid, PIDTYPE_PID, p) {
1750 set_tsk_trace_trace(p);
1751 } while_each_pid_task(pid, PIDTYPE_PID, p);
1752}
1753
1754static void clear_ftrace_pid_task(struct pid **pid)
1755{
1756 if (*pid == ftrace_swapper_pid)
1757 clear_ftrace_swapper();
1758 else
1759 clear_ftrace_pid(*pid);
1760
1761 *pid = NULL;
1762}
1763
1764static void set_ftrace_pid_task(struct pid *pid)
1765{
1766 if (pid == ftrace_swapper_pid)
1767 set_ftrace_swapper();
1768 else
1769 set_ftrace_pid(pid);
1770}
1771
1772static ssize_t
1773ftrace_pid_write(struct file *filp, const char __user *ubuf,
1774 size_t cnt, loff_t *ppos)
1775{
1776 struct pid *pid;
1777 char buf[64];
1778 long val;
1779 int ret;
1780
1781 if (cnt >= sizeof(buf))
1782 return -EINVAL;
1783
1784 if (copy_from_user(&buf, ubuf, cnt))
1785 return -EFAULT;
1786
1787 buf[cnt] = 0;
1788
1789 ret = strict_strtol(buf, 10, &val);
1790 if (ret < 0)
1791 return ret;
1792
1793 mutex_lock(&ftrace_start_lock);
1794 if (val < 0) {
1795 /* disable pid tracing */
1796 if (!ftrace_pid_trace)
1797 goto out;
1798
1799 clear_ftrace_pid_task(&ftrace_pid_trace);
1800
1801 } else {
1802 /* swapper task is special */
1803 if (!val) {
1804 pid = ftrace_swapper_pid;
1805 if (pid == ftrace_pid_trace)
1806 goto out;
1807 } else {
1808 pid = find_get_pid(val);
1809
1810 if (pid == ftrace_pid_trace) {
1811 put_pid(pid);
1812 goto out;
1813 }
1814 }
1815
1816 if (ftrace_pid_trace)
1817 clear_ftrace_pid_task(&ftrace_pid_trace);
1818
1819 if (!pid)
1820 goto out;
1821
1822 ftrace_pid_trace = pid;
1823
1824 set_ftrace_pid_task(ftrace_pid_trace);
1825 }
1826
1827 /* update the function call */
1828 ftrace_update_pid_func();
1829 ftrace_startup_enable(0);
1830
1831 out:
1832 mutex_unlock(&ftrace_start_lock);
1833
1834 return cnt;
1835}
1836
1837static struct file_operations ftrace_pid_fops = {
1838 .read = ftrace_pid_read,
1839 .write = ftrace_pid_write,
1840};
1841
1842static __init int ftrace_init_debugfs(void)
1843{
1844 struct dentry *d_tracer;
1845 struct dentry *entry;
1846
1847 d_tracer = tracing_init_dentry();
1848 if (!d_tracer)
1849 return 0;
1850
1851 ftrace_init_dyn_debugfs(d_tracer);
1852
1853 entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
1854 NULL, &ftrace_pid_fops);
1855 if (!entry)
1856 pr_warning("Could not create debugfs "
1857 "'set_ftrace_pid' entry\n");
1858 return 0;
1859}
1860
1861fs_initcall(ftrace_init_debugfs);
1862
1351/** 1863/**
1352 * ftrace_kill - kill ftrace 1864 * ftrace_kill - kill ftrace
1353 * 1865 *
@@ -1381,10 +1893,11 @@ int register_ftrace_function(struct ftrace_ops *ops)
1381 return -1; 1893 return -1;
1382 1894
1383 mutex_lock(&ftrace_sysctl_lock); 1895 mutex_lock(&ftrace_sysctl_lock);
1896
1384 ret = __register_ftrace_function(ops); 1897 ret = __register_ftrace_function(ops);
1385 ftrace_startup(); 1898 ftrace_startup(0);
1386 mutex_unlock(&ftrace_sysctl_lock);
1387 1899
1900 mutex_unlock(&ftrace_sysctl_lock);
1388 return ret; 1901 return ret;
1389} 1902}
1390 1903
@@ -1400,7 +1913,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1400 1913
1401 mutex_lock(&ftrace_sysctl_lock); 1914 mutex_lock(&ftrace_sysctl_lock);
1402 ret = __unregister_ftrace_function(ops); 1915 ret = __unregister_ftrace_function(ops);
1403 ftrace_shutdown(); 1916 ftrace_shutdown(0);
1404 mutex_unlock(&ftrace_sysctl_lock); 1917 mutex_unlock(&ftrace_sysctl_lock);
1405 1918
1406 return ret; 1919 return ret;
@@ -1449,3 +1962,153 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1449 return ret; 1962 return ret;
1450} 1963}
1451 1964
1965#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1966
1967static atomic_t ftrace_graph_active;
1968
1969int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
1970{
1971 return 0;
1972}
1973
1974/* The callbacks that hook a function */
1975trace_func_graph_ret_t ftrace_graph_return =
1976 (trace_func_graph_ret_t)ftrace_stub;
1977trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
1978
1979/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
1980static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
1981{
1982 int i;
1983 int ret = 0;
1984 unsigned long flags;
1985 int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
1986 struct task_struct *g, *t;
1987
1988 for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
1989 ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
1990 * sizeof(struct ftrace_ret_stack),
1991 GFP_KERNEL);
1992 if (!ret_stack_list[i]) {
1993 start = 0;
1994 end = i;
1995 ret = -ENOMEM;
1996 goto free;
1997 }
1998 }
1999
2000 read_lock_irqsave(&tasklist_lock, flags);
2001 do_each_thread(g, t) {
2002 if (start == end) {
2003 ret = -EAGAIN;
2004 goto unlock;
2005 }
2006
2007 if (t->ret_stack == NULL) {
2008 t->curr_ret_stack = -1;
2009 /* Make sure IRQs see the -1 first: */
2010 barrier();
2011 t->ret_stack = ret_stack_list[start++];
2012 atomic_set(&t->tracing_graph_pause, 0);
2013 atomic_set(&t->trace_overrun, 0);
2014 }
2015 } while_each_thread(g, t);
2016
2017unlock:
2018 read_unlock_irqrestore(&tasklist_lock, flags);
2019free:
2020 for (i = start; i < end; i++)
2021 kfree(ret_stack_list[i]);
2022 return ret;
2023}
2024
2025/* Allocate a return stack for each task */
2026static int start_graph_tracing(void)
2027{
2028 struct ftrace_ret_stack **ret_stack_list;
2029 int ret;
2030
2031 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
2032 sizeof(struct ftrace_ret_stack *),
2033 GFP_KERNEL);
2034
2035 if (!ret_stack_list)
2036 return -ENOMEM;
2037
2038 do {
2039 ret = alloc_retstack_tasklist(ret_stack_list);
2040 } while (ret == -EAGAIN);
2041
2042 kfree(ret_stack_list);
2043 return ret;
2044}
2045
2046int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2047 trace_func_graph_ent_t entryfunc)
2048{
2049 int ret = 0;
2050
2051 mutex_lock(&ftrace_sysctl_lock);
2052
2053 atomic_inc(&ftrace_graph_active);
2054 ret = start_graph_tracing();
2055 if (ret) {
2056 atomic_dec(&ftrace_graph_active);
2057 goto out;
2058 }
2059
2060 ftrace_graph_return = retfunc;
2061 ftrace_graph_entry = entryfunc;
2062
2063 ftrace_startup(FTRACE_START_FUNC_RET);
2064
2065out:
2066 mutex_unlock(&ftrace_sysctl_lock);
2067 return ret;
2068}
2069
2070void unregister_ftrace_graph(void)
2071{
2072 mutex_lock(&ftrace_sysctl_lock);
2073
2074 atomic_dec(&ftrace_graph_active);
2075 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2076 ftrace_graph_entry = ftrace_graph_entry_stub;
2077 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2078
2079 mutex_unlock(&ftrace_sysctl_lock);
2080}
2081
2082/* Allocate a return stack for newly created task */
2083void ftrace_graph_init_task(struct task_struct *t)
2084{
2085 if (atomic_read(&ftrace_graph_active)) {
2086 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
2087 * sizeof(struct ftrace_ret_stack),
2088 GFP_KERNEL);
2089 if (!t->ret_stack)
2090 return;
2091 t->curr_ret_stack = -1;
2092 atomic_set(&t->tracing_graph_pause, 0);
2093 atomic_set(&t->trace_overrun, 0);
2094 } else
2095 t->ret_stack = NULL;
2096}
2097
2098void ftrace_graph_exit_task(struct task_struct *t)
2099{
2100 struct ftrace_ret_stack *ret_stack = t->ret_stack;
2101
2102 t->ret_stack = NULL;
2103 /* NULL must become visible to IRQs before we free it: */
2104 barrier();
2105
2106 kfree(ret_stack);
2107}
2108
/* Stop the graph tracer; this simply forwards to ftrace_stop(). */
void ftrace_graph_stop(void)
{
	ftrace_stop();
}
2113#endif
2114
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 668bbb5ef2bd..76f34c0ef29c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
25 * Turning this switch on, makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,9 +80,22 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
46} 84}
47 85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
90 * permanenty.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
95}
96
97#include "trace.h"
98
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 99/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 100#define DEBUG_SHIFT 0
50 101
@@ -56,7 +107,7 @@ u64 ring_buffer_time_stamp(int cpu)
56 preempt_disable_notrace(); 107 preempt_disable_notrace();
57 /* shift to debug/test normalization and TIME_EXTENTS */ 108 /* shift to debug/test normalization and TIME_EXTENTS */
58 time = sched_clock() << DEBUG_SHIFT; 109 time = sched_clock() << DEBUG_SHIFT;
59 preempt_enable_notrace(); 110 preempt_enable_no_resched_notrace();
60 111
61 return time; 112 return time;
62} 113}
@@ -144,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
144#define TS_MASK ((1ULL << TS_SHIFT) - 1) 195#define TS_MASK ((1ULL << TS_SHIFT) - 1)
145#define TS_DELTA_TEST (~TS_MASK) 196#define TS_DELTA_TEST (~TS_MASK)
146 197
147/* 198struct buffer_data_page {
148 * This hack stolen from mm/slob.c.
149 * We can store per page timing information in the page frame of the page.
150 * Thanks to Peter Zijlstra for suggesting this idea.
151 */
152struct buffer_page {
153 u64 time_stamp; /* page time stamp */ 199 u64 time_stamp; /* page time stamp */
154 local_t write; /* index for next write */
155 local_t commit; /* write commited index */ 200 local_t commit; /* write commited index */
201 unsigned char data[]; /* data of buffer page */
202};
203
204struct buffer_page {
205 local_t write; /* index for next write */
156 unsigned read; /* index for next read */ 206 unsigned read; /* index for next read */
157 struct list_head list; /* list of free pages */ 207 struct list_head list; /* list of free pages */
158 void *page; /* Actual data page */ 208 struct buffer_data_page *page; /* Actual data page */
159}; 209};
160 210
211static void rb_init_page(struct buffer_data_page *bpage)
212{
213 local_set(&bpage->commit, 0);
214}
215
161/* 216/*
162 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 217 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
163 * this issue out. 218 * this issue out.
@@ -179,7 +234,7 @@ static inline int test_time_stamp(u64 delta)
179 return 0; 234 return 0;
180} 235}
181 236
182#define BUF_PAGE_SIZE PAGE_SIZE 237#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
183 238
184/* 239/*
185 * head_page == tail_page && head == tail then buffer is empty. 240 * head_page == tail_page && head == tail then buffer is empty.
@@ -187,7 +242,8 @@ static inline int test_time_stamp(u64 delta)
187struct ring_buffer_per_cpu { 242struct ring_buffer_per_cpu {
188 int cpu; 243 int cpu;
189 struct ring_buffer *buffer; 244 struct ring_buffer *buffer;
190 spinlock_t lock; 245 spinlock_t reader_lock; /* serialize readers */
246 raw_spinlock_t lock;
191 struct lock_class_key lock_key; 247 struct lock_class_key lock_key;
192 struct list_head pages; 248 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */ 249 struct buffer_page *head_page; /* read from head */
@@ -202,7 +258,6 @@ struct ring_buffer_per_cpu {
202}; 258};
203 259
204struct ring_buffer { 260struct ring_buffer {
205 unsigned long size;
206 unsigned pages; 261 unsigned pages;
207 unsigned flags; 262 unsigned flags;
208 int cpus; 263 int cpus;
@@ -221,32 +276,16 @@ struct ring_buffer_iter {
221 u64 read_stamp; 276 u64 read_stamp;
222}; 277};
223 278
279/* buffer may be either ring_buffer or ring_buffer_per_cpu */
224#define RB_WARN_ON(buffer, cond) \ 280#define RB_WARN_ON(buffer, cond) \
225 do { \ 281 ({ \
226 if (unlikely(cond)) { \ 282 int _____ret = unlikely(cond); \
227 atomic_inc(&buffer->record_disabled); \ 283 if (_____ret) { \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \ 284 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \ 285 WARN_ON(1); \
248 } \ 286 } \
249 } while (0) 287 _____ret; \
288 })
250 289
251/** 290/**
252 * check_pages - integrity check of buffer pages 291 * check_pages - integrity check of buffer pages
@@ -258,16 +297,20 @@ struct ring_buffer_iter {
258static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 297static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
259{ 298{
260 struct list_head *head = &cpu_buffer->pages; 299 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp; 300 struct buffer_page *bpage, *tmp;
262 301
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 302 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 303 return -1;
304 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
305 return -1;
265 306
266 list_for_each_entry_safe(page, tmp, head, list) { 307 list_for_each_entry_safe(bpage, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer, 308 if (RB_WARN_ON(cpu_buffer,
268 page->list.next->prev != &page->list); 309 bpage->list.next->prev != &bpage->list))
269 RB_WARN_ON_RET(cpu_buffer, 310 return -1;
270 page->list.prev->next != &page->list); 311 if (RB_WARN_ON(cpu_buffer,
312 bpage->list.prev->next != &bpage->list))
313 return -1;
271 } 314 }
272 315
273 return 0; 316 return 0;
@@ -277,22 +320,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
277 unsigned nr_pages) 320 unsigned nr_pages)
278{ 321{
279 struct list_head *head = &cpu_buffer->pages; 322 struct list_head *head = &cpu_buffer->pages;
280 struct buffer_page *page, *tmp; 323 struct buffer_page *bpage, *tmp;
281 unsigned long addr; 324 unsigned long addr;
282 LIST_HEAD(pages); 325 LIST_HEAD(pages);
283 unsigned i; 326 unsigned i;
284 327
285 for (i = 0; i < nr_pages; i++) { 328 for (i = 0; i < nr_pages; i++) {
286 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 329 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
287 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 330 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
288 if (!page) 331 if (!bpage)
289 goto free_pages; 332 goto free_pages;
290 list_add(&page->list, &pages); 333 list_add(&bpage->list, &pages);
291 334
292 addr = __get_free_page(GFP_KERNEL); 335 addr = __get_free_page(GFP_KERNEL);
293 if (!addr) 336 if (!addr)
294 goto free_pages; 337 goto free_pages;
295 page->page = (void *)addr; 338 bpage->page = (void *)addr;
339 rb_init_page(bpage->page);
296 } 340 }
297 341
298 list_splice(&pages, head); 342 list_splice(&pages, head);
@@ -302,9 +346,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
302 return 0; 346 return 0;
303 347
304 free_pages: 348 free_pages:
305 list_for_each_entry_safe(page, tmp, &pages, list) { 349 list_for_each_entry_safe(bpage, tmp, &pages, list) {
306 list_del_init(&page->list); 350 list_del_init(&bpage->list);
307 free_buffer_page(page); 351 free_buffer_page(bpage);
308 } 352 }
309 return -ENOMEM; 353 return -ENOMEM;
310} 354}
@@ -313,7 +357,7 @@ static struct ring_buffer_per_cpu *
313rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 357rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
314{ 358{
315 struct ring_buffer_per_cpu *cpu_buffer; 359 struct ring_buffer_per_cpu *cpu_buffer;
316 struct buffer_page *page; 360 struct buffer_page *bpage;
317 unsigned long addr; 361 unsigned long addr;
318 int ret; 362 int ret;
319 363
@@ -324,19 +368,21 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
324 368
325 cpu_buffer->cpu = cpu; 369 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer; 370 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock); 371 spin_lock_init(&cpu_buffer->reader_lock);
372 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
328 INIT_LIST_HEAD(&cpu_buffer->pages); 373 INIT_LIST_HEAD(&cpu_buffer->pages);
329 374
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 375 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
331 GFP_KERNEL, cpu_to_node(cpu)); 376 GFP_KERNEL, cpu_to_node(cpu));
332 if (!page) 377 if (!bpage)
333 goto fail_free_buffer; 378 goto fail_free_buffer;
334 379
335 cpu_buffer->reader_page = page; 380 cpu_buffer->reader_page = bpage;
336 addr = __get_free_page(GFP_KERNEL); 381 addr = __get_free_page(GFP_KERNEL);
337 if (!addr) 382 if (!addr)
338 goto fail_free_reader; 383 goto fail_free_reader;
339 page->page = (void *)addr; 384 bpage->page = (void *)addr;
385 rb_init_page(bpage->page);
340 386
341 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 387 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
342 388
@@ -361,14 +407,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
361static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 407static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
362{ 408{
363 struct list_head *head = &cpu_buffer->pages; 409 struct list_head *head = &cpu_buffer->pages;
364 struct buffer_page *page, *tmp; 410 struct buffer_page *bpage, *tmp;
365 411
366 list_del_init(&cpu_buffer->reader_page->list); 412 list_del_init(&cpu_buffer->reader_page->list);
367 free_buffer_page(cpu_buffer->reader_page); 413 free_buffer_page(cpu_buffer->reader_page);
368 414
369 list_for_each_entry_safe(page, tmp, head, list) { 415 list_for_each_entry_safe(bpage, tmp, head, list) {
370 list_del_init(&page->list); 416 list_del_init(&bpage->list);
371 free_buffer_page(page); 417 free_buffer_page(bpage);
372 } 418 }
373 kfree(cpu_buffer); 419 kfree(cpu_buffer);
374} 420}
@@ -465,7 +511,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
465static void 511static void
466rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 512rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
467{ 513{
468 struct buffer_page *page; 514 struct buffer_page *bpage;
469 struct list_head *p; 515 struct list_head *p;
470 unsigned i; 516 unsigned i;
471 517
@@ -473,13 +519,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
473 synchronize_sched(); 519 synchronize_sched();
474 520
475 for (i = 0; i < nr_pages; i++) { 521 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages)); 522 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
523 return;
477 p = cpu_buffer->pages.next; 524 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list); 525 bpage = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list); 526 list_del_init(&bpage->list);
480 free_buffer_page(page); 527 free_buffer_page(bpage);
481 } 528 }
482 BUG_ON(list_empty(&cpu_buffer->pages)); 529 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
530 return;
483 531
484 rb_reset_cpu(cpu_buffer); 532 rb_reset_cpu(cpu_buffer);
485 533
@@ -493,7 +541,7 @@ static void
493rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 541rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
494 struct list_head *pages, unsigned nr_pages) 542 struct list_head *pages, unsigned nr_pages)
495{ 543{
496 struct buffer_page *page; 544 struct buffer_page *bpage;
497 struct list_head *p; 545 struct list_head *p;
498 unsigned i; 546 unsigned i;
499 547
@@ -501,11 +549,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
501 synchronize_sched(); 549 synchronize_sched();
502 550
503 for (i = 0; i < nr_pages; i++) { 551 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages)); 552 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
553 return;
505 p = pages->next; 554 p = pages->next;
506 page = list_entry(p, struct buffer_page, list); 555 bpage = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list); 556 list_del_init(&bpage->list);
508 list_add_tail(&page->list, &cpu_buffer->pages); 557 list_add_tail(&bpage->list, &cpu_buffer->pages);
509 } 558 }
510 rb_reset_cpu(cpu_buffer); 559 rb_reset_cpu(cpu_buffer);
511 560
@@ -532,7 +581,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
532{ 581{
533 struct ring_buffer_per_cpu *cpu_buffer; 582 struct ring_buffer_per_cpu *cpu_buffer;
534 unsigned nr_pages, rm_pages, new_pages; 583 unsigned nr_pages, rm_pages, new_pages;
535 struct buffer_page *page, *tmp; 584 struct buffer_page *bpage, *tmp;
536 unsigned long buffer_size; 585 unsigned long buffer_size;
537 unsigned long addr; 586 unsigned long addr;
538 LIST_HEAD(pages); 587 LIST_HEAD(pages);
@@ -562,7 +611,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
562 if (size < buffer_size) { 611 if (size < buffer_size) {
563 612
564 /* easy case, just free pages */ 613 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages); 614 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
615 mutex_unlock(&buffer->mutex);
616 return -1;
617 }
566 618
567 rm_pages = buffer->pages - nr_pages; 619 rm_pages = buffer->pages - nr_pages;
568 620
@@ -581,21 +633,26 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
581 * add these pages to the cpu_buffers. Otherwise we just free 633 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM; 634 * them all and return -ENOMEM;
583 */ 635 */
584 BUG_ON(nr_pages <= buffer->pages); 636 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
637 mutex_unlock(&buffer->mutex);
638 return -1;
639 }
640
585 new_pages = nr_pages - buffer->pages; 641 new_pages = nr_pages - buffer->pages;
586 642
587 for_each_buffer_cpu(buffer, cpu) { 643 for_each_buffer_cpu(buffer, cpu) {
588 for (i = 0; i < new_pages; i++) { 644 for (i = 0; i < new_pages; i++) {
589 page = kzalloc_node(ALIGN(sizeof(*page), 645 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
590 cache_line_size()), 646 cache_line_size()),
591 GFP_KERNEL, cpu_to_node(cpu)); 647 GFP_KERNEL, cpu_to_node(cpu));
592 if (!page) 648 if (!bpage)
593 goto free_pages; 649 goto free_pages;
594 list_add(&page->list, &pages); 650 list_add(&bpage->list, &pages);
595 addr = __get_free_page(GFP_KERNEL); 651 addr = __get_free_page(GFP_KERNEL);
596 if (!addr) 652 if (!addr)
597 goto free_pages; 653 goto free_pages;
598 page->page = (void *)addr; 654 bpage->page = (void *)addr;
655 rb_init_page(bpage->page);
599 } 656 }
600 } 657 }
601 658
@@ -604,7 +661,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
604 rb_insert_pages(cpu_buffer, &pages, new_pages); 661 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 } 662 }
606 663
607 BUG_ON(!list_empty(&pages)); 664 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
665 mutex_unlock(&buffer->mutex);
666 return -1;
667 }
608 668
609 out: 669 out:
610 buffer->pages = nr_pages; 670 buffer->pages = nr_pages;
@@ -613,9 +673,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
613 return size; 673 return size;
614 674
615 free_pages: 675 free_pages:
616 list_for_each_entry_safe(page, tmp, &pages, list) { 676 list_for_each_entry_safe(bpage, tmp, &pages, list) {
617 list_del_init(&page->list); 677 list_del_init(&bpage->list);
618 free_buffer_page(page); 678 free_buffer_page(bpage);
619 } 679 }
620 mutex_unlock(&buffer->mutex); 680 mutex_unlock(&buffer->mutex);
621 return -ENOMEM; 681 return -ENOMEM;
@@ -626,9 +686,15 @@ static inline int rb_null_event(struct ring_buffer_event *event)
626 return event->type == RINGBUF_TYPE_PADDING; 686 return event->type == RINGBUF_TYPE_PADDING;
627} 687}
628 688
629static inline void *__rb_page_index(struct buffer_page *page, unsigned index) 689static inline void *
690__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
691{
692 return bpage->data + index;
693}
694
695static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
630{ 696{
631 return page->page + index; 697 return bpage->page->data + index;
632} 698}
633 699
634static inline struct ring_buffer_event * 700static inline struct ring_buffer_event *
@@ -658,7 +724,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
658 724
659static inline unsigned rb_page_commit(struct buffer_page *bpage) 725static inline unsigned rb_page_commit(struct buffer_page *bpage)
660{ 726{
661 return local_read(&bpage->commit); 727 return local_read(&bpage->page->commit);
662} 728}
663 729
664/* Size is determined by what has been commited */ 730/* Size is determined by what has been commited */
@@ -693,7 +759,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 head += rb_event_length(event)) { 759 head += rb_event_length(event)) {
694 760
695 event = __rb_page_index(cpu_buffer->head_page, head); 761 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event)); 762 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
763 return;
697 /* Only count data entries */ 764 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA) 765 if (event->type != RINGBUF_TYPE_DATA)
699 continue; 766 continue;
@@ -703,14 +770,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
703} 770}
704 771
705static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 772static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
706 struct buffer_page **page) 773 struct buffer_page **bpage)
707{ 774{
708 struct list_head *p = (*page)->list.next; 775 struct list_head *p = (*bpage)->list.next;
709 776
710 if (p == &cpu_buffer->pages) 777 if (p == &cpu_buffer->pages)
711 p = p->next; 778 p = p->next;
712 779
713 *page = list_entry(p, struct buffer_page, list); 780 *bpage = list_entry(p, struct buffer_page, list);
714} 781}
715 782
716static inline unsigned 783static inline unsigned
@@ -746,16 +813,18 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746 addr &= PAGE_MASK; 813 addr &= PAGE_MASK;
747 814
748 while (cpu_buffer->commit_page->page != (void *)addr) { 815 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer, 816 if (RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page); 817 cpu_buffer->commit_page == cpu_buffer->tail_page))
751 cpu_buffer->commit_page->commit = 818 return;
819 cpu_buffer->commit_page->page->commit =
752 cpu_buffer->commit_page->write; 820 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 821 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
754 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 822 cpu_buffer->write_stamp =
823 cpu_buffer->commit_page->page->time_stamp;
755 } 824 }
756 825
757 /* Now set the commit to the event's index */ 826 /* Now set the commit to the event's index */
758 local_set(&cpu_buffer->commit_page->commit, index); 827 local_set(&cpu_buffer->commit_page->page->commit, index);
759} 828}
760 829
761static inline void 830static inline void
@@ -769,25 +838,38 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
769 * back to us). This allows us to do a simple loop to 838 * back to us). This allows us to do a simple loop to
770 * assign the commit to the tail. 839 * assign the commit to the tail.
771 */ 840 */
841 again:
772 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 842 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
773 cpu_buffer->commit_page->commit = 843 cpu_buffer->commit_page->page->commit =
774 cpu_buffer->commit_page->write; 844 cpu_buffer->commit_page->write;
775 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 845 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
776 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 846 cpu_buffer->write_stamp =
847 cpu_buffer->commit_page->page->time_stamp;
777 /* add barrier to keep gcc from optimizing too much */ 848 /* add barrier to keep gcc from optimizing too much */
778 barrier(); 849 barrier();
779 } 850 }
780 while (rb_commit_index(cpu_buffer) != 851 while (rb_commit_index(cpu_buffer) !=
781 rb_page_write(cpu_buffer->commit_page)) { 852 rb_page_write(cpu_buffer->commit_page)) {
782 cpu_buffer->commit_page->commit = 853 cpu_buffer->commit_page->page->commit =
783 cpu_buffer->commit_page->write; 854 cpu_buffer->commit_page->write;
784 barrier(); 855 barrier();
785 } 856 }
857
858 /* again, keep gcc from optimizing */
859 barrier();
860
861 /*
862 * If an interrupt came in just after the first while loop
863 * and pushed the tail page forward, we will be left with
864 * a dangling commit that will never go forward.
865 */
866 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
867 goto again;
786} 868}
787 869
788static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 870static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
789{ 871{
790 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; 872 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
791 cpu_buffer->reader_page->read = 0; 873 cpu_buffer->reader_page->read = 0;
792} 874}
793 875
@@ -806,7 +888,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
806 else 888 else
807 rb_inc_page(cpu_buffer, &iter->head_page); 889 rb_inc_page(cpu_buffer, &iter->head_page);
808 890
809 iter->read_stamp = iter->head_page->time_stamp; 891 iter->read_stamp = iter->head_page->page->time_stamp;
810 iter->head = 0; 892 iter->head = 0;
811} 893}
812 894
@@ -880,12 +962,15 @@ static struct ring_buffer_event *
880__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 962__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
881 unsigned type, unsigned long length, u64 *ts) 963 unsigned type, unsigned long length, u64 *ts)
882{ 964{
883 struct buffer_page *tail_page, *head_page, *reader_page; 965 struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
884 unsigned long tail, write; 966 unsigned long tail, write;
885 struct ring_buffer *buffer = cpu_buffer->buffer; 967 struct ring_buffer *buffer = cpu_buffer->buffer;
886 struct ring_buffer_event *event; 968 struct ring_buffer_event *event;
887 unsigned long flags; 969 unsigned long flags;
888 970
971 commit_page = cpu_buffer->commit_page;
972 /* we just need to protect against interrupts */
973 barrier();
889 tail_page = cpu_buffer->tail_page; 974 tail_page = cpu_buffer->tail_page;
890 write = local_add_return(length, &tail_page->write); 975 write = local_add_return(length, &tail_page->write);
891 tail = write - length; 976 tail = write - length;
@@ -894,7 +979,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
894 if (write > BUF_PAGE_SIZE) { 979 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page; 980 struct buffer_page *next_page = tail_page;
896 981
897 spin_lock_irqsave(&cpu_buffer->lock, flags); 982 local_irq_save(flags);
983 __raw_spin_lock(&cpu_buffer->lock);
898 984
899 rb_inc_page(cpu_buffer, &next_page); 985 rb_inc_page(cpu_buffer, &next_page);
900 986
@@ -902,14 +988,15 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
902 reader_page = cpu_buffer->reader_page; 988 reader_page = cpu_buffer->reader_page;
903 989
904 /* we grabbed the lock before incrementing */ 990 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page); 991 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
992 goto out_unlock;
906 993
907 /* 994 /*
908 * If for some reason, we had an interrupt storm that made 995 * If for some reason, we had an interrupt storm that made
909 * it all the way around the buffer, bail, and warn 996 * it all the way around the buffer, bail, and warn
910 * about it. 997 * about it.
911 */ 998 */
912 if (unlikely(next_page == cpu_buffer->commit_page)) { 999 if (unlikely(next_page == commit_page)) {
913 WARN_ON_ONCE(1); 1000 WARN_ON_ONCE(1);
914 goto out_unlock; 1001 goto out_unlock;
915 } 1002 }
@@ -940,12 +1027,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
940 */ 1027 */
941 if (tail_page == cpu_buffer->tail_page) { 1028 if (tail_page == cpu_buffer->tail_page) {
942 local_set(&next_page->write, 0); 1029 local_set(&next_page->write, 0);
943 local_set(&next_page->commit, 0); 1030 local_set(&next_page->page->commit, 0);
944 cpu_buffer->tail_page = next_page; 1031 cpu_buffer->tail_page = next_page;
945 1032
946 /* reread the time stamp */ 1033 /* reread the time stamp */
947 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1034 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
948 cpu_buffer->tail_page->time_stamp = *ts; 1035 cpu_buffer->tail_page->page->time_stamp = *ts;
949 } 1036 }
950 1037
951 /* 1038 /*
@@ -970,7 +1057,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
970 rb_set_commit_to_write(cpu_buffer); 1057 rb_set_commit_to_write(cpu_buffer);
971 } 1058 }
972 1059
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1060 __raw_spin_unlock(&cpu_buffer->lock);
1061 local_irq_restore(flags);
974 1062
975 /* fail and let the caller try again */ 1063 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN); 1064 return ERR_PTR(-EAGAIN);
@@ -978,7 +1066,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
978 1066
979 /* We reserved something on the buffer */ 1067 /* We reserved something on the buffer */
980 1068
981 BUG_ON(write > BUF_PAGE_SIZE); 1069 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1070 return NULL;
982 1071
983 event = __rb_page_index(tail_page, tail); 1072 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length); 1073 rb_update_event(event, type, length);
@@ -988,12 +1077,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
988 * this page's time stamp. 1077 * this page's time stamp.
989 */ 1078 */
990 if (!tail && rb_is_commit(cpu_buffer, event)) 1079 if (!tail && rb_is_commit(cpu_buffer, event))
991 cpu_buffer->commit_page->time_stamp = *ts; 1080 cpu_buffer->commit_page->page->time_stamp = *ts;
992 1081
993 return event; 1082 return event;
994 1083
995 out_unlock: 1084 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1085 __raw_spin_unlock(&cpu_buffer->lock);
1086 local_irq_restore(flags);
997 return NULL; 1087 return NULL;
998} 1088}
999 1089
@@ -1038,7 +1128,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1038 event->time_delta = *delta & TS_MASK; 1128 event->time_delta = *delta & TS_MASK;
1039 event->array[0] = *delta >> TS_SHIFT; 1129 event->array[0] = *delta >> TS_SHIFT;
1040 } else { 1130 } else {
1041 cpu_buffer->commit_page->time_stamp = *ts; 1131 cpu_buffer->commit_page->page->time_stamp = *ts;
1042 event->time_delta = 0; 1132 event->time_delta = 0;
1043 event->array[0] = 0; 1133 event->array[0] = 0;
1044 } 1134 }
@@ -1076,10 +1166,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1076 * storm or we have something buggy. 1166 * storm or we have something buggy.
1077 * Bail! 1167 * Bail!
1078 */ 1168 */
1079 if (unlikely(++nr_loops > 1000)) { 1169 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL; 1170 return NULL;
1082 }
1083 1171
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1172 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085 1173
@@ -1175,15 +1263,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1175 struct ring_buffer_event *event; 1263 struct ring_buffer_event *event;
1176 int cpu, resched; 1264 int cpu, resched;
1177 1265
1178 if (ring_buffers_off) 1266 if (ring_buffer_flags != RB_BUFFERS_ON)
1179 return NULL; 1267 return NULL;
1180 1268
1181 if (atomic_read(&buffer->record_disabled)) 1269 if (atomic_read(&buffer->record_disabled))
1182 return NULL; 1270 return NULL;
1183 1271
1184 /* If we are tracing schedule, we don't want to recurse */ 1272 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched(); 1273 resched = ftrace_preempt_disable();
1186 preempt_disable_notrace();
1187 1274
1188 cpu = raw_smp_processor_id(); 1275 cpu = raw_smp_processor_id();
1189 1276
@@ -1214,10 +1301,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1214 return event; 1301 return event;
1215 1302
1216 out: 1303 out:
1217 if (resched) 1304 ftrace_preempt_enable(resched);
1218 preempt_enable_no_resched_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL; 1305 return NULL;
1222} 1306}
1223 1307
@@ -1259,12 +1343,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1259 /* 1343 /*
1260 * Only the last preempt count needs to restore preemption. 1344 * Only the last preempt count needs to restore preemption.
1261 */ 1345 */
1262 if (preempt_count() == 1) { 1346 if (preempt_count() == 1)
1263 if (per_cpu(rb_need_resched, cpu)) 1347 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1264 preempt_enable_no_resched_notrace(); 1348 else
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace(); 1349 preempt_enable_no_resched_notrace();
1269 1350
1270 return 0; 1351 return 0;
@@ -1294,14 +1375,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1294 int ret = -EBUSY; 1375 int ret = -EBUSY;
1295 int cpu, resched; 1376 int cpu, resched;
1296 1377
1297 if (ring_buffers_off) 1378 if (ring_buffer_flags != RB_BUFFERS_ON)
1298 return -EBUSY; 1379 return -EBUSY;
1299 1380
1300 if (atomic_read(&buffer->record_disabled)) 1381 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY; 1382 return -EBUSY;
1302 1383
1303 resched = need_resched(); 1384 resched = ftrace_preempt_disable();
1304 preempt_disable_notrace();
1305 1385
1306 cpu = raw_smp_processor_id(); 1386 cpu = raw_smp_processor_id();
1307 1387
@@ -1327,10 +1407,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1327 1407
1328 ret = 0; 1408 ret = 0;
1329 out: 1409 out:
1330 if (resched) 1410 ftrace_preempt_enable(resched);
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334 1411
1335 return ret; 1412 return ret;
1336} 1413}
@@ -1489,14 +1566,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1489 return overruns; 1566 return overruns;
1490} 1567}
1491 1568
1492/** 1569static void rb_iter_reset(struct ring_buffer_iter *iter)
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{ 1570{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1571 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502 1572
@@ -1511,7 +1581,24 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1511 if (iter->head) 1581 if (iter->head)
1512 iter->read_stamp = cpu_buffer->read_stamp; 1582 iter->read_stamp = cpu_buffer->read_stamp;
1513 else 1583 else
1514 iter->read_stamp = iter->head_page->time_stamp; 1584 iter->read_stamp = iter->head_page->page->time_stamp;
1585}
1586
1587/**
1588 * ring_buffer_iter_reset - reset an iterator
1589 * @iter: The iterator to reset
1590 *
1591 * Resets the iterator, so that it will start from the beginning
1592 * again.
1593 */
1594void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1595{
1596 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1597 unsigned long flags;
1598
1599 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1600 rb_iter_reset(iter);
1601 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1515} 1602}
1516 1603
1517/** 1604/**
@@ -1597,7 +1684,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1597 unsigned long flags; 1684 unsigned long flags;
1598 int nr_loops = 0; 1685 int nr_loops = 0;
1599 1686
1600 spin_lock_irqsave(&cpu_buffer->lock, flags); 1687 local_irq_save(flags);
1688 __raw_spin_lock(&cpu_buffer->lock);
1601 1689
1602 again: 1690 again:
1603 /* 1691 /*
@@ -1606,8 +1694,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 * a case where we will loop three times. There should be no 1694 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of). 1695 * reason to loop four times (that I know of).
1608 */ 1696 */
1609 if (unlikely(++nr_loops > 3)) { 1697 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL; 1698 reader = NULL;
1612 goto out; 1699 goto out;
1613 } 1700 }
@@ -1619,8 +1706,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 goto out; 1706 goto out;
1620 1707
1621 /* Never should we have an index greater than the size */ 1708 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer, 1709 if (RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader)); 1710 cpu_buffer->reader_page->read > rb_page_size(reader)))
1711 goto out;
1624 1712
1625 /* check if we caught up to the tail */ 1713 /* check if we caught up to the tail */
1626 reader = NULL; 1714 reader = NULL;
@@ -1637,7 +1725,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1637 cpu_buffer->reader_page->list.prev = reader->list.prev; 1725 cpu_buffer->reader_page->list.prev = reader->list.prev;
1638 1726
1639 local_set(&cpu_buffer->reader_page->write, 0); 1727 local_set(&cpu_buffer->reader_page->write, 0);
1640 local_set(&cpu_buffer->reader_page->commit, 0); 1728 local_set(&cpu_buffer->reader_page->page->commit, 0);
1641 1729
1642 /* Make the reader page now replace the head */ 1730 /* Make the reader page now replace the head */
1643 reader->list.prev->next = &cpu_buffer->reader_page->list; 1731 reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1659,7 +1747,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 goto again; 1747 goto again;
1660 1748
1661 out: 1749 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1750 __raw_spin_unlock(&cpu_buffer->lock);
1751 local_irq_restore(flags);
1663 1752
1664 return reader; 1753 return reader;
1665} 1754}
@@ -1673,7 +1762,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1673 reader = rb_get_reader_page(cpu_buffer); 1762 reader = rb_get_reader_page(cpu_buffer);
1674 1763
1675 /* This function should not be called when buffer is empty */ 1764 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader); 1765 if (RB_WARN_ON(cpu_buffer, !reader))
1766 return;
1677 1767
1678 event = rb_reader_event(cpu_buffer); 1768 event = rb_reader_event(cpu_buffer);
1679 1769
@@ -1700,7 +1790,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * Check if we are at the end of the buffer. 1790 * Check if we are at the end of the buffer.
1701 */ 1791 */
1702 if (iter->head >= rb_page_size(iter->head_page)) { 1792 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1793 if (RB_WARN_ON(buffer,
1794 iter->head_page == cpu_buffer->commit_page))
1795 return;
1704 rb_inc_iter(iter); 1796 rb_inc_iter(iter);
1705 return; 1797 return;
1706 } 1798 }
@@ -1713,8 +1805,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 * This should not be called to advance the header if we are 1805 * This should not be called to advance the header if we are
1714 * at the tail of the buffer. 1806 * at the tail of the buffer.
1715 */ 1807 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1808 if (RB_WARN_ON(cpu_buffer,
1717 (iter->head + length > rb_commit_index(cpu_buffer))); 1809 (iter->head_page == cpu_buffer->commit_page) &&
1810 (iter->head + length > rb_commit_index(cpu_buffer))))
1811 return;
1718 1812
1719 rb_update_iter_read_stamp(iter, event); 1813 rb_update_iter_read_stamp(iter, event);
1720 1814
@@ -1726,17 +1820,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1726 rb_advance_iter(iter); 1820 rb_advance_iter(iter);
1727} 1821}
1728 1822
1729/** 1823static struct ring_buffer_event *
1730 * ring_buffer_peek - peek at the next event to be read 1824rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1731 * @buffer: The ring buffer to read
1732 * @cpu: The cpu to peak at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{ 1825{
1741 struct ring_buffer_per_cpu *cpu_buffer; 1826 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event; 1827 struct ring_buffer_event *event;
@@ -1757,10 +1842,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1757 * can have. Nesting 10 deep of interrupts is clearly 1842 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly. 1843 * an anomaly.
1759 */ 1844 */
1760 if (unlikely(++nr_loops > 10)) { 1845 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL; 1846 return NULL;
1763 }
1764 1847
1765 reader = rb_get_reader_page(cpu_buffer); 1848 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader) 1849 if (!reader)
@@ -1798,16 +1881,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1798 return NULL; 1881 return NULL;
1799} 1882}
1800 1883
1801/** 1884static struct ring_buffer_event *
1802 * ring_buffer_iter_peek - peek at the next event to be read 1885rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{ 1886{
1812 struct ring_buffer *buffer; 1887 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer; 1888 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1904,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1829 * can have. Nesting 10 deep of interrupts is clearly 1904 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly. 1905 * an anomaly.
1831 */ 1906 */
1832 if (unlikely(++nr_loops > 10)) { 1907 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL; 1908 return NULL;
1835 }
1836 1909
1837 if (rb_per_cpu_empty(cpu_buffer)) 1910 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL; 1911 return NULL;
@@ -1869,6 +1942,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1869} 1942}
1870 1943
1871/** 1944/**
1945 * ring_buffer_peek - peek at the next event to be read
1946 * @buffer: The ring buffer to read
1947 * @cpu: The cpu to peak at
1948 * @ts: The timestamp counter of this event.
1949 *
1950 * This will return the event that will be read next, but does
1951 * not consume the data.
1952 */
1953struct ring_buffer_event *
1954ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1955{
1956 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1957 struct ring_buffer_event *event;
1958 unsigned long flags;
1959
1960 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1961 event = rb_buffer_peek(buffer, cpu, ts);
1962 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1963
1964 return event;
1965}
1966
1967/**
1968 * ring_buffer_iter_peek - peek at the next event to be read
1969 * @iter: The ring buffer iterator
1970 * @ts: The timestamp counter of this event.
1971 *
1972 * This will return the event that will be read next, but does
1973 * not increment the iterator.
1974 */
1975struct ring_buffer_event *
1976ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1977{
1978 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1979 struct ring_buffer_event *event;
1980 unsigned long flags;
1981
1982 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1983 event = rb_iter_peek(iter, ts);
1984 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1985
1986 return event;
1987}
1988
1989/**
1872 * ring_buffer_consume - return an event and consume it 1990 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from 1991 * @buffer: The ring buffer to get the next event from
1874 * 1992 *
@@ -1879,19 +1997,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1879struct ring_buffer_event * 1997struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1998ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{ 1999{
1882 struct ring_buffer_per_cpu *cpu_buffer; 2000 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1883 struct ring_buffer_event *event; 2001 struct ring_buffer_event *event;
2002 unsigned long flags;
1884 2003
1885 if (!cpu_isset(cpu, buffer->cpumask)) 2004 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL; 2005 return NULL;
1887 2006
1888 event = ring_buffer_peek(buffer, cpu, ts); 2007 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2008
2009 event = rb_buffer_peek(buffer, cpu, ts);
1889 if (!event) 2010 if (!event)
1890 return NULL; 2011 goto out;
1891 2012
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer); 2013 rb_advance_reader(cpu_buffer);
1894 2014
2015 out:
2016 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2017
1895 return event; 2018 return event;
1896} 2019}
1897 2020
@@ -1928,9 +2051,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1928 atomic_inc(&cpu_buffer->record_disabled); 2051 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched(); 2052 synchronize_sched();
1930 2053
1931 spin_lock_irqsave(&cpu_buffer->lock, flags); 2054 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 ring_buffer_iter_reset(iter); 2055 __raw_spin_lock(&cpu_buffer->lock);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2056 rb_iter_reset(iter);
2057 __raw_spin_unlock(&cpu_buffer->lock);
2058 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934 2059
1935 return iter; 2060 return iter;
1936} 2061}
@@ -1962,12 +2087,17 @@ struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2087ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{ 2088{
1964 struct ring_buffer_event *event; 2089 struct ring_buffer_event *event;
2090 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2091 unsigned long flags;
1965 2092
1966 event = ring_buffer_iter_peek(iter, ts); 2093 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2094 event = rb_iter_peek(iter, ts);
1967 if (!event) 2095 if (!event)
1968 return NULL; 2096 goto out;
1969 2097
1970 rb_advance_iter(iter); 2098 rb_advance_iter(iter);
2099 out:
2100 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971 2101
1972 return event; 2102 return event;
1973} 2103}
@@ -1987,7 +2117,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1987 cpu_buffer->head_page 2117 cpu_buffer->head_page
1988 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2118 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1989 local_set(&cpu_buffer->head_page->write, 0); 2119 local_set(&cpu_buffer->head_page->write, 0);
1990 local_set(&cpu_buffer->head_page->commit, 0); 2120 local_set(&cpu_buffer->head_page->page->commit, 0);
1991 2121
1992 cpu_buffer->head_page->read = 0; 2122 cpu_buffer->head_page->read = 0;
1993 2123
@@ -1996,7 +2126,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1996 2126
1997 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2127 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1998 local_set(&cpu_buffer->reader_page->write, 0); 2128 local_set(&cpu_buffer->reader_page->write, 0);
1999 local_set(&cpu_buffer->reader_page->commit, 0); 2129 local_set(&cpu_buffer->reader_page->page->commit, 0);
2000 cpu_buffer->reader_page->read = 0; 2130 cpu_buffer->reader_page->read = 0;
2001 2131
2002 cpu_buffer->overrun = 0; 2132 cpu_buffer->overrun = 0;
@@ -2016,11 +2146,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2016 if (!cpu_isset(cpu, buffer->cpumask)) 2146 if (!cpu_isset(cpu, buffer->cpumask))
2017 return; 2147 return;
2018 2148
2019 spin_lock_irqsave(&cpu_buffer->lock, flags); 2149 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2150
2151 __raw_spin_lock(&cpu_buffer->lock);
2020 2152
2021 rb_reset_cpu(cpu_buffer); 2153 rb_reset_cpu(cpu_buffer);
2022 2154
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2155 __raw_spin_unlock(&cpu_buffer->lock);
2156
2157 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2024} 2158}
2025 2159
2026/** 2160/**
@@ -2090,8 +2224,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2090 return -EINVAL; 2224 return -EINVAL;
2091 2225
2092 /* At least make sure the two buffers are somewhat the same */ 2226 /* At least make sure the two buffers are somewhat the same */
2093 if (buffer_a->size != buffer_b->size || 2227 if (buffer_a->pages != buffer_b->pages)
2094 buffer_a->pages != buffer_b->pages)
2095 return -EINVAL; 2228 return -EINVAL;
2096 2229
2097 cpu_buffer_a = buffer_a->buffers[cpu]; 2230 cpu_buffer_a = buffer_a->buffers[cpu];
@@ -2118,16 +2251,178 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2118 return 0; 2251 return 0;
2119} 2252}
2120 2253
2254static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2255 struct buffer_data_page *bpage)
2256{
2257 struct ring_buffer_event *event;
2258 unsigned long head;
2259
2260 __raw_spin_lock(&cpu_buffer->lock);
2261 for (head = 0; head < local_read(&bpage->commit);
2262 head += rb_event_length(event)) {
2263
2264 event = __rb_data_page_index(bpage, head);
2265 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2266 return;
2267 /* Only count data entries */
2268 if (event->type != RINGBUF_TYPE_DATA)
2269 continue;
2270 cpu_buffer->entries--;
2271 }
2272 __raw_spin_unlock(&cpu_buffer->lock);
2273}
2274
2275/**
2276 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2277 * @buffer: the buffer to allocate for.
2278 *
2279 * This function is used in conjunction with ring_buffer_read_page.
2280 * When reading a full page from the ring buffer, these functions
2281 * can be used to speed up the process. The calling function should
2282 * allocate a few pages first with this function. Then when it
2283 * needs to get pages from the ring buffer, it passes the result
2284 * of this function into ring_buffer_read_page, which will swap
2285 * the page that was allocated, with the read page of the buffer.
2286 *
2287 * Returns:
2288 * The page allocated, or NULL on error.
2289 */
2290void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2291{
2292 unsigned long addr;
2293 struct buffer_data_page *bpage;
2294
2295 addr = __get_free_page(GFP_KERNEL);
2296 if (!addr)
2297 return NULL;
2298
2299 bpage = (void *)addr;
2300
2301 return bpage;
2302}
2303
2304/**
2305 * ring_buffer_free_read_page - free an allocated read page
2306 * @buffer: the buffer the page was allocate for
2307 * @data: the page to free
2308 *
2309 * Free a page allocated from ring_buffer_alloc_read_page.
2310 */
2311void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2312{
2313 free_page((unsigned long)data);
2314}
2315
2316/**
2317 * ring_buffer_read_page - extract a page from the ring buffer
2318 * @buffer: buffer to extract from
2319 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2320 * @cpu: the cpu of the buffer to extract
2321 * @full: should the extraction only happen when the page is full.
2322 *
2323 * This function will pull out a page from the ring buffer and consume it.
2324 * @data_page must be the address of the variable that was returned
2325 * from ring_buffer_alloc_read_page. This is because the page might be used
2326 * to swap with a page in the ring buffer.
2327 *
2328 * for example:
2329 * rpage = ring_buffer_alloc_page(buffer);
2330 * if (!rpage)
2331 * return error;
2332 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2333 * if (ret)
2334 * process_page(rpage);
2335 *
2336 * When @full is set, the function will not return true unless
2337 * the writer is off the reader page.
2338 *
2339 * Note: it is up to the calling functions to handle sleeps and wakeups.
2340 * The ring buffer can be used anywhere in the kernel and can not
2341 * blindly call wake_up. The layer that uses the ring buffer must be
2342 * responsible for that.
2343 *
2344 * Returns:
2345 * 1 if data has been transferred
2346 * 0 if no data has been transferred.
2347 */
2348int ring_buffer_read_page(struct ring_buffer *buffer,
2349 void **data_page, int cpu, int full)
2350{
2351 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2352 struct ring_buffer_event *event;
2353 struct buffer_data_page *bpage;
2354 unsigned long flags;
2355 int ret = 0;
2356
2357 if (!data_page)
2358 return 0;
2359
2360 bpage = *data_page;
2361 if (!bpage)
2362 return 0;
2363
2364 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2365
2366 /*
2367 * rb_buffer_peek will get the next ring buffer if
2368 * the current reader page is empty.
2369 */
2370 event = rb_buffer_peek(buffer, cpu, NULL);
2371 if (!event)
2372 goto out;
2373
2374 /* check for data */
2375 if (!local_read(&cpu_buffer->reader_page->page->commit))
2376 goto out;
2377 /*
2378 * If the writer is already off of the read page, then simply
2379 * switch the read page with the given page. Otherwise
2380 * we need to copy the data from the reader to the writer.
2381 */
2382 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2383 unsigned int read = cpu_buffer->reader_page->read;
2384
2385 if (full)
2386 goto out;
2387 /* The writer is still on the reader page, we must copy */
2388 bpage = cpu_buffer->reader_page->page;
2389 memcpy(bpage->data,
2390 cpu_buffer->reader_page->page->data + read,
2391 local_read(&bpage->commit) - read);
2392
2393 /* consume what was read */
2394 cpu_buffer->reader_page += read;
2395
2396 } else {
2397 /* swap the pages */
2398 rb_init_page(bpage);
2399 bpage = cpu_buffer->reader_page->page;
2400 cpu_buffer->reader_page->page = *data_page;
2401 cpu_buffer->reader_page->read = 0;
2402 *data_page = bpage;
2403 }
2404 ret = 1;
2405
2406 /* update the entry counter */
2407 rb_remove_entries(cpu_buffer, bpage);
2408 out:
2409 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2410
2411 return ret;
2412}
2413
2121static ssize_t 2414static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf, 2415rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos) 2416 size_t cnt, loff_t *ppos)
2124{ 2417{
2125 int *p = filp->private_data; 2418 long *p = filp->private_data;
2126 char buf[64]; 2419 char buf[64];
2127 int r; 2420 int r;
2128 2421
2129 /* !ring_buffers_off == tracing_on */ 2422 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2130 r = sprintf(buf, "%d\n", !*p); 2423 r = sprintf(buf, "permanently disabled\n");
2424 else
2425 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2131 2426
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2427 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133} 2428}
@@ -2136,7 +2431,7 @@ static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf, 2431rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos) 2432 size_t cnt, loff_t *ppos)
2138{ 2433{
2139 int *p = filp->private_data; 2434 long *p = filp->private_data;
2140 char buf[64]; 2435 char buf[64];
2141 long val; 2436 long val;
2142 int ret; 2437 int ret;
@@ -2153,8 +2448,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2153 if (ret < 0) 2448 if (ret < 0)
2154 return ret; 2449 return ret;
2155 2450
2156 /* !ring_buffers_off == tracing_on */ 2451 if (val)
2157 *p = !val; 2452 set_bit(RB_BUFFERS_ON_BIT, p);
2453 else
2454 clear_bit(RB_BUFFERS_ON_BIT, p);
2158 2455
2159 (*ppos)++; 2456 (*ppos)++;
2160 2457
@@ -2176,7 +2473,7 @@ static __init int rb_init_debugfs(void)
2176 d_tracer = tracing_init_dentry(); 2473 d_tracer = tracing_init_dentry();
2177 2474
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2475 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops); 2476 &ring_buffer_flags, &rb_simple_fops);
2180 if (!entry) 2477 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2478 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182 2479
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1ee9e4e454a0..f4bb3800318b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/seq_file.h>
33#include <linux/writeback.h> 34#include <linux/writeback.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
@@ -43,6 +44,38 @@
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 44unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
44unsigned long __read_mostly tracing_thresh; 45unsigned long __read_mostly tracing_thresh;
45 46
47/*
48 * We need to change this state when a selftest is running.
49 * A selftest will lurk into the ring-buffer to count the
50 * entries inserted during the selftest although some concurrent
51 * insertions into the ring-buffer such as ftrace_printk could occur
52 * at the same time, giving false positive or negative results.
53 */
54static bool __read_mostly tracing_selftest_running;
55
56/* For tracers that don't implement custom flags */
57static struct tracer_opt dummy_tracer_opt[] = {
58 { }
59};
60
61static struct tracer_flags dummy_tracer_flags = {
62 .val = 0,
63 .opts = dummy_tracer_opt
64};
65
66static int dummy_set_flag(u32 old_flags, u32 bit, int set)
67{
68 return 0;
69}
70
71/*
72 * Kill all tracing for good (never come back).
73 * It is initialized to 1 but will turn to zero if the initialization
74 * of the tracer is successful. But that is the only place that sets
75 * this back to zero.
76 */
77int tracing_disabled = 1;
78
46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 79static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47 80
48static inline void ftrace_disable_cpu(void) 81static inline void ftrace_disable_cpu(void)
@@ -62,7 +95,36 @@ static cpumask_t __read_mostly tracing_buffer_mask;
62#define for_each_tracing_cpu(cpu) \ 95#define for_each_tracing_cpu(cpu) \
63 for_each_cpu_mask(cpu, tracing_buffer_mask) 96 for_each_cpu_mask(cpu, tracing_buffer_mask)
64 97
65static int tracing_disabled = 1; 98/*
99 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
100 *
101 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
102 * is set, then ftrace_dump is called. This will output the contents
103 * of the ftrace buffers to the console. This is very useful for
104 * capturing traces that lead to crashes and outputting them to a
105 * serial console.
106 *
107 * It is default off, but you can enable it with either specifying
108 * "ftrace_dump_on_oops" in the kernel command line, or setting
109 * /proc/sys/kernel/ftrace_dump_on_oops to true.
110 */
111int ftrace_dump_on_oops;
112
113static int tracing_set_tracer(char *buf);
114
115static int __init set_ftrace(char *str)
116{
117 tracing_set_tracer(str);
118 return 1;
119}
120__setup("ftrace", set_ftrace);
121
122static int __init set_ftrace_dump_on_oops(char *str)
123{
124 ftrace_dump_on_oops = 1;
125 return 1;
126}
127__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
66 128
67long 129long
68ns2usecs(cycle_t nsec) 130ns2usecs(cycle_t nsec)
@@ -112,6 +174,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
112/* tracer_enabled is used to toggle activation of a tracer */ 174/* tracer_enabled is used to toggle activation of a tracer */
113static int tracer_enabled = 1; 175static int tracer_enabled = 1;
114 176
177/**
178 * tracing_is_enabled - return tracer_enabled status
179 *
180 * This function is used by other tracers to know the status
181 * of the tracer_enabled flag. Tracers may use this function
182 * to know if it should enable their features when starting
183 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
184 */
185int tracing_is_enabled(void)
186{
187 return tracer_enabled;
188}
189
115/* function tracing enabled */ 190/* function tracing enabled */
116int ftrace_function_enabled; 191int ftrace_function_enabled;
117 192
@@ -153,8 +228,9 @@ static DEFINE_MUTEX(trace_types_lock);
153/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 228/* trace_wait is a waitqueue for tasks blocked on trace_poll */
154static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 229static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
155 230
156/* trace_flags holds iter_ctrl options */ 231/* trace_flags holds trace_options default values */
157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 232unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
233 TRACE_ITER_ANNOTATE;
158 234
159/** 235/**
160 * trace_wake_up - wake up tasks waiting for trace input 236 * trace_wake_up - wake up tasks waiting for trace input
@@ -193,13 +269,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
193 return nsecs / 1000; 269 return nsecs / 1000;
194} 270}
195 271
196/*
197 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
198 * control the output of kernel symbols.
199 */
200#define TRACE_ITER_SYM_MASK \
201 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
202
203/* These must match the bit positions in trace_iterator_flags */ 272/* These must match the bit positions in trace_iterator_flags */
204static const char *trace_options[] = { 273static const char *trace_options[] = {
205 "print-parent", 274 "print-parent",
@@ -213,6 +282,12 @@ static const char *trace_options[] = {
213 "stacktrace", 282 "stacktrace",
214 "sched-tree", 283 "sched-tree",
215 "ftrace_printk", 284 "ftrace_printk",
285 "ftrace_preempt",
286 "branch",
287 "annotate",
288 "userstacktrace",
289 "sym-userobj",
290 "printk-msg-only",
216 NULL 291 NULL
217}; 292};
218 293
@@ -359,6 +434,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
359 return trace_seq_putmem(s, hex, j); 434 return trace_seq_putmem(s, hex, j);
360} 435}
361 436
437static int
438trace_seq_path(struct trace_seq *s, struct path *path)
439{
440 unsigned char *p;
441
442 if (s->len >= (PAGE_SIZE - 1))
443 return 0;
444 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
445 if (!IS_ERR(p)) {
446 p = mangle_path(s->buffer + s->len, p, "\n");
447 if (p) {
448 s->len = p - s->buffer;
449 return 1;
450 }
451 } else {
452 s->buffer[s->len++] = '?';
453 return 1;
454 }
455
456 return 0;
457}
458
362static void 459static void
363trace_seq_reset(struct trace_seq *s) 460trace_seq_reset(struct trace_seq *s)
364{ 461{
@@ -470,7 +567,17 @@ int register_tracer(struct tracer *type)
470 return -1; 567 return -1;
471 } 568 }
472 569
570 /*
571 * When this gets called we hold the BKL which means that
572 * preemption is disabled. Various trace selftests however
573 * need to disable and enable preemption for successful tests.
574 * So we drop the BKL here and grab it after the tests again.
575 */
576 unlock_kernel();
473 mutex_lock(&trace_types_lock); 577 mutex_lock(&trace_types_lock);
578
579 tracing_selftest_running = true;
580
474 for (t = trace_types; t; t = t->next) { 581 for (t = trace_types; t; t = t->next) {
475 if (strcmp(type->name, t->name) == 0) { 582 if (strcmp(type->name, t->name) == 0) {
476 /* already found */ 583 /* already found */
@@ -481,12 +588,20 @@ int register_tracer(struct tracer *type)
481 } 588 }
482 } 589 }
483 590
591 if (!type->set_flag)
592 type->set_flag = &dummy_set_flag;
593 if (!type->flags)
594 type->flags = &dummy_tracer_flags;
595 else
596 if (!type->flags->opts)
597 type->flags->opts = dummy_tracer_opt;
598
484#ifdef CONFIG_FTRACE_STARTUP_TEST 599#ifdef CONFIG_FTRACE_STARTUP_TEST
485 if (type->selftest) { 600 if (type->selftest) {
486 struct tracer *saved_tracer = current_trace; 601 struct tracer *saved_tracer = current_trace;
487 struct trace_array *tr = &global_trace; 602 struct trace_array *tr = &global_trace;
488 int saved_ctrl = tr->ctrl;
489 int i; 603 int i;
604
490 /* 605 /*
491 * Run a selftest on this tracer. 606 * Run a selftest on this tracer.
492 * Here we reset the trace buffer, and set the current 607 * Here we reset the trace buffer, and set the current
@@ -494,25 +609,23 @@ int register_tracer(struct tracer *type)
494 * internal tracing to verify that everything is in order. 609 * internal tracing to verify that everything is in order.
495 * If we fail, we do not register this tracer. 610 * If we fail, we do not register this tracer.
496 */ 611 */
497 for_each_tracing_cpu(i) { 612 for_each_tracing_cpu(i)
498 tracing_reset(tr, i); 613 tracing_reset(tr, i);
499 } 614
500 current_trace = type; 615 current_trace = type;
501 tr->ctrl = 0;
502 /* the test is responsible for initializing and enabling */ 616 /* the test is responsible for initializing and enabling */
503 pr_info("Testing tracer %s: ", type->name); 617 pr_info("Testing tracer %s: ", type->name);
504 ret = type->selftest(type, tr); 618 ret = type->selftest(type, tr);
505 /* the test is responsible for resetting too */ 619 /* the test is responsible for resetting too */
506 current_trace = saved_tracer; 620 current_trace = saved_tracer;
507 tr->ctrl = saved_ctrl;
508 if (ret) { 621 if (ret) {
509 printk(KERN_CONT "FAILED!\n"); 622 printk(KERN_CONT "FAILED!\n");
510 goto out; 623 goto out;
511 } 624 }
512 /* Only reset on passing, to avoid touching corrupted buffers */ 625 /* Only reset on passing, to avoid touching corrupted buffers */
513 for_each_tracing_cpu(i) { 626 for_each_tracing_cpu(i)
514 tracing_reset(tr, i); 627 tracing_reset(tr, i);
515 } 628
516 printk(KERN_CONT "PASSED\n"); 629 printk(KERN_CONT "PASSED\n");
517 } 630 }
518#endif 631#endif
@@ -524,7 +637,9 @@ int register_tracer(struct tracer *type)
524 max_tracer_type_len = len; 637 max_tracer_type_len = len;
525 638
526 out: 639 out:
640 tracing_selftest_running = false;
527 mutex_unlock(&trace_types_lock); 641 mutex_unlock(&trace_types_lock);
642 lock_kernel();
528 643
529 return ret; 644 return ret;
530} 645}
@@ -564,6 +679,16 @@ void tracing_reset(struct trace_array *tr, int cpu)
564 ftrace_enable_cpu(); 679 ftrace_enable_cpu();
565} 680}
566 681
682void tracing_reset_online_cpus(struct trace_array *tr)
683{
684 int cpu;
685
686 tr->time_start = ftrace_now(tr->cpu);
687
688 for_each_online_cpu(cpu)
689 tracing_reset(tr, cpu);
690}
691
567#define SAVED_CMDLINES 128 692#define SAVED_CMDLINES 128
568static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; 693static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
569static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 694static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
@@ -581,6 +706,91 @@ static void trace_init_cmdlines(void)
581 cmdline_idx = 0; 706 cmdline_idx = 0;
582} 707}
583 708
709static int trace_stop_count;
710static DEFINE_SPINLOCK(tracing_start_lock);
711
712/**
713 * ftrace_off_permanent - disable all ftrace code permanently
714 *
715 * This should only be called when a serious anomally has
716 * been detected. This will turn off the function tracing,
717 * ring buffers, and other tracing utilites. It takes no
718 * locks and can be called from any context.
719 */
720void ftrace_off_permanent(void)
721{
722 tracing_disabled = 1;
723 ftrace_stop();
724 tracing_off_permanent();
725}
726
727/**
728 * tracing_start - quick start of the tracer
729 *
730 * If tracing is enabled but was stopped by tracing_stop,
731 * this will start the tracer back up.
732 */
733void tracing_start(void)
734{
735 struct ring_buffer *buffer;
736 unsigned long flags;
737
738 if (tracing_disabled)
739 return;
740
741 spin_lock_irqsave(&tracing_start_lock, flags);
742 if (--trace_stop_count)
743 goto out;
744
745 if (trace_stop_count < 0) {
746 /* Someone screwed up their debugging */
747 WARN_ON_ONCE(1);
748 trace_stop_count = 0;
749 goto out;
750 }
751
752
753 buffer = global_trace.buffer;
754 if (buffer)
755 ring_buffer_record_enable(buffer);
756
757 buffer = max_tr.buffer;
758 if (buffer)
759 ring_buffer_record_enable(buffer);
760
761 ftrace_start();
762 out:
763 spin_unlock_irqrestore(&tracing_start_lock, flags);
764}
765
766/**
767 * tracing_stop - quick stop of the tracer
768 *
769 * Light weight way to stop tracing. Use in conjunction with
770 * tracing_start.
771 */
772void tracing_stop(void)
773{
774 struct ring_buffer *buffer;
775 unsigned long flags;
776
777 ftrace_stop();
778 spin_lock_irqsave(&tracing_start_lock, flags);
779 if (trace_stop_count++)
780 goto out;
781
782 buffer = global_trace.buffer;
783 if (buffer)
784 ring_buffer_record_disable(buffer);
785
786 buffer = max_tr.buffer;
787 if (buffer)
788 ring_buffer_record_disable(buffer);
789
790 out:
791 spin_unlock_irqrestore(&tracing_start_lock, flags);
792}
793
584void trace_stop_cmdline_recording(void); 794void trace_stop_cmdline_recording(void);
585 795
586static void trace_save_cmdline(struct task_struct *tsk) 796static void trace_save_cmdline(struct task_struct *tsk)
@@ -618,7 +828,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
618 spin_unlock(&trace_cmdline_lock); 828 spin_unlock(&trace_cmdline_lock);
619} 829}
620 830
621static char *trace_find_cmdline(int pid) 831char *trace_find_cmdline(int pid)
622{ 832{
623 char *cmdline = "<...>"; 833 char *cmdline = "<...>";
624 unsigned map; 834 unsigned map;
@@ -655,6 +865,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
655 865
656 entry->preempt_count = pc & 0xff; 866 entry->preempt_count = pc & 0xff;
657 entry->pid = (tsk) ? tsk->pid : 0; 867 entry->pid = (tsk) ? tsk->pid : 0;
868 entry->tgid = (tsk) ? tsk->tgid : 0;
658 entry->flags = 869 entry->flags =
659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 870#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 871 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -691,6 +902,56 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
691 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 902 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
692} 903}
693 904
905#ifdef CONFIG_FUNCTION_GRAPH_TRACER
906static void __trace_graph_entry(struct trace_array *tr,
907 struct trace_array_cpu *data,
908 struct ftrace_graph_ent *trace,
909 unsigned long flags,
910 int pc)
911{
912 struct ring_buffer_event *event;
913 struct ftrace_graph_ent_entry *entry;
914 unsigned long irq_flags;
915
916 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
917 return;
918
919 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
920 &irq_flags);
921 if (!event)
922 return;
923 entry = ring_buffer_event_data(event);
924 tracing_generic_entry_update(&entry->ent, flags, pc);
925 entry->ent.type = TRACE_GRAPH_ENT;
926 entry->graph_ent = *trace;
927 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
928}
929
930static void __trace_graph_return(struct trace_array *tr,
931 struct trace_array_cpu *data,
932 struct ftrace_graph_ret *trace,
933 unsigned long flags,
934 int pc)
935{
936 struct ring_buffer_event *event;
937 struct ftrace_graph_ret_entry *entry;
938 unsigned long irq_flags;
939
940 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
941 return;
942
943 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
944 &irq_flags);
945 if (!event)
946 return;
947 entry = ring_buffer_event_data(event);
948 tracing_generic_entry_update(&entry->ent, flags, pc);
949 entry->ent.type = TRACE_GRAPH_RET;
950 entry->ret = *trace;
951 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
952}
953#endif
954
694void 955void
695ftrace(struct trace_array *tr, struct trace_array_cpu *data, 956ftrace(struct trace_array *tr, struct trace_array_cpu *data,
696 unsigned long ip, unsigned long parent_ip, unsigned long flags, 957 unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -742,6 +1003,46 @@ void __trace_stack(struct trace_array *tr,
742 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 1003 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
743} 1004}
744 1005
/*
 * ftrace_trace_userstack - record a user stack trace event
 * @tr: trace array whose buffer receives the event
 * @data: per-cpu trace data (not used here)
 * @flags: irq flags passed on to tracing_generic_entry_update()
 * @pc: preempt count passed on to tracing_generic_entry_update()
 *
 * Compiles to an empty function without CONFIG_STACKTRACE. Otherwise
 * it records nothing unless TRACE_ITER_USERSTACKTRACE is set in
 * trace_flags and an event could be reserved.
 */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_array_cpu *data,
				   unsigned long flags, int pc)
{
#ifdef CONFIG_STACKTRACE
	struct userstack_entry *field;
	struct ring_buffer_event *ev;
	struct stack_trace ustack;
	unsigned long irq_flags;

	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	ev = ring_buffer_lock_reserve(tr->buffer, sizeof(*field),
				      &irq_flags);
	if (!ev)
		return;

	field = ring_buffer_event_data(ev);
	tracing_generic_entry_update(&field->ent, flags, pc);
	field->ent.type = TRACE_USER_STACK;

	/* start from a zeroed caller array */
	memset(&field->caller, 0, sizeof(field->caller));

	/* have the stack saver write straight into the event */
	ustack.entries = field->caller;
	ustack.max_entries = FTRACE_STACK_ENTRIES;
	ustack.nr_entries = 0;
	ustack.skip = 0;

	save_stack_trace_user(&ustack);
	ring_buffer_unlock_commit(tr->buffer, ev, irq_flags);
#endif
}
1038
/*
 * __trace_userstack - record a user stack trace at the current preempt count
 * @tr: trace array whose buffer receives the event
 * @data: per-cpu trace data
 * @flags: irq flags stored with the event
 *
 * Thin wrapper around ftrace_trace_userstack() that supplies
 * preempt_count() as the preempt count.
 */
void __trace_userstack(struct trace_array *tr,
		       struct trace_array_cpu *data,
		       unsigned long flags)
{
	ftrace_trace_userstack(tr, data, flags, preempt_count());
}
1045
745static void 1046static void
746ftrace_trace_special(void *__tr, void *__data, 1047ftrace_trace_special(void *__tr, void *__data,
747 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1048 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -765,6 +1066,7 @@ ftrace_trace_special(void *__tr, void *__data,
765 entry->arg3 = arg3; 1066 entry->arg3 = arg3;
766 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1067 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
767 ftrace_trace_stack(tr, data, irq_flags, 4, pc); 1068 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1069 ftrace_trace_userstack(tr, data, irq_flags, pc);
768 1070
769 trace_wake_up(); 1071 trace_wake_up();
770} 1072}
@@ -803,6 +1105,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
803 entry->next_cpu = task_cpu(next); 1105 entry->next_cpu = task_cpu(next);
804 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1106 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
805 ftrace_trace_stack(tr, data, flags, 5, pc); 1107 ftrace_trace_stack(tr, data, flags, 5, pc);
1108 ftrace_trace_userstack(tr, data, flags, pc);
806} 1109}
807 1110
808void 1111void
@@ -832,6 +1135,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
832 entry->next_cpu = task_cpu(wakee); 1135 entry->next_cpu = task_cpu(wakee);
833 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1136 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
834 ftrace_trace_stack(tr, data, flags, 6, pc); 1137 ftrace_trace_stack(tr, data, flags, 6, pc);
1138 ftrace_trace_userstack(tr, data, flags, pc);
835 1139
836 trace_wake_up(); 1140 trace_wake_up();
837} 1141}
@@ -841,26 +1145,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
841{ 1145{
842 struct trace_array *tr = &global_trace; 1146 struct trace_array *tr = &global_trace;
843 struct trace_array_cpu *data; 1147 struct trace_array_cpu *data;
1148 unsigned long flags;
844 int cpu; 1149 int cpu;
845 int pc; 1150 int pc;
846 1151
847 if (tracing_disabled || !tr->ctrl) 1152 if (tracing_disabled)
848 return; 1153 return;
849 1154
850 pc = preempt_count(); 1155 pc = preempt_count();
851 preempt_disable_notrace(); 1156 local_irq_save(flags);
852 cpu = raw_smp_processor_id(); 1157 cpu = raw_smp_processor_id();
853 data = tr->data[cpu]; 1158 data = tr->data[cpu];
854 1159
855 if (likely(!atomic_read(&data->disabled))) 1160 if (likely(atomic_inc_return(&data->disabled) == 1))
856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1161 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
857 1162
858 preempt_enable_notrace(); 1163 atomic_dec(&data->disabled);
1164 local_irq_restore(flags);
859} 1165}
860 1166
861#ifdef CONFIG_FUNCTION_TRACER 1167#ifdef CONFIG_FUNCTION_TRACER
862static void 1168static void
863function_trace_call(unsigned long ip, unsigned long parent_ip) 1169function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
864{ 1170{
865 struct trace_array *tr = &global_trace; 1171 struct trace_array *tr = &global_trace;
866 struct trace_array_cpu *data; 1172 struct trace_array_cpu *data;
@@ -873,8 +1179,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
873 return; 1179 return;
874 1180
875 pc = preempt_count(); 1181 pc = preempt_count();
876 resched = need_resched(); 1182 resched = ftrace_preempt_disable();
877 preempt_disable_notrace();
878 local_save_flags(flags); 1183 local_save_flags(flags);
879 cpu = raw_smp_processor_id(); 1184 cpu = raw_smp_processor_id();
880 data = tr->data[cpu]; 1185 data = tr->data[cpu];
@@ -884,12 +1189,97 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
884 trace_function(tr, data, ip, parent_ip, flags, pc); 1189 trace_function(tr, data, ip, parent_ip, flags, pc);
885 1190
886 atomic_dec(&data->disabled); 1191 atomic_dec(&data->disabled);
887 if (resched) 1192 ftrace_preempt_enable(resched);
888 preempt_enable_no_resched_notrace();
889 else
890 preempt_enable_notrace();
891} 1193}
892 1194
1195static void
1196function_trace_call(unsigned long ip, unsigned long parent_ip)
1197{
1198 struct trace_array *tr = &global_trace;
1199 struct trace_array_cpu *data;
1200 unsigned long flags;
1201 long disabled;
1202 int cpu;
1203 int pc;
1204
1205 if (unlikely(!ftrace_function_enabled))
1206 return;
1207
1208 /*
1209 * Need to use raw, since this must be called before the
1210 * recursive protection is performed.
1211 */
1212 local_irq_save(flags);
1213 cpu = raw_smp_processor_id();
1214 data = tr->data[cpu];
1215 disabled = atomic_inc_return(&data->disabled);
1216
1217 if (likely(disabled == 1)) {
1218 pc = preempt_count();
1219 trace_function(tr, data, ip, parent_ip, flags, pc);
1220 }
1221
1222 atomic_dec(&data->disabled);
1223 local_irq_restore(flags);
1224}
1225
1226#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1227int trace_graph_entry(struct ftrace_graph_ent *trace)
1228{
1229 struct trace_array *tr = &global_trace;
1230 struct trace_array_cpu *data;
1231 unsigned long flags;
1232 long disabled;
1233 int cpu;
1234 int pc;
1235
1236 if (!ftrace_trace_task(current))
1237 return 0;
1238
1239 if (!ftrace_graph_addr(trace->func))
1240 return 0;
1241
1242 local_irq_save(flags);
1243 cpu = raw_smp_processor_id();
1244 data = tr->data[cpu];
1245 disabled = atomic_inc_return(&data->disabled);
1246 if (likely(disabled == 1)) {
1247 pc = preempt_count();
1248 __trace_graph_entry(tr, data, trace, flags, pc);
1249 }
1250 /* Only do the atomic if it is not already set */
1251 if (!test_tsk_trace_graph(current))
1252 set_tsk_trace_graph(current);
1253 atomic_dec(&data->disabled);
1254 local_irq_restore(flags);
1255
1256 return 1;
1257}
1258
1259void trace_graph_return(struct ftrace_graph_ret *trace)
1260{
1261 struct trace_array *tr = &global_trace;
1262 struct trace_array_cpu *data;
1263 unsigned long flags;
1264 long disabled;
1265 int cpu;
1266 int pc;
1267
1268 local_irq_save(flags);
1269 cpu = raw_smp_processor_id();
1270 data = tr->data[cpu];
1271 disabled = atomic_inc_return(&data->disabled);
1272 if (likely(disabled == 1)) {
1273 pc = preempt_count();
1274 __trace_graph_return(tr, data, trace, flags, pc);
1275 }
1276 if (!trace->depth)
1277 clear_tsk_trace_graph(current);
1278 atomic_dec(&data->disabled);
1279 local_irq_restore(flags);
1280}
1281#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1282
893static struct ftrace_ops trace_ops __read_mostly = 1283static struct ftrace_ops trace_ops __read_mostly =
894{ 1284{
895 .func = function_trace_call, 1285 .func = function_trace_call,
@@ -898,9 +1288,14 @@ static struct ftrace_ops trace_ops __read_mostly =
898void tracing_start_function_trace(void) 1288void tracing_start_function_trace(void)
899{ 1289{
900 ftrace_function_enabled = 0; 1290 ftrace_function_enabled = 0;
1291
1292 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1293 trace_ops.func = function_trace_call_preempt_only;
1294 else
1295 trace_ops.func = function_trace_call;
1296
901 register_ftrace_function(&trace_ops); 1297 register_ftrace_function(&trace_ops);
902 if (tracer_enabled) 1298 ftrace_function_enabled = 1;
903 ftrace_function_enabled = 1;
904} 1299}
905 1300
906void tracing_stop_function_trace(void) 1301void tracing_stop_function_trace(void)
@@ -912,6 +1307,7 @@ void tracing_stop_function_trace(void)
912 1307
913enum trace_file_type { 1308enum trace_file_type {
914 TRACE_FILE_LAT_FMT = 1, 1309 TRACE_FILE_LAT_FMT = 1,
1310 TRACE_FILE_ANNOTATE = 2,
915}; 1311};
916 1312
917static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 1313static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
@@ -1047,10 +1443,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1047 1443
1048 atomic_inc(&trace_record_cmdline_disabled); 1444 atomic_inc(&trace_record_cmdline_disabled);
1049 1445
1050 /* let the tracer grab locks here if needed */
1051 if (current_trace->start)
1052 current_trace->start(iter);
1053
1054 if (*pos != iter->pos) { 1446 if (*pos != iter->pos) {
1055 iter->ent = NULL; 1447 iter->ent = NULL;
1056 iter->cpu = 0; 1448 iter->cpu = 0;
@@ -1077,14 +1469,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1077 1469
1078static void s_stop(struct seq_file *m, void *p) 1470static void s_stop(struct seq_file *m, void *p)
1079{ 1471{
1080 struct trace_iterator *iter = m->private;
1081
1082 atomic_dec(&trace_record_cmdline_disabled); 1472 atomic_dec(&trace_record_cmdline_disabled);
1083
1084 /* let the tracer release locks here if needed */
1085 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1086 iter->trace->stop(iter);
1087
1088 mutex_unlock(&trace_types_lock); 1473 mutex_unlock(&trace_types_lock);
1089} 1474}
1090 1475
@@ -1143,7 +1528,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1143# define IP_FMT "%016lx" 1528# define IP_FMT "%016lx"
1144#endif 1529#endif
1145 1530
1146static int 1531int
1147seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) 1532seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1148{ 1533{
1149 int ret; 1534 int ret;
@@ -1164,6 +1549,78 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1164 return ret; 1549 return ret;
1165} 1550}
1166 1551
1552static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1553 unsigned long ip, unsigned long sym_flags)
1554{
1555 struct file *file = NULL;
1556 unsigned long vmstart = 0;
1557 int ret = 1;
1558
1559 if (mm) {
1560 const struct vm_area_struct *vma;
1561
1562 down_read(&mm->mmap_sem);
1563 vma = find_vma(mm, ip);
1564 if (vma) {
1565 file = vma->vm_file;
1566 vmstart = vma->vm_start;
1567 }
1568 if (file) {
1569 ret = trace_seq_path(s, &file->f_path);
1570 if (ret)
1571 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1572 }
1573 up_read(&mm->mmap_sem);
1574 }
1575 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1576 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1577 return ret;
1578}
1579
1580static int
1581seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1582 unsigned long sym_flags)
1583{
1584 struct mm_struct *mm = NULL;
1585 int ret = 1;
1586 unsigned int i;
1587
1588 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1589 struct task_struct *task;
1590 /*
1591 * we do the lookup on the thread group leader,
1592 * since individual threads might have already quit!
1593 */
1594 rcu_read_lock();
1595 task = find_task_by_vpid(entry->ent.tgid);
1596 if (task)
1597 mm = get_task_mm(task);
1598 rcu_read_unlock();
1599 }
1600
1601 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1602 unsigned long ip = entry->caller[i];
1603
1604 if (ip == ULONG_MAX || !ret)
1605 break;
1606 if (i && ret)
1607 ret = trace_seq_puts(s, " <- ");
1608 if (!ip) {
1609 if (ret)
1610 ret = trace_seq_puts(s, "??");
1611 continue;
1612 }
1613 if (!ret)
1614 break;
1615 if (ret)
1616 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1617 }
1618
1619 if (mm)
1620 mmput(mm);
1621 return ret;
1622}
1623
1167static void print_lat_help_header(struct seq_file *m) 1624static void print_lat_help_header(struct seq_file *m)
1168{ 1625{
1169 seq_puts(m, "# _------=> CPU# \n"); 1626 seq_puts(m, "# _------=> CPU# \n");
@@ -1301,6 +1758,13 @@ lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1301 1758
1302static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1759static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1303 1760
1761static int task_state_char(unsigned long state)
1762{
1763 int bit = state ? __ffs(state) + 1 : 0;
1764
1765 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
1766}
1767
1304/* 1768/*
1305 * The message is supposed to contain an ending newline. 1769 * The message is supposed to contain an ending newline.
1306 * If the printing stops prematurely, try to add a newline of our own. 1770 * If the printing stops prematurely, try to add a newline of our own.
@@ -1338,6 +1802,23 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1338 trace_seq_putc(s, '\n'); 1802 trace_seq_putc(s, '\n');
1339} 1803}
1340 1804
1805static void test_cpu_buff_start(struct trace_iterator *iter)
1806{
1807 struct trace_seq *s = &iter->seq;
1808
1809 if (!(trace_flags & TRACE_ITER_ANNOTATE))
1810 return;
1811
1812 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1813 return;
1814
1815 if (cpu_isset(iter->cpu, iter->started))
1816 return;
1817
1818 cpu_set(iter->cpu, iter->started);
1819 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1820}
1821
1341static enum print_line_t 1822static enum print_line_t
1342print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1823print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1343{ 1824{
@@ -1352,11 +1833,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1352 char *comm; 1833 char *comm;
1353 int S, T; 1834 int S, T;
1354 int i; 1835 int i;
1355 unsigned state;
1356 1836
1357 if (entry->type == TRACE_CONT) 1837 if (entry->type == TRACE_CONT)
1358 return TRACE_TYPE_HANDLED; 1838 return TRACE_TYPE_HANDLED;
1359 1839
1840 test_cpu_buff_start(iter);
1841
1360 next_entry = find_next_entry(iter, NULL, &next_ts); 1842 next_entry = find_next_entry(iter, NULL, &next_ts);
1361 if (!next_entry) 1843 if (!next_entry)
1362 next_ts = iter->ts; 1844 next_ts = iter->ts;
@@ -1396,12 +1878,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1396 1878
1397 trace_assign_type(field, entry); 1879 trace_assign_type(field, entry);
1398 1880
1399 T = field->next_state < sizeof(state_to_char) ? 1881 T = task_state_char(field->next_state);
1400 state_to_char[field->next_state] : 'X'; 1882 S = task_state_char(field->prev_state);
1401
1402 state = field->prev_state ?
1403 __ffs(field->prev_state) + 1 : 0;
1404 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1405 comm = trace_find_cmdline(field->next_pid); 1883 comm = trace_find_cmdline(field->next_pid);
1406 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", 1884 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1407 field->prev_pid, 1885 field->prev_pid,
@@ -1448,6 +1926,27 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1448 trace_seq_print_cont(s, iter); 1926 trace_seq_print_cont(s, iter);
1449 break; 1927 break;
1450 } 1928 }
1929 case TRACE_BRANCH: {
1930 struct trace_branch *field;
1931
1932 trace_assign_type(field, entry);
1933
1934 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1935 field->correct ? " ok " : " MISS ",
1936 field->func,
1937 field->file,
1938 field->line);
1939 break;
1940 }
1941 case TRACE_USER_STACK: {
1942 struct userstack_entry *field;
1943
1944 trace_assign_type(field, entry);
1945
1946 seq_print_userip_objs(field, s, sym_flags);
1947 trace_seq_putc(s, '\n');
1948 break;
1949 }
1451 default: 1950 default:
1452 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1951 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1453 } 1952 }
@@ -1472,6 +1971,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1472 if (entry->type == TRACE_CONT) 1971 if (entry->type == TRACE_CONT)
1473 return TRACE_TYPE_HANDLED; 1972 return TRACE_TYPE_HANDLED;
1474 1973
1974 test_cpu_buff_start(iter);
1975
1475 comm = trace_find_cmdline(iter->ent->pid); 1976 comm = trace_find_cmdline(iter->ent->pid);
1476 1977
1477 t = ns2usecs(iter->ts); 1978 t = ns2usecs(iter->ts);
@@ -1519,10 +2020,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1519 2020
1520 trace_assign_type(field, entry); 2021 trace_assign_type(field, entry);
1521 2022
1522 S = field->prev_state < sizeof(state_to_char) ? 2023 T = task_state_char(field->next_state);
1523 state_to_char[field->prev_state] : 'X'; 2024 S = task_state_char(field->prev_state);
1524 T = field->next_state < sizeof(state_to_char) ?
1525 state_to_char[field->next_state] : 'X';
1526 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n", 2025 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1527 field->prev_pid, 2026 field->prev_pid,
1528 field->prev_prio, 2027 field->prev_prio,
@@ -1581,6 +2080,37 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1581 trace_seq_print_cont(s, iter); 2080 trace_seq_print_cont(s, iter);
1582 break; 2081 break;
1583 } 2082 }
2083 case TRACE_GRAPH_RET: {
2084 return print_graph_function(iter);
2085 }
2086 case TRACE_GRAPH_ENT: {
2087 return print_graph_function(iter);
2088 }
2089 case TRACE_BRANCH: {
2090 struct trace_branch *field;
2091
2092 trace_assign_type(field, entry);
2093
2094 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2095 field->correct ? " ok " : " MISS ",
2096 field->func,
2097 field->file,
2098 field->line);
2099 break;
2100 }
2101 case TRACE_USER_STACK: {
2102 struct userstack_entry *field;
2103
2104 trace_assign_type(field, entry);
2105
2106 ret = seq_print_userip_objs(field, s, sym_flags);
2107 if (!ret)
2108 return TRACE_TYPE_PARTIAL_LINE;
2109 ret = trace_seq_putc(s, '\n');
2110 if (!ret)
2111 return TRACE_TYPE_PARTIAL_LINE;
2112 break;
2113 }
1584 } 2114 }
1585 return TRACE_TYPE_HANDLED; 2115 return TRACE_TYPE_HANDLED;
1586} 2116}
@@ -1621,12 +2151,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1621 2151
1622 trace_assign_type(field, entry); 2152 trace_assign_type(field, entry);
1623 2153
1624 S = field->prev_state < sizeof(state_to_char) ? 2154 T = task_state_char(field->next_state);
1625 state_to_char[field->prev_state] : 'X'; 2155 S = entry->type == TRACE_WAKE ? '+' :
1626 T = field->next_state < sizeof(state_to_char) ? 2156 task_state_char(field->prev_state);
1627 state_to_char[field->next_state] : 'X';
1628 if (entry->type == TRACE_WAKE)
1629 S = '+';
1630 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n", 2157 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1631 field->prev_pid, 2158 field->prev_pid,
1632 field->prev_prio, 2159 field->prev_prio,
@@ -1640,6 +2167,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1640 break; 2167 break;
1641 } 2168 }
1642 case TRACE_SPECIAL: 2169 case TRACE_SPECIAL:
2170 case TRACE_USER_STACK:
1643 case TRACE_STACK: { 2171 case TRACE_STACK: {
1644 struct special_entry *field; 2172 struct special_entry *field;
1645 2173
@@ -1712,12 +2240,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1712 2240
1713 trace_assign_type(field, entry); 2241 trace_assign_type(field, entry);
1714 2242
1715 S = field->prev_state < sizeof(state_to_char) ? 2243 T = task_state_char(field->next_state);
1716 state_to_char[field->prev_state] : 'X'; 2244 S = entry->type == TRACE_WAKE ? '+' :
1717 T = field->next_state < sizeof(state_to_char) ? 2245 task_state_char(field->prev_state);
1718 state_to_char[field->next_state] : 'X';
1719 if (entry->type == TRACE_WAKE)
1720 S = '+';
1721 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); 2246 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
1722 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); 2247 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1723 SEQ_PUT_HEX_FIELD_RET(s, S); 2248 SEQ_PUT_HEX_FIELD_RET(s, S);
@@ -1728,6 +2253,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1728 break; 2253 break;
1729 } 2254 }
1730 case TRACE_SPECIAL: 2255 case TRACE_SPECIAL:
2256 case TRACE_USER_STACK:
1731 case TRACE_STACK: { 2257 case TRACE_STACK: {
1732 struct special_entry *field; 2258 struct special_entry *field;
1733 2259
@@ -1744,6 +2270,25 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1744 return TRACE_TYPE_HANDLED; 2270 return TRACE_TYPE_HANDLED;
1745} 2271}
1746 2272
2273static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2274{
2275 struct trace_seq *s = &iter->seq;
2276 struct trace_entry *entry = iter->ent;
2277 struct print_entry *field;
2278 int ret;
2279
2280 trace_assign_type(field, entry);
2281
2282 ret = trace_seq_printf(s, field->buf);
2283 if (!ret)
2284 return TRACE_TYPE_PARTIAL_LINE;
2285
2286 if (entry->flags & TRACE_FLAG_CONT)
2287 trace_seq_print_cont(s, iter);
2288
2289 return TRACE_TYPE_HANDLED;
2290}
2291
1747static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 2292static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1748{ 2293{
1749 struct trace_seq *s = &iter->seq; 2294 struct trace_seq *s = &iter->seq;
@@ -1782,6 +2327,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1782 break; 2327 break;
1783 } 2328 }
1784 case TRACE_SPECIAL: 2329 case TRACE_SPECIAL:
2330 case TRACE_USER_STACK:
1785 case TRACE_STACK: { 2331 case TRACE_STACK: {
1786 struct special_entry *field; 2332 struct special_entry *field;
1787 2333
@@ -1823,6 +2369,11 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1823 return ret; 2369 return ret;
1824 } 2370 }
1825 2371
2372 if (iter->ent->type == TRACE_PRINT &&
2373 trace_flags & TRACE_ITER_PRINTK &&
2374 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2375 return print_printk_msg_only(iter);
2376
1826 if (trace_flags & TRACE_ITER_BIN) 2377 if (trace_flags & TRACE_ITER_BIN)
1827 return print_bin_fmt(iter); 2378 return print_bin_fmt(iter);
1828 2379
@@ -1847,7 +2398,9 @@ static int s_show(struct seq_file *m, void *v)
1847 seq_printf(m, "# tracer: %s\n", iter->trace->name); 2398 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1848 seq_puts(m, "#\n"); 2399 seq_puts(m, "#\n");
1849 } 2400 }
1850 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2401 if (iter->trace && iter->trace->print_header)
2402 iter->trace->print_header(m);
2403 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1851 /* print nothing if the buffers are empty */ 2404 /* print nothing if the buffers are empty */
1852 if (trace_empty(iter)) 2405 if (trace_empty(iter))
1853 return 0; 2406 return 0;
@@ -1899,6 +2452,15 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1899 iter->trace = current_trace; 2452 iter->trace = current_trace;
1900 iter->pos = -1; 2453 iter->pos = -1;
1901 2454
2455 /* Notify the tracer early; before we stop tracing. */
2456 if (iter->trace && iter->trace->open)
2457 iter->trace->open(iter);
2458
2459 /* Annotate start of buffers if we had overruns */
2460 if (ring_buffer_overruns(iter->tr->buffer))
2461 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2462
2463
1902 for_each_tracing_cpu(cpu) { 2464 for_each_tracing_cpu(cpu) {
1903 2465
1904 iter->buffer_iter[cpu] = 2466 iter->buffer_iter[cpu] =
@@ -1917,13 +2479,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1917 m->private = iter; 2479 m->private = iter;
1918 2480
1919 /* stop the trace while dumping */ 2481 /* stop the trace while dumping */
1920 if (iter->tr->ctrl) { 2482 tracing_stop();
1921 tracer_enabled = 0;
1922 ftrace_function_enabled = 0;
1923 }
1924
1925 if (iter->trace && iter->trace->open)
1926 iter->trace->open(iter);
1927 2483
1928 mutex_unlock(&trace_types_lock); 2484 mutex_unlock(&trace_types_lock);
1929 2485
@@ -1966,14 +2522,7 @@ int tracing_release(struct inode *inode, struct file *file)
1966 iter->trace->close(iter); 2522 iter->trace->close(iter);
1967 2523
1968 /* reenable tracing if it was previously enabled */ 2524 /* reenable tracing if it was previously enabled */
1969 if (iter->tr->ctrl) { 2525 tracing_start();
1970 tracer_enabled = 1;
1971 /*
1972 * It is safe to enable function tracing even if it
1973 * isn't used
1974 */
1975 ftrace_function_enabled = 1;
1976 }
1977 mutex_unlock(&trace_types_lock); 2526 mutex_unlock(&trace_types_lock);
1978 2527
1979 seq_release(inode, file); 2528 seq_release(inode, file);
@@ -2151,7 +2700,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2151 if (err) 2700 if (err)
2152 goto err_unlock; 2701 goto err_unlock;
2153 2702
2154 raw_local_irq_disable(); 2703 local_irq_disable();
2155 __raw_spin_lock(&ftrace_max_lock); 2704 __raw_spin_lock(&ftrace_max_lock);
2156 for_each_tracing_cpu(cpu) { 2705 for_each_tracing_cpu(cpu) {
2157 /* 2706 /*
@@ -2168,7 +2717,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2168 } 2717 }
2169 } 2718 }
2170 __raw_spin_unlock(&ftrace_max_lock); 2719 __raw_spin_unlock(&ftrace_max_lock);
2171 raw_local_irq_enable(); 2720 local_irq_enable();
2172 2721
2173 tracing_cpumask = tracing_cpumask_new; 2722 tracing_cpumask = tracing_cpumask_new;
2174 2723
@@ -2189,13 +2738,16 @@ static struct file_operations tracing_cpumask_fops = {
2189}; 2738};
2190 2739
2191static ssize_t 2740static ssize_t
2192tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, 2741tracing_trace_options_read(struct file *filp, char __user *ubuf,
2193 size_t cnt, loff_t *ppos) 2742 size_t cnt, loff_t *ppos)
2194{ 2743{
2744 int i;
2195 char *buf; 2745 char *buf;
2196 int r = 0; 2746 int r = 0;
2197 int len = 0; 2747 int len = 0;
2198 int i; 2748 u32 tracer_flags = current_trace->flags->val;
2749 struct tracer_opt *trace_opts = current_trace->flags->opts;
2750
2199 2751
2200 /* calulate max size */ 2752 /* calulate max size */
2201 for (i = 0; trace_options[i]; i++) { 2753 for (i = 0; trace_options[i]; i++) {
@@ -2203,6 +2755,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2203 len += 3; /* "no" and space */ 2755 len += 3; /* "no" and space */
2204 } 2756 }
2205 2757
2758 /*
2759 * Increase the size with names of options specific
2760 * of the current tracer.
2761 */
2762 for (i = 0; trace_opts[i].name; i++) {
2763 len += strlen(trace_opts[i].name);
2764 len += 3; /* "no" and space */
2765 }
2766
2206 /* +2 for \n and \0 */ 2767 /* +2 for \n and \0 */
2207 buf = kmalloc(len + 2, GFP_KERNEL); 2768 buf = kmalloc(len + 2, GFP_KERNEL);
2208 if (!buf) 2769 if (!buf)
@@ -2215,6 +2776,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2215 r += sprintf(buf + r, "no%s ", trace_options[i]); 2776 r += sprintf(buf + r, "no%s ", trace_options[i]);
2216 } 2777 }
2217 2778
2779 for (i = 0; trace_opts[i].name; i++) {
2780 if (tracer_flags & trace_opts[i].bit)
2781 r += sprintf(buf + r, "%s ",
2782 trace_opts[i].name);
2783 else
2784 r += sprintf(buf + r, "no%s ",
2785 trace_opts[i].name);
2786 }
2787
2218 r += sprintf(buf + r, "\n"); 2788 r += sprintf(buf + r, "\n");
2219 WARN_ON(r >= len + 2); 2789 WARN_ON(r >= len + 2);
2220 2790
@@ -2225,13 +2795,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2225 return r; 2795 return r;
2226} 2796}
2227 2797
2798/* Try to assign a tracer specific option */
2799static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2800{
2801 struct tracer_flags *trace_flags = trace->flags;
2802 struct tracer_opt *opts = NULL;
2803 int ret = 0, i = 0;
2804 int len;
2805
2806 for (i = 0; trace_flags->opts[i].name; i++) {
2807 opts = &trace_flags->opts[i];
2808 len = strlen(opts->name);
2809
2810 if (strncmp(cmp, opts->name, len) == 0) {
2811 ret = trace->set_flag(trace_flags->val,
2812 opts->bit, !neg);
2813 break;
2814 }
2815 }
2816 /* Not found */
2817 if (!trace_flags->opts[i].name)
2818 return -EINVAL;
2819
2820 /* Refused to handle */
2821 if (ret)
2822 return ret;
2823
2824 if (neg)
2825 trace_flags->val &= ~opts->bit;
2826 else
2827 trace_flags->val |= opts->bit;
2828
2829 return 0;
2830}
2831
2228static ssize_t 2832static ssize_t
2229tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, 2833tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2230 size_t cnt, loff_t *ppos) 2834 size_t cnt, loff_t *ppos)
2231{ 2835{
2232 char buf[64]; 2836 char buf[64];
2233 char *cmp = buf; 2837 char *cmp = buf;
2234 int neg = 0; 2838 int neg = 0;
2839 int ret;
2235 int i; 2840 int i;
2236 2841
2237 if (cnt >= sizeof(buf)) 2842 if (cnt >= sizeof(buf))
@@ -2258,11 +2863,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2258 break; 2863 break;
2259 } 2864 }
2260 } 2865 }
2261 /* 2866
2262 * If no option could be set, return an error: 2867 /* If no option could be set, test the specific tracer options */
2263 */ 2868 if (!trace_options[i]) {
2264 if (!trace_options[i]) 2869 ret = set_tracer_option(current_trace, cmp, neg);
2265 return -EINVAL; 2870 if (ret)
2871 return ret;
2872 }
2266 2873
2267 filp->f_pos += cnt; 2874 filp->f_pos += cnt;
2268 2875
@@ -2271,8 +2878,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2271 2878
2272static struct file_operations tracing_iter_fops = { 2879static struct file_operations tracing_iter_fops = {
2273 .open = tracing_open_generic, 2880 .open = tracing_open_generic,
2274 .read = tracing_iter_ctrl_read, 2881 .read = tracing_trace_options_read,
2275 .write = tracing_iter_ctrl_write, 2882 .write = tracing_trace_options_write,
2276}; 2883};
2277 2884
2278static const char readme_msg[] = 2885static const char readme_msg[] =
@@ -2286,9 +2893,9 @@ static const char readme_msg[] =
2286 "# echo sched_switch > /debug/tracing/current_tracer\n" 2893 "# echo sched_switch > /debug/tracing/current_tracer\n"
2287 "# cat /debug/tracing/current_tracer\n" 2894 "# cat /debug/tracing/current_tracer\n"
2288 "sched_switch\n" 2895 "sched_switch\n"
2289 "# cat /debug/tracing/iter_ctrl\n" 2896 "# cat /debug/tracing/trace_options\n"
2290 "noprint-parent nosym-offset nosym-addr noverbose\n" 2897 "noprint-parent nosym-offset nosym-addr noverbose\n"
2291 "# echo print-parent > /debug/tracing/iter_ctrl\n" 2898 "# echo print-parent > /debug/tracing/trace_options\n"
2292 "# echo 1 > /debug/tracing/tracing_enabled\n" 2899 "# echo 1 > /debug/tracing/tracing_enabled\n"
2293 "# cat /debug/tracing/trace > /tmp/trace.txt\n" 2900 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2294 "echo 0 > /debug/tracing/tracing_enabled\n" 2901 "echo 0 > /debug/tracing/tracing_enabled\n"
@@ -2311,11 +2918,10 @@ static ssize_t
2311tracing_ctrl_read(struct file *filp, char __user *ubuf, 2918tracing_ctrl_read(struct file *filp, char __user *ubuf,
2312 size_t cnt, loff_t *ppos) 2919 size_t cnt, loff_t *ppos)
2313{ 2920{
2314 struct trace_array *tr = filp->private_data;
2315 char buf[64]; 2921 char buf[64];
2316 int r; 2922 int r;
2317 2923
2318 r = sprintf(buf, "%ld\n", tr->ctrl); 2924 r = sprintf(buf, "%u\n", tracer_enabled);
2319 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2925 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2320} 2926}
2321 2927
@@ -2343,16 +2949,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2343 val = !!val; 2949 val = !!val;
2344 2950
2345 mutex_lock(&trace_types_lock); 2951 mutex_lock(&trace_types_lock);
2346 if (tr->ctrl ^ val) { 2952 if (tracer_enabled ^ val) {
2347 if (val) 2953 if (val) {
2348 tracer_enabled = 1; 2954 tracer_enabled = 1;
2349 else 2955 if (current_trace->start)
2956 current_trace->start(tr);
2957 tracing_start();
2958 } else {
2350 tracer_enabled = 0; 2959 tracer_enabled = 0;
2351 2960 tracing_stop();
2352 tr->ctrl = val; 2961 if (current_trace->stop)
2353 2962 current_trace->stop(tr);
2354 if (current_trace && current_trace->ctrl_update) 2963 }
2355 current_trace->ctrl_update(tr);
2356 } 2964 }
2357 mutex_unlock(&trace_types_lock); 2965 mutex_unlock(&trace_types_lock);
2358 2966
@@ -2378,29 +2986,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2378 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2986 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2379} 2987}
2380 2988
2381static ssize_t 2989static int tracing_set_tracer(char *buf)
2382tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2383 size_t cnt, loff_t *ppos)
2384{ 2990{
2385 struct trace_array *tr = &global_trace; 2991 struct trace_array *tr = &global_trace;
2386 struct tracer *t; 2992 struct tracer *t;
2387 char buf[max_tracer_type_len+1]; 2993 int ret = 0;
2388 int i;
2389 size_t ret;
2390
2391 ret = cnt;
2392
2393 if (cnt > max_tracer_type_len)
2394 cnt = max_tracer_type_len;
2395
2396 if (copy_from_user(&buf, ubuf, cnt))
2397 return -EFAULT;
2398
2399 buf[cnt] = 0;
2400
2401 /* strip ending whitespace. */
2402 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2403 buf[i] = 0;
2404 2994
2405 mutex_lock(&trace_types_lock); 2995 mutex_lock(&trace_types_lock);
2406 for (t = trace_types; t; t = t->next) { 2996 for (t = trace_types; t; t = t->next) {
@@ -2414,18 +3004,52 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2414 if (t == current_trace) 3004 if (t == current_trace)
2415 goto out; 3005 goto out;
2416 3006
3007 trace_branch_disable();
2417 if (current_trace && current_trace->reset) 3008 if (current_trace && current_trace->reset)
2418 current_trace->reset(tr); 3009 current_trace->reset(tr);
2419 3010
2420 current_trace = t; 3011 current_trace = t;
2421 if (t->init) 3012 if (t->init) {
2422 t->init(tr); 3013 ret = t->init(tr);
3014 if (ret)
3015 goto out;
3016 }
2423 3017
3018 trace_branch_enable(tr);
2424 out: 3019 out:
2425 mutex_unlock(&trace_types_lock); 3020 mutex_unlock(&trace_types_lock);
2426 3021
2427 if (ret > 0) 3022 return ret;
2428 filp->f_pos += ret; 3023}
3024
3025static ssize_t
3026tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3027 size_t cnt, loff_t *ppos)
3028{
3029 char buf[max_tracer_type_len+1];
3030 int i;
3031 size_t ret;
3032 int err;
3033
3034 ret = cnt;
3035
3036 if (cnt > max_tracer_type_len)
3037 cnt = max_tracer_type_len;
3038
3039 if (copy_from_user(&buf, ubuf, cnt))
3040 return -EFAULT;
3041
3042 buf[cnt] = 0;
3043
3044 /* strip ending whitespace. */
3045 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3046 buf[i] = 0;
3047
3048 err = tracing_set_tracer(buf);
3049 if (err)
3050 return err;
3051
3052 filp->f_pos += ret;
2429 3053
2430 return ret; 3054 return ret;
2431} 3055}
@@ -2492,6 +3116,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2492 return -ENOMEM; 3116 return -ENOMEM;
2493 3117
2494 mutex_lock(&trace_types_lock); 3118 mutex_lock(&trace_types_lock);
3119
3120 /* trace pipe does not show start of buffer */
3121 cpus_setall(iter->started);
3122
2495 iter->tr = &global_trace; 3123 iter->tr = &global_trace;
2496 iter->trace = current_trace; 3124 iter->trace = current_trace;
2497 filp->private_data = iter; 3125 filp->private_data = iter;
@@ -2667,7 +3295,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
2667 char buf[64]; 3295 char buf[64];
2668 int r; 3296 int r;
2669 3297
2670 r = sprintf(buf, "%lu\n", tr->entries); 3298 r = sprintf(buf, "%lu\n", tr->entries >> 10);
2671 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3299 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2672} 3300}
2673 3301
@@ -2678,7 +3306,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2678 unsigned long val; 3306 unsigned long val;
2679 char buf[64]; 3307 char buf[64];
2680 int ret, cpu; 3308 int ret, cpu;
2681 struct trace_array *tr = filp->private_data;
2682 3309
2683 if (cnt >= sizeof(buf)) 3310 if (cnt >= sizeof(buf))
2684 return -EINVAL; 3311 return -EINVAL;
@@ -2698,12 +3325,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2698 3325
2699 mutex_lock(&trace_types_lock); 3326 mutex_lock(&trace_types_lock);
2700 3327
2701 if (tr->ctrl) { 3328 tracing_stop();
2702 cnt = -EBUSY;
2703 pr_info("ftrace: please disable tracing"
2704 " before modifying buffer size\n");
2705 goto out;
2706 }
2707 3329
2708 /* disable all cpu buffers */ 3330 /* disable all cpu buffers */
2709 for_each_tracing_cpu(cpu) { 3331 for_each_tracing_cpu(cpu) {
@@ -2713,6 +3335,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2713 atomic_inc(&max_tr.data[cpu]->disabled); 3335 atomic_inc(&max_tr.data[cpu]->disabled);
2714 } 3336 }
2715 3337
3338 /* value is in KB */
3339 val <<= 10;
3340
2716 if (val != global_trace.entries) { 3341 if (val != global_trace.entries) {
2717 ret = ring_buffer_resize(global_trace.buffer, val); 3342 ret = ring_buffer_resize(global_trace.buffer, val);
2718 if (ret < 0) { 3343 if (ret < 0) {
@@ -2751,6 +3376,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2751 atomic_dec(&max_tr.data[cpu]->disabled); 3376 atomic_dec(&max_tr.data[cpu]->disabled);
2752 } 3377 }
2753 3378
3379 tracing_start();
2754 max_tr.entries = global_trace.entries; 3380 max_tr.entries = global_trace.entries;
2755 mutex_unlock(&trace_types_lock); 3381 mutex_unlock(&trace_types_lock);
2756 3382
@@ -2762,7 +3388,7 @@ static int mark_printk(const char *fmt, ...)
2762 int ret; 3388 int ret;
2763 va_list args; 3389 va_list args;
2764 va_start(args, fmt); 3390 va_start(args, fmt);
2765 ret = trace_vprintk(0, fmt, args); 3391 ret = trace_vprintk(0, -1, fmt, args);
2766 va_end(args); 3392 va_end(args);
2767 return ret; 3393 return ret;
2768} 3394}
@@ -2773,9 +3399,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
2773{ 3399{
2774 char *buf; 3400 char *buf;
2775 char *end; 3401 char *end;
2776 struct trace_array *tr = &global_trace;
2777 3402
2778 if (!tr->ctrl || tracing_disabled) 3403 if (tracing_disabled)
2779 return -EINVAL; 3404 return -EINVAL;
2780 3405
2781 if (cnt > TRACE_BUF_SIZE) 3406 if (cnt > TRACE_BUF_SIZE)
@@ -2841,22 +3466,38 @@ static struct file_operations tracing_mark_fops = {
2841 3466
2842#ifdef CONFIG_DYNAMIC_FTRACE 3467#ifdef CONFIG_DYNAMIC_FTRACE
2843 3468
3469int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3470{
3471 return 0;
3472}
3473
2844static ssize_t 3474static ssize_t
2845tracing_read_long(struct file *filp, char __user *ubuf, 3475tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2846 size_t cnt, loff_t *ppos) 3476 size_t cnt, loff_t *ppos)
2847{ 3477{
3478 static char ftrace_dyn_info_buffer[1024];
3479 static DEFINE_MUTEX(dyn_info_mutex);
2848 unsigned long *p = filp->private_data; 3480 unsigned long *p = filp->private_data;
2849 char buf[64]; 3481 char *buf = ftrace_dyn_info_buffer;
3482 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2850 int r; 3483 int r;
2851 3484
2852 r = sprintf(buf, "%ld\n", *p); 3485 mutex_lock(&dyn_info_mutex);
3486 r = sprintf(buf, "%ld ", *p);
2853 3487
2854 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3488 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3489 buf[r++] = '\n';
3490
3491 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3492
3493 mutex_unlock(&dyn_info_mutex);
3494
3495 return r;
2855} 3496}
2856 3497
2857static struct file_operations tracing_read_long_fops = { 3498static struct file_operations tracing_dyn_info_fops = {
2858 .open = tracing_open_generic, 3499 .open = tracing_open_generic,
2859 .read = tracing_read_long, 3500 .read = tracing_read_dyn_info,
2860}; 3501};
2861#endif 3502#endif
2862 3503
@@ -2897,10 +3538,10 @@ static __init int tracer_init_debugfs(void)
2897 if (!entry) 3538 if (!entry)
2898 pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); 3539 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2899 3540
2900 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, 3541 entry = debugfs_create_file("trace_options", 0644, d_tracer,
2901 NULL, &tracing_iter_fops); 3542 NULL, &tracing_iter_fops);
2902 if (!entry) 3543 if (!entry)
2903 pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); 3544 pr_warning("Could not create debugfs 'trace_options' entry\n");
2904 3545
2905 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3546 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2906 NULL, &tracing_cpumask_fops); 3547 NULL, &tracing_cpumask_fops);
@@ -2950,11 +3591,11 @@ static __init int tracer_init_debugfs(void)
2950 pr_warning("Could not create debugfs " 3591 pr_warning("Could not create debugfs "
2951 "'trace_pipe' entry\n"); 3592 "'trace_pipe' entry\n");
2952 3593
2953 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 3594 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
2954 &global_trace, &tracing_entries_fops); 3595 &global_trace, &tracing_entries_fops);
2955 if (!entry) 3596 if (!entry)
2956 pr_warning("Could not create debugfs " 3597 pr_warning("Could not create debugfs "
2957 "'trace_entries' entry\n"); 3598 "'buffer_size_kb' entry\n");
2958 3599
2959 entry = debugfs_create_file("trace_marker", 0220, d_tracer, 3600 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2960 NULL, &tracing_mark_fops); 3601 NULL, &tracing_mark_fops);
@@ -2965,7 +3606,7 @@ static __init int tracer_init_debugfs(void)
2965#ifdef CONFIG_DYNAMIC_FTRACE 3606#ifdef CONFIG_DYNAMIC_FTRACE
2966 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3607 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2967 &ftrace_update_tot_cnt, 3608 &ftrace_update_tot_cnt,
2968 &tracing_read_long_fops); 3609 &tracing_dyn_info_fops);
2969 if (!entry) 3610 if (!entry)
2970 pr_warning("Could not create debugfs " 3611 pr_warning("Could not create debugfs "
2971 "'dyn_ftrace_total_info' entry\n"); 3612 "'dyn_ftrace_total_info' entry\n");
@@ -2976,7 +3617,7 @@ static __init int tracer_init_debugfs(void)
2976 return 0; 3617 return 0;
2977} 3618}
2978 3619
2979int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 3620int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
2980{ 3621{
2981 static DEFINE_SPINLOCK(trace_buf_lock); 3622 static DEFINE_SPINLOCK(trace_buf_lock);
2982 static char trace_buf[TRACE_BUF_SIZE]; 3623 static char trace_buf[TRACE_BUF_SIZE];
@@ -2984,11 +3625,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2984 struct ring_buffer_event *event; 3625 struct ring_buffer_event *event;
2985 struct trace_array *tr = &global_trace; 3626 struct trace_array *tr = &global_trace;
2986 struct trace_array_cpu *data; 3627 struct trace_array_cpu *data;
2987 struct print_entry *entry;
2988 unsigned long flags, irq_flags;
2989 int cpu, len = 0, size, pc; 3628 int cpu, len = 0, size, pc;
3629 struct print_entry *entry;
3630 unsigned long irq_flags;
2990 3631
2991 if (!tr->ctrl || tracing_disabled) 3632 if (tracing_disabled || tracing_selftest_running)
2992 return 0; 3633 return 0;
2993 3634
2994 pc = preempt_count(); 3635 pc = preempt_count();
@@ -2999,7 +3640,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2999 if (unlikely(atomic_read(&data->disabled))) 3640 if (unlikely(atomic_read(&data->disabled)))
3000 goto out; 3641 goto out;
3001 3642
3002 spin_lock_irqsave(&trace_buf_lock, flags); 3643 pause_graph_tracing();
3644 spin_lock_irqsave(&trace_buf_lock, irq_flags);
3003 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 3645 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3004 3646
3005 len = min(len, TRACE_BUF_SIZE-1); 3647 len = min(len, TRACE_BUF_SIZE-1);
@@ -3010,17 +3652,18 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3010 if (!event) 3652 if (!event)
3011 goto out_unlock; 3653 goto out_unlock;
3012 entry = ring_buffer_event_data(event); 3654 entry = ring_buffer_event_data(event);
3013 tracing_generic_entry_update(&entry->ent, flags, pc); 3655 tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3014 entry->ent.type = TRACE_PRINT; 3656 entry->ent.type = TRACE_PRINT;
3015 entry->ip = ip; 3657 entry->ip = ip;
3658 entry->depth = depth;
3016 3659
3017 memcpy(&entry->buf, trace_buf, len); 3660 memcpy(&entry->buf, trace_buf, len);
3018 entry->buf[len] = 0; 3661 entry->buf[len] = 0;
3019 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 3662 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3020 3663
3021 out_unlock: 3664 out_unlock:
3022 spin_unlock_irqrestore(&trace_buf_lock, flags); 3665 spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
3023 3666 unpause_graph_tracing();
3024 out: 3667 out:
3025 preempt_enable_notrace(); 3668 preempt_enable_notrace();
3026 3669
@@ -3037,7 +3680,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3037 return 0; 3680 return 0;
3038 3681
3039 va_start(ap, fmt); 3682 va_start(ap, fmt);
3040 ret = trace_vprintk(ip, fmt, ap); 3683 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3041 va_end(ap); 3684 va_end(ap);
3042 return ret; 3685 return ret;
3043} 3686}
@@ -3046,7 +3689,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
3046static int trace_panic_handler(struct notifier_block *this, 3689static int trace_panic_handler(struct notifier_block *this,
3047 unsigned long event, void *unused) 3690 unsigned long event, void *unused)
3048{ 3691{
3049 ftrace_dump(); 3692 if (ftrace_dump_on_oops)
3693 ftrace_dump();
3050 return NOTIFY_OK; 3694 return NOTIFY_OK;
3051} 3695}
3052 3696
@@ -3062,7 +3706,8 @@ static int trace_die_handler(struct notifier_block *self,
3062{ 3706{
3063 switch (val) { 3707 switch (val) {
3064 case DIE_OOPS: 3708 case DIE_OOPS:
3065 ftrace_dump(); 3709 if (ftrace_dump_on_oops)
3710 ftrace_dump();
3066 break; 3711 break;
3067 default: 3712 default:
3068 break; 3713 break;
@@ -3103,7 +3748,6 @@ trace_printk_seq(struct trace_seq *s)
3103 trace_seq_reset(s); 3748 trace_seq_reset(s);
3104} 3749}
3105 3750
3106
3107void ftrace_dump(void) 3751void ftrace_dump(void)
3108{ 3752{
3109 static DEFINE_SPINLOCK(ftrace_dump_lock); 3753 static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3128,6 +3772,9 @@ void ftrace_dump(void)
3128 atomic_inc(&global_trace.data[cpu]->disabled); 3772 atomic_inc(&global_trace.data[cpu]->disabled);
3129 } 3773 }
3130 3774
3775 /* don't look at user memory in panic mode */
3776 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3777
3131 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3778 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3132 3779
3133 iter.tr = &global_trace; 3780 iter.tr = &global_trace;
@@ -3221,7 +3868,6 @@ __init static int tracer_alloc_buffers(void)
3221#endif 3868#endif
3222 3869
3223 /* All seems OK, enable tracing */ 3870 /* All seems OK, enable tracing */
3224 global_trace.ctrl = tracer_enabled;
3225 tracing_disabled = 0; 3871 tracing_disabled = 0;
3226 3872
3227 atomic_notifier_chain_register(&panic_notifier_list, 3873 atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad052707..cc7a4f864036 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h>
11 12
12enum trace_type { 13enum trace_type {
13 __TRACE_FIRST_TYPE = 0, 14 __TRACE_FIRST_TYPE = 0,
@@ -21,7 +22,14 @@ enum trace_type {
21 TRACE_SPECIAL, 22 TRACE_SPECIAL,
22 TRACE_MMIO_RW, 23 TRACE_MMIO_RW,
23 TRACE_MMIO_MAP, 24 TRACE_MMIO_MAP,
24 TRACE_BOOT, 25 TRACE_BRANCH,
26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET,
28 TRACE_GRAPH_RET,
29 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES,
32 TRACE_POWER,
25 33
26 __TRACE_LAST_TYPE 34 __TRACE_LAST_TYPE
27}; 35};
@@ -38,6 +46,7 @@ struct trace_entry {
38 unsigned char flags; 46 unsigned char flags;
39 unsigned char preempt_count; 47 unsigned char preempt_count;
40 int pid; 48 int pid;
49 int tgid;
41}; 50};
42 51
43/* 52/*
@@ -48,6 +57,18 @@ struct ftrace_entry {
48 unsigned long ip; 57 unsigned long ip;
49 unsigned long parent_ip; 58 unsigned long parent_ip;
50}; 59};
60
61/* Function call entry */
62struct ftrace_graph_ent_entry {
63 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent;
65};
66
67/* Function return entry */
68struct ftrace_graph_ret_entry {
69 struct trace_entry ent;
70 struct ftrace_graph_ret ret;
71};
51extern struct tracer boot_tracer; 72extern struct tracer boot_tracer;
52 73
53/* 74/*
@@ -85,12 +106,18 @@ struct stack_entry {
85 unsigned long caller[FTRACE_STACK_ENTRIES]; 106 unsigned long caller[FTRACE_STACK_ENTRIES];
86}; 107};
87 108
109struct userstack_entry {
110 struct trace_entry ent;
111 unsigned long caller[FTRACE_STACK_ENTRIES];
112};
113
88/* 114/*
89 * ftrace_printk entry: 115 * ftrace_printk entry:
90 */ 116 */
91struct print_entry { 117struct print_entry {
92 struct trace_entry ent; 118 struct trace_entry ent;
93 unsigned long ip; 119 unsigned long ip;
120 int depth;
94 char buf[]; 121 char buf[];
95}; 122};
96 123
@@ -112,9 +139,35 @@ struct trace_mmiotrace_map {
112 struct mmiotrace_map map; 139 struct mmiotrace_map map;
113}; 140};
114 141
115struct trace_boot { 142struct trace_boot_call {
116 struct trace_entry ent; 143 struct trace_entry ent;
117 struct boot_trace initcall; 144 struct boot_trace_call boot_call;
145};
146
147struct trace_boot_ret {
148 struct trace_entry ent;
149 struct boot_trace_ret boot_ret;
150};
151
152#define TRACE_FUNC_SIZE 30
153#define TRACE_FILE_SIZE 20
154struct trace_branch {
155 struct trace_entry ent;
156 unsigned line;
157 char func[TRACE_FUNC_SIZE+1];
158 char file[TRACE_FILE_SIZE+1];
159 char correct;
160};
161
162struct hw_branch_entry {
163 struct trace_entry ent;
164 u64 from;
165 u64 to;
166};
167
168struct trace_power {
169 struct trace_entry ent;
170 struct power_trace state_data;
118}; 171};
119 172
120/* 173/*
@@ -172,7 +225,6 @@ struct trace_iterator;
172struct trace_array { 225struct trace_array {
173 struct ring_buffer *buffer; 226 struct ring_buffer *buffer;
174 unsigned long entries; 227 unsigned long entries;
175 long ctrl;
176 int cpu; 228 int cpu;
177 cycle_t time_start; 229 cycle_t time_start;
178 struct task_struct *waiter; 230 struct task_struct *waiter;
@@ -212,13 +264,22 @@ extern void __ftrace_bad_type(void);
212 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
213 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ 265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
214 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
215 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
216 IF_ASSIGN(var, ent, struct special_entry, 0); \ 269 IF_ASSIGN(var, ent, struct special_entry, 0); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 270 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
218 TRACE_MMIO_RW); \ 271 TRACE_MMIO_RW); \
219 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 272 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
220 TRACE_MMIO_MAP); \ 273 TRACE_MMIO_MAP); \
221 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ 274 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
275 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
276 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
277 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
278 TRACE_GRAPH_ENT); \
279 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
280 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
222 __ftrace_bad_type(); \ 283 __ftrace_bad_type(); \
223 } while (0) 284 } while (0)
224 285
@@ -229,29 +290,56 @@ enum print_line_t {
229 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 290 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
230}; 291};
231 292
293
294/*
295 * An option specific to a tracer. This is a boolean value.
296 * The bit is the bit index that sets its value on the
297 * flags value in struct tracer_flags.
298 */
299struct tracer_opt {
300 const char *name; /* Will appear on the trace_options file */
301 u32 bit; /* Mask assigned in val field in tracer_flags */
302};
303
304/*
305 * The set of specific options for a tracer. Your tracer
306 * has to set the initial value of the flags val.
307 */
308struct tracer_flags {
309 u32 val;
310 struct tracer_opt *opts;
311};
312
313/* Makes it easier to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b
315
232/* 316/*
233 * A specific tracer, represented by methods that operate on a trace array: 317 * A specific tracer, represented by methods that operate on a trace array:
234 */ 318 */
235struct tracer { 319struct tracer {
236 const char *name; 320 const char *name;
237 void (*init)(struct trace_array *tr); 321 /* Your tracer should raise a warning if init fails */
322 int (*init)(struct trace_array *tr);
238 void (*reset)(struct trace_array *tr); 323 void (*reset)(struct trace_array *tr);
324 void (*start)(struct trace_array *tr);
325 void (*stop)(struct trace_array *tr);
239 void (*open)(struct trace_iterator *iter); 326 void (*open)(struct trace_iterator *iter);
240 void (*pipe_open)(struct trace_iterator *iter); 327 void (*pipe_open)(struct trace_iterator *iter);
241 void (*close)(struct trace_iterator *iter); 328 void (*close)(struct trace_iterator *iter);
242 void (*start)(struct trace_iterator *iter);
243 void (*stop)(struct trace_iterator *iter);
244 ssize_t (*read)(struct trace_iterator *iter, 329 ssize_t (*read)(struct trace_iterator *iter,
245 struct file *filp, char __user *ubuf, 330 struct file *filp, char __user *ubuf,
246 size_t cnt, loff_t *ppos); 331 size_t cnt, loff_t *ppos);
247 void (*ctrl_update)(struct trace_array *tr);
248#ifdef CONFIG_FTRACE_STARTUP_TEST 332#ifdef CONFIG_FTRACE_STARTUP_TEST
249 int (*selftest)(struct tracer *trace, 333 int (*selftest)(struct tracer *trace,
250 struct trace_array *tr); 334 struct trace_array *tr);
251#endif 335#endif
336 void (*print_header)(struct seq_file *m);
252 enum print_line_t (*print_line)(struct trace_iterator *iter); 337 enum print_line_t (*print_line)(struct trace_iterator *iter);
338 /* If you handled the flag setting, return 0 */
339 int (*set_flag)(u32 old_flags, u32 bit, int set);
253 struct tracer *next; 340 struct tracer *next;
254 int print_max; 341 int print_max;
342 struct tracer_flags *flags;
255}; 343};
256 344
257struct trace_seq { 345struct trace_seq {
@@ -279,10 +367,14 @@ struct trace_iterator {
279 unsigned long iter_flags; 367 unsigned long iter_flags;
280 loff_t pos; 368 loff_t pos;
281 long idx; 369 long idx;
370
371 cpumask_t started;
282}; 372};
283 373
374int tracing_is_enabled(void);
284void trace_wake_up(void); 375void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu); 376void tracing_reset(struct trace_array *tr, int cpu);
377void tracing_reset_online_cpus(struct trace_array *tr);
286int tracing_open_generic(struct inode *inode, struct file *filp); 378int tracing_open_generic(struct inode *inode, struct file *filp);
287struct dentry *tracing_init_dentry(void); 379struct dentry *tracing_init_dentry(void);
288void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 380void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
@@ -321,8 +413,15 @@ void trace_function(struct trace_array *tr,
321 unsigned long parent_ip, 413 unsigned long parent_ip,
322 unsigned long flags, int pc); 414 unsigned long flags, int pc);
323 415
416void trace_graph_return(struct ftrace_graph_ret *trace);
417int trace_graph_entry(struct ftrace_graph_ent *trace);
418void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
419
324void tracing_start_cmdline_record(void); 420void tracing_start_cmdline_record(void);
325void tracing_stop_cmdline_record(void); 421void tracing_stop_cmdline_record(void);
422void tracing_sched_switch_assign_trace(struct trace_array *tr);
423void tracing_stop_sched_switch_record(void);
424void tracing_start_sched_switch_record(void);
326int register_tracer(struct tracer *type); 425int register_tracer(struct tracer *type);
327void unregister_tracer(struct tracer *type); 426void unregister_tracer(struct tracer *type);
328 427
@@ -358,6 +457,7 @@ struct tracer_switch_ops {
358 struct tracer_switch_ops *next; 457 struct tracer_switch_ops *next;
359}; 458};
360 459
460char *trace_find_cmdline(int pid);
361#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
362 462
363#ifdef CONFIG_DYNAMIC_FTRACE 463#ifdef CONFIG_DYNAMIC_FTRACE
@@ -383,19 +483,79 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
383 struct trace_array *tr); 483 struct trace_array *tr);
384extern int trace_selftest_startup_sysprof(struct tracer *trace, 484extern int trace_selftest_startup_sysprof(struct tracer *trace,
385 struct trace_array *tr); 485 struct trace_array *tr);
486extern int trace_selftest_startup_branch(struct tracer *trace,
487 struct trace_array *tr);
386#endif /* CONFIG_FTRACE_STARTUP_TEST */ 488#endif /* CONFIG_FTRACE_STARTUP_TEST */
387 489
388extern void *head_page(struct trace_array_cpu *data); 490extern void *head_page(struct trace_array_cpu *data);
389extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
390extern void trace_seq_print_cont(struct trace_seq *s, 492extern void trace_seq_print_cont(struct trace_seq *s,
391 struct trace_iterator *iter); 493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
392extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
393 size_t cnt); 499 size_t cnt);
394extern long ns2usecs(cycle_t nsec); 500extern long ns2usecs(cycle_t nsec);
395extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); 501extern int
502trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
396 503
397extern unsigned long trace_flags; 504extern unsigned long trace_flags;
398 505
506/* Standard output formatting function used for function return traces */
507#ifdef CONFIG_FUNCTION_GRAPH_TRACER
508extern enum print_line_t print_graph_function(struct trace_iterator *iter);
509
510#ifdef CONFIG_DYNAMIC_FTRACE
511/* TODO: make this variable */
512#define FTRACE_GRAPH_MAX_FUNCS 32
513extern int ftrace_graph_count;
514extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
515
516static inline int ftrace_graph_addr(unsigned long addr)
517{
518 int i;
519
520 if (!ftrace_graph_count || test_tsk_trace_graph(current))
521 return 1;
522
523 for (i = 0; i < ftrace_graph_count; i++) {
524 if (addr == ftrace_graph_funcs[i])
525 return 1;
526 }
527
528 return 0;
529}
530#else
531static inline int ftrace_trace_addr(unsigned long addr)
532{
533 return 1;
534}
535static inline int ftrace_graph_addr(unsigned long addr)
536{
537 return 1;
538}
539#endif /* CONFIG_DYNAMIC_FTRACE */
540
541#else /* CONFIG_FUNCTION_GRAPH_TRACER */
542static inline enum print_line_t
543print_graph_function(struct trace_iterator *iter)
544{
545 return TRACE_TYPE_UNHANDLED;
546}
547#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
548
549extern struct pid *ftrace_pid_trace;
550
551static inline int ftrace_trace_task(struct task_struct *task)
552{
553 if (!ftrace_pid_trace)
554 return 1;
555
556 return test_tsk_trace_trace(task);
557}
558
399/* 559/*
400 * trace_iterator_flags is an enumeration that defines bit 560 * trace_iterator_flags is an enumeration that defines bit
401 * positions into trace_flags that controls the output. 561 * positions into trace_flags that controls the output.
@@ -415,8 +575,93 @@ enum trace_iterator_flags {
415 TRACE_ITER_STACKTRACE = 0x100, 575 TRACE_ITER_STACKTRACE = 0x100,
416 TRACE_ITER_SCHED_TREE = 0x200, 576 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400, 577 TRACE_ITER_PRINTK = 0x400,
578 TRACE_ITER_PREEMPTONLY = 0x800,
579 TRACE_ITER_BRANCH = 0x1000,
580 TRACE_ITER_ANNOTATE = 0x2000,
581 TRACE_ITER_USERSTACKTRACE = 0x4000,
582 TRACE_ITER_SYM_USEROBJ = 0x8000,
583 TRACE_ITER_PRINTK_MSGONLY = 0x10000
418}; 584};
419 585
586/*
587 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
588 * control the output of kernel symbols.
589 */
590#define TRACE_ITER_SYM_MASK \
591 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
592
420extern struct tracer nop_trace; 593extern struct tracer nop_trace;
421 594
595/**
596 * ftrace_preempt_disable - disable preemption scheduler safe
597 *
598 * When tracing can happen inside the scheduler, there exist
599 * cases where the tracing might happen before the need_resched
600 * flag is checked. If this happens and the tracer calls
601 * preempt_enable (after a disable), a schedule might take place
602 * causing an infinite recursion.
603 *
604 * To prevent this, we read the need_resched flag before
605 * disabling preemption. When we want to enable preemption we
606 * check the flag, if it is set, then we call preempt_enable_no_resched.
607 * Otherwise, we call preempt_enable.
608 *
609 * The rationale for doing the above is that if need resched is set
610 * and we have yet to reschedule, we are either in an atomic location
611 * (where we do not need to check for scheduling) or we are inside
612 * the scheduler and do not want to resched.
613 */
614static inline int ftrace_preempt_disable(void)
615{
616 int resched;
617
618 resched = need_resched();
619 preempt_disable_notrace();
620
621 return resched;
622}
623
624/**
625 * ftrace_preempt_enable - enable preemption scheduler safe
626 * @resched: the return value from ftrace_preempt_disable
627 *
628 * This is a scheduler safe way to enable preemption and not miss
629 * any preemption checks. The disable saved the need_resched state.
630 * If resched is set, then we were either inside an atomic or
631 * are inside the scheduler (we would have already scheduled
632 * otherwise). In this case, we do not want to call normal
633 * preempt_enable, but preempt_enable_no_resched instead.
634 */
635static inline void ftrace_preempt_enable(int resched)
636{
637 if (resched)
638 preempt_enable_no_resched_notrace();
639 else
640 preempt_enable_notrace();
641}
642
643#ifdef CONFIG_BRANCH_TRACER
644extern int enable_branch_tracing(struct trace_array *tr);
645extern void disable_branch_tracing(void);
646static inline int trace_branch_enable(struct trace_array *tr)
647{
648 if (trace_flags & TRACE_ITER_BRANCH)
649 return enable_branch_tracing(tr);
650 return 0;
651}
652static inline void trace_branch_disable(void)
653{
654 /* due to races, always disable */
655 disable_branch_tracing();
656}
657#else
658static inline int trace_branch_enable(struct trace_array *tr)
659{
660 return 0;
661}
662static inline void trace_branch_disable(void)
663{
664}
665#endif /* CONFIG_BRANCH_TRACER */
666
422#endif /* _LINUX_KERNEL_TRACE_H */ 667#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..3ccebde28482 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,101 +13,161 @@
13#include "trace.h" 13#include "trace.h"
14 14
15static struct trace_array *boot_trace; 15static struct trace_array *boot_trace;
16static int trace_boot_enabled; 16static bool pre_initcalls_finished;
17 17
18 18/* Tells the boot tracer that the pre_smp_initcalls are finished.
19/* Should be started after do_pre_smp_initcalls() in init/main.c */ 19 * So we are ready .
20 * It doesn't enable sched events tracing however.
21 * You have to call enable_boot_trace to do so.
22 */
20void start_boot_trace(void) 23void start_boot_trace(void)
21{ 24{
22 trace_boot_enabled = 1; 25 pre_initcalls_finished = true;
23} 26}
24 27
25void stop_boot_trace(void) 28void enable_boot_trace(void)
26{ 29{
27 trace_boot_enabled = 0; 30 if (pre_initcalls_finished)
31 tracing_start_sched_switch_record();
28} 32}
29 33
30void reset_boot_trace(struct trace_array *tr) 34void disable_boot_trace(void)
31{ 35{
32 stop_boot_trace(); 36 if (pre_initcalls_finished)
37 tracing_stop_sched_switch_record();
33} 38}
34 39
35static void boot_trace_init(struct trace_array *tr) 40static int boot_trace_init(struct trace_array *tr)
36{ 41{
37 int cpu; 42 int cpu;
38 boot_trace = tr; 43 boot_trace = tr;
39 44
40 trace_boot_enabled = 0;
41
42 for_each_cpu_mask(cpu, cpu_possible_map) 45 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu); 46 tracing_reset(tr, cpu);
47
48 tracing_sched_switch_assign_trace(tr);
49 return 0;
44} 50}
45 51
46static void boot_trace_ctrl_update(struct trace_array *tr) 52static enum print_line_t
53initcall_call_print_line(struct trace_iterator *iter)
47{ 54{
48 if (tr->ctrl) 55 struct trace_entry *entry = iter->ent;
49 start_boot_trace(); 56 struct trace_seq *s = &iter->seq;
57 struct trace_boot_call *field;
58 struct boot_trace_call *call;
59 u64 ts;
60 unsigned long nsec_rem;
61 int ret;
62
63 trace_assign_type(field, entry);
64 call = &field->boot_call;
65 ts = iter->ts;
66 nsec_rem = do_div(ts, 1000000000);
67
68 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
69 (unsigned long)ts, nsec_rem, call->func, call->caller);
70
71 if (!ret)
72 return TRACE_TYPE_PARTIAL_LINE;
50 else 73 else
51 stop_boot_trace(); 74 return TRACE_TYPE_HANDLED;
52} 75}
53 76
54static enum print_line_t initcall_print_line(struct trace_iterator *iter) 77static enum print_line_t
78initcall_ret_print_line(struct trace_iterator *iter)
55{ 79{
56 int ret;
57 struct trace_entry *entry = iter->ent; 80 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq; 81 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime); 82 struct trace_boot_ret *field;
62 struct timespec rettime = ktime_to_timespec(it->rettime); 83 struct boot_trace_ret *init_ret;
63 84 u64 ts;
64 if (entry->type == TRACE_BOOT) { 85 unsigned long nsec_rem;
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", 86 int ret;
66 calltime.tv_sec, 87
67 calltime.tv_nsec, 88 trace_assign_type(field, entry);
68 it->func, it->caller); 89 init_ret = &field->boot_ret;
69 if (!ret) 90 ts = iter->ts;
70 return TRACE_TYPE_PARTIAL_LINE; 91 nsec_rem = do_div(ts, 1000000000);
71 92
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " 93 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n", 94 "returned %d after %llu msecs\n",
74 rettime.tv_sec, 95 (unsigned long) ts,
75 rettime.tv_nsec, 96 nsec_rem,
76 it->func, it->result, it->duration); 97 init_ret->func, init_ret->result, init_ret->duration);
77 98
78 if (!ret) 99 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE; 100 return TRACE_TYPE_PARTIAL_LINE;
101 else
80 return TRACE_TYPE_HANDLED; 102 return TRACE_TYPE_HANDLED;
103}
104
105static enum print_line_t initcall_print_line(struct trace_iterator *iter)
106{
107 struct trace_entry *entry = iter->ent;
108
109 switch (entry->type) {
110 case TRACE_BOOT_CALL:
111 return initcall_call_print_line(iter);
112 case TRACE_BOOT_RET:
113 return initcall_ret_print_line(iter);
114 default:
115 return TRACE_TYPE_UNHANDLED;
81 } 116 }
82 return TRACE_TYPE_UNHANDLED;
83} 117}
84 118
85struct tracer boot_tracer __read_mostly = 119struct tracer boot_tracer __read_mostly =
86{ 120{
87 .name = "initcall", 121 .name = "initcall",
88 .init = boot_trace_init, 122 .init = boot_trace_init,
89 .reset = reset_boot_trace, 123 .reset = tracing_reset_online_cpus,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line, 124 .print_line = initcall_print_line,
92}; 125};
93 126
94void trace_boot(struct boot_trace *it, initcall_t fn) 127void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
95{ 128{
96 struct ring_buffer_event *event; 129 struct ring_buffer_event *event;
97 struct trace_boot *entry; 130 struct trace_boot_call *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags; 131 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace; 132 struct trace_array *tr = boot_trace;
101 133
102 if (!trace_boot_enabled) 134 if (!pre_initcalls_finished)
103 return; 135 return;
104 136
105 /* Get its name now since this function could 137 /* Get its name now since this function could
106 * disappear because it is in the .init section. 138 * disappear because it is in the .init section.
107 */ 139 */
108 sprint_symbol(it->func, (unsigned long)fn); 140 sprint_symbol(bt->func, (unsigned long)fn);
141 preempt_disable();
142
143 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
144 &irq_flags);
145 if (!event)
146 goto out;
147 entry = ring_buffer_event_data(event);
148 tracing_generic_entry_update(&entry->ent, 0, 0);
149 entry->ent.type = TRACE_BOOT_CALL;
150 entry->boot_call = *bt;
151 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
152
153 trace_wake_up();
154
155 out:
156 preempt_enable();
157}
158
159void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
160{
161 struct ring_buffer_event *event;
162 struct trace_boot_ret *entry;
163 unsigned long irq_flags;
164 struct trace_array *tr = boot_trace;
165
166 if (!pre_initcalls_finished)
167 return;
168
169 sprint_symbol(bt->func, (unsigned long)fn);
109 preempt_disable(); 170 preempt_disable();
110 data = tr->data[smp_processor_id()];
111 171
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 172 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags); 173 &irq_flags);
@@ -115,8 +175,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
115 goto out; 175 goto out;
116 entry = ring_buffer_event_data(event); 176 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0); 177 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT; 178 entry->ent.type = TRACE_BOOT_RET;
119 entry->initcall = *it; 179 entry->boot_ret = *bt;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 180 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121 181
122 trace_wake_up(); 182 trace_wake_up();
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 000000000000..6c00feb3bac7
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,342 @@
1/*
2 * unlikely profiler
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/irqflags.h>
10#include <linux/debugfs.h>
11#include <linux/uaccess.h>
12#include <linux/module.h>
13#include <linux/ftrace.h>
14#include <linux/hash.h>
15#include <linux/fs.h>
16#include <asm/local.h>
17#include "trace.h"
18
19#ifdef CONFIG_BRANCH_TRACER
20
21static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex);
23static struct trace_array *branch_tracer;
24
25static void
26probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
27{
28 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event;
30 struct trace_branch *entry;
31 unsigned long flags, irq_flags;
32 int cpu, pc;
33 const char *p;
34
35 /*
36 * I would love to save just the ftrace_branch_data pointer, but
37 * this code can also be used by modules. Ugly things can happen
38 * if the module is unloaded, and then we go and read the
39 * pointer. This is slower, but much safer.
40 */
41
42 if (unlikely(!tr))
43 return;
44
45 local_irq_save(flags);
46 cpu = raw_smp_processor_id();
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
51 &irq_flags);
52 if (!event)
53 goto out;
54
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59
60 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file);
62 while (p >= f->file && *p != '/')
63 p--;
64 p++;
65
66 strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
67 strncpy(entry->file, p, TRACE_FILE_SIZE);
68 entry->func[TRACE_FUNC_SIZE] = 0;
69 entry->file[TRACE_FILE_SIZE] = 0;
70 entry->line = f->line;
71 entry->correct = val == expect;
72
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
74
75 out:
76 atomic_dec(&tr->data[cpu]->disabled);
77 local_irq_restore(flags);
78}
79
80static inline
81void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
82{
83 if (!branch_tracing_enabled)
84 return;
85
86 probe_likely_condition(f, val, expect);
87}
88
89int enable_branch_tracing(struct trace_array *tr)
90{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr;
95 /*
96 * Must be seen before enabling. The reader is a condition
97 * where we do not need a matching rmb()
98 */
99 smp_wmb();
100 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex);
102
103 return ret;
104}
105
106void disable_branch_tracing(void)
107{
108 mutex_lock(&branch_tracing_mutex);
109
110 if (!branch_tracing_enabled)
111 goto out_unlock;
112
113 branch_tracing_enabled--;
114
115 out_unlock:
116 mutex_unlock(&branch_tracing_mutex);
117}
118
119static void start_branch_trace(struct trace_array *tr)
120{
121 enable_branch_tracing(tr);
122}
123
124static void stop_branch_trace(struct trace_array *tr)
125{
126 disable_branch_tracing();
127}
128
129static int branch_trace_init(struct trace_array *tr)
130{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr);
137 return 0;
138}
139
140static void branch_trace_reset(struct trace_array *tr)
141{
142 stop_branch_trace(tr);
143}
144
145struct tracer branch_trace __read_mostly =
146{
147 .name = "branch",
148 .init = branch_trace_init,
149 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch,
152#endif
153};
154
155__init static int init_branch_trace(void)
156{
157 return register_tracer(&branch_trace);
158}
159
160device_initcall(init_branch_trace);
161#else
162static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
164{
165}
166#endif /* CONFIG_BRANCH_TRACER */
167
168void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
169{
170 /*
171 * I would love to have a trace point here instead, but the
172 * trace point code is so inundated with unlikely and likely
173 * conditions that the recursive nightmare that exists is too
174 * much to try to get working. At least for now.
175 */
176 trace_likely_condition(f, val, expect);
177
178 /* FIXME: Make this atomic! */
179 if (val == expect)
180 f->correct++;
181 else
182 f->incorrect++;
183}
184EXPORT_SYMBOL(ftrace_likely_update);
185
186struct ftrace_pointer {
187 void *start;
188 void *stop;
189 int hit;
190};
191
192static void *
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{
195 const struct ftrace_pointer *f = m->private;
196 struct ftrace_branch_data *p = v;
197
198 (*pos)++;
199
200 if (v == (void *)1)
201 return f->start;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209}
210
211static void *t_start(struct seq_file *m, loff_t *pos)
212{
213 void *t = (void *)1;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218
219 return t;
220}
221
222static void t_stop(struct seq_file *m, void *p)
223{
224}
225
226static int t_show(struct seq_file *m, void *v)
227{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v;
230 const char *f;
231 long percent;
232
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */
247 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/')
249 f--;
250 f++;
251
252 /*
253 * The miss is overlaid on correct, and hit on incorrect.
254 */
255 if (p->correct) {
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0)
263 seq_printf(m, " X ");
264 else
265 seq_printf(m, "%3ld ", percent);
266 seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
267 return 0;
268}
269
270static struct seq_operations tracing_likely_seq_ops = {
271 .start = t_start,
272 .next = t_next,
273 .stop = t_stop,
274 .show = t_show,
275};
276
277static int tracing_branch_open(struct inode *inode, struct file *file)
278{
279 int ret;
280
281 ret = seq_open(file, &tracing_likely_seq_ops);
282 if (!ret) {
283 struct seq_file *m = file->private_data;
284 m->private = (void *)inode->i_private;
285 }
286
287 return ret;
288}
289
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295
296#ifdef CONFIG_PROFILE_ALL_BRANCHES
297extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[];
299
300static const struct ftrace_pointer ftrace_branch_pos = {
301 .start = __start_branch_profile,
302 .stop = __stop_branch_profile,
303 .hit = 1,
304};
305
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */
307
308extern unsigned long __start_annotated_branch_profile[];
309extern unsigned long __stop_annotated_branch_profile[];
310
311static const struct ftrace_pointer ftrace_annotated_branch_pos = {
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315
316static __init int ftrace_branch_init(void)
317{
318 struct dentry *d_tracer;
319 struct dentry *entry;
320
321 d_tracer = tracing_init_dentry();
322
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
324 (void *)&ftrace_annotated_branch_pos,
325 &tracing_branch_fops);
326 if (!entry)
327 pr_warning("Could not create debugfs "
328 "'profile_annotatet_branch' entry\n");
329
330#ifdef CONFIG_PROFILE_ALL_BRANCHES
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer,
332 (void *)&ftrace_branch_pos,
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338
339 return 0;
340}
341
342device_initcall(ftrace_branch_init);
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d3..9236d7e25a16 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -16,20 +16,10 @@
16 16
17#include "trace.h" 17#include "trace.h"
18 18
19static void function_reset(struct trace_array *tr)
20{
21 int cpu;
22
23 tr->time_start = ftrace_now(tr->cpu);
24
25 for_each_online_cpu(cpu)
26 tracing_reset(tr, cpu);
27}
28
29static void start_function_trace(struct trace_array *tr) 19static void start_function_trace(struct trace_array *tr)
30{ 20{
31 tr->cpu = get_cpu(); 21 tr->cpu = get_cpu();
32 function_reset(tr); 22 tracing_reset_online_cpus(tr);
33 put_cpu(); 23 put_cpu();
34 24
35 tracing_start_cmdline_record(); 25 tracing_start_cmdline_record();
@@ -42,24 +32,20 @@ static void stop_function_trace(struct trace_array *tr)
42 tracing_stop_cmdline_record(); 32 tracing_stop_cmdline_record();
43} 33}
44 34
45static void function_trace_init(struct trace_array *tr) 35static int function_trace_init(struct trace_array *tr)
46{ 36{
47 if (tr->ctrl) 37 start_function_trace(tr);
48 start_function_trace(tr); 38 return 0;
49} 39}
50 40
51static void function_trace_reset(struct trace_array *tr) 41static void function_trace_reset(struct trace_array *tr)
52{ 42{
53 if (tr->ctrl) 43 stop_function_trace(tr);
54 stop_function_trace(tr);
55} 44}
56 45
57static void function_trace_ctrl_update(struct trace_array *tr) 46static void function_trace_start(struct trace_array *tr)
58{ 47{
59 if (tr->ctrl) 48 tracing_reset_online_cpus(tr);
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63} 49}
64 50
65static struct tracer function_trace __read_mostly = 51static struct tracer function_trace __read_mostly =
@@ -67,7 +53,7 @@ static struct tracer function_trace __read_mostly =
67 .name = "function", 53 .name = "function",
68 .init = function_trace_init, 54 .init = function_trace_init,
69 .reset = function_trace_reset, 55 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 56 .start = function_trace_start,
71#ifdef CONFIG_FTRACE_SELFTEST 57#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function, 58 .selftest = trace_selftest_startup_function,
73#endif 59#endif
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
new file mode 100644
index 000000000000..4bf39fcae97a
--- /dev/null
+++ b/kernel/trace/trace_functions_graph.c
@@ -0,0 +1,669 @@
1/*
2 *
3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 *
8 */
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/fs.h>
13
14#include "trace.h"
15
16#define TRACE_GRAPH_INDENT 2
17
18/* Flag options */
19#define TRACE_GRAPH_PRINT_OVERRUN 0x1
20#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8
23
24static struct tracer_opt trace_opts[] = {
25 /* Display overruns ? */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27 /* Display CPU ? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
29 /* Display Overhead ? */
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
33 { } /* Empty entry */
34};
35
36static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
39 .opts = trace_opts
40};
41
42/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
44
45static int graph_trace_init(struct trace_array *tr)
46{
47 int cpu, ret;
48
49 for_each_online_cpu(cpu)
50 tracing_reset(tr, cpu);
51
52 ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry);
54 if (ret)
55 return ret;
56 tracing_start_cmdline_record();
57
58 return 0;
59}
60
61static void graph_trace_reset(struct trace_array *tr)
62{
63 tracing_stop_cmdline_record();
64 unregister_ftrace_graph();
65}
66
/*
 * Return the number of decimal digits needed to print @nb (at least 1).
 *
 * The count is not capped, so cpu numbers of 1000 and above get their
 * real width instead of being treated as 3-digit values.
 */
static inline int log10_cpu(int nb)
{
	int digits = 1;

	/* one more digit for every power of ten that fits in nb */
	for (nb /= 10; nb; nb /= 10)
		digits++;

	return digits;
}
75
76static enum print_line_t
77print_graph_cpu(struct trace_seq *s, int cpu)
78{
79 int i;
80 int ret;
81 int log10_this = log10_cpu(cpu);
82 int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
83
84
85 /*
86 * Start with a space character - to make it stand out
87 * to the right a bit when trace output is pasted into
88 * email:
89 */
90 ret = trace_seq_printf(s, " ");
91
92 /*
93 * Tricky - we space the CPU field according to the max
94 * number of online CPUs. On a 2-cpu system it would take
95 * a maximum of 1 digit - on a 128 cpu system it would
96 * take up to 3 digits:
97 */
98 for (i = 0; i < log10_all - log10_this; i++) {
99 ret = trace_seq_printf(s, " ");
100 if (!ret)
101 return TRACE_TYPE_PARTIAL_LINE;
102 }
103 ret = trace_seq_printf(s, "%d) ", cpu);
104 if (!ret)
105 return TRACE_TYPE_PARTIAL_LINE;
106
107 return TRACE_TYPE_HANDLED;
108}
109
110#define TRACE_GRAPH_PROCINFO_LENGTH 14
111
112static enum print_line_t
113print_graph_proc(struct trace_seq *s, pid_t pid)
114{
115 int i;
116 int ret;
117 int len;
118 char comm[8];
119 int spaces = 0;
120 /* sign + log10(MAX_INT) + '\0' */
121 char pid_str[11];
122
123 strncpy(comm, trace_find_cmdline(pid), 7);
124 comm[7] = '\0';
125 sprintf(pid_str, "%d", pid);
126
127 /* 1 stands for the "-" character */
128 len = strlen(comm) + strlen(pid_str) + 1;
129
130 if (len < TRACE_GRAPH_PROCINFO_LENGTH)
131 spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;
132
133 /* First spaces to align center */
134 for (i = 0; i < spaces / 2; i++) {
135 ret = trace_seq_printf(s, " ");
136 if (!ret)
137 return TRACE_TYPE_PARTIAL_LINE;
138 }
139
140 ret = trace_seq_printf(s, "%s-%s", comm, pid_str);
141 if (!ret)
142 return TRACE_TYPE_PARTIAL_LINE;
143
144 /* Last spaces to align center */
145 for (i = 0; i < spaces - (spaces / 2); i++) {
146 ret = trace_seq_printf(s, " ");
147 if (!ret)
148 return TRACE_TYPE_PARTIAL_LINE;
149 }
150 return TRACE_TYPE_HANDLED;
151}
152
153
154/* If the pid changed since the last trace, output this event */
155static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu)
157{
158 pid_t prev_pid;
159 int ret;
160
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
162 return TRACE_TYPE_HANDLED;
163
164 prev_pid = last_pid[cpu];
165 last_pid[cpu] = pid;
166
167/*
168 * Context-switch trace line:
169
170 ------------------------------------------
171 | 1) migration/0--1 => sshd-1755
172 ------------------------------------------
173
174 */
175 ret = trace_seq_printf(s,
176 " ------------------------------------------\n");
177 if (!ret)
178 TRACE_TYPE_PARTIAL_LINE;
179
180 ret = print_graph_cpu(s, cpu);
181 if (ret == TRACE_TYPE_PARTIAL_LINE)
182 TRACE_TYPE_PARTIAL_LINE;
183
184 ret = print_graph_proc(s, prev_pid);
185 if (ret == TRACE_TYPE_PARTIAL_LINE)
186 TRACE_TYPE_PARTIAL_LINE;
187
188 ret = trace_seq_printf(s, " => ");
189 if (!ret)
190 TRACE_TYPE_PARTIAL_LINE;
191
192 ret = print_graph_proc(s, pid);
193 if (ret == TRACE_TYPE_PARTIAL_LINE)
194 TRACE_TYPE_PARTIAL_LINE;
195
196 ret = trace_seq_printf(s,
197 "\n ------------------------------------------\n\n");
198 if (!ret)
199 TRACE_TYPE_PARTIAL_LINE;
200
201 return ret;
202}
203
204static bool
205trace_branch_is_leaf(struct trace_iterator *iter,
206 struct ftrace_graph_ent_entry *curr)
207{
208 struct ring_buffer_iter *ring_iter;
209 struct ring_buffer_event *event;
210 struct ftrace_graph_ret_entry *next;
211
212 ring_iter = iter->buffer_iter[iter->cpu];
213
214 if (!ring_iter)
215 return false;
216
217 event = ring_buffer_iter_peek(ring_iter, NULL);
218
219 if (!event)
220 return false;
221
222 next = ring_buffer_event_data(event);
223
224 if (next->ent.type != TRACE_GRAPH_RET)
225 return false;
226
227 if (curr->ent.pid != next->ent.pid ||
228 curr->graph_ent.func != next->ret.func)
229 return false;
230
231 return true;
232}
233
234static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid)
237{
238 int ret;
239
240 if (addr < (unsigned long)__irqentry_text_start ||
241 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED;
243
244 if (type == TRACE_GRAPH_ENT) {
245 ret = trace_seq_printf(s, "==========> | ");
246 } else {
247 /* Cpu */
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
249 ret = print_graph_cpu(s, cpu);
250 if (ret == TRACE_TYPE_PARTIAL_LINE)
251 return TRACE_TYPE_PARTIAL_LINE;
252 }
253 /* Proc */
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
255 ret = print_graph_proc(s, pid);
256 if (ret == TRACE_TYPE_PARTIAL_LINE)
257 return TRACE_TYPE_PARTIAL_LINE;
258
259 ret = trace_seq_printf(s, " | ");
260 if (!ret)
261 return TRACE_TYPE_PARTIAL_LINE;
262 }
263
264 /* No overhead */
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
266 ret = trace_seq_printf(s, " ");
267 if (!ret)
268 return TRACE_TYPE_PARTIAL_LINE;
269 }
270
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED;
276}
277
278static enum print_line_t
279print_graph_duration(unsigned long long duration, struct trace_seq *s)
280{
281 unsigned long nsecs_rem = do_div(duration, 1000);
282 /* log10(ULONG_MAX) + '\0' */
283 char msecs_str[21];
284 char nsecs_str[5];
285 int ret, len;
286 int i;
287
288 sprintf(msecs_str, "%lu", (unsigned long) duration);
289
290 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str);
292 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE;
294
295 len = strlen(msecs_str);
296
297 /* Print nsecs (we don't want to exceed 7 numbers) */
298 if (len < 7) {
299 snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem);
300 ret = trace_seq_printf(s, ".%s", nsecs_str);
301 if (!ret)
302 return TRACE_TYPE_PARTIAL_LINE;
303 len += strlen(nsecs_str);
304 }
305
306 ret = trace_seq_printf(s, " us ");
307 if (!ret)
308 return TRACE_TYPE_PARTIAL_LINE;
309
310 /* Print remaining spaces to fit the row's width */
311 for (i = len; i < 7; i++) {
312 ret = trace_seq_printf(s, " ");
313 if (!ret)
314 return TRACE_TYPE_PARTIAL_LINE;
315 }
316
317 ret = trace_seq_printf(s, "| ");
318 if (!ret)
319 return TRACE_TYPE_PARTIAL_LINE;
320 return TRACE_TYPE_HANDLED;
321
322}
323
/*
 * Print the overhead marker that flags long execution times:
 * "! " above 100000, "+ " above 10000, a blank otherwise.
 *
 * NOTE(review): @duration looks like nanoseconds (print_graph_duration()
 * divides it by 1000 and labels the result "us"), which would make the
 * thresholds 100 us and 10 us - confirm.
 *
 * Returns the result of trace_seq_printf().
 */
static int
print_graph_overhead(unsigned long long duration, struct trace_seq *s)
{
	/* Nothing to signal */
	if (duration <= 10000ULL)
		return trace_seq_printf(s, " ");

	/* Duration exceeded 10000 but not 100000 */
	if (duration <= 100000ULL)
		return trace_seq_printf(s, "+ ");

	/* Duration exceeded 100000 */
	return trace_seq_printf(s, "! ");
}
338
339/* Case of a leaf function on its call entry */
340static enum print_line_t
341print_graph_entry_leaf(struct trace_iterator *iter,
342 struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
343{
344 struct ftrace_graph_ret_entry *ret_entry;
345 struct ftrace_graph_ret *graph_ret;
346 struct ring_buffer_event *event;
347 struct ftrace_graph_ent *call;
348 unsigned long long duration;
349 int ret;
350 int i;
351
352 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
353 ret_entry = ring_buffer_event_data(event);
354 graph_ret = &ret_entry->ret;
355 call = &entry->graph_ent;
356 duration = graph_ret->rettime - graph_ret->calltime;
357
358 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
360 ret = print_graph_overhead(duration, s);
361 if (!ret)
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364
365 /* Duration */
366 ret = print_graph_duration(duration, s);
367 if (ret == TRACE_TYPE_PARTIAL_LINE)
368 return TRACE_TYPE_PARTIAL_LINE;
369
370 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
372 ret = trace_seq_printf(s, " ");
373 if (!ret)
374 return TRACE_TYPE_PARTIAL_LINE;
375 }
376
377 ret = seq_print_ip_sym(s, call->func, 0);
378 if (!ret)
379 return TRACE_TYPE_PARTIAL_LINE;
380
381 ret = trace_seq_printf(s, "();\n");
382 if (!ret)
383 return TRACE_TYPE_PARTIAL_LINE;
384
385 return TRACE_TYPE_HANDLED;
386}
387
388static enum print_line_t
389print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
390 struct trace_seq *s, pid_t pid, int cpu)
391{
392 int i;
393 int ret;
394 struct ftrace_graph_ent *call = &entry->graph_ent;
395
396 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
398 ret = trace_seq_printf(s, " ");
399 if (!ret)
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402
403 /* Interrupt */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid);
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | ");
408 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 }
414
415
416 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " ");
419 if (!ret)
420 return TRACE_TYPE_PARTIAL_LINE;
421 }
422
423 ret = seq_print_ip_sym(s, call->func, 0);
424 if (!ret)
425 return TRACE_TYPE_PARTIAL_LINE;
426
427 ret = trace_seq_printf(s, "() {\n");
428 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE;
430
431 return TRACE_TYPE_HANDLED;
432}
433
434static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu)
437{
438 int ret;
439 struct trace_entry *ent = iter->ent;
440
441 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE;
444
445 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu);
448 if (ret == TRACE_TYPE_PARTIAL_LINE)
449 return TRACE_TYPE_PARTIAL_LINE;
450 }
451
452 /* Proc */
453 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
454 ret = print_graph_proc(s, ent->pid);
455 if (ret == TRACE_TYPE_PARTIAL_LINE)
456 return TRACE_TYPE_PARTIAL_LINE;
457
458 ret = trace_seq_printf(s, " | ");
459 if (!ret)
460 return TRACE_TYPE_PARTIAL_LINE;
461 }
462
463 if (trace_branch_is_leaf(iter, field))
464 return print_graph_entry_leaf(iter, field, s);
465 else
466 return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
467
468}
469
470static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu)
473{
474 int i;
475 int ret;
476 unsigned long long duration = trace->rettime - trace->calltime;
477
478 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE;
481
482 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu);
485 if (ret == TRACE_TYPE_PARTIAL_LINE)
486 return TRACE_TYPE_PARTIAL_LINE;
487 }
488
489 /* Proc */
490 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
491 ret = print_graph_proc(s, ent->pid);
492 if (ret == TRACE_TYPE_PARTIAL_LINE)
493 return TRACE_TYPE_PARTIAL_LINE;
494
495 ret = trace_seq_printf(s, " | ");
496 if (!ret)
497 return TRACE_TYPE_PARTIAL_LINE;
498 }
499
500 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
502 ret = print_graph_overhead(duration, s);
503 if (!ret)
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506
507 /* Duration */
508 ret = print_graph_duration(duration, s);
509 if (ret == TRACE_TYPE_PARTIAL_LINE)
510 return TRACE_TYPE_PARTIAL_LINE;
511
512 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
514 ret = trace_seq_printf(s, " ");
515 if (!ret)
516 return TRACE_TYPE_PARTIAL_LINE;
517 }
518
519 ret = trace_seq_printf(s, "}\n");
520 if (!ret)
521 return TRACE_TYPE_PARTIAL_LINE;
522
523 /* Overrun */
524 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
525 ret = trace_seq_printf(s, " (Overruns: %lu)\n",
526 trace->overrun);
527 if (!ret)
528 return TRACE_TYPE_PARTIAL_LINE;
529 }
530
531 ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid);
532 if (ret == TRACE_TYPE_PARTIAL_LINE)
533 return TRACE_TYPE_PARTIAL_LINE;
534
535 return TRACE_TYPE_HANDLED;
536}
537
538static enum print_line_t
539print_graph_comment(struct print_entry *trace, struct trace_seq *s,
540 struct trace_entry *ent, struct trace_iterator *iter)
541{
542 int i;
543 int ret;
544
545 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE;
548
549 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE;
554 }
555
556 /* Proc */
557 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
558 ret = print_graph_proc(s, ent->pid);
559 if (ret == TRACE_TYPE_PARTIAL_LINE)
560 return TRACE_TYPE_PARTIAL_LINE;
561
562 ret = trace_seq_printf(s, " | ");
563 if (!ret)
564 return TRACE_TYPE_PARTIAL_LINE;
565 }
566
567 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
569 ret = trace_seq_printf(s, " ");
570 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE;
572 }
573
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */
580 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
582 ret = trace_seq_printf(s, " ");
583 if (!ret)
584 return TRACE_TYPE_PARTIAL_LINE;
585 }
586
587 /* The comment */
588 ret = trace_seq_printf(s, "/* %s", trace->buf);
589 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE;
591
592 if (ent->flags & TRACE_FLAG_CONT)
593 trace_seq_print_cont(s, iter);
594
595 ret = trace_seq_printf(s, " */\n");
596 if (!ret)
597 return TRACE_TYPE_PARTIAL_LINE;
598
599 return TRACE_TYPE_HANDLED;
600}
601
602
603enum print_line_t
604print_graph_function(struct trace_iterator *iter)
605{
606 struct trace_seq *s = &iter->seq;
607 struct trace_entry *entry = iter->ent;
608
609 switch (entry->type) {
610 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter,
614 iter->cpu);
615 }
616 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu);
620 }
621 case TRACE_PRINT: {
622 struct print_entry *field;
623 trace_assign_type(field, entry);
624 return print_graph_comment(field, s, entry, iter);
625 }
626 default:
627 return TRACE_TYPE_UNHANDLED;
628 }
629}
630
631static void print_graph_headers(struct seq_file *s)
632{
633 /* 1st line */
634 seq_printf(s, "# ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU ");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD)
640 seq_printf(s, "OVERHEAD/");
641 seq_printf(s, "DURATION FUNCTION CALLS\n");
642
643 /* 2nd line */
644 seq_printf(s, "# ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
650 seq_printf(s, "| ");
651 seq_printf(s, "| | | | |\n");
652 } else
653 seq_printf(s, " | | | | |\n");
654}
655static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph",
657 .init = graph_trace_init,
658 .reset = graph_trace_reset,
659 .print_line = print_graph_function,
660 .print_header = print_graph_headers,
661 .flags = &tracer_flags,
662};
663
664static __init int init_graph_trace(void)
665{
666 return register_tracer(&graph_trace);
667}
668
669device_initcall(init_graph_trace);
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
new file mode 100644
index 000000000000..b6a3e20a49a9
--- /dev/null
+++ b/kernel/trace/trace_hw_branches.c
@@ -0,0 +1,195 @@
1/*
2 * h/w branch tracer for x86 based on bts
3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/kallsyms.h>
13
14#include <asm/ds.h>
15
16#include "trace.h"
17
18
19#define SIZEOF_BTS (1 << 13)
20
21static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23
24#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id())
26
27
28static void bts_trace_start_cpu(void *arg)
29{
30 if (this_tracer)
31 ds_release_bts(this_tracer);
32
33 this_tracer =
34 ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
35 /* ovfl = */ NULL, /* th = */ (size_t)-1,
36 BTS_KERNEL);
37 if (IS_ERR(this_tracer)) {
38 this_tracer = NULL;
39 return;
40 }
41}
42
43static void bts_trace_start(struct trace_array *tr)
44{
45 int cpu;
46
47 tracing_reset_online_cpus(tr);
48
49 for_each_cpu_mask(cpu, cpu_possible_map)
50 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
51}
52
53static void bts_trace_stop_cpu(void *arg)
54{
55 if (this_tracer) {
56 ds_release_bts(this_tracer);
57 this_tracer = NULL;
58 }
59}
60
61static void bts_trace_stop(struct trace_array *tr)
62{
63 int cpu;
64
65 for_each_cpu_mask(cpu, cpu_possible_map)
66 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
67}
68
69static int bts_trace_init(struct trace_array *tr)
70{
71 tracing_reset_online_cpus(tr);
72 bts_trace_start(tr);
73
74 return 0;
75}
76
77static void bts_trace_print_header(struct seq_file *m)
78{
79 seq_puts(m,
80 "# CPU# FROM TO FUNCTION\n");
81 seq_puts(m,
82 "# | | | |\n");
83}
84
85static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
86{
87 struct trace_entry *entry = iter->ent;
88 struct trace_seq *seq = &iter->seq;
89 struct hw_branch_entry *it;
90
91 trace_assign_type(it, entry);
92
93 if (entry->type == TRACE_HW_BRANCHES) {
94 if (trace_seq_printf(seq, "%4d ", entry->cpu) &&
95 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ",
96 it->from, it->to) &&
97 (!it->from ||
98 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
99 trace_seq_printf(seq, "\n"))
100 return TRACE_TYPE_HANDLED;
101 return TRACE_TYPE_PARTIAL_LINE;;
102 }
103 return TRACE_TYPE_UNHANDLED;
104}
105
106void trace_hw_branch(struct trace_array *tr, u64 from, u64 to)
107{
108 struct ring_buffer_event *event;
109 struct hw_branch_entry *entry;
110 unsigned long irq;
111
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq);
113 if (!event)
114 return;
115 entry = ring_buffer_event_data(event);
116 tracing_generic_entry_update(&entry->ent, 0, from);
117 entry->ent.type = TRACE_HW_BRANCHES;
118 entry->ent.cpu = smp_processor_id();
119 entry->from = from;
120 entry->to = to;
121 ring_buffer_unlock_commit(tr->buffer, event, irq);
122}
123
124static void trace_bts_at(struct trace_array *tr,
125 const struct bts_trace *trace, void *at)
126{
127 struct bts_struct bts;
128 int err = 0;
129
130 WARN_ON_ONCE(!trace->read);
131 if (!trace->read)
132 return;
133
134 err = trace->read(this_tracer, at, &bts);
135 if (err < 0)
136 return;
137
138 switch (bts.qualifier) {
139 case BTS_BRANCH:
140 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to);
141 break;
142 }
143}
144
145static void trace_bts_cpu(void *arg)
146{
147 struct trace_array *tr = (struct trace_array *) arg;
148 const struct bts_trace *trace;
149 unsigned char *at;
150
151 if (!this_tracer)
152 return;
153
154 ds_suspend_bts(this_tracer);
155 trace = ds_read_bts(this_tracer);
156 if (!trace)
157 goto out;
158
159 for (at = trace->ds.top; (void *)at < trace->ds.end;
160 at += trace->ds.size)
161 trace_bts_at(tr, trace, at);
162
163 for (at = trace->ds.begin; (void *)at < trace->ds.top;
164 at += trace->ds.size)
165 trace_bts_at(tr, trace, at);
166
167out:
168 ds_resume_bts(this_tracer);
169}
170
171static void trace_bts_prepare(struct trace_iterator *iter)
172{
173 int cpu;
174
175 for_each_cpu_mask(cpu, cpu_possible_map)
176 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
177}
178
179struct tracer bts_tracer __read_mostly =
180{
181 .name = "hw-branch-tracer",
182 .init = bts_trace_init,
183 .reset = bts_trace_stop,
184 .print_header = bts_trace_print_header,
185 .print_line = bts_trace_print_line,
186 .start = bts_trace_start,
187 .stop = bts_trace_stop,
188 .open = trace_bts_prepare
189};
190
191__init static int init_bts_trace(void)
192{
193 return register_tracer(&bts_tracer);
194}
195device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e0..7c2e326bbc8b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
356static void start_irqsoff_tracer(struct trace_array *tr) 362static void start_irqsoff_tracer(struct trace_array *tr)
357{ 363{
358 register_ftrace_function(&trace_ops); 364 register_ftrace_function(&trace_ops);
359 tracer_enabled = 1; 365 if (tracing_is_enabled()) {
366 tracer_enabled = 1;
367 save_tracer_enabled = 1;
368 } else {
369 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
360} 372}
361 373
362static void stop_irqsoff_tracer(struct trace_array *tr) 374static void stop_irqsoff_tracer(struct trace_array *tr)
363{ 375{
364 tracer_enabled = 0; 376 tracer_enabled = 0;
377 save_tracer_enabled = 0;
365 unregister_ftrace_function(&trace_ops); 378 unregister_ftrace_function(&trace_ops);
366} 379}
367 380
@@ -370,53 +383,55 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
370 irqsoff_trace = tr; 383 irqsoff_trace = tr;
371 /* make sure that the tracer is visible */ 384 /* make sure that the tracer is visible */
372 smp_wmb(); 385 smp_wmb();
373 386 start_irqsoff_tracer(tr);
374 if (tr->ctrl)
375 start_irqsoff_tracer(tr);
376} 387}
377 388
378static void irqsoff_tracer_reset(struct trace_array *tr) 389static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 390{
380 if (tr->ctrl) 391 stop_irqsoff_tracer(tr);
381 stop_irqsoff_tracer(tr);
382} 392}
383 393
384static void irqsoff_tracer_ctrl_update(struct trace_array *tr) 394static void irqsoff_tracer_start(struct trace_array *tr)
385{ 395{
386 if (tr->ctrl) 396 tracer_enabled = 1;
387 start_irqsoff_tracer(tr); 397 save_tracer_enabled = 1;
388 else 398}
389 stop_irqsoff_tracer(tr); 399
400static void irqsoff_tracer_stop(struct trace_array *tr)
401{
402 tracer_enabled = 0;
403 save_tracer_enabled = 0;
390} 404}
391 405
392static void irqsoff_tracer_open(struct trace_iterator *iter) 406static void irqsoff_tracer_open(struct trace_iterator *iter)
393{ 407{
394 /* stop the trace while dumping */ 408 /* stop the trace while dumping */
395 if (iter->tr->ctrl) 409 tracer_enabled = 0;
396 stop_irqsoff_tracer(iter->tr);
397} 410}
398 411
399static void irqsoff_tracer_close(struct trace_iterator *iter) 412static void irqsoff_tracer_close(struct trace_iterator *iter)
400{ 413{
401 if (iter->tr->ctrl) 414 /* restart tracing */
402 start_irqsoff_tracer(iter->tr); 415 tracer_enabled = save_tracer_enabled;
403} 416}
404 417
405#ifdef CONFIG_IRQSOFF_TRACER 418#ifdef CONFIG_IRQSOFF_TRACER
406static void irqsoff_tracer_init(struct trace_array *tr) 419static int irqsoff_tracer_init(struct trace_array *tr)
407{ 420{
408 trace_type = TRACER_IRQS_OFF; 421 trace_type = TRACER_IRQS_OFF;
409 422
410 __irqsoff_tracer_init(tr); 423 __irqsoff_tracer_init(tr);
424 return 0;
411} 425}
412static struct tracer irqsoff_tracer __read_mostly = 426static struct tracer irqsoff_tracer __read_mostly =
413{ 427{
414 .name = "irqsoff", 428 .name = "irqsoff",
415 .init = irqsoff_tracer_init, 429 .init = irqsoff_tracer_init,
416 .reset = irqsoff_tracer_reset, 430 .reset = irqsoff_tracer_reset,
431 .start = irqsoff_tracer_start,
432 .stop = irqsoff_tracer_stop,
417 .open = irqsoff_tracer_open, 433 .open = irqsoff_tracer_open,
418 .close = irqsoff_tracer_close, 434 .close = irqsoff_tracer_close,
419 .ctrl_update = irqsoff_tracer_ctrl_update,
420 .print_max = 1, 435 .print_max = 1,
421#ifdef CONFIG_FTRACE_SELFTEST 436#ifdef CONFIG_FTRACE_SELFTEST
422 .selftest = trace_selftest_startup_irqsoff, 437 .selftest = trace_selftest_startup_irqsoff,
@@ -428,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
428#endif 443#endif
429 444
430#ifdef CONFIG_PREEMPT_TRACER 445#ifdef CONFIG_PREEMPT_TRACER
431static void preemptoff_tracer_init(struct trace_array *tr) 446static int preemptoff_tracer_init(struct trace_array *tr)
432{ 447{
433 trace_type = TRACER_PREEMPT_OFF; 448 trace_type = TRACER_PREEMPT_OFF;
434 449
435 __irqsoff_tracer_init(tr); 450 __irqsoff_tracer_init(tr);
451 return 0;
436} 452}
437 453
438static struct tracer preemptoff_tracer __read_mostly = 454static struct tracer preemptoff_tracer __read_mostly =
@@ -440,9 +456,10 @@ static struct tracer preemptoff_tracer __read_mostly =
440 .name = "preemptoff", 456 .name = "preemptoff",
441 .init = preemptoff_tracer_init, 457 .init = preemptoff_tracer_init,
442 .reset = irqsoff_tracer_reset, 458 .reset = irqsoff_tracer_reset,
459 .start = irqsoff_tracer_start,
460 .stop = irqsoff_tracer_stop,
443 .open = irqsoff_tracer_open, 461 .open = irqsoff_tracer_open,
444 .close = irqsoff_tracer_close, 462 .close = irqsoff_tracer_close,
445 .ctrl_update = irqsoff_tracer_ctrl_update,
446 .print_max = 1, 463 .print_max = 1,
447#ifdef CONFIG_FTRACE_SELFTEST 464#ifdef CONFIG_FTRACE_SELFTEST
448 .selftest = trace_selftest_startup_preemptoff, 465 .selftest = trace_selftest_startup_preemptoff,
@@ -456,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
456#if defined(CONFIG_IRQSOFF_TRACER) && \ 473#if defined(CONFIG_IRQSOFF_TRACER) && \
457 defined(CONFIG_PREEMPT_TRACER) 474 defined(CONFIG_PREEMPT_TRACER)
458 475
459static void preemptirqsoff_tracer_init(struct trace_array *tr) 476static int preemptirqsoff_tracer_init(struct trace_array *tr)
460{ 477{
461 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; 478 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
462 479
463 __irqsoff_tracer_init(tr); 480 __irqsoff_tracer_init(tr);
481 return 0;
464} 482}
465 483
466static struct tracer preemptirqsoff_tracer __read_mostly = 484static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -468,9 +486,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
468 .name = "preemptirqsoff", 486 .name = "preemptirqsoff",
469 .init = preemptirqsoff_tracer_init, 487 .init = preemptirqsoff_tracer_init,
470 .reset = irqsoff_tracer_reset, 488 .reset = irqsoff_tracer_reset,
489 .start = irqsoff_tracer_start,
490 .stop = irqsoff_tracer_stop,
471 .open = irqsoff_tracer_open, 491 .open = irqsoff_tracer_open,
472 .close = irqsoff_tracer_close, 492 .close = irqsoff_tracer_close,
473 .ctrl_update = irqsoff_tracer_ctrl_update,
474 .print_max = 1, 493 .print_max = 1,
475#ifdef CONFIG_FTRACE_SELFTEST 494#ifdef CONFIG_FTRACE_SELFTEST
476 .selftest = trace_selftest_startup_preemptirqsoff, 495 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index e62cbf78eab6..fffcb069f1dc 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -22,44 +22,35 @@ static unsigned long prev_overruns;
22 22
23static void mmio_reset_data(struct trace_array *tr) 23static void mmio_reset_data(struct trace_array *tr)
24{ 24{
25 int cpu;
26
27 overrun_detected = false; 25 overrun_detected = false;
28 prev_overruns = 0; 26 prev_overruns = 0;
29 tr->time_start = ftrace_now(tr->cpu);
30 27
31 for_each_online_cpu(cpu) 28 tracing_reset_online_cpus(tr);
32 tracing_reset(tr, cpu);
33} 29}
34 30
35static void mmio_trace_init(struct trace_array *tr) 31static int mmio_trace_init(struct trace_array *tr)
36{ 32{
37 pr_debug("in %s\n", __func__); 33 pr_debug("in %s\n", __func__);
38 mmio_trace_array = tr; 34 mmio_trace_array = tr;
39 if (tr->ctrl) { 35
40 mmio_reset_data(tr); 36 mmio_reset_data(tr);
41 enable_mmiotrace(); 37 enable_mmiotrace();
42 } 38 return 0;
43} 39}
44 40
45static void mmio_trace_reset(struct trace_array *tr) 41static void mmio_trace_reset(struct trace_array *tr)
46{ 42{
47 pr_debug("in %s\n", __func__); 43 pr_debug("in %s\n", __func__);
48 if (tr->ctrl) 44
49 disable_mmiotrace(); 45 disable_mmiotrace();
50 mmio_reset_data(tr); 46 mmio_reset_data(tr);
51 mmio_trace_array = NULL; 47 mmio_trace_array = NULL;
52} 48}
53 49
54static void mmio_trace_ctrl_update(struct trace_array *tr) 50static void mmio_trace_start(struct trace_array *tr)
55{ 51{
56 pr_debug("in %s\n", __func__); 52 pr_debug("in %s\n", __func__);
57 if (tr->ctrl) { 53 mmio_reset_data(tr);
58 mmio_reset_data(tr);
59 enable_mmiotrace();
60 } else {
61 disable_mmiotrace();
62 }
63} 54}
64 55
65static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) 56static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -296,10 +287,10 @@ static struct tracer mmio_tracer __read_mostly =
296 .name = "mmiotrace", 287 .name = "mmiotrace",
297 .init = mmio_trace_init, 288 .init = mmio_trace_init,
298 .reset = mmio_trace_reset, 289 .reset = mmio_trace_reset,
290 .start = mmio_trace_start,
299 .pipe_open = mmio_pipe_open, 291 .pipe_open = mmio_pipe_open,
300 .close = mmio_close, 292 .close = mmio_close,
301 .read = mmio_read, 293 .read = mmio_read,
302 .ctrl_update = mmio_trace_ctrl_update,
303 .print_line = mmio_print_line, 294 .print_line = mmio_print_line,
304}; 295};
305 296
@@ -371,5 +362,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
371 362
372int mmio_trace_printk(const char *fmt, va_list args) 363int mmio_trace_printk(const char *fmt, va_list args)
373{ 364{
374 return trace_vprintk(0, fmt, args); 365 return trace_vprintk(0, -1, fmt, args);
375} 366}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 4592b4862515..b9767acd30ac 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -12,6 +12,27 @@
12 12
13#include "trace.h" 13#include "trace.h"
14 14
15/* Our two options */
16enum {
17 TRACE_NOP_OPT_ACCEPT = 0x1,
18 TRACE_NOP_OPT_REFUSE = 0x2
19};
20
21/* Options for the tracer (see trace_options file) */
22static struct tracer_opt nop_opts[] = {
23 /* Option that will be accepted by set_flag callback */
24 { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
25 /* Option that will be refused by set_flag callback */
26 { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
27 { } /* Always set a last empty entry */
28};
29
30static struct tracer_flags nop_flags = {
31 /* You can check your flags value here when you want. */
32 .val = 0, /* By default: all flags disabled */
33 .opts = nop_opts
34};
35
15static struct trace_array *ctx_trace; 36static struct trace_array *ctx_trace;
16 37
17static void start_nop_trace(struct trace_array *tr) 38static void start_nop_trace(struct trace_array *tr)
@@ -24,7 +45,7 @@ static void stop_nop_trace(struct trace_array *tr)
24 /* Nothing to do! */ 45 /* Nothing to do! */
25} 46}
26 47
27static void nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
28{ 49{
29 int cpu; 50 int cpu;
30 ctx_trace = tr; 51 ctx_trace = tr;
@@ -32,33 +53,53 @@ static void nop_trace_init(struct trace_array *tr)
32 for_each_online_cpu(cpu) 53 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu); 54 tracing_reset(tr, cpu);
34 55
35 if (tr->ctrl) 56 start_nop_trace(tr);
36 start_nop_trace(tr); 57 return 0;
37} 58}
38 59
39static void nop_trace_reset(struct trace_array *tr) 60static void nop_trace_reset(struct trace_array *tr)
40{ 61{
41 if (tr->ctrl) 62 stop_nop_trace(tr);
42 stop_nop_trace(tr);
43} 63}
44 64
45static void nop_trace_ctrl_update(struct trace_array *tr) 65/* It only serves as a signal handler and a callback to
66 * accept or refuse the setting of a flag.
67 * If you don't implement it, then the flag setting will be
68 * automatically accepted.
69 */
70static int nop_set_flag(u32 old_flags, u32 bit, int set)
46{ 71{
47 /* When starting a new trace, reset the buffers */ 72 /*
48 if (tr->ctrl) 73 * Note that you don't need to update nop_flags.val yourself.
49 start_nop_trace(tr); 74 * The tracing Api will do it automatically if you return 0
50 else 75 */
51 stop_nop_trace(tr); 76 if (bit == TRACE_NOP_OPT_ACCEPT) {
77 printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
78 " Now cat trace_options to see the result\n",
79 set);
80 return 0;
81 }
82
83 if (bit == TRACE_NOP_OPT_REFUSE) {
84 printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
85 "Now cat trace_options to see the result\n",
86 set);
87 return -EINVAL;
88 }
89
90 return 0;
52} 91}
53 92
93
54struct tracer nop_trace __read_mostly = 94struct tracer nop_trace __read_mostly =
55{ 95{
56 .name = "nop", 96 .name = "nop",
57 .init = nop_trace_init, 97 .init = nop_trace_init,
58 .reset = nop_trace_reset, 98 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST 99#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop, 100 .selftest = trace_selftest_startup_nop,
62#endif 101#endif
102 .flags = &nop_flags,
103 .set_flag = nop_set_flag
63}; 104};
64 105
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
new file mode 100644
index 000000000000..a7172a352f62
--- /dev/null
+++ b/kernel/trace/trace_power.c
@@ -0,0 +1,179 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <linux/ftrace.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19
20static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled;
22
23
24static void start_power_trace(struct trace_array *tr)
25{
26 trace_power_enabled = 1;
27}
28
29static void stop_power_trace(struct trace_array *tr)
30{
31 trace_power_enabled = 0;
32}
33
34
35static int power_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 power_trace = tr;
39
40 trace_power_enabled = 1;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44 return 0;
45}
46
47static enum print_line_t power_print_line(struct trace_iterator *iter)
48{
49 int ret = 0;
50 struct trace_entry *entry = iter->ent;
51 struct trace_power *field ;
52 struct power_trace *it;
53 struct trace_seq *s = &iter->seq;
54 struct timespec stamp;
55 struct timespec duration;
56
57 trace_assign_type(field, entry);
58 it = &field->state_data;
59 stamp = ktime_to_timespec(it->stamp);
60 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
61
62 if (entry->type == TRACE_POWER) {
63 if (it->type == POWER_CSTATE)
64 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
65 stamp.tv_sec,
66 stamp.tv_nsec,
67 it->state, iter->cpu,
68 duration.tv_sec,
69 duration.tv_nsec);
70 if (it->type == POWER_PSTATE)
71 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
72 stamp.tv_sec,
73 stamp.tv_nsec,
74 it->state, iter->cpu);
75 if (!ret)
76 return TRACE_TYPE_PARTIAL_LINE;
77 return TRACE_TYPE_HANDLED;
78 }
79 return TRACE_TYPE_UNHANDLED;
80}
81
82static struct tracer power_tracer __read_mostly =
83{
84 .name = "power",
85 .init = power_trace_init,
86 .start = start_power_trace,
87 .stop = stop_power_trace,
88 .reset = stop_power_trace,
89 .print_line = power_print_line,
90};
91
92static int init_power_trace(void)
93{
94 return register_tracer(&power_tracer);
95}
96device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a62..df175cb4564f 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
16 16
17static struct trace_array *ctx_trace; 17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex);
20 21
21static void 22static void
22probe_sched_switch(struct rq *__rq, struct task_struct *prev, 23probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
27 int cpu; 28 int cpu;
28 int pc; 29 int pc;
29 30
30 if (!atomic_read(&sched_ref)) 31 if (!sched_ref)
31 return; 32 return;
32 33
33 tracing_record_cmdline(prev); 34 tracing_record_cmdline(prev);
@@ -48,7 +49,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
48} 49}
49 50
50static void 51static void
51probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee) 52probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
52{ 53{
53 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
54 unsigned long flags; 55 unsigned long flags;
@@ -71,16 +72,6 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
71 local_irq_restore(flags); 72 local_irq_restore(flags);
72} 73}
73 74
74static void sched_switch_reset(struct trace_array *tr)
75{
76 int cpu;
77
78 tr->time_start = ftrace_now(tr->cpu);
79
80 for_each_online_cpu(cpu)
81 tracing_reset(tr, cpu);
82}
83
84static int tracing_sched_register(void) 75static int tracing_sched_register(void)
85{ 76{
86 int ret; 77 int ret;
@@ -123,20 +114,18 @@ static void tracing_sched_unregister(void)
123 114
124static void tracing_start_sched_switch(void) 115static void tracing_start_sched_switch(void)
125{ 116{
126 long ref; 117 mutex_lock(&sched_register_mutex);
127 118 if (!(sched_ref++))
128 ref = atomic_inc_return(&sched_ref);
129 if (ref == 1)
130 tracing_sched_register(); 119 tracing_sched_register();
120 mutex_unlock(&sched_register_mutex);
131} 121}
132 122
133static void tracing_stop_sched_switch(void) 123static void tracing_stop_sched_switch(void)
134{ 124{
135 long ref; 125 mutex_lock(&sched_register_mutex);
136 126 if (!(--sched_ref))
137 ref = atomic_dec_and_test(&sched_ref);
138 if (ref)
139 tracing_sched_unregister(); 127 tracing_sched_unregister();
128 mutex_unlock(&sched_register_mutex);
140} 129}
141 130
142void tracing_start_cmdline_record(void) 131void tracing_start_cmdline_record(void)
@@ -149,40 +138,86 @@ void tracing_stop_cmdline_record(void)
149 tracing_stop_sched_switch(); 138 tracing_stop_sched_switch();
150} 139}
151 140
141/**
142 * tracing_start_sched_switch_record - start tracing context switches
143 *
144 * Turns on context switch tracing for a tracer.
145 */
146void tracing_start_sched_switch_record(void)
147{
148 if (unlikely(!ctx_trace)) {
149 WARN_ON(1);
150 return;
151 }
152
153 tracing_start_sched_switch();
154
155 mutex_lock(&sched_register_mutex);
156 tracer_enabled++;
157 mutex_unlock(&sched_register_mutex);
158}
159
160/**
161 * tracing_stop_sched_switch_record - stop tracing context switches
162 *
163 * Turns off context switch tracing for a tracer.
164 */
165void tracing_stop_sched_switch_record(void)
166{
167 mutex_lock(&sched_register_mutex);
168 tracer_enabled--;
169 WARN_ON(tracer_enabled < 0);
170 mutex_unlock(&sched_register_mutex);
171
172 tracing_stop_sched_switch();
173}
174
175/**
176 * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
177 * @tr: trace array pointer to assign
178 *
179 * Some tracers might want to record the context switches in their
180 * trace. This function lets those tracers assign the trace array
181 * to use.
182 */
183void tracing_sched_switch_assign_trace(struct trace_array *tr)
184{
185 ctx_trace = tr;
186}
187
152static void start_sched_trace(struct trace_array *tr) 188static void start_sched_trace(struct trace_array *tr)
153{ 189{
154 sched_switch_reset(tr); 190 tracing_reset_online_cpus(tr);
155 tracing_start_cmdline_record(); 191 tracing_start_sched_switch_record();
156 tracer_enabled = 1;
157} 192}
158 193
159static void stop_sched_trace(struct trace_array *tr) 194static void stop_sched_trace(struct trace_array *tr)
160{ 195{
161 tracer_enabled = 0; 196 tracing_stop_sched_switch_record();
162 tracing_stop_cmdline_record();
163} 197}
164 198
165static void sched_switch_trace_init(struct trace_array *tr) 199static int sched_switch_trace_init(struct trace_array *tr)
166{ 200{
167 ctx_trace = tr; 201 ctx_trace = tr;
168 202 start_sched_trace(tr);
169 if (tr->ctrl) 203 return 0;
170 start_sched_trace(tr);
171} 204}
172 205
173static void sched_switch_trace_reset(struct trace_array *tr) 206static void sched_switch_trace_reset(struct trace_array *tr)
174{ 207{
175 if (tr->ctrl) 208 if (sched_ref)
176 stop_sched_trace(tr); 209 stop_sched_trace(tr);
177} 210}
178 211
179static void sched_switch_trace_ctrl_update(struct trace_array *tr) 212static void sched_switch_trace_start(struct trace_array *tr)
180{ 213{
181 /* When starting a new trace, reset the buffers */ 214 tracing_reset_online_cpus(tr);
182 if (tr->ctrl) 215 tracing_start_sched_switch();
183 start_sched_trace(tr); 216}
184 else 217
185 stop_sched_trace(tr); 218static void sched_switch_trace_stop(struct trace_array *tr)
219{
220 tracing_stop_sched_switch();
186} 221}
187 222
188static struct tracer sched_switch_trace __read_mostly = 223static struct tracer sched_switch_trace __read_mostly =
@@ -190,7 +225,8 @@ static struct tracer sched_switch_trace __read_mostly =
190 .name = "sched_switch", 225 .name = "sched_switch",
191 .init = sched_switch_trace_init, 226 .init = sched_switch_trace_init,
192 .reset = sched_switch_trace_reset, 227 .reset = sched_switch_trace_reset,
193 .ctrl_update = sched_switch_trace_ctrl_update, 228 .start = sched_switch_trace_start,
229 .stop = sched_switch_trace_stop,
194#ifdef CONFIG_FTRACE_SELFTEST 230#ifdef CONFIG_FTRACE_SELFTEST
195 .selftest = trace_selftest_startup_sched_switch, 231 .selftest = trace_selftest_startup_sched_switch,
196#endif 232#endif
@@ -198,14 +234,7 @@ static struct tracer sched_switch_trace __read_mostly =
198 234
199__init static int init_sched_switch_trace(void) 235__init static int init_sched_switch_trace(void)
200{ 236{
201 int ret = 0;
202
203 if (atomic_read(&sched_ref))
204 ret = tracing_sched_register();
205 if (ret) {
206 pr_info("error registering scheduler trace\n");
207 return ret;
208 }
209 return register_tracer(&sched_switch_trace); 237 return register_tracer(&sched_switch_trace);
210} 238}
211device_initcall(init_sched_switch_trace); 239device_initcall(init_sched_switch_trace);
240
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b565..43586b689e31 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
50 return; 50 return;
51 51
52 pc = preempt_count(); 52 pc = preempt_count();
53 resched = need_resched(); 53 resched = ftrace_preempt_disable();
54 preempt_disable_notrace();
55 54
56 cpu = raw_smp_processor_id(); 55 cpu = raw_smp_processor_id();
57 data = tr->data[cpu]; 56 data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
81 out: 80 out:
82 atomic_dec(&data->disabled); 81 atomic_dec(&data->disabled);
83 82
84 /* 83 ftrace_preempt_enable(resched);
85 * To prevent recursion from the scheduler, if the
86 * resched flag was set before we entered, then
87 * don't reschedule.
88 */
89 if (resched)
90 preempt_enable_no_resched_notrace();
91 else
92 preempt_enable_notrace();
93} 84}
94 85
95static struct ftrace_ops trace_ops __read_mostly = 86static struct ftrace_ops trace_ops __read_mostly =
@@ -220,7 +211,7 @@ static void wakeup_reset(struct trace_array *tr)
220} 211}
221 212
222static void 213static void
223probe_wakeup(struct rq *rq, struct task_struct *p) 214probe_wakeup(struct rq *rq, struct task_struct *p, int success)
224{ 215{
225 int cpu = smp_processor_id(); 216 int cpu = smp_processor_id();
226 unsigned long flags; 217 unsigned long flags;
@@ -271,6 +262,12 @@ out:
271 atomic_dec(&wakeup_trace->data[cpu]->disabled); 262 atomic_dec(&wakeup_trace->data[cpu]->disabled);
272} 263}
273 264
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
274static void start_wakeup_tracer(struct trace_array *tr) 271static void start_wakeup_tracer(struct trace_array *tr)
275{ 272{
276 int ret; 273 int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
309 306
310 register_ftrace_function(&trace_ops); 307 register_ftrace_function(&trace_ops);
311 308
312 tracer_enabled = 1; 309 if (tracing_is_enabled()) {
310 tracer_enabled = 1;
311 save_tracer_enabled = 1;
312 } else {
313 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
313 316
314 return; 317 return;
315fail_deprobe_wake_new: 318fail_deprobe_wake_new:
@@ -321,49 +324,53 @@ fail_deprobe:
321static void stop_wakeup_tracer(struct trace_array *tr) 324static void stop_wakeup_tracer(struct trace_array *tr)
322{ 325{
323 tracer_enabled = 0; 326 tracer_enabled = 0;
327 save_tracer_enabled = 0;
324 unregister_ftrace_function(&trace_ops); 328 unregister_ftrace_function(&trace_ops);
325 unregister_trace_sched_switch(probe_wakeup_sched_switch); 329 unregister_trace_sched_switch(probe_wakeup_sched_switch);
326 unregister_trace_sched_wakeup_new(probe_wakeup); 330 unregister_trace_sched_wakeup_new(probe_wakeup);
327 unregister_trace_sched_wakeup(probe_wakeup); 331 unregister_trace_sched_wakeup(probe_wakeup);
328} 332}
329 333
330static void wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
331{ 335{
332 wakeup_trace = tr; 336 wakeup_trace = tr;
333 337 start_wakeup_tracer(tr);
334 if (tr->ctrl) 338 return 0;
335 start_wakeup_tracer(tr);
336} 339}
337 340
338static void wakeup_tracer_reset(struct trace_array *tr) 341static void wakeup_tracer_reset(struct trace_array *tr)
339{ 342{
340 if (tr->ctrl) { 343 stop_wakeup_tracer(tr);
341 stop_wakeup_tracer(tr); 344 /* make sure we put back any tasks we are tracing */
342 /* make sure we put back any tasks we are tracing */ 345 wakeup_reset(tr);
343 wakeup_reset(tr); 346}
344 } 347
348static void wakeup_tracer_start(struct trace_array *tr)
349{
350 wakeup_reset(tr);
351 tracer_enabled = 1;
352 save_tracer_enabled = 1;
345} 353}
346 354
347static void wakeup_tracer_ctrl_update(struct trace_array *tr) 355static void wakeup_tracer_stop(struct trace_array *tr)
348{ 356{
349 if (tr->ctrl) 357 tracer_enabled = 0;
350 start_wakeup_tracer(tr); 358 save_tracer_enabled = 0;
351 else
352 stop_wakeup_tracer(tr);
353} 359}
354 360
355static void wakeup_tracer_open(struct trace_iterator *iter) 361static void wakeup_tracer_open(struct trace_iterator *iter)
356{ 362{
357 /* stop the trace while dumping */ 363 /* stop the trace while dumping */
358 if (iter->tr->ctrl) 364 tracer_enabled = 0;
359 stop_wakeup_tracer(iter->tr);
360} 365}
361 366
362static void wakeup_tracer_close(struct trace_iterator *iter) 367static void wakeup_tracer_close(struct trace_iterator *iter)
363{ 368{
364 /* forget about any processes we were recording */ 369 /* forget about any processes we were recording */
365 if (iter->tr->ctrl) 370 if (save_tracer_enabled) {
366 start_wakeup_tracer(iter->tr); 371 wakeup_reset(iter->tr);
372 tracer_enabled = 1;
373 }
367} 374}
368 375
369static struct tracer wakeup_tracer __read_mostly = 376static struct tracer wakeup_tracer __read_mostly =
@@ -371,9 +378,10 @@ static struct tracer wakeup_tracer __read_mostly =
371 .name = "wakeup", 378 .name = "wakeup",
372 .init = wakeup_tracer_init, 379 .init = wakeup_tracer_init,
373 .reset = wakeup_tracer_reset, 380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
374 .open = wakeup_tracer_open, 383 .open = wakeup_tracer_open,
375 .close = wakeup_tracer_close, 384 .close = wakeup_tracer_close,
376 .ctrl_update = wakeup_tracer_ctrl_update,
377 .print_max = 1, 385 .print_max = 1,
378#ifdef CONFIG_FTRACE_SELFTEST 386#ifdef CONFIG_FTRACE_SELFTEST
379 .selftest = trace_selftest_startup_wakeup, 387 .selftest = trace_selftest_startup_wakeup,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 90bc752a7580..88c8eb70f54a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,6 +13,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
13 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT: 14 case TRACE_PRINT:
15 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH:
16 return 1; 17 return 1;
17 } 18 }
18 return 0; 19 return 0;
@@ -51,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
51 int cpu, ret = 0; 52 int cpu, ret = 0;
52 53
53 /* Don't allow flipping of max traces now */ 54 /* Don't allow flipping of max traces now */
54 raw_local_irq_save(flags); 55 local_irq_save(flags);
55 __raw_spin_lock(&ftrace_max_lock); 56 __raw_spin_lock(&ftrace_max_lock);
56 57
57 cnt = ring_buffer_entries(tr->buffer); 58 cnt = ring_buffer_entries(tr->buffer);
@@ -62,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
62 break; 63 break;
63 } 64 }
64 __raw_spin_unlock(&ftrace_max_lock); 65 __raw_spin_unlock(&ftrace_max_lock);
65 raw_local_irq_restore(flags); 66 local_irq_restore(flags);
66 67
67 if (count) 68 if (count)
68 *count = cnt; 69 *count = cnt;
@@ -70,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 71 return ret;
71} 72}
72 73
74static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
75{
76 printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
77 trace->name, init_ret);
78}
73#ifdef CONFIG_FUNCTION_TRACER 79#ifdef CONFIG_FUNCTION_TRACER
74 80
75#ifdef CONFIG_DYNAMIC_FTRACE 81#ifdef CONFIG_DYNAMIC_FTRACE
@@ -110,8 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
110 ftrace_set_filter(func_name, strlen(func_name), 1); 116 ftrace_set_filter(func_name, strlen(func_name), 1);
111 117
112 /* enable tracing */ 118 /* enable tracing */
113 tr->ctrl = 1; 119 ret = trace->init(tr);
114 trace->init(tr); 120 if (ret) {
121 warn_failed_init_tracer(trace, ret);
122 goto out;
123 }
115 124
116 /* Sleep for a 1/10 of a second */ 125 /* Sleep for a 1/10 of a second */
117 msleep(100); 126 msleep(100);
@@ -134,13 +143,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
134 msleep(100); 143 msleep(100);
135 144
136 /* stop the tracing. */ 145 /* stop the tracing. */
137 tr->ctrl = 0; 146 tracing_stop();
138 trace->ctrl_update(tr);
139 ftrace_enabled = 0; 147 ftrace_enabled = 0;
140 148
141 /* check the trace buffer */ 149 /* check the trace buffer */
142 ret = trace_test_buffer(tr, &count); 150 ret = trace_test_buffer(tr, &count);
143 trace->reset(tr); 151 trace->reset(tr);
152 tracing_start();
144 153
145 /* we should only have one item */ 154 /* we should only have one item */
146 if (!ret && count != 1) { 155 if (!ret && count != 1) {
@@ -148,6 +157,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
148 ret = -1; 157 ret = -1;
149 goto out; 158 goto out;
150 } 159 }
160
151 out: 161 out:
152 ftrace_enabled = save_ftrace_enabled; 162 ftrace_enabled = save_ftrace_enabled;
153 tracer_enabled = save_tracer_enabled; 163 tracer_enabled = save_tracer_enabled;
@@ -180,18 +190,22 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
180 ftrace_enabled = 1; 190 ftrace_enabled = 1;
181 tracer_enabled = 1; 191 tracer_enabled = 1;
182 192
183 tr->ctrl = 1; 193 ret = trace->init(tr);
184 trace->init(tr); 194 if (ret) {
195 warn_failed_init_tracer(trace, ret);
196 goto out;
197 }
198
185 /* Sleep for a 1/10 of a second */ 199 /* Sleep for a 1/10 of a second */
186 msleep(100); 200 msleep(100);
187 /* stop the tracing. */ 201 /* stop the tracing. */
188 tr->ctrl = 0; 202 tracing_stop();
189 trace->ctrl_update(tr);
190 ftrace_enabled = 0; 203 ftrace_enabled = 0;
191 204
192 /* check the trace buffer */ 205 /* check the trace buffer */
193 ret = trace_test_buffer(tr, &count); 206 ret = trace_test_buffer(tr, &count);
194 trace->reset(tr); 207 trace->reset(tr);
208 tracing_start();
195 209
196 if (!ret && !count) { 210 if (!ret && !count) {
197 printk(KERN_CONT ".. no entries found .."); 211 printk(KERN_CONT ".. no entries found ..");
@@ -223,8 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
223 int ret; 237 int ret;
224 238
225 /* start the tracing */ 239 /* start the tracing */
226 tr->ctrl = 1; 240 ret = trace->init(tr);
227 trace->init(tr); 241 if (ret) {
242 warn_failed_init_tracer(trace, ret);
243 return ret;
244 }
245
228 /* reset the max latency */ 246 /* reset the max latency */
229 tracing_max_latency = 0; 247 tracing_max_latency = 0;
230 /* disable interrupts for a bit */ 248 /* disable interrupts for a bit */
@@ -232,13 +250,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
232 udelay(100); 250 udelay(100);
233 local_irq_enable(); 251 local_irq_enable();
234 /* stop the tracing. */ 252 /* stop the tracing. */
235 tr->ctrl = 0; 253 tracing_stop();
236 trace->ctrl_update(tr);
237 /* check both trace buffers */ 254 /* check both trace buffers */
238 ret = trace_test_buffer(tr, NULL); 255 ret = trace_test_buffer(tr, NULL);
239 if (!ret) 256 if (!ret)
240 ret = trace_test_buffer(&max_tr, &count); 257 ret = trace_test_buffer(&max_tr, &count);
241 trace->reset(tr); 258 trace->reset(tr);
259 tracing_start();
242 260
243 if (!ret && !count) { 261 if (!ret && !count) {
244 printk(KERN_CONT ".. no entries found .."); 262 printk(KERN_CONT ".. no entries found ..");
@@ -259,9 +277,26 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
259 unsigned long count; 277 unsigned long count;
260 int ret; 278 int ret;
261 279
280 /*
281 * Now that the big kernel lock is no longer preemptible,
282 * and this is called with the BKL held, it will always
283 * fail. If preemption is already disabled, simply
284 * pass the test. When the BKL is removed, or becomes
285 * preemptible again, we will once again test this,
286 * so keep it in.
287 */
288 if (preempt_count()) {
289 printk(KERN_CONT "can not test ... force ");
290 return 0;
291 }
292
262 /* start the tracing */ 293 /* start the tracing */
263 tr->ctrl = 1; 294 ret = trace->init(tr);
264 trace->init(tr); 295 if (ret) {
296 warn_failed_init_tracer(trace, ret);
297 return ret;
298 }
299
265 /* reset the max latency */ 300 /* reset the max latency */
266 tracing_max_latency = 0; 301 tracing_max_latency = 0;
267 /* disable preemption for a bit */ 302 /* disable preemption for a bit */
@@ -269,13 +304,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
269 udelay(100); 304 udelay(100);
270 preempt_enable(); 305 preempt_enable();
271 /* stop the tracing. */ 306 /* stop the tracing. */
272 tr->ctrl = 0; 307 tracing_stop();
273 trace->ctrl_update(tr);
274 /* check both trace buffers */ 308 /* check both trace buffers */
275 ret = trace_test_buffer(tr, NULL); 309 ret = trace_test_buffer(tr, NULL);
276 if (!ret) 310 if (!ret)
277 ret = trace_test_buffer(&max_tr, &count); 311 ret = trace_test_buffer(&max_tr, &count);
278 trace->reset(tr); 312 trace->reset(tr);
313 tracing_start();
279 314
280 if (!ret && !count) { 315 if (!ret && !count) {
281 printk(KERN_CONT ".. no entries found .."); 316 printk(KERN_CONT ".. no entries found ..");
@@ -296,9 +331,25 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
296 unsigned long count; 331 unsigned long count;
297 int ret; 332 int ret;
298 333
334 /*
335 * Now that the big kernel lock is no longer preemptible,
336 * and this is called with the BKL held, it will always
337 * fail. If preemption is already disabled, simply
338 * pass the test. When the BKL is removed, or becomes
339 * preemptible again, we will once again test this,
340 * so keep it in.
341 */
342 if (preempt_count()) {
343 printk(KERN_CONT "can not test ... force ");
344 return 0;
345 }
346
299 /* start the tracing */ 347 /* start the tracing */
300 tr->ctrl = 1; 348 ret = trace->init(tr);
301 trace->init(tr); 349 if (ret) {
350 warn_failed_init_tracer(trace, ret);
351 goto out;
352 }
302 353
303 /* reset the max latency */ 354 /* reset the max latency */
304 tracing_max_latency = 0; 355 tracing_max_latency = 0;
@@ -312,27 +363,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
312 local_irq_enable(); 363 local_irq_enable();
313 364
314 /* stop the tracing. */ 365 /* stop the tracing. */
315 tr->ctrl = 0; 366 tracing_stop();
316 trace->ctrl_update(tr);
317 /* check both trace buffers */ 367 /* check both trace buffers */
318 ret = trace_test_buffer(tr, NULL); 368 ret = trace_test_buffer(tr, NULL);
319 if (ret) 369 if (ret) {
370 tracing_start();
320 goto out; 371 goto out;
372 }
321 373
322 ret = trace_test_buffer(&max_tr, &count); 374 ret = trace_test_buffer(&max_tr, &count);
323 if (ret) 375 if (ret) {
376 tracing_start();
324 goto out; 377 goto out;
378 }
325 379
326 if (!ret && !count) { 380 if (!ret && !count) {
327 printk(KERN_CONT ".. no entries found .."); 381 printk(KERN_CONT ".. no entries found ..");
328 ret = -1; 382 ret = -1;
383 tracing_start();
329 goto out; 384 goto out;
330 } 385 }
331 386
332 /* do the test by disabling interrupts first this time */ 387 /* do the test by disabling interrupts first this time */
333 tracing_max_latency = 0; 388 tracing_max_latency = 0;
334 tr->ctrl = 1; 389 tracing_start();
335 trace->ctrl_update(tr);
336 preempt_disable(); 390 preempt_disable();
337 local_irq_disable(); 391 local_irq_disable();
338 udelay(100); 392 udelay(100);
@@ -341,8 +395,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
341 local_irq_enable(); 395 local_irq_enable();
342 396
343 /* stop the tracing. */ 397 /* stop the tracing. */
344 tr->ctrl = 0; 398 tracing_stop();
345 trace->ctrl_update(tr);
346 /* check both trace buffers */ 399 /* check both trace buffers */
347 ret = trace_test_buffer(tr, NULL); 400 ret = trace_test_buffer(tr, NULL);
348 if (ret) 401 if (ret)
@@ -358,6 +411,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
358 411
359 out: 412 out:
360 trace->reset(tr); 413 trace->reset(tr);
414 tracing_start();
361 tracing_max_latency = save_max; 415 tracing_max_latency = save_max;
362 416
363 return ret; 417 return ret;
@@ -423,8 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
423 wait_for_completion(&isrt); 477 wait_for_completion(&isrt);
424 478
425 /* start the tracing */ 479 /* start the tracing */
426 tr->ctrl = 1; 480 ret = trace->init(tr);
427 trace->init(tr); 481 if (ret) {
482 warn_failed_init_tracer(trace, ret);
483 return ret;
484 }
485
428 /* reset the max latency */ 486 /* reset the max latency */
429 tracing_max_latency = 0; 487 tracing_max_latency = 0;
430 488
@@ -448,8 +506,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448 msleep(100); 506 msleep(100);
449 507
450 /* stop the tracing. */ 508 /* stop the tracing. */
451 tr->ctrl = 0; 509 tracing_stop();
452 trace->ctrl_update(tr);
453 /* check both trace buffers */ 510 /* check both trace buffers */
454 ret = trace_test_buffer(tr, NULL); 511 ret = trace_test_buffer(tr, NULL);
455 if (!ret) 512 if (!ret)
@@ -457,6 +514,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
457 514
458 515
459 trace->reset(tr); 516 trace->reset(tr);
517 tracing_start();
460 518
461 tracing_max_latency = save_max; 519 tracing_max_latency = save_max;
462 520
@@ -480,16 +538,20 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
480 int ret; 538 int ret;
481 539
482 /* start the tracing */ 540 /* start the tracing */
483 tr->ctrl = 1; 541 ret = trace->init(tr);
484 trace->init(tr); 542 if (ret) {
543 warn_failed_init_tracer(trace, ret);
544 return ret;
545 }
546
485 /* Sleep for a 1/10 of a second */ 547 /* Sleep for a 1/10 of a second */
486 msleep(100); 548 msleep(100);
487 /* stop the tracing. */ 549 /* stop the tracing. */
488 tr->ctrl = 0; 550 tracing_stop();
489 trace->ctrl_update(tr);
490 /* check the trace buffer */ 551 /* check the trace buffer */
491 ret = trace_test_buffer(tr, &count); 552 ret = trace_test_buffer(tr, &count);
492 trace->reset(tr); 553 trace->reset(tr);
554 tracing_start();
493 555
494 if (!ret && !count) { 556 if (!ret && !count) {
495 printk(KERN_CONT ".. no entries found .."); 557 printk(KERN_CONT ".. no entries found ..");
@@ -508,17 +570,48 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
508 int ret; 570 int ret;
509 571
510 /* start the tracing */ 572 /* start the tracing */
511 tr->ctrl = 1; 573 ret = trace->init(tr);
512 trace->init(tr); 574 if (ret) {
575 warn_failed_init_tracer(trace, ret);
576 return 0;
577 }
578
513 /* Sleep for a 1/10 of a second */ 579 /* Sleep for a 1/10 of a second */
514 msleep(100); 580 msleep(100);
515 /* stop the tracing. */ 581 /* stop the tracing. */
516 tr->ctrl = 0; 582 tracing_stop();
517 trace->ctrl_update(tr);
518 /* check the trace buffer */ 583 /* check the trace buffer */
519 ret = trace_test_buffer(tr, &count); 584 ret = trace_test_buffer(tr, &count);
520 trace->reset(tr); 585 trace->reset(tr);
586 tracing_start();
521 587
522 return ret; 588 return ret;
523} 589}
524#endif /* CONFIG_SYSPROF_TRACER */ 590#endif /* CONFIG_SYSPROF_TRACER */
591
592#ifdef CONFIG_BRANCH_TRACER
593int
594trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
595{
596 unsigned long count;
597 int ret;
598
599 /* start the tracing */
600 ret = trace->init(tr);
601 if (ret) {
602 warn_failed_init_tracer(trace, ret);
603 return ret;
604 }
605
606 /* Sleep for a 1/10 of a second */
607 msleep(100);
608 /* stop the tracing. */
609 tracing_stop();
610 /* check the trace buffer */
611 ret = trace_test_buffer(tr, &count);
612 trace->reset(tr);
613 tracing_start();
614
615 return ret;
616}
617#endif /* CONFIG_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3bdb44bde4b7..d0871bc0aca5 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -10,6 +10,7 @@
10#include <linux/debugfs.h> 10#include <linux/debugfs.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/sysctl.h>
13#include <linux/init.h> 14#include <linux/init.h>
14#include <linux/fs.h> 15#include <linux/fs.h>
15#include "trace.h" 16#include "trace.h"
@@ -31,6 +32,10 @@ static raw_spinlock_t max_stack_lock =
31 32
32static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
33static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
35static DEFINE_MUTEX(stack_sysctl_mutex);
36
37int stack_tracer_enabled;
38static int last_stack_tracer_enabled;
34 39
35static inline void check_stack(void) 40static inline void check_stack(void)
36{ 41{
@@ -48,7 +53,7 @@ static inline void check_stack(void)
48 if (!object_is_on_stack(&this_size)) 53 if (!object_is_on_stack(&this_size))
49 return; 54 return;
50 55
51 raw_local_irq_save(flags); 56 local_irq_save(flags);
52 __raw_spin_lock(&max_stack_lock); 57 __raw_spin_lock(&max_stack_lock);
53 58
54 /* a race could have already updated it */ 59 /* a race could have already updated it */
@@ -78,6 +83,7 @@ static inline void check_stack(void)
78 * on a new max, so it is far from a fast path. 83 * on a new max, so it is far from a fast path.
79 */ 84 */
80 while (i < max_stack_trace.nr_entries) { 85 while (i < max_stack_trace.nr_entries) {
86 int found = 0;
81 87
82 stack_dump_index[i] = this_size; 88 stack_dump_index[i] = this_size;
83 p = start; 89 p = start;
@@ -86,17 +92,19 @@ static inline void check_stack(void)
86 if (*p == stack_dump_trace[i]) { 92 if (*p == stack_dump_trace[i]) {
87 this_size = stack_dump_index[i++] = 93 this_size = stack_dump_index[i++] =
88 (top - p) * sizeof(unsigned long); 94 (top - p) * sizeof(unsigned long);
95 found = 1;
89 /* Start the search from here */ 96 /* Start the search from here */
90 start = p + 1; 97 start = p + 1;
91 } 98 }
92 } 99 }
93 100
94 i++; 101 if (!found)
102 i++;
95 } 103 }
96 104
97 out: 105 out:
98 __raw_spin_unlock(&max_stack_lock); 106 __raw_spin_unlock(&max_stack_lock);
99 raw_local_irq_restore(flags); 107 local_irq_restore(flags);
100} 108}
101 109
102static void 110static void
@@ -107,8 +115,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
107 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return; 116 return;
109 117
110 resched = need_resched(); 118 resched = ftrace_preempt_disable();
111 preempt_disable_notrace();
112 119
113 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */ 121 /* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
120 out: 127 out:
121 per_cpu(trace_active, cpu)--; 128 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */ 129 /* prevent recursion in schedule */
123 if (resched) 130 ftrace_preempt_enable(resched);
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127} 131}
128 132
129static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
@@ -166,16 +170,16 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
166 if (ret < 0) 170 if (ret < 0)
167 return ret; 171 return ret;
168 172
169 raw_local_irq_save(flags); 173 local_irq_save(flags);
170 __raw_spin_lock(&max_stack_lock); 174 __raw_spin_lock(&max_stack_lock);
171 *ptr = val; 175 *ptr = val;
172 __raw_spin_unlock(&max_stack_lock); 176 __raw_spin_unlock(&max_stack_lock);
173 raw_local_irq_restore(flags); 177 local_irq_restore(flags);
174 178
175 return count; 179 return count;
176} 180}
177 181
178static struct file_operations stack_max_size_fops = { 182static const struct file_operations stack_max_size_fops = {
179 .open = tracing_open_generic, 183 .open = tracing_open_generic,
180 .read = stack_max_size_read, 184 .read = stack_max_size_read,
181 .write = stack_max_size_write, 185 .write = stack_max_size_write,
@@ -273,7 +277,7 @@ static int t_show(struct seq_file *m, void *v)
273 return 0; 277 return 0;
274} 278}
275 279
276static struct seq_operations stack_trace_seq_ops = { 280static const struct seq_operations stack_trace_seq_ops = {
277 .start = t_start, 281 .start = t_start,
278 .next = t_next, 282 .next = t_next,
279 .stop = t_stop, 283 .stop = t_stop,
@@ -289,12 +293,47 @@ static int stack_trace_open(struct inode *inode, struct file *file)
289 return ret; 293 return ret;
290} 294}
291 295
292static struct file_operations stack_trace_fops = { 296static const struct file_operations stack_trace_fops = {
293 .open = stack_trace_open, 297 .open = stack_trace_open,
294 .read = seq_read, 298 .read = seq_read,
295 .llseek = seq_lseek, 299 .llseek = seq_lseek,
296}; 300};
297 301
302int
303stack_trace_sysctl(struct ctl_table *table, int write,
304 struct file *file, void __user *buffer, size_t *lenp,
305 loff_t *ppos)
306{
307 int ret;
308
309 mutex_lock(&stack_sysctl_mutex);
310
311 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
312
313 if (ret || !write ||
314 (last_stack_tracer_enabled == stack_tracer_enabled))
315 goto out;
316
317 last_stack_tracer_enabled = stack_tracer_enabled;
318
319 if (stack_tracer_enabled)
320 register_ftrace_function(&trace_ops);
321 else
322 unregister_ftrace_function(&trace_ops);
323
324 out:
325 mutex_unlock(&stack_sysctl_mutex);
326 return ret;
327}
328
329static __init int enable_stacktrace(char *str)
330{
331 stack_tracer_enabled = 1;
332 last_stack_tracer_enabled = 1;
333 return 1;
334}
335__setup("stacktrace", enable_stacktrace);
336
298static __init int stack_trace_init(void) 337static __init int stack_trace_init(void)
299{ 338{
300 struct dentry *d_tracer; 339 struct dentry *d_tracer;
@@ -312,7 +351,8 @@ static __init int stack_trace_init(void)
312 if (!entry) 351 if (!entry)
313 pr_warning("Could not create debugfs 'stack_trace' entry\n"); 352 pr_warning("Could not create debugfs 'stack_trace' entry\n");
314 353
315 register_ftrace_function(&trace_ops); 354 if (stack_tracer_enabled)
355 register_ftrace_function(&trace_ops);
316 356
317 return 0; 357 return 0;
318} 358}
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..01becf1f19ff 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -234,20 +234,10 @@ static void stop_stack_timers(void)
234 stop_stack_timer(cpu); 234 stop_stack_timer(cpu);
235} 235}
236 236
237static void stack_reset(struct trace_array *tr)
238{
239 int cpu;
240
241 tr->time_start = ftrace_now(tr->cpu);
242
243 for_each_online_cpu(cpu)
244 tracing_reset(tr, cpu);
245}
246
247static void start_stack_trace(struct trace_array *tr) 237static void start_stack_trace(struct trace_array *tr)
248{ 238{
249 mutex_lock(&sample_timer_lock); 239 mutex_lock(&sample_timer_lock);
250 stack_reset(tr); 240 tracing_reset_online_cpus(tr);
251 start_stack_timers(); 241 start_stack_timers();
252 tracer_enabled = 1; 242 tracer_enabled = 1;
253 mutex_unlock(&sample_timer_lock); 243 mutex_unlock(&sample_timer_lock);
@@ -261,27 +251,17 @@ static void stop_stack_trace(struct trace_array *tr)
261 mutex_unlock(&sample_timer_lock); 251 mutex_unlock(&sample_timer_lock);
262} 252}
263 253
264static void stack_trace_init(struct trace_array *tr) 254static int stack_trace_init(struct trace_array *tr)
265{ 255{
266 sysprof_trace = tr; 256 sysprof_trace = tr;
267 257
268 if (tr->ctrl) 258 start_stack_trace(tr);
269 start_stack_trace(tr); 259 return 0;
270} 260}
271 261
272static void stack_trace_reset(struct trace_array *tr) 262static void stack_trace_reset(struct trace_array *tr)
273{ 263{
274 if (tr->ctrl) 264 stop_stack_trace(tr);
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285} 265}
286 266
287static struct tracer stack_trace __read_mostly = 267static struct tracer stack_trace __read_mostly =
@@ -289,7 +269,6 @@ static struct tracer stack_trace __read_mostly =
289 .name = "sysprof", 269 .name = "sysprof",
290 .init = stack_trace_init, 270 .init = stack_trace_init,
291 .reset = stack_trace_reset, 271 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST 272#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof, 273 .selftest = trace_selftest_startup_sysprof,
295#endif 274#endif
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index af8c85664882..79602740bbb5 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
43 */ 43 */
44#define TRACEPOINT_HASH_BITS 6 44#define TRACEPOINT_HASH_BITS 6
45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
54 struct hlist_node hlist; 55 struct hlist_node hlist;
55 void **funcs; 56 void **funcs;
56 int refcount; /* Number of times armed. 0 if disarmed. */ 57 int refcount; /* Number of times armed. 0 if disarmed. */
57 struct rcu_head rcu;
58 void *oldptr;
59 unsigned char rcu_pending:1;
60 char name[0]; 58 char name[0];
61}; 59};
62 60
63static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 61struct tp_probes {
62 union {
63 struct rcu_head rcu;
64 struct list_head list;
65 } u;
66 void *probes[0];
67};
64 68
65static void free_old_closure(struct rcu_head *head) 69static inline void *allocate_probes(int count)
66{ 70{
67 struct tracepoint_entry *entry = container_of(head, 71 struct tp_probes *p = kmalloc(count * sizeof(void *)
68 struct tracepoint_entry, rcu); 72 + sizeof(struct tp_probes), GFP_KERNEL);
69 kfree(entry->oldptr); 73 return p == NULL ? NULL : p->probes;
70 /* Make sure we free the data before setting the pending flag to 0 */
71 smp_wmb();
72 entry->rcu_pending = 0;
73} 74}
74 75
75static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) 76static void rcu_free_old_probes(struct rcu_head *head)
76{ 77{
77 if (!old) 78 kfree(container_of(head, struct tp_probes, u.rcu));
78 return; 79}
79 entry->oldptr = old; 80
80 entry->rcu_pending = 1; 81static inline void release_probes(void *old)
81 /* write rcu_pending before calling the RCU callback */ 82{
82 smp_wmb(); 83 if (old) {
83 call_rcu_sched(&entry->rcu, free_old_closure); 84 struct tp_probes *tp_probes = container_of(old,
85 struct tp_probes, probes[0]);
86 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
87 }
84} 88}
85 89
86static void debug_print_probes(struct tracepoint_entry *entry) 90static void debug_print_probes(struct tracepoint_entry *entry)
87{ 91{
88 int i; 92 int i;
89 93
90 if (!tracepoint_debug) 94 if (!tracepoint_debug || !entry->funcs)
91 return; 95 return;
92 96
93 for (i = 0; entry->funcs[i]; i++) 97 for (i = 0; entry->funcs[i]; i++)
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
111 return ERR_PTR(-EEXIST); 115 return ERR_PTR(-EEXIST);
112 } 116 }
113 /* + 2 : one for new probe, one for NULL func */ 117 /* + 2 : one for new probe, one for NULL func */
114 new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); 118 new = allocate_probes(nr_probes + 2);
115 if (new == NULL) 119 if (new == NULL)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
117 if (old) 121 if (old)
118 memcpy(new, old, nr_probes * sizeof(void *)); 122 memcpy(new, old, nr_probes * sizeof(void *));
119 new[nr_probes] = probe; 123 new[nr_probes] = probe;
124 new[nr_probes + 1] = NULL;
120 entry->refcount = nr_probes + 1; 125 entry->refcount = nr_probes + 1;
121 entry->funcs = new; 126 entry->funcs = new;
122 debug_print_probes(entry); 127 debug_print_probes(entry);
@@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
132 old = entry->funcs; 137 old = entry->funcs;
133 138
134 if (!old) 139 if (!old)
135 return NULL; 140 return ERR_PTR(-ENOENT);
136 141
137 debug_print_probes(entry); 142 debug_print_probes(entry);
138 /* (N -> M), (N > 1, M >= 0) probes */ 143 /* (N -> M), (N > 1, M >= 0) probes */
@@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
151 int j = 0; 156 int j = 0;
152 /* N -> M, (N > 1, M > 0) */ 157 /* N -> M, (N > 1, M > 0) */
153 /* + 1 for NULL */ 158 /* + 1 for NULL */
154 new = kzalloc((nr_probes - nr_del + 1) 159 new = allocate_probes(nr_probes - nr_del + 1);
155 * sizeof(void *), GFP_KERNEL);
156 if (new == NULL) 160 if (new == NULL)
157 return ERR_PTR(-ENOMEM); 161 return ERR_PTR(-ENOMEM);
158 for (i = 0; old[i]; i++) 162 for (i = 0; old[i]; i++)
159 if ((probe && old[i] != probe)) 163 if ((probe && old[i] != probe))
160 new[j++] = old[i]; 164 new[j++] = old[i];
165 new[nr_probes - nr_del] = NULL;
161 entry->refcount = nr_probes - nr_del; 166 entry->refcount = nr_probes - nr_del;
162 entry->funcs = new; 167 entry->funcs = new;
163 } 168 }
@@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
215 memcpy(&e->name[0], name, name_len); 220 memcpy(&e->name[0], name, name_len);
216 e->funcs = NULL; 221 e->funcs = NULL;
217 e->refcount = 0; 222 e->refcount = 0;
218 e->rcu_pending = 0;
219 hlist_add_head(&e->hlist, head); 223 hlist_add_head(&e->hlist, head);
220 return e; 224 return e;
221} 225}
@@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
224 * Remove the tracepoint from the tracepoint hash table. Must be called with 228 * Remove the tracepoint from the tracepoint hash table. Must be called with
225 * mutex_lock held. 229 * mutex_lock held.
226 */ 230 */
227static int remove_tracepoint(const char *name) 231static inline void remove_tracepoint(struct tracepoint_entry *e)
228{ 232{
229 struct hlist_head *head;
230 struct hlist_node *node;
231 struct tracepoint_entry *e;
232 int found = 0;
233 size_t len = strlen(name) + 1;
234 u32 hash = jhash(name, len-1, 0);
235
236 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
237 hlist_for_each_entry(e, node, head, hlist) {
238 if (!strcmp(name, e->name)) {
239 found = 1;
240 break;
241 }
242 }
243 if (!found)
244 return -ENOENT;
245 if (e->refcount)
246 return -EBUSY;
247 hlist_del(&e->hlist); 233 hlist_del(&e->hlist);
248 /* Make sure the call_rcu_sched has been executed */
249 if (e->rcu_pending)
250 rcu_barrier_sched();
251 kfree(e); 234 kfree(e);
252 return 0;
253} 235}
254 236
255/* 237/*
@@ -280,6 +262,7 @@ static void set_tracepoint(struct tracepoint_entry **entry,
280static void disable_tracepoint(struct tracepoint *elem) 262static void disable_tracepoint(struct tracepoint *elem)
281{ 263{
282 elem->state = 0; 264 elem->state = 0;
265 rcu_assign_pointer(elem->funcs, NULL);
283} 266}
284 267
285/** 268/**
@@ -320,6 +303,23 @@ static void tracepoint_update_probes(void)
320 module_update_tracepoints(); 303 module_update_tracepoints();
321} 304}
322 305
306static void *tracepoint_add_probe(const char *name, void *probe)
307{
308 struct tracepoint_entry *entry;
309 void *old;
310
311 entry = get_tracepoint(name);
312 if (!entry) {
313 entry = add_tracepoint(name);
314 if (IS_ERR(entry))
315 return entry;
316 }
317 old = tracepoint_entry_add_probe(entry, probe);
318 if (IS_ERR(old) && !entry->refcount)
319 remove_tracepoint(entry);
320 return old;
321}
322
323/** 323/**
324 * tracepoint_probe_register - Connect a probe to a tracepoint 324 * tracepoint_probe_register - Connect a probe to a tracepoint
325 * @name: tracepoint name 325 * @name: tracepoint name
@@ -330,44 +330,36 @@ static void tracepoint_update_probes(void)
330 */ 330 */
331int tracepoint_probe_register(const char *name, void *probe) 331int tracepoint_probe_register(const char *name, void *probe)
332{ 332{
333 struct tracepoint_entry *entry;
334 int ret = 0;
335 void *old; 333 void *old;
336 334
337 mutex_lock(&tracepoints_mutex); 335 mutex_lock(&tracepoints_mutex);
338 entry = get_tracepoint(name); 336 old = tracepoint_add_probe(name, probe);
339 if (!entry) {
340 entry = add_tracepoint(name);
341 if (IS_ERR(entry)) {
342 ret = PTR_ERR(entry);
343 goto end;
344 }
345 }
346 /*
347 * If we detect that a call_rcu_sched is pending for this tracepoint,
348 * make sure it's executed now.
349 */
350 if (entry->rcu_pending)
351 rcu_barrier_sched();
352 old = tracepoint_entry_add_probe(entry, probe);
353 if (IS_ERR(old)) {
354 ret = PTR_ERR(old);
355 goto end;
356 }
357 mutex_unlock(&tracepoints_mutex); 337 mutex_unlock(&tracepoints_mutex);
338 if (IS_ERR(old))
339 return PTR_ERR(old);
340
358 tracepoint_update_probes(); /* may update entry */ 341 tracepoint_update_probes(); /* may update entry */
359 mutex_lock(&tracepoints_mutex); 342 release_probes(old);
360 entry = get_tracepoint(name); 343 return 0;
361 WARN_ON(!entry);
362 if (entry->rcu_pending)
363 rcu_barrier_sched();
364 tracepoint_entry_free_old(entry, old);
365end:
366 mutex_unlock(&tracepoints_mutex);
367 return ret;
368} 344}
369EXPORT_SYMBOL_GPL(tracepoint_probe_register); 345EXPORT_SYMBOL_GPL(tracepoint_probe_register);
370 346
347static void *tracepoint_remove_probe(const char *name, void *probe)
348{
349 struct tracepoint_entry *entry;
350 void *old;
351
352 entry = get_tracepoint(name);
353 if (!entry)
354 return ERR_PTR(-ENOENT);
355 old = tracepoint_entry_remove_probe(entry, probe);
356 if (IS_ERR(old))
357 return old;
358 if (!entry->refcount)
359 remove_tracepoint(entry);
360 return old;
361}
362
371/** 363/**
372 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 364 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
373 * @name: tracepoint name 365 * @name: tracepoint name
@@ -380,38 +372,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
380 */ 372 */
381int tracepoint_probe_unregister(const char *name, void *probe) 373int tracepoint_probe_unregister(const char *name, void *probe)
382{ 374{
383 struct tracepoint_entry *entry;
384 void *old; 375 void *old;
385 int ret = -ENOENT;
386 376
387 mutex_lock(&tracepoints_mutex); 377 mutex_lock(&tracepoints_mutex);
388 entry = get_tracepoint(name); 378 old = tracepoint_remove_probe(name, probe);
389 if (!entry)
390 goto end;
391 if (entry->rcu_pending)
392 rcu_barrier_sched();
393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
395 printk(KERN_WARNING "Warning: Trying to unregister a probe"
396 "that doesn't exist\n");
397 goto end;
398 }
399 mutex_unlock(&tracepoints_mutex); 379 mutex_unlock(&tracepoints_mutex);
380 if (IS_ERR(old))
381 return PTR_ERR(old);
382
400 tracepoint_update_probes(); /* may update entry */ 383 tracepoint_update_probes(); /* may update entry */
384 release_probes(old);
385 return 0;
386}
387EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
388
389static LIST_HEAD(old_probes);
390static int need_update;
391
392static void tracepoint_add_old_probes(void *old)
393{
394 need_update = 1;
395 if (old) {
396 struct tp_probes *tp_probes = container_of(old,
397 struct tp_probes, probes[0]);
398 list_add(&tp_probes->u.list, &old_probes);
399 }
400}
401
402/**
403 * tracepoint_probe_register_noupdate - register a probe but not connect
404 * @name: tracepoint name
405 * @probe: probe handler
406 *
407 * caller must call tracepoint_probe_update_all()
408 */
409int tracepoint_probe_register_noupdate(const char *name, void *probe)
410{
411 void *old;
412
401 mutex_lock(&tracepoints_mutex); 413 mutex_lock(&tracepoints_mutex);
402 entry = get_tracepoint(name); 414 old = tracepoint_add_probe(name, probe);
403 if (!entry) 415 if (IS_ERR(old)) {
404 goto end; 416 mutex_unlock(&tracepoints_mutex);
405 if (entry->rcu_pending) 417 return PTR_ERR(old);
406 rcu_barrier_sched(); 418 }
407 tracepoint_entry_free_old(entry, old); 419 tracepoint_add_old_probes(old);
408 remove_tracepoint(name); /* Ignore busy error message */
409 ret = 0;
410end:
411 mutex_unlock(&tracepoints_mutex); 420 mutex_unlock(&tracepoints_mutex);
412 return ret; 421 return 0;
413} 422}
414EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 423EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
424
425/**
426 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
427 * @name: tracepoint name
428 * @probe: probe function pointer
429 *
430 * caller must call tracepoint_probe_update_all()
431 */
432int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
433{
434 void *old;
435
436 mutex_lock(&tracepoints_mutex);
437 old = tracepoint_remove_probe(name, probe);
438 if (IS_ERR(old)) {
439 mutex_unlock(&tracepoints_mutex);
440 return PTR_ERR(old);
441 }
442 tracepoint_add_old_probes(old);
443 mutex_unlock(&tracepoints_mutex);
444 return 0;
445}
446EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
447
448/**
449 * tracepoint_probe_update_all - update tracepoints
450 */
451void tracepoint_probe_update_all(void)
452{
453 LIST_HEAD(release_probes);
454 struct tp_probes *pos, *next;
455
456 mutex_lock(&tracepoints_mutex);
457 if (!need_update) {
458 mutex_unlock(&tracepoints_mutex);
459 return;
460 }
461 if (!list_empty(&old_probes))
462 list_replace_init(&old_probes, &release_probes);
463 need_update = 0;
464 mutex_unlock(&tracepoints_mutex);
465
466 tracepoint_update_probes();
467 list_for_each_entry_safe(pos, next, &release_probes, u.list) {
468 list_del(&pos->u.list);
469 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
470 }
471}
472EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
415 473
416/** 474/**
417 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 475 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
@@ -483,3 +541,36 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter)
483 iter->tracepoint = NULL; 541 iter->tracepoint = NULL;
484} 542}
485EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 543EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
544
545#ifdef CONFIG_MODULES
546
547int tracepoint_module_notify(struct notifier_block *self,
548 unsigned long val, void *data)
549{
550 struct module *mod = data;
551
552 switch (val) {
553 case MODULE_STATE_COMING:
554 tracepoint_update_probe_range(mod->tracepoints,
555 mod->tracepoints + mod->num_tracepoints);
556 break;
557 case MODULE_STATE_GOING:
558 tracepoint_update_probe_range(mod->tracepoints,
559 mod->tracepoints + mod->num_tracepoints);
560 break;
561 }
562 return 0;
563}
564
565struct notifier_block tracepoint_module_nb = {
566 .notifier_call = tracepoint_module_notify,
567 .priority = 0,
568};
569
570static int init_tracepoints(void)
571{
572 return register_module_notifier(&tracepoint_module_nb);
573}
574__initcall(init_tracepoints);
575
576#endif /* CONFIG_MODULES */
diff --git a/mm/bounce.c b/mm/bounce.c
index 06722c403058..bf0cf7c8387b 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,6 +14,7 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/highmem.h> 15#include <linux/highmem.h>
16#include <linux/blktrace_api.h> 16#include <linux/blktrace_api.h>
17#include <trace/block.h>
17#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
18 19
19#define POOL_SIZE 64 20#define POOL_SIZE 64
@@ -21,6 +22,8 @@
21 22
22static mempool_t *page_pool, *isa_page_pool; 23static mempool_t *page_pool, *isa_page_pool;
23 24
25DEFINE_TRACE(block_bio_bounce);
26
24#ifdef CONFIG_HIGHMEM 27#ifdef CONFIG_HIGHMEM
25static __init int init_emergency_pool(void) 28static __init int init_emergency_pool(void)
26{ 29{
@@ -222,7 +225,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
222 if (!bio) 225 if (!bio)
223 return; 226 return;
224 227
225 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); 228 trace_block_bio_bounce(q, *bio_orig);
226 229
227 /* 230 /*
228 * at least one page was bounced, fill in possible non-highmem 231 * at least one page was bounced, fill in possible non-highmem
diff --git a/mm/mlock.c b/mm/mlock.c
index 1ada366570cb..3035a56e7616 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -667,3 +667,48 @@ void user_shm_unlock(size_t size, struct user_struct *user)
667 spin_unlock(&shmlock_user_lock); 667 spin_unlock(&shmlock_user_lock);
668 free_uid(user); 668 free_uid(user);
669} 669}
670
671void *alloc_locked_buffer(size_t size)
672{
673 unsigned long rlim, vm, pgsz;
674 void *buffer = NULL;
675
676 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
677
678 down_write(&current->mm->mmap_sem);
679
680 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
681 vm = current->mm->total_vm + pgsz;
682 if (rlim < vm)
683 goto out;
684
685 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
686 vm = current->mm->locked_vm + pgsz;
687 if (rlim < vm)
688 goto out;
689
690 buffer = kzalloc(size, GFP_KERNEL);
691 if (!buffer)
692 goto out;
693
694 current->mm->total_vm += pgsz;
695 current->mm->locked_vm += pgsz;
696
697 out:
698 up_write(&current->mm->mmap_sem);
699 return buffer;
700}
701
702void free_locked_buffer(void *buffer, size_t size)
703{
704 unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
705
706 down_write(&current->mm->mmap_sem);
707
708 current->mm->total_vm -= pgsz;
709 current->mm->locked_vm -= pgsz;
710
711 up_write(&current->mm->mmap_sem);
712
713 kfree(buffer);
714}
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
index 0216b55bd640..01724e04c556 100644
--- a/samples/tracepoints/tp-samples-trace.h
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -4,10 +4,10 @@
4#include <linux/proc_fs.h> /* for struct inode and struct file */ 4#include <linux/proc_fs.h> /* for struct inode and struct file */
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6 6
7DEFINE_TRACE(subsys_event, 7DECLARE_TRACE(subsys_event,
8 TPPROTO(struct inode *inode, struct file *file), 8 TPPROTO(struct inode *inode, struct file *file),
9 TPARGS(inode, file)); 9 TPARGS(inode, file));
10DEFINE_TRACE(subsys_eventb, 10DECLARE_TRACE(subsys_eventb,
11 TPPROTO(void), 11 TPPROTO(void),
12 TPARGS()); 12 TPARGS());
13#endif 13#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
index 55abfdda4bd4..e3a964889dc7 100644
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -46,6 +46,7 @@ void __exit tp_sample_trace_exit(void)
46{ 46{
47 unregister_trace_subsys_eventb(probe_subsys_eventb); 47 unregister_trace_subsys_eventb(probe_subsys_eventb);
48 unregister_trace_subsys_event(probe_subsys_event); 48 unregister_trace_subsys_event(probe_subsys_event);
49 tracepoint_synchronize_unregister();
49} 50}
50 51
51module_exit(tp_sample_trace_exit); 52module_exit(tp_sample_trace_exit);
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
index 5e9fcf4afffe..685a5acb4562 100644
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -33,6 +33,7 @@ module_init(tp_sample_trace_init);
33void __exit tp_sample_trace_exit(void) 33void __exit tp_sample_trace_exit(void)
34{ 34{
35 unregister_trace_subsys_event(probe_subsys_event); 35 unregister_trace_subsys_event(probe_subsys_event);
36 tracepoint_synchronize_unregister();
36} 37}
37 38
38module_exit(tp_sample_trace_exit); 39module_exit(tp_sample_trace_exit);
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
index 4ae4b7fcc043..00d169792a3e 100644
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -13,6 +13,9 @@
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include "tp-samples-trace.h" 14#include "tp-samples-trace.h"
15 15
16DEFINE_TRACE(subsys_event);
17DEFINE_TRACE(subsys_eventb);
18
16struct proc_dir_entry *pentry_example; 19struct proc_dir_entry *pentry_example;
17 20
18static int my_open(struct inode *inode, struct file *file) 21static int my_open(struct inode *inode, struct file *file)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 468fbc9016c7..7a176773af85 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,16 +198,10 @@ cmd_modversions = \
198 fi; 198 fi;
199endif 199endif
200 200
201ifdef CONFIG_64BIT
202arch_bits = 64
203else
204arch_bits = 32
205endif
206
207ifdef CONFIG_FTRACE_MCOUNT_RECORD 201ifdef CONFIG_FTRACE_MCOUNT_RECORD
208cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \ 202cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
209 "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \ 203 "$(if $(CONFIG_64BIT),64,32)" \
210 "$(NM)" "$(RM)" "$(MV)" "$(@)"; 204 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
211endif 205endif
212 206
213define rule_cc_o_c 207define rule_cc_o_c
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index d2c61efc216f..f0af9aa9b243 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -78,11 +78,13 @@ while (<>) {
78} 78}
79 79
80if ($count == 0) { 80if ($count == 0) {
81 print "No data found in the dmesg. Make sure that 'printk.time=1' and\n"; 81 print STDERR <<END;
82 print "'initcall_debug' are passed on the kernel command line.\n\n"; 82No data found in the dmesg. Make sure that 'printk.time=1' and
83 print "Usage: \n"; 83'initcall_debug' are passed on the kernel command line.
84 print " dmesg | perl scripts/bootgraph.pl > output.svg\n\n"; 84Usage:
85 exit; 85 dmesg | perl scripts/bootgraph.pl > output.svg
86END
87 exit 1;
86} 88}
87 89
88print "<?xml version=\"1.0\" standalone=\"no\"?> \n"; 90print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
@@ -109,8 +111,8 @@ my $stylecounter = 0;
109my %rows; 111my %rows;
110my $rowscount = 1; 112my $rowscount = 1;
111my @initcalls = sort { $start{$a} <=> $start{$b} } keys(%start); 113my @initcalls = sort { $start{$a} <=> $start{$b} } keys(%start);
112my $key; 114
113foreach $key (@initcalls) { 115foreach my $key (@initcalls) {
114 my $duration = $end{$key} - $start{$key}; 116 my $duration = $end{$key} - $start{$key};
115 117
116 if ($duration >= $threshold) { 118 if ($duration >= $threshold) {
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index c67cec8e90f4..fe831412bea9 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -112,6 +112,9 @@ my ($arch, $bits, $objdump, $objcopy, $cc,
112# Acceptable sections to record. 112# Acceptable sections to record.
113my %text_sections = ( 113my %text_sections = (
114 ".text" => 1, 114 ".text" => 1,
115 ".sched.text" => 1,
116 ".spinlock.text" => 1,
117 ".irqentry.text" => 1,
115); 118);
116 119
117$objdump = "objdump" if ((length $objdump) == 0); 120$objdump = "objdump" if ((length $objdump) == 0);
@@ -130,10 +133,13 @@ my %weak; # List of weak functions
130my %convert; # List of local functions used that needs conversion 133my %convert; # List of local functions used that needs conversion
131 134
132my $type; 135my $type;
136my $nm_regex; # Find the local functions (return function)
133my $section_regex; # Find the start of a section 137my $section_regex; # Find the start of a section
134my $function_regex; # Find the name of a function 138my $function_regex; # Find the name of a function
135 # (return offset and func name) 139 # (return offset and func name)
136my $mcount_regex; # Find the call site to mcount (return offset) 140my $mcount_regex; # Find the call site to mcount (return offset)
141my $alignment; # The .align value to use for $mcount_section
142my $section_type; # Section header plus possible alignment command
137 143
138if ($arch eq "x86") { 144if ($arch eq "x86") {
139 if ($bits == 64) { 145 if ($bits == 64) {
@@ -143,11 +149,21 @@ if ($arch eq "x86") {
143 } 149 }
144} 150}
145 151
152#
153# We base the defaults off of i386, the other archs may
154# feel free to change them in the below if statements.
155#
156$nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)";
157$section_regex = "Disassembly of section\\s+(\\S+):";
158$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
159$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
160$section_type = '@progbits';
161$type = ".long";
162
146if ($arch eq "x86_64") { 163if ($arch eq "x86_64") {
147 $section_regex = "Disassembly of section\\s+(\\S+):";
148 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
149 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; 164 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
150 $type = ".quad"; 165 $type = ".quad";
166 $alignment = 8;
151 167
152 # force flags for this arch 168 # force flags for this arch
153 $ld .= " -m elf_x86_64"; 169 $ld .= " -m elf_x86_64";
@@ -156,10 +172,7 @@ if ($arch eq "x86_64") {
156 $cc .= " -m64"; 172 $cc .= " -m64";
157 173
158} elsif ($arch eq "i386") { 174} elsif ($arch eq "i386") {
159 $section_regex = "Disassembly of section\\s+(\\S+):"; 175 $alignment = 4;
160 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
161 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
162 $type = ".long";
163 176
164 # force flags for this arch 177 # force flags for this arch
165 $ld .= " -m elf_i386"; 178 $ld .= " -m elf_i386";
@@ -168,16 +181,26 @@ if ($arch eq "x86_64") {
168 $cc .= " -m32"; 181 $cc .= " -m32";
169 182
170} elsif ($arch eq "sh") { 183} elsif ($arch eq "sh") {
171 $section_regex = "Disassembly of section\\s+(\\S+):"; 184 $alignment = 2;
172 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
173 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
174 $type = ".long";
175 185
176 # force flags for this arch 186 # force flags for this arch
177 $ld .= " -m shlelf_linux"; 187 $ld .= " -m shlelf_linux";
178 $objcopy .= " -O elf32-sh-linux"; 188 $objcopy .= " -O elf32-sh-linux";
179 $cc .= " -m32"; 189 $cc .= " -m32";
180 190
191} elsif ($arch eq "powerpc") {
192 $nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)";
193 $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:";
194 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$";
195
196 if ($bits == 64) {
197 $type = ".quad";
198 }
199
200} elsif ($arch eq "arm") {
201 $alignment = 2;
202 $section_type = '%progbits';
203
181} else { 204} else {
182 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; 205 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
183} 206}
@@ -247,7 +270,7 @@ if (!$found_version) {
247# 270#
248open (IN, "$nm $inputfile|") || die "error running $nm"; 271open (IN, "$nm $inputfile|") || die "error running $nm";
249while (<IN>) { 272while (<IN>) {
250 if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) { 273 if (/$nm_regex/) {
251 $locals{$1} = 1; 274 $locals{$1} = 1;
252 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) { 275 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
253 $weak{$2} = $1; 276 $weak{$2} = $1;
@@ -298,7 +321,8 @@ sub update_funcs
298 if (!$opened) { 321 if (!$opened) {
299 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n"; 322 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
300 $opened = 1; 323 $opened = 1;
301 print FILE "\t.section $mcount_section,\"a\",\@progbits\n"; 324 print FILE "\t.section $mcount_section,\"a\",$section_type\n";
325 print FILE "\t.align $alignment\n" if (defined($alignment));
302 } 326 }
303 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset; 327 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
304 } 328 }
diff --git a/scripts/trace/power.pl b/scripts/trace/power.pl
new file mode 100644
index 000000000000..4f729b3501e0
--- /dev/null
+++ b/scripts/trace/power.pl
@@ -0,0 +1,108 @@
1#!/usr/bin/perl
2
3# Copyright 2008, Intel Corporation
4#
5# This file is part of the Linux kernel
6#
7# This program file is free software; you can redistribute it and/or modify it
8# under the terms of the GNU General Public License as published by the
9# Free Software Foundation; version 2 of the License.
10#
11# This program is distributed in the hope that it will be useful, but WITHOUT
12# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14# for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program in a file named COPYING; if not, write to the
18# Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301 USA
21#
22# Authors:
23# Arjan van de Ven <arjan@linux.intel.com>
24
25
26#
27# This script turns a cstate ftrace output into a SVG graphic that shows
28# historic C-state information
29#
30#
31# cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg
32#
33
use strict;

# Time stamp of the first CSTATE/PSTATE record seen; all x coordinates are
# relative to it.  0 means "not established yet".
my $base = 0;

# Per-cpu bookkeeping for P-state records: the (relative) time of the last
# transition and the state that was entered at that time.
my @pstate_last;
my @pstate_state;

# SVG styles.  Entries 0..7 are looked up by C-state number (a C-state
# number with no entry yields an empty style); entry 8 is used for the
# P-state bars.
my @styles = (
	"fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
	"fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)",
);


print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
print "<svg width=\"10000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";

# Horizontal scale: SVG units per unit of trace time.
my $scale = 30000.0;
while (<>) {
	my $line = $_;

	# C-state record: one rectangle per idle period, 20 units tall per
	# C-state level, in a 400-unit band per cpu.
	if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) {
		if ($base == 0) {
			$base = $1;
		}
		my $time = ($1 - $base) * $scale;
		my $C = $2;
		my $cpu = $3;
		my $duration = $4 * $scale;
		my $msec = int($4 * 100000)/100.0;
		my $height = $C * 20;
		my $style = $styles[$C];

		my $y = 400 * $cpu + 140 - $height;

		my $x2 = $time + 4;
		my $y2 = $y + 4;

		print "<rect x=\"$time\" width=\"$duration\" y=\"$y\" height=\"$height\" style=\"$style\"/>\n";
		print "<text transform=\"translate($x2,$y2) rotate(90)\">C$C $msec</text>\n";
	}

	# P-state record: when the cpu changes state, draw a bar for the
	# state it is leaving, spanning the time it spent there.
	if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) {
		# A trace may start with a P-state record.  Establish the time
		# base here too, so the stored times are never a mix of
		# absolute and relative values (which would give a negative
		# duration for the first bar).
		if ($base == 0) {
			$base = $1;
		}
		my $time = $1 - $base;
		my $state = $2;
		my $cpu = $3;

		if (defined($pstate_last[$cpu])) {
			my $from = $pstate_last[$cpu];
			my $oldstate = $pstate_state[$cpu];
			my $duration = ($time-$from) * $scale;

			$from = $from * $scale;
			my $height = 140 - ($oldstate * (140/8));

			my $y = 400 * $cpu + 200 + $height;
			my $y2 = $y+4;
			my $style = $styles[8];

			print "<rect x=\"$from\" y=\"$y\" width=\"$duration\" height=\"5\" style=\"$style\"/>\n";
			print "<text transform=\"translate($from,$y2)\">P$oldstate (cpu $cpu)</text>\n";
		}

		$pstate_last[$cpu] = $time;
		$pstate_state[$cpu] = $state;
	}
}


print "</svg>\n";
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
new file mode 100644
index 000000000000..902f9a992620
--- /dev/null
+++ b/scripts/tracing/draw_functrace.py
@@ -0,0 +1,130 @@
1#!/usr/bin/python
2
3"""
4Copyright 2008 (c) Frederic Weisbecker <fweisbec@gmail.com>
5Licensed under the terms of the GNU GPL License version 2
6
7This script parses a trace provided by the function tracer in
8kernel/trace/trace_functions.c
9The resulted trace is processed into a tree to produce a more human
10view of the call stack by drawing textual but hierarchical tree of
11calls. Only the functions' names and the call time are provided.
12
13Usage:
14 Be sure that you have CONFIG_FUNCTION_TRACER
15 # mkdir /debug
16 # mount -t debugfs debugfs /debug
17 # echo function > /debug/tracing/current_tracer
18 $ cat /debug/tracing/trace_pipe > ~/raw_trace_func
19 Wait some time, but not too long: the script is a bit slow.
20 Break the pipe (Ctrl + Z)
21 $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
22 Then you have your drawn trace in draw_functrace
23"""
24
25
26import sys, re
27
class CallTree:
    """A node in the tree of traced function calls.

    Every node stores the function name, the time of the call (or None)
    and its children in call order. A node created without an explicit
    parent is attached to CallTree.ROOT, the virtual parent used for
    functions that have no caller in the trace. CallTree.ROOT must be
    assigned before getParent() is used.
    """
    ROOT = None

    def __init__(self, func, time=None, parent=None):
        """Create a node for `func`, called at `time`, below `parent`."""
        self._func = func
        self._time = time
        if parent is None:
            self._parent = CallTree.ROOT
        else:
            self._parent = parent
        self._children = []

    def calls(self, func, calltime):
        """Record that this function called `func` at `calltime`.

        @return: A reference to the newly created child node.
        """
        child = CallTree(func, calltime, self)
        self._children.append(child)
        return child

    def getParent(self, func):
        """Find the closest node named `func`, starting at this node and
        walking up through its parents.

        If the walk reaches ROOT without a match, a new node named `func`
        (with no call time) is created as a child of ROOT.
        @return: A reference to the matching or newly created node.
        """
        tree = self
        # ROOT is a sentinel object, so compare by identity.
        while tree is not CallTree.ROOT and tree._func != func:
            tree = tree._parent
        if tree is CallTree.ROOT:
            return CallTree.ROOT.calls(func, None)
        return tree

    def __repr__(self):
        return self.__toString("", True)

    def __toString(self, branch, lastChild):
        """Render this node and its subtree, one line per node.

        `branch` is the prefix drawn in front of this node's line and
        `lastChild` tells whether this node is the last child of its
        parent.
        """
        if self._time is not None:
            head = "%s----%s (%s)\n" % (branch, self._func, self._time)
        else:
            head = "%s----%s\n" % (branch, self._func)

        # Below a last child the connector of this level is blanked out,
        # since no further sibling follows it.
        if lastChild:
            branch = branch[:-1] + " "

        last = len(self._children) - 1
        lines = [head]
        for i, child in enumerate(self._children):
            lines.append(child.__toString(branch + " |", i == last))
        return "".join(lines)
89
class BrokenLineException(Exception):
    """If the last line is not complete because of the pipe breakage,
    we want to stop the processing and ignore this line.
    """


class CommentLineException(Exception):
    """If the line is a comment (as in the beginning of the trace file),
    just ignore it.
    """


# Compiled once at import time; parseLine() runs for every line of the trace.
# Matches "<anything>] <time>: <callee> <-<caller>" from the start of the line.
_TRACE_LINE_RE = re.compile(r"[^]]+?\] +([0-9.]+): (\w+) <-(\w+)")


def parseLine(line):
    """Split one line of function-tracer output into its fields.

    @return: A (calltime, callee, caller) tuple of strings.
    @raise CommentLineException: the stripped line starts with "#".
    @raise BrokenLineException: the line does not match the expected
        "...] <time>: <callee> <-<caller>" layout.
    """
    line = line.strip()
    if line.startswith("#"):
        raise CommentLineException
    m = _TRACE_LINE_RE.match(line)
    if m is None:
        raise BrokenLineException
    return m.groups()
111
112
def main():
    """Read a function trace from stdin and print it as a call tree.

    Comment lines are skipped. Reading stops at the first line that
    parseLine() cannot match; everything parsed up to that point is
    still printed.
    """
    CallTree.ROOT = CallTree("Root (Nowhere)", None, None)
    tree = CallTree.ROOT

    for line in sys.stdin:
        try:
            calltime, callee, caller = parseLine(line)
        except BrokenLineException:
            break
        except CommentLineException:
            continue
        # Climb to the caller's node (created under ROOT if it is not an
        # ancestor), then descend into the new callee.
        tree = tree.getParent(caller)
        tree = tree.calls(callee, calltime)

    # Call form rather than the print statement: with a single argument it
    # prints the same text on Python 2 and is also valid on Python 3.
    print(CallTree.ROOT)


if __name__ == "__main__":
    main()