-rw-r--r--  Documentation/ABI/testing/debugfs-kmemtrace | 71
-rw-r--r--  Documentation/kernel-parameters.txt | 2
-rw-r--r--  Documentation/trace/ftrace-design.txt | 153
-rw-r--r--  Documentation/trace/kmemtrace.txt | 126
-rw-r--r--  Documentation/trace/kprobetrace.txt | 2
-rw-r--r--  MAINTAINERS | 9
-rw-r--r--  Makefile | 4
-rw-r--r--  arch/Kconfig | 7
-rw-r--r--  arch/alpha/include/asm/local64.h | 1
-rw-r--r--  arch/arm/include/asm/local64.h | 1
-rw-r--r--  arch/arm/kernel/perf_event.c | 18
-rw-r--r--  arch/avr32/include/asm/local64.h | 1
-rw-r--r--  arch/blackfin/include/asm/local64.h | 1
-rw-r--r--  arch/cris/include/asm/local64.h | 1
-rw-r--r--  arch/frv/include/asm/local64.h | 1
-rw-r--r--  arch/frv/kernel/local64.h | 1
-rw-r--r--  arch/h8300/include/asm/local64.h | 1
-rw-r--r--  arch/ia64/include/asm/local64.h | 1
-rw-r--r--  arch/m32r/include/asm/local64.h | 1
-rw-r--r--  arch/m68k/include/asm/local64.h | 1
-rw-r--r--  arch/microblaze/include/asm/local64.h | 1
-rw-r--r--  arch/mips/include/asm/local64.h | 1
-rw-r--r--  arch/mn10300/include/asm/local64.h | 1
-rw-r--r--  arch/parisc/include/asm/local64.h | 1
-rw-r--r--  arch/powerpc/include/asm/local64.h | 1
-rw-r--r--  arch/powerpc/include/asm/perf_event.h | 12
-rw-r--r--  arch/powerpc/kernel/misc.S | 26
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 41
-rw-r--r--  arch/powerpc/kernel/perf_event_fsl_emb.c | 29
-rw-r--r--  arch/s390/include/asm/local64.h | 1
-rw-r--r--  arch/score/include/asm/local64.h | 1
-rw-r--r--  arch/sh/include/asm/local64.h | 1
-rw-r--r--  arch/sh/kernel/perf_event.c | 6
-rw-r--r--  arch/sparc/include/asm/local64.h | 1
-rw-r--r--  arch/sparc/include/asm/perf_event.h | 8
-rw-r--r--  arch/sparc/kernel/helpers.S | 6
-rw-r--r--  arch/sparc/kernel/perf_event.c | 25
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h | 2
-rw-r--r--  arch/x86/include/asm/local64.h | 1
-rw-r--r--  arch/x86/include/asm/nmi.h | 2
-rw-r--r--  arch/x86/include/asm/perf_event.h | 18
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 99
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 49
-rw-r--r--  arch/x86/kernel/apic/Makefile | 7
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c | 107
-rw-r--r--  arch/x86/kernel/apic/nmi.c | 7
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 62
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 156
-rw-r--r--  arch/x86/kernel/dumpstack.c | 1
-rw-r--r--  arch/x86/kernel/dumpstack.h | 56
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 1
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 51
-rw-r--r--  arch/x86/kernel/kprobes.c | 33
-rw-r--r--  arch/x86/kernel/process_32.c | 4
-rw-r--r--  arch/x86/kernel/process_64.c | 5
-rw-r--r--  arch/x86/kernel/stacktrace.c | 31
-rw-r--r--  arch/x86/kernel/traps.c | 7
-rw-r--r--  arch/x86/mm/pf_in.c | 30
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 16
-rw-r--r--  arch/xtensa/include/asm/local64.h | 1
-rw-r--r--  drivers/oprofile/event_buffer.c | 3
-rw-r--r--  fs/exec.c | 1
-rw-r--r--  include/asm-generic/local64.h | 96
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 4
-rw-r--r--  include/linux/ftrace.h | 5
-rw-r--r--  include/linux/ftrace_event.h | 18
-rw-r--r--  include/linux/kernel.h | 5
-rw-r--r--  include/linux/kmemtrace.h | 25
-rw-r--r--  include/linux/nmi.h | 13
-rw-r--r--  include/linux/perf_event.h | 95
-rw-r--r--  include/linux/sched.h | 24
-rw-r--r--  include/linux/slab_def.h | 3
-rw-r--r--  include/linux/slub_def.h | 3
-rw-r--r--  include/linux/syscalls.h | 2
-rw-r--r--  include/trace/boot.h | 60
-rw-r--r--  include/trace/events/sched.h | 32
-rw-r--r--  include/trace/events/timer.h | 80
-rw-r--r--  include/trace/ftrace.h | 23
-rw-r--r--  include/trace/syscall.h | 1
-rw-r--r--  init/main.c | 29
-rw-r--r--  kernel/Makefile | 2
-rw-r--r--  kernel/hw_breakpoint.c | 78
-rw-r--r--  kernel/perf_event.c | 458
-rw-r--r--  kernel/sched.c | 6
-rw-r--r--  kernel/softlockup.c | 293
-rw-r--r--  kernel/sysctl.c | 55
-rw-r--r--  kernel/timer.c | 1
-rw-r--r--  kernel/trace/Kconfig | 68
-rw-r--r--  kernel/trace/Makefile | 4
-rw-r--r--  kernel/trace/ftrace.c | 5
-rw-r--r--  kernel/trace/kmemtrace.c | 529
-rw-r--r--  kernel/trace/ring_buffer.c | 40
-rw-r--r--  kernel/trace/trace.c | 127
-rw-r--r--  kernel/trace/trace.h | 90
-rw-r--r--  kernel/trace/trace_boot.c | 185
-rw-r--r--  kernel/trace/trace_clock.c | 5
-rw-r--r--  kernel/trace/trace_entries.h | 94
-rw-r--r--  kernel/trace/trace_event_perf.c | 27
-rw-r--r--  kernel/trace/trace_events.c | 299
-rw-r--r--  kernel/trace/trace_events_filter.c | 27
-rw-r--r--  kernel/trace/trace_export.c | 8
-rw-r--r--  kernel/trace/trace_functions.c | 6
-rw-r--r--  kernel/trace/trace_functions_graph.c | 3
-rw-r--r--  kernel/trace/trace_irqsoff.c | 3
-rw-r--r--  kernel/trace/trace_kprobe.c | 383
-rw-r--r--  kernel/trace/trace_ksym.c | 508
-rw-r--r--  kernel/trace/trace_output.c | 69
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 7
-rw-r--r--  kernel/trace/trace_selftest.c | 87
-rw-r--r--  kernel/trace/trace_stack.c | 6
-rw-r--r--  kernel/trace/trace_syscalls.c | 7
-rw-r--r--  kernel/trace/trace_sysprof.c | 329
-rw-r--r--  kernel/watchdog.c | 567
-rw-r--r--  lib/Kconfig.debug | 35
-rw-r--r--  mm/mmap.c | 6
-rw-r--r--  mm/slab.c | 1
-rw-r--r--  mm/slob.c | 4
-rw-r--r--  mm/slub.c | 1
-rw-r--r--  scripts/package/Makefile | 37
-rwxr-xr-x  scripts/recordmcount.pl | 2
-rw-r--r--  tools/perf/.gitignore | 2
-rw-r--r--  tools/perf/Documentation/perf-buildid-cache.txt | 8
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 8
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 13
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 7
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 8
-rw-r--r--  tools/perf/MANIFEST | 12
-rw-r--r--  tools/perf/Makefile | 113
-rw-r--r--  tools/perf/arch/sh/Makefile | 4
-rw-r--r--  tools/perf/arch/sh/util/dwarf-regs.c | 55
-rw-r--r--  tools/perf/builtin-annotate.c | 6
-rw-r--r--  tools/perf/builtin-buildid-cache.c | 3
-rw-r--r--  tools/perf/builtin-buildid-list.c | 4
-rw-r--r--  tools/perf/builtin-diff.c | 9
-rw-r--r--  tools/perf/builtin-probe.c | 3
-rw-r--r--  tools/perf/builtin-record.c | 81
-rw-r--r--  tools/perf/builtin-report.c | 27
-rw-r--r--  tools/perf/builtin-stat.c | 14
-rw-r--r--  tools/perf/builtin-top.c | 40
-rw-r--r--  tools/perf/builtin-trace.c | 32
-rw-r--r--  tools/perf/feature-tests.mak | 119
-rw-r--r--  tools/perf/perf-archive.sh | 20
-rw-r--r--  tools/perf/perf.c | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py | 30
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py | 184
-rw-r--r--  tools/perf/scripts/python/bin/sched-migration-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/sched-migration-report | 3
-rw-r--r--  tools/perf/scripts/python/sched-migration.py | 461
-rw-r--r--  tools/perf/util/build-id.c | 28
-rw-r--r--  tools/perf/util/cache.h | 1
-rw-r--r--  tools/perf/util/callchain.c | 2
-rw-r--r--  tools/perf/util/callchain.h | 2
-rw-r--r--  tools/perf/util/config.c | 64
-rw-r--r--  tools/perf/util/cpumap.c | 57
-rw-r--r--  tools/perf/util/cpumap.h | 2
-rw-r--r--  tools/perf/util/debug.c | 10
-rw-r--r--  tools/perf/util/event.c | 107
-rw-r--r--  tools/perf/util/event.h | 6
-rw-r--r--  tools/perf/util/header.c | 13
-rw-r--r--  tools/perf/util/hist.c | 214
-rw-r--r--  tools/perf/util/hist.h | 30
-rw-r--r--  tools/perf/util/map.c | 116
-rw-r--r--  tools/perf/util/map.h | 14
-rw-r--r--  tools/perf/util/newt.c | 1164
-rw-r--r--  tools/perf/util/parse-events.c | 11
-rw-r--r--  tools/perf/util/probe-event.c | 271
-rw-r--r--  tools/perf/util/probe-event.h | 29
-rw-r--r--  tools/perf/util/probe-finder.c | 248
-rw-r--r--  tools/perf/util/probe-finder.h | 10
-rw-r--r--  tools/perf/util/session.c | 62
-rw-r--r--  tools/perf/util/sort.c | 40
-rw-r--r--  tools/perf/util/sort.h | 22
-rw-r--r--  tools/perf/util/symbol.c | 299
-rw-r--r--  tools/perf/util/symbol.h | 18
-rw-r--r--  tools/perf/util/thread.c | 7
-rw-r--r--  tools/perf/util/thread.h | 2
-rw-r--r--  tools/perf/util/util.h | 3
179 files changed, 5611 insertions, 4809 deletions
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
deleted file mode 100644
index 5e6a92a02d85..000000000000
--- a/Documentation/ABI/testing/debugfs-kmemtrace
+++ /dev/null
@@ -1,71 +0,0 @@
1What: /sys/kernel/debug/kmemtrace/
2Date: July 2008
3Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
4Description:
5
6In kmemtrace-enabled kernels, the following files are created:
7
8/sys/kernel/debug/kmemtrace/
9 cpu<n> (0400) Per-CPU tracing data, see below. (binary)
10 total_overruns (0400) Total number of bytes which were dropped from
11 cpu<n> files because of full buffer condition,
12 non-binary. (text)
13 abi_version (0400) Kernel's kmemtrace ABI version. (text)
14
15Each per-CPU file should be read according to the relay interface. That is,
16the reader should set affinity to that specific CPU and, as currently done by
17the userspace application (though there are other methods), use poll() with
18an infinite timeout before every read(). Otherwise, erroneous data may be
19read. The binary data has the following _core_ format:
20
21 Event ID (1 byte) Unsigned integer, one of:
22 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
23 1 - represents a freeing of previously allocated memory
24 (KMEMTRACE_EVENT_FREE)
25 Type ID (1 byte) Unsigned integer, one of:
26 0 - this is a kmalloc() / kfree()
27 1 - this is a kmem_cache_alloc() / kmem_cache_free()
28 2 - this is a __get_free_pages() et al.
29 Event size (2 bytes) Unsigned integer representing the
30 size of this event. Used to extend
31 kmemtrace. Discard the bytes you
32 don't know about.
33 Sequence number (4 bytes) Signed integer used to reorder data
34 logged on SMP machines. Wraparound
35 must be taken into account, although
36 it is unlikely.
37 Caller address (8 bytes) Return address to the caller.
38 Pointer to mem (8 bytes) Pointer to target memory area. Can be
39 NULL, but not all such calls might be
40 recorded.
41
42In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
43
44 Requested bytes (8 bytes) Total number of requested bytes,
45 unsigned, must not be zero.
46 Allocated bytes (8 bytes) Total number of actually allocated
47 bytes, unsigned, must not be lower
48 than requested bytes.
49 Requested flags (4 bytes) GFP flags supplied by the caller.
50 Target CPU (4 bytes) Signed integer, valid for event id 1.
51 If equal to -1, target CPU is the same
52 as origin CPU, but the reverse might
53 not be true.
54
55The data is made available in the same endianness the machine has.
56
57Other event ids and type ids may be defined and added. Other fields may be
58added by increasing event size, but see below for details.
59Every modification to the ABI, including new id definitions, are followed
60by bumping the ABI version by one.
61
62Adding new data to the packet (features) is done at the end of the mandatory
63data:
64 Feature size (2 byte)
65 Feature ID (1 byte)
66 Feature data (Feature size - 3 bytes)
67
68
69Users:
70 kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
71
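For reference, the core record layout described in the removed ABI document maps
onto roughly the following C view. This is a sketch only: the struct and field
names are illustrative assumptions, not part of the ABI text, and the on-disk
data is packed in the machine's native endianness as noted above.

	#include <linux/types.h>

	/* Illustrative only -- not the kernel's own definition. */
	struct kmemtrace_core_record {
		__u8	event_id;	/* 0 = ALLOC, 1 = FREE */
		__u8	type_id;	/* 0 = kmalloc/kfree, 1 = kmem_cache_*, 2 = pages */
		__u16	event_size;	/* total event size; skip bytes you don't know */
		__s32	seq;		/* sequence number, may wrap */
		__u64	call_site;	/* return address of the caller */
		__u64	ptr;		/* pointer to the memory area, may be 0 */
	} __attribute__((packed));

	/* Present only when event_id == 0 (KMEMTRACE_EVENT_ALLOC). */
	struct kmemtrace_alloc_record {
		__u64	bytes_req;	/* requested bytes, non-zero */
		__u64	bytes_alloc;	/* actually allocated, >= bytes_req */
		__u32	gfp_flags;	/* GFP flags supplied by the caller */
		__s32	cpu;		/* target CPU, -1 = same as origin */
	} __attribute__((packed));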
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f72ba727441f..f20c7abc0329 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1816,6 +1816,8 @@ and is between 256 and 4096 characters. It is defined in the file
1816 1816
1817 nousb [USB] Disable the USB subsystem 1817 nousb [USB] Disable the USB subsystem
1818 1818
1819 nowatchdog [KNL] Disable the lockup detector.
1820
1819 nowb [ARM] 1821 nowb [ARM]
1820 1822
1821 nox2apic [X86-64,APIC] Do not enable x2APIC mode. 1823 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index f1f81afee8a0..dc52bd442c92 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -13,6 +13,9 @@ Note that this focuses on architecture implementation details only. If you
13want more explanation of a feature in terms of common code, review the common 13want more explanation of a feature in terms of common code, review the common
14ftrace.txt file. 14ftrace.txt file.
15 15
16Ideally, everyone who wishes to retain performance while supporting tracing in
17their kernel should make it all the way to dynamic ftrace support.
18
16 19
17Prerequisites 20Prerequisites
18------------- 21-------------
@@ -215,7 +218,7 @@ An arch may pass in a unique value (frame pointer) to both the entering and
215exiting of a function. On exit, the value is compared and if it does not 218exiting of a function. On exit, the value is compared and if it does not
216match, then it will panic the kernel. This is largely a sanity check for bad 219match, then it will panic the kernel. This is largely a sanity check for bad
217code generation with gcc. If gcc for your port sanely updates the frame 220code generation with gcc. If gcc for your port sanely updates the frame
218pointer under different opitmization levels, then ignore this option. 221pointer under different optimization levels, then ignore this option.
219 222
220However, adding support for it isn't terribly difficult. In your assembly code 223However, adding support for it isn't terribly difficult. In your assembly code
221that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument. 224that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
@@ -234,7 +237,7 @@ If you can't trace NMI functions, then skip this option.
234 237
235 238
236HAVE_SYSCALL_TRACEPOINTS 239HAVE_SYSCALL_TRACEPOINTS
237--------------------- 240------------------------
238 241
239You need very few things to get the syscalls tracing in an arch. 242You need very few things to get the syscalls tracing in an arch.
240 243
@@ -250,12 +253,152 @@ You need very few things to get the syscalls tracing in an arch.
250HAVE_FTRACE_MCOUNT_RECORD 253HAVE_FTRACE_MCOUNT_RECORD
251------------------------- 254-------------------------
252 255
253See scripts/recordmcount.pl for more info. 256See scripts/recordmcount.pl for more info. Just fill in the arch-specific
257details for how to locate the addresses of mcount call sites via objdump.
258This option doesn't make much sense without also implementing dynamic ftrace.
254 259
260
261HAVE_DYNAMIC_FTRACE
262-------------------
263
264You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so
265scroll your reader back up if you got over eager.
266
267Once those are out of the way, you will need to implement:
268 - asm/ftrace.h:
269 - MCOUNT_ADDR
270 - ftrace_call_adjust()
271 - struct dyn_arch_ftrace{}
272 - asm code:
273 - mcount() (new stub)
274 - ftrace_caller()
275 - ftrace_call()
276 - ftrace_stub()
277 - C code:
278 - ftrace_dyn_arch_init()
279 - ftrace_make_nop()
280 - ftrace_make_call()
281 - ftrace_update_ftrace_func()
282
283First you will need to fill out some arch details in your asm/ftrace.h.
284
285Define MCOUNT_ADDR as the address of your mcount symbol similar to:
286 #define MCOUNT_ADDR ((unsigned long)mcount)
287Since no one else will have a decl for that function, you will need to:
288 extern void mcount(void);
289
290You will also need the helper function ftrace_call_adjust(). Most people
291will be able to stub it out like so:
292 static inline unsigned long ftrace_call_adjust(unsigned long addr)
293 {
294 return addr;
295 }
255<details to be filled> 296<details to be filled>
256 297
298Lastly you will need the custom dyn_arch_ftrace structure. If you need
299some extra state when runtime patching arbitrary call sites, this is the
300place. For now though, create an empty struct:
301 struct dyn_arch_ftrace {
302 /* No extra data needed */
303 };
304
305With the header out of the way, we can fill out the assembly code. While we
306did already create a mcount() function earlier, dynamic ftrace only wants a
307stub function. This is because the mcount() will only be used during boot
308and then all references to it will be patched out never to return. Instead,
309the guts of the old mcount() will be used to create a new ftrace_caller()
310function. Because the two are hard to merge, it will most likely be a lot
311easier to have two separate definitions split up by #ifdefs. Same goes for
312the ftrace_stub() as that will now be inlined in ftrace_caller().
313
314Before we get confused anymore, let's check out some pseudo code so you can
315implement your own stuff in assembly:
257 316
258HAVE_DYNAMIC_FTRACE 317void mcount(void)
259--------------------- 318{
319 return;
320}
321
322void ftrace_caller(void)
323{
324 /* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */
325
326 /* save all state needed by the ABI (see paragraph above) */
327
328 unsigned long frompc = ...;
329 unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
330
331ftrace_call:
332 ftrace_stub(frompc, selfpc);
333
334 /* restore all state needed by the ABI */
335
336ftrace_stub:
337 return;
338}
339
340This might look a little odd at first, but keep in mind that we will be runtime
341patching multiple things. First, only functions that we actually want to trace
342will be patched to call ftrace_caller(). Second, since we only have one tracer
343active at a time, we will patch the ftrace_caller() function itself to call the
344specific tracer in question. That is the point of the ftrace_call label.
345
346With that in mind, let's move on to the C code that will actually be doing the
347runtime patching. You'll need a little knowledge of your arch's opcodes in
348order to make it through the next section.
349
350Every arch has an init callback function. If you need to do something early on
351to initialize some state, this is the time to do that. Otherwise, this simple
352function below should be sufficient for most people:
353
354int __init ftrace_dyn_arch_init(void *data)
355{
356 /* return value is done indirectly via data */
357 *(unsigned long *)data = 0;
358
359 return 0;
360}
361
362There are two functions that are used to do runtime patching of arbitrary
363functions. The first is used to turn the mcount call site into a nop (which
364is what helps us retain runtime performance when not tracing). The second is
365used to turn the mcount call site into a call to an arbitrary location (but
366typically that is ftracer_caller()). See the general function definition in
367linux/ftrace.h for the functions:
368 ftrace_make_nop()
369 ftrace_make_call()
370The rec->ip value is the address of the mcount call site that was collected
371by the scripts/recordmcount.pl during build time.
372
373The last function is used to do runtime patching of the active tracer. This
374will be modifying the assembly code at the location of the ftrace_call symbol
375inside of the ftrace_caller() function. So you should have sufficient padding
376at that location to support the new function calls you'll be inserting. Some
377people will be using a "call" type instruction while others will be using a
378"branch" type instruction. Specifically, the function is:
379 ftrace_update_ftrace_func()
380
381
382HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER
383------------------------------------------------
384
385The function grapher needs a few tweaks in order to work with dynamic ftrace.
386Basically, you will need to:
387 - update:
388 - ftrace_caller()
389 - ftrace_graph_call()
390 - ftrace_graph_caller()
391 - implement:
392 - ftrace_enable_ftrace_graph_caller()
393 - ftrace_disable_ftrace_graph_caller()
260 394
261<details to be filled> 395<details to be filled>
396Quick notes:
397 - add a nop stub after the ftrace_call location named ftrace_graph_call;
398 stub needs to be large enough to support a call to ftrace_graph_caller()
399 - update ftrace_graph_caller() to work with being called by the new
400 ftrace_caller() since some semantics may have changed
401 - ftrace_enable_ftrace_graph_caller() will runtime patch the
402 ftrace_graph_call location with a call to ftrace_graph_caller()
403 - ftrace_disable_ftrace_graph_caller() will runtime patch the
404 ftrace_graph_call location with nops
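Tying the quick notes above together, the two enable/disable hooks usually end
up as small wrappers that rewrite the ftrace_graph_call slot. Again a sketch
with the same hypothetical arch_patch_text()/arch_gen_*() helpers as before,
not any particular port's code:

	extern void ftrace_graph_call(void);
	extern void ftrace_graph_caller(void);

	int ftrace_enable_ftrace_graph_caller(void)
	{
		unsigned long ip = (unsigned long)&ftrace_graph_call;

		return arch_patch_text(ip,
			arch_gen_call(ip, (unsigned long)&ftrace_graph_caller));
	}

	int ftrace_disable_ftrace_graph_caller(void)
	{
		unsigned long ip = (unsigned long)&ftrace_graph_call;

		return arch_patch_text(ip, arch_gen_nop());
	}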
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
deleted file mode 100644
index 6308735e58ca..000000000000
--- a/Documentation/trace/kmemtrace.txt
+++ /dev/null
@@ -1,126 +0,0 @@
1 kmemtrace - Kernel Memory Tracer
2
3 by Eduard - Gabriel Munteanu
4 <eduard.munteanu@linux360.ro>
5
6I. Introduction
7===============
8
9kmemtrace helps kernel developers figure out two things:
101) how different allocators (SLAB, SLUB etc.) perform
112) how kernel code allocates memory and how much
12
13To do this, we trace every allocation and export information to the userspace
14through the relay interface. We export things such as the number of requested
15bytes, the number of bytes actually allocated (i.e. including internal
16fragmentation), whether this is a slab allocation or a plain kmalloc() and so
17on.
18
19The actual analysis is performed by a userspace tool (see section III for
20details on where to get it from). It logs the data exported by the kernel,
21processes it and (as of writing this) can provide the following information:
22- the total amount of memory allocated and fragmentation per call-site
23- the amount of memory allocated and fragmentation per allocation
24- total memory allocated and fragmentation in the collected dataset
25- number of cross-CPU allocation and frees (makes sense in NUMA environments)
26
27Moreover, it can potentially find inconsistent and erroneous behavior in
28kernel code, such as using slab free functions on kmalloc'ed memory or
29allocating less memory than requested (but not truly failed allocations).
30
31kmemtrace also makes provisions for tracing on some arch and analysing the
32data on another.
33
34II. Design and goals
35====================
36
37kmemtrace was designed to handle rather large amounts of data. Thus, it uses
38the relay interface to export whatever is logged to userspace, which then
39stores it. Analysis and reporting is done asynchronously, that is, after the
40data is collected and stored. By design, it allows one to log and analyse
41on different machines and different arches.
42
43As of writing this, the ABI is not considered stable, though it might not
44change much. However, no guarantees are made about compatibility yet. When
45deemed stable, the ABI should still allow easy extension while maintaining
46backward compatibility. This is described further in Documentation/ABI.
47
48Summary of design goals:
49 - allow logging and analysis to be done across different machines
50 - be fast and anticipate usage in high-load environments (*)
51 - be reasonably extensible
52 - make it possible for GNU/Linux distributions to have kmemtrace
53 included in their repositories
54
55(*) - one of the reasons Pekka Enberg's original userspace data analysis
56 tool's code was rewritten from Perl to C (although this is more than a
57 simple conversion)
58
59
60III. Quick usage guide
61======================
62
631) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
64CONFIG_KMEMTRACE).
65
662) Get the userspace tool and build it:
67$ git clone git://repo.or.cz/kmemtrace-user.git # current repository
68$ cd kmemtrace-user/
69$ ./autogen.sh
70$ ./configure
71$ make
72
733) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
74'single' runlevel (so that relay buffers don't fill up easily), and run
75kmemtrace:
76# '$' does not mean user, but root here.
77$ mount -t debugfs none /sys/kernel/debug
78$ mount -t proc none /proc
79$ cd path/to/kmemtrace-user/
80$ ./kmemtraced
81Wait a bit, then stop it with CTRL+C.
82$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
83 # overrun, should
84 # be zero.
85$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
86 check its correctness]
87$ ./kmemtrace-report
88
89Now you should have a nice and short summary of how the allocator performs.
90
91IV. FAQ and known issues
92========================
93
94Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
95this? Should I worry?
96A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
97large the number is. You can fix it by supplying a higher
98'kmemtrace.subbufs=N' kernel parameter.
99---
100
101Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
102A: This is a bug and should be reported. It can occur for a variety of
103reasons:
104 - possible bugs in relay code
105 - possible misuse of relay by kmemtrace
106 - timestamps being collected unorderly
107Or you may fix it yourself and send us a patch.
108---
109
110Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
111A: This is a known issue and I'm working on it. These might be true errors
112in kernel code, which may have inconsistent behavior (e.g. allocating memory
113with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
114out this behavior may work with SLAB, but may fail with other allocators.
115
116It may also be due to lack of tracing in some unusual allocator functions.
117
118We don't want bug reports regarding this issue yet.
119---
120
121V. See also
122===========
123
124Documentation/kernel-parameters.txt
125Documentation/ABI/testing/debugfs-kmemtrace
126
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index ec94748ae65b..5f77d94598dd 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,7 +42,7 @@ Synopsis of kprobe_events
42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) 42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG. 43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types 44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
45 (u8/u16/u32/u64/s8/s16/s32/s64) are supported. 45 (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
46 46
47 (*) only for return probe. 47 (*) only for return probe.
48 (**) this is useful for fetching a field of data structures. 48 (**) this is useful for fetching a field of data structures.
diff --git a/MAINTAINERS b/MAINTAINERS
index 11e34d5272b8..100a3f535c9f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3403,13 +3403,6 @@ F: include/linux/kmemleak.h
3403F: mm/kmemleak.c 3403F: mm/kmemleak.c
3404F: mm/kmemleak-test.c 3404F: mm/kmemleak-test.c
3405 3405
3406KMEMTRACE
3407M: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
3408S: Maintained
3409F: Documentation/trace/kmemtrace.txt
3410F: include/linux/kmemtrace.h
3411F: kernel/trace/kmemtrace.c
3412
3413KPROBES 3406KPROBES
3414M: Ananth N Mavinakayanahalli <ananth@in.ibm.com> 3407M: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
3415M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 3408M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
@@ -5685,7 +5678,7 @@ TRACING
5685M: Steven Rostedt <rostedt@goodmis.org> 5678M: Steven Rostedt <rostedt@goodmis.org>
5686M: Frederic Weisbecker <fweisbec@gmail.com> 5679M: Frederic Weisbecker <fweisbec@gmail.com>
5687M: Ingo Molnar <mingo@redhat.com> 5680M: Ingo Molnar <mingo@redhat.com>
5688T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core 5681T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf/core
5689S: Maintained 5682S: Maintained
5690F: Documentation/trace/ftrace.txt 5683F: Documentation/trace/ftrace.txt
5691F: arch/*/*/*/ftrace.h 5684F: arch/*/*/*/ftrace.h
diff --git a/Makefile b/Makefile
index 66c94aad3665..7431c283f15b 100644
--- a/Makefile
+++ b/Makefile
@@ -420,7 +420,7 @@ endif
420no-dot-config-targets := clean mrproper distclean \ 420no-dot-config-targets := clean mrproper distclean \
421 cscope TAGS tags help %docs check% coccicheck \ 421 cscope TAGS tags help %docs check% coccicheck \
422 include/linux/version.h headers_% \ 422 include/linux/version.h headers_% \
423 kernelversion 423 kernelversion %src-pkg
424 424
425config-targets := 0 425config-targets := 0
426mixed-targets := 0 426mixed-targets := 0
@@ -1168,6 +1168,8 @@ distclean: mrproper
1168# rpm target kept for backward compatibility 1168# rpm target kept for backward compatibility
1169package-dir := $(srctree)/scripts/package 1169package-dir := $(srctree)/scripts/package
1170 1170
1171%src-pkg: FORCE
1172 $(Q)$(MAKE) $(build)=$(package-dir) $@
1171%pkg: include/config/kernel.release FORCE 1173%pkg: include/config/kernel.release FORCE
1172 $(Q)$(MAKE) $(build)=$(package-dir) $@ 1174 $(Q)$(MAKE) $(build)=$(package-dir) $@
1173rpm: include/config/kernel.release FORCE 1175rpm: include/config/kernel.release FORCE
diff --git a/arch/Kconfig b/arch/Kconfig
index acda512da2e2..4877a8c8ee16 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -151,4 +151,11 @@ config HAVE_MIXED_BREAKPOINTS_REGS
151config HAVE_USER_RETURN_NOTIFIER 151config HAVE_USER_RETURN_NOTIFIER
152 bool 152 bool
153 153
154config HAVE_PERF_EVENTS_NMI
155 bool
156 help
157 System hardware can generate an NMI using the perf event
158 subsystem. Also has support for calculating CPU cycle events
159 to determine how many clock cycles in a given period.
160
154source "kernel/gcov/Kconfig" 161source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/alpha/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/arm/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index de12536d687f..417c392ddf1c 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
164 struct hw_perf_event *hwc, 164 struct hw_perf_event *hwc,
165 int idx) 165 int idx)
166{ 166{
167 s64 left = atomic64_read(&hwc->period_left); 167 s64 left = local64_read(&hwc->period_left);
168 s64 period = hwc->sample_period; 168 s64 period = hwc->sample_period;
169 int ret = 0; 169 int ret = 0;
170 170
171 if (unlikely(left <= -period)) { 171 if (unlikely(left <= -period)) {
172 left = period; 172 left = period;
173 atomic64_set(&hwc->period_left, left); 173 local64_set(&hwc->period_left, left);
174 hwc->last_period = period; 174 hwc->last_period = period;
175 ret = 1; 175 ret = 1;
176 } 176 }
177 177
178 if (unlikely(left <= 0)) { 178 if (unlikely(left <= 0)) {
179 left += period; 179 left += period;
180 atomic64_set(&hwc->period_left, left); 180 local64_set(&hwc->period_left, left);
181 hwc->last_period = period; 181 hwc->last_period = period;
182 ret = 1; 182 ret = 1;
183 } 183 }
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
185 if (left > (s64)armpmu->max_period) 185 if (left > (s64)armpmu->max_period)
186 left = armpmu->max_period; 186 left = armpmu->max_period;
187 187
188 atomic64_set(&hwc->prev_count, (u64)-left); 188 local64_set(&hwc->prev_count, (u64)-left);
189 189
190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); 190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191 191
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
204 u64 delta; 204 u64 delta;
205 205
206again: 206again:
207 prev_raw_count = atomic64_read(&hwc->prev_count); 207 prev_raw_count = local64_read(&hwc->prev_count);
208 new_raw_count = armpmu->read_counter(idx); 208 new_raw_count = armpmu->read_counter(idx);
209 209
210 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 210 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211 new_raw_count) != prev_raw_count) 211 new_raw_count) != prev_raw_count)
212 goto again; 212 goto again;
213 213
214 delta = (new_raw_count << shift) - (prev_raw_count << shift); 214 delta = (new_raw_count << shift) - (prev_raw_count << shift);
215 delta >>= shift; 215 delta >>= shift;
216 216
217 atomic64_add(delta, &event->count); 217 local64_add(delta, &event->count);
218 atomic64_sub(delta, &hwc->period_left); 218 local64_sub(delta, &hwc->period_left);
219 219
220 return new_raw_count; 220 return new_raw_count;
221} 221}
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
478 if (!hwc->sample_period) { 478 if (!hwc->sample_period) {
479 hwc->sample_period = armpmu->max_period; 479 hwc->sample_period = armpmu->max_period;
480 hwc->last_period = hwc->sample_period; 480 hwc->last_period = hwc->sample_period;
481 atomic64_set(&hwc->period_left, hwc->sample_period); 481 local64_set(&hwc->period_left, hwc->sample_period);
482 } 482 }
483 483
484 err = 0; 484 err = 0;
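The conversions above all follow the same pattern: per-event counters that are
only ever touched from the owning CPU move from atomic64_t to the cheaper
local64_t. Stripped of the arch details, the canonical counter-update loop
looks roughly like this (a condensed sketch of the pattern, using the
asm-generic/local64.h operations, not any one arch's exact code):

	#include <asm/local64.h>

	static u64 pmu_event_update(local64_t *prev_count, local64_t *total,
				    u64 new_raw_count)
	{
		u64 prev_raw_count;

	again:
		prev_raw_count = local64_read(prev_count);
		/* an NMI or interrupt may update prev_count under us; retry */
		if (local64_cmpxchg(prev_count, prev_raw_count,
				    new_raw_count) != prev_raw_count)
			goto again;

		local64_add(new_raw_count - prev_raw_count, total);
		return new_raw_count;
	}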
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/avr32/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/blackfin/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/cris/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/kernel/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/h8300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m32r/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m68k/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/microblaze/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mips/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mn10300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/parisc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/powerpc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index e6d4ce69b126..5c16b891d501 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -21,3 +21,15 @@
21#ifdef CONFIG_FSL_EMB_PERF_EVENT 21#ifdef CONFIG_FSL_EMB_PERF_EVENT
22#include <asm/perf_event_fsl_emb.h> 22#include <asm/perf_event_fsl_emb.h>
23#endif 23#endif
24
25#ifdef CONFIG_PERF_EVENTS
26#include <asm/ptrace.h>
27#include <asm/reg.h>
28
29#define perf_arch_fetch_caller_regs(regs, __ip) \
30 do { \
31 (regs)->nip = __ip; \
32 (regs)->gpr[1] = *(unsigned long *)__get_SP(); \
33 asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
34 } while (0)
35#endif
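For context, this hook is what the generic perf code uses when it needs a
pt_regs snapshot at an arbitrary callsite (software events, tracepoints). The
shape of the generic-side consumer is roughly the following sketch; the real
helper lives in kernel/perf_event.c and may differ in detail:

	/* rough shape of the caller, not the exact kernel code */
	static void fetch_caller_regs_example(void)
	{
		struct pt_regs regs;

		memset(&regs, 0, sizeof(regs));	/* fields the arch hook doesn't fill */
		perf_arch_fetch_caller_regs(&regs, _THIS_IP_);

		/* on powerpc, regs.nip / regs.gpr[1] / regs.msr now describe
		 * this callsite, per the macro above */
	}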
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a556..2d29752cbe16 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
127_GLOBAL(__restore_cpu_power7) 127_GLOBAL(__restore_cpu_power7)
128 /* place holder */ 128 /* place holder */
129 blr 129 blr
130
131/*
132 * Get a minimal set of registers for our caller's nth caller.
133 * r3 = regs pointer, r5 = n.
134 *
135 * We only get R1 (stack pointer), NIP (next instruction pointer)
136 * and LR (link register). These are all we can get in the
137 * general case without doing complicated stack unwinding, but
138 * fortunately they are enough to do a stack backtrace, which
139 * is all we need them for.
140 */
141_GLOBAL(perf_arch_fetch_caller_regs)
142 mr r6,r1
143 cmpwi r5,0
144 mflr r4
145 ble 2f
146 mtctr r5
1471: PPC_LL r6,0(r6)
148 bdnz 1b
149 PPC_LL r4,PPC_LR_STKOFF(r6)
1502: PPC_LL r7,0(r6)
151 PPC_LL r7,PPC_LR_STKOFF(r7)
152 PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
153 PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
154 PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
155 blr
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5c14ffe51258..d301a30445e0 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
410 * Therefore we treat them like NMIs. 410 * Therefore we treat them like NMIs.
411 */ 411 */
412 do { 412 do {
413 prev = atomic64_read(&event->hw.prev_count); 413 prev = local64_read(&event->hw.prev_count);
414 barrier(); 414 barrier();
415 val = read_pmc(event->hw.idx); 415 val = read_pmc(event->hw.idx);
416 } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 416 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
417 417
418 /* The counters are only 32 bits wide */ 418 /* The counters are only 32 bits wide */
419 delta = (val - prev) & 0xfffffffful; 419 delta = (val - prev) & 0xfffffffful;
420 atomic64_add(delta, &event->count); 420 local64_add(delta, &event->count);
421 atomic64_sub(delta, &event->hw.period_left); 421 local64_sub(delta, &event->hw.period_left);
422} 422}
423 423
424/* 424/*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
444 if (!event->hw.idx) 444 if (!event->hw.idx)
445 continue; 445 continue;
446 val = (event->hw.idx == 5) ? pmc5 : pmc6; 446 val = (event->hw.idx == 5) ? pmc5 : pmc6;
447 prev = atomic64_read(&event->hw.prev_count); 447 prev = local64_read(&event->hw.prev_count);
448 event->hw.idx = 0; 448 event->hw.idx = 0;
449 delta = (val - prev) & 0xfffffffful; 449 delta = (val - prev) & 0xfffffffful;
450 atomic64_add(delta, &event->count); 450 local64_add(delta, &event->count);
451 } 451 }
452} 452}
453 453
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
462 event = cpuhw->limited_counter[i]; 462 event = cpuhw->limited_counter[i];
463 event->hw.idx = cpuhw->limited_hwidx[i]; 463 event->hw.idx = cpuhw->limited_hwidx[i];
464 val = (event->hw.idx == 5) ? pmc5 : pmc6; 464 val = (event->hw.idx == 5) ? pmc5 : pmc6;
465 atomic64_set(&event->hw.prev_count, val); 465 local64_set(&event->hw.prev_count, val);
466 perf_event_update_userpage(event); 466 perf_event_update_userpage(event);
467 } 467 }
468} 468}
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
666 } 666 }
667 val = 0; 667 val = 0;
668 if (event->hw.sample_period) { 668 if (event->hw.sample_period) {
669 left = atomic64_read(&event->hw.period_left); 669 left = local64_read(&event->hw.period_left);
670 if (left < 0x80000000L) 670 if (left < 0x80000000L)
671 val = 0x80000000L - left; 671 val = 0x80000000L - left;
672 } 672 }
673 atomic64_set(&event->hw.prev_count, val); 673 local64_set(&event->hw.prev_count, val);
674 event->hw.idx = idx; 674 event->hw.idx = idx;
675 write_pmc(idx, val); 675 write_pmc(idx, val);
676 perf_event_update_userpage(event); 676 perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
754 * skip the schedulability test here, it will be peformed 754 * skip the schedulability test here, it will be peformed
755 * at commit time(->commit_txn) as a whole 755 * at commit time(->commit_txn) as a whole
756 */ 756 */
757 if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) 757 if (cpuhw->group_flag & PERF_EVENT_TXN)
758 goto nocheck; 758 goto nocheck;
759 759
760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) 760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
845 if (left < 0x80000000L) 845 if (left < 0x80000000L)
846 val = 0x80000000L - left; 846 val = 0x80000000L - left;
847 write_pmc(event->hw.idx, val); 847 write_pmc(event->hw.idx, val);
848 atomic64_set(&event->hw.prev_count, val); 848 local64_set(&event->hw.prev_count, val);
849 atomic64_set(&event->hw.period_left, left); 849 local64_set(&event->hw.period_left, left);
850 perf_event_update_userpage(event); 850 perf_event_update_userpage(event);
851 perf_enable(); 851 perf_enable();
852 local_irq_restore(flags); 852 local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
861{ 861{
862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
863 863
864 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 864 cpuhw->group_flag |= PERF_EVENT_TXN;
865 cpuhw->n_txn_start = cpuhw->n_events; 865 cpuhw->n_txn_start = cpuhw->n_events;
866} 866}
867 867
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
874{ 874{
875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
876 876
877 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 877 cpuhw->group_flag &= ~PERF_EVENT_TXN;
878} 878}
879 879
880/* 880/*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
900 for (i = cpuhw->n_txn_start; i < n; ++i) 900 for (i = cpuhw->n_txn_start; i < n; ++i)
901 cpuhw->event[i]->hw.config = cpuhw->events[i]; 901 cpuhw->event[i]->hw.config = cpuhw->events[i];
902 902
903 cpuhw->group_flag &= ~PERF_EVENT_TXN;
903 return 0; 904 return 0;
904} 905}
905 906
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1111 event->hw.config = events[n]; 1112 event->hw.config = events[n];
1112 event->hw.event_base = cflags[n]; 1113 event->hw.event_base = cflags[n];
1113 event->hw.last_period = event->hw.sample_period; 1114 event->hw.last_period = event->hw.sample_period;
1114 atomic64_set(&event->hw.period_left, event->hw.last_period); 1115 local64_set(&event->hw.period_left, event->hw.last_period);
1115 1116
1116 /* 1117 /*
1117 * See if we need to reserve the PMU. 1118 * See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1149 int record = 0; 1150 int record = 0;
1150 1151
1151 /* we don't have to worry about interrupts here */ 1152 /* we don't have to worry about interrupts here */
1152 prev = atomic64_read(&event->hw.prev_count); 1153 prev = local64_read(&event->hw.prev_count);
1153 delta = (val - prev) & 0xfffffffful; 1154 delta = (val - prev) & 0xfffffffful;
1154 atomic64_add(delta, &event->count); 1155 local64_add(delta, &event->count);
1155 1156
1156 /* 1157 /*
1157 * See if the total period for this event has expired, 1158 * See if the total period for this event has expired,
1158 * and update for the next period. 1159 * and update for the next period.
1159 */ 1160 */
1160 val = 0; 1161 val = 0;
1161 left = atomic64_read(&event->hw.period_left) - delta; 1162 left = local64_read(&event->hw.period_left) - delta;
1162 if (period) { 1163 if (period) {
1163 if (left <= 0) { 1164 if (left <= 0) {
1164 left += period; 1165 left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1196 } 1197 }
1197 1198
1198 write_pmc(event->hw.idx, val); 1199 write_pmc(event->hw.idx, val);
1199 atomic64_set(&event->hw.prev_count, val); 1200 local64_set(&event->hw.prev_count, val);
1200 atomic64_set(&event->hw.period_left, left); 1201 local64_set(&event->hw.period_left, left);
1201 perf_event_update_userpage(event); 1202 perf_event_update_userpage(event);
1202} 1203}
1203 1204
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index babcceecd2ea..1ba45471ae43 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -162,15 +162,15 @@ static void fsl_emb_pmu_read(struct perf_event *event)
162 * Therefore we treat them like NMIs. 162 * Therefore we treat them like NMIs.
163 */ 163 */
164 do { 164 do {
165 prev = atomic64_read(&event->hw.prev_count); 165 prev = local64_read(&event->hw.prev_count);
166 barrier(); 166 barrier();
167 val = read_pmc(event->hw.idx); 167 val = read_pmc(event->hw.idx);
168 } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 168 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
169 169
170 /* The counters are only 32 bits wide */ 170 /* The counters are only 32 bits wide */
171 delta = (val - prev) & 0xfffffffful; 171 delta = (val - prev) & 0xfffffffful;
172 atomic64_add(delta, &event->count); 172 local64_add(delta, &event->count);
173 atomic64_sub(delta, &event->hw.period_left); 173 local64_sub(delta, &event->hw.period_left);
174} 174}
175 175
176/* 176/*
@@ -296,11 +296,11 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
296 296
297 val = 0; 297 val = 0;
298 if (event->hw.sample_period) { 298 if (event->hw.sample_period) {
299 s64 left = atomic64_read(&event->hw.period_left); 299 s64 left = local64_read(&event->hw.period_left);
300 if (left < 0x80000000L) 300 if (left < 0x80000000L)
301 val = 0x80000000L - left; 301 val = 0x80000000L - left;
302 } 302 }
303 atomic64_set(&event->hw.prev_count, val); 303 local64_set(&event->hw.prev_count, val);
304 write_pmc(i, val); 304 write_pmc(i, val);
305 perf_event_update_userpage(event); 305 perf_event_update_userpage(event);
306 306
@@ -371,8 +371,8 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)
371 if (left < 0x80000000L) 371 if (left < 0x80000000L)
372 val = 0x80000000L - left; 372 val = 0x80000000L - left;
373 write_pmc(event->hw.idx, val); 373 write_pmc(event->hw.idx, val);
374 atomic64_set(&event->hw.prev_count, val); 374 local64_set(&event->hw.prev_count, val);
375 atomic64_set(&event->hw.period_left, left); 375 local64_set(&event->hw.period_left, left);
376 perf_event_update_userpage(event); 376 perf_event_update_userpage(event);
377 perf_enable(); 377 perf_enable();
378 local_irq_restore(flags); 378 local_irq_restore(flags);
@@ -500,7 +500,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
500 return ERR_PTR(-ENOTSUPP); 500 return ERR_PTR(-ENOTSUPP);
501 501
502 event->hw.last_period = event->hw.sample_period; 502 event->hw.last_period = event->hw.sample_period;
503 atomic64_set(&event->hw.period_left, event->hw.last_period); 503 local64_set(&event->hw.period_left, event->hw.last_period);
504 504
505 /* 505 /*
506 * See if we need to reserve the PMU. 506 * See if we need to reserve the PMU.
@@ -541,16 +541,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
541 int record = 0; 541 int record = 0;
542 542
543 /* we don't have to worry about interrupts here */ 543 /* we don't have to worry about interrupts here */
544 prev = atomic64_read(&event->hw.prev_count); 544 prev = local64_read(&event->hw.prev_count);
545 delta = (val - prev) & 0xfffffffful; 545 delta = (val - prev) & 0xfffffffful;
546 atomic64_add(delta, &event->count); 546 local64_add(delta, &event->count);
547 547
548 /* 548 /*
549 * See if the total period for this event has expired, 549 * See if the total period for this event has expired,
550 * and update for the next period. 550 * and update for the next period.
551 */ 551 */
552 val = 0; 552 val = 0;
553 left = atomic64_read(&event->hw.period_left) - delta; 553 left = local64_read(&event->hw.period_left) - delta;
554 if (period) { 554 if (period) {
555 if (left <= 0) { 555 if (left <= 0) {
556 left += period; 556 left += period;
@@ -569,6 +569,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
569 struct perf_sample_data data; 569 struct perf_sample_data data;
570 570
571 perf_sample_data_init(&data, 0); 571 perf_sample_data_init(&data, 0);
572 data.period = event->hw.last_period;
572 573
573 if (perf_event_overflow(event, nmi, &data, regs)) { 574 if (perf_event_overflow(event, nmi, &data, regs)) {
574 /* 575 /*
@@ -584,8 +585,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
584 } 585 }
585 586
586 write_pmc(event->hw.idx, val); 587 write_pmc(event->hw.idx, val);
587 atomic64_set(&event->hw.prev_count, val); 588 local64_set(&event->hw.prev_count, val);
588 atomic64_set(&event->hw.period_left, left); 589 local64_set(&event->hw.period_left, left);
589 perf_event_update_userpage(event); 590 perf_event_update_userpage(event);
590} 591}
591 592
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/score/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sh/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 81b6de41ae5d..7a3dc3567258 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
185 * this is the simplest approach for maintaining consistency. 185 * this is the simplest approach for maintaining consistency.
186 */ 186 */
187again: 187again:
188 prev_raw_count = atomic64_read(&hwc->prev_count); 188 prev_raw_count = local64_read(&hwc->prev_count);
189 new_raw_count = sh_pmu->read(idx); 189 new_raw_count = sh_pmu->read(idx);
190 190
191 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 191 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
192 new_raw_count) != prev_raw_count) 192 new_raw_count) != prev_raw_count)
193 goto again; 193 goto again;
194 194
@@ -203,7 +203,7 @@ again:
203 delta = (new_raw_count << shift) - (prev_raw_count << shift); 203 delta = (new_raw_count << shift) - (prev_raw_count << shift);
204 delta >>= shift; 204 delta >>= shift;
205 205
206 atomic64_add(delta, &event->count); 206 local64_add(delta, &event->count);
207} 207}
208 208
209static void sh_pmu_disable(struct perf_event *event) 209static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sparc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 7e2669894ce8..74c4e0cd889c 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
6#define PERF_EVENT_INDEX_OFFSET 0 6#define PERF_EVENT_INDEX_OFFSET 0
7 7
8#ifdef CONFIG_PERF_EVENTS 8#ifdef CONFIG_PERF_EVENTS
9#include <asm/ptrace.h>
10
9extern void init_hw_perf_events(void); 11extern void init_hw_perf_events(void);
12
13extern void
14__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
15
16#define perf_arch_fetch_caller_regs(pt_regs, ip) \
17 __perf_arch_fetch_caller_regs(pt_regs, ip, 1);
10#else 18#else
11static inline void init_hw_perf_events(void) { } 19static inline void init_hw_perf_events(void) { }
12#endif 20#endif
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
index 92090cc9e829..682fee06a16b 100644
--- a/arch/sparc/kernel/helpers.S
+++ b/arch/sparc/kernel/helpers.S
@@ -47,9 +47,9 @@ stack_trace_flush:
47 .size stack_trace_flush,.-stack_trace_flush 47 .size stack_trace_flush,.-stack_trace_flush
48 48
49#ifdef CONFIG_PERF_EVENTS 49#ifdef CONFIG_PERF_EVENTS
50 .globl perf_arch_fetch_caller_regs 50 .globl __perf_arch_fetch_caller_regs
51 .type perf_arch_fetch_caller_regs,#function 51 .type __perf_arch_fetch_caller_regs,#function
52perf_arch_fetch_caller_regs: 52__perf_arch_fetch_caller_regs:
53 /* We always read the %pstate into %o5 since we will use 53 /* We always read the %pstate into %o5 since we will use
54 * that to construct a fake %tstate to store into the regs. 54 * that to construct a fake %tstate to store into the regs.
55 */ 55 */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 44faabc3c02c..357ced3c33ff 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
572 s64 delta; 572 s64 delta;
573 573
574again: 574again:
575 prev_raw_count = atomic64_read(&hwc->prev_count); 575 prev_raw_count = local64_read(&hwc->prev_count);
576 new_raw_count = read_pmc(idx); 576 new_raw_count = read_pmc(idx);
577 577
578 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 578 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
579 new_raw_count) != prev_raw_count) 579 new_raw_count) != prev_raw_count)
580 goto again; 580 goto again;
581 581
582 delta = (new_raw_count << shift) - (prev_raw_count << shift); 582 delta = (new_raw_count << shift) - (prev_raw_count << shift);
583 delta >>= shift; 583 delta >>= shift;
584 584
585 atomic64_add(delta, &event->count); 585 local64_add(delta, &event->count);
586 atomic64_sub(delta, &hwc->period_left); 586 local64_sub(delta, &hwc->period_left);
587 587
588 return new_raw_count; 588 return new_raw_count;
589} 589}
@@ -591,27 +591,27 @@ again:
591static int sparc_perf_event_set_period(struct perf_event *event, 591static int sparc_perf_event_set_period(struct perf_event *event,
592 struct hw_perf_event *hwc, int idx) 592 struct hw_perf_event *hwc, int idx)
593{ 593{
594 s64 left = atomic64_read(&hwc->period_left); 594 s64 left = local64_read(&hwc->period_left);
595 s64 period = hwc->sample_period; 595 s64 period = hwc->sample_period;
596 int ret = 0; 596 int ret = 0;
597 597
598 if (unlikely(left <= -period)) { 598 if (unlikely(left <= -period)) {
599 left = period; 599 left = period;
600 atomic64_set(&hwc->period_left, left); 600 local64_set(&hwc->period_left, left);
601 hwc->last_period = period; 601 hwc->last_period = period;
602 ret = 1; 602 ret = 1;
603 } 603 }
604 604
605 if (unlikely(left <= 0)) { 605 if (unlikely(left <= 0)) {
606 left += period; 606 left += period;
607 atomic64_set(&hwc->period_left, left); 607 local64_set(&hwc->period_left, left);
608 hwc->last_period = period; 608 hwc->last_period = period;
609 ret = 1; 609 ret = 1;
610 } 610 }
611 if (left > MAX_PERIOD) 611 if (left > MAX_PERIOD)
612 left = MAX_PERIOD; 612 left = MAX_PERIOD;
613 613
614 atomic64_set(&hwc->prev_count, (u64)-left); 614 local64_set(&hwc->prev_count, (u64)-left);
615 615
616 write_pmc(idx, (u64)(-left) & 0xffffffff); 616 write_pmc(idx, (u64)(-left) & 0xffffffff);
617 617
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
1006 * skip the schedulability test here, it will be peformed 1006 * skip the schedulability test here, it will be peformed
1007 * at commit time(->commit_txn) as a whole 1007 * at commit time(->commit_txn) as a whole
1008 */ 1008 */
1009 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1009 if (cpuc->group_flag & PERF_EVENT_TXN)
1010 goto nocheck; 1010 goto nocheck;
1011 1011
1012 if (check_excludes(cpuc->event, n0, 1)) 1012 if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1088 if (!hwc->sample_period) { 1088 if (!hwc->sample_period) {
1089 hwc->sample_period = MAX_PERIOD; 1089 hwc->sample_period = MAX_PERIOD;
1090 hwc->last_period = hwc->sample_period; 1090 hwc->last_period = hwc->sample_period;
1091 atomic64_set(&hwc->period_left, hwc->sample_period); 1091 local64_set(&hwc->period_left, hwc->sample_period);
1092 } 1092 }
1093 1093
1094 return 0; 1094 return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1103{ 1103{
1104 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1104 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1105 1105
1106 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 1106 cpuhw->group_flag |= PERF_EVENT_TXN;
1107} 1107}
1108 1108
1109/* 1109/*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1115{ 1115{
1116 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1116 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1117 1117
1118 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 1118 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1119} 1119}
1120 1120
1121/* 1121/*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1138 if (sparc_check_constraints(cpuc->event, cpuc->events, n)) 1138 if (sparc_check_constraints(cpuc->event, cpuc->events, n))
1139 return -EAGAIN; 1139 return -EAGAIN;
1140 1140
1141 cpuc->group_flag &= ~PERF_EVENT_TXN;
1141 return 0; 1142 return 0;
1142} 1143}
1143 1144
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dcb0593b4a66..6f77afa6bca9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,6 +55,7 @@ config X86
55 select HAVE_HW_BREAKPOINT 55 select HAVE_HW_BREAKPOINT
56 select HAVE_MIXED_BREAKPOINTS_REGS 56 select HAVE_MIXED_BREAKPOINTS_REGS
57 select PERF_EVENTS 57 select PERF_EVENTS
58 select HAVE_PERF_EVENTS_NMI
58 select ANON_INODES 59 select ANON_INODES
59 select HAVE_ARCH_KMEMCHECK 60 select HAVE_ARCH_KMEMCHECK
60 select HAVE_USER_RETURN_NOTIFIER 61 select HAVE_USER_RETURN_NOTIFIER
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 942255310e6a..528a11e8d3e3 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
20#include <linux/list.h> 20#include <linux/list.h>
21 21
22/* Available HW breakpoint length encodings */ 22/* Available HW breakpoint length encodings */
23#define X86_BREAKPOINT_LEN_X 0x00
23#define X86_BREAKPOINT_LEN_1 0x40 24#define X86_BREAKPOINT_LEN_1 0x40
24#define X86_BREAKPOINT_LEN_2 0x44 25#define X86_BREAKPOINT_LEN_2 0x44
25#define X86_BREAKPOINT_LEN_4 0x4c 26#define X86_BREAKPOINT_LEN_4 0x4c
26#define X86_BREAKPOINT_LEN_EXECUTE 0x40
27 27
28#ifdef CONFIG_X86_64 28#ifdef CONFIG_X86_64
29#define X86_BREAKPOINT_LEN_8 0x48 29#define X86_BREAKPOINT_LEN_8 0x48
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 93da9c3f3341..932f0f86b4b7 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
17 17
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void); 19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_LOCKUP_DETECTOR)
20extern int nmi_watchdog_enabled; 21extern int nmi_watchdog_enabled;
22#endif
21extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
22extern int reserve_perfctr_nmi(unsigned int); 24extern int reserve_perfctr_nmi(unsigned int);
23extern void release_perfctr_nmi(unsigned int); 25extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e0..6e742cc4251b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
68 68
69union cpuid10_edx { 69union cpuid10_edx {
70 struct { 70 struct {
71 unsigned int num_counters_fixed:4; 71 unsigned int num_counters_fixed:5;
72 unsigned int reserved:28; 72 unsigned int bit_width_fixed:8;
73 unsigned int reserved:19;
73 } split; 74 } split;
74 unsigned int full; 75 unsigned int full;
75}; 76};
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
140extern unsigned long perf_misc_flags(struct pt_regs *regs); 141extern unsigned long perf_misc_flags(struct pt_regs *regs);
141#define perf_misc_flags(regs) perf_misc_flags(regs) 142#define perf_misc_flags(regs) perf_misc_flags(regs)
142 143
144#include <asm/stacktrace.h>
145
146/*
147 * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
148 * and the comment with PERF_EFLAGS_EXACT.
149 */
150#define perf_arch_fetch_caller_regs(regs, __ip) { \
151 (regs)->ip = (__ip); \
152 (regs)->bp = caller_frame_pointer(); \
153 (regs)->cs = __KERNEL_CS; \
154 regs->flags = 0; \
155}
156
143#else 157#else
144static inline void init_hw_perf_events(void) { } 158static inline void init_hw_perf_events(void) { }
145static inline void perf_events_lapic_init(void) { } 159static inline void perf_events_lapic_init(void) { }
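
The cpuid10_edx widening matches how CPUID leaf 0xA reports fixed-function counters:
EDX bits 0-4 carry the number of fixed counters and bits 5-12 their bit width, so the
old 4-bit field could truncate the count and dropped the width entirely. A small
user-space sketch decoding the same leaf (x86 only, GCC's <cpuid.h>; an illustration,
not part of this patch):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
            return 1;   /* leaf 0xA not supported */

        /* same layout as the cpuid10_edx.split fields above */
        printf("fixed counters:  %u\n", edx & 0x1f);         /* bits 0-4  */
        printf("fixed bit width: %u\n", (edx >> 5) & 0xff);  /* bits 5-12 */
        printf("version id:      %u\n", eax & 0xff);         /* EAX bits 0-7 */
        return 0;
    }
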
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 64a8ebff06fc..def500776b16 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -19,7 +19,6 @@
19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ 19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */
20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) 20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
21#define ARCH_P4_MAX_CCCR (18) 21#define ARCH_P4_MAX_CCCR (18)
22#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2)
23 22
24#define P4_ESCR_EVENT_MASK 0x7e000000U 23#define P4_ESCR_EVENT_MASK 0x7e000000U
25#define P4_ESCR_EVENT_SHIFT 25 24#define P4_ESCR_EVENT_SHIFT 25
@@ -71,10 +70,6 @@
71#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
72#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
73 72
74/* Custom bits in reerved CCCR area */
75#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU
76
77
78/* Non HT mask */ 73/* Non HT mask */
79#define P4_CCCR_MASK \ 74#define P4_CCCR_MASK \
80 (P4_CCCR_OVF | \ 75 (P4_CCCR_OVF | \
@@ -106,8 +101,7 @@
106 * ESCR and CCCR but rather an only packed value should 101 * ESCR and CCCR but rather an only packed value should
107 * be unpacked and written to a proper addresses 102 * be unpacked and written to a proper addresses
108 * 103 *
109 * the base idea is to pack as much info as 104 * the base idea is to pack as much info as possible
110 * possible
111 */ 105 */
112#define p4_config_pack_escr(v) (((u64)(v)) << 32) 106#define p4_config_pack_escr(v) (((u64)(v)) << 32)
113#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) 107#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
@@ -130,8 +124,6 @@
130 t; \ 124 t; \
131 }) 125 })
132 126
133#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
134
135#define P4_CONFIG_HT_SHIFT 63 127#define P4_CONFIG_HT_SHIFT 63
136#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
137 129
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
214 return escr; 206 return escr;
215} 207}
216 208
209/*
210 * This are the events which should be used in "Event Select"
211 * field of ESCR register, they are like unique keys which allow
212 * the kernel to determinate which CCCR and COUNTER should be
213 * used to track an event
214 */
217enum P4_EVENTS { 215enum P4_EVENTS {
218 P4_EVENT_TC_DELIVER_MODE, 216 P4_EVENT_TC_DELIVER_MODE,
219 P4_EVENT_BPU_FETCH_REQUEST, 217 P4_EVENT_BPU_FETCH_REQUEST,
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
561 * a caller should use P4_ESCR_EMASK_NAME helper to 559 * a caller should use P4_ESCR_EMASK_NAME helper to
562 * pick the EventMask needed, for example 560 * pick the EventMask needed, for example
563 * 561 *
564 * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) 562 * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
565 */ 563 */
566enum P4_ESCR_EMASKS { 564enum P4_ESCR_EMASKS {
567 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), 565 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
753 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), 751 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
754}; 752};
755 753
756/* P4 PEBS: stale for a while */ 754/*
757#define P4_PEBS_METRIC_MASK 0x00001fffU 755 * P4 PEBS specifics (Replay Event only)
758#define P4_PEBS_UOB_TAG 0x01000000U 756 *
759#define P4_PEBS_ENABLE 0x02000000U 757 * Format (bits):
760 758 * 0-6: metric from P4_PEBS_METRIC enum
761/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ 759 * 7 : reserved
762#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 760 * 8 : reserved
763#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 761 * 9-11 : reserved
764#define P4_PEBS__dtlb_load_miss_retired 0x3000004 762 *
765#define P4_PEBS__dtlb_store_miss_retired 0x3000004 763 * Note we have UOP and PEBS bits reserved for now
766#define P4_PEBS__dtlb_all_miss_retired 0x3000004 764 * just in case if we will need them once
767#define P4_PEBS__tagged_mispred_branch 0x3018000 765 */
768#define P4_PEBS__mob_load_replay_retired 0x3000200 766#define P4_PEBS_CONFIG_ENABLE (1 << 7)
769#define P4_PEBS__split_load_retired 0x3000400 767#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
770#define P4_PEBS__split_store_retired 0x3000400 768#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
771 769#define P4_PEBS_CONFIG_MASK 0xff
772#define P4_VERT__1stl_cache_load_miss_retired 0x0000001 770
773#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 771/*
774#define P4_VERT__dtlb_load_miss_retired 0x0000001 772 * mem: Only counters MSR_IQ_COUNTER4 (16) and
775#define P4_VERT__dtlb_store_miss_retired 0x0000002 773 * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
776#define P4_VERT__dtlb_all_miss_retired 0x0000003 774 */
777#define P4_VERT__tagged_mispred_branch 0x0000010 775#define P4_PEBS_ENABLE 0x02000000U
778#define P4_VERT__mob_load_replay_retired 0x0000001 776#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
779#define P4_VERT__split_load_retired 0x0000001 777
780#define P4_VERT__split_store_retired 0x0000002 778#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
781 779#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
782enum P4_CACHE_EVENTS { 780
783 P4_CACHE__NONE, 781#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask))
784 782
785 P4_CACHE__1stl_cache_load_miss_retired, 783enum P4_PEBS_METRIC {
786 P4_CACHE__2ndl_cache_load_miss_retired, 784 P4_PEBS_METRIC__none,
787 P4_CACHE__dtlb_load_miss_retired, 785
788 P4_CACHE__dtlb_store_miss_retired, 786 P4_PEBS_METRIC__1stl_cache_load_miss_retired,
789 P4_CACHE__itlb_reference_hit, 787 P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
790 P4_CACHE__itlb_reference_miss, 788 P4_PEBS_METRIC__dtlb_load_miss_retired,
791 789 P4_PEBS_METRIC__dtlb_store_miss_retired,
792 P4_CACHE__MAX 790 P4_PEBS_METRIC__dtlb_all_miss_retired,
791 P4_PEBS_METRIC__tagged_mispred_branch,
792 P4_PEBS_METRIC__mob_load_replay_retired,
793 P4_PEBS_METRIC__split_load_retired,
794 P4_PEBS_METRIC__split_store_retired,
795
796 P4_PEBS_METRIC__max
793}; 797};
794 798
795#endif /* PERF_EVENT_P4_H */ 799#endif /* PERF_EVENT_P4_H */
800
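
With the old cache-event bits removed, a P4 raw perf config is still one u64: the
ESCR half packed into the upper 32 bits, the CCCR half into the lower 32, and the new
PEBS metric index carried in the low bits of the CCCR's reserved area. A stand-alone
sketch of that packing, mirroring the macros above; the ESCR/CCCR values are made-up
placeholders:

    #include <stdio.h>
    #include <stdint.h>

    /* same packing convention as p4_config_pack_escr()/_cccr() above */
    #define PACK_ESCR(v)    ((uint64_t)(v) << 32)
    #define PACK_CCCR(v)    ((uint64_t)(v) & 0xffffffffULL)
    #define UNPACK_ESCR(v)  ((uint32_t)((v) >> 32))
    #define UNPACK_CCCR(v)  ((uint32_t)((v) & 0xffffffffULL))
    #define PEBS_METRIC(v)  ((unsigned int)((v) & 0x3f)) /* P4_PEBS_CONFIG_METRIC_MASK */

    int main(void)
    {
        uint32_t escr = 0x0400020c;   /* placeholder ESCR value */
        uint32_t cccr = 0x00039000;   /* placeholder CCCR value */
        unsigned int metric = 1;      /* e.g. 1stl_cache_load_miss_retired */
        uint64_t config = PACK_ESCR(escr) | PACK_CCCR(cccr) | metric;

        printf("escr=%#x cccr=%#x metric=%u\n",
               UNPACK_ESCR(config), UNPACK_CCCR(config), PEBS_METRIC(config));
        return 0;
    }
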
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 4dab78edbad9..2b16a2ad23dc 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -1,6 +1,13 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
1#ifndef _ASM_X86_STACKTRACE_H 6#ifndef _ASM_X86_STACKTRACE_H
2#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
3 8
9#include <linux/uaccess.h>
10
4extern int kstack_depth_to_print; 11extern int kstack_depth_to_print;
5 12
6struct thread_info; 13struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
42 unsigned long *stack, unsigned long bp, 49 unsigned long *stack, unsigned long bp,
43 const struct stacktrace_ops *ops, void *data); 50 const struct stacktrace_ops *ops, void *data);
44 51
52#ifdef CONFIG_X86_32
53#define STACKSLOTS_PER_LINE 8
54#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
55#else
56#define STACKSLOTS_PER_LINE 4
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif
59
60extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl);
63
64extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl);
67
68extern unsigned int code_bytes;
69
70/* The form of the top of the frame on the stack */
71struct stack_frame {
72 struct stack_frame *next_frame;
73 unsigned long return_address;
74};
75
76struct stack_frame_ia32 {
77 u32 next_frame;
78 u32 return_address;
79};
80
81static inline unsigned long caller_frame_pointer(void)
82{
83 struct stack_frame *frame;
84
85 get_bp(frame);
86
87#ifdef CONFIG_FRAME_POINTER
88 frame = frame->next_frame;
89#endif
90
91 return (unsigned long)frame;
92}
93
45#endif /* _ASM_X86_STACKTRACE_H */ 94#endif /* _ASM_X86_STACKTRACE_H */
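
caller_frame_pointer() reads the current frame pointer and, when CONFIG_FRAME_POINTER
is set, follows one next_frame link so the value handed to perf_arch_fetch_caller_regs()
describes the caller rather than the helper itself. The same idea can be sketched in
user space with GCC's builtin (only meaningful when built with -fno-omit-frame-pointer;
an illustration, not the kernel implementation):

    #include <stdio.h>

    /* Report the frame of whoever called us - one level up, analogous to
     * the frame->next_frame step in caller_frame_pointer(). */
    static __attribute__((noinline)) void *callers_frame(void)
    {
        return __builtin_frame_address(1);
    }

    static __attribute__((noinline)) void probe(void)
    {
        printf("probe's own frame: %p\n", __builtin_frame_address(0));
        printf("helper reports:    %p\n", callers_frame());
    }

    int main(void)
    {
        probe();
        return 0;
    }

With frame pointers enabled both lines print the same address, which is exactly the
property the perf callchain code relies on.
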
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 565c1bfc507d..910f20b457c4 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,12 @@
2# Makefile for local APIC drivers and for the IO-APIC code 2# Makefile for local APIC drivers and for the IO-APIC code
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif
9obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
10
6obj-$(CONFIG_X86_IO_APIC) += io_apic.o 11obj-$(CONFIG_X86_IO_APIC) += io_apic.o
7obj-$(CONFIG_SMP) += ipi.o 12obj-$(CONFIG_SMP) += ipi.o
8 13
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
new file mode 100644
index 000000000000..cefd6942f0e9
--- /dev/null
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -0,0 +1,107 @@
1/*
2 * HW NMI watchdog support
3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 *
6 * Arch specific calls to support NMI watchdog
7 *
8 * Bits copied from original nmi.c file
9 *
10 */
11#include <asm/apic.h>
12
13#include <linux/cpumask.h>
14#include <linux/kdebug.h>
15#include <linux/notifier.h>
16#include <linux/kprobes.h>
17#include <linux/nmi.h>
18#include <linux/module.h>
19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void)
24{
25 return (u64)(cpu_khz) * 1000 * 60;
26}
27
28#ifdef ARCH_HAS_NMI_WATCHDOG
29void arch_trigger_all_cpu_backtrace(void)
30{
31 int i;
32
33 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
34
35 printk(KERN_INFO "sending NMI to all CPUs:\n");
36 apic->send_IPI_all(NMI_VECTOR);
37
38 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
39 for (i = 0; i < 10 * 1000; i++) {
40 if (cpumask_empty(to_cpumask(backtrace_mask)))
41 break;
42 mdelay(1);
43 }
44}
45
46static int __kprobes
47arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
48 unsigned long cmd, void *__args)
49{
50 struct die_args *args = __args;
51 struct pt_regs *regs;
52 int cpu = smp_processor_id();
53
54 switch (cmd) {
55 case DIE_NMI:
56 case DIE_NMI_IPI:
57 break;
58
59 default:
60 return NOTIFY_DONE;
61 }
62
63 regs = args->regs;
64
65 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
66 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
67
68 arch_spin_lock(&lock);
69 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
70 show_regs(regs);
71 dump_stack();
72 arch_spin_unlock(&lock);
73 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
74 return NOTIFY_STOP;
75 }
76
77 return NOTIFY_DONE;
78}
79
80static __read_mostly struct notifier_block backtrace_notifier = {
81 .notifier_call = arch_trigger_all_cpu_backtrace_handler,
82 .next = NULL,
83 .priority = 1
84};
85
86static int __init register_trigger_all_cpu_backtrace(void)
87{
88 register_die_notifier(&backtrace_notifier);
89 return 0;
90}
91early_initcall(register_trigger_all_cpu_backtrace);
92#endif
93
94/* STUB calls to mimic old nmi_watchdog behaviour */
95#if defined(CONFIG_X86_LOCAL_APIC)
96unsigned int nmi_watchdog = NMI_NONE;
97EXPORT_SYMBOL(nmi_watchdog);
98void acpi_nmi_enable(void) { return; }
99void acpi_nmi_disable(void) { return; }
100#endif
101atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
102EXPORT_SYMBOL(nmi_active);
103int unknown_nmi_panic;
104void cpu_nmi_set_wd_enabled(void) { return; }
105void stop_apic_nmi_watchdog(void *unused) { return; }
106void setup_apic_nmi_watchdog(void *unused) { return; }
107int __init check_nmi_watchdog(void) { return 0; }
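
hw_nmi_get_sample_period() sizes the watchdog's perf event in CPU cycles: cpu_khz is
kilohertz, so multiplying by 1000 gives cycles per second and by 60 stretches the
period to roughly one NMI per minute of unhalted execution. A quick worked example,
assuming a hypothetical 2.0 GHz part:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t cpu_khz = 2000000;              /* hypothetical 2.0 GHz CPU */
        uint64_t period = cpu_khz * 1000 * 60;   /* as in hw_nmi_get_sample_period() */

        printf("cycles per watchdog period: %llu (~60s at %llu kHz)\n",
               (unsigned long long)period, (unsigned long long)cpu_khz);
        return 0;
    }
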
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 1edaf15c0b8e..a43f71cb30f8 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
401 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
402 int rc = 0; 402 int rc = 0;
403 403
404 /* check for other users first */
405 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
406 == NOTIFY_STOP) {
407 rc = 1;
408 touched = 1;
409 }
410
411 sum = get_timer_irqs(cpu); 404 sum = get_timer_irqs(cpu);
412 405
413 if (__get_cpu_var(nmi_touch)) { 406 if (__get_cpu_var(nmi_touch)) {
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a18..f2da20fda02d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
220 struct perf_event *event); 220 struct perf_event *event);
221 struct event_constraint *event_constraints; 221 struct event_constraint *event_constraints;
222 void (*quirks)(void); 222 void (*quirks)(void);
223 int perfctr_second_write;
223 224
224 int (*cpu_prepare)(int cpu); 225 int (*cpu_prepare)(int cpu);
225 void (*cpu_starting)(int cpu); 226 void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
295 * count to the generic event atomically: 296 * count to the generic event atomically:
296 */ 297 */
297again: 298again:
298 prev_raw_count = atomic64_read(&hwc->prev_count); 299 prev_raw_count = local64_read(&hwc->prev_count);
299 rdmsrl(hwc->event_base + idx, new_raw_count); 300 rdmsrl(hwc->event_base + idx, new_raw_count);
300 301
301 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 302 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
302 new_raw_count) != prev_raw_count) 303 new_raw_count) != prev_raw_count)
303 goto again; 304 goto again;
304 305
@@ -313,8 +314,8 @@ again:
313 delta = (new_raw_count << shift) - (prev_raw_count << shift); 314 delta = (new_raw_count << shift) - (prev_raw_count << shift);
314 delta >>= shift; 315 delta >>= shift;
315 316
316 atomic64_add(delta, &event->count); 317 local64_add(delta, &event->count);
317 atomic64_sub(delta, &hwc->period_left); 318 local64_sub(delta, &hwc->period_left);
318 319
319 return new_raw_count; 320 return new_raw_count;
320} 321}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
438 if (!hwc->sample_period) { 439 if (!hwc->sample_period) {
439 hwc->sample_period = x86_pmu.max_period; 440 hwc->sample_period = x86_pmu.max_period;
440 hwc->last_period = hwc->sample_period; 441 hwc->last_period = hwc->sample_period;
441 atomic64_set(&hwc->period_left, hwc->sample_period); 442 local64_set(&hwc->period_left, hwc->sample_period);
442 } else { 443 } else {
443 /* 444 /*
444 * If we have a PMU initialized but no APIC 445 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
885x86_perf_event_set_period(struct perf_event *event) 886x86_perf_event_set_period(struct perf_event *event)
886{ 887{
887 struct hw_perf_event *hwc = &event->hw; 888 struct hw_perf_event *hwc = &event->hw;
888 s64 left = atomic64_read(&hwc->period_left); 889 s64 left = local64_read(&hwc->period_left);
889 s64 period = hwc->sample_period; 890 s64 period = hwc->sample_period;
890 int ret = 0, idx = hwc->idx; 891 int ret = 0, idx = hwc->idx;
891 892
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
897 */ 898 */
898 if (unlikely(left <= -period)) { 899 if (unlikely(left <= -period)) {
899 left = period; 900 left = period;
900 atomic64_set(&hwc->period_left, left); 901 local64_set(&hwc->period_left, left);
901 hwc->last_period = period; 902 hwc->last_period = period;
902 ret = 1; 903 ret = 1;
903 } 904 }
904 905
905 if (unlikely(left <= 0)) { 906 if (unlikely(left <= 0)) {
906 left += period; 907 left += period;
907 atomic64_set(&hwc->period_left, left); 908 local64_set(&hwc->period_left, left);
908 hwc->last_period = period; 909 hwc->last_period = period;
909 ret = 1; 910 ret = 1;
910 } 911 }
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
923 * The hw event starts counting from this event offset, 924 * The hw event starts counting from this event offset,
924 * mark it to be able to extra future deltas: 925 * mark it to be able to extra future deltas:
925 */ 926 */
926 atomic64_set(&hwc->prev_count, (u64)-left); 927 local64_set(&hwc->prev_count, (u64)-left);
927 928
928 wrmsrl(hwc->event_base + idx, 929 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
930
931 /*
932 * Due to erratum on certan cpu we need
933 * a second write to be sure the register
934 * is updated properly
935 */
936 if (x86_pmu.perfctr_second_write) {
937 wrmsrl(hwc->event_base + idx,
929 (u64)(-left) & x86_pmu.cntval_mask); 938 (u64)(-left) & x86_pmu.cntval_mask);
939 }
930 940
931 perf_event_update_userpage(event); 941 perf_event_update_userpage(event);
932 942
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
969 * skip the schedulability test here, it will be peformed 979 * skip the schedulability test here, it will be peformed
970 * at commit time(->commit_txn) as a whole 980 * at commit time(->commit_txn) as a whole
971 */ 981 */
972 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 982 if (cpuc->group_flag & PERF_EVENT_TXN)
973 goto out; 983 goto out;
974 984
975 ret = x86_pmu.schedule_events(cpuc, n, assign); 985 ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
1096 * The events never got scheduled and ->cancel_txn will truncate 1106 * The events never got scheduled and ->cancel_txn will truncate
1097 * the event_list. 1107 * the event_list.
1098 */ 1108 */
1099 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1109 if (cpuc->group_flag & PERF_EVENT_TXN)
1100 return; 1110 return;
1101 1111
1102 x86_pmu_stop(event); 1112 x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1388{ 1398{
1389 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1399 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1390 1400
1391 cpuc->group_flag |= PERF_EVENT_TXN_STARTED; 1401 cpuc->group_flag |= PERF_EVENT_TXN;
1392 cpuc->n_txn = 0; 1402 cpuc->n_txn = 0;
1393} 1403}
1394 1404
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1401{ 1411{
1402 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1412 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1403 1413
1404 cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; 1414 cpuc->group_flag &= ~PERF_EVENT_TXN;
1405 /* 1415 /*
1406 * Truncate the collected events. 1416 * Truncate the collected events.
1407 */ 1417 */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1435 */ 1445 */
1436 memcpy(cpuc->assign, assign, n*sizeof(int)); 1446 memcpy(cpuc->assign, assign, n*sizeof(int));
1437 1447
1438 /* 1448 cpuc->group_flag &= ~PERF_EVENT_TXN;
1439 * Clear out the txn count so that ->cancel_txn() which gets
1440 * run after ->commit_txn() doesn't undo things.
1441 */
1442 cpuc->n_txn = 0;
1443 1449
1444 return 0; 1450 return 0;
1445} 1451}
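
The transaction rework visible in the hunks above turns group scheduling into a small
protocol: ->start_txn() sets PERF_EVENT_TXN so the per-event schedulability check in
->enable() is skipped, ->commit_txn() validates the whole batch at once and now clears
the flag itself on success, and ->cancel_txn() clears the flag and truncates whatever
was queued. A compressed, self-contained sketch of that state machine (the struct and
function names are illustrative stand-ins, not the real perf API):

    #include <stdbool.h>
    #include <stdio.h>

    #define TXN_FLAG 0x1                       /* stand-in for PERF_EVENT_TXN */

    struct fake_cpuc {
        unsigned int group_flag;
        int n_events, n_txn;
    };

    static void start_txn(struct fake_cpuc *c)
    {
        c->group_flag |= TXN_FLAG;
        c->n_txn = 0;
    }

    static void add_event(struct fake_cpuc *c)
    {
        /* inside a transaction the schedulability test is deferred */
        c->n_events++;
        c->n_txn++;
    }

    static int commit_txn(struct fake_cpuc *c, bool group_fits)
    {
        if (!group_fits)
            return -1;                 /* -EAGAIN: caller will cancel */
        c->group_flag &= ~TXN_FLAG;    /* success clears the flag */
        return 0;
    }

    static void cancel_txn(struct fake_cpuc *c)
    {
        c->group_flag &= ~TXN_FLAG;
        c->n_events -= c->n_txn;       /* drop the events queued in this txn */
    }

    int main(void)
    {
        struct fake_cpuc c = { 0, 0, 0 };

        start_txn(&c);
        add_event(&c);
        add_event(&c);
        if (commit_txn(&c, false))
            cancel_txn(&c);
        printf("events left after a failed group: %d\n", c.n_events);
        return 0;
    }
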
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
1607 .walk_stack = print_context_stack_bp, 1613 .walk_stack = print_context_stack_bp,
1608}; 1614};
1609 1615
1610#include "../dumpstack.h"
1611
1612static void 1616static void
1613perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1617perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1614{ 1618{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1730 return entry; 1734 return entry;
1731} 1735}
1732 1736
1733void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1734{
1735 regs->ip = ip;
1736 /*
1737 * perf_arch_fetch_caller_regs adds another call, we need to increment
1738 * the skip level
1739 */
1740 regs->bp = rewind_frame_pointer(skip + 1);
1741 regs->cs = __KERNEL_CS;
1742 /*
1743 * We abuse bit 3 to pass exact information, see perf_misc_flags
1744 * and the comment with PERF_EFLAGS_EXACT.
1745 */
1746 regs->flags = 0;
1747}
1748
1749unsigned long perf_instruction_pointer(struct pt_regs *regs) 1737unsigned long perf_instruction_pointer(struct pt_regs *regs)
1750{ 1738{
1751 unsigned long ip; 1739 unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d1..107711bf0ee8 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -21,22 +21,36 @@ struct p4_event_bind {
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ 21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
22}; 22};
23 23
24struct p4_cache_event_bind { 24struct p4_pebs_bind {
25 unsigned int metric_pebs; 25 unsigned int metric_pebs;
26 unsigned int metric_vert; 26 unsigned int metric_vert;
27}; 27};
28 28
29#define P4_GEN_CACHE_EVENT_BIND(name) \ 29/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
30 [P4_CACHE__##name] = { \ 30#define P4_GEN_PEBS_BIND(name, pebs, vert) \
31 .metric_pebs = P4_PEBS__##name, \ 31 [P4_PEBS_METRIC__##name] = { \
32 .metric_vert = P4_VERT__##name, \ 32 .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
33 .metric_vert = vert, \
33 } 34 }
34 35
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = { 36/*
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), 37 * note we have P4_PEBS_ENABLE_UOP_TAG always set here
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), 38 *
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), 39 * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), 40 * event configuration to find out which values are to be
41 * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
42 * resgisters
43 */
44static struct p4_pebs_bind p4_pebs_bind_map[] = {
45 P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
46 P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
47 P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
48 P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
49 P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
50 P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
51 P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
52 P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
53 P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
40}; 54};
41 55
42/* 56/*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
281 }, 295 },
282}; 296};
283 297
284#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ 298#define P4_GEN_CACHE_EVENT(event, bit, metric) \
285 p4_config_pack_escr(P4_ESCR_EVENT(event) | \ 299 p4_config_pack_escr(P4_ESCR_EVENT(event) | \
286 P4_ESCR_EMASK_BIT(event, bit)) | \ 300 P4_ESCR_EMASK_BIT(event, bit)) | \
287 p4_config_pack_cccr(cache_event | \ 301 p4_config_pack_cccr(metric | \
288 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) 302 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
289 303
290static __initconst const u64 p4_hw_cache_event_ids 304static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
296 [ C(OP_READ) ] = { 310 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0, 311 [ C(RESULT_ACCESS) ] = 0x0,
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired), 313 P4_PEBS_METRIC__1stl_cache_load_miss_retired),
300 }, 314 },
301 }, 315 },
302 [ C(LL ) ] = { 316 [ C(LL ) ] = {
303 [ C(OP_READ) ] = { 317 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0, 318 [ C(RESULT_ACCESS) ] = 0x0,
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 319 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired), 320 P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
307 }, 321 },
308}, 322},
309 [ C(DTLB) ] = { 323 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = { 324 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0, 325 [ C(RESULT_ACCESS) ] = 0x0,
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 326 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired), 327 P4_PEBS_METRIC__dtlb_load_miss_retired),
314 }, 328 },
315 [ C(OP_WRITE) ] = { 329 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0, 330 [ C(RESULT_ACCESS) ] = 0x0,
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 331 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired), 332 P4_PEBS_METRIC__dtlb_store_miss_retired),
319 }, 333 },
320 }, 334 },
321 [ C(ITLB) ] = { 335 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = { 336 [ C(OP_READ) ] = {
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, 337 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit), 338 P4_PEBS_METRIC__none),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, 339 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss), 340 P4_PEBS_METRIC__none),
327 }, 341 },
328 [ C(OP_WRITE) ] = { 342 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1, 343 [ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
414 return config; 428 return config;
415} 429}
416 430
431static int p4_validate_raw_event(struct perf_event *event)
432{
433 unsigned int v;
434
435 /* user data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) {
438 pr_warning("P4 PMU: Unknown event code: %d\n", v);
439 return -EINVAL;
440 }
441
442 /*
443 * it may have some screwed PEBS bits
444 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL;
448 }
449 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL;
453 }
454
455 return 0;
456}
457
417static int p4_hw_config(struct perf_event *event) 458static int p4_hw_config(struct perf_event *event)
418{ 459{
419 int cpu = get_cpu(); 460 int cpu = get_cpu();
420 int rc = 0; 461 int rc = 0;
421 unsigned int evnt;
422 u32 escr, cccr; 462 u32 escr, cccr;
423 463
424 /* 464 /*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
438 478
439 if (event->attr.type == PERF_TYPE_RAW) { 479 if (event->attr.type == PERF_TYPE_RAW) {
440 480
441 /* user data may have out-of-bound event index */ 481 rc = p4_validate_raw_event(event);
442 evnt = p4_config_unpack_event(event->attr.config); 482 if (rc)
443 if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
444 rc = -EINVAL;
445 goto out; 483 goto out;
446 }
447 484
448 /* 485 /*
449 * We don't control raw events so it's up to the caller 486 * We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
451 * on HT machine but allow HT-compatible specifics to be 488 * on HT machine but allow HT-compatible specifics to be
452 * passed on) 489 * passed on)
453 * 490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events and etc)
493 *
454 * XXX: HT wide things should check perf_paranoid_cpu() && 494 * XXX: HT wide things should check perf_paranoid_cpu() &&
455 * CAP_SYS_ADMIN 495 * CAP_SYS_ADMIN
456 */ 496 */
457 event->hw.config |= event->attr.config & 497 event->hw.config |= event->attr.config &
458 (p4_config_pack_escr(P4_ESCR_MASK_HT) | 498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
459 p4_config_pack_cccr(P4_CCCR_MASK_HT)); 499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
460 } 500 }
461 501
462 rc = x86_setup_perfctr(event); 502 rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
482 return overflow; 522 return overflow;
483} 523}
484 524
525static void p4_pmu_disable_pebs(void)
526{
527 /*
528 * FIXME
529 *
530 * It's still allowed that two threads setup same cache
531 * events so we can't simply clear metrics until we knew
532 * noone is depending on us, so we need kind of counter
533 * for "ReplayEvent" users.
534 *
535 * What is more complex -- RAW events, if user (for some
536 * reason) will pass some cache event metric with improper
537 * event opcode -- it's fine from hardware point of view
538 * but completely nonsence from "meaning" of such action.
539 *
540 * So at moment let leave metrics turned on forever -- it's
541 * ok for now but need to be revisited!
542 *
543 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
544 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
545 */
546}
547
485static inline void p4_pmu_disable_event(struct perf_event *event) 548static inline void p4_pmu_disable_event(struct perf_event *event)
486{ 549{
487 struct hw_perf_event *hwc = &event->hw; 550 struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
507 continue; 570 continue;
508 p4_pmu_disable_event(event); 571 p4_pmu_disable_event(event);
509 } 572 }
573
574 p4_pmu_disable_pebs();
575}
576
577/* configuration must be valid */
578static void p4_pmu_enable_pebs(u64 config)
579{
580 struct p4_pebs_bind *bind;
581 unsigned int idx;
582
583 BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
584
585 idx = p4_config_unpack_metric(config);
586 if (idx == P4_PEBS_METRIC__none)
587 return;
588
589 bind = &p4_pebs_bind_map[idx];
590
591 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
592 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
510} 593}
511 594
512static void p4_pmu_enable_event(struct perf_event *event) 595static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
515 int thread = p4_ht_config_thread(hwc->config); 598 int thread = p4_ht_config_thread(hwc->config);
516 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); 599 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
517 unsigned int idx = p4_config_unpack_event(hwc->config); 600 unsigned int idx = p4_config_unpack_event(hwc->config);
518 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
519 struct p4_event_bind *bind; 601 struct p4_event_bind *bind;
520 struct p4_cache_event_bind *bind_cache;
521 u64 escr_addr, cccr; 602 u64 escr_addr, cccr;
522 603
523 bind = &p4_event_bind_map[idx]; 604 bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
537 cccr = p4_config_unpack_cccr(hwc->config); 618 cccr = p4_config_unpack_cccr(hwc->config);
538 619
539 /* 620 /*
540 * it could be Cache event so that we need to 621 * it could be Cache event so we need to write metrics
541 * set metrics into additional MSRs 622 * into additional MSRs
542 */ 623 */
543 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); 624 p4_pmu_enable_pebs(hwc->config);
544 if (idx_cache > P4_CACHE__NONE &&
545 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
546 bind_cache = &p4_cache_event_bind_map[idx_cache];
547 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
548 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
549 }
550 625
551 (void)checking_wrmsrl(escr_addr, escr_conf); 626 (void)checking_wrmsrl(escr_addr, escr_conf);
552 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 627 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
829 .max_period = (1ULL << 39) - 1, 904 .max_period = (1ULL << 39) - 1,
830 .hw_config = p4_hw_config, 905 .hw_config = p4_hw_config,
831 .schedule_events = p4_pmu_schedule_events, 906 .schedule_events = p4_pmu_schedule_events,
907 /*
908 * This handles erratum N15 in intel doc 249199-029,
909 * the counter may not be updated correctly on write
910 * so we need a second write operation to do the trick
911 * (the official workaround didn't work)
912 *
913 * the former idea is taken from OProfile code
914 */
915 .perfctr_second_write = 1,
832}; 916};
833 917
834static __init int p4_pmu_init(void) 918static __init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386930b7..6e8752c1bd52 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
18 18
19#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
20 20
21#include "dumpstack.h"
22 21
23int panic_on_unrecovered_nmi; 22int panic_on_unrecovered_nmi;
24int panic_on_io_nmi; 23int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be4fd44..000000000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17#include <linux/uaccess.h>
18
19extern void
20show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
21 unsigned long *stack, unsigned long bp, char *log_lvl);
22
23extern void
24show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *sp, unsigned long bp, char *log_lvl);
26
27extern unsigned int code_bytes;
28
29/* The form of the top of the frame on the stack */
30struct stack_frame {
31 struct stack_frame *next_frame;
32 unsigned long return_address;
33};
34
35struct stack_frame_ia32 {
36 u32 next_frame;
37 u32 return_address;
38};
39
40static inline unsigned long rewind_frame_pointer(int n)
41{
42 struct stack_frame *frame;
43
44 get_bp(frame);
45
46#ifdef CONFIG_FRAME_POINTER
47 while (n--) {
48 if (probe_kernel_address(&frame->next_frame, frame))
49 break;
50 }
51#endif
52
53 return (unsigned long)frame;
54}
55
56#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a189d93..0f6376ffa2d9 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20
21 19
22void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task, struct pt_regs *regs,
23 unsigned long *stack, unsigned long bp, 21 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1f05f3..57a21f11c791 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20 19
21#define N_EXCEPTION_STACKS_END \ 20#define N_EXCEPTION_STACKS_END \
22 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) 21 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2fd..a474ec37c32f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
208{ 208{
209 /* Len */ 209 /* Len */
210 switch (x86_len) { 210 switch (x86_len) {
211 case X86_BREAKPOINT_LEN_X:
212 *gen_len = sizeof(long);
213 break;
211 case X86_BREAKPOINT_LEN_1: 214 case X86_BREAKPOINT_LEN_1:
212 *gen_len = HW_BREAKPOINT_LEN_1; 215 *gen_len = HW_BREAKPOINT_LEN_1;
213 break; 216 break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
251 254
252 info->address = bp->attr.bp_addr; 255 info->address = bp->attr.bp_addr;
253 256
257 /* Type */
258 switch (bp->attr.bp_type) {
259 case HW_BREAKPOINT_W:
260 info->type = X86_BREAKPOINT_WRITE;
261 break;
262 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
263 info->type = X86_BREAKPOINT_RW;
264 break;
265 case HW_BREAKPOINT_X:
266 info->type = X86_BREAKPOINT_EXECUTE;
267 /*
268 * x86 inst breakpoints need to have a specific undefined len.
269 * But we still need to check userspace is not trying to setup
270 * an unsupported length, to get a range breakpoint for example.
271 */
272 if (bp->attr.bp_len == sizeof(long)) {
273 info->len = X86_BREAKPOINT_LEN_X;
274 return 0;
275 }
276 default:
277 return -EINVAL;
278 }
279
254 /* Len */ 280 /* Len */
255 switch (bp->attr.bp_len) { 281 switch (bp->attr.bp_len) {
256 case HW_BREAKPOINT_LEN_1: 282 case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
271 return -EINVAL; 297 return -EINVAL;
272 } 298 }
273 299
274 /* Type */
275 switch (bp->attr.bp_type) {
276 case HW_BREAKPOINT_W:
277 info->type = X86_BREAKPOINT_WRITE;
278 break;
279 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
280 info->type = X86_BREAKPOINT_RW;
281 break;
282 case HW_BREAKPOINT_X:
283 info->type = X86_BREAKPOINT_EXECUTE;
284 break;
285 default:
286 return -EINVAL;
287 }
288
289 return 0; 300 return 0;
290} 301}
291/* 302/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
305 ret = -EINVAL; 316 ret = -EINVAL;
306 317
307 switch (info->len) { 318 switch (info->len) {
319 case X86_BREAKPOINT_LEN_X:
320 align = sizeof(long) -1;
321 break;
308 case X86_BREAKPOINT_LEN_1: 322 case X86_BREAKPOINT_LEN_1:
309 align = 0; 323 align = 0;
310 break; 324 break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
466 480
467 perf_bp_event(bp, args->regs); 481 perf_bp_event(bp, args->regs);
468 482
483 /*
484 * Set up resume flag to avoid breakpoint recursion when
485 * returning back to origin.
486 */
487 if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
488 args->regs->flags |= X86_EFLAGS_RF;
489
469 rcu_read_unlock(); 490 rcu_read_unlock();
470 } 491 }
471 /* 492 /*
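
On the user-space side, the reordered checks mean an execute breakpoint must be
requested with bp_type = HW_BREAKPOINT_X and bp_len = sizeof(long); any other length
is now rejected with -EINVAL instead of being misread as a range breakpoint. A hedged
sketch of such a request through perf_event_open() (error handling trimmed; the target
function is a placeholder):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>
    #include <linux/hw_breakpoint.h>

    static void target(void) { }   /* placeholder function to break on */

    int main(void)
    {
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.type          = PERF_TYPE_BREAKPOINT;
        attr.size          = sizeof(attr);
        attr.bp_type       = HW_BREAKPOINT_X;
        attr.bp_addr       = (unsigned long)target;
        attr.bp_len        = sizeof(long);   /* required for execute breakpoints */
        attr.sample_period = 1;

        int fd = syscall(__NR_perf_event_open, &attr, 0 /* self */,
                         -1 /* any cpu */, -1 /* no group */, 0);
        if (fd < 0)
            perror("perf_event_open");
        else
            close(fd);
        return 0;
    }
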
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 675879b65ce6..1bfb6cf4dd55 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
126} 126}
127 127
128/* 128/*
129 * Check for the REX prefix which can only exist on X86_64 129 * Skip the prefixes of the instruction.
130 * X86_32 always returns 0
131 */ 130 */
132static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) 131static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
133{ 132{
133 insn_attr_t attr;
134
135 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
136 while (inat_is_legacy_prefix(attr)) {
137 insn++;
138 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
139 }
134#ifdef CONFIG_X86_64 140#ifdef CONFIG_X86_64
135 if ((*insn & 0xf0) == 0x40) 141 if (inat_is_rex_prefix(attr))
136 return 1; 142 insn++;
137#endif 143#endif
138 return 0; 144 return insn;
139} 145}
140 146
141/* 147/*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
272 */ 278 */
273static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) 279static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
274{ 280{
281 /* Skip prefixes */
282 insn = skip_prefixes(insn);
283
275 switch (*insn) { 284 switch (*insn) {
276 case 0xfa: /* cli */ 285 case 0xfa: /* cli */
277 case 0xfb: /* sti */ 286 case 0xfb: /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
280 return 1; 289 return 1;
281 } 290 }
282 291
283 /*
284 * on X86_64, 0x40-0x4f are REX prefixes so we need to look
285 * at the next byte instead.. but of course not recurse infinitely
286 */
287 if (is_REX_prefix(insn))
288 return is_IF_modifier(++insn);
289
290 return 0; 292 return 0;
291} 293}
292 294
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
803 unsigned long orig_ip = (unsigned long)p->addr; 805 unsigned long orig_ip = (unsigned long)p->addr;
804 kprobe_opcode_t *insn = p->ainsn.insn; 806 kprobe_opcode_t *insn = p->ainsn.insn;
805 807
806 /*skip the REX prefix*/ 808 /* Skip prefixes */
807 if (is_REX_prefix(insn)) 809 insn = skip_prefixes(insn);
808 insn++;
809 810
810 regs->flags &= ~X86_EFLAGS_TF; 811 regs->flags &= ~X86_EFLAGS_TF;
811 switch (*insn) { 812 switch (*insn) {
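
skip_prefixes() replaces the REX-only check with a loop over the in-kernel instruction
attribute tables, so legacy prefixes (operand/address size, LOCK, REP/REPNE, segment
overrides) and, on 64-bit, a trailing REX byte are all stepped over before the opcode
byte is examined. A simplified, table-free sketch of the same idea, covering only the
common single-byte prefixes (the real code relies on inat_get_opcode_attribute()):

    #include <stdio.h>

    typedef unsigned char u8;

    static int is_legacy_prefix(u8 b)
    {
        switch (b) {
        case 0x26: case 0x2e: case 0x36: case 0x3e:   /* segment overrides */
        case 0x64: case 0x65:                         /* FS/GS overrides   */
        case 0x66: case 0x67:                         /* operand/addr size */
        case 0xf0: case 0xf2: case 0xf3:              /* LOCK, REPNE, REP  */
            return 1;
        }
        return 0;
    }

    static const u8 *skip_prefixes(const u8 *insn)
    {
        while (is_legacy_prefix(*insn))
            insn++;
    #ifdef __x86_64__
        if ((*insn & 0xf0) == 0x40)   /* single REX prefix, 0x40-0x4f */
            insn++;
    #endif
        return insn;
    }

    int main(void)
    {
        const u8 code[] = { 0xf0, 0x48, 0x87, 0x07 }; /* lock xchg %rax,(%rdi) */

        printf("opcode byte: %#x\n", *skip_prefixes(code));  /* prints 0x87 */
        return 0;
    }
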
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af47..96586c3cbbbf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,8 @@
57#include <asm/syscalls.h> 57#include <asm/syscalls.h>
58#include <asm/debugreg.h> 58#include <asm/debugreg.h>
59 59
60#include <trace/events/power.h>
61
60asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
61 63
62/* 64/*
@@ -111,6 +113,8 @@ void cpu_idle(void)
111 stop_critical_timings(); 113 stop_critical_timings();
112 pm_idle(); 114 pm_idle();
113 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id());
114 } 118 }
115 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
116 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a99f1f..3d9ea531ddd1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,8 @@
51#include <asm/syscalls.h> 51#include <asm/syscalls.h>
52#include <asm/debugreg.h> 52#include <asm/debugreg.h>
53 53
54#include <trace/events/power.h>
55
54asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
55 57
56DEFINE_PER_CPU(unsigned long, old_rsp); 58DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
138 stop_critical_timings(); 140 stop_critical_timings();
139 pm_idle(); 141 pm_idle();
140 start_critical_timings(); 142 start_critical_timings();
143
144 trace_power_end(smp_processor_id());
145
141 /* In many cases the interrupt that ended idle 146 /* In many cases the interrupt that ended idle
142 has already called exit_idle. But some idle 147 has already called exit_idle. But some idle
143 loops can be woken up without interrupt. */ 148 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefbb3f6c..b53c525368a7 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
23 return 0; 23 return 0;
24} 24}
25 25
26static void save_stack_address(void *data, unsigned long addr, int reliable) 26static void
27__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
27{ 28{
28 struct stack_trace *trace = data; 29 struct stack_trace *trace = data;
30#ifdef CONFIG_FRAME_POINTER
29 if (!reliable) 31 if (!reliable)
30 return; 32 return;
33#endif
34 if (nosched && in_sched_functions(addr))
35 return;
31 if (trace->skip > 0) { 36 if (trace->skip > 0) {
32 trace->skip--; 37 trace->skip--;
33 return; 38 return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
36 trace->entries[trace->nr_entries++] = addr; 41 trace->entries[trace->nr_entries++] = addr;
37} 42}
38 43
44static void save_stack_address(void *data, unsigned long addr, int reliable)
45{
46 return __save_stack_address(data, addr, reliable, false);
47}
48
39static void 49static void
40save_stack_address_nosched(void *data, unsigned long addr, int reliable) 50save_stack_address_nosched(void *data, unsigned long addr, int reliable)
41{ 51{
42 struct stack_trace *trace = (struct stack_trace *)data; 52 return __save_stack_address(data, addr, reliable, true);
43 if (!reliable)
44 return;
45 if (in_sched_functions(addr))
46 return;
47 if (trace->skip > 0) {
48 trace->skip--;
49 return;
50 }
51 if (trace->nr_entries < trace->max_entries)
52 trace->entries[trace->nr_entries++] = addr;
53} 53}
54 54
55static const struct stacktrace_ops save_stack_ops = { 55static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
96 96
97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ 97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
98 98
99struct stack_frame { 99struct stack_frame_user {
100 const void __user *next_fp; 100 const void __user *next_fp;
101 unsigned long ret_addr; 101 unsigned long ret_addr;
102}; 102};
103 103
104static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 104static int
105copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
105{ 106{
106 int ret; 107 int ret;
107 108
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
126 trace->entries[trace->nr_entries++] = regs->ip; 127 trace->entries[trace->nr_entries++] = regs->ip;
127 128
128 while (trace->nr_entries < trace->max_entries) { 129 while (trace->nr_entries < trace->max_entries) {
129 struct stack_frame frame; 130 struct stack_frame_user frame;
130 131
131 frame.next_fp = NULL; 132 frame.next_fp = NULL;
132 frame.ret_addr = 0; 133 frame.ret_addr = 0;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 725ef4d17cd5..60788dee0f8a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
392 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 392 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
393 == NOTIFY_STOP) 393 == NOTIFY_STOP)
394 return; 394 return;
395
395#ifdef CONFIG_X86_LOCAL_APIC 396#ifdef CONFIG_X86_LOCAL_APIC
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
398 == NOTIFY_STOP)
399 return;
400
401#ifndef CONFIG_LOCKUP_DETECTOR
396 /* 402 /*
397 * Ok, so this is none of the documented NMI sources, 403 * Ok, so this is none of the documented NMI sources,
398 * so it must be the NMI watchdog. 404 * so it must be the NMI watchdog.
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
400 if (nmi_watchdog_tick(regs, reason)) 406 if (nmi_watchdog_tick(regs, reason))
401 return; 407 return;
402 if (!do_nmi_callback(regs, cpu)) 408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
403 unknown_nmi_error(reason, regs); 410 unknown_nmi_error(reason, regs);
404#else 411#else
405 unknown_nmi_error(reason, regs); 412 unknown_nmi_error(reason, regs);
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index 308e32570d84..38e6d174c497 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = {
40static unsigned int reg_rop[] = { 40static unsigned int reg_rop[] = {
41 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F 41 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
42}; 42};
43static unsigned int reg_wop[] = { 0x88, 0x89 }; 43static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
44static unsigned int imm_wop[] = { 0xC6, 0xC7 }; 44static unsigned int imm_wop[] = { 0xC6, 0xC7 };
45/* IA32 Manual 3, 3-432*/ 45/* IA32 Manual 3, 3-432*/
46static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; 46static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA };
47static unsigned int rw32[] = { 47static unsigned int rw32[] = {
48 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F 48 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
49}; 49};
50static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; 50static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA };
51static unsigned int mw16[] = { 0xB70F, 0xBF0F }; 51static unsigned int mw16[] = { 0xB70F, 0xBF0F };
52static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; 52static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB };
53static unsigned int mw64[] = {}; 53static unsigned int mw64[] = {};
54#else /* not __i386__ */ 54#else /* not __i386__ */
55static unsigned char prefix_codes[] = { 55static unsigned char prefix_codes[] = {
@@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = {
63static unsigned int reg_rop[] = { 63static unsigned int reg_rop[] = {
64 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F 64 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
65}; 65};
66static unsigned int reg_wop[] = { 0x88, 0x89 }; 66static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
67static unsigned int imm_wop[] = { 0xC6, 0xC7 }; 67static unsigned int imm_wop[] = { 0xC6, 0xC7 };
68static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; 68static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA };
69static unsigned int rw32[] = { 69static unsigned int rw32[] = {
70 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F 70 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
71}; 71};
72/* 8 bit only */ 72/* 8 bit only */
73static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; 73static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA };
74/* 16 bit only */ 74/* 16 bit only */
75static unsigned int mw16[] = { 0xB70F, 0xBF0F }; 75static unsigned int mw16[] = { 0xB70F, 0xBF0F };
76/* 16 or 32 bit */ 76/* 16 or 32 bit */
77static unsigned int mw32[] = { 0xC7 }; 77static unsigned int mw32[] = { 0xC7 };
78/* 16, 32 or 64 bit */ 78/* 16, 32 or 64 bit */
79static unsigned int mw64[] = { 0x89, 0x8B }; 79static unsigned int mw64[] = { 0x89, 0x8B, 0xAB };
80#endif /* not __i386__ */ 80#endif /* not __i386__ */
81 81
82struct prefix_bits { 82struct prefix_bits {
@@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
410unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) 410unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
411{ 411{
412 unsigned int opcode; 412 unsigned int opcode;
413 unsigned char mod_rm;
414 int reg; 413 int reg;
415 unsigned char *p; 414 unsigned char *p;
416 struct prefix_bits prf; 415 struct prefix_bits prf;
@@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
437 goto err; 436 goto err;
438 437
439do_work: 438do_work:
440 mod_rm = *p; 439 /* for STOS, source register is fixed */
441 reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); 440 if (opcode == 0xAA || opcode == 0xAB) {
441 reg = arg_AX;
442 } else {
443 unsigned char mod_rm = *p;
444 reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
445 }
442 switch (get_ins_reg_width(ins_addr)) { 446 switch (get_ins_reg_width(ins_addr)) {
443 case 1: 447 case 1:
444 return *get_reg_w8(reg, prf.rex, regs); 448 return *get_reg_w8(reg, prf.rex, regs);
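
The new 0xAA/0xAB entries teach the mmiotrace decoder about the string-store
instructions: STOS has no ModRM byte and its source register is architecturally fixed
to AL/AX/EAX/RAX, which is why the do_work branch above hard-codes arg_AX instead of
parsing a reg field. A tiny sketch of that decision (register numbering follows the
usual x86 encoding, 0 = AX family, 7 = DI family):

    #include <stdio.h>

    enum { REG_AX = 0 };   /* x86 register number 0 is AL/AX/EAX/RAX */

    /* mirror of the do_work branch added above: STOS has no ModRM byte and
     * a fixed source register, everything else takes reg from ModRM */
    static int source_reg(unsigned char opcode, unsigned char modrm, int rexr)
    {
        if (opcode == 0xaa || opcode == 0xab)
            return REG_AX;
        return ((modrm >> 3) & 0x7) | (rexr << 3);
    }

    int main(void)
    {
        printf("stosb    -> reg %d\n", source_reg(0xaa, 0x00, 0)); /* 0 = AX */
        printf("mov 0x89 -> reg %d\n", source_reg(0x89, 0x38, 0)); /* 7 = DI */
        return 0;
    }
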
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index b28d2f1253bb..1ba67dc8006a 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -634,6 +634,18 @@ static int __init ppro_init(char **cpu_type)
634 if (force_arch_perfmon && cpu_has_arch_perfmon) 634 if (force_arch_perfmon && cpu_has_arch_perfmon)
635 return 0; 635 return 0;
636 636
637 /*
638 * Documentation on identifying Intel processors by CPU family
639 * and model can be found in the Intel Software Developer's
640 * Manuals (SDM):
641 *
642 * http://www.intel.com/products/processor/manuals/
643 *
644 * As of May 2010 the documentation for this was in the:
645 * "Intel 64 and IA-32 Architectures Software Developer's
646 * Manual Volume 3B: System Programming Guide", "Table B-1
647 * CPUID Signature Values of DisplayFamily_DisplayModel".
648 */
637 switch (cpu_model) { 649 switch (cpu_model) {
638 case 0 ... 2: 650 case 0 ... 2:
639 *cpu_type = "i386/ppro"; 651 *cpu_type = "i386/ppro";
@@ -655,12 +667,12 @@ static int __init ppro_init(char **cpu_type)
655 case 15: case 23: 667 case 15: case 23:
656 *cpu_type = "i386/core_2"; 668 *cpu_type = "i386/core_2";
657 break; 669 break;
670 case 0x1a:
658 case 0x2e: 671 case 0x2e:
659 case 26:
660 spec = &op_arch_perfmon_spec; 672 spec = &op_arch_perfmon_spec;
661 *cpu_type = "i386/core_i7"; 673 *cpu_type = "i386/core_i7";
662 break; 674 break;
663 case 28: 675 case 0x1c:
664 *cpu_type = "i386/atom"; 676 *cpu_type = "i386/atom";
665 break; 677 break;
666 default: 678 default:
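
For readers matching these case labels against hardware: they are the SDM's DisplayFamily_DisplayModel values, so 26 and 0x1a were always the same Nehalem entry and the patch merely standardizes on hex. A hedged userspace illustration of how such a model number is assembled from CPUID leaf 1 (families 6 and 0xF fold in the extended-model bits):

#include <stdio.h>

static unsigned int display_model(unsigned int cpuid1_eax)
{
	unsigned int family    = (cpuid1_eax >> 8)  & 0xf;
	unsigned int model     = (cpuid1_eax >> 4)  & 0xf;
	unsigned int ext_model = (cpuid1_eax >> 16) & 0xf;

	if (family == 0x6 || family == 0xf)
		model |= ext_model << 4;
	return model;
}

int main(void)
{
	/* 0x000106a5: family 6, model 0xa, extended model 1 -> 0x1a (Nehalem) */
	printf("model 0x%x\n", display_model(0x000106a5));
	return 0;
}
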
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/xtensa/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index 5df60a6b6776..dd87e86048be 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -135,7 +135,7 @@ static int event_buffer_open(struct inode *inode, struct file *file)
135 * echo 1 >/dev/oprofile/enable 135 * echo 1 >/dev/oprofile/enable
136 */ 136 */
137 137
138 return 0; 138 return nonseekable_open(inode, file);
139 139
140fail: 140fail:
141 dcookie_unregister(file->private_data); 141 dcookie_unregister(file->private_data);
@@ -205,4 +205,5 @@ const struct file_operations event_buffer_fops = {
205 .open = event_buffer_open, 205 .open = event_buffer_open,
206 .release = event_buffer_release, 206 .release = event_buffer_release,
207 .read = event_buffer_read, 207 .read = event_buffer_read,
208 .llseek = no_llseek,
208}; 209};
diff --git a/fs/exec.c b/fs/exec.c
index e19de6a80339..97d91a03fb13 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
653 else 653 else
654 stack_base = vma->vm_start - stack_expand; 654 stack_base = vma->vm_start - stack_expand;
655#endif 655#endif
656 current->mm->start_stack = bprm->p;
656 ret = expand_stack(vma, stack_base); 657 ret = expand_stack(vma, stack_base);
657 if (ret) 658 if (ret)
658 ret = -EFAULT; 659 ret = -EFAULT;
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
new file mode 100644
index 000000000000..02ac760c1a8b
--- /dev/null
+++ b/include/asm-generic/local64.h
@@ -0,0 +1,96 @@
1#ifndef _ASM_GENERIC_LOCAL64_H
2#define _ASM_GENERIC_LOCAL64_H
3
4#include <linux/percpu.h>
5#include <asm/types.h>
6
7/*
8 * A signed long type for operations which are atomic for a single CPU.
9 * Usually used in combination with per-cpu variables.
10 *
11 * This is the default implementation, which uses atomic64_t. Which is
12 * rather pointless. The whole point behind local64_t is that some processors
13 * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs
14 * running on this CPU. local64_t allows exploitation of such capabilities.
15 */
16
17/* Implement in terms of atomics. */
18
19#if BITS_PER_LONG == 64
20
21#include <asm/local.h>
22
23typedef struct {
24 local_t a;
25} local64_t;
26
27#define LOCAL64_INIT(i) { LOCAL_INIT(i) }
28
29#define local64_read(l) local_read(&(l)->a)
30#define local64_set(l,i) local_set((&(l)->a),(i))
31#define local64_inc(l) local_inc(&(l)->a)
32#define local64_dec(l) local_dec(&(l)->a)
33#define local64_add(i,l) local_add((i),(&(l)->a))
34#define local64_sub(i,l) local_sub((i),(&(l)->a))
35
36#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
37#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
38#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
39#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
40#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
41#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
42#define local64_inc_return(l) local_inc_return(&(l)->a)
43
44#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
45#define local64_xchg(l, n) local_xchg((&(l)->a), (n))
46#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
47#define local64_inc_not_zero(l) local_inc_not_zero(&(l)->a)
48
49/* Non-atomic variants, ie. preemption disabled and won't be touched
50 * in interrupt, etc. Some archs can optimize this case well. */
51#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
52#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
53#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
54#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
55
56#else /* BITS_PER_LONG != 64 */
57
58#include <asm/atomic.h>
59
60/* Don't use typedef: don't want them to be mixed with atomic_t's. */
61typedef struct {
62 atomic64_t a;
63} local64_t;
64
65#define LOCAL64_INIT(i) { ATOMIC_LONG_INIT(i) }
66
67#define local64_read(l) atomic64_read(&(l)->a)
68#define local64_set(l,i) atomic64_set((&(l)->a),(i))
69#define local64_inc(l) atomic64_inc(&(l)->a)
70#define local64_dec(l) atomic64_dec(&(l)->a)
71#define local64_add(i,l) atomic64_add((i),(&(l)->a))
72#define local64_sub(i,l) atomic64_sub((i),(&(l)->a))
73
74#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
75#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
76#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
77#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
78#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
79#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
80#define local64_inc_return(l) atomic64_inc_return(&(l)->a)
81
82#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
83#define local64_xchg(l, n) atomic64_xchg((&(l)->a), (n))
84#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
85#define local64_inc_not_zero(l) atomic64_inc_not_zero(&(l)->a)
86
87/* Non-atomic variants, ie. preemption disabled and won't be touched
88 * in interrupt, etc. Some archs can optimize this case well. */
89#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
90#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
91#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
92#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
93
94#endif /* BITS_PER_LONG != 64 */
95
96#endif /* _ASM_GENERIC_LOCAL64_H */
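
A minimal usage sketch (my own illustration, not part of the patch): a statistic that is only written from its owning CPU, e.g. from a PMU or timer interrupt, which is exactly the perf_event case this header was introduced for. On 64-bit the updates compile down to plain local_t ops; on 32-bit they fall back to atomic64_t as above.

#include <linux/percpu.h>
#include <linux/types.h>
#include <asm/local64.h>

static DEFINE_PER_CPU(local64_t, my_sample_count);

/* caller runs on the owning CPU, e.g. in IRQ/NMI context */
static void on_sample(void)
{
	local64_inc(&__get_cpu_var(my_sample_count));
}

/* remote readers may race with the owner; an untorn 64-bit read is all
 * local64_read() promises here */
static u64 read_sample_count(int cpu)
{
	return local64_read(&per_cpu(my_sample_count, cpu));
}
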
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 4e7ae6002056..8a92a170fb7d 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -156,10 +156,6 @@
156 CPU_KEEP(exit.data) \ 156 CPU_KEEP(exit.data) \
157 MEM_KEEP(init.data) \ 157 MEM_KEEP(init.data) \
158 MEM_KEEP(exit.data) \ 158 MEM_KEEP(exit.data) \
159 . = ALIGN(8); \
160 VMLINUX_SYMBOL(__start___markers) = .; \
161 *(__markers) \
162 VMLINUX_SYMBOL(__stop___markers) = .; \
163 . = ALIGN(32); \ 159 . = ALIGN(32); \
164 VMLINUX_SYMBOL(__start___tracepoints) = .; \ 160 VMLINUX_SYMBOL(__start___tracepoints) = .; \
165 *(__tracepoints) \ 161 *(__tracepoints) \
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 41e46330d9be..dcd6a7c3a435 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,3 +1,8 @@
1/*
2 * Ftrace header. For implementation details beyond the random comments
3 * scattered below, see: Documentation/trace/ftrace-design.txt
4 */
5
1#ifndef _LINUX_FTRACE_H 6#ifndef _LINUX_FTRACE_H
2#define _LINUX_FTRACE_H 7#define _LINUX_FTRACE_H
3 8
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 3167f2df4126..02b8b24f8f51 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -11,8 +11,6 @@ struct trace_array;
11struct tracer; 11struct tracer;
12struct dentry; 12struct dentry;
13 13
14DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
15
16struct trace_print_flags { 14struct trace_print_flags {
17 unsigned long mask; 15 unsigned long mask;
18 const char *name; 16 const char *name;
@@ -58,6 +56,9 @@ struct trace_iterator {
58 struct ring_buffer_iter *buffer_iter[NR_CPUS]; 56 struct ring_buffer_iter *buffer_iter[NR_CPUS];
59 unsigned long iter_flags; 57 unsigned long iter_flags;
60 58
59 /* trace_seq for __print_flags() and __print_symbolic() etc. */
60 struct trace_seq tmp_seq;
61
61 /* The below is zeroed out in pipe_read */ 62 /* The below is zeroed out in pipe_read */
62 struct trace_seq seq; 63 struct trace_seq seq;
63 struct trace_entry *ent; 64 struct trace_entry *ent;
@@ -146,14 +147,19 @@ struct ftrace_event_class {
146 int (*raw_init)(struct ftrace_event_call *); 147 int (*raw_init)(struct ftrace_event_call *);
147}; 148};
148 149
150extern int ftrace_event_reg(struct ftrace_event_call *event,
151 enum trace_reg type);
152
149enum { 153enum {
150 TRACE_EVENT_FL_ENABLED_BIT, 154 TRACE_EVENT_FL_ENABLED_BIT,
151 TRACE_EVENT_FL_FILTERED_BIT, 155 TRACE_EVENT_FL_FILTERED_BIT,
156 TRACE_EVENT_FL_RECORDED_CMD_BIT,
152}; 157};
153 158
154enum { 159enum {
155 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), 160 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT),
156 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), 161 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
162 TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
157}; 163};
158 164
159struct ftrace_event_call { 165struct ftrace_event_call {
@@ -171,6 +177,7 @@ struct ftrace_event_call {
171 * 32 bit flags: 177 * 32 bit flags:
172 * bit 1: enabled 178 * bit 1: enabled
173 * bit 2: filter_active 179 * bit 2: filter_active
180 * bit 3: enabled cmd record
174 * 181 *
175 * Changes to flags must hold the event_mutex. 182 * Changes to flags must hold the event_mutex.
176 * 183 *
@@ -257,8 +264,7 @@ static inline void
257perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, 264perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
258 u64 count, struct pt_regs *regs, void *head) 265 u64 count, struct pt_regs *regs, void *head)
259{ 266{
260 perf_tp_event(addr, count, raw_data, size, regs, head); 267 perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
261 perf_swevent_put_recursion_context(rctx);
262} 268}
263#endif 269#endif
264 270
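
The net effect of the perf_trace_buf_submit() change is that the recursion context is now dropped inside perf_tp_event() once the sample has been processed, instead of by every caller. A sketch of the calling pattern in the generated perf glue; the helper signatures are recalled from this era's trace_event_perf code and the size/type/addr/count/head parameters are stand-ins:

static void emit_tp_sample(int size, unsigned short type, u64 addr,
			   u64 count, struct hlist_head *head)
{
	struct pt_regs regs;
	void *entry;
	int rctx;

	perf_fetch_caller_regs(&regs);
	entry = perf_trace_buf_prepare(size, type, &regs, &rctx);
	if (!entry)
		return;
	/* ... copy the tracepoint payload into entry ... */
	perf_trace_buf_submit(entry, size, rctx, addr, count, &regs, head);
	/* rctx is released inside perf_tp_event(); no explicit
	 * perf_swevent_put_recursion_context() in the caller any more */
}
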
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5de838b0fc1a..38e462e00594 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -513,9 +513,6 @@ extern void tracing_start(void);
513extern void tracing_stop(void); 513extern void tracing_stop(void);
514extern void ftrace_off_permanent(void); 514extern void ftrace_off_permanent(void);
515 515
516extern void
517ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
518
519static inline void __attribute__ ((format (printf, 1, 2))) 516static inline void __attribute__ ((format (printf, 1, 2)))
520____trace_printk_check_format(const char *fmt, ...) 517____trace_printk_check_format(const char *fmt, ...)
521{ 518{
@@ -591,8 +588,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
591 588
592extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); 589extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
593#else 590#else
594static inline void
595ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
596static inline int 591static inline int
597trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); 592trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
598 593
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
deleted file mode 100644
index b616d3930c3b..000000000000
--- a/include/linux/kmemtrace.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <trace/events/kmem.h>
13
14#ifdef CONFIG_KMEMTRACE
15extern void kmemtrace_init(void);
16#else
17static inline void kmemtrace_init(void)
18{
19}
20#endif
21
22#endif /* __KERNEL__ */
23
24#endif /* _LINUX_KMEMTRACE_H */
25
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b752e807adde..06aab5eee134 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
20extern void acpi_nmi_disable(void); 20extern void acpi_nmi_disable(void);
21extern void acpi_nmi_enable(void); 21extern void acpi_nmi_enable(void);
22#else 22#else
23#ifndef CONFIG_HARDLOCKUP_DETECTOR
23static inline void touch_nmi_watchdog(void) 24static inline void touch_nmi_watchdog(void)
24{ 25{
25 touch_softlockup_watchdog(); 26 touch_softlockup_watchdog();
26} 27}
28#else
29extern void touch_nmi_watchdog(void);
30#endif
27static inline void acpi_nmi_disable(void) { } 31static inline void acpi_nmi_disable(void) { }
28static inline void acpi_nmi_enable(void) { } 32static inline void acpi_nmi_enable(void) { }
29#endif 33#endif
@@ -47,4 +51,13 @@ static inline bool trigger_all_cpu_backtrace(void)
47} 51}
48#endif 52#endif
49 53
54#ifdef CONFIG_LOCKUP_DETECTOR
55int hw_nmi_is_cpu_stuck(struct pt_regs *);
56u64 hw_nmi_get_sample_period(void);
57extern int watchdog_enabled;
58struct ctl_table;
59extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
60 void __user *, size_t *, loff_t *);
61#endif
62
50#endif 63#endif
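
Callers of touch_nmi_watchdog() are unaffected by the #ifdef reshuffle above; the usual pattern stays a slow polling loop that legitimately hogs the CPU for a while. Sketch only, the device register and ready bit are invented:

#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/io.h>

static void wait_for_device_ready(void __iomem *status_reg)
{
	while (!(readl(status_reg) & 0x1)) {
		/* pets the hardlockup detector and, via it, the softlockup one */
		touch_nmi_watchdog();
		mdelay(1);
	}
}
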
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d94985..937495c25073 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -214,8 +214,9 @@ struct perf_event_attr {
214 * See also PERF_RECORD_MISC_EXACT_IP 214 * See also PERF_RECORD_MISC_EXACT_IP
215 */ 215 */
216 precise_ip : 2, /* skid constraint */ 216 precise_ip : 2, /* skid constraint */
217 mmap_data : 1, /* non-exec mmap data */
217 218
218 __reserved_1 : 47; 219 __reserved_1 : 46;
219 220
220 union { 221 union {
221 __u32 wakeup_events; /* wakeup every n events */ 222 __u32 wakeup_events; /* wakeup every n events */
@@ -461,6 +462,7 @@ enum perf_callchain_context {
461 462
462#ifdef CONFIG_PERF_EVENTS 463#ifdef CONFIG_PERF_EVENTS
463# include <asm/perf_event.h> 464# include <asm/perf_event.h>
465# include <asm/local64.h>
464#endif 466#endif
465 467
466struct perf_guest_info_callbacks { 468struct perf_guest_info_callbacks {
@@ -531,14 +533,16 @@ struct hw_perf_event {
531 struct hrtimer hrtimer; 533 struct hrtimer hrtimer;
532 }; 534 };
533#ifdef CONFIG_HAVE_HW_BREAKPOINT 535#ifdef CONFIG_HAVE_HW_BREAKPOINT
534 /* breakpoint */ 536 struct { /* breakpoint */
535 struct arch_hw_breakpoint info; 537 struct arch_hw_breakpoint info;
538 struct list_head bp_list;
539 };
536#endif 540#endif
537 }; 541 };
538 atomic64_t prev_count; 542 local64_t prev_count;
539 u64 sample_period; 543 u64 sample_period;
540 u64 last_period; 544 u64 last_period;
541 atomic64_t period_left; 545 local64_t period_left;
542 u64 interrupts; 546 u64 interrupts;
543 547
544 u64 freq_time_stamp; 548 u64 freq_time_stamp;
@@ -548,7 +552,10 @@ struct hw_perf_event {
548 552
549struct perf_event; 553struct perf_event;
550 554
551#define PERF_EVENT_TXN_STARTED 1 555/*
556 * Common implementation detail of pmu::{start,commit,cancel}_txn
557 */
558#define PERF_EVENT_TXN 0x1
552 559
553/** 560/**
554 * struct pmu - generic performance monitoring unit 561 * struct pmu - generic performance monitoring unit
@@ -562,14 +569,28 @@ struct pmu {
562 void (*unthrottle) (struct perf_event *event); 569 void (*unthrottle) (struct perf_event *event);
563 570
564 /* 571 /*
565 * group events scheduling is treated as a transaction, 572 * Group events scheduling is treated as a transaction, add group
566 * add group events as a whole and perform one schedulability test. 573 * events as a whole and perform one schedulability test. If the test
567 * If test fails, roll back the whole group 574 * fails, roll back the whole group
568 */ 575 */
569 576
577 /*
578 * Start the transaction, after this ->enable() doesn't need
579 * to do schedulability tests.
580 */
570 void (*start_txn) (const struct pmu *pmu); 581 void (*start_txn) (const struct pmu *pmu);
571 void (*cancel_txn) (const struct pmu *pmu); 582 /*
583 * If ->start_txn() disabled the ->enable() schedulability test
584 * then ->commit_txn() is required to perform one. On success
585 * the transaction is closed. On error the transaction is kept
586 * open until ->cancel_txn() is called.
587 */
572 int (*commit_txn) (const struct pmu *pmu); 588 int (*commit_txn) (const struct pmu *pmu);
589 /*
590 * Will cancel the transaction, assumes ->disable() is called for
591 * each successful ->enable() during the transaction.
592 */
593 void (*cancel_txn) (const struct pmu *pmu);
573}; 594};
574 595
575/** 596/**
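
For orientation, the core's only user of this transaction API is group scheduling (group_sched_in(), also simplified later in this series); stripped of error-path bookkeeping it has roughly this shape, where a zero return from ->commit_txn() means the whole group fits:

	pmu->start_txn(pmu);

	if (event_sched_in(group_event, cpuctx, ctx))
		goto group_error;

	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
		if (event_sched_in(event, cpuctx, ctx))
			goto group_error;
	}

	if (!pmu->commit_txn(pmu))
		return 0;		/* transaction closed, group is on */

group_error:
	/* ->disable() each sibling that made it, then abort the transaction */
	pmu->cancel_txn(pmu);
	return -EAGAIN;
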
@@ -584,7 +605,9 @@ enum perf_event_active_state {
584 605
585struct file; 606struct file;
586 607
587struct perf_mmap_data { 608#define PERF_BUFFER_WRITABLE 0x01
609
610struct perf_buffer {
588 atomic_t refcount; 611 atomic_t refcount;
589 struct rcu_head rcu_head; 612 struct rcu_head rcu_head;
590#ifdef CONFIG_PERF_USE_VMALLOC 613#ifdef CONFIG_PERF_USE_VMALLOC
@@ -650,7 +673,8 @@ struct perf_event {
650 673
651 enum perf_event_active_state state; 674 enum perf_event_active_state state;
652 unsigned int attach_state; 675 unsigned int attach_state;
653 atomic64_t count; 676 local64_t count;
677 atomic64_t child_count;
654 678
655 /* 679 /*
656 * These are the total time in nanoseconds that the event 680 * These are the total time in nanoseconds that the event
@@ -709,7 +733,7 @@ struct perf_event {
709 atomic_t mmap_count; 733 atomic_t mmap_count;
710 int mmap_locked; 734 int mmap_locked;
711 struct user_struct *mmap_user; 735 struct user_struct *mmap_user;
712 struct perf_mmap_data *data; 736 struct perf_buffer *buffer;
713 737
714 /* poll related */ 738 /* poll related */
715 wait_queue_head_t waitq; 739 wait_queue_head_t waitq;
@@ -807,7 +831,7 @@ struct perf_cpu_context {
807 831
808struct perf_output_handle { 832struct perf_output_handle {
809 struct perf_event *event; 833 struct perf_event *event;
810 struct perf_mmap_data *data; 834 struct perf_buffer *buffer;
811 unsigned long wakeup; 835 unsigned long wakeup;
812 unsigned long size; 836 unsigned long size;
813 void *addr; 837 void *addr;
@@ -910,8 +934,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
910 934
911extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); 935extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
912 936
913extern void 937#ifndef perf_arch_fetch_caller_regs
914perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); 938static inline void
939perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
940#endif
915 941
916/* 942/*
917 * Take a snapshot of the regs. Skip ip and frame pointer to 943 * Take a snapshot of the regs. Skip ip and frame pointer to
@@ -921,31 +947,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
921 * - bp for callchains 947 * - bp for callchains
922 * - eflags, for future purposes, just in case 948 * - eflags, for future purposes, just in case
923 */ 949 */
924static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) 950static inline void perf_fetch_caller_regs(struct pt_regs *regs)
925{ 951{
926 unsigned long ip;
927
928 memset(regs, 0, sizeof(*regs)); 952 memset(regs, 0, sizeof(*regs));
929 953
930 switch (skip) { 954 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
931 case 1 :
932 ip = CALLER_ADDR0;
933 break;
934 case 2 :
935 ip = CALLER_ADDR1;
936 break;
937 case 3 :
938 ip = CALLER_ADDR2;
939 break;
940 case 4:
941 ip = CALLER_ADDR3;
942 break;
943 /* No need to support further for now */
944 default:
945 ip = 0;
946 }
947
948 return perf_arch_fetch_caller_regs(regs, ip, skip);
949} 955}
950 956
951static inline void 957static inline void
@@ -955,21 +961,14 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
955 struct pt_regs hot_regs; 961 struct pt_regs hot_regs;
956 962
957 if (!regs) { 963 if (!regs) {
958 perf_fetch_caller_regs(&hot_regs, 1); 964 perf_fetch_caller_regs(&hot_regs);
959 regs = &hot_regs; 965 regs = &hot_regs;
960 } 966 }
961 __perf_sw_event(event_id, nr, nmi, regs, addr); 967 __perf_sw_event(event_id, nr, nmi, regs, addr);
962 } 968 }
963} 969}
964 970
965extern void __perf_event_mmap(struct vm_area_struct *vma); 971extern void perf_event_mmap(struct vm_area_struct *vma);
966
967static inline void perf_event_mmap(struct vm_area_struct *vma)
968{
969 if (vma->vm_flags & VM_EXEC)
970 __perf_event_mmap(vma);
971}
972
973extern struct perf_guest_info_callbacks *perf_guest_cbs; 972extern struct perf_guest_info_callbacks *perf_guest_cbs;
974extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 973extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
975extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 974extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1001,7 +1000,7 @@ static inline bool perf_paranoid_kernel(void)
1001extern void perf_event_init(void); 1000extern void perf_event_init(void);
1002extern void perf_tp_event(u64 addr, u64 count, void *record, 1001extern void perf_tp_event(u64 addr, u64 count, void *record,
1003 int entry_size, struct pt_regs *regs, 1002 int entry_size, struct pt_regs *regs,
1004 struct hlist_head *head); 1003 struct hlist_head *head, int rctx);
1005extern void perf_bp_event(struct perf_event *event, void *data); 1004extern void perf_bp_event(struct perf_event *event, void *data);
1006 1005
1007#ifndef perf_misc_flags 1006#ifndef perf_misc_flags
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0478888c6899..3992f50de614 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -316,20 +316,16 @@ extern void scheduler_tick(void);
316 316
317extern void sched_show_task(struct task_struct *p); 317extern void sched_show_task(struct task_struct *p);
318 318
319#ifdef CONFIG_DETECT_SOFTLOCKUP 319#ifdef CONFIG_LOCKUP_DETECTOR
320extern void softlockup_tick(void);
321extern void touch_softlockup_watchdog(void); 320extern void touch_softlockup_watchdog(void);
322extern void touch_softlockup_watchdog_sync(void); 321extern void touch_softlockup_watchdog_sync(void);
323extern void touch_all_softlockup_watchdogs(void); 322extern void touch_all_softlockup_watchdogs(void);
324extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, 323extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
325 void __user *buffer, 324 void __user *buffer,
326 size_t *lenp, loff_t *ppos); 325 size_t *lenp, loff_t *ppos);
327extern unsigned int softlockup_panic; 326extern unsigned int softlockup_panic;
328extern int softlockup_thresh; 327extern int softlockup_thresh;
329#else 328#else
330static inline void softlockup_tick(void)
331{
332}
333static inline void touch_softlockup_watchdog(void) 329static inline void touch_softlockup_watchdog(void)
334{ 330{
335} 331}
@@ -2435,18 +2431,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
2435 2431
2436#endif /* CONFIG_SMP */ 2432#endif /* CONFIG_SMP */
2437 2433
2438#ifdef CONFIG_TRACING
2439extern void
2440__trace_special(void *__tr, void *__data,
2441 unsigned long arg1, unsigned long arg2, unsigned long arg3);
2442#else
2443static inline void
2444__trace_special(void *__tr, void *__data,
2445 unsigned long arg1, unsigned long arg2, unsigned long arg3)
2446{
2447}
2448#endif
2449
2450extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); 2434extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2451extern long sched_getaffinity(pid_t pid, struct cpumask *mask); 2435extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2452 2436
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 1812dac8c496..1acfa73ce2ac 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,8 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <linux/kmemtrace.h> 17
18#include <trace/events/kmem.h>
18 19
19#ifndef ARCH_KMALLOC_MINALIGN 20#ifndef ARCH_KMALLOC_MINALIGN
20/* 21/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 4ba59cfc1f75..6447a723ecb1 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,9 +10,10 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <linux/kmemtrace.h>
14#include <linux/kmemleak.h> 13#include <linux/kmemleak.h>
15 14
15#include <trace/events/kmem.h>
16
16enum stat_item { 17enum stat_item {
17 ALLOC_FASTPATH, /* Allocation from cpu slab */ 18 ALLOC_FASTPATH, /* Allocation from cpu slab */
18 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ 19 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
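
Both slab headers need the tracepoint definitions directly because their inlined fast paths are what fire trace_kmalloc() and friends; with kmemtrace deleted there is no shim header left to forward the include. Roughly how the SLUB inline uses it (a from-memory sketch, not a quote of slub_def.h; the _notrace allocator variant exists so the kmem_cache_alloc event does not fire a second time):

static __always_inline void *kmalloc_traced(struct kmem_cache *s,
					    size_t size, gfp_t flags)
{
	void *ret = kmem_cache_alloc_notrace(s, flags);

	/* call site, object, bytes requested, bytes allocated, gfp flags */
	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
	return ret;
}
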
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 13ebb5413a79..a6bfd1367d2a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -167,7 +167,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
167 .enter_event = &event_enter_##sname, \ 167 .enter_event = &event_enter_##sname, \
168 .exit_event = &event_exit_##sname, \ 168 .exit_event = &event_exit_##sname, \
169 .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ 169 .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
170 .exit_fields = LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \
171 }; 170 };
172 171
173#define SYSCALL_DEFINE0(sname) \ 172#define SYSCALL_DEFINE0(sname) \
@@ -182,7 +181,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
182 .enter_event = &event_enter__##sname, \ 181 .enter_event = &event_enter__##sname, \
183 .exit_event = &event_exit__##sname, \ 182 .exit_event = &event_exit__##sname, \
184 .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \ 183 .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
185 .exit_fields = LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \
186 }; \ 184 }; \
187 asmlinkage long sys_##sname(void) 185 asmlinkage long sys_##sname(void)
188#else 186#else
diff --git a/include/trace/boot.h b/include/trace/boot.h
deleted file mode 100644
index 088ea089e31d..000000000000
--- a/include/trace/boot.h
+++ /dev/null
@@ -1,60 +0,0 @@
1#ifndef _LINUX_TRACE_BOOT_H
2#define _LINUX_TRACE_BOOT_H
3
4#include <linux/module.h>
5#include <linux/kallsyms.h>
6#include <linux/init.h>
7
8/*
9 * Structure which defines the trace of an initcall
10 * while it is called.
11 * You don't have to fill the func field since it is
12 * only used internally by the tracer.
13 */
14struct boot_trace_call {
15 pid_t caller;
16 char func[KSYM_SYMBOL_LEN];
17};
18
19/*
20 * Structure which defines the trace of an initcall
21 * while it returns.
22 */
23struct boot_trace_ret {
24 char func[KSYM_SYMBOL_LEN];
25 int result;
26 unsigned long long duration; /* nsecs */
27};
28
29#ifdef CONFIG_BOOT_TRACER
30/* Append the traces on the ring-buffer */
31extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
32extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
33
34/* Tells the tracer that smp_pre_initcall is finished.
35 * So we can start the tracing
36 */
37extern void start_boot_trace(void);
38
39/* Resume the tracing of other necessary events
40 * such as sched switches
41 */
42extern void enable_boot_trace(void);
43
44/* Suspend this tracing. Actually, only sched_switches tracing have
45 * to be suspended. Initcalls doesn't need it.)
46 */
47extern void disable_boot_trace(void);
48#else
49static inline
50void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
51
52static inline
53void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
54
55static inline void start_boot_trace(void) { }
56static inline void enable_boot_trace(void) { }
57static inline void disable_boot_trace(void) { }
58#endif /* CONFIG_BOOT_TRACER */
59
60#endif /* __LINUX_TRACE_BOOT_H */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b9e1dd6c6208..9208c92aeab5 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -50,31 +50,6 @@ TRACE_EVENT(sched_kthread_stop_ret,
50); 50);
51 51
52/* 52/*
53 * Tracepoint for waiting on task to unschedule:
54 */
55TRACE_EVENT(sched_wait_task,
56
57 TP_PROTO(struct task_struct *p),
58
59 TP_ARGS(p),
60
61 TP_STRUCT__entry(
62 __array( char, comm, TASK_COMM_LEN )
63 __field( pid_t, pid )
64 __field( int, prio )
65 ),
66
67 TP_fast_assign(
68 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
69 __entry->pid = p->pid;
70 __entry->prio = p->prio;
71 ),
72
73 TP_printk("comm=%s pid=%d prio=%d",
74 __entry->comm, __entry->pid, __entry->prio)
75);
76
77/*
78 * Tracepoint for waking up a task: 53 * Tracepoint for waking up a task:
79 */ 54 */
80DECLARE_EVENT_CLASS(sched_wakeup_template, 55DECLARE_EVENT_CLASS(sched_wakeup_template,
@@ -240,6 +215,13 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
240 TP_ARGS(p)); 215 TP_ARGS(p));
241 216
242/* 217/*
218 * Tracepoint for waiting on task to unschedule:
219 */
220DEFINE_EVENT(sched_process_template, sched_wait_task,
221 TP_PROTO(struct task_struct *p),
222 TP_ARGS(p));
223
224/*
243 * Tracepoint for a waiting task: 225 * Tracepoint for a waiting task:
244 */ 226 */
245TRACE_EVENT(sched_process_wait, 227TRACE_EVENT(sched_process_wait,
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 9496b965d62a..c624126a9c8a 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -8,11 +8,7 @@
8#include <linux/hrtimer.h> 8#include <linux/hrtimer.h>
9#include <linux/timer.h> 9#include <linux/timer.h>
10 10
11/** 11DECLARE_EVENT_CLASS(timer_class,
12 * timer_init - called when the timer is initialized
13 * @timer: pointer to struct timer_list
14 */
15TRACE_EVENT(timer_init,
16 12
17 TP_PROTO(struct timer_list *timer), 13 TP_PROTO(struct timer_list *timer),
18 14
@@ -30,6 +26,17 @@ TRACE_EVENT(timer_init,
30); 26);
31 27
32/** 28/**
29 * timer_init - called when the timer is initialized
30 * @timer: pointer to struct timer_list
31 */
32DEFINE_EVENT(timer_class, timer_init,
33
34 TP_PROTO(struct timer_list *timer),
35
36 TP_ARGS(timer)
37);
38
39/**
33 * timer_start - called when the timer is started 40 * timer_start - called when the timer is started
34 * @timer: pointer to struct timer_list 41 * @timer: pointer to struct timer_list
35 * @expires: the timers expiry time 42 * @expires: the timers expiry time
@@ -94,42 +101,22 @@ TRACE_EVENT(timer_expire_entry,
94 * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might 101 * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
95 * be invalid. We solely track the pointer. 102 * be invalid. We solely track the pointer.
96 */ 103 */
97TRACE_EVENT(timer_expire_exit, 104DEFINE_EVENT(timer_class, timer_expire_exit,
98 105
99 TP_PROTO(struct timer_list *timer), 106 TP_PROTO(struct timer_list *timer),
100 107
101 TP_ARGS(timer), 108 TP_ARGS(timer)
102
103 TP_STRUCT__entry(
104 __field(void *, timer )
105 ),
106
107 TP_fast_assign(
108 __entry->timer = timer;
109 ),
110
111 TP_printk("timer=%p", __entry->timer)
112); 109);
113 110
114/** 111/**
115 * timer_cancel - called when the timer is canceled 112 * timer_cancel - called when the timer is canceled
116 * @timer: pointer to struct timer_list 113 * @timer: pointer to struct timer_list
117 */ 114 */
118TRACE_EVENT(timer_cancel, 115DEFINE_EVENT(timer_class, timer_cancel,
119 116
120 TP_PROTO(struct timer_list *timer), 117 TP_PROTO(struct timer_list *timer),
121 118
122 TP_ARGS(timer), 119 TP_ARGS(timer)
123
124 TP_STRUCT__entry(
125 __field( void *, timer )
126 ),
127
128 TP_fast_assign(
129 __entry->timer = timer;
130 ),
131
132 TP_printk("timer=%p", __entry->timer)
133); 120);
134 121
135/** 122/**
@@ -224,14 +211,7 @@ TRACE_EVENT(hrtimer_expire_entry,
224 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) 211 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
225 ); 212 );
226 213
227/** 214DECLARE_EVENT_CLASS(hrtimer_class,
228 * hrtimer_expire_exit - called immediately after the hrtimer callback returns
229 * @timer: pointer to struct hrtimer
230 *
231 * When used in combination with the hrtimer_expire_entry tracepoint we can
232 * determine the runtime of the callback function.
233 */
234TRACE_EVENT(hrtimer_expire_exit,
235 215
236 TP_PROTO(struct hrtimer *hrtimer), 216 TP_PROTO(struct hrtimer *hrtimer),
237 217
@@ -249,24 +229,28 @@ TRACE_EVENT(hrtimer_expire_exit,
249); 229);
250 230
251/** 231/**
252 * hrtimer_cancel - called when the hrtimer is canceled 232 * hrtimer_expire_exit - called immediately after the hrtimer callback returns
253 * @hrtimer: pointer to struct hrtimer 233 * @timer: pointer to struct hrtimer
234 *
235 * When used in combination with the hrtimer_expire_entry tracepoint we can
236 * determine the runtime of the callback function.
254 */ 237 */
255TRACE_EVENT(hrtimer_cancel, 238DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
256 239
257 TP_PROTO(struct hrtimer *hrtimer), 240 TP_PROTO(struct hrtimer *hrtimer),
258 241
259 TP_ARGS(hrtimer), 242 TP_ARGS(hrtimer)
243);
260 244
261 TP_STRUCT__entry( 245/**
262 __field( void *, hrtimer ) 246 * hrtimer_cancel - called when the hrtimer is canceled
263 ), 247 * @hrtimer: pointer to struct hrtimer
248 */
249DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
264 250
265 TP_fast_assign( 251 TP_PROTO(struct hrtimer *hrtimer),
266 __entry->hrtimer = hrtimer;
267 ),
268 252
269 TP_printk("hrtimer=%p", __entry->hrtimer) 253 TP_ARGS(hrtimer)
270); 254);
271 255
272/** 256/**
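
The whole timer.h diff is one refactor applied repeatedly: events whose TP_STRUCT__entry/TP_fast_assign/TP_printk were identical now share a DECLARE_EVENT_CLASS, and each event shrinks to a three-line DEFINE_EVENT, which also shrinks the generated code. Reassembled from the removed copies, the resulting pattern is:

DECLARE_EVENT_CLASS(timer_class,

	TP_PROTO(struct timer_list *timer),

	TP_ARGS(timer),

	TP_STRUCT__entry(
		__field( void *,	timer	)
	),

	TP_fast_assign(
		__entry->timer = timer;
	),

	TP_printk("timer=%p", __entry->timer)
);

DEFINE_EVENT(timer_class, timer_init,
	TP_PROTO(struct timer_list *timer),
	TP_ARGS(timer));

DEFINE_EVENT(timer_class, timer_expire_exit,
	TP_PROTO(struct timer_list *timer),
	TP_ARGS(timer));
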
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5a64905d7278..a9377c0083ad 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -75,15 +75,12 @@
75#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 75#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
76 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 76 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
77 77
78#undef __cpparg
79#define __cpparg(arg...) arg
80
81/* Callbacks are meaningless to ftrace. */ 78/* Callbacks are meaningless to ftrace. */
82#undef TRACE_EVENT_FN 79#undef TRACE_EVENT_FN
83#define TRACE_EVENT_FN(name, proto, args, tstruct, \ 80#define TRACE_EVENT_FN(name, proto, args, tstruct, \
84 assign, print, reg, unreg) \ 81 assign, print, reg, unreg) \
85 TRACE_EVENT(name, __cpparg(proto), __cpparg(args), \ 82 TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \
86 __cpparg(tstruct), __cpparg(assign), __cpparg(print)) \ 83 PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \
87 84
88#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 85#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
89 86
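
__cpparg() duplicated what <linux/tracepoint.h> already exports as PARAMS(); both exist only so that a comma-containing argument list survives being passed through another macro as a single parameter. A tiny userspace demonstration of the trick:

#include <stdio.h>

#define PARAMS(args...) args	/* same definition as <linux/tracepoint.h> */

/* DECL() receives "proto" and "args" each as one macro parameter even
 * though both contain commas, because the caller wrapped them in PARAMS() */
#define DECL(name, proto, args)						\
	static int name(proto) { return printf("%d %d\n", args); }

DECL(show_pair, PARAMS(int a, int b), PARAMS(a, b))

int main(void)
{
	return show_pair(1, 2) < 0;
}
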
@@ -145,7 +142,7 @@
145 * struct trace_seq *s = &iter->seq; 142 * struct trace_seq *s = &iter->seq;
146 * struct ftrace_raw_<call> *field; <-- defined in stage 1 143 * struct ftrace_raw_<call> *field; <-- defined in stage 1
147 * struct trace_entry *entry; 144 * struct trace_entry *entry;
148 * struct trace_seq *p; 145 * struct trace_seq *p = &iter->tmp_seq;
149 * int ret; 146 * int ret;
150 * 147 *
151 * entry = iter->ent; 148 * entry = iter->ent;
@@ -157,12 +154,10 @@
157 * 154 *
158 * field = (typeof(field))entry; 155 * field = (typeof(field))entry;
159 * 156 *
160 * p = &get_cpu_var(ftrace_event_seq);
161 * trace_seq_init(p); 157 * trace_seq_init(p);
162 * ret = trace_seq_printf(s, "%s: ", <call>); 158 * ret = trace_seq_printf(s, "%s: ", <call>);
163 * if (ret) 159 * if (ret)
164 * ret = trace_seq_printf(s, <TP_printk> "\n"); 160 * ret = trace_seq_printf(s, <TP_printk> "\n");
165 * put_cpu();
166 * if (!ret) 161 * if (!ret)
167 * return TRACE_TYPE_PARTIAL_LINE; 162 * return TRACE_TYPE_PARTIAL_LINE;
168 * 163 *
@@ -216,7 +211,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
216 struct trace_seq *s = &iter->seq; \ 211 struct trace_seq *s = &iter->seq; \
217 struct ftrace_raw_##call *field; \ 212 struct ftrace_raw_##call *field; \
218 struct trace_entry *entry; \ 213 struct trace_entry *entry; \
219 struct trace_seq *p; \ 214 struct trace_seq *p = &iter->tmp_seq; \
220 int ret; \ 215 int ret; \
221 \ 216 \
222 event = container_of(trace_event, struct ftrace_event_call, \ 217 event = container_of(trace_event, struct ftrace_event_call, \
@@ -231,12 +226,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
231 \ 226 \
232 field = (typeof(field))entry; \ 227 field = (typeof(field))entry; \
233 \ 228 \
234 p = &get_cpu_var(ftrace_event_seq); \
235 trace_seq_init(p); \ 229 trace_seq_init(p); \
236 ret = trace_seq_printf(s, "%s: ", event->name); \ 230 ret = trace_seq_printf(s, "%s: ", event->name); \
237 if (ret) \ 231 if (ret) \
238 ret = trace_seq_printf(s, print); \ 232 ret = trace_seq_printf(s, print); \
239 put_cpu(); \
240 if (!ret) \ 233 if (!ret) \
241 return TRACE_TYPE_PARTIAL_LINE; \ 234 return TRACE_TYPE_PARTIAL_LINE; \
242 \ 235 \
@@ -255,7 +248,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
255 struct trace_seq *s = &iter->seq; \ 248 struct trace_seq *s = &iter->seq; \
256 struct ftrace_raw_##template *field; \ 249 struct ftrace_raw_##template *field; \
257 struct trace_entry *entry; \ 250 struct trace_entry *entry; \
258 struct trace_seq *p; \ 251 struct trace_seq *p = &iter->tmp_seq; \
259 int ret; \ 252 int ret; \
260 \ 253 \
261 entry = iter->ent; \ 254 entry = iter->ent; \
@@ -267,12 +260,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
267 \ 260 \
268 field = (typeof(field))entry; \ 261 field = (typeof(field))entry; \
269 \ 262 \
270 p = &get_cpu_var(ftrace_event_seq); \
271 trace_seq_init(p); \ 263 trace_seq_init(p); \
272 ret = trace_seq_printf(s, "%s: ", #call); \ 264 ret = trace_seq_printf(s, "%s: ", #call); \
273 if (ret) \ 265 if (ret) \
274 ret = trace_seq_printf(s, print); \ 266 ret = trace_seq_printf(s, print); \
275 put_cpu(); \
276 if (!ret) \ 267 if (!ret) \
277 return TRACE_TYPE_PARTIAL_LINE; \ 268 return TRACE_TYPE_PARTIAL_LINE; \
278 \ 269 \
@@ -439,6 +430,7 @@ static inline notrace int ftrace_get_offsets_##call( \
439 * .fields = LIST_HEAD_INIT(event_class_##call.fields), 430 * .fields = LIST_HEAD_INIT(event_class_##call.fields),
440 * .raw_init = trace_event_raw_init, 431 * .raw_init = trace_event_raw_init,
441 * .probe = ftrace_raw_event_##call, 432 * .probe = ftrace_raw_event_##call,
433 * .reg = ftrace_event_reg,
442 * }; 434 * };
443 * 435 *
444 * static struct ftrace_event_call __used 436 * static struct ftrace_event_call __used
@@ -567,6 +559,7 @@ static struct ftrace_event_class __used event_class_##call = { \
567 .fields = LIST_HEAD_INIT(event_class_##call.fields),\ 559 .fields = LIST_HEAD_INIT(event_class_##call.fields),\
568 .raw_init = trace_event_raw_init, \ 560 .raw_init = trace_event_raw_init, \
569 .probe = ftrace_raw_event_##call, \ 561 .probe = ftrace_raw_event_##call, \
562 .reg = ftrace_event_reg, \
570 _TRACE_PERF_INIT(call) \ 563 _TRACE_PERF_INIT(call) \
571}; 564};
572 565
@@ -705,7 +698,7 @@ perf_trace_##call(void *__data, proto) \
705 int __data_size; \ 698 int __data_size; \
706 int rctx; \ 699 int rctx; \
707 \ 700 \
708 perf_fetch_caller_regs(&__regs, 1); \ 701 perf_fetch_caller_regs(&__regs); \
709 \ 702 \
710 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ 703 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
711 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ 704 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 257e08960d7b..31966a4fb8cc 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -26,7 +26,6 @@ struct syscall_metadata {
26 const char **types; 26 const char **types;
27 const char **args; 27 const char **args;
28 struct list_head enter_fields; 28 struct list_head enter_fields;
29 struct list_head exit_fields;
30 29
31 struct ftrace_event_call *enter_event; 30 struct ftrace_event_call *enter_event;
32 struct ftrace_event_call *exit_event; 31 struct ftrace_event_call *exit_event;
diff --git a/init/main.c b/init/main.c
index 4ddb53f04f2a..b03a4c1f69fa 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,11 +66,9 @@
66#include <linux/ftrace.h> 66#include <linux/ftrace.h>
67#include <linux/async.h> 67#include <linux/async.h>
68#include <linux/kmemcheck.h> 68#include <linux/kmemcheck.h>
69#include <linux/kmemtrace.h>
70#include <linux/sfi.h> 69#include <linux/sfi.h>
71#include <linux/shmem_fs.h> 70#include <linux/shmem_fs.h>
72#include <linux/slab.h> 71#include <linux/slab.h>
73#include <trace/boot.h>
74 72
75#include <asm/io.h> 73#include <asm/io.h>
76#include <asm/bugs.h> 74#include <asm/bugs.h>
@@ -664,7 +662,6 @@ asmlinkage void __init start_kernel(void)
664#endif 662#endif
665 page_cgroup_init(); 663 page_cgroup_init();
666 enable_debug_pagealloc(); 664 enable_debug_pagealloc();
667 kmemtrace_init();
668 kmemleak_init(); 665 kmemleak_init();
669 debug_objects_mem_init(); 666 debug_objects_mem_init();
670 idr_init_cache(); 667 idr_init_cache();
@@ -726,38 +723,33 @@ int initcall_debug;
726core_param(initcall_debug, initcall_debug, bool, 0644); 723core_param(initcall_debug, initcall_debug, bool, 0644);
727 724
728static char msgbuf[64]; 725static char msgbuf[64];
729static struct boot_trace_call call;
730static struct boot_trace_ret ret;
731 726
732int do_one_initcall(initcall_t fn) 727int do_one_initcall(initcall_t fn)
733{ 728{
734 int count = preempt_count(); 729 int count = preempt_count();
735 ktime_t calltime, delta, rettime; 730 ktime_t calltime, delta, rettime;
731 unsigned long long duration;
732 int ret;
736 733
737 if (initcall_debug) { 734 if (initcall_debug) {
738 call.caller = task_pid_nr(current); 735 printk("calling %pF @ %i\n", fn, task_pid_nr(current));
739 printk("calling %pF @ %i\n", fn, call.caller);
740 calltime = ktime_get(); 736 calltime = ktime_get();
741 trace_boot_call(&call, fn);
742 enable_boot_trace();
743 } 737 }
744 738
745 ret.result = fn(); 739 ret = fn();
746 740
747 if (initcall_debug) { 741 if (initcall_debug) {
748 disable_boot_trace();
749 rettime = ktime_get(); 742 rettime = ktime_get();
750 delta = ktime_sub(rettime, calltime); 743 delta = ktime_sub(rettime, calltime);
751 ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10; 744 duration = (unsigned long long) ktime_to_ns(delta) >> 10;
752 trace_boot_ret(&ret, fn); 745 printk("initcall %pF returned %d after %lld usecs\n", fn,
753 printk("initcall %pF returned %d after %Ld usecs\n", fn, 746 ret, duration);
754 ret.result, ret.duration);
755 } 747 }
756 748
757 msgbuf[0] = 0; 749 msgbuf[0] = 0;
758 750
759 if (ret.result && ret.result != -ENODEV && initcall_debug) 751 if (ret && ret != -ENODEV && initcall_debug)
760 sprintf(msgbuf, "error code %d ", ret.result); 752 sprintf(msgbuf, "error code %d ", ret);
761 753
762 if (preempt_count() != count) { 754 if (preempt_count() != count) {
763 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); 755 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -771,7 +763,7 @@ int do_one_initcall(initcall_t fn)
771 printk("initcall %pF returned with %s\n", fn, msgbuf); 763 printk("initcall %pF returned with %s\n", fn, msgbuf);
772 } 764 }
773 765
774 return ret.result; 766 return ret;
775} 767}
776 768
777 769
@@ -895,7 +887,6 @@ static int __init kernel_init(void * unused)
895 smp_prepare_cpus(setup_max_cpus); 887 smp_prepare_cpus(setup_max_cpus);
896 888
897 do_pre_smp_initcalls(); 889 do_pre_smp_initcalls();
898 start_boot_trace();
899 890
900 smp_init(); 891 smp_init();
901 sched_init_smp(); 892 sched_init_smp();
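
With the boot tracer and its trace/boot.h structures gone, initcall timing is plain printk again: booting with initcall_debug on the kernel command line produces, per the format strings above, output of the form (symbol and timing values illustrative):

calling acpi_init+0x0/0x2a0 @ 1
initcall acpi_init+0x0/0x2a0 returned 0 after 5900 usecs
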
diff --git a/kernel/Makefile b/kernel/Makefile
index 057472fbc272..ce53fb2bd1d9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,8 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
76obj-$(CONFIG_AUDIT_TREE) += audit_tree.o 76obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
77obj-$(CONFIG_KPROBES) += kprobes.o 77obj-$(CONFIG_KPROBES) += kprobes.o
78obj-$(CONFIG_KGDB) += debug/ 78obj-$(CONFIG_KGDB) += debug/
79obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
80obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o 79obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
80obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
81obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 81obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
82obj-$(CONFIG_SECCOMP) += seccomp.o 82obj-$(CONFIG_SECCOMP) += seccomp.o
83obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 83obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 71ed3ce29e12..d71a987fd2bf 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -41,6 +41,7 @@
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/list.h>
44#include <linux/cpu.h> 45#include <linux/cpu.h>
45#include <linux/smp.h> 46#include <linux/smp.h>
46 47
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
62 63
63static int nr_slots[TYPE_MAX]; 64static int nr_slots[TYPE_MAX];
64 65
66/* Keep track of the breakpoints attached to tasks */
67static LIST_HEAD(bp_task_head);
68
65static int constraints_initialized; 69static int constraints_initialized;
66 70
67/* Gather the number of total pinned and un-pinned bp in a cpuset */ 71/* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
103 return 0; 107 return 0;
104} 108}
105 109
106static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type) 110/*
111 * Count the number of breakpoints of the same type and same task.
112 * The given event must be not on the list.
113 */
114static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
107{ 115{
108 struct perf_event_context *ctx = tsk->perf_event_ctxp; 116 struct perf_event_context *ctx = bp->ctx;
109 struct list_head *list; 117 struct perf_event *iter;
110 struct perf_event *bp;
111 unsigned long flags;
112 int count = 0; 118 int count = 0;
113 119
114 if (WARN_ONCE(!ctx, "No perf context for this task")) 120 list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
115 return 0; 121 if (iter->ctx == ctx && find_slot_idx(iter) == type)
116 122 count += hw_breakpoint_weight(iter);
117 list = &ctx->event_list;
118
119 raw_spin_lock_irqsave(&ctx->lock, flags);
120
121 /*
122 * The current breakpoint counter is not included in the list
123 * at the open() callback time
124 */
125 list_for_each_entry(bp, list, event_entry) {
126 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
127 if (find_slot_idx(bp) == type)
128 count += hw_breakpoint_weight(bp);
129 } 123 }
130 124
131 raw_spin_unlock_irqrestore(&ctx->lock, flags);
132
133 return count; 125 return count;
134} 126}
135 127
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
149 if (!tsk) 141 if (!tsk)
150 slots->pinned += max_task_bp_pinned(cpu, type); 142 slots->pinned += max_task_bp_pinned(cpu, type);
151 else 143 else
152 slots->pinned += task_bp_pinned(tsk, type); 144 slots->pinned += task_bp_pinned(bp, type);
153 slots->flexible = per_cpu(nr_bp_flexible[type], cpu); 145 slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
154 146
155 return; 147 return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
162 if (!tsk) 154 if (!tsk)
163 nr += max_task_bp_pinned(cpu, type); 155 nr += max_task_bp_pinned(cpu, type);
164 else 156 else
165 nr += task_bp_pinned(tsk, type); 157 nr += task_bp_pinned(bp, type);
166 158
167 if (nr > slots->pinned) 159 if (nr > slots->pinned)
168 slots->pinned = nr; 160 slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
188/* 180/*
189 * Add a pinned breakpoint for the given task in our constraint table 181 * Add a pinned breakpoint for the given task in our constraint table
190 */ 182 */
191static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, 183static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
192 enum bp_type_idx type, int weight) 184 enum bp_type_idx type, int weight)
193{ 185{
194 unsigned int *tsk_pinned; 186 unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
196 int old_idx = 0; 188 int old_idx = 0;
197 int idx = 0; 189 int idx = 0;
198 190
199 old_count = task_bp_pinned(tsk, type); 191 old_count = task_bp_pinned(bp, type);
200 old_idx = old_count - 1; 192 old_idx = old_count - 1;
201 idx = old_idx + weight; 193 idx = old_idx + weight;
202 194
195 /* tsk_pinned[n] is the number of tasks having n breakpoints */
203 tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); 196 tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
204 if (enable) { 197 if (enable) {
205 tsk_pinned[idx]++; 198 tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
222 int cpu = bp->cpu; 215 int cpu = bp->cpu;
223 struct task_struct *tsk = bp->ctx->task; 216 struct task_struct *tsk = bp->ctx->task;
224 217
218 /* Pinned counter cpu profiling */
219 if (!tsk) {
220
221 if (enable)
222 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
223 else
224 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
225 return;
226 }
227
225 /* Pinned counter task profiling */ 228 /* Pinned counter task profiling */
226 if (tsk) {
227 if (cpu >= 0) {
228 toggle_bp_task_slot(tsk, cpu, enable, type, weight);
229 return;
230 }
231 229
230 if (!enable)
231 list_del(&bp->hw.bp_list);
232
233 if (cpu >= 0) {
234 toggle_bp_task_slot(bp, cpu, enable, type, weight);
235 } else {
232 for_each_online_cpu(cpu) 236 for_each_online_cpu(cpu)
233 toggle_bp_task_slot(tsk, cpu, enable, type, weight); 237 toggle_bp_task_slot(bp, cpu, enable, type, weight);
234 return;
235 } 238 }
236 239
237 /* Pinned counter cpu profiling */
238 if (enable) 240 if (enable)
239 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight; 241 list_add_tail(&bp->hw.bp_list, &bp_task_head);
240 else
241 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
242} 242}
243 243
244/* 244/*
@@ -312,6 +312,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
312 weight = hw_breakpoint_weight(bp); 312 weight = hw_breakpoint_weight(bp);
313 313
314 fetch_bp_busy_slots(&slots, bp, type); 314 fetch_bp_busy_slots(&slots, bp, type);
315 /*
316 * Simulate the addition of this breakpoint to the constraints
317 * and see the result.
318 */
315 fetch_this_slot(&slots, weight); 319 fetch_this_slot(&slots, weight);
316 320
317 /* Flexible counters need to keep at least one slot */ 321 /* Flexible counters need to keep at least one slot */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff86c558af4c..c772a3d4000d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
675 struct perf_event *event, *partial_group = NULL; 675 struct perf_event *event, *partial_group = NULL;
676 const struct pmu *pmu = group_event->pmu; 676 const struct pmu *pmu = group_event->pmu;
677 bool txn = false; 677 bool txn = false;
678 int ret;
679 678
680 if (group_event->state == PERF_EVENT_STATE_OFF) 679 if (group_event->state == PERF_EVENT_STATE_OFF)
681 return 0; 680 return 0;
@@ -703,14 +702,8 @@ group_sched_in(struct perf_event *group_event,
703 } 702 }
704 } 703 }
705 704
706 if (!txn) 705 if (!txn || !pmu->commit_txn(pmu))
707 return 0;
708
709 ret = pmu->commit_txn(pmu);
710 if (!ret) {
711 pmu->cancel_txn(pmu);
712 return 0; 706 return 0;
713 }
714 707
715group_error: 708group_error:
716 /* 709 /*
@@ -1155,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
1155 * In order to keep per-task stats reliable we need to flip the event 1148 * In order to keep per-task stats reliable we need to flip the event
1156 * values when we flip the contexts. 1149 * values when we flip the contexts.
1157 */ 1150 */
1158 value = atomic64_read(&next_event->count); 1151 value = local64_read(&next_event->count);
1159 value = atomic64_xchg(&event->count, value); 1152 value = local64_xchg(&event->count, value);
1160 atomic64_set(&next_event->count, value); 1153 local64_set(&next_event->count, value);
1161 1154
1162 swap(event->total_time_enabled, next_event->total_time_enabled); 1155 swap(event->total_time_enabled, next_event->total_time_enabled);
1163 swap(event->total_time_running, next_event->total_time_running); 1156 swap(event->total_time_running, next_event->total_time_running);
@@ -1547,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1547 1540
1548 hwc->sample_period = sample_period; 1541 hwc->sample_period = sample_period;
1549 1542
1550 if (atomic64_read(&hwc->period_left) > 8*sample_period) { 1543 if (local64_read(&hwc->period_left) > 8*sample_period) {
1551 perf_disable(); 1544 perf_disable();
1552 perf_event_stop(event); 1545 perf_event_stop(event);
1553 atomic64_set(&hwc->period_left, 0); 1546 local64_set(&hwc->period_left, 0);
1554 perf_event_start(event); 1547 perf_event_start(event);
1555 perf_enable(); 1548 perf_enable();
1556 } 1549 }
@@ -1591,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1591 1584
1592 perf_disable(); 1585 perf_disable();
1593 event->pmu->read(event); 1586 event->pmu->read(event);
1594 now = atomic64_read(&event->count); 1587 now = local64_read(&event->count);
1595 delta = now - hwc->freq_count_stamp; 1588 delta = now - hwc->freq_count_stamp;
1596 hwc->freq_count_stamp = now; 1589 hwc->freq_count_stamp = now;
1597 1590
@@ -1743,6 +1736,11 @@ static void __perf_event_read(void *info)
1743 event->pmu->read(event); 1736 event->pmu->read(event);
1744} 1737}
1745 1738
1739static inline u64 perf_event_count(struct perf_event *event)
1740{
1741 return local64_read(&event->count) + atomic64_read(&event->child_count);
1742}
1743
1746static u64 perf_event_read(struct perf_event *event) 1744static u64 perf_event_read(struct perf_event *event)
1747{ 1745{
1748 /* 1746 /*
@@ -1762,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
1762 raw_spin_unlock_irqrestore(&ctx->lock, flags); 1760 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1763 } 1761 }
1764 1762
1765 return atomic64_read(&event->count); 1763 return perf_event_count(event);
1766} 1764}
1767 1765
1768/* 1766/*
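
Why perf_event_count() (added just above) mixes the two counter types, as I read this series: event->count is only written from the event's own CPU (PMU interrupt, sched in/out), so the cheap local64_t is enough, while child_count is folded in from other CPUs when inherited children exit and therefore remains a real atomic64_t. Annotated:

static inline u64 perf_event_count(struct perf_event *event)
{
	return local64_read(&event->count) +		/* owner-CPU half   */
	       atomic64_read(&event->child_count);	/* cross-CPU rollup */
}
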
@@ -1883,7 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
1883} 1881}
1884 1882
1885static void perf_pending_sync(struct perf_event *event); 1883static void perf_pending_sync(struct perf_event *event);
1886static void perf_mmap_data_put(struct perf_mmap_data *data); 1884static void perf_buffer_put(struct perf_buffer *buffer);
1887 1885
1888static void free_event(struct perf_event *event) 1886static void free_event(struct perf_event *event)
1889{ 1887{
@@ -1891,7 +1889,7 @@ static void free_event(struct perf_event *event)
1891 1889
1892 if (!event->parent) { 1890 if (!event->parent) {
1893 atomic_dec(&nr_events); 1891 atomic_dec(&nr_events);
1894 if (event->attr.mmap) 1892 if (event->attr.mmap || event->attr.mmap_data)
1895 atomic_dec(&nr_mmap_events); 1893 atomic_dec(&nr_mmap_events);
1896 if (event->attr.comm) 1894 if (event->attr.comm)
1897 atomic_dec(&nr_comm_events); 1895 atomic_dec(&nr_comm_events);
@@ -1899,9 +1897,9 @@ static void free_event(struct perf_event *event)
1899 atomic_dec(&nr_task_events); 1897 atomic_dec(&nr_task_events);
1900 } 1898 }
1901 1899
1902 if (event->data) { 1900 if (event->buffer) {
1903 perf_mmap_data_put(event->data); 1901 perf_buffer_put(event->buffer);
1904 event->data = NULL; 1902 event->buffer = NULL;
1905 } 1903 }
1906 1904
1907 if (event->destroy) 1905 if (event->destroy)
@@ -2126,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
2126static unsigned int perf_poll(struct file *file, poll_table *wait) 2124static unsigned int perf_poll(struct file *file, poll_table *wait)
2127{ 2125{
2128 struct perf_event *event = file->private_data; 2126 struct perf_event *event = file->private_data;
2129 struct perf_mmap_data *data; 2127 struct perf_buffer *buffer;
2130 unsigned int events = POLL_HUP; 2128 unsigned int events = POLL_HUP;
2131 2129
2132 rcu_read_lock(); 2130 rcu_read_lock();
2133 data = rcu_dereference(event->data); 2131 buffer = rcu_dereference(event->buffer);
2134 if (data) 2132 if (buffer)
2135 events = atomic_xchg(&data->poll, 0); 2133 events = atomic_xchg(&buffer->poll, 0);
2136 rcu_read_unlock(); 2134 rcu_read_unlock();
2137 2135
2138 poll_wait(file, &event->waitq, wait); 2136 poll_wait(file, &event->waitq, wait);
@@ -2143,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
2143static void perf_event_reset(struct perf_event *event) 2141static void perf_event_reset(struct perf_event *event)
2144{ 2142{
2145 (void)perf_event_read(event); 2143 (void)perf_event_read(event);
2146 atomic64_set(&event->count, 0); 2144 local64_set(&event->count, 0);
2147 perf_event_update_userpage(event); 2145 perf_event_update_userpage(event);
2148} 2146}
2149 2147
@@ -2342,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
2342void perf_event_update_userpage(struct perf_event *event) 2340void perf_event_update_userpage(struct perf_event *event)
2343{ 2341{
2344 struct perf_event_mmap_page *userpg; 2342 struct perf_event_mmap_page *userpg;
2345 struct perf_mmap_data *data; 2343 struct perf_buffer *buffer;
2346 2344
2347 rcu_read_lock(); 2345 rcu_read_lock();
2348 data = rcu_dereference(event->data); 2346 buffer = rcu_dereference(event->buffer);
2349 if (!data) 2347 if (!buffer)
2350 goto unlock; 2348 goto unlock;
2351 2349
2352 userpg = data->user_page; 2350 userpg = buffer->user_page;
2353 2351
2354 /* 2352 /*
2355 * Disable preemption so as to not let the corresponding user-space 2353 * Disable preemption so as to not let the corresponding user-space
@@ -2359,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
2359 ++userpg->lock; 2357 ++userpg->lock;
2360 barrier(); 2358 barrier();
2361 userpg->index = perf_event_index(event); 2359 userpg->index = perf_event_index(event);
2362 userpg->offset = atomic64_read(&event->count); 2360 userpg->offset = perf_event_count(event);
2363 if (event->state == PERF_EVENT_STATE_ACTIVE) 2361 if (event->state == PERF_EVENT_STATE_ACTIVE)
2364 userpg->offset -= atomic64_read(&event->hw.prev_count); 2362 userpg->offset -= local64_read(&event->hw.prev_count);
2365 2363
2366 userpg->time_enabled = event->total_time_enabled + 2364 userpg->time_enabled = event->total_time_enabled +
2367 atomic64_read(&event->child_total_time_enabled); 2365 atomic64_read(&event->child_total_time_enabled);
@@ -2376,6 +2374,25 @@ unlock:
2376 rcu_read_unlock(); 2374 rcu_read_unlock();
2377} 2375}
2378 2376
2377static unsigned long perf_data_size(struct perf_buffer *buffer);
2378
2379static void
2380perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
2381{
2382 long max_size = perf_data_size(buffer);
2383
2384 if (watermark)
2385 buffer->watermark = min(max_size, watermark);
2386
2387 if (!buffer->watermark)
2388 buffer->watermark = max_size / 2;
2389
2390 if (flags & PERF_BUFFER_WRITABLE)
2391 buffer->writable = 1;
2392
2393 atomic_set(&buffer->refcount, 1);
2394}
2395
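
The added perf_buffer_init() clamps a requested wakeup watermark to the buffer size and defaults to half the buffer when none was given. A plain-C sketch of just that policy (function name hypothetical):

#include <stddef.h>

/* Hypothetical: choose a wakeup watermark for a ring buffer of buf_size bytes. */
static long pick_watermark(long buf_size, long requested)
{
	long wm = 0;

	if (requested)
		wm = requested < buf_size ? requested : buf_size;  /* min() clamp */
	if (!wm)
		wm = buf_size / 2;      /* default: wake the consumer at half full */
	return wm;
}
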
2379#ifndef CONFIG_PERF_USE_VMALLOC 2396#ifndef CONFIG_PERF_USE_VMALLOC
2380 2397
2381/* 2398/*
@@ -2383,15 +2400,15 @@ unlock:
2383 */ 2400 */
2384 2401
2385static struct page * 2402static struct page *
2386perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) 2403perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
2387{ 2404{
2388 if (pgoff > data->nr_pages) 2405 if (pgoff > buffer->nr_pages)
2389 return NULL; 2406 return NULL;
2390 2407
2391 if (pgoff == 0) 2408 if (pgoff == 0)
2392 return virt_to_page(data->user_page); 2409 return virt_to_page(buffer->user_page);
2393 2410
2394 return virt_to_page(data->data_pages[pgoff - 1]); 2411 return virt_to_page(buffer->data_pages[pgoff - 1]);
2395} 2412}
2396 2413
2397static void *perf_mmap_alloc_page(int cpu) 2414static void *perf_mmap_alloc_page(int cpu)
@@ -2407,42 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
2407 return page_address(page); 2424 return page_address(page);
2408} 2425}
2409 2426
2410static struct perf_mmap_data * 2427static struct perf_buffer *
2411perf_mmap_data_alloc(struct perf_event *event, int nr_pages) 2428perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
2412{ 2429{
2413 struct perf_mmap_data *data; 2430 struct perf_buffer *buffer;
2414 unsigned long size; 2431 unsigned long size;
2415 int i; 2432 int i;
2416 2433
2417 size = sizeof(struct perf_mmap_data); 2434 size = sizeof(struct perf_buffer);
2418 size += nr_pages * sizeof(void *); 2435 size += nr_pages * sizeof(void *);
2419 2436
2420 data = kzalloc(size, GFP_KERNEL); 2437 buffer = kzalloc(size, GFP_KERNEL);
2421 if (!data) 2438 if (!buffer)
2422 goto fail; 2439 goto fail;
2423 2440
2424 data->user_page = perf_mmap_alloc_page(event->cpu); 2441 buffer->user_page = perf_mmap_alloc_page(cpu);
2425 if (!data->user_page) 2442 if (!buffer->user_page)
2426 goto fail_user_page; 2443 goto fail_user_page;
2427 2444
2428 for (i = 0; i < nr_pages; i++) { 2445 for (i = 0; i < nr_pages; i++) {
2429 data->data_pages[i] = perf_mmap_alloc_page(event->cpu); 2446 buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
2430 if (!data->data_pages[i]) 2447 if (!buffer->data_pages[i])
2431 goto fail_data_pages; 2448 goto fail_data_pages;
2432 } 2449 }
2433 2450
2434 data->nr_pages = nr_pages; 2451 buffer->nr_pages = nr_pages;
2452
2453 perf_buffer_init(buffer, watermark, flags);
2435 2454
2436 return data; 2455 return buffer;
2437 2456
2438fail_data_pages: 2457fail_data_pages:
2439 for (i--; i >= 0; i--) 2458 for (i--; i >= 0; i--)
2440 free_page((unsigned long)data->data_pages[i]); 2459 free_page((unsigned long)buffer->data_pages[i]);
2441 2460
2442 free_page((unsigned long)data->user_page); 2461 free_page((unsigned long)buffer->user_page);
2443 2462
2444fail_user_page: 2463fail_user_page:
2445 kfree(data); 2464 kfree(buffer);
2446 2465
2447fail: 2466fail:
2448 return NULL; 2467 return NULL;
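
perf_buffer_alloc() above allocates the user page and each data page separately and, on any failure, walks back over whatever was already allocated before freeing the struct itself. A self-contained user-space sketch of the same goto-unwind pattern, with malloc()/free() standing in for the page allocator and all names hypothetical:

#include <stdlib.h>

struct toy_buffer {
	void *user_page;
	int   nr_pages;
	void *data_pages[];              /* flexible array, one slot per data page */
};

static struct toy_buffer *toy_buffer_alloc(int nr_pages, size_t page_size)
{
	struct toy_buffer *b;
	int i;

	b = calloc(1, sizeof(*b) + nr_pages * sizeof(void *));
	if (!b)
		goto fail;

	b->user_page = calloc(1, page_size);
	if (!b->user_page)
		goto fail_user_page;

	for (i = 0; i < nr_pages; i++) {
		b->data_pages[i] = calloc(1, page_size);
		if (!b->data_pages[i])
			goto fail_data_pages;
	}
	b->nr_pages = nr_pages;
	return b;

fail_data_pages:                         /* free only what was allocated so far */
	for (i--; i >= 0; i--)
		free(b->data_pages[i]);
	free(b->user_page);
fail_user_page:
	free(b);
fail:
	return NULL;
}
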
@@ -2456,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
2456 __free_page(page); 2475 __free_page(page);
2457} 2476}
2458 2477
2459static void perf_mmap_data_free(struct perf_mmap_data *data) 2478static void perf_buffer_free(struct perf_buffer *buffer)
2460{ 2479{
2461 int i; 2480 int i;
2462 2481
2463 perf_mmap_free_page((unsigned long)data->user_page); 2482 perf_mmap_free_page((unsigned long)buffer->user_page);
2464 for (i = 0; i < data->nr_pages; i++) 2483 for (i = 0; i < buffer->nr_pages; i++)
2465 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2484 perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
2466 kfree(data); 2485 kfree(buffer);
2467} 2486}
2468 2487
2469static inline int page_order(struct perf_mmap_data *data) 2488static inline int page_order(struct perf_buffer *buffer)
2470{ 2489{
2471 return 0; 2490 return 0;
2472} 2491}
@@ -2479,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
2479 * Required for architectures that have d-cache aliasing issues. 2498 * Required for architectures that have d-cache aliasing issues.
2480 */ 2499 */
2481 2500
2482static inline int page_order(struct perf_mmap_data *data) 2501static inline int page_order(struct perf_buffer *buffer)
2483{ 2502{
2484 return data->page_order; 2503 return buffer->page_order;
2485} 2504}
2486 2505
2487static struct page * 2506static struct page *
2488perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) 2507perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
2489{ 2508{
2490 if (pgoff > (1UL << page_order(data))) 2509 if (pgoff > (1UL << page_order(buffer)))
2491 return NULL; 2510 return NULL;
2492 2511
2493 return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); 2512 return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
2494} 2513}
2495 2514
2496static void perf_mmap_unmark_page(void *addr) 2515static void perf_mmap_unmark_page(void *addr)
@@ -2500,57 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
2500 page->mapping = NULL; 2519 page->mapping = NULL;
2501} 2520}
2502 2521
2503static void perf_mmap_data_free_work(struct work_struct *work) 2522static void perf_buffer_free_work(struct work_struct *work)
2504{ 2523{
2505 struct perf_mmap_data *data; 2524 struct perf_buffer *buffer;
2506 void *base; 2525 void *base;
2507 int i, nr; 2526 int i, nr;
2508 2527
2509 data = container_of(work, struct perf_mmap_data, work); 2528 buffer = container_of(work, struct perf_buffer, work);
2510 nr = 1 << page_order(data); 2529 nr = 1 << page_order(buffer);
2511 2530
2512 base = data->user_page; 2531 base = buffer->user_page;
2513 for (i = 0; i < nr + 1; i++) 2532 for (i = 0; i < nr + 1; i++)
2514 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2533 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2515 2534
2516 vfree(base); 2535 vfree(base);
2517 kfree(data); 2536 kfree(buffer);
2518} 2537}
2519 2538
2520static void perf_mmap_data_free(struct perf_mmap_data *data) 2539static void perf_buffer_free(struct perf_buffer *buffer)
2521{ 2540{
2522 schedule_work(&data->work); 2541 schedule_work(&buffer->work);
2523} 2542}
2524 2543
2525static struct perf_mmap_data * 2544static struct perf_buffer *
2526perf_mmap_data_alloc(struct perf_event *event, int nr_pages) 2545perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
2527{ 2546{
2528 struct perf_mmap_data *data; 2547 struct perf_buffer *buffer;
2529 unsigned long size; 2548 unsigned long size;
2530 void *all_buf; 2549 void *all_buf;
2531 2550
2532 size = sizeof(struct perf_mmap_data); 2551 size = sizeof(struct perf_buffer);
2533 size += sizeof(void *); 2552 size += sizeof(void *);
2534 2553
2535 data = kzalloc(size, GFP_KERNEL); 2554 buffer = kzalloc(size, GFP_KERNEL);
2536 if (!data) 2555 if (!buffer)
2537 goto fail; 2556 goto fail;
2538 2557
2539 INIT_WORK(&data->work, perf_mmap_data_free_work); 2558 INIT_WORK(&buffer->work, perf_buffer_free_work);
2540 2559
2541 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); 2560 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
2542 if (!all_buf) 2561 if (!all_buf)
2543 goto fail_all_buf; 2562 goto fail_all_buf;
2544 2563
2545 data->user_page = all_buf; 2564 buffer->user_page = all_buf;
2546 data->data_pages[0] = all_buf + PAGE_SIZE; 2565 buffer->data_pages[0] = all_buf + PAGE_SIZE;
2547 data->page_order = ilog2(nr_pages); 2566 buffer->page_order = ilog2(nr_pages);
2548 data->nr_pages = 1; 2567 buffer->nr_pages = 1;
2568
2569 perf_buffer_init(buffer, watermark, flags);
2549 2570
2550 return data; 2571 return buffer;
2551 2572
2552fail_all_buf: 2573fail_all_buf:
2553 kfree(data); 2574 kfree(buffer);
2554 2575
2555fail: 2576fail:
2556 return NULL; 2577 return NULL;
@@ -2558,15 +2579,15 @@ fail:
2558 2579
2559#endif 2580#endif
2560 2581
2561static unsigned long perf_data_size(struct perf_mmap_data *data) 2582static unsigned long perf_data_size(struct perf_buffer *buffer)
2562{ 2583{
2563 return data->nr_pages << (PAGE_SHIFT + page_order(data)); 2584 return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
2564} 2585}
2565 2586
2566static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2587static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2567{ 2588{
2568 struct perf_event *event = vma->vm_file->private_data; 2589 struct perf_event *event = vma->vm_file->private_data;
2569 struct perf_mmap_data *data; 2590 struct perf_buffer *buffer;
2570 int ret = VM_FAULT_SIGBUS; 2591 int ret = VM_FAULT_SIGBUS;
2571 2592
2572 if (vmf->flags & FAULT_FLAG_MKWRITE) { 2593 if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2576,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2576 } 2597 }
2577 2598
2578 rcu_read_lock(); 2599 rcu_read_lock();
2579 data = rcu_dereference(event->data); 2600 buffer = rcu_dereference(event->buffer);
2580 if (!data) 2601 if (!buffer)
2581 goto unlock; 2602 goto unlock;
2582 2603
2583 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) 2604 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
2584 goto unlock; 2605 goto unlock;
2585 2606
2586 vmf->page = perf_mmap_to_page(data, vmf->pgoff); 2607 vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
2587 if (!vmf->page) 2608 if (!vmf->page)
2588 goto unlock; 2609 goto unlock;
2589 2610
@@ -2598,52 +2619,35 @@ unlock:
2598 return ret; 2619 return ret;
2599} 2620}
2600 2621
2601static void 2622static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
2602perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2603{
2604 long max_size = perf_data_size(data);
2605
2606 if (event->attr.watermark) {
2607 data->watermark = min_t(long, max_size,
2608 event->attr.wakeup_watermark);
2609 }
2610
2611 if (!data->watermark)
2612 data->watermark = max_size / 2;
2613
2614 atomic_set(&data->refcount, 1);
2615 rcu_assign_pointer(event->data, data);
2616}
2617
2618static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2619{ 2623{
2620 struct perf_mmap_data *data; 2624 struct perf_buffer *buffer;
2621 2625
2622 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2626 buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
2623 perf_mmap_data_free(data); 2627 perf_buffer_free(buffer);
2624} 2628}
2625 2629
2626static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event) 2630static struct perf_buffer *perf_buffer_get(struct perf_event *event)
2627{ 2631{
2628 struct perf_mmap_data *data; 2632 struct perf_buffer *buffer;
2629 2633
2630 rcu_read_lock(); 2634 rcu_read_lock();
2631 data = rcu_dereference(event->data); 2635 buffer = rcu_dereference(event->buffer);
2632 if (data) { 2636 if (buffer) {
2633 if (!atomic_inc_not_zero(&data->refcount)) 2637 if (!atomic_inc_not_zero(&buffer->refcount))
2634 data = NULL; 2638 buffer = NULL;
2635 } 2639 }
2636 rcu_read_unlock(); 2640 rcu_read_unlock();
2637 2641
2638 return data; 2642 return buffer;
2639} 2643}
2640 2644
2641static void perf_mmap_data_put(struct perf_mmap_data *data) 2645static void perf_buffer_put(struct perf_buffer *buffer)
2642{ 2646{
2643 if (!atomic_dec_and_test(&data->refcount)) 2647 if (!atomic_dec_and_test(&buffer->refcount))
2644 return; 2648 return;
2645 2649
2646 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); 2650 call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
2647} 2651}
2648 2652
2649static void perf_mmap_open(struct vm_area_struct *vma) 2653static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2658,16 +2662,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
2658 struct perf_event *event = vma->vm_file->private_data; 2662 struct perf_event *event = vma->vm_file->private_data;
2659 2663
2660 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 2664 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
2661 unsigned long size = perf_data_size(event->data); 2665 unsigned long size = perf_data_size(event->buffer);
2662 struct user_struct *user = event->mmap_user; 2666 struct user_struct *user = event->mmap_user;
2663 struct perf_mmap_data *data = event->data; 2667 struct perf_buffer *buffer = event->buffer;
2664 2668
2665 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 2669 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
2666 vma->vm_mm->locked_vm -= event->mmap_locked; 2670 vma->vm_mm->locked_vm -= event->mmap_locked;
2667 rcu_assign_pointer(event->data, NULL); 2671 rcu_assign_pointer(event->buffer, NULL);
2668 mutex_unlock(&event->mmap_mutex); 2672 mutex_unlock(&event->mmap_mutex);
2669 2673
2670 perf_mmap_data_put(data); 2674 perf_buffer_put(buffer);
2671 free_uid(user); 2675 free_uid(user);
2672 } 2676 }
2673} 2677}
@@ -2685,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2685 unsigned long user_locked, user_lock_limit; 2689 unsigned long user_locked, user_lock_limit;
2686 struct user_struct *user = current_user(); 2690 struct user_struct *user = current_user();
2687 unsigned long locked, lock_limit; 2691 unsigned long locked, lock_limit;
2688 struct perf_mmap_data *data; 2692 struct perf_buffer *buffer;
2689 unsigned long vma_size; 2693 unsigned long vma_size;
2690 unsigned long nr_pages; 2694 unsigned long nr_pages;
2691 long user_extra, extra; 2695 long user_extra, extra;
2692 int ret = 0; 2696 int ret = 0, flags = 0;
2693 2697
2694 /* 2698 /*
2695 * Don't allow mmap() of inherited per-task counters. This would 2699 * Don't allow mmap() of inherited per-task counters. This would
@@ -2706,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2706 nr_pages = (vma_size / PAGE_SIZE) - 1; 2710 nr_pages = (vma_size / PAGE_SIZE) - 1;
2707 2711
2708 /* 2712 /*
2709 * If we have data pages ensure they're a power-of-two number, so we 2713 * If we have buffer pages ensure they're a power-of-two number, so we
2710 * can do bitmasks instead of modulo. 2714 * can do bitmasks instead of modulo.
2711 */ 2715 */
2712 if (nr_pages != 0 && !is_power_of_2(nr_pages)) 2716 if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2720,9 +2724,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2720 2724
2721 WARN_ON_ONCE(event->ctx->parent_ctx); 2725 WARN_ON_ONCE(event->ctx->parent_ctx);
2722 mutex_lock(&event->mmap_mutex); 2726 mutex_lock(&event->mmap_mutex);
2723 if (event->data) { 2727 if (event->buffer) {
2724 if (event->data->nr_pages == nr_pages) 2728 if (event->buffer->nr_pages == nr_pages)
2725 atomic_inc(&event->data->refcount); 2729 atomic_inc(&event->buffer->refcount);
2726 else 2730 else
2727 ret = -EINVAL; 2731 ret = -EINVAL;
2728 goto unlock; 2732 goto unlock;
@@ -2752,17 +2756,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2752 goto unlock; 2756 goto unlock;
2753 } 2757 }
2754 2758
2755 WARN_ON(event->data); 2759 WARN_ON(event->buffer);
2760
2761 if (vma->vm_flags & VM_WRITE)
2762 flags |= PERF_BUFFER_WRITABLE;
2756 2763
2757 data = perf_mmap_data_alloc(event, nr_pages); 2764 buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
2758 if (!data) { 2765 event->cpu, flags);
2766 if (!buffer) {
2759 ret = -ENOMEM; 2767 ret = -ENOMEM;
2760 goto unlock; 2768 goto unlock;
2761 } 2769 }
2762 2770 rcu_assign_pointer(event->buffer, buffer);
2763 perf_mmap_data_init(event, data);
2764 if (vma->vm_flags & VM_WRITE)
2765 event->data->writable = 1;
2766 2771
2767 atomic_long_add(user_extra, &user->locked_vm); 2772 atomic_long_add(user_extra, &user->locked_vm);
2768 event->mmap_locked = extra; 2773 event->mmap_locked = extra;
@@ -2941,11 +2946,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2941 return NULL; 2946 return NULL;
2942} 2947}
2943 2948
2944__weak
2945void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
2946{
2947}
2948
2949 2949
2950/* 2950/*
2951 * We assume there is only KVM supporting the callbacks. 2951 * We assume there is only KVM supporting the callbacks.
@@ -2971,15 +2971,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
2971/* 2971/*
2972 * Output 2972 * Output
2973 */ 2973 */
2974static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, 2974static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
2975 unsigned long offset, unsigned long head) 2975 unsigned long offset, unsigned long head)
2976{ 2976{
2977 unsigned long mask; 2977 unsigned long mask;
2978 2978
2979 if (!data->writable) 2979 if (!buffer->writable)
2980 return true; 2980 return true;
2981 2981
2982 mask = perf_data_size(data) - 1; 2982 mask = perf_data_size(buffer) - 1;
2983 2983
2984 offset = (offset - tail) & mask; 2984 offset = (offset - tail) & mask;
2985 head = (head - tail) & mask; 2985 head = (head - tail) & mask;
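
perf_output_space() decides whether a record still fits between the producer head and the consumer tail of a power-of-two ring; the kernel expresses this with masked offsets relative to the user-visible tail. The underlying question can be sketched with free-running counters, where unsigned subtraction is wrap-safe (standalone C, names hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* Free-running head/tail counters over a ring of ring_size bytes. */
static bool ring_has_space(uint64_t head, uint64_t tail,
			   uint64_t ring_size, uint64_t len)
{
	uint64_t used = head - tail;     /* bytes produced but not yet consumed */

	return used + len <= ring_size;  /* the write must not overtake the tail */
}
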
@@ -2992,7 +2992,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
2992 2992
2993static void perf_output_wakeup(struct perf_output_handle *handle) 2993static void perf_output_wakeup(struct perf_output_handle *handle)
2994{ 2994{
2995 atomic_set(&handle->data->poll, POLL_IN); 2995 atomic_set(&handle->buffer->poll, POLL_IN);
2996 2996
2997 if (handle->nmi) { 2997 if (handle->nmi) {
2998 handle->event->pending_wakeup = 1; 2998 handle->event->pending_wakeup = 1;
@@ -3012,45 +3012,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
3012 */ 3012 */
3013static void perf_output_get_handle(struct perf_output_handle *handle) 3013static void perf_output_get_handle(struct perf_output_handle *handle)
3014{ 3014{
3015 struct perf_mmap_data *data = handle->data; 3015 struct perf_buffer *buffer = handle->buffer;
3016 3016
3017 preempt_disable(); 3017 preempt_disable();
3018 local_inc(&data->nest); 3018 local_inc(&buffer->nest);
3019 handle->wakeup = local_read(&data->wakeup); 3019 handle->wakeup = local_read(&buffer->wakeup);
3020} 3020}
3021 3021
3022static void perf_output_put_handle(struct perf_output_handle *handle) 3022static void perf_output_put_handle(struct perf_output_handle *handle)
3023{ 3023{
3024 struct perf_mmap_data *data = handle->data; 3024 struct perf_buffer *buffer = handle->buffer;
3025 unsigned long head; 3025 unsigned long head;
3026 3026
3027again: 3027again:
3028 head = local_read(&data->head); 3028 head = local_read(&buffer->head);
3029 3029
3030 /* 3030 /*
3031 * IRQ/NMI can happen here, which means we can miss a head update. 3031 * IRQ/NMI can happen here, which means we can miss a head update.
3032 */ 3032 */
3033 3033
3034 if (!local_dec_and_test(&data->nest)) 3034 if (!local_dec_and_test(&buffer->nest))
3035 goto out; 3035 goto out;
3036 3036
3037 /* 3037 /*
3038 * Publish the known good head. Rely on the full barrier implied 3038 * Publish the known good head. Rely on the full barrier implied
3039 * by atomic_dec_and_test() order the data->head read and this 3039 * by atomic_dec_and_test() order the buffer->head read and this
3040 * write. 3040 * write.
3041 */ 3041 */
3042 data->user_page->data_head = head; 3042 buffer->user_page->data_head = head;
3043 3043
3044 /* 3044 /*
3045 * Now check if we missed an update, rely on the (compiler) 3045 * Now check if we missed an update, rely on the (compiler)
3046 * barrier in atomic_dec_and_test() to re-read data->head. 3046 * barrier in atomic_dec_and_test() to re-read buffer->head.
3047 */ 3047 */
3048 if (unlikely(head != local_read(&data->head))) { 3048 if (unlikely(head != local_read(&buffer->head))) {
3049 local_inc(&data->nest); 3049 local_inc(&buffer->nest);
3050 goto again; 3050 goto again;
3051 } 3051 }
3052 3052
3053 if (handle->wakeup != local_read(&data->wakeup)) 3053 if (handle->wakeup != local_read(&buffer->wakeup))
3054 perf_output_wakeup(handle); 3054 perf_output_wakeup(handle);
3055 3055
3056 out: 3056 out:
@@ -3070,12 +3070,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
3070 buf += size; 3070 buf += size;
3071 handle->size -= size; 3071 handle->size -= size;
3072 if (!handle->size) { 3072 if (!handle->size) {
3073 struct perf_mmap_data *data = handle->data; 3073 struct perf_buffer *buffer = handle->buffer;
3074 3074
3075 handle->page++; 3075 handle->page++;
3076 handle->page &= data->nr_pages - 1; 3076 handle->page &= buffer->nr_pages - 1;
3077 handle->addr = data->data_pages[handle->page]; 3077 handle->addr = buffer->data_pages[handle->page];
3078 handle->size = PAGE_SIZE << page_order(data); 3078 handle->size = PAGE_SIZE << page_order(buffer);
3079 } 3079 }
3080 } while (len); 3080 } while (len);
3081} 3081}
@@ -3084,7 +3084,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3084 struct perf_event *event, unsigned int size, 3084 struct perf_event *event, unsigned int size,
3085 int nmi, int sample) 3085 int nmi, int sample)
3086{ 3086{
3087 struct perf_mmap_data *data; 3087 struct perf_buffer *buffer;
3088 unsigned long tail, offset, head; 3088 unsigned long tail, offset, head;
3089 int have_lost; 3089 int have_lost;
3090 struct { 3090 struct {
@@ -3100,19 +3100,19 @@ int perf_output_begin(struct perf_output_handle *handle,
3100 if (event->parent) 3100 if (event->parent)
3101 event = event->parent; 3101 event = event->parent;
3102 3102
3103 data = rcu_dereference(event->data); 3103 buffer = rcu_dereference(event->buffer);
3104 if (!data) 3104 if (!buffer)
3105 goto out; 3105 goto out;
3106 3106
3107 handle->data = data; 3107 handle->buffer = buffer;
3108 handle->event = event; 3108 handle->event = event;
3109 handle->nmi = nmi; 3109 handle->nmi = nmi;
3110 handle->sample = sample; 3110 handle->sample = sample;
3111 3111
3112 if (!data->nr_pages) 3112 if (!buffer->nr_pages)
3113 goto out; 3113 goto out;
3114 3114
3115 have_lost = local_read(&data->lost); 3115 have_lost = local_read(&buffer->lost);
3116 if (have_lost) 3116 if (have_lost)
3117 size += sizeof(lost_event); 3117 size += sizeof(lost_event);
3118 3118
@@ -3124,30 +3124,30 @@ int perf_output_begin(struct perf_output_handle *handle,
3124 * tail pointer. So that all reads will be completed before the 3124 * tail pointer. So that all reads will be completed before the
3125 * write is issued. 3125 * write is issued.
3126 */ 3126 */
3127 tail = ACCESS_ONCE(data->user_page->data_tail); 3127 tail = ACCESS_ONCE(buffer->user_page->data_tail);
3128 smp_rmb(); 3128 smp_rmb();
3129 offset = head = local_read(&data->head); 3129 offset = head = local_read(&buffer->head);
3130 head += size; 3130 head += size;
3131 if (unlikely(!perf_output_space(data, tail, offset, head))) 3131 if (unlikely(!perf_output_space(buffer, tail, offset, head)))
3132 goto fail; 3132 goto fail;
3133 } while (local_cmpxchg(&data->head, offset, head) != offset); 3133 } while (local_cmpxchg(&buffer->head, offset, head) != offset);
3134 3134
3135 if (head - local_read(&data->wakeup) > data->watermark) 3135 if (head - local_read(&buffer->wakeup) > buffer->watermark)
3136 local_add(data->watermark, &data->wakeup); 3136 local_add(buffer->watermark, &buffer->wakeup);
3137 3137
3138 handle->page = offset >> (PAGE_SHIFT + page_order(data)); 3138 handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
3139 handle->page &= data->nr_pages - 1; 3139 handle->page &= buffer->nr_pages - 1;
3140 handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1); 3140 handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
3141 handle->addr = data->data_pages[handle->page]; 3141 handle->addr = buffer->data_pages[handle->page];
3142 handle->addr += handle->size; 3142 handle->addr += handle->size;
3143 handle->size = (PAGE_SIZE << page_order(data)) - handle->size; 3143 handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
3144 3144
3145 if (have_lost) { 3145 if (have_lost) {
3146 lost_event.header.type = PERF_RECORD_LOST; 3146 lost_event.header.type = PERF_RECORD_LOST;
3147 lost_event.header.misc = 0; 3147 lost_event.header.misc = 0;
3148 lost_event.header.size = sizeof(lost_event); 3148 lost_event.header.size = sizeof(lost_event);
3149 lost_event.id = event->id; 3149 lost_event.id = event->id;
3150 lost_event.lost = local_xchg(&data->lost, 0); 3150 lost_event.lost = local_xchg(&buffer->lost, 0);
3151 3151
3152 perf_output_put(handle, lost_event); 3152 perf_output_put(handle, lost_event);
3153 } 3153 }
@@ -3155,7 +3155,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3155 return 0; 3155 return 0;
3156 3156
3157fail: 3157fail:
3158 local_inc(&data->lost); 3158 local_inc(&buffer->lost);
3159 perf_output_put_handle(handle); 3159 perf_output_put_handle(handle);
3160out: 3160out:
3161 rcu_read_unlock(); 3161 rcu_read_unlock();
@@ -3166,15 +3166,15 @@ out:
3166void perf_output_end(struct perf_output_handle *handle) 3166void perf_output_end(struct perf_output_handle *handle)
3167{ 3167{
3168 struct perf_event *event = handle->event; 3168 struct perf_event *event = handle->event;
3169 struct perf_mmap_data *data = handle->data; 3169 struct perf_buffer *buffer = handle->buffer;
3170 3170
3171 int wakeup_events = event->attr.wakeup_events; 3171 int wakeup_events = event->attr.wakeup_events;
3172 3172
3173 if (handle->sample && wakeup_events) { 3173 if (handle->sample && wakeup_events) {
3174 int events = local_inc_return(&data->events); 3174 int events = local_inc_return(&buffer->events);
3175 if (events >= wakeup_events) { 3175 if (events >= wakeup_events) {
3176 local_sub(wakeup_events, &data->events); 3176 local_sub(wakeup_events, &buffer->events);
3177 local_inc(&data->wakeup); 3177 local_inc(&buffer->wakeup);
3178 } 3178 }
3179 } 3179 }
3180 3180
@@ -3211,7 +3211,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
3211 u64 values[4]; 3211 u64 values[4];
3212 int n = 0; 3212 int n = 0;
3213 3213
3214 values[n++] = atomic64_read(&event->count); 3214 values[n++] = perf_event_count(event);
3215 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 3215 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3216 values[n++] = event->total_time_enabled + 3216 values[n++] = event->total_time_enabled +
3217 atomic64_read(&event->child_total_time_enabled); 3217 atomic64_read(&event->child_total_time_enabled);
@@ -3248,7 +3248,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3248 if (leader != event) 3248 if (leader != event)
3249 leader->pmu->read(leader); 3249 leader->pmu->read(leader);
3250 3250
3251 values[n++] = atomic64_read(&leader->count); 3251 values[n++] = perf_event_count(leader);
3252 if (read_format & PERF_FORMAT_ID) 3252 if (read_format & PERF_FORMAT_ID)
3253 values[n++] = primary_event_id(leader); 3253 values[n++] = primary_event_id(leader);
3254 3254
@@ -3260,7 +3260,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3260 if (sub != event) 3260 if (sub != event)
3261 sub->pmu->read(sub); 3261 sub->pmu->read(sub);
3262 3262
3263 values[n++] = atomic64_read(&sub->count); 3263 values[n++] = perf_event_count(sub);
3264 if (read_format & PERF_FORMAT_ID) 3264 if (read_format & PERF_FORMAT_ID)
3265 values[n++] = primary_event_id(sub); 3265 values[n++] = primary_event_id(sub);
3266 3266
@@ -3491,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
3491/* 3491/*
3492 * task tracking -- fork/exit 3492 * task tracking -- fork/exit
3493 * 3493 *
3494 * enabled by: attr.comm | attr.mmap | attr.task 3494 * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
3495 */ 3495 */
3496 3496
3497struct perf_task_event { 3497struct perf_task_event {
@@ -3541,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
3541 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3541 if (event->cpu != -1 && event->cpu != smp_processor_id())
3542 return 0; 3542 return 0;
3543 3543
3544 if (event->attr.comm || event->attr.mmap || event->attr.task) 3544 if (event->attr.comm || event->attr.mmap ||
3545 event->attr.mmap_data || event->attr.task)
3545 return 1; 3546 return 1;
3546 3547
3547 return 0; 3548 return 0;
@@ -3766,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
3766} 3767}
3767 3768
3768static int perf_event_mmap_match(struct perf_event *event, 3769static int perf_event_mmap_match(struct perf_event *event,
3769 struct perf_mmap_event *mmap_event) 3770 struct perf_mmap_event *mmap_event,
3771 int executable)
3770{ 3772{
3771 if (event->state < PERF_EVENT_STATE_INACTIVE) 3773 if (event->state < PERF_EVENT_STATE_INACTIVE)
3772 return 0; 3774 return 0;
@@ -3774,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
3774 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3776 if (event->cpu != -1 && event->cpu != smp_processor_id())
3775 return 0; 3777 return 0;
3776 3778
3777 if (event->attr.mmap) 3779 if ((!executable && event->attr.mmap_data) ||
3780 (executable && event->attr.mmap))
3778 return 1; 3781 return 1;
3779 3782
3780 return 0; 3783 return 0;
3781} 3784}
3782 3785
3783static void perf_event_mmap_ctx(struct perf_event_context *ctx, 3786static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3784 struct perf_mmap_event *mmap_event) 3787 struct perf_mmap_event *mmap_event,
3788 int executable)
3785{ 3789{
3786 struct perf_event *event; 3790 struct perf_event *event;
3787 3791
3788 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3792 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3789 if (perf_event_mmap_match(event, mmap_event)) 3793 if (perf_event_mmap_match(event, mmap_event, executable))
3790 perf_event_mmap_output(event, mmap_event); 3794 perf_event_mmap_output(event, mmap_event);
3791 } 3795 }
3792} 3796}
@@ -3830,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
3830 if (!vma->vm_mm) { 3834 if (!vma->vm_mm) {
3831 name = strncpy(tmp, "[vdso]", sizeof(tmp)); 3835 name = strncpy(tmp, "[vdso]", sizeof(tmp));
3832 goto got_name; 3836 goto got_name;
3837 } else if (vma->vm_start <= vma->vm_mm->start_brk &&
3838 vma->vm_end >= vma->vm_mm->brk) {
3839 name = strncpy(tmp, "[heap]", sizeof(tmp));
3840 goto got_name;
3841 } else if (vma->vm_start <= vma->vm_mm->start_stack &&
3842 vma->vm_end >= vma->vm_mm->start_stack) {
3843 name = strncpy(tmp, "[stack]", sizeof(tmp));
3844 goto got_name;
3833 } 3845 }
3834 3846
3835 name = strncpy(tmp, "//anon", sizeof(tmp)); 3847 name = strncpy(tmp, "//anon", sizeof(tmp));
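
The new branches label an anonymous mapping "[heap]" when it covers the mm's brk range and "[stack]" when it covers start_stack, mirroring what /proc/<pid>/maps prints, before falling back to "//anon". A standalone sketch of that classification (hypothetical struct, plain C):

#include <stdint.h>

struct region { uint64_t start, end; };          /* [start, end) */

static const char *classify_anon(struct region vma, uint64_t start_brk,
				 uint64_t brk, uint64_t start_stack)
{
	if (vma.start <= start_brk && vma.end >= brk)
		return "[heap]";
	if (vma.start <= start_stack && vma.end >= start_stack)
		return "[stack]";
	return "//anon";
}
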
@@ -3846,17 +3858,17 @@ got_name:
3846 3858
3847 rcu_read_lock(); 3859 rcu_read_lock();
3848 cpuctx = &get_cpu_var(perf_cpu_context); 3860 cpuctx = &get_cpu_var(perf_cpu_context);
3849 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3861 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
3850 ctx = rcu_dereference(current->perf_event_ctxp); 3862 ctx = rcu_dereference(current->perf_event_ctxp);
3851 if (ctx) 3863 if (ctx)
3852 perf_event_mmap_ctx(ctx, mmap_event); 3864 perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
3853 put_cpu_var(perf_cpu_context); 3865 put_cpu_var(perf_cpu_context);
3854 rcu_read_unlock(); 3866 rcu_read_unlock();
3855 3867
3856 kfree(buf); 3868 kfree(buf);
3857} 3869}
3858 3870
3859void __perf_event_mmap(struct vm_area_struct *vma) 3871void perf_event_mmap(struct vm_area_struct *vma)
3860{ 3872{
3861 struct perf_mmap_event mmap_event; 3873 struct perf_mmap_event mmap_event;
3862 3874
@@ -4018,14 +4030,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
4018 hwc->last_period = hwc->sample_period; 4030 hwc->last_period = hwc->sample_period;
4019 4031
4020again: 4032again:
4021 old = val = atomic64_read(&hwc->period_left); 4033 old = val = local64_read(&hwc->period_left);
4022 if (val < 0) 4034 if (val < 0)
4023 return 0; 4035 return 0;
4024 4036
4025 nr = div64_u64(period + val, period); 4037 nr = div64_u64(period + val, period);
4026 offset = nr * period; 4038 offset = nr * period;
4027 val -= offset; 4039 val -= offset;
4028 if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) 4040 if (local64_cmpxchg(&hwc->period_left, old, val) != old)
4029 goto again; 4041 goto again;
4030 4042
4031 return nr; 4043 return nr;
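
perf_swevent_set_period() above works out how many whole sample periods have elapsed and folds the remainder back into period_left, which counts up toward zero; the cmpxchg loop exists only because NMIs can race with the update. The arithmetic on its own, as a standalone sketch with a worked example in the comment:

#include <stdint.h>

/*
 * Hypothetical sketch of the bookkeeping in the hunk above: "left" counts up
 * toward 0, and once it is >= 0 one or more periods have elapsed.  Return how
 * many, and carry the (now negative) remainder back into "left".
 * Example: period = 1000, left = 250  ->  returns 1, left becomes -750.
 */
static int64_t take_periods(int64_t *left, int64_t period)
{
	int64_t val = *left, nr;

	if (val < 0)
		return 0;                    /* next sample not due yet */

	nr = (period + val) / period;        /* whole periods elapsed */
	*left = val - nr * period;           /* remainder, always negative */
	return nr;
}
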
@@ -4064,7 +4076,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4064{ 4076{
4065 struct hw_perf_event *hwc = &event->hw; 4077 struct hw_perf_event *hwc = &event->hw;
4066 4078
4067 atomic64_add(nr, &event->count); 4079 local64_add(nr, &event->count);
4068 4080
4069 if (!regs) 4081 if (!regs)
4070 return; 4082 return;
@@ -4075,7 +4087,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4075 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4087 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
4076 return perf_swevent_overflow(event, 1, nmi, data, regs); 4088 return perf_swevent_overflow(event, 1, nmi, data, regs);
4077 4089
4078 if (atomic64_add_negative(nr, &hwc->period_left)) 4090 if (local64_add_negative(nr, &hwc->period_left))
4079 return; 4091 return;
4080 4092
4081 perf_swevent_overflow(event, 0, nmi, data, regs); 4093 perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4213,14 +4225,12 @@ int perf_swevent_get_recursion_context(void)
4213} 4225}
4214EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); 4226EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4215 4227
4216void perf_swevent_put_recursion_context(int rctx) 4228void inline perf_swevent_put_recursion_context(int rctx)
4217{ 4229{
4218 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4230 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
4219 barrier(); 4231 barrier();
4220 cpuctx->recursion[rctx]--; 4232 cpuctx->recursion[rctx]--;
4221} 4233}
4222EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
4223
4224 4234
4225void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4235void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4226 struct pt_regs *regs, u64 addr) 4236 struct pt_regs *regs, u64 addr)
@@ -4368,8 +4378,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4368 u64 now; 4378 u64 now;
4369 4379
4370 now = cpu_clock(cpu); 4380 now = cpu_clock(cpu);
4371 prev = atomic64_xchg(&event->hw.prev_count, now); 4381 prev = local64_xchg(&event->hw.prev_count, now);
4372 atomic64_add(now - prev, &event->count); 4382 local64_add(now - prev, &event->count);
4373} 4383}
4374 4384
4375static int cpu_clock_perf_event_enable(struct perf_event *event) 4385static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4377,7 +4387,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
4377 struct hw_perf_event *hwc = &event->hw; 4387 struct hw_perf_event *hwc = &event->hw;
4378 int cpu = raw_smp_processor_id(); 4388 int cpu = raw_smp_processor_id();
4379 4389
4380 atomic64_set(&hwc->prev_count, cpu_clock(cpu)); 4390 local64_set(&hwc->prev_count, cpu_clock(cpu));
4381 perf_swevent_start_hrtimer(event); 4391 perf_swevent_start_hrtimer(event);
4382 4392
4383 return 0; 4393 return 0;
@@ -4409,9 +4419,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
4409 u64 prev; 4419 u64 prev;
4410 s64 delta; 4420 s64 delta;
4411 4421
4412 prev = atomic64_xchg(&event->hw.prev_count, now); 4422 prev = local64_xchg(&event->hw.prev_count, now);
4413 delta = now - prev; 4423 delta = now - prev;
4414 atomic64_add(delta, &event->count); 4424 local64_add(delta, &event->count);
4415} 4425}
4416 4426
4417static int task_clock_perf_event_enable(struct perf_event *event) 4427static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4421,7 +4431,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
4421 4431
4422 now = event->ctx->time; 4432 now = event->ctx->time;
4423 4433
4424 atomic64_set(&hwc->prev_count, now); 4434 local64_set(&hwc->prev_count, now);
4425 4435
4426 perf_swevent_start_hrtimer(event); 4436 perf_swevent_start_hrtimer(event);
4427 4437
@@ -4601,7 +4611,7 @@ static int perf_tp_event_match(struct perf_event *event,
4601} 4611}
4602 4612
4603void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, 4613void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4604 struct pt_regs *regs, struct hlist_head *head) 4614 struct pt_regs *regs, struct hlist_head *head, int rctx)
4605{ 4615{
4606 struct perf_sample_data data; 4616 struct perf_sample_data data;
4607 struct perf_event *event; 4617 struct perf_event *event;
@@ -4615,12 +4625,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4615 perf_sample_data_init(&data, addr); 4625 perf_sample_data_init(&data, addr);
4616 data.raw = &raw; 4626 data.raw = &raw;
4617 4627
4618 rcu_read_lock();
4619 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4628 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4620 if (perf_tp_event_match(event, &data, regs)) 4629 if (perf_tp_event_match(event, &data, regs))
4621 perf_swevent_add(event, count, 1, &data, regs); 4630 perf_swevent_add(event, count, 1, &data, regs);
4622 } 4631 }
4623 rcu_read_unlock(); 4632
4633 perf_swevent_put_recursion_context(rctx);
4624} 4634}
4625EXPORT_SYMBOL_GPL(perf_tp_event); 4635EXPORT_SYMBOL_GPL(perf_tp_event);
4626 4636
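
perf_tp_event() now receives the swevent recursion cookie (rctx) from its caller and releases it itself instead of taking rcu_read_lock() locally, so the caller owns the context for the whole submission. A sketch of the calling pattern this implies (illustrative; the surrounding glue shown here is assumed, not quoted from the patch):

int rctx;

rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
	return;                  /* already inside a perf swevent on this CPU */

/* ... fill in the raw record and find the per-CPU hlist of tracepoint events ... */

/* Consumes rctx: ends by calling perf_swevent_put_recursion_context(rctx). */
perf_tp_event(addr, count, record, entry_size, regs, head, rctx);
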
@@ -4864,7 +4874,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4864 hwc->sample_period = 1; 4874 hwc->sample_period = 1;
4865 hwc->last_period = hwc->sample_period; 4875 hwc->last_period = hwc->sample_period;
4866 4876
4867 atomic64_set(&hwc->period_left, hwc->sample_period); 4877 local64_set(&hwc->period_left, hwc->sample_period);
4868 4878
4869 /* 4879 /*
4870 * we currently do not support PERF_FORMAT_GROUP on inherited events 4880 * we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4913,7 +4923,7 @@ done:
4913 4923
4914 if (!event->parent) { 4924 if (!event->parent) {
4915 atomic_inc(&nr_events); 4925 atomic_inc(&nr_events);
4916 if (event->attr.mmap) 4926 if (event->attr.mmap || event->attr.mmap_data)
4917 atomic_inc(&nr_mmap_events); 4927 atomic_inc(&nr_mmap_events);
4918 if (event->attr.comm) 4928 if (event->attr.comm)
4919 atomic_inc(&nr_comm_events); 4929 atomic_inc(&nr_comm_events);
@@ -5007,7 +5017,7 @@ err_size:
5007static int 5017static int
5008perf_event_set_output(struct perf_event *event, struct perf_event *output_event) 5018perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
5009{ 5019{
5010 struct perf_mmap_data *data = NULL, *old_data = NULL; 5020 struct perf_buffer *buffer = NULL, *old_buffer = NULL;
5011 int ret = -EINVAL; 5021 int ret = -EINVAL;
5012 5022
5013 if (!output_event) 5023 if (!output_event)
@@ -5037,19 +5047,19 @@ set:
5037 5047
5038 if (output_event) { 5048 if (output_event) {
5039 /* get the buffer we want to redirect to */ 5049 /* get the buffer we want to redirect to */
5040 data = perf_mmap_data_get(output_event); 5050 buffer = perf_buffer_get(output_event);
5041 if (!data) 5051 if (!buffer)
5042 goto unlock; 5052 goto unlock;
5043 } 5053 }
5044 5054
5045 old_data = event->data; 5055 old_buffer = event->buffer;
5046 rcu_assign_pointer(event->data, data); 5056 rcu_assign_pointer(event->buffer, buffer);
5047 ret = 0; 5057 ret = 0;
5048unlock: 5058unlock:
5049 mutex_unlock(&event->mmap_mutex); 5059 mutex_unlock(&event->mmap_mutex);
5050 5060
5051 if (old_data) 5061 if (old_buffer)
5052 perf_mmap_data_put(old_data); 5062 perf_buffer_put(old_buffer);
5053out: 5063out:
5054 return ret; 5064 return ret;
5055} 5065}
@@ -5298,7 +5308,7 @@ inherit_event(struct perf_event *parent_event,
5298 hwc->sample_period = sample_period; 5308 hwc->sample_period = sample_period;
5299 hwc->last_period = sample_period; 5309 hwc->last_period = sample_period;
5300 5310
5301 atomic64_set(&hwc->period_left, sample_period); 5311 local64_set(&hwc->period_left, sample_period);
5302 } 5312 }
5303 5313
5304 child_event->overflow_handler = parent_event->overflow_handler; 5314 child_event->overflow_handler = parent_event->overflow_handler;
@@ -5359,12 +5369,12 @@ static void sync_child_event(struct perf_event *child_event,
5359 if (child_event->attr.inherit_stat) 5369 if (child_event->attr.inherit_stat)
5360 perf_event_read_event(child_event, child); 5370 perf_event_read_event(child_event, child);
5361 5371
5362 child_val = atomic64_read(&child_event->count); 5372 child_val = perf_event_count(child_event);
5363 5373
5364 /* 5374 /*
5365 * Add back the child's count to the parent's count: 5375 * Add back the child's count to the parent's count:
5366 */ 5376 */
5367 atomic64_add(child_val, &parent_event->count); 5377 atomic64_add(child_val, &parent_event->child_count);
5368 atomic64_add(child_event->total_time_enabled, 5378 atomic64_add(child_event->total_time_enabled,
5369 &parent_event->child_total_time_enabled); 5379 &parent_event->child_total_time_enabled);
5370 atomic64_add(child_event->total_time_running, 5380 atomic64_add(child_event->total_time_running,
diff --git a/kernel/sched.c b/kernel/sched.c
index f52a8801b7a2..265cf3a2b5d8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3726,7 +3726,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3726 * off of preempt_enable. Kernel preemptions off return from interrupt 3726 * off of preempt_enable. Kernel preemptions off return from interrupt
3727 * occur there and call schedule directly. 3727 * occur there and call schedule directly.
3728 */ 3728 */
3729asmlinkage void __sched preempt_schedule(void) 3729asmlinkage void __sched notrace preempt_schedule(void)
3730{ 3730{
3731 struct thread_info *ti = current_thread_info(); 3731 struct thread_info *ti = current_thread_info();
3732 3732
@@ -3738,9 +3738,9 @@ asmlinkage void __sched preempt_schedule(void)
3738 return; 3738 return;
3739 3739
3740 do { 3740 do {
3741 add_preempt_count(PREEMPT_ACTIVE); 3741 add_preempt_count_notrace(PREEMPT_ACTIVE);
3742 schedule(); 3742 schedule();
3743 sub_preempt_count(PREEMPT_ACTIVE); 3743 sub_preempt_count_notrace(PREEMPT_ACTIVE);
3744 3744
3745 /* 3745 /*
3746 * Check again in case we missed a preemption opportunity 3746 * Check again in case we missed a preemption opportunity
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
deleted file mode 100644
index 4b493f67dcb5..000000000000
--- a/kernel/softlockup.c
+++ /dev/null
@@ -1,293 +0,0 @@
1/*
2 * Detect Soft Lockups
3 *
4 * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
5 *
6 * this code detects soft lockups: incidents in where on a CPU
7 * the kernel does not reschedule for 10 seconds or more.
8 */
9#include <linux/mm.h>
10#include <linux/cpu.h>
11#include <linux/nmi.h>
12#include <linux/init.h>
13#include <linux/delay.h>
14#include <linux/freezer.h>
15#include <linux/kthread.h>
16#include <linux/lockdep.h>
17#include <linux/notifier.h>
18#include <linux/module.h>
19#include <linux/sysctl.h>
20
21#include <asm/irq_regs.h>
22
23static DEFINE_SPINLOCK(print_lock);
24
25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
28static DEFINE_PER_CPU(bool, softlock_touch_sync);
29
30static int __read_mostly did_panic;
31int __read_mostly softlockup_thresh = 60;
32
33/*
34 * Should we panic (and reboot, if panic_timeout= is set) when a
35 * soft-lockup occurs:
36 */
37unsigned int __read_mostly softlockup_panic =
38 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
39
40static int __init softlockup_panic_setup(char *str)
41{
42 softlockup_panic = simple_strtoul(str, NULL, 0);
43
44 return 1;
45}
46__setup("softlockup_panic=", softlockup_panic_setup);
47
48static int
49softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
50{
51 did_panic = 1;
52
53 return NOTIFY_DONE;
54}
55
56static struct notifier_block panic_block = {
57 .notifier_call = softlock_panic,
58};
59
60/*
61 * Returns seconds, approximately. We don't need nanosecond
62 * resolution, and we don't need to waste time with a big divide when
63 * 2^30ns == 1.074s.
64 */
65static unsigned long get_timestamp(int this_cpu)
66{
67 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
68}
69
70static void __touch_softlockup_watchdog(void)
71{
72 int this_cpu = raw_smp_processor_id();
73
74 __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
75}
76
77void touch_softlockup_watchdog(void)
78{
79 __raw_get_cpu_var(softlockup_touch_ts) = 0;
80}
81EXPORT_SYMBOL(touch_softlockup_watchdog);
82
83void touch_softlockup_watchdog_sync(void)
84{
85 __raw_get_cpu_var(softlock_touch_sync) = true;
86 __raw_get_cpu_var(softlockup_touch_ts) = 0;
87}
88
89void touch_all_softlockup_watchdogs(void)
90{
91 int cpu;
92
93 /* Cause each CPU to re-update its timestamp rather than complain */
94 for_each_online_cpu(cpu)
95 per_cpu(softlockup_touch_ts, cpu) = 0;
96}
97EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
98
99int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
100 void __user *buffer,
101 size_t *lenp, loff_t *ppos)
102{
103 touch_all_softlockup_watchdogs();
104 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
105}
106
107/*
108 * This callback runs from the timer interrupt, and checks
109 * whether the watchdog thread has hung or not:
110 */
111void softlockup_tick(void)
112{
113 int this_cpu = smp_processor_id();
114 unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
115 unsigned long print_ts;
116 struct pt_regs *regs = get_irq_regs();
117 unsigned long now;
118
119 /* Is detection switched off? */
120 if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
121 /* Be sure we don't false trigger if switched back on */
122 if (touch_ts)
123 per_cpu(softlockup_touch_ts, this_cpu) = 0;
124 return;
125 }
126
127 if (touch_ts == 0) {
128 if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
129 /*
130 * If the time stamp was touched atomically
131 * make sure the scheduler tick is up to date.
132 */
133 per_cpu(softlock_touch_sync, this_cpu) = false;
134 sched_clock_tick();
135 }
136 __touch_softlockup_watchdog();
137 return;
138 }
139
140 print_ts = per_cpu(softlockup_print_ts, this_cpu);
141
142 /* report at most once a second */
143 if (print_ts == touch_ts || did_panic)
144 return;
145
146 /* do not print during early bootup: */
147 if (unlikely(system_state != SYSTEM_RUNNING)) {
148 __touch_softlockup_watchdog();
149 return;
150 }
151
152 now = get_timestamp(this_cpu);
153
154 /*
155 * Wake up the high-prio watchdog task twice per
156 * threshold timespan.
157 */
158 if (time_after(now - softlockup_thresh/2, touch_ts))
159 wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
160
161 /* Warn about unreasonable delays: */
162 if (time_before_eq(now - softlockup_thresh, touch_ts))
163 return;
164
165 per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
166
167 spin_lock(&print_lock);
168 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
169 this_cpu, now - touch_ts,
170 current->comm, task_pid_nr(current));
171 print_modules();
172 print_irqtrace_events(current);
173 if (regs)
174 show_regs(regs);
175 else
176 dump_stack();
177 spin_unlock(&print_lock);
178
179 if (softlockup_panic)
180 panic("softlockup: hung tasks");
181}
182
183/*
184 * The watchdog thread - runs every second and touches the timestamp.
185 */
186static int watchdog(void *__bind_cpu)
187{
188 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
189
190 sched_setscheduler(current, SCHED_FIFO, &param);
191
192 /* initialize timestamp */
193 __touch_softlockup_watchdog();
194
195 set_current_state(TASK_INTERRUPTIBLE);
196 /*
197 * Run briefly once per second to reset the softlockup timestamp.
198 * If this gets delayed for more than 60 seconds then the
199 * debug-printout triggers in softlockup_tick().
200 */
201 while (!kthread_should_stop()) {
202 __touch_softlockup_watchdog();
203 schedule();
204
205 if (kthread_should_stop())
206 break;
207
208 set_current_state(TASK_INTERRUPTIBLE);
209 }
210 __set_current_state(TASK_RUNNING);
211
212 return 0;
213}
214
215/*
216 * Create/destroy watchdog threads as CPUs come and go:
217 */
218static int __cpuinit
219cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
220{
221 int hotcpu = (unsigned long)hcpu;
222 struct task_struct *p;
223
224 switch (action) {
225 case CPU_UP_PREPARE:
226 case CPU_UP_PREPARE_FROZEN:
227 BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
228 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
229 if (IS_ERR(p)) {
230 printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
231 return NOTIFY_BAD;
232 }
233 per_cpu(softlockup_touch_ts, hotcpu) = 0;
234 per_cpu(softlockup_watchdog, hotcpu) = p;
235 kthread_bind(p, hotcpu);
236 break;
237 case CPU_ONLINE:
238 case CPU_ONLINE_FROZEN:
239 wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
240 break;
241#ifdef CONFIG_HOTPLUG_CPU
242 case CPU_UP_CANCELED:
243 case CPU_UP_CANCELED_FROZEN:
244 if (!per_cpu(softlockup_watchdog, hotcpu))
245 break;
246 /* Unbind so it can run. Fall thru. */
247 kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
248 cpumask_any(cpu_online_mask));
249 case CPU_DEAD:
250 case CPU_DEAD_FROZEN:
251 p = per_cpu(softlockup_watchdog, hotcpu);
252 per_cpu(softlockup_watchdog, hotcpu) = NULL;
253 kthread_stop(p);
254 break;
255#endif /* CONFIG_HOTPLUG_CPU */
256 }
257 return NOTIFY_OK;
258}
259
260static struct notifier_block __cpuinitdata cpu_nfb = {
261 .notifier_call = cpu_callback
262};
263
264static int __initdata nosoftlockup;
265
266static int __init nosoftlockup_setup(char *str)
267{
268 nosoftlockup = 1;
269 return 1;
270}
271__setup("nosoftlockup", nosoftlockup_setup);
272
273static int __init spawn_softlockup_task(void)
274{
275 void *cpu = (void *)(long)smp_processor_id();
276 int err;
277
278 if (nosoftlockup)
279 return 0;
280
281 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
282 if (err == NOTIFY_BAD) {
283 BUG();
284 return 1;
285 }
286 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
287 register_cpu_notifier(&cpu_nfb);
288
289 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
290
291 return 0;
292}
293early_initcall(spawn_softlockup_task);
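
The deleted detector timestamps in rough seconds by shifting a nanosecond clock right by 30 bits (2^30 ns ≈ 1.074 s, close enough to skip the divide) and complains once the per-CPU watchdog thread has not refreshed its timestamp for softlockup_thresh seconds; the new lockup detector selected by CONFIG_LOCKUP_DETECTOR (wired into sysctl below) takes over that job. The removed check, restated as a standalone sketch:

#include <stdbool.h>
#include <stdint.h>

/* ~seconds from nanoseconds: 2^30 ns = 1.073741824 s, so a shift is close enough. */
static uint64_t approx_seconds(uint64_t ns)
{
	return ns >> 30;
}

/* Hypothetical: a lockup is suspected once the last touch is older than the threshold. */
static bool looks_locked_up(uint64_t now_s, uint64_t touch_s, uint64_t thresh_s)
{
	return now_s - touch_s > thresh_s;
}
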
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d24f761f4876..6f79c7f81c96 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,6 +76,10 @@
76#include <scsi/sg.h> 76#include <scsi/sg.h>
77#endif 77#endif
78 78
79#ifdef CONFIG_LOCKUP_DETECTOR
80#include <linux/nmi.h>
81#endif
82
79 83
80#if defined(CONFIG_SYSCTL) 84#if defined(CONFIG_SYSCTL)
81 85
@@ -106,7 +110,7 @@ extern int blk_iopoll_enabled;
106#endif 110#endif
107 111
108/* Constants used for minimum and maximum */ 112/* Constants used for minimum and maximum */
109#ifdef CONFIG_DETECT_SOFTLOCKUP 113#ifdef CONFIG_LOCKUP_DETECTOR
110static int sixty = 60; 114static int sixty = 60;
111static int neg_one = -1; 115static int neg_one = -1;
112#endif 116#endif
@@ -710,7 +714,34 @@ static struct ctl_table kern_table[] = {
710 .mode = 0444, 714 .mode = 0444,
711 .proc_handler = proc_dointvec, 715 .proc_handler = proc_dointvec,
712 }, 716 },
713#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 717#if defined(CONFIG_LOCKUP_DETECTOR)
718 {
719 .procname = "watchdog",
720 .data = &watchdog_enabled,
721 .maxlen = sizeof (int),
722 .mode = 0644,
723 .proc_handler = proc_dowatchdog_enabled,
724 },
725 {
726 .procname = "watchdog_thresh",
727 .data = &softlockup_thresh,
728 .maxlen = sizeof(int),
729 .mode = 0644,
730 .proc_handler = proc_dowatchdog_thresh,
731 .extra1 = &neg_one,
732 .extra2 = &sixty,
733 },
734 {
735 .procname = "softlockup_panic",
736 .data = &softlockup_panic,
737 .maxlen = sizeof(int),
738 .mode = 0644,
739 .proc_handler = proc_dointvec_minmax,
740 .extra1 = &zero,
741 .extra2 = &one,
742 },
743#endif
744#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
714 { 745 {
715 .procname = "unknown_nmi_panic", 746 .procname = "unknown_nmi_panic",
716 .data = &unknown_nmi_panic, 747 .data = &unknown_nmi_panic,
@@ -813,26 +844,6 @@ static struct ctl_table kern_table[] = {
813 .proc_handler = proc_dointvec, 844 .proc_handler = proc_dointvec,
814 }, 845 },
815#endif 846#endif
816#ifdef CONFIG_DETECT_SOFTLOCKUP
817 {
818 .procname = "softlockup_panic",
819 .data = &softlockup_panic,
820 .maxlen = sizeof(int),
821 .mode = 0644,
822 .proc_handler = proc_dointvec_minmax,
823 .extra1 = &zero,
824 .extra2 = &one,
825 },
826 {
827 .procname = "softlockup_thresh",
828 .data = &softlockup_thresh,
829 .maxlen = sizeof(int),
830 .mode = 0644,
831 .proc_handler = proc_dosoftlockup_thresh,
832 .extra1 = &neg_one,
833 .extra2 = &sixty,
834 },
835#endif
836#ifdef CONFIG_DETECT_HUNG_TASK 847#ifdef CONFIG_DETECT_HUNG_TASK
837 { 848 {
838 .procname = "hung_task_panic", 849 .procname = "hung_task_panic",
diff --git a/kernel/timer.c b/kernel/timer.c
index efde11e197c4..6aa6f7e69ad5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1302,7 +1302,6 @@ void run_local_timers(void)
1302{ 1302{
1303 hrtimer_run_queues(); 1303 hrtimer_run_queues();
1304 raise_softirq(TIMER_SOFTIRQ); 1304 raise_softirq(TIMER_SOFTIRQ);
1305 softlockup_tick();
1306} 1305}
1307 1306
1308/* 1307/*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8b1797c4545b..c7683fd8a03a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -194,15 +194,6 @@ config PREEMPT_TRACER
194 enabled. This option and the irqs-off timing option can be 194 enabled. This option and the irqs-off timing option can be
195 used together or separately.) 195 used together or separately.)
196 196
197config SYSPROF_TRACER
198 bool "Sysprof Tracer"
199 depends on X86
200 select GENERIC_TRACER
201 select CONTEXT_SWITCH_TRACER
202 help
203 This tracer provides the trace needed by the 'Sysprof' userspace
204 tool.
205
206config SCHED_TRACER 197config SCHED_TRACER
207 bool "Scheduling Latency Tracer" 198 bool "Scheduling Latency Tracer"
208 select GENERIC_TRACER 199 select GENERIC_TRACER
@@ -229,23 +220,6 @@ config FTRACE_SYSCALLS
229 help 220 help
230 Basic tracer to catch the syscall entry and exit events. 221 Basic tracer to catch the syscall entry and exit events.
231 222
232config BOOT_TRACER
233 bool "Trace boot initcalls"
234 select GENERIC_TRACER
235 select CONTEXT_SWITCH_TRACER
236 help
237 This tracer helps developers to optimize boot times: it records
238 the timings of the initcalls and traces key events and the identity
239 of tasks that can cause boot delays, such as context-switches.
240
241 Its aim is to be parsed by the scripts/bootgraph.pl tool to
242 produce pretty graphics about boot inefficiencies, giving a visual
243 representation of the delays during initcalls - but the raw
244 /debug/tracing/trace text output is readable too.
245
246 You must pass in initcall_debug and ftrace=initcall to the kernel
247 command line to enable this on bootup.
248
249config TRACE_BRANCH_PROFILING 223config TRACE_BRANCH_PROFILING
250 bool 224 bool
251 select GENERIC_TRACER 225 select GENERIC_TRACER
@@ -325,28 +299,6 @@ config BRANCH_TRACER
325 299
326 Say N if unsure. 300 Say N if unsure.
327 301
328config KSYM_TRACER
329 bool "Trace read and write access on kernel memory locations"
330 depends on HAVE_HW_BREAKPOINT
331 select TRACING
332 help
333 This tracer helps find read and write operations on any given kernel
334 symbol i.e. /proc/kallsyms.
335
336config PROFILE_KSYM_TRACER
337 bool "Profile all kernel memory accesses on 'watched' variables"
338 depends on KSYM_TRACER
339 help
340 This tracer profiles kernel accesses on variables watched through the
341 ksym tracer ftrace plugin. Depending upon the hardware, all read
342 and write operations on kernel variables can be monitored for
343 accesses.
344
345 The results will be displayed in:
346 /debugfs/tracing/profile_ksym
347
348 Say N if unsure.
349
350config STACK_TRACER 302config STACK_TRACER
351 bool "Trace max stack" 303 bool "Trace max stack"
352 depends on HAVE_FUNCTION_TRACER 304 depends on HAVE_FUNCTION_TRACER
@@ -371,26 +323,6 @@ config STACK_TRACER
371 323
372 Say N if unsure. 324 Say N if unsure.
373 325
374config KMEMTRACE
375 bool "Trace SLAB allocations"
376 select GENERIC_TRACER
377 help
378 kmemtrace provides tracing for slab allocator functions, such as
379 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
380 data is then fed to the userspace application in order to analyse
381 allocation hotspots, internal fragmentation and so on, making it
382 possible to see how well an allocator performs, as well as debug
383 and profile kernel code.
384
385 This requires an userspace application to use. See
386 Documentation/trace/kmemtrace.txt for more information.
387
388 Saying Y will make the kernel somewhat larger and slower. However,
389 if you disable kmemtrace at run-time or boot-time, the performance
390 impact is minimal (depending on the arch the kernel is built for).
391
392 If unsure, say N.
393
394config WORKQUEUE_TRACER 326config WORKQUEUE_TRACER
395 bool "Trace workqueues" 327 bool "Trace workqueues"
396 select GENERIC_TRACER 328 select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 4215530b490b..53f338190b26 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
30obj-$(CONFIG_TRACING) += trace_stat.o 30obj-$(CONFIG_TRACING) += trace_stat.o
31obj-$(CONFIG_TRACING) += trace_printk.o 31obj-$(CONFIG_TRACING) += trace_printk.o
32obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 32obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
33obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
34obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o 33obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
35obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 34obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
36obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 35obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
38obj-$(CONFIG_NOP_TRACER) += trace_nop.o 37obj-$(CONFIG_NOP_TRACER) += trace_nop.o
39obj-$(CONFIG_STACK_TRACER) += trace_stack.o 38obj-$(CONFIG_STACK_TRACER) += trace_stack.o
40obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 39obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 40obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 41obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
45obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 42obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
46obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 43obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
47ifeq ($(CONFIG_BLOCK),y) 44ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
55endif 52endif
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
57obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
58obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
59obj-$(CONFIG_EVENT_TRACING) += power-traces.o 55obj-$(CONFIG_EVENT_TRACING) += power-traces.o
60ifeq ($(CONFIG_TRACING),y) 56ifeq ($(CONFIG_TRACING),y)
61obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6d2cb14f9449..0d88ce9b9fb8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1883 struct hlist_head *hhd; 1883 struct hlist_head *hhd;
1884 struct hlist_node *n; 1884 struct hlist_node *n;
1885 unsigned long key; 1885 unsigned long key;
1886 int resched;
1887 1886
1888 key = hash_long(ip, FTRACE_HASH_BITS); 1887 key = hash_long(ip, FTRACE_HASH_BITS);
1889 1888
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1897 * period. This syncs the hash iteration and freeing of items 1896 * period. This syncs the hash iteration and freeing of items
1898 * on the hash. rcu_read_lock is too dangerous here. 1897 * on the hash. rcu_read_lock is too dangerous here.
1899 */ 1898 */
1900 resched = ftrace_preempt_disable(); 1899 preempt_disable_notrace();
1901 hlist_for_each_entry_rcu(entry, n, hhd, node) { 1900 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1902 if (entry->ip == ip) 1901 if (entry->ip == ip)
1903 entry->ops->func(ip, parent_ip, &entry->data); 1902 entry->ops->func(ip, parent_ip, &entry->data);
1904 } 1903 }
1905 ftrace_preempt_enable(resched); 1904 preempt_enable_notrace();
1906} 1905}
1907 1906
1908static struct ftrace_ops trace_probe_ops __read_mostly = 1907static struct ftrace_ops trace_probe_ops __read_mostly =
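
The conversion above from ftrace_preempt_disable()/ftrace_preempt_enable() to plain preempt_disable_notrace()/preempt_enable_notrace() is repeated throughout this patch (ring_buffer.c, trace.c and trace_clock.c below), and the helpers themselves are removed from kernel/trace/trace.h further down. As a rough before/after sketch, a simplified illustration rather than code from the patch, the shape of a trace-safe critical section changes like this:

#include <linux/preempt.h>

/* Before: the helper captured need_resched() before disabling preemption,
 * so the matching enable could use the *_no_resched_notrace() variant and
 * avoid recursing into schedule() from inside a tracer callback (see the
 * removed helpers in kernel/trace/trace.h below).
 */
static void probe_old_style(unsigned long ip)
{
	int resched;

	resched = ftrace_preempt_disable();
	/* ... update per-cpu tracing state ... */
	ftrace_preempt_enable(resched);
}

/* After: the bookkeeping is dropped and the notrace preempt primitives are
 * used directly. probe_old_style() above only builds against the pre-patch
 * trace.h and is shown purely for contrast.
 */
static void probe_new_style(unsigned long ip)
{
	preempt_disable_notrace();
	/* ... update per-cpu tracing state ... */
	preempt_enable_notrace();
}
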
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index bbfc1bb1660b..000000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,529 +0,0 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/tracepoint.h>
10#include <linux/seq_file.h>
11#include <linux/debugfs.h>
12#include <linux/dcache.h>
13#include <linux/fs.h>
14
15#include <linux/kmemtrace.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20/* Select an alternative, minimalistic output than the original one */
21#define TRACE_KMEM_OPT_MINIMAL 0x1
22
23static struct tracer_opt kmem_opts[] = {
24 /* Default disable the minimalistic output */
25 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
26 { }
27};
28
29static struct tracer_flags kmem_tracer_flags = {
30 .val = 0,
31 .opts = kmem_opts
32};
33
34static struct trace_array *kmemtrace_array;
35
36/* Trace allocations */
37static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
38 unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node)
44{
45 struct ftrace_event_call *call = &event_kmem_alloc;
46 struct trace_array *tr = kmemtrace_array;
47 struct kmemtrace_alloc_entry *entry;
48 struct ring_buffer_event *event;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
51 if (!event)
52 return;
53
54 entry = ring_buffer_event_data(event);
55 tracing_generic_entry_update(&entry->ent, 0, 0);
56
57 entry->ent.type = TRACE_KMEM_ALLOC;
58 entry->type_id = type_id;
59 entry->call_site = call_site;
60 entry->ptr = ptr;
61 entry->bytes_req = bytes_req;
62 entry->bytes_alloc = bytes_alloc;
63 entry->gfp_flags = gfp_flags;
64 entry->node = node;
65
66 if (!filter_check_discard(call, entry, tr->buffer, event))
67 ring_buffer_unlock_commit(tr->buffer, event);
68
69 trace_wake_up();
70}
71
72static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
73 unsigned long call_site,
74 const void *ptr)
75{
76 struct ftrace_event_call *call = &event_kmem_free;
77 struct trace_array *tr = kmemtrace_array;
78 struct kmemtrace_free_entry *entry;
79 struct ring_buffer_event *event;
80
81 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
82 if (!event)
83 return;
84 entry = ring_buffer_event_data(event);
85 tracing_generic_entry_update(&entry->ent, 0, 0);
86
87 entry->ent.type = TRACE_KMEM_FREE;
88 entry->type_id = type_id;
89 entry->call_site = call_site;
90 entry->ptr = ptr;
91
92 if (!filter_check_discard(call, entry, tr->buffer, event))
93 ring_buffer_unlock_commit(tr->buffer, event);
94
95 trace_wake_up();
96}
97
98static void kmemtrace_kmalloc(void *ignore,
99 unsigned long call_site,
100 const void *ptr,
101 size_t bytes_req,
102 size_t bytes_alloc,
103 gfp_t gfp_flags)
104{
105 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
106 bytes_req, bytes_alloc, gfp_flags, -1);
107}
108
109static void kmemtrace_kmem_cache_alloc(void *ignore,
110 unsigned long call_site,
111 const void *ptr,
112 size_t bytes_req,
113 size_t bytes_alloc,
114 gfp_t gfp_flags)
115{
116 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
117 bytes_req, bytes_alloc, gfp_flags, -1);
118}
119
120static void kmemtrace_kmalloc_node(void *ignore,
121 unsigned long call_site,
122 const void *ptr,
123 size_t bytes_req,
124 size_t bytes_alloc,
125 gfp_t gfp_flags,
126 int node)
127{
128 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
129 bytes_req, bytes_alloc, gfp_flags, node);
130}
131
132static void kmemtrace_kmem_cache_alloc_node(void *ignore,
133 unsigned long call_site,
134 const void *ptr,
135 size_t bytes_req,
136 size_t bytes_alloc,
137 gfp_t gfp_flags,
138 int node)
139{
140 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
141 bytes_req, bytes_alloc, gfp_flags, node);
142}
143
144static void
145kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
146{
147 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
148}
149
150static void kmemtrace_kmem_cache_free(void *ignore,
151 unsigned long call_site, const void *ptr)
152{
153 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
154}
155
156static int kmemtrace_start_probes(void)
157{
158 int err;
159
160 err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
161 if (err)
162 return err;
163 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
164 if (err)
165 return err;
166 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
167 if (err)
168 return err;
169 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
170 if (err)
171 return err;
172 err = register_trace_kfree(kmemtrace_kfree, NULL);
173 if (err)
174 return err;
175 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
176
177 return err;
178}
179
180static void kmemtrace_stop_probes(void)
181{
182 unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
183 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
184 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
185 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
186 unregister_trace_kfree(kmemtrace_kfree, NULL);
187 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
188}
189
190static int kmem_trace_init(struct trace_array *tr)
191{
192 kmemtrace_array = tr;
193
194 tracing_reset_online_cpus(tr);
195
196 kmemtrace_start_probes();
197
198 return 0;
199}
200
201static void kmem_trace_reset(struct trace_array *tr)
202{
203 kmemtrace_stop_probes();
204}
205
206static void kmemtrace_headers(struct seq_file *s)
207{
208 /* Don't need headers for the original kmemtrace output */
209 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
210 return;
211
212 seq_printf(s, "#\n");
213 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
214 " POINTER NODE CALLER\n");
215 seq_printf(s, "# FREE | | | | "
216 " | | | |\n");
217 seq_printf(s, "# |\n\n");
218}
219
220/*
221 * The following functions give the original output from kmemtrace,
222 * plus the origin CPU, since reordering occurs in-kernel now.
223 */
224
225#define KMEMTRACE_USER_ALLOC 0
226#define KMEMTRACE_USER_FREE 1
227
228struct kmemtrace_user_event {
229 u8 event_id;
230 u8 type_id;
231 u16 event_size;
232 u32 cpu;
233 u64 timestamp;
234 unsigned long call_site;
235 unsigned long ptr;
236};
237
238struct kmemtrace_user_event_alloc {
239 size_t bytes_req;
240 size_t bytes_alloc;
241 unsigned gfp_flags;
242 int node;
243};
244
245static enum print_line_t
246kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
247 struct trace_event *event)
248{
249 struct trace_seq *s = &iter->seq;
250 struct kmemtrace_alloc_entry *entry;
251 int ret;
252
253 trace_assign_type(entry, iter->ent);
254
255 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
256 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
257 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
258 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
259 (unsigned long)entry->gfp_flags, entry->node);
260
261 if (!ret)
262 return TRACE_TYPE_PARTIAL_LINE;
263 return TRACE_TYPE_HANDLED;
264}
265
266static enum print_line_t
267kmemtrace_print_free(struct trace_iterator *iter, int flags,
268 struct trace_event *event)
269{
270 struct trace_seq *s = &iter->seq;
271 struct kmemtrace_free_entry *entry;
272 int ret;
273
274 trace_assign_type(entry, iter->ent);
275
276 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
277 entry->type_id, (void *)entry->call_site,
278 (unsigned long)entry->ptr);
279
280 if (!ret)
281 return TRACE_TYPE_PARTIAL_LINE;
282 return TRACE_TYPE_HANDLED;
283}
284
285static enum print_line_t
286kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
287 struct trace_event *event)
288{
289 struct trace_seq *s = &iter->seq;
290 struct kmemtrace_alloc_entry *entry;
291 struct kmemtrace_user_event *ev;
292 struct kmemtrace_user_event_alloc *ev_alloc;
293
294 trace_assign_type(entry, iter->ent);
295
296 ev = trace_seq_reserve(s, sizeof(*ev));
297 if (!ev)
298 return TRACE_TYPE_PARTIAL_LINE;
299
300 ev->event_id = KMEMTRACE_USER_ALLOC;
301 ev->type_id = entry->type_id;
302 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
303 ev->cpu = iter->cpu;
304 ev->timestamp = iter->ts;
305 ev->call_site = entry->call_site;
306 ev->ptr = (unsigned long)entry->ptr;
307
308 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
309 if (!ev_alloc)
310 return TRACE_TYPE_PARTIAL_LINE;
311
312 ev_alloc->bytes_req = entry->bytes_req;
313 ev_alloc->bytes_alloc = entry->bytes_alloc;
314 ev_alloc->gfp_flags = entry->gfp_flags;
315 ev_alloc->node = entry->node;
316
317 return TRACE_TYPE_HANDLED;
318}
319
320static enum print_line_t
321kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
322 struct trace_event *event)
323{
324 struct trace_seq *s = &iter->seq;
325 struct kmemtrace_free_entry *entry;
326 struct kmemtrace_user_event *ev;
327
328 trace_assign_type(entry, iter->ent);
329
330 ev = trace_seq_reserve(s, sizeof(*ev));
331 if (!ev)
332 return TRACE_TYPE_PARTIAL_LINE;
333
334 ev->event_id = KMEMTRACE_USER_FREE;
335 ev->type_id = entry->type_id;
336 ev->event_size = sizeof(*ev);
337 ev->cpu = iter->cpu;
338 ev->timestamp = iter->ts;
339 ev->call_site = entry->call_site;
340 ev->ptr = (unsigned long)entry->ptr;
341
342 return TRACE_TYPE_HANDLED;
343}
344
345/* The two other following provide a more minimalistic output */
346static enum print_line_t
347kmemtrace_print_alloc_compress(struct trace_iterator *iter)
348{
349 struct kmemtrace_alloc_entry *entry;
350 struct trace_seq *s = &iter->seq;
351 int ret;
352
353 trace_assign_type(entry, iter->ent);
354
355 /* Alloc entry */
356 ret = trace_seq_printf(s, " + ");
357 if (!ret)
358 return TRACE_TYPE_PARTIAL_LINE;
359
360 /* Type */
361 switch (entry->type_id) {
362 case KMEMTRACE_TYPE_KMALLOC:
363 ret = trace_seq_printf(s, "K ");
364 break;
365 case KMEMTRACE_TYPE_CACHE:
366 ret = trace_seq_printf(s, "C ");
367 break;
368 case KMEMTRACE_TYPE_PAGES:
369 ret = trace_seq_printf(s, "P ");
370 break;
371 default:
372 ret = trace_seq_printf(s, "? ");
373 }
374
375 if (!ret)
376 return TRACE_TYPE_PARTIAL_LINE;
377
378 /* Requested */
379 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
380 if (!ret)
381 return TRACE_TYPE_PARTIAL_LINE;
382
383 /* Allocated */
384 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
385 if (!ret)
386 return TRACE_TYPE_PARTIAL_LINE;
387
388 /* Flags
389 * TODO: would be better to see the name of the GFP flag names
390 */
391 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
392 if (!ret)
393 return TRACE_TYPE_PARTIAL_LINE;
394
395 /* Pointer to allocated */
396 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
397 if (!ret)
398 return TRACE_TYPE_PARTIAL_LINE;
399
400 /* Node and call site*/
401 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
402 (void *)entry->call_site);
403 if (!ret)
404 return TRACE_TYPE_PARTIAL_LINE;
405
406 return TRACE_TYPE_HANDLED;
407}
408
409static enum print_line_t
410kmemtrace_print_free_compress(struct trace_iterator *iter)
411{
412 struct kmemtrace_free_entry *entry;
413 struct trace_seq *s = &iter->seq;
414 int ret;
415
416 trace_assign_type(entry, iter->ent);
417
418 /* Free entry */
419 ret = trace_seq_printf(s, " - ");
420 if (!ret)
421 return TRACE_TYPE_PARTIAL_LINE;
422
423 /* Type */
424 switch (entry->type_id) {
425 case KMEMTRACE_TYPE_KMALLOC:
426 ret = trace_seq_printf(s, "K ");
427 break;
428 case KMEMTRACE_TYPE_CACHE:
429 ret = trace_seq_printf(s, "C ");
430 break;
431 case KMEMTRACE_TYPE_PAGES:
432 ret = trace_seq_printf(s, "P ");
433 break;
434 default:
435 ret = trace_seq_printf(s, "? ");
436 }
437
438 if (!ret)
439 return TRACE_TYPE_PARTIAL_LINE;
440
441 /* Skip requested/allocated/flags */
442 ret = trace_seq_printf(s, " ");
443 if (!ret)
444 return TRACE_TYPE_PARTIAL_LINE;
445
446 /* Pointer to allocated */
447 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
448 if (!ret)
449 return TRACE_TYPE_PARTIAL_LINE;
450
451 /* Skip node and print call site*/
452 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
453 if (!ret)
454 return TRACE_TYPE_PARTIAL_LINE;
455
456 return TRACE_TYPE_HANDLED;
457}
458
459static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
460{
461 struct trace_entry *entry = iter->ent;
462
463 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
464 return TRACE_TYPE_UNHANDLED;
465
466 switch (entry->type) {
467 case TRACE_KMEM_ALLOC:
468 return kmemtrace_print_alloc_compress(iter);
469 case TRACE_KMEM_FREE:
470 return kmemtrace_print_free_compress(iter);
471 default:
472 return TRACE_TYPE_UNHANDLED;
473 }
474}
475
476static struct trace_event_functions kmem_trace_alloc_funcs = {
477 .trace = kmemtrace_print_alloc,
478 .binary = kmemtrace_print_alloc_user,
479};
480
481static struct trace_event kmem_trace_alloc = {
482 .type = TRACE_KMEM_ALLOC,
483 .funcs = &kmem_trace_alloc_funcs,
484};
485
486static struct trace_event_functions kmem_trace_free_funcs = {
487 .trace = kmemtrace_print_free,
488 .binary = kmemtrace_print_free_user,
489};
490
491static struct trace_event kmem_trace_free = {
492 .type = TRACE_KMEM_FREE,
493 .funcs = &kmem_trace_free_funcs,
494};
495
496static struct tracer kmem_tracer __read_mostly = {
497 .name = "kmemtrace",
498 .init = kmem_trace_init,
499 .reset = kmem_trace_reset,
500 .print_line = kmemtrace_print_line,
501 .print_header = kmemtrace_headers,
502 .flags = &kmem_tracer_flags
503};
504
505void kmemtrace_init(void)
506{
507 /* earliest opportunity to start kmem tracing */
508}
509
510static int __init init_kmem_tracer(void)
511{
512 if (!register_ftrace_event(&kmem_trace_alloc)) {
513 pr_warning("Warning: could not register kmem events\n");
514 return 1;
515 }
516
517 if (!register_ftrace_event(&kmem_trace_free)) {
518 pr_warning("Warning: could not register kmem events\n");
519 return 1;
520 }
521
522 if (register_tracer(&kmem_tracer) != 0) {
523 pr_warning("Warning: could not register the kmem tracer\n");
524 return 1;
525 }
526
527 return 0;
528}
529device_initcall(init_kmem_tracer);
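
As the code above shows, kmemtrace was a consumer of the kmalloc/kmem_cache_* tracepoints rather than a separate hook mechanism, and those tracepoints stay in the tree; equivalent data can still be gathered by attaching a probe directly. A minimal sketch, assuming the kmem tracepoints are visible to the code doing the registration (which depends on kernel version and configuration), could look like:

#include <linux/kernel.h>
#include <linux/module.h>
#include <trace/events/kmem.h>

/* Probe signature mirrors kmemtrace_kmalloc() above: the leading void *
 * is the per-registration data pointer (NULL here).
 */
static void example_kmalloc_probe(void *ignore, unsigned long call_site,
				  const void *ptr, size_t bytes_req,
				  size_t bytes_alloc, gfp_t gfp_flags)
{
	trace_printk("kmalloc from %pf: req=%zu alloc=%zu\n",
		     (void *)call_site, bytes_req, bytes_alloc);
}

static int __init example_probe_init(void)
{
	return register_trace_kmalloc(example_kmalloc_probe, NULL);
}

static void __exit example_probe_exit(void)
{
	unregister_trace_kmalloc(example_kmalloc_probe, NULL);
	tracepoint_synchronize_unregister();
}

module_init(example_probe_init);
module_exit(example_probe_exit);
MODULE_LICENSE("GPL");
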
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1da7b6ea8b85..3632ce87674f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -443,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
443 */ 443 */
444struct ring_buffer_per_cpu { 444struct ring_buffer_per_cpu {
445 int cpu; 445 int cpu;
446 atomic_t record_disabled;
446 struct ring_buffer *buffer; 447 struct ring_buffer *buffer;
447 spinlock_t reader_lock; /* serialize readers */ 448 spinlock_t reader_lock; /* serialize readers */
448 arch_spinlock_t lock; 449 arch_spinlock_t lock;
@@ -462,7 +463,6 @@ struct ring_buffer_per_cpu {
462 unsigned long read; 463 unsigned long read;
463 u64 write_stamp; 464 u64 write_stamp;
464 u64 read_stamp; 465 u64 read_stamp;
465 atomic_t record_disabled;
466}; 466};
467 467
468struct ring_buffer { 468struct ring_buffer {
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
2242 2242
2243#endif 2243#endif
2244 2244
2245static DEFINE_PER_CPU(int, rb_need_resched);
2246
2247/** 2245/**
2248 * ring_buffer_lock_reserve - reserve a part of the buffer 2246 * ring_buffer_lock_reserve - reserve a part of the buffer
2249 * @buffer: the ring buffer to reserve from 2247 * @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2264{ 2262{
2265 struct ring_buffer_per_cpu *cpu_buffer; 2263 struct ring_buffer_per_cpu *cpu_buffer;
2266 struct ring_buffer_event *event; 2264 struct ring_buffer_event *event;
2267 int cpu, resched; 2265 int cpu;
2268 2266
2269 if (ring_buffer_flags != RB_BUFFERS_ON) 2267 if (ring_buffer_flags != RB_BUFFERS_ON)
2270 return NULL; 2268 return NULL;
2271 2269
2272 /* If we are tracing schedule, we don't want to recurse */ 2270 /* If we are tracing schedule, we don't want to recurse */
2273 resched = ftrace_preempt_disable(); 2271 preempt_disable_notrace();
2274 2272
2275 if (atomic_read(&buffer->record_disabled)) 2273 if (atomic_read(&buffer->record_disabled))
2276 goto out_nocheck; 2274 goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2295 if (!event) 2293 if (!event)
2296 goto out; 2294 goto out;
2297 2295
2298 /*
2299 * Need to store resched state on this cpu.
2300 * Only the first needs to.
2301 */
2302
2303 if (preempt_count() == 1)
2304 per_cpu(rb_need_resched, cpu) = resched;
2305
2306 return event; 2296 return event;
2307 2297
2308 out: 2298 out:
2309 trace_recursive_unlock(); 2299 trace_recursive_unlock();
2310 2300
2311 out_nocheck: 2301 out_nocheck:
2312 ftrace_preempt_enable(resched); 2302 preempt_enable_notrace();
2313 return NULL; 2303 return NULL;
2314} 2304}
2315EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2305EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2355 2345
2356 trace_recursive_unlock(); 2346 trace_recursive_unlock();
2357 2347
2358 /* 2348 preempt_enable_notrace();
2359 * Only the last preempt count needs to restore preemption.
2360 */
2361 if (preempt_count() == 1)
2362 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2363 else
2364 preempt_enable_no_resched_notrace();
2365 2349
2366 return 0; 2350 return 0;
2367} 2351}
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
2469 2453
2470 trace_recursive_unlock(); 2454 trace_recursive_unlock();
2471 2455
2472 /* 2456 preempt_enable_notrace();
2473 * Only the last preempt count needs to restore preemption.
2474 */
2475 if (preempt_count() == 1)
2476 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2477 else
2478 preempt_enable_no_resched_notrace();
2479 2457
2480} 2458}
2481EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); 2459EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
2501 struct ring_buffer_event *event; 2479 struct ring_buffer_event *event;
2502 void *body; 2480 void *body;
2503 int ret = -EBUSY; 2481 int ret = -EBUSY;
2504 int cpu, resched; 2482 int cpu;
2505 2483
2506 if (ring_buffer_flags != RB_BUFFERS_ON) 2484 if (ring_buffer_flags != RB_BUFFERS_ON)
2507 return -EBUSY; 2485 return -EBUSY;
2508 2486
2509 resched = ftrace_preempt_disable(); 2487 preempt_disable_notrace();
2510 2488
2511 if (atomic_read(&buffer->record_disabled)) 2489 if (atomic_read(&buffer->record_disabled))
2512 goto out; 2490 goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
2536 2514
2537 ret = 0; 2515 ret = 0;
2538 out: 2516 out:
2539 ftrace_preempt_enable(resched); 2517 preempt_enable_notrace();
2540 2518
2541 return ret; 2519 return ret;
2542} 2520}
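
Beyond dropping the per-cpu rb_need_resched bookkeeping, the write path itself is unchanged. For orientation, the reserve/commit pair this hunk simplifies is used roughly as follows (a sketch with a made-up event type, error handling trimmed):

#include <linux/errno.h>
#include <linux/ring_buffer.h>
#include <linux/types.h>

struct example_event {
	unsigned long	ip;
	u64		value;
};

/* Reserve space, fill the payload, commit. Preemption handling is now
 * entirely internal to ring_buffer_lock_reserve()/ring_buffer_unlock_commit(),
 * so the caller no longer carries any resched state across the pair.
 */
static int example_write_event(struct ring_buffer *buffer,
			       unsigned long ip, u64 value)
{
	struct ring_buffer_event *event;
	struct example_event *entry;

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return -EBUSY;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->value = value;

	return ring_buffer_unlock_commit(buffer, event);
}
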
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d6736b93dc2a..ed1032d6f81d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -341,7 +341,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
341/* trace_flags holds trace_options default values */ 341/* trace_flags holds trace_options default values */
342unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 342unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
343 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 343 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
344 TRACE_ITER_GRAPH_TIME; 344 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
345 345
346static int trace_stop_count; 346static int trace_stop_count;
347static DEFINE_SPINLOCK(tracing_start_lock); 347static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +425,7 @@ static const char *trace_options[] = {
425 "latency-format", 425 "latency-format",
426 "sleep-time", 426 "sleep-time",
427 "graph-time", 427 "graph-time",
428 "record-cmd",
428 NULL 429 NULL
429}; 430};
430 431
@@ -656,6 +657,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
656 return; 657 return;
657 658
658 WARN_ON_ONCE(!irqs_disabled()); 659 WARN_ON_ONCE(!irqs_disabled());
660 if (!current_trace->use_max_tr) {
661 WARN_ON_ONCE(1);
662 return;
663 }
659 arch_spin_lock(&ftrace_max_lock); 664 arch_spin_lock(&ftrace_max_lock);
660 665
661 tr->buffer = max_tr.buffer; 666 tr->buffer = max_tr.buffer;
@@ -682,6 +687,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
682 return; 687 return;
683 688
684 WARN_ON_ONCE(!irqs_disabled()); 689 WARN_ON_ONCE(!irqs_disabled());
690 if (!current_trace->use_max_tr) {
691 WARN_ON_ONCE(1);
692 return;
693 }
694
685 arch_spin_lock(&ftrace_max_lock); 695 arch_spin_lock(&ftrace_max_lock);
686 696
687 ftrace_disable_cpu(); 697 ftrace_disable_cpu();
@@ -726,7 +736,7 @@ __acquires(kernel_lock)
726 return -1; 736 return -1;
727 } 737 }
728 738
729 if (strlen(type->name) > MAX_TRACER_SIZE) { 739 if (strlen(type->name) >= MAX_TRACER_SIZE) {
730 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 740 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
731 return -1; 741 return -1;
732 } 742 }
@@ -1328,61 +1338,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1328 1338
1329#endif /* CONFIG_STACKTRACE */ 1339#endif /* CONFIG_STACKTRACE */
1330 1340
1331static void
1332ftrace_trace_special(void *__tr,
1333 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1334 int pc)
1335{
1336 struct ftrace_event_call *call = &event_special;
1337 struct ring_buffer_event *event;
1338 struct trace_array *tr = __tr;
1339 struct ring_buffer *buffer = tr->buffer;
1340 struct special_entry *entry;
1341
1342 event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
1343 sizeof(*entry), 0, pc);
1344 if (!event)
1345 return;
1346 entry = ring_buffer_event_data(event);
1347 entry->arg1 = arg1;
1348 entry->arg2 = arg2;
1349 entry->arg3 = arg3;
1350
1351 if (!filter_check_discard(call, entry, buffer, event))
1352 trace_buffer_unlock_commit(buffer, event, 0, pc);
1353}
1354
1355void
1356__trace_special(void *__tr, void *__data,
1357 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1358{
1359 ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1360}
1361
1362void
1363ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1364{
1365 struct trace_array *tr = &global_trace;
1366 struct trace_array_cpu *data;
1367 unsigned long flags;
1368 int cpu;
1369 int pc;
1370
1371 if (tracing_disabled)
1372 return;
1373
1374 pc = preempt_count();
1375 local_irq_save(flags);
1376 cpu = raw_smp_processor_id();
1377 data = tr->data[cpu];
1378
1379 if (likely(atomic_inc_return(&data->disabled) == 1))
1380 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1381
1382 atomic_dec(&data->disabled);
1383 local_irq_restore(flags);
1384}
1385
1386/** 1341/**
1387 * trace_vbprintk - write binary msg to tracing buffer 1342 * trace_vbprintk - write binary msg to tracing buffer
1388 * 1343 *
@@ -1401,7 +1356,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1401 struct bprint_entry *entry; 1356 struct bprint_entry *entry;
1402 unsigned long flags; 1357 unsigned long flags;
1403 int disable; 1358 int disable;
1404 int resched;
1405 int cpu, len = 0, size, pc; 1359 int cpu, len = 0, size, pc;
1406 1360
1407 if (unlikely(tracing_selftest_running || tracing_disabled)) 1361 if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1411,7 +1365,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1411 pause_graph_tracing(); 1365 pause_graph_tracing();
1412 1366
1413 pc = preempt_count(); 1367 pc = preempt_count();
1414 resched = ftrace_preempt_disable(); 1368 preempt_disable_notrace();
1415 cpu = raw_smp_processor_id(); 1369 cpu = raw_smp_processor_id();
1416 data = tr->data[cpu]; 1370 data = tr->data[cpu];
1417 1371
@@ -1449,7 +1403,7 @@ out_unlock:
1449 1403
1450out: 1404out:
1451 atomic_dec_return(&data->disabled); 1405 atomic_dec_return(&data->disabled);
1452 ftrace_preempt_enable(resched); 1406 preempt_enable_notrace();
1453 unpause_graph_tracing(); 1407 unpause_graph_tracing();
1454 1408
1455 return len; 1409 return len;
@@ -2386,6 +2340,7 @@ static const struct file_operations show_traces_fops = {
2386 .open = show_traces_open, 2340 .open = show_traces_open,
2387 .read = seq_read, 2341 .read = seq_read,
2388 .release = seq_release, 2342 .release = seq_release,
2343 .llseek = seq_lseek,
2389}; 2344};
2390 2345
2391/* 2346/*
@@ -2479,6 +2434,7 @@ static const struct file_operations tracing_cpumask_fops = {
2479 .open = tracing_open_generic, 2434 .open = tracing_open_generic,
2480 .read = tracing_cpumask_read, 2435 .read = tracing_cpumask_read,
2481 .write = tracing_cpumask_write, 2436 .write = tracing_cpumask_write,
2437 .llseek = generic_file_llseek,
2482}; 2438};
2483 2439
2484static int tracing_trace_options_show(struct seq_file *m, void *v) 2440static int tracing_trace_options_show(struct seq_file *m, void *v)
@@ -2554,6 +2510,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2554 trace_flags |= mask; 2510 trace_flags |= mask;
2555 else 2511 else
2556 trace_flags &= ~mask; 2512 trace_flags &= ~mask;
2513
2514 if (mask == TRACE_ITER_RECORD_CMD)
2515 trace_event_enable_cmd_record(enabled);
2557} 2516}
2558 2517
2559static ssize_t 2518static ssize_t
@@ -2645,6 +2604,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
2645static const struct file_operations tracing_readme_fops = { 2604static const struct file_operations tracing_readme_fops = {
2646 .open = tracing_open_generic, 2605 .open = tracing_open_generic,
2647 .read = tracing_readme_read, 2606 .read = tracing_readme_read,
2607 .llseek = generic_file_llseek,
2648}; 2608};
2649 2609
2650static ssize_t 2610static ssize_t
@@ -2695,6 +2655,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2695static const struct file_operations tracing_saved_cmdlines_fops = { 2655static const struct file_operations tracing_saved_cmdlines_fops = {
2696 .open = tracing_open_generic, 2656 .open = tracing_open_generic,
2697 .read = tracing_saved_cmdlines_read, 2657 .read = tracing_saved_cmdlines_read,
2658 .llseek = generic_file_llseek,
2698}; 2659};
2699 2660
2700static ssize_t 2661static ssize_t
@@ -2790,6 +2751,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
2790 if (ret < 0) 2751 if (ret < 0)
2791 return ret; 2752 return ret;
2792 2753
2754 if (!current_trace->use_max_tr)
2755 goto out;
2756
2793 ret = ring_buffer_resize(max_tr.buffer, size); 2757 ret = ring_buffer_resize(max_tr.buffer, size);
2794 if (ret < 0) { 2758 if (ret < 0) {
2795 int r; 2759 int r;
@@ -2817,11 +2781,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
2817 return ret; 2781 return ret;
2818 } 2782 }
2819 2783
2784 max_tr.entries = size;
2785 out:
2820 global_trace.entries = size; 2786 global_trace.entries = size;
2821 2787
2822 return ret; 2788 return ret;
2823} 2789}
2824 2790
2791
2825/** 2792/**
2826 * tracing_update_buffers - used by tracing facility to expand ring buffers 2793 * tracing_update_buffers - used by tracing facility to expand ring buffers
2827 * 2794 *
@@ -2882,12 +2849,26 @@ static int tracing_set_tracer(const char *buf)
2882 trace_branch_disable(); 2849 trace_branch_disable();
2883 if (current_trace && current_trace->reset) 2850 if (current_trace && current_trace->reset)
2884 current_trace->reset(tr); 2851 current_trace->reset(tr);
2885 2852 if (current_trace && current_trace->use_max_tr) {
2853 /*
2854 * We don't free the ring buffer. instead, resize it because
2855 * The max_tr ring buffer has some state (e.g. ring->clock) and
2856 * we want preserve it.
2857 */
2858 ring_buffer_resize(max_tr.buffer, 1);
2859 max_tr.entries = 1;
2860 }
2886 destroy_trace_option_files(topts); 2861 destroy_trace_option_files(topts);
2887 2862
2888 current_trace = t; 2863 current_trace = t;
2889 2864
2890 topts = create_trace_option_files(current_trace); 2865 topts = create_trace_option_files(current_trace);
2866 if (current_trace->use_max_tr) {
2867 ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
2868 if (ret < 0)
2869 goto out;
2870 max_tr.entries = global_trace.entries;
2871 }
2891 2872
2892 if (t->init) { 2873 if (t->init) {
2893 ret = tracer_init(t, tr); 2874 ret = tracer_init(t, tr);
@@ -3024,6 +3005,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
3024 if (iter->trace->pipe_open) 3005 if (iter->trace->pipe_open)
3025 iter->trace->pipe_open(iter); 3006 iter->trace->pipe_open(iter);
3026 3007
3008 nonseekable_open(inode, filp);
3027out: 3009out:
3028 mutex_unlock(&trace_types_lock); 3010 mutex_unlock(&trace_types_lock);
3029 return ret; 3011 return ret;
@@ -3469,7 +3451,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3469 } 3451 }
3470 3452
3471 tracing_start(); 3453 tracing_start();
3472 max_tr.entries = global_trace.entries;
3473 mutex_unlock(&trace_types_lock); 3454 mutex_unlock(&trace_types_lock);
3474 3455
3475 return cnt; 3456 return cnt;
@@ -3582,18 +3563,21 @@ static const struct file_operations tracing_max_lat_fops = {
3582 .open = tracing_open_generic, 3563 .open = tracing_open_generic,
3583 .read = tracing_max_lat_read, 3564 .read = tracing_max_lat_read,
3584 .write = tracing_max_lat_write, 3565 .write = tracing_max_lat_write,
3566 .llseek = generic_file_llseek,
3585}; 3567};
3586 3568
3587static const struct file_operations tracing_ctrl_fops = { 3569static const struct file_operations tracing_ctrl_fops = {
3588 .open = tracing_open_generic, 3570 .open = tracing_open_generic,
3589 .read = tracing_ctrl_read, 3571 .read = tracing_ctrl_read,
3590 .write = tracing_ctrl_write, 3572 .write = tracing_ctrl_write,
3573 .llseek = generic_file_llseek,
3591}; 3574};
3592 3575
3593static const struct file_operations set_tracer_fops = { 3576static const struct file_operations set_tracer_fops = {
3594 .open = tracing_open_generic, 3577 .open = tracing_open_generic,
3595 .read = tracing_set_trace_read, 3578 .read = tracing_set_trace_read,
3596 .write = tracing_set_trace_write, 3579 .write = tracing_set_trace_write,
3580 .llseek = generic_file_llseek,
3597}; 3581};
3598 3582
3599static const struct file_operations tracing_pipe_fops = { 3583static const struct file_operations tracing_pipe_fops = {
@@ -3602,17 +3586,20 @@ static const struct file_operations tracing_pipe_fops = {
3602 .read = tracing_read_pipe, 3586 .read = tracing_read_pipe,
3603 .splice_read = tracing_splice_read_pipe, 3587 .splice_read = tracing_splice_read_pipe,
3604 .release = tracing_release_pipe, 3588 .release = tracing_release_pipe,
3589 .llseek = no_llseek,
3605}; 3590};
3606 3591
3607static const struct file_operations tracing_entries_fops = { 3592static const struct file_operations tracing_entries_fops = {
3608 .open = tracing_open_generic, 3593 .open = tracing_open_generic,
3609 .read = tracing_entries_read, 3594 .read = tracing_entries_read,
3610 .write = tracing_entries_write, 3595 .write = tracing_entries_write,
3596 .llseek = generic_file_llseek,
3611}; 3597};
3612 3598
3613static const struct file_operations tracing_mark_fops = { 3599static const struct file_operations tracing_mark_fops = {
3614 .open = tracing_open_generic, 3600 .open = tracing_open_generic,
3615 .write = tracing_mark_write, 3601 .write = tracing_mark_write,
3602 .llseek = generic_file_llseek,
3616}; 3603};
3617 3604
3618static const struct file_operations trace_clock_fops = { 3605static const struct file_operations trace_clock_fops = {
@@ -3918,6 +3905,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3918static const struct file_operations tracing_stats_fops = { 3905static const struct file_operations tracing_stats_fops = {
3919 .open = tracing_open_generic, 3906 .open = tracing_open_generic,
3920 .read = tracing_stats_read, 3907 .read = tracing_stats_read,
3908 .llseek = generic_file_llseek,
3921}; 3909};
3922 3910
3923#ifdef CONFIG_DYNAMIC_FTRACE 3911#ifdef CONFIG_DYNAMIC_FTRACE
@@ -3954,6 +3942,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3954static const struct file_operations tracing_dyn_info_fops = { 3942static const struct file_operations tracing_dyn_info_fops = {
3955 .open = tracing_open_generic, 3943 .open = tracing_open_generic,
3956 .read = tracing_read_dyn_info, 3944 .read = tracing_read_dyn_info,
3945 .llseek = generic_file_llseek,
3957}; 3946};
3958#endif 3947#endif
3959 3948
@@ -4107,6 +4096,7 @@ static const struct file_operations trace_options_fops = {
4107 .open = tracing_open_generic, 4096 .open = tracing_open_generic,
4108 .read = trace_options_read, 4097 .read = trace_options_read,
4109 .write = trace_options_write, 4098 .write = trace_options_write,
4099 .llseek = generic_file_llseek,
4110}; 4100};
4111 4101
4112static ssize_t 4102static ssize_t
@@ -4158,6 +4148,7 @@ static const struct file_operations trace_options_core_fops = {
4158 .open = tracing_open_generic, 4148 .open = tracing_open_generic,
4159 .read = trace_options_core_read, 4149 .read = trace_options_core_read,
4160 .write = trace_options_core_write, 4150 .write = trace_options_core_write,
4151 .llseek = generic_file_llseek,
4161}; 4152};
4162 4153
4163struct dentry *trace_create_file(const char *name, 4154struct dentry *trace_create_file(const char *name,
@@ -4347,9 +4338,6 @@ static __init int tracer_init_debugfs(void)
4347 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4338 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4348 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4339 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4349#endif 4340#endif
4350#ifdef CONFIG_SYSPROF_TRACER
4351 init_tracer_sysprof_debugfs(d_tracer);
4352#endif
4353 4341
4354 create_trace_options_dir(); 4342 create_trace_options_dir();
4355 4343
@@ -4576,16 +4564,14 @@ __init static int tracer_alloc_buffers(void)
4576 4564
4577 4565
4578#ifdef CONFIG_TRACER_MAX_TRACE 4566#ifdef CONFIG_TRACER_MAX_TRACE
4579 max_tr.buffer = ring_buffer_alloc(ring_buf_size, 4567 max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
4580 TRACE_BUFFER_FLAGS);
4581 if (!max_tr.buffer) { 4568 if (!max_tr.buffer) {
4582 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 4569 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4583 WARN_ON(1); 4570 WARN_ON(1);
4584 ring_buffer_free(global_trace.buffer); 4571 ring_buffer_free(global_trace.buffer);
4585 goto out_free_cpumask; 4572 goto out_free_cpumask;
4586 } 4573 }
4587 max_tr.entries = ring_buffer_size(max_tr.buffer); 4574 max_tr.entries = 1;
4588 WARN_ON(max_tr.entries != global_trace.entries);
4589#endif 4575#endif
4590 4576
4591 /* Allocate the first page for all buffers */ 4577 /* Allocate the first page for all buffers */
@@ -4598,9 +4584,6 @@ __init static int tracer_alloc_buffers(void)
4598 4584
4599 register_tracer(&nop_trace); 4585 register_tracer(&nop_trace);
4600 current_trace = &nop_trace; 4586 current_trace = &nop_trace;
4601#ifdef CONFIG_BOOT_TRACER
4602 register_tracer(&boot_tracer);
4603#endif
4604 /* All seems OK, enable tracing */ 4587 /* All seems OK, enable tracing */
4605 tracing_disabled = 0; 4588 tracing_disabled = 0;
4606 4589
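
The thread running through these trace.c hunks is the new use_max_tr flag: the max_tr snapshot buffer is now allocated with a single entry and only resized to match global_trace.entries when a tracer that declares use_max_tr is selected (and shrunk back when it is deselected), while update_max_tr()/update_max_tr_single() warn if called on behalf of a tracer that did not opt in. A tracer opts in simply by setting the new field added to struct tracer later in this patch; as a sketch with hypothetical names, built inside kernel/trace/ so it can use the local header:

#include "trace.h"

static int example_lat_init(struct trace_array *tr)
{
	/* start whatever instrumentation measures the latency */
	return 0;
}

static void example_lat_reset(struct trace_array *tr)
{
	/* stop the instrumentation and release any per-tracer state */
}

static struct tracer example_latency_tracer __read_mostly = {
	.name		= "example_latency",
	.init		= example_lat_init,
	.reset		= example_lat_reset,
	.print_max	= 1,	/* report the saved worst-case trace */
	.use_max_tr	= 1,	/* ask the core to keep max_tr fully sized */
};

static int __init example_latency_tracer_init(void)
{
	return register_tracer(&example_latency_tracer);
}
device_initcall(example_latency_tracer_init);
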
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0605fc00c176..d39b3c5454a5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,10 +9,7 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/tracepoint.h> 10#include <linux/tracepoint.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h>
13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h> 12#include <linux/hw_breakpoint.h>
15
16#include <linux/trace_seq.h> 13#include <linux/trace_seq.h>
17#include <linux/ftrace_event.h> 14#include <linux/ftrace_event.h>
18 15
@@ -25,30 +22,17 @@ enum trace_type {
25 TRACE_STACK, 22 TRACE_STACK,
26 TRACE_PRINT, 23 TRACE_PRINT,
27 TRACE_BPRINT, 24 TRACE_BPRINT,
28 TRACE_SPECIAL,
29 TRACE_MMIO_RW, 25 TRACE_MMIO_RW,
30 TRACE_MMIO_MAP, 26 TRACE_MMIO_MAP,
31 TRACE_BRANCH, 27 TRACE_BRANCH,
32 TRACE_BOOT_CALL,
33 TRACE_BOOT_RET,
34 TRACE_GRAPH_RET, 28 TRACE_GRAPH_RET,
35 TRACE_GRAPH_ENT, 29 TRACE_GRAPH_ENT,
36 TRACE_USER_STACK, 30 TRACE_USER_STACK,
37 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE,
39 TRACE_BLK, 31 TRACE_BLK,
40 TRACE_KSYM,
41 32
42 __TRACE_LAST_TYPE, 33 __TRACE_LAST_TYPE,
43}; 34};
44 35
45enum kmemtrace_type_id {
46 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
47 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
48 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
49};
50
51extern struct tracer boot_tracer;
52 36
53#undef __field 37#undef __field
54#define __field(type, item) type item; 38#define __field(type, item) type item;
@@ -204,23 +188,15 @@ extern void __ftrace_bad_type(void);
204 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 188 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
205 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 189 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
206 IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ 190 IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
207 IF_ASSIGN(var, ent, struct special_entry, 0); \
208 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 191 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
209 TRACE_MMIO_RW); \ 192 TRACE_MMIO_RW); \
210 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 193 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
211 TRACE_MMIO_MAP); \ 194 TRACE_MMIO_MAP); \
212 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
213 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
214 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ 195 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
215 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ 196 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
216 TRACE_GRAPH_ENT); \ 197 TRACE_GRAPH_ENT); \
217 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 198 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
218 TRACE_GRAPH_RET); \ 199 TRACE_GRAPH_RET); \
219 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
220 TRACE_KMEM_ALLOC); \
221 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
222 TRACE_KMEM_FREE); \
223 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
224 __ftrace_bad_type(); \ 200 __ftrace_bad_type(); \
225 } while (0) 201 } while (0)
226 202
@@ -298,6 +274,7 @@ struct tracer {
298 struct tracer *next; 274 struct tracer *next;
299 int print_max; 275 int print_max;
300 struct tracer_flags *flags; 276 struct tracer_flags *flags;
277 int use_max_tr;
301}; 278};
302 279
303 280
@@ -318,7 +295,6 @@ struct dentry *trace_create_file(const char *name,
318 const struct file_operations *fops); 295 const struct file_operations *fops);
319 296
320struct dentry *tracing_init_dentry(void); 297struct dentry *tracing_init_dentry(void);
321void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
322 298
323struct ring_buffer_event; 299struct ring_buffer_event;
324 300
@@ -363,11 +339,6 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
363 struct task_struct *wakee, 339 struct task_struct *wakee,
364 struct task_struct *cur, 340 struct task_struct *cur,
365 unsigned long flags, int pc); 341 unsigned long flags, int pc);
366void trace_special(struct trace_array *tr,
367 struct trace_array_cpu *data,
368 unsigned long arg1,
369 unsigned long arg2,
370 unsigned long arg3, int pc);
371void trace_function(struct trace_array *tr, 342void trace_function(struct trace_array *tr,
372 unsigned long ip, 343 unsigned long ip,
373 unsigned long parent_ip, 344 unsigned long parent_ip,
@@ -398,8 +369,6 @@ extern cpumask_var_t __read_mostly tracing_buffer_mask;
398#define for_each_tracing_cpu(cpu) \ 369#define for_each_tracing_cpu(cpu) \
399 for_each_cpu(cpu, tracing_buffer_mask) 370 for_each_cpu(cpu, tracing_buffer_mask)
400 371
401extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
402
403extern unsigned long nsecs_to_usecs(unsigned long nsecs); 372extern unsigned long nsecs_to_usecs(unsigned long nsecs);
404 373
405extern unsigned long tracing_thresh; 374extern unsigned long tracing_thresh;
@@ -469,12 +438,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
469 struct trace_array *tr); 438 struct trace_array *tr);
470extern int trace_selftest_startup_sched_switch(struct tracer *trace, 439extern int trace_selftest_startup_sched_switch(struct tracer *trace,
471 struct trace_array *tr); 440 struct trace_array *tr);
472extern int trace_selftest_startup_sysprof(struct tracer *trace,
473 struct trace_array *tr);
474extern int trace_selftest_startup_branch(struct tracer *trace, 441extern int trace_selftest_startup_branch(struct tracer *trace,
475 struct trace_array *tr); 442 struct trace_array *tr);
476extern int trace_selftest_startup_ksym(struct tracer *trace,
477 struct trace_array *tr);
478#endif /* CONFIG_FTRACE_STARTUP_TEST */ 443#endif /* CONFIG_FTRACE_STARTUP_TEST */
479 444
480extern void *head_page(struct trace_array_cpu *data); 445extern void *head_page(struct trace_array_cpu *data);
@@ -636,6 +601,7 @@ enum trace_iterator_flags {
636 TRACE_ITER_LATENCY_FMT = 0x20000, 601 TRACE_ITER_LATENCY_FMT = 0x20000,
637 TRACE_ITER_SLEEP_TIME = 0x40000, 602 TRACE_ITER_SLEEP_TIME = 0x40000,
638 TRACE_ITER_GRAPH_TIME = 0x80000, 603 TRACE_ITER_GRAPH_TIME = 0x80000,
604 TRACE_ITER_RECORD_CMD = 0x100000,
639}; 605};
640 606
641/* 607/*
@@ -647,54 +613,6 @@ enum trace_iterator_flags {
647 613
648extern struct tracer nop_trace; 614extern struct tracer nop_trace;
649 615
650/**
651 * ftrace_preempt_disable - disable preemption scheduler safe
652 *
653 * When tracing can happen inside the scheduler, there exists
654 * cases that the tracing might happen before the need_resched
655 * flag is checked. If this happens and the tracer calls
656 * preempt_enable (after a disable), a schedule might take place
657 * causing an infinite recursion.
658 *
659 * To prevent this, we read the need_resched flag before
660 * disabling preemption. When we want to enable preemption we
661 * check the flag, if it is set, then we call preempt_enable_no_resched.
662 * Otherwise, we call preempt_enable.
663 *
664 * The rational for doing the above is that if need_resched is set
665 * and we have yet to reschedule, we are either in an atomic location
666 * (where we do not need to check for scheduling) or we are inside
667 * the scheduler and do not want to resched.
668 */
669static inline int ftrace_preempt_disable(void)
670{
671 int resched;
672
673 resched = need_resched();
674 preempt_disable_notrace();
675
676 return resched;
677}
678
679/**
680 * ftrace_preempt_enable - enable preemption scheduler safe
681 * @resched: the return value from ftrace_preempt_disable
682 *
683 * This is a scheduler safe way to enable preemption and not miss
684 * any preemption checks. The disabled saved the state of preemption.
685 * If resched is set, then we are either inside an atomic or
686 * are inside the scheduler (we would have already scheduled
687 * otherwise). In this case, we do not want to call normal
688 * preempt_enable, but preempt_enable_no_resched instead.
689 */
690static inline void ftrace_preempt_enable(int resched)
691{
692 if (resched)
693 preempt_enable_no_resched_notrace();
694 else
695 preempt_enable_notrace();
696}
697
698#ifdef CONFIG_BRANCH_TRACER 616#ifdef CONFIG_BRANCH_TRACER
699extern int enable_branch_tracing(struct trace_array *tr); 617extern int enable_branch_tracing(struct trace_array *tr);
700extern void disable_branch_tracing(void); 618extern void disable_branch_tracing(void);
@@ -785,6 +703,8 @@ struct filter_pred {
785 int pop_n; 703 int pop_n;
786}; 704};
787 705
706extern struct list_head ftrace_common_fields;
707
788extern enum regex_type 708extern enum regex_type
789filter_parse_regex(char *buff, int len, char **search, int *not); 709filter_parse_regex(char *buff, int len, char **search, int *not);
790extern void print_event_filter(struct ftrace_event_call *call, 710extern void print_event_filter(struct ftrace_event_call *call,
@@ -814,6 +734,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
814 return 0; 734 return 0;
815} 735}
816 736
737extern void trace_event_enable_cmd_record(bool enable);
738
817extern struct mutex event_mutex; 739extern struct mutex event_mutex;
818extern struct list_head ftrace_events; 740extern struct list_head ftrace_events;
819 741
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index c21d5f3956ad..000000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,185 +0,0 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12#include <linux/time.h>
13
14#include "trace.h"
15#include "trace_output.h"
16
17static struct trace_array *boot_trace;
18static bool pre_initcalls_finished;
19
20/* Tells the boot tracer that the pre_smp_initcalls are finished.
21 * So we are ready .
22 * It doesn't enable sched events tracing however.
23 * You have to call enable_boot_trace to do so.
24 */
25void start_boot_trace(void)
26{
27 pre_initcalls_finished = true;
28}
29
30void enable_boot_trace(void)
31{
32 if (boot_trace && pre_initcalls_finished)
33 tracing_start_sched_switch_record();
34}
35
36void disable_boot_trace(void)
37{
38 if (boot_trace && pre_initcalls_finished)
39 tracing_stop_sched_switch_record();
40}
41
42static int boot_trace_init(struct trace_array *tr)
43{
44 boot_trace = tr;
45
46 if (!tr)
47 return 0;
48
49 tracing_reset_online_cpus(tr);
50
51 tracing_sched_switch_assign_trace(tr);
52 return 0;
53}
54
55static enum print_line_t
56initcall_call_print_line(struct trace_iterator *iter)
57{
58 struct trace_entry *entry = iter->ent;
59 struct trace_seq *s = &iter->seq;
60 struct trace_boot_call *field;
61 struct boot_trace_call *call;
62 u64 ts;
63 unsigned long nsec_rem;
64 int ret;
65
66 trace_assign_type(field, entry);
67 call = &field->boot_call;
68 ts = iter->ts;
69 nsec_rem = do_div(ts, NSEC_PER_SEC);
70
71 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
72 (unsigned long)ts, nsec_rem, call->func, call->caller);
73
74 if (!ret)
75 return TRACE_TYPE_PARTIAL_LINE;
76 else
77 return TRACE_TYPE_HANDLED;
78}
79
80static enum print_line_t
81initcall_ret_print_line(struct trace_iterator *iter)
82{
83 struct trace_entry *entry = iter->ent;
84 struct trace_seq *s = &iter->seq;
85 struct trace_boot_ret *field;
86 struct boot_trace_ret *init_ret;
87 u64 ts;
88 unsigned long nsec_rem;
89 int ret;
90
91 trace_assign_type(field, entry);
92 init_ret = &field->boot_ret;
93 ts = iter->ts;
94 nsec_rem = do_div(ts, NSEC_PER_SEC);
95
96 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
97 "returned %d after %llu msecs\n",
98 (unsigned long) ts,
99 nsec_rem,
100 init_ret->func, init_ret->result, init_ret->duration);
101
102 if (!ret)
103 return TRACE_TYPE_PARTIAL_LINE;
104 else
105 return TRACE_TYPE_HANDLED;
106}
107
108static enum print_line_t initcall_print_line(struct trace_iterator *iter)
109{
110 struct trace_entry *entry = iter->ent;
111
112 switch (entry->type) {
113 case TRACE_BOOT_CALL:
114 return initcall_call_print_line(iter);
115 case TRACE_BOOT_RET:
116 return initcall_ret_print_line(iter);
117 default:
118 return TRACE_TYPE_UNHANDLED;
119 }
120}
121
122struct tracer boot_tracer __read_mostly =
123{
124 .name = "initcall",
125 .init = boot_trace_init,
126 .reset = tracing_reset_online_cpus,
127 .print_line = initcall_print_line,
128};
129
130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
131{
132 struct ftrace_event_call *call = &event_boot_call;
133 struct ring_buffer_event *event;
134 struct ring_buffer *buffer;
135 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace;
137
138 if (!tr || !pre_initcalls_finished)
139 return;
140
141 /* Get its name now since this function could
142 * disappear because it is in the .init section.
143 */
144 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable();
146
147 buffer = tr->buffer;
148 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
149 sizeof(*entry), 0, 0);
150 if (!event)
151 goto out;
152 entry = ring_buffer_event_data(event);
153 entry->boot_call = *bt;
154 if (!filter_check_discard(call, entry, buffer, event))
155 trace_buffer_unlock_commit(buffer, event, 0, 0);
156 out:
157 preempt_enable();
158}
159
160void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
161{
162 struct ftrace_event_call *call = &event_boot_ret;
163 struct ring_buffer_event *event;
164 struct ring_buffer *buffer;
165 struct trace_boot_ret *entry;
166 struct trace_array *tr = boot_trace;
167
168 if (!tr || !pre_initcalls_finished)
169 return;
170
171 sprint_symbol(bt->func, (unsigned long)fn);
172 preempt_disable();
173
174 buffer = tr->buffer;
175 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
176 sizeof(*entry), 0, 0);
177 if (!event)
178 goto out;
179 entry = ring_buffer_event_data(event);
180 entry->boot_ret = *bt;
181 if (!filter_check_discard(call, entry, buffer, event))
182 trace_buffer_unlock_commit(buffer, event, 0, 0);
183 out:
184 preempt_enable();
185}
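
Both trace_boot_call() and trace_boot_ret() above follow the same reserve/fill/commit sequence against the ring buffer. A stripped-down sketch of that sequence; the payload struct, helper name and event type parameter are illustrative, only the call order mirrors the code above:

struct sample_payload {			/* hypothetical payload */
	unsigned long value;
};

static void emit_sample(struct trace_array *tr, int type, unsigned long value)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct sample_payload *entry;

	preempt_disable();
	buffer = tr->buffer;
	/* Reserve space; NULL means the buffer is full or disabled. */
	event = trace_buffer_lock_reserve(buffer, type, sizeof(*entry), 0, 0);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);
	entry->value = value;
	/* The commit makes the record visible to readers. */
	trace_buffer_unlock_commit(buffer, event, 0, 0);
out:
	preempt_enable();
}
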
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1a..52fda6c04ac3 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -32,16 +32,15 @@
32u64 notrace trace_clock_local(void) 32u64 notrace trace_clock_local(void)
33{ 33{
34 u64 clock; 34 u64 clock;
35 int resched;
36 35
37 /* 36 /*
38 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
39 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
40 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
41 */ 40 */
42 resched = ftrace_preempt_disable(); 41 preempt_disable_notrace();
43 clock = sched_clock(); 42 clock = sched_clock();
44 ftrace_preempt_enable(resched); 43 preempt_enable_notrace();
45 44
46 return clock; 45 return clock;
47} 46}
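
The hunk above replaces the old ftrace_preempt_disable()/ftrace_preempt_enable() helper pair with the plain notrace variants, so the pattern for code the function tracer may itself call reduces to:

	/* _notrace variants keep the tracer from recursing into itself. */
	preempt_disable_notrace();
	clock = sched_clock();
	preempt_enable_notrace();

The read is still preemption-safe; what goes away is the saved-resched bookkeeping (the int resched local removed above) that the old helpers required.
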
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index dc008c1240da..e3dfecaf13e6 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -151,23 +151,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
151); 151);
152 152
153/* 153/*
154 * Special (free-form) trace entry:
155 */
156FTRACE_ENTRY(special, special_entry,
157
158 TRACE_SPECIAL,
159
160 F_STRUCT(
161 __field( unsigned long, arg1 )
162 __field( unsigned long, arg2 )
163 __field( unsigned long, arg3 )
164 ),
165
166 F_printk("(%08lx) (%08lx) (%08lx)",
167 __entry->arg1, __entry->arg2, __entry->arg3)
168);
169
170/*
171 * Stack-trace entry: 154 * Stack-trace entry:
172 */ 155 */
173 156
@@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
271 __entry->map_id, __entry->opcode) 254 __entry->map_id, __entry->opcode)
272); 255);
273 256
274FTRACE_ENTRY(boot_call, trace_boot_call,
275
276 TRACE_BOOT_CALL,
277
278 F_STRUCT(
279 __field_struct( struct boot_trace_call, boot_call )
280 __field_desc( pid_t, boot_call, caller )
281 __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
282 ),
283
284 F_printk("%d %s", __entry->caller, __entry->func)
285);
286
287FTRACE_ENTRY(boot_ret, trace_boot_ret,
288
289 TRACE_BOOT_RET,
290
291 F_STRUCT(
292 __field_struct( struct boot_trace_ret, boot_ret )
293 __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
294 __field_desc( int, boot_ret, result )
295 __field_desc( unsigned long, boot_ret, duration )
296 ),
297
298 F_printk("%s %d %lx",
299 __entry->func, __entry->result, __entry->duration)
300);
301 257
302#define TRACE_FUNC_SIZE 30 258#define TRACE_FUNC_SIZE 30
303#define TRACE_FILE_SIZE 20 259#define TRACE_FILE_SIZE 20
@@ -318,53 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,
318 __entry->func, __entry->file, __entry->correct) 274 __entry->func, __entry->file, __entry->correct)
319); 275);
320 276
321FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
322
323 TRACE_KMEM_ALLOC,
324
325 F_STRUCT(
326 __field( enum kmemtrace_type_id, type_id )
327 __field( unsigned long, call_site )
328 __field( const void *, ptr )
329 __field( size_t, bytes_req )
330 __field( size_t, bytes_alloc )
331 __field( gfp_t, gfp_flags )
332 __field( int, node )
333 ),
334
335 F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
336 " flags:%x node:%d",
337 __entry->type_id, __entry->call_site, __entry->ptr,
338 __entry->bytes_req, __entry->bytes_alloc,
339 __entry->gfp_flags, __entry->node)
340);
341
342FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
343
344 TRACE_KMEM_FREE,
345
346 F_STRUCT(
347 __field( enum kmemtrace_type_id, type_id )
348 __field( unsigned long, call_site )
349 __field( const void *, ptr )
350 ),
351
352 F_printk("type:%u call_site:%lx ptr:%p",
353 __entry->type_id, __entry->call_site, __entry->ptr)
354);
355
356FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
357
358 TRACE_KSYM,
359
360 F_STRUCT(
361 __field( unsigned long, ip )
362 __field( unsigned char, type )
363 __array( char , cmd, TASK_COMM_LEN )
364 __field( unsigned long, addr )
365 ),
366
367 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
368 (void *)__entry->ip, (unsigned int)__entry->type,
369 (void *)__entry->addr, __entry->cmd)
370);
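
For reference, the boot_call entry removed above described a record whose layout is roughly the following struct; this is an approximation of what the FTRACE_ENTRY() macro expands to, shown only to make the deleted field list concrete:

struct trace_boot_call {			/* approximate expansion */
	struct trace_entry	ent;		/* common event header */
	struct boot_trace_call	boot_call;	/* pid_t caller; char func[KSYM_SYMBOL_LEN]; */
};

The __field_desc()/__array_desc() lines add no storage of their own; they only describe members that already live inside the embedded struct, so the filter and format code can see them.
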
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 8a2b73f7c068..000e6e85b445 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,8 +9,6 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
13
14static char *perf_trace_buf[4]; 12static char *perf_trace_buf[4];
15 13
16/* 14/*
@@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
56 } 54 }
57 } 55 }
58 56
59 if (tp_event->class->reg) 57 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
60 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
61 else
62 ret = tracepoint_probe_register(tp_event->name,
63 tp_event->class->perf_probe,
64 tp_event);
65
66 if (ret) 58 if (ret)
67 goto fail; 59 goto fail;
68 60
@@ -96,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)
96 mutex_lock(&event_mutex); 88 mutex_lock(&event_mutex);
97 list_for_each_entry(tp_event, &ftrace_events, list) { 89 list_for_each_entry(tp_event, &ftrace_events, list) {
98 if (tp_event->event.type == event_id && 90 if (tp_event->event.type == event_id &&
99 tp_event->class && 91 tp_event->class && tp_event->class->reg &&
100 (tp_event->class->perf_probe ||
101 tp_event->class->reg) &&
102 try_module_get(tp_event->mod)) { 92 try_module_get(tp_event->mod)) {
103 ret = perf_trace_event_init(tp_event, p_event); 93 ret = perf_trace_event_init(tp_event, p_event);
104 break; 94 break;
@@ -138,18 +128,13 @@ void perf_trace_destroy(struct perf_event *p_event)
138 if (--tp_event->perf_refcount > 0) 128 if (--tp_event->perf_refcount > 0)
139 goto out; 129 goto out;
140 130
141 if (tp_event->class->reg) 131 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
142 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
143 else
144 tracepoint_probe_unregister(tp_event->name,
145 tp_event->class->perf_probe,
146 tp_event);
147 132
148 /* 133 /*
149 * Ensure our callback won't be called anymore. See 134 * Ensure our callback won't be called anymore. The buffers
150 * tracepoint_probe_unregister() and __DO_TRACE(). 135 * will be freed after that.
151 */ 136 */
152 synchronize_sched(); 137 tracepoint_synchronize_unregister();
153 138
154 free_percpu(tp_event->perf_events); 139 free_percpu(tp_event->perf_events);
155 tp_event->perf_events = NULL; 140 tp_event->perf_events = NULL;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53cffc0b0801..09b4fa6e4d3b 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -28,6 +28,7 @@
28DEFINE_MUTEX(event_mutex); 28DEFINE_MUTEX(event_mutex);
29 29
30LIST_HEAD(ftrace_events); 30LIST_HEAD(ftrace_events);
31LIST_HEAD(ftrace_common_fields);
31 32
32struct list_head * 33struct list_head *
33trace_get_fields(struct ftrace_event_call *event_call) 34trace_get_fields(struct ftrace_event_call *event_call)
@@ -37,15 +38,11 @@ trace_get_fields(struct ftrace_event_call *event_call)
37 return event_call->class->get_fields(event_call); 38 return event_call->class->get_fields(event_call);
38} 39}
39 40
40int trace_define_field(struct ftrace_event_call *call, const char *type, 41static int __trace_define_field(struct list_head *head, const char *type,
41 const char *name, int offset, int size, int is_signed, 42 const char *name, int offset, int size,
42 int filter_type) 43 int is_signed, int filter_type)
43{ 44{
44 struct ftrace_event_field *field; 45 struct ftrace_event_field *field;
45 struct list_head *head;
46
47 if (WARN_ON(!call->class))
48 return 0;
49 46
50 field = kzalloc(sizeof(*field), GFP_KERNEL); 47 field = kzalloc(sizeof(*field), GFP_KERNEL);
51 if (!field) 48 if (!field)
@@ -68,7 +65,6 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
68 field->size = size; 65 field->size = size;
69 field->is_signed = is_signed; 66 field->is_signed = is_signed;
70 67
71 head = trace_get_fields(call);
72 list_add(&field->link, head); 68 list_add(&field->link, head);
73 69
74 return 0; 70 return 0;
@@ -80,17 +76,32 @@ err:
80 76
81 return -ENOMEM; 77 return -ENOMEM;
82} 78}
79
80int trace_define_field(struct ftrace_event_call *call, const char *type,
81 const char *name, int offset, int size, int is_signed,
82 int filter_type)
83{
84 struct list_head *head;
85
86 if (WARN_ON(!call->class))
87 return 0;
88
89 head = trace_get_fields(call);
90 return __trace_define_field(head, type, name, offset, size,
91 is_signed, filter_type);
92}
83EXPORT_SYMBOL_GPL(trace_define_field); 93EXPORT_SYMBOL_GPL(trace_define_field);
84 94
85#define __common_field(type, item) \ 95#define __common_field(type, item) \
86 ret = trace_define_field(call, #type, "common_" #item, \ 96 ret = __trace_define_field(&ftrace_common_fields, #type, \
87 offsetof(typeof(ent), item), \ 97 "common_" #item, \
88 sizeof(ent.item), \ 98 offsetof(typeof(ent), item), \
89 is_signed_type(type), FILTER_OTHER); \ 99 sizeof(ent.item), \
100 is_signed_type(type), FILTER_OTHER); \
90 if (ret) \ 101 if (ret) \
91 return ret; 102 return ret;
92 103
93static int trace_define_common_fields(struct ftrace_event_call *call) 104static int trace_define_common_fields(void)
94{ 105{
95 int ret; 106 int ret;
96 struct trace_entry ent; 107 struct trace_entry ent;
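
With this split, the exported trace_define_field() handles only event-specific fields, while the common_* fields are registered once on the global ftrace_common_fields list. A hedged sketch of how a caller registers an ordinary field (the struct and field names are illustrative):

struct my_entry {
	struct trace_entry	ent;
	unsigned long		ip;
};

static int my_define_fields(struct ftrace_event_call *call)
{
	/* type string, name, offset, size, is_signed, filter type */
	return trace_define_field(call, "unsigned long", "ip",
				  offsetof(struct my_entry, ip),
				  sizeof(unsigned long), 0, FILTER_OTHER);
}
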
@@ -130,6 +141,55 @@ int trace_event_raw_init(struct ftrace_event_call *call)
130} 141}
131EXPORT_SYMBOL_GPL(trace_event_raw_init); 142EXPORT_SYMBOL_GPL(trace_event_raw_init);
132 143
144int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
145{
146 switch (type) {
147 case TRACE_REG_REGISTER:
148 return tracepoint_probe_register(call->name,
149 call->class->probe,
150 call);
151 case TRACE_REG_UNREGISTER:
152 tracepoint_probe_unregister(call->name,
153 call->class->probe,
154 call);
155 return 0;
156
157#ifdef CONFIG_PERF_EVENTS
158 case TRACE_REG_PERF_REGISTER:
159 return tracepoint_probe_register(call->name,
160 call->class->perf_probe,
161 call);
162 case TRACE_REG_PERF_UNREGISTER:
163 tracepoint_probe_unregister(call->name,
164 call->class->perf_probe,
165 call);
166 return 0;
167#endif
168 }
169 return 0;
170}
171EXPORT_SYMBOL_GPL(ftrace_event_reg);
172
173void trace_event_enable_cmd_record(bool enable)
174{
175 struct ftrace_event_call *call;
176
177 mutex_lock(&event_mutex);
178 list_for_each_entry(call, &ftrace_events, list) {
179 if (!(call->flags & TRACE_EVENT_FL_ENABLED))
180 continue;
181
182 if (enable) {
183 tracing_start_cmdline_record();
184 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
185 } else {
186 tracing_stop_cmdline_record();
187 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
188 }
189 }
190 mutex_unlock(&event_mutex);
191}
192
133static int ftrace_event_enable_disable(struct ftrace_event_call *call, 193static int ftrace_event_enable_disable(struct ftrace_event_call *call,
134 int enable) 194 int enable)
135{ 195{
@@ -139,24 +199,20 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
139 case 0: 199 case 0:
140 if (call->flags & TRACE_EVENT_FL_ENABLED) { 200 if (call->flags & TRACE_EVENT_FL_ENABLED) {
141 call->flags &= ~TRACE_EVENT_FL_ENABLED; 201 call->flags &= ~TRACE_EVENT_FL_ENABLED;
142 tracing_stop_cmdline_record(); 202 if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
143 if (call->class->reg) 203 tracing_stop_cmdline_record();
144 call->class->reg(call, TRACE_REG_UNREGISTER); 204 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
145 else 205 }
146 tracepoint_probe_unregister(call->name, 206 call->class->reg(call, TRACE_REG_UNREGISTER);
147 call->class->probe,
148 call);
149 } 207 }
150 break; 208 break;
151 case 1: 209 case 1:
152 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) { 210 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
153 tracing_start_cmdline_record(); 211 if (trace_flags & TRACE_ITER_RECORD_CMD) {
154 if (call->class->reg) 212 tracing_start_cmdline_record();
155 ret = call->class->reg(call, TRACE_REG_REGISTER); 213 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
156 else 214 }
157 ret = tracepoint_probe_register(call->name, 215 ret = call->class->reg(call, TRACE_REG_REGISTER);
158 call->class->probe,
159 call);
160 if (ret) { 216 if (ret) {
161 tracing_stop_cmdline_record(); 217 tracing_stop_cmdline_record();
162 pr_info("event trace: Could not enable event " 218 pr_info("event trace: Could not enable event "
@@ -194,8 +250,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
194 mutex_lock(&event_mutex); 250 mutex_lock(&event_mutex);
195 list_for_each_entry(call, &ftrace_events, list) { 251 list_for_each_entry(call, &ftrace_events, list) {
196 252
197 if (!call->name || !call->class || 253 if (!call->name || !call->class || !call->class->reg)
198 (!call->class->probe && !call->class->reg))
199 continue; 254 continue;
200 255
201 if (match && 256 if (match &&
@@ -321,7 +376,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
321 * The ftrace subsystem is for showing formats only. 376 * The ftrace subsystem is for showing formats only.
322 * They can not be enabled or disabled via the event files. 377 * They can not be enabled or disabled via the event files.
323 */ 378 */
324 if (call->class && (call->class->probe || call->class->reg)) 379 if (call->class && call->class->reg)
325 return call; 380 return call;
326 } 381 }
327 382
@@ -474,8 +529,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
474 529
475 mutex_lock(&event_mutex); 530 mutex_lock(&event_mutex);
476 list_for_each_entry(call, &ftrace_events, list) { 531 list_for_each_entry(call, &ftrace_events, list) {
477 if (!call->name || !call->class || 532 if (!call->name || !call->class || !call->class->reg)
478 (!call->class->probe && !call->class->reg))
479 continue; 533 continue;
480 534
481 if (system && strcmp(call->class->system, system) != 0) 535 if (system && strcmp(call->class->system, system) != 0)
@@ -544,32 +598,10 @@ out:
544 return ret; 598 return ret;
545} 599}
546 600
547static ssize_t 601static void print_event_fields(struct trace_seq *s, struct list_head *head)
548event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
549 loff_t *ppos)
550{ 602{
551 struct ftrace_event_call *call = filp->private_data;
552 struct ftrace_event_field *field; 603 struct ftrace_event_field *field;
553 struct list_head *head;
554 struct trace_seq *s;
555 int common_field_count = 5;
556 char *buf;
557 int r = 0;
558
559 if (*ppos)
560 return 0;
561
562 s = kmalloc(sizeof(*s), GFP_KERNEL);
563 if (!s)
564 return -ENOMEM;
565
566 trace_seq_init(s);
567
568 trace_seq_printf(s, "name: %s\n", call->name);
569 trace_seq_printf(s, "ID: %d\n", call->event.type);
570 trace_seq_printf(s, "format:\n");
571 604
572 head = trace_get_fields(call);
573 list_for_each_entry_reverse(field, head, link) { 605 list_for_each_entry_reverse(field, head, link) {
574 /* 606 /*
575 * Smartly shows the array type(except dynamic array). 607 * Smartly shows the array type(except dynamic array).
@@ -584,29 +616,54 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
584 array_descriptor = NULL; 616 array_descriptor = NULL;
585 617
586 if (!array_descriptor) { 618 if (!array_descriptor) {
587 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" 619 trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
588 "\tsize:%u;\tsigned:%d;\n", 620 "\tsize:%u;\tsigned:%d;\n",
589 field->type, field->name, field->offset, 621 field->type, field->name, field->offset,
590 field->size, !!field->is_signed); 622 field->size, !!field->is_signed);
591 } else { 623 } else {
592 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" 624 trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
593 "\tsize:%u;\tsigned:%d;\n", 625 "\tsize:%u;\tsigned:%d;\n",
594 (int)(array_descriptor - field->type), 626 (int)(array_descriptor - field->type),
595 field->type, field->name, 627 field->type, field->name,
596 array_descriptor, field->offset, 628 array_descriptor, field->offset,
597 field->size, !!field->is_signed); 629 field->size, !!field->is_signed);
598 } 630 }
631 }
632}
599 633
600 if (--common_field_count == 0) 634static ssize_t
601 r = trace_seq_printf(s, "\n"); 635event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
636 loff_t *ppos)
637{
638 struct ftrace_event_call *call = filp->private_data;
639 struct list_head *head;
640 struct trace_seq *s;
641 char *buf;
642 int r;
602 643
603 if (!r) 644 if (*ppos)
604 break; 645 return 0;
605 } 646
647 s = kmalloc(sizeof(*s), GFP_KERNEL);
648 if (!s)
649 return -ENOMEM;
650
651 trace_seq_init(s);
652
653 trace_seq_printf(s, "name: %s\n", call->name);
654 trace_seq_printf(s, "ID: %d\n", call->event.type);
655 trace_seq_printf(s, "format:\n");
656
657 /* print common fields */
658 print_event_fields(s, &ftrace_common_fields);
606 659
607 if (r) 660 trace_seq_putc(s, '\n');
608 r = trace_seq_printf(s, "\nprint fmt: %s\n", 661
609 call->print_fmt); 662 /* print event specific fields */
663 head = trace_get_fields(call);
664 print_event_fields(s, head);
665
666 r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);
610 667
611 if (!r) { 668 if (!r) {
612 /* 669 /*
@@ -963,35 +1020,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
963 return -1; 1020 return -1;
964 } 1021 }
965 1022
966 if (call->class->probe || call->class->reg) 1023 if (call->class->reg)
967 trace_create_file("enable", 0644, call->dir, call, 1024 trace_create_file("enable", 0644, call->dir, call,
968 enable); 1025 enable);
969 1026
970#ifdef CONFIG_PERF_EVENTS 1027#ifdef CONFIG_PERF_EVENTS
971 if (call->event.type && (call->class->perf_probe || call->class->reg)) 1028 if (call->event.type && call->class->reg)
972 trace_create_file("id", 0444, call->dir, call, 1029 trace_create_file("id", 0444, call->dir, call,
973 id); 1030 id);
974#endif 1031#endif
975 1032
976 if (call->class->define_fields) { 1033 /*
977 /* 1034 * Other events may have the same class. Only update
978 * Other events may have the same class. Only update 1035 * the fields if they are not already defined.
979 * the fields if they are not already defined. 1036 */
980 */ 1037 head = trace_get_fields(call);
981 head = trace_get_fields(call); 1038 if (list_empty(head)) {
982 if (list_empty(head)) { 1039 ret = call->class->define_fields(call);
983 ret = trace_define_common_fields(call); 1040 if (ret < 0) {
984 if (!ret) 1041 pr_warning("Could not initialize trace point"
985 ret = call->class->define_fields(call); 1042 " events/%s\n", call->name);
986 if (ret < 0) { 1043 return ret;
987 pr_warning("Could not initialize trace point"
988 " events/%s\n", call->name);
989 return ret;
990 }
991 } 1044 }
992 trace_create_file("filter", 0644, call->dir, call,
993 filter);
994 } 1045 }
1046 trace_create_file("filter", 0644, call->dir, call,
1047 filter);
995 1048
996 trace_create_file("format", 0444, call->dir, call, 1049 trace_create_file("format", 0444, call->dir, call,
997 format); 1050 format);
@@ -999,11 +1052,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
999 return 0; 1052 return 0;
1000} 1053}
1001 1054
1002static int __trace_add_event_call(struct ftrace_event_call *call) 1055static int
1056__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1057 const struct file_operations *id,
1058 const struct file_operations *enable,
1059 const struct file_operations *filter,
1060 const struct file_operations *format)
1003{ 1061{
1004 struct dentry *d_events; 1062 struct dentry *d_events;
1005 int ret; 1063 int ret;
1006 1064
1065 /* The linker may leave blanks */
1007 if (!call->name) 1066 if (!call->name)
1008 return -EINVAL; 1067 return -EINVAL;
1009 1068
@@ -1011,8 +1070,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
1011 ret = call->class->raw_init(call); 1070 ret = call->class->raw_init(call);
1012 if (ret < 0) { 1071 if (ret < 0) {
1013 if (ret != -ENOSYS) 1072 if (ret != -ENOSYS)
1014 pr_warning("Could not initialize trace " 1073 pr_warning("Could not initialize trace events/%s\n",
1015 "events/%s\n", call->name); 1074 call->name);
1016 return ret; 1075 return ret;
1017 } 1076 }
1018 } 1077 }
@@ -1021,11 +1080,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
1021 if (!d_events) 1080 if (!d_events)
1022 return -ENOENT; 1081 return -ENOENT;
1023 1082
1024 ret = event_create_dir(call, d_events, &ftrace_event_id_fops, 1083 ret = event_create_dir(call, d_events, id, enable, filter, format);
1025 &ftrace_enable_fops, &ftrace_event_filter_fops,
1026 &ftrace_event_format_fops);
1027 if (!ret) 1084 if (!ret)
1028 list_add(&call->list, &ftrace_events); 1085 list_add(&call->list, &ftrace_events);
1086 call->mod = mod;
1029 1087
1030 return ret; 1088 return ret;
1031} 1089}
@@ -1035,7 +1093,10 @@ int trace_add_event_call(struct ftrace_event_call *call)
1035{ 1093{
1036 int ret; 1094 int ret;
1037 mutex_lock(&event_mutex); 1095 mutex_lock(&event_mutex);
1038 ret = __trace_add_event_call(call); 1096 ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1097 &ftrace_enable_fops,
1098 &ftrace_event_filter_fops,
1099 &ftrace_event_format_fops);
1039 mutex_unlock(&event_mutex); 1100 mutex_unlock(&event_mutex);
1040 return ret; 1101 return ret;
1041} 1102}
@@ -1152,8 +1213,6 @@ static void trace_module_add_events(struct module *mod)
1152{ 1213{
1153 struct ftrace_module_file_ops *file_ops = NULL; 1214 struct ftrace_module_file_ops *file_ops = NULL;
1154 struct ftrace_event_call *call, *start, *end; 1215 struct ftrace_event_call *call, *start, *end;
1155 struct dentry *d_events;
1156 int ret;
1157 1216
1158 start = mod->trace_events; 1217 start = mod->trace_events;
1159 end = mod->trace_events + mod->num_trace_events; 1218 end = mod->trace_events + mod->num_trace_events;
@@ -1161,38 +1220,14 @@ static void trace_module_add_events(struct module *mod)
1161 if (start == end) 1220 if (start == end)
1162 return; 1221 return;
1163 1222
1164 d_events = event_trace_events_dir(); 1223 file_ops = trace_create_file_ops(mod);
1165 if (!d_events) 1224 if (!file_ops)
1166 return; 1225 return;
1167 1226
1168 for_each_event(call, start, end) { 1227 for_each_event(call, start, end) {
1169 /* The linker may leave blanks */ 1228 __trace_add_event_call(call, mod,
1170 if (!call->name)
1171 continue;
1172 if (call->class->raw_init) {
1173 ret = call->class->raw_init(call);
1174 if (ret < 0) {
1175 if (ret != -ENOSYS)
1176 pr_warning("Could not initialize trace "
1177 "point events/%s\n", call->name);
1178 continue;
1179 }
1180 }
1181 /*
1182 * This module has events, create file ops for this module
1183 * if not already done.
1184 */
1185 if (!file_ops) {
1186 file_ops = trace_create_file_ops(mod);
1187 if (!file_ops)
1188 return;
1189 }
1190 call->mod = mod;
1191 ret = event_create_dir(call, d_events,
1192 &file_ops->id, &file_ops->enable, 1229 &file_ops->id, &file_ops->enable,
1193 &file_ops->filter, &file_ops->format); 1230 &file_ops->filter, &file_ops->format);
1194 if (!ret)
1195 list_add(&call->list, &ftrace_events);
1196 } 1231 }
1197} 1232}
1198 1233
@@ -1319,25 +1354,14 @@ static __init int event_trace_init(void)
1319 trace_create_file("enable", 0644, d_events, 1354 trace_create_file("enable", 0644, d_events,
1320 NULL, &ftrace_system_enable_fops); 1355 NULL, &ftrace_system_enable_fops);
1321 1356
1357 if (trace_define_common_fields())
1358 pr_warning("tracing: Failed to allocate common fields");
1359
1322 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1360 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1323 /* The linker may leave blanks */ 1361 __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1324 if (!call->name)
1325 continue;
1326 if (call->class->raw_init) {
1327 ret = call->class->raw_init(call);
1328 if (ret < 0) {
1329 if (ret != -ENOSYS)
1330 pr_warning("Could not initialize trace "
1331 "point events/%s\n", call->name);
1332 continue;
1333 }
1334 }
1335 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1336 &ftrace_enable_fops, 1362 &ftrace_enable_fops,
1337 &ftrace_event_filter_fops, 1363 &ftrace_event_filter_fops,
1338 &ftrace_event_format_fops); 1364 &ftrace_event_format_fops);
1339 if (!ret)
1340 list_add(&call->list, &ftrace_events);
1341 } 1365 }
1342 1366
1343 while (true) { 1367 while (true) {
@@ -1524,12 +1548,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1524 struct ftrace_entry *entry; 1548 struct ftrace_entry *entry;
1525 unsigned long flags; 1549 unsigned long flags;
1526 long disabled; 1550 long disabled;
1527 int resched;
1528 int cpu; 1551 int cpu;
1529 int pc; 1552 int pc;
1530 1553
1531 pc = preempt_count(); 1554 pc = preempt_count();
1532 resched = ftrace_preempt_disable(); 1555 preempt_disable_notrace();
1533 cpu = raw_smp_processor_id(); 1556 cpu = raw_smp_processor_id();
1534 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); 1557 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1535 1558
@@ -1551,7 +1574,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1551 1574
1552 out: 1575 out:
1553 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 1576 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1554 ftrace_preempt_enable(resched); 1577 preempt_enable_notrace();
1555} 1578}
1556 1579
1557static struct ftrace_ops trace_ops __initdata = 1580static struct ftrace_ops trace_ops __initdata =
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 57bb1bb32999..36d40104b17f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -497,12 +497,10 @@ void print_subsystem_event_filter(struct event_subsystem *system,
497} 497}
498 498
499static struct ftrace_event_field * 499static struct ftrace_event_field *
500find_event_field(struct ftrace_event_call *call, char *name) 500__find_event_field(struct list_head *head, char *name)
501{ 501{
502 struct ftrace_event_field *field; 502 struct ftrace_event_field *field;
503 struct list_head *head;
504 503
505 head = trace_get_fields(call);
506 list_for_each_entry(field, head, link) { 504 list_for_each_entry(field, head, link) {
507 if (!strcmp(field->name, name)) 505 if (!strcmp(field->name, name))
508 return field; 506 return field;
@@ -511,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)
511 return NULL; 509 return NULL;
512} 510}
513 511
512static struct ftrace_event_field *
513find_event_field(struct ftrace_event_call *call, char *name)
514{
515 struct ftrace_event_field *field;
516 struct list_head *head;
517
518 field = __find_event_field(&ftrace_common_fields, name);
519 if (field)
520 return field;
521
522 head = trace_get_fields(call);
523 return __find_event_field(head, name);
524}
525
514static void filter_free_pred(struct filter_pred *pred) 526static void filter_free_pred(struct filter_pred *pred)
515{ 527{
516 if (!pred) 528 if (!pred)
@@ -627,9 +639,6 @@ static int init_subsystem_preds(struct event_subsystem *system)
627 int err; 639 int err;
628 640
629 list_for_each_entry(call, &ftrace_events, list) { 641 list_for_each_entry(call, &ftrace_events, list) {
630 if (!call->class || !call->class->define_fields)
631 continue;
632
633 if (strcmp(call->class->system, system->name) != 0) 642 if (strcmp(call->class->system, system->name) != 0)
634 continue; 643 continue;
635 644
@@ -646,9 +655,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
646 struct ftrace_event_call *call; 655 struct ftrace_event_call *call;
647 656
648 list_for_each_entry(call, &ftrace_events, list) { 657 list_for_each_entry(call, &ftrace_events, list) {
649 if (!call->class || !call->class->define_fields)
650 continue;
651
652 if (strcmp(call->class->system, system->name) != 0) 658 if (strcmp(call->class->system, system->name) != 0)
653 continue; 659 continue;
654 660
@@ -1251,9 +1257,6 @@ static int replace_system_preds(struct event_subsystem *system,
1251 list_for_each_entry(call, &ftrace_events, list) { 1257 list_for_each_entry(call, &ftrace_events, list) {
1252 struct event_filter *filter = call->filter; 1258 struct event_filter *filter = call->filter;
1253 1259
1254 if (!call->class || !call->class->define_fields)
1255 continue;
1256
1257 if (strcmp(call->class->system, system->name) != 0) 1260 if (strcmp(call->class->system, system->name) != 0)
1258 continue; 1261 continue;
1259 1262
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 8536e2a65969..4ba44deaac25 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -125,12 +125,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
125 125
126#include "trace_entries.h" 126#include "trace_entries.h"
127 127
128static int ftrace_raw_init_event(struct ftrace_event_call *call)
129{
130 INIT_LIST_HEAD(&call->class->fields);
131 return 0;
132}
133
134#undef __entry 128#undef __entry
135#define __entry REC 129#define __entry REC
136 130
@@ -158,7 +152,7 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
158struct ftrace_event_class event_class_ftrace_##call = { \ 152struct ftrace_event_class event_class_ftrace_##call = { \
159 .system = __stringify(TRACE_SYSTEM), \ 153 .system = __stringify(TRACE_SYSTEM), \
160 .define_fields = ftrace_define_fields_##call, \ 154 .define_fields = ftrace_define_fields_##call, \
161 .raw_init = ftrace_raw_init_event, \ 155 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
162}; \ 156}; \
163 \ 157 \
164struct ftrace_event_call __used \ 158struct ftrace_event_call __used \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b3f3776b0cd6..16aee4d44e8f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
54 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
55 unsigned long flags; 55 unsigned long flags;
56 long disabled; 56 long disabled;
57 int cpu, resched; 57 int cpu;
58 int pc; 58 int pc;
59 59
60 if (unlikely(!ftrace_function_enabled)) 60 if (unlikely(!ftrace_function_enabled))
61 return; 61 return;
62 62
63 pc = preempt_count(); 63 pc = preempt_count();
64 resched = ftrace_preempt_disable(); 64 preempt_disable_notrace();
65 local_save_flags(flags); 65 local_save_flags(flags);
66 cpu = raw_smp_processor_id(); 66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu]; 67 data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
71 trace_function(tr, ip, parent_ip, flags, pc); 71 trace_function(tr, ip, parent_ip, flags, pc);
72 72
73 atomic_dec(&data->disabled); 73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched); 74 preempt_enable_notrace();
75} 75}
76 76
77static void 77static void
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 79f4bac99a94..6bff23625781 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -641,7 +641,8 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
641 641
642 /* Print nsecs (we don't want to exceed 7 numbers) */ 642 /* Print nsecs (we don't want to exceed 7 numbers) */
643 if (len < 7) { 643 if (len < 7) {
644 snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); 644 snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu",
645 nsecs_rem);
645 ret = trace_seq_printf(s, ".%s", nsecs_str); 646 ret = trace_seq_printf(s, ".%s", nsecs_str);
646 if (!ret) 647 if (!ret)
647 return TRACE_TYPE_PARTIAL_LINE; 648 return TRACE_TYPE_PARTIAL_LINE;
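
The one-line fix above clamps the size passed to snprintf() so it can never exceed the destination buffer. A minimal illustration of the bug class it closes (the buffer size and len value here are illustrative, not the tracer's actual ones):

	char nsecs_str[5];
	unsigned long len = 2;		/* digits already consumed */

	/* Before: 8 - len can be larger than sizeof(nsecs_str). */
	/* snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); */

	/* After: never ask snprintf() to write past the buffer. */
	snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", nsecs_rem);
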
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 6fd486e0cef4..73a6b0601f2e 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -649,6 +649,7 @@ static struct tracer irqsoff_tracer __read_mostly =
649#endif 649#endif
650 .open = irqsoff_trace_open, 650 .open = irqsoff_trace_open,
651 .close = irqsoff_trace_close, 651 .close = irqsoff_trace_close,
652 .use_max_tr = 1,
652}; 653};
653# define register_irqsoff(trace) register_tracer(&trace) 654# define register_irqsoff(trace) register_tracer(&trace)
654#else 655#else
@@ -681,6 +682,7 @@ static struct tracer preemptoff_tracer __read_mostly =
681#endif 682#endif
682 .open = irqsoff_trace_open, 683 .open = irqsoff_trace_open,
683 .close = irqsoff_trace_close, 684 .close = irqsoff_trace_close,
685 .use_max_tr = 1,
684}; 686};
685# define register_preemptoff(trace) register_tracer(&trace) 687# define register_preemptoff(trace) register_tracer(&trace)
686#else 688#else
@@ -715,6 +717,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
715#endif 717#endif
716 .open = irqsoff_trace_open, 718 .open = irqsoff_trace_open,
717 .close = irqsoff_trace_close, 719 .close = irqsoff_trace_close,
720 .use_max_tr = 1,
718}; 721};
719 722
720# define register_preemptirqsoff(trace) register_tracer(&trace) 723# define register_preemptirqsoff(trace) register_tracer(&trace)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f52b5f50299d..8b27c9849b42 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,6 +30,8 @@
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h> 32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <linux/uaccess.h>
33#include <asm/bitsperlong.h> 35#include <asm/bitsperlong.h>
34 36
35#include "trace.h" 37#include "trace.h"
@@ -38,6 +40,7 @@
38#define MAX_TRACE_ARGS 128 40#define MAX_TRACE_ARGS 128
39#define MAX_ARGSTR_LEN 63 41#define MAX_ARGSTR_LEN 63
40#define MAX_EVENT_NAME_LEN 64 42#define MAX_EVENT_NAME_LEN 64
43#define MAX_STRING_SIZE PATH_MAX
41#define KPROBE_EVENT_SYSTEM "kprobes" 44#define KPROBE_EVENT_SYSTEM "kprobes"
42 45
43/* Reserved field names */ 46/* Reserved field names */
@@ -58,14 +61,16 @@ const char *reserved_field_names[] = {
58}; 61};
59 62
60/* Printing function type */ 63/* Printing function type */
61typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *); 64typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
65 void *);
62#define PRINT_TYPE_FUNC_NAME(type) print_type_##type 66#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
63#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type 67#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
64 68
65/* Printing in basic type function template */ 69/* Printing in basic type function template */
66#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ 70#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
67static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 71static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
68 const char *name, void *data)\ 72 const char *name, \
73 void *data, void *ent)\
69{ \ 74{ \
70 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ 75 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
71} \ 76} \
@@ -80,6 +85,49 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
80DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) 85DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) 86DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
82 87
88/* data_rloc: data relative location, compatible with u32 */
89#define make_data_rloc(len, roffs) \
90 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
91#define get_rloc_len(dl) ((u32)(dl) >> 16)
92#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
93
94static inline void *get_rloc_data(u32 *dl)
95{
96 return (u8 *)dl + get_rloc_offs(*dl);
97}
98
99/* For data_loc conversion */
100static inline void *get_loc_data(u32 *dl, void *ent)
101{
102 return (u8 *)ent + get_rloc_offs(*dl);
103}
104
105/*
106 * Convert data_rloc to data_loc:
107 * data_rloc stores the offset from data_rloc itself, but data_loc
108 * stores the offset from event entry.
109 */
110#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
111
112/* For defining macros, define string/string_size types */
113typedef u32 string;
114typedef u32 string_size;
115
116/* Print type function for string type */
117static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
118 const char *name,
119 void *data, void *ent)
120{
121 int len = *(u32 *)data >> 16;
122
123 if (!len)
124 return trace_seq_printf(s, " %s=(fault)", name);
125 else
126 return trace_seq_printf(s, " %s=\"%s\"", name,
127 (const char *)get_loc_data(data, ent));
128}
129static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
130
83/* Data fetch function type */ 131/* Data fetch function type */
84typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); 132typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
85 133
@@ -94,32 +142,38 @@ static __kprobes void call_fetch(struct fetch_param *fprm,
94 return fprm->fn(regs, fprm->data, dest); 142 return fprm->fn(regs, fprm->data, dest);
95} 143}
96 144
97#define FETCH_FUNC_NAME(kind, type) fetch_##kind##_##type 145#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
98/* 146/*
99 * Define macro for basic types - we don't need to define s* types, because 147 * Define macro for basic types - we don't need to define s* types, because
100 * we have to care only about bitwidth at recording time. 148 * we have to care only about bitwidth at recording time.
101 */ 149 */
102#define DEFINE_BASIC_FETCH_FUNCS(kind) \ 150#define DEFINE_BASIC_FETCH_FUNCS(method) \
103DEFINE_FETCH_##kind(u8) \ 151DEFINE_FETCH_##method(u8) \
104DEFINE_FETCH_##kind(u16) \ 152DEFINE_FETCH_##method(u16) \
105DEFINE_FETCH_##kind(u32) \ 153DEFINE_FETCH_##method(u32) \
106DEFINE_FETCH_##kind(u64) 154DEFINE_FETCH_##method(u64)
107 155
108#define CHECK_BASIC_FETCH_FUNCS(kind, fn) \ 156#define CHECK_FETCH_FUNCS(method, fn) \
109 ((FETCH_FUNC_NAME(kind, u8) == fn) || \ 157 (((FETCH_FUNC_NAME(method, u8) == fn) || \
110 (FETCH_FUNC_NAME(kind, u16) == fn) || \ 158 (FETCH_FUNC_NAME(method, u16) == fn) || \
111 (FETCH_FUNC_NAME(kind, u32) == fn) || \ 159 (FETCH_FUNC_NAME(method, u32) == fn) || \
112 (FETCH_FUNC_NAME(kind, u64) == fn)) 160 (FETCH_FUNC_NAME(method, u64) == fn) || \
161 (FETCH_FUNC_NAME(method, string) == fn) || \
162 (FETCH_FUNC_NAME(method, string_size) == fn)) \
163 && (fn != NULL))
113 164
114/* Data fetch function templates */ 165/* Data fetch function templates */
115#define DEFINE_FETCH_reg(type) \ 166#define DEFINE_FETCH_reg(type) \
116static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ 167static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
117 void *offset, void *dest) \ 168 void *offset, void *dest) \
118{ \ 169{ \
119 *(type *)dest = (type)regs_get_register(regs, \ 170 *(type *)dest = (type)regs_get_register(regs, \
120 (unsigned int)((unsigned long)offset)); \ 171 (unsigned int)((unsigned long)offset)); \
121} 172}
122DEFINE_BASIC_FETCH_FUNCS(reg) 173DEFINE_BASIC_FETCH_FUNCS(reg)
174/* No string on the register */
175#define fetch_reg_string NULL
176#define fetch_reg_string_size NULL
123 177
124#define DEFINE_FETCH_stack(type) \ 178#define DEFINE_FETCH_stack(type) \
125static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 179static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
@@ -129,6 +183,9 @@ static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
129 (unsigned int)((unsigned long)offset)); \ 183 (unsigned int)((unsigned long)offset)); \
130} 184}
131DEFINE_BASIC_FETCH_FUNCS(stack) 185DEFINE_BASIC_FETCH_FUNCS(stack)
186/* No string on the stack entry */
187#define fetch_stack_string NULL
188#define fetch_stack_string_size NULL
132 189
133#define DEFINE_FETCH_retval(type) \ 190#define DEFINE_FETCH_retval(type) \
134static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ 191static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
@@ -137,6 +194,9 @@ static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
137 *(type *)dest = (type)regs_return_value(regs); \ 194 *(type *)dest = (type)regs_return_value(regs); \
138} 195}
139DEFINE_BASIC_FETCH_FUNCS(retval) 196DEFINE_BASIC_FETCH_FUNCS(retval)
197/* No string on the retval */
198#define fetch_retval_string NULL
199#define fetch_retval_string_size NULL
140 200
141#define DEFINE_FETCH_memory(type) \ 201#define DEFINE_FETCH_memory(type) \
142static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 202static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
@@ -149,6 +209,62 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
149 *(type *)dest = retval; \ 209 *(type *)dest = retval; \
150} 210}
151DEFINE_BASIC_FETCH_FUNCS(memory) 211DEFINE_BASIC_FETCH_FUNCS(memory)
212/*
213 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
214 * length and relative data location.
215 */
216static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
217 void *addr, void *dest)
218{
219 long ret;
220 int maxlen = get_rloc_len(*(u32 *)dest);
221 u8 *dst = get_rloc_data(dest);
222 u8 *src = addr;
223 mm_segment_t old_fs = get_fs();
224 if (!maxlen)
225 return;
226 /*
227 * Try to get string again, since the string can be changed while
228 * probing.
229 */
230 set_fs(KERNEL_DS);
231 pagefault_disable();
232 do
233 ret = __copy_from_user_inatomic(dst++, src++, 1);
234 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
235 dst[-1] = '\0';
236 pagefault_enable();
237 set_fs(old_fs);
238
239 if (ret < 0) { /* Failed to fetch string */
240 ((u8 *)get_rloc_data(dest))[0] = '\0';
241 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
242 } else
243 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
244 get_rloc_offs(*(u32 *)dest));
245}
246/* Return the length of string -- including the terminating null byte */
247static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
248 void *addr, void *dest)
249{
250 int ret, len = 0;
251 u8 c;
252 mm_segment_t old_fs = get_fs();
253
254 set_fs(KERNEL_DS);
255 pagefault_disable();
256 do {
257 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
258 len++;
259 } while (c && ret == 0 && len < MAX_STRING_SIZE);
260 pagefault_enable();
261 set_fs(old_fs);
262
263 if (ret < 0) /* Failed to check the length */
264 *(u32 *)dest = 0;
265 else
266 *(u32 *)dest = len;
267}
152 268
153/* Memory fetching by symbol */ 269/* Memory fetching by symbol */
154struct symbol_cache { 270struct symbol_cache {
@@ -203,6 +319,8 @@ static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
203 *(type *)dest = 0; \ 319 *(type *)dest = 0; \
204} 320}
205DEFINE_BASIC_FETCH_FUNCS(symbol) 321DEFINE_BASIC_FETCH_FUNCS(symbol)
322DEFINE_FETCH_symbol(string)
323DEFINE_FETCH_symbol(string_size)
206 324
207/* Dereference memory access function */ 325/* Dereference memory access function */
208struct deref_fetch_param { 326struct deref_fetch_param {
@@ -224,12 +342,14 @@ static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
224 *(type *)dest = 0; \ 342 *(type *)dest = 0; \
225} 343}
226DEFINE_BASIC_FETCH_FUNCS(deref) 344DEFINE_BASIC_FETCH_FUNCS(deref)
345DEFINE_FETCH_deref(string)
346DEFINE_FETCH_deref(string_size)
227 347
228static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) 348static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
229{ 349{
230 if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn)) 350 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
231 free_deref_fetch_param(data->orig.data); 351 free_deref_fetch_param(data->orig.data);
232 else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn)) 352 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
233 free_symbol_cache(data->orig.data); 353 free_symbol_cache(data->orig.data);
234 kfree(data); 354 kfree(data);
235} 355}
@@ -240,23 +360,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
240#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) 360#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
241#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) 361#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
242 362
243#define ASSIGN_FETCH_FUNC(kind, type) \ 363/* Fetch types */
244 .kind = FETCH_FUNC_NAME(kind, type) 364enum {
245 365 FETCH_MTD_reg = 0,
246#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ 366 FETCH_MTD_stack,
247 {.name = #ptype, \ 367 FETCH_MTD_retval,
248 .size = sizeof(ftype), \ 368 FETCH_MTD_memory,
249 .is_signed = sign, \ 369 FETCH_MTD_symbol,
250 .print = PRINT_TYPE_FUNC_NAME(ptype), \ 370 FETCH_MTD_deref,
251 .fmt = PRINT_TYPE_FMT_NAME(ptype), \ 371 FETCH_MTD_END,
252ASSIGN_FETCH_FUNC(reg, ftype), \ 372};
253ASSIGN_FETCH_FUNC(stack, ftype), \ 373
254ASSIGN_FETCH_FUNC(retval, ftype), \ 374#define ASSIGN_FETCH_FUNC(method, type) \
255ASSIGN_FETCH_FUNC(memory, ftype), \ 375 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
256ASSIGN_FETCH_FUNC(symbol, ftype), \ 376
257ASSIGN_FETCH_FUNC(deref, ftype), \ 377#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
378 {.name = _name, \
379 .size = _size, \
380 .is_signed = sign, \
381 .print = PRINT_TYPE_FUNC_NAME(ptype), \
382 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
383 .fmttype = _fmttype, \
384 .fetch = { \
385ASSIGN_FETCH_FUNC(reg, ftype), \
386ASSIGN_FETCH_FUNC(stack, ftype), \
387ASSIGN_FETCH_FUNC(retval, ftype), \
388ASSIGN_FETCH_FUNC(memory, ftype), \
389ASSIGN_FETCH_FUNC(symbol, ftype), \
390ASSIGN_FETCH_FUNC(deref, ftype), \
391 } \
258 } 392 }
259 393
394#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
395 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
396
397#define FETCH_TYPE_STRING 0
398#define FETCH_TYPE_STRSIZE 1
399
260/* Fetch type information table */ 400/* Fetch type information table */
261static const struct fetch_type { 401static const struct fetch_type {
262 const char *name; /* Name of type */ 402 const char *name; /* Name of type */
@@ -264,14 +404,16 @@ static const struct fetch_type {
264 int is_signed; /* Signed flag */ 404 int is_signed; /* Signed flag */
265 print_type_func_t print; /* Print functions */ 405 print_type_func_t print; /* Print functions */
266 const char *fmt; /* Format string */ 406
407 const char *fmttype; /* Name in format file */
267 /* Fetch functions */ 408 /* Fetch functions */
268 fetch_func_t reg; 409 fetch_func_t fetch[FETCH_MTD_END];
269 fetch_func_t stack;
270 fetch_func_t retval;
271 fetch_func_t memory;
272 fetch_func_t symbol;
273 fetch_func_t deref;
274} fetch_type_table[] = { 410} fetch_type_table[] = {
411 /* Special types */
412 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
413 sizeof(u32), 1, "__data_loc char[]"),
414 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
415 string_size, sizeof(u32), 0, "u32"),
416 /* Basic types */
275 ASSIGN_FETCH_TYPE(u8, u8, 0), 417 ASSIGN_FETCH_TYPE(u8, u8, 0),
276 ASSIGN_FETCH_TYPE(u16, u16, 0), 418 ASSIGN_FETCH_TYPE(u16, u16, 0),
277 ASSIGN_FETCH_TYPE(u32, u32, 0), 419 ASSIGN_FETCH_TYPE(u32, u32, 0),
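
With the per-method function pointers folded into a single fetch[] array indexed by FETCH_MTD_*, picking a fetch routine becomes a plain table lookup. A short sketch (error handling elided):

	const struct fetch_type *t;
	fetch_func_t fn;

	t  = &fetch_type_table[FETCH_TYPE_STRING];
	fn = t->fetch[FETCH_MTD_memory];	/* string fetched from memory */
	if (!fn)				/* NULL: no such type/method pair */
		return -EINVAL;

This is also why the reg/stack/retval string entries above are defined as NULL: the lookup naturally rejects type/method combinations that make no sense.
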
@@ -302,12 +444,28 @@ static __kprobes void fetch_stack_address(struct pt_regs *regs,
302 *(unsigned long *)dest = kernel_stack_pointer(regs); 444 *(unsigned long *)dest = kernel_stack_pointer(regs);
303} 445}
304 446
447static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
448 fetch_func_t orig_fn)
449{
450 int i;
451
452 if (type != &fetch_type_table[FETCH_TYPE_STRING])
453 return NULL; /* Only string type needs size function */
454 for (i = 0; i < FETCH_MTD_END; i++)
455 if (type->fetch[i] == orig_fn)
456 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
457
458 WARN_ON(1); /* This should not happen */
459 return NULL;
460}
461
305/** 462/**
306 * Kprobe event core functions 463 * Kprobe event core functions
307 */ 464 */
308 465
309struct probe_arg { 466struct probe_arg {
310 struct fetch_param fetch; 467 struct fetch_param fetch;
468 struct fetch_param fetch_size;
311 unsigned int offset; /* Offset from argument entry */ 469 unsigned int offset; /* Offset from argument entry */
312 const char *name; /* Name of this argument */ 470 const char *name; /* Name of this argument */
313 const char *comm; /* Command of this argument */ 471 const char *comm; /* Command of this argument */
@@ -429,9 +587,9 @@ error:
429 587
430static void free_probe_arg(struct probe_arg *arg) 588static void free_probe_arg(struct probe_arg *arg)
431{ 589{
432 if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn)) 590 if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
433 free_deref_fetch_param(arg->fetch.data); 591 free_deref_fetch_param(arg->fetch.data);
434 else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn)) 592 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
435 free_symbol_cache(arg->fetch.data); 593 free_symbol_cache(arg->fetch.data);
436 kfree(arg->name); 594 kfree(arg->name);
437 kfree(arg->comm); 595 kfree(arg->comm);
@@ -548,7 +706,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
548 706
549 if (strcmp(arg, "retval") == 0) { 707 if (strcmp(arg, "retval") == 0) {
550 if (is_return) 708 if (is_return)
551 f->fn = t->retval; 709 f->fn = t->fetch[FETCH_MTD_retval];
552 else 710 else
553 ret = -EINVAL; 711 ret = -EINVAL;
554 } else if (strncmp(arg, "stack", 5) == 0) { 712 } else if (strncmp(arg, "stack", 5) == 0) {
@@ -562,7 +720,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
562 if (ret || param > PARAM_MAX_STACK) 720 if (ret || param > PARAM_MAX_STACK)
563 ret = -EINVAL; 721 ret = -EINVAL;
564 else { 722 else {
565 f->fn = t->stack; 723 f->fn = t->fetch[FETCH_MTD_stack];
566 f->data = (void *)param; 724 f->data = (void *)param;
567 } 725 }
568 } else 726 } else
@@ -588,7 +746,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
588 case '%': /* named register */ 746 case '%': /* named register */
589 ret = regs_query_register_offset(arg + 1); 747 ret = regs_query_register_offset(arg + 1);
590 if (ret >= 0) { 748 if (ret >= 0) {
591 f->fn = t->reg; 749 f->fn = t->fetch[FETCH_MTD_reg];
592 f->data = (void *)(unsigned long)ret; 750 f->data = (void *)(unsigned long)ret;
593 ret = 0; 751 ret = 0;
594 } 752 }
@@ -598,7 +756,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
598 ret = strict_strtoul(arg + 1, 0, &param); 756 ret = strict_strtoul(arg + 1, 0, &param);
599 if (ret) 757 if (ret)
600 break; 758 break;
601 f->fn = t->memory; 759 f->fn = t->fetch[FETCH_MTD_memory];
602 f->data = (void *)param; 760 f->data = (void *)param;
603 } else { 761 } else {
604 ret = split_symbol_offset(arg + 1, &offset); 762 ret = split_symbol_offset(arg + 1, &offset);
@@ -606,7 +764,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
606 break; 764 break;
607 f->data = alloc_symbol_cache(arg + 1, offset); 765 f->data = alloc_symbol_cache(arg + 1, offset);
608 if (f->data) 766 if (f->data)
609 f->fn = t->symbol; 767 f->fn = t->fetch[FETCH_MTD_symbol];
610 } 768 }
611 break; 769 break;
612 case '+': /* deref memory */ 770 case '+': /* deref memory */
@@ -636,14 +794,17 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
636 if (ret) 794 if (ret)
637 kfree(dprm); 795 kfree(dprm);
638 else { 796 else {
639 f->fn = t->deref; 797 f->fn = t->fetch[FETCH_MTD_deref];
640 f->data = (void *)dprm; 798 f->data = (void *)dprm;
641 } 799 }
642 } 800 }
643 break; 801 break;
644 } 802 }
645 if (!ret && !f->fn) 803 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
804 pr_info("%s type has no corresponding fetch method.\n",
805 t->name);
646 ret = -EINVAL; 806 ret = -EINVAL;
807 }
647 return ret; 808 return ret;
648} 809}
649 810
@@ -652,6 +813,7 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
652 struct probe_arg *parg, int is_return) 813 struct probe_arg *parg, int is_return)
653{ 814{
654 const char *t; 815 const char *t;
816 int ret;
655 817
656 if (strlen(arg) > MAX_ARGSTR_LEN) { 818 if (strlen(arg) > MAX_ARGSTR_LEN) {
657 pr_info("Argument is too long.: %s\n", arg); 819 pr_info("Argument is too long.: %s\n", arg);
@@ -674,7 +836,13 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
674 } 836 }
675 parg->offset = tp->size; 837 parg->offset = tp->size;
676 tp->size += parg->type->size; 838 tp->size += parg->type->size;
677 return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 839 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
840 if (ret >= 0) {
841 parg->fetch_size.fn = get_fetch_size_function(parg->type,
842 parg->fetch.fn);
843 parg->fetch_size.data = parg->fetch.data;
844 }
845 return ret;
678} 846}
679 847
680/* Return 1 if name is reserved or already used by another argument */ 848/* Return 1 if name is reserved or already used by another argument */
@@ -757,14 +925,17 @@ static int create_trace_probe(int argc, char **argv)
757 pr_info("Delete command needs an event name.\n"); 925 pr_info("Delete command needs an event name.\n");
758 return -EINVAL; 926 return -EINVAL;
759 } 927 }
928 mutex_lock(&probe_lock);
760 tp = find_probe_event(event, group); 929 tp = find_probe_event(event, group);
761 if (!tp) { 930 if (!tp) {
931 mutex_unlock(&probe_lock);
762 pr_info("Event %s/%s doesn't exist.\n", group, event); 932 pr_info("Event %s/%s doesn't exist.\n", group, event);
763 return -ENOENT; 933 return -ENOENT;
764 } 934 }
765 /* delete an event */ 935 /* delete an event */
766 unregister_trace_probe(tp); 936 unregister_trace_probe(tp);
767 free_trace_probe(tp); 937 free_trace_probe(tp);
938 mutex_unlock(&probe_lock);
768 return 0; 939 return 0;
769 } 940 }
770 941
@@ -1043,6 +1214,54 @@ static const struct file_operations kprobe_profile_ops = {
1043 .release = seq_release, 1214 .release = seq_release,
1044}; 1215};
1045 1216
1217/* Sum up total data length for dynamic arrays (strings) */
1218static __kprobes int __get_data_size(struct trace_probe *tp,
1219 struct pt_regs *regs)
1220{
1221 int i, ret = 0;
1222 u32 len;
1223
1224 for (i = 0; i < tp->nr_args; i++)
1225 if (unlikely(tp->args[i].fetch_size.fn)) {
1226 call_fetch(&tp->args[i].fetch_size, regs, &len);
1227 ret += len;
1228 }
1229
1230 return ret;
1231}
1232
1233/* Store the value of each argument */
1234static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
1235 struct pt_regs *regs,
1236 u8 *data, int maxlen)
1237{
1238 int i;
1239 u32 end = tp->size;
1240 u32 *dl; /* Data (relative) location */
1241
1242 for (i = 0; i < tp->nr_args; i++) {
1243 if (unlikely(tp->args[i].fetch_size.fn)) {
1244 /*
1245 * First, we set the relative location and
1246 * maximum data length to *dl
1247 */
1248 dl = (u32 *)(data + tp->args[i].offset);
1249 *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
1250 /* Then try to fetch string or dynamic array data */
1251 call_fetch(&tp->args[i].fetch, regs, dl);
1252 /* Reduce maximum length */
1253 end += get_rloc_len(*dl);
1254 maxlen -= get_rloc_len(*dl);
1255 /* Trick here, convert data_rloc to data_loc */
1256 *dl = convert_rloc_to_loc(*dl,
1257 ent_size + tp->args[i].offset);
1258 } else
1259 /* Just fetching data normally */
1260 call_fetch(&tp->args[i].fetch, regs,
1261 data + tp->args[i].offset);
1262 }
1263}
1264
1046/* Kprobe handler */ 1265/* Kprobe handler */
1047static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1266static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1048{ 1267{
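
Putting __get_data_size() and store_trace_args() together, a probe record now consists of the fixed-size argument slots followed by a variable-sized area for string data, with each string slot holding a packed data_loc that points into that area. A rough picture for a probe with one integer argument and one string argument (sizes illustrative):

/*
 *   +----------------------------+ <- ring_buffer_event_data(event)
 *   | kprobe_trace_entry_head    |    entry->ip
 *   +----------------------------+
 *   | u32  arg0 (plain value)    |    fixed area, tp->size bytes total
 *   | u32  arg1 data_loc         |    (len << 16) | offset-from-entry
 *   +----------------------------+
 *   | "copied string bytes...\0" |    dynamic area, dsize bytes,
 *   +----------------------------+    sized by __get_data_size()
 */

store_trace_args() fills the fixed slots in order, appending each string to the dynamic area and shrinking maxlen as it goes, so a later argument can never overrun the space already consumed by an earlier one.
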
@@ -1050,8 +1269,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1050 struct kprobe_trace_entry_head *entry; 1269 struct kprobe_trace_entry_head *entry;
1051 struct ring_buffer_event *event; 1270 struct ring_buffer_event *event;
1052 struct ring_buffer *buffer; 1271 struct ring_buffer *buffer;
1053 u8 *data; 1272 int size, dsize, pc;
1054 int size, i, pc;
1055 unsigned long irq_flags; 1273 unsigned long irq_flags;
1056 struct ftrace_event_call *call = &tp->call; 1274 struct ftrace_event_call *call = &tp->call;
1057 1275
@@ -1060,7 +1278,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1060 local_save_flags(irq_flags); 1278 local_save_flags(irq_flags);
1061 pc = preempt_count(); 1279 pc = preempt_count();
1062 1280
1063 size = sizeof(*entry) + tp->size; 1281 dsize = __get_data_size(tp, regs);
1282 size = sizeof(*entry) + tp->size + dsize;
1064 1283
1065 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1284 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1066 size, irq_flags, pc); 1285 size, irq_flags, pc);
@@ -1069,9 +1288,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1069 1288
1070 entry = ring_buffer_event_data(event); 1289 entry = ring_buffer_event_data(event);
1071 entry->ip = (unsigned long)kp->addr; 1290 entry->ip = (unsigned long)kp->addr;
1072 data = (u8 *)&entry[1]; 1291 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1073 for (i = 0; i < tp->nr_args; i++)
1074 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1075 1292
1076 if (!filter_current_check_discard(buffer, call, entry, event)) 1293 if (!filter_current_check_discard(buffer, call, entry, event))
1077 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1294 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1085,15 +1302,15 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1085 struct kretprobe_trace_entry_head *entry; 1302 struct kretprobe_trace_entry_head *entry;
1086 struct ring_buffer_event *event; 1303 struct ring_buffer_event *event;
1087 struct ring_buffer *buffer; 1304 struct ring_buffer *buffer;
1088 u8 *data; 1305 int size, pc, dsize;
1089 int size, i, pc;
1090 unsigned long irq_flags; 1306 unsigned long irq_flags;
1091 struct ftrace_event_call *call = &tp->call; 1307 struct ftrace_event_call *call = &tp->call;
1092 1308
1093 local_save_flags(irq_flags); 1309 local_save_flags(irq_flags);
1094 pc = preempt_count(); 1310 pc = preempt_count();
1095 1311
1096 size = sizeof(*entry) + tp->size; 1312 dsize = __get_data_size(tp, regs);
1313 size = sizeof(*entry) + tp->size + dsize;
1097 1314
1098 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1315 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1099 size, irq_flags, pc); 1316 size, irq_flags, pc);
@@ -1103,9 +1320,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1103 entry = ring_buffer_event_data(event); 1320 entry = ring_buffer_event_data(event);
1104 entry->func = (unsigned long)tp->rp.kp.addr; 1321 entry->func = (unsigned long)tp->rp.kp.addr;
1105 entry->ret_ip = (unsigned long)ri->ret_addr; 1322 entry->ret_ip = (unsigned long)ri->ret_addr;
1106 data = (u8 *)&entry[1]; 1323 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1107 for (i = 0; i < tp->nr_args; i++)
1108 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1109 1324
1110 if (!filter_current_check_discard(buffer, call, entry, event)) 1325 if (!filter_current_check_discard(buffer, call, entry, event))
1111 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1326 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1137,7 +1352,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
1137 data = (u8 *)&field[1]; 1352 data = (u8 *)&field[1];
1138 for (i = 0; i < tp->nr_args; i++) 1353 for (i = 0; i < tp->nr_args; i++)
1139 if (!tp->args[i].type->print(s, tp->args[i].name, 1354 if (!tp->args[i].type->print(s, tp->args[i].name,
1140 data + tp->args[i].offset)) 1355 data + tp->args[i].offset, field))
1141 goto partial; 1356 goto partial;
1142 1357
1143 if (!trace_seq_puts(s, "\n")) 1358 if (!trace_seq_puts(s, "\n"))
@@ -1179,7 +1394,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
1179 data = (u8 *)&field[1]; 1394 data = (u8 *)&field[1];
1180 for (i = 0; i < tp->nr_args; i++) 1395 for (i = 0; i < tp->nr_args; i++)
1181 if (!tp->args[i].type->print(s, tp->args[i].name, 1396 if (!tp->args[i].type->print(s, tp->args[i].name,
1182 data + tp->args[i].offset)) 1397 data + tp->args[i].offset, field))
1183 goto partial; 1398 goto partial;
1184 1399
1185 if (!trace_seq_puts(s, "\n")) 1400 if (!trace_seq_puts(s, "\n"))
@@ -1214,11 +1429,6 @@ static void probe_event_disable(struct ftrace_event_call *call)
1214 } 1429 }
1215} 1430}
1216 1431
1217static int probe_event_raw_init(struct ftrace_event_call *event_call)
1218{
1219 return 0;
1220}
1221
1222#undef DEFINE_FIELD 1432#undef DEFINE_FIELD
1223#define DEFINE_FIELD(type, item, name, is_signed) \ 1433#define DEFINE_FIELD(type, item, name, is_signed) \
1224 do { \ 1434 do { \
@@ -1239,7 +1449,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1239 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1449 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1240 /* Set argument names as fields */ 1450 /* Set argument names as fields */
1241 for (i = 0; i < tp->nr_args; i++) { 1451 for (i = 0; i < tp->nr_args; i++) {
1242 ret = trace_define_field(event_call, tp->args[i].type->name, 1452 ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1243 tp->args[i].name, 1453 tp->args[i].name,
1244 sizeof(field) + tp->args[i].offset, 1454 sizeof(field) + tp->args[i].offset,
1245 tp->args[i].type->size, 1455 tp->args[i].type->size,
@@ -1261,7 +1471,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1261 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1471 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1262 /* Set argument names as fields */ 1472 /* Set argument names as fields */
1263 for (i = 0; i < tp->nr_args; i++) { 1473 for (i = 0; i < tp->nr_args; i++) {
1264 ret = trace_define_field(event_call, tp->args[i].type->name, 1474 ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1265 tp->args[i].name, 1475 tp->args[i].name,
1266 sizeof(field) + tp->args[i].offset, 1476 sizeof(field) + tp->args[i].offset,
1267 tp->args[i].type->size, 1477 tp->args[i].type->size,
@@ -1301,8 +1511,13 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1301 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 1511 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1302 1512
1303 for (i = 0; i < tp->nr_args; i++) { 1513 for (i = 0; i < tp->nr_args; i++) {
1304 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", 1514 if (strcmp(tp->args[i].type->name, "string") == 0)
1305 tp->args[i].name); 1515 pos += snprintf(buf + pos, LEN_OR_ZERO,
1516 ", __get_str(%s)",
1517 tp->args[i].name);
1518 else
1519 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1520 tp->args[i].name);
1306 } 1521 }
1307 1522
1308#undef LEN_OR_ZERO 1523#undef LEN_OR_ZERO
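
The format string above is assembled with the two-pass snprintf() idiom used throughout this file: LEN_OR_ZERO collapses to 0 on the sizing pass, so the routine is first called with a NULL buffer to measure and then again to fill. A standalone sketch of that idiom (the field name "arg1" is made up):

#include <stdio.h>
#include <stdlib.h>

/* pass 1: buf == NULL, len == 0 only measures; pass 2 fills the buffer */
static int build_fmt(char *buf, int len, const char *name)
{
	int pos = 0;

	pos += snprintf(buf, len, "\"%s=%%lu\"", name);
	pos += snprintf(buf ? buf + pos : NULL, len ? len - pos : 0,
			", REC->%s", name);
	return pos;
}

int main(void)
{
	int len = build_fmt(NULL, 0, "arg1") + 1;	/* measure */
	char *buf = malloc(len);

	build_fmt(buf, len, "arg1");			/* fill */
	printf("%s\n", buf);				/* "arg1=%lu", REC->arg1 */
	free(buf);
	return 0;
}
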
@@ -1339,11 +1554,11 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1339 struct ftrace_event_call *call = &tp->call; 1554 struct ftrace_event_call *call = &tp->call;
1340 struct kprobe_trace_entry_head *entry; 1555 struct kprobe_trace_entry_head *entry;
1341 struct hlist_head *head; 1556 struct hlist_head *head;
1342 u8 *data; 1557 int size, __size, dsize;
1343 int size, __size, i;
1344 int rctx; 1558 int rctx;
1345 1559
1346 __size = sizeof(*entry) + tp->size; 1560 dsize = __get_data_size(tp, regs);
1561 __size = sizeof(*entry) + tp->size + dsize;
1347 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1562 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1348 size -= sizeof(u32); 1563 size -= sizeof(u32);
1349 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1564 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1355,9 +1570,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1355 return; 1570 return;
1356 1571
1357 entry->ip = (unsigned long)kp->addr; 1572 entry->ip = (unsigned long)kp->addr;
1358 data = (u8 *)&entry[1]; 1573 memset(&entry[1], 0, dsize);
1359 for (i = 0; i < tp->nr_args; i++) 1574 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1361 1575
1362 head = this_cpu_ptr(call->perf_events); 1576 head = this_cpu_ptr(call->perf_events);
1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1577 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
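
On the sizing arithmetic above: my reading is that the extra u32 stands in for the size word the perf buffer code prepends to the raw record, and the sum has to stay u64-aligned. A quick standalone check of the rounding (the value 45 is arbitrary):

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int raw = 45;	/* sizeof(*entry) + tp->size + dsize, say */
	unsigned int size = ALIGN(raw + 4u, 8u) - 4u;	/* 4 = sizeof(u32), 8 = sizeof(u64) */

	printf("raw=%u -> size=%u, size + 4 = %u (a multiple of 8)\n",
	       raw, size, size + 4);
	return 0;
}
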
@@ -1371,11 +1585,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1371 struct ftrace_event_call *call = &tp->call; 1585 struct ftrace_event_call *call = &tp->call;
1372 struct kretprobe_trace_entry_head *entry; 1586 struct kretprobe_trace_entry_head *entry;
1373 struct hlist_head *head; 1587 struct hlist_head *head;
1374 u8 *data; 1588 int size, __size, dsize;
1375 int size, __size, i;
1376 int rctx; 1589 int rctx;
1377 1590
1378 __size = sizeof(*entry) + tp->size; 1591 dsize = __get_data_size(tp, regs);
1592 __size = sizeof(*entry) + tp->size + dsize;
1379 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1593 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1380 size -= sizeof(u32); 1594 size -= sizeof(u32);
1381 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1595 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1388,9 +1602,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1388 1602
1389 entry->func = (unsigned long)tp->rp.kp.addr; 1603 entry->func = (unsigned long)tp->rp.kp.addr;
1390 entry->ret_ip = (unsigned long)ri->ret_addr; 1604 entry->ret_ip = (unsigned long)ri->ret_addr;
1391 data = (u8 *)&entry[1]; 1605 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1392 for (i = 0; i < tp->nr_args; i++)
1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1394 1606
1395 head = this_cpu_ptr(call->perf_events); 1607 head = this_cpu_ptr(call->perf_events);
1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1608 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
@@ -1486,15 +1698,12 @@ static int register_probe_event(struct trace_probe *tp)
1486 int ret; 1698 int ret;
1487 1699
1488 /* Initialize ftrace_event_call */ 1700 /* Initialize ftrace_event_call */
1701 INIT_LIST_HEAD(&call->class->fields);
1489 if (probe_is_return(tp)) { 1702 if (probe_is_return(tp)) {
1490 INIT_LIST_HEAD(&call->class->fields);
1491 call->event.funcs = &kretprobe_funcs; 1703 call->event.funcs = &kretprobe_funcs;
1492 call->class->raw_init = probe_event_raw_init;
1493 call->class->define_fields = kretprobe_event_define_fields; 1704 call->class->define_fields = kretprobe_event_define_fields;
1494 } else { 1705 } else {
1495 INIT_LIST_HEAD(&call->class->fields);
1496 call->event.funcs = &kprobe_funcs; 1706 call->event.funcs = &kprobe_funcs;
1497 call->class->raw_init = probe_event_raw_init;
1498 call->class->define_fields = kprobe_event_define_fields; 1707 call->class->define_fields = kprobe_event_define_fields;
1499 } 1708 }
1500 if (set_print_fmt(tp) < 0) 1709 if (set_print_fmt(tp) < 0)
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
deleted file mode 100644
index 8eaf00749b65..000000000000
--- a/kernel/trace/trace_ksym.c
+++ /dev/null
@@ -1,508 +0,0 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/slab.h>
27#include <linux/fs.h>
28
29#include "trace_output.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35#include <asm/atomic.h>
36
37#define KSYM_TRACER_OP_LEN 3 /* rw- */
38
39struct trace_ksym {
40 struct perf_event **ksym_hbp;
41 struct perf_event_attr attr;
42#ifdef CONFIG_PROFILE_KSYM_TRACER
43 atomic64_t counter;
44#endif
45 struct hlist_node ksym_hlist;
46};
47
48static struct trace_array *ksym_trace_array;
49
50static unsigned int ksym_tracing_enabled;
51
52static HLIST_HEAD(ksym_filter_head);
53
54static DEFINE_MUTEX(ksym_tracer_mutex);
55
56#ifdef CONFIG_PROFILE_KSYM_TRACER
57
58#define MAX_UL_INT 0xffffffff
59
60void ksym_collect_stats(unsigned long hbp_hit_addr)
61{
62 struct hlist_node *node;
63 struct trace_ksym *entry;
64
65 rcu_read_lock();
66 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
67 if (entry->attr.bp_addr == hbp_hit_addr) {
68 atomic64_inc(&entry->counter);
69 break;
70 }
71 }
72 rcu_read_unlock();
73}
74#endif /* CONFIG_PROFILE_KSYM_TRACER */
75
76void ksym_hbp_handler(struct perf_event *hbp, int nmi,
77 struct perf_sample_data *data,
78 struct pt_regs *regs)
79{
80 struct ring_buffer_event *event;
81 struct ksym_trace_entry *entry;
82 struct ring_buffer *buffer;
83 int pc;
84
85 if (!ksym_tracing_enabled)
86 return;
87
88 buffer = ksym_trace_array->buffer;
89
90 pc = preempt_count();
91
92 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
93 sizeof(*entry), 0, pc);
94 if (!event)
95 return;
96
97 entry = ring_buffer_event_data(event);
98 entry->ip = instruction_pointer(regs);
99 entry->type = hw_breakpoint_type(hbp);
100 entry->addr = hw_breakpoint_addr(hbp);
101 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
102
103#ifdef CONFIG_PROFILE_KSYM_TRACER
104 ksym_collect_stats(hw_breakpoint_addr(hbp));
105#endif /* CONFIG_PROFILE_KSYM_TRACER */
106
107 trace_buffer_unlock_commit(buffer, event, 0, pc);
108}
109
110/* Valid access types are represented as
111 *
112 * rw- : Set Read/Write Access Breakpoint
113 * -w- : Set Write Access Breakpoint
114 * --- : Clear Breakpoints
115 * --x : Set Execution Break points (Not available yet)
116 *
117 */
118static int ksym_trace_get_access_type(char *str)
119{
120 int access = 0;
121
122 if (str[0] == 'r')
123 access |= HW_BREAKPOINT_R;
124
125 if (str[1] == 'w')
126 access |= HW_BREAKPOINT_W;
127
128 if (str[2] == 'x')
129 access |= HW_BREAKPOINT_X;
130
131 switch (access) {
132 case HW_BREAKPOINT_R:
133 case HW_BREAKPOINT_W:
134 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
135 return access;
136 default:
137 return -EINVAL;
138 }
139}
140
141/*
142 * There can be several possible malformed requests and we attempt to capture
143 * all of them. We enumerate some of the rules
144 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
145 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
146 * <module>:<ksym_name>:<op>.
147 * 2. No delimiter symbol ':' in the input string
148 * 3. Spurious operator symbols or symbols not in their respective positions
149 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
150 * 5. Kernel symbol not a part of /proc/kallsyms
151 * 6. Duplicate requests
152 */
153static int parse_ksym_trace_str(char *input_string, char **ksymname,
154 unsigned long *addr)
155{
156 int ret;
157
158 *ksymname = strsep(&input_string, ":");
159 *addr = kallsyms_lookup_name(*ksymname);
160
161 /* Check for malformed request: (2), (1) and (5) */
162 if ((!input_string) ||
163 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
164 (*addr == 0))
165 return -EINVAL;;
166
167 ret = ksym_trace_get_access_type(input_string);
168
169 return ret;
170}
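
For the parsing rules spelled out in the comment above, a userspace sketch of the strsep() split into "<ksym_name>" and the 3-character op field (the symbol name is made up; the kallsyms lookup is left out):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
	char input[] = "pid_max:rw-";		/* hypothetical filter string */
	char *rest = input;
	char *ksymname = strsep(&rest, ":");

	/* malformed-request checks (2) and the length part of (3) */
	if (!rest || strlen(rest) != 3) {
		fprintf(stderr, "malformed request\n");
		return 1;
	}
	printf("symbol=%s op=%s\n", ksymname, rest);
	return 0;
}
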
171
172int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
173{
174 struct trace_ksym *entry;
175 int ret = -ENOMEM;
176
177 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
178 if (!entry)
179 return -ENOMEM;
180
181 hw_breakpoint_init(&entry->attr);
182
183 entry->attr.bp_type = op;
184 entry->attr.bp_addr = addr;
185 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
186
187 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
188 ksym_hbp_handler);
189
190 if (IS_ERR(entry->ksym_hbp)) {
191 ret = PTR_ERR(entry->ksym_hbp);
192 if (ret == -ENOSPC) {
193 printk(KERN_ERR "ksym_tracer: Maximum limit reached."
194 " No new requests for tracing can be accepted now.\n");
195 } else {
196 printk(KERN_INFO "ksym_tracer request failed. Try again"
197 " later!!\n");
198 }
199 goto err;
200 }
201
202 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
203
204 return 0;
205
206err:
207 kfree(entry);
208
209 return ret;
210}
211
212static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
213 size_t count, loff_t *ppos)
214{
215 struct trace_ksym *entry;
216 struct hlist_node *node;
217 struct trace_seq *s;
218 ssize_t cnt = 0;
219 int ret;
220
221 s = kmalloc(sizeof(*s), GFP_KERNEL);
222 if (!s)
223 return -ENOMEM;
224 trace_seq_init(s);
225
226 mutex_lock(&ksym_tracer_mutex);
227
228 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
229 ret = trace_seq_printf(s, "%pS:",
230 (void *)(unsigned long)entry->attr.bp_addr);
231 if (entry->attr.bp_type == HW_BREAKPOINT_R)
232 ret = trace_seq_puts(s, "r--\n");
233 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
234 ret = trace_seq_puts(s, "-w-\n");
235 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
236 ret = trace_seq_puts(s, "rw-\n");
237 WARN_ON_ONCE(!ret);
238 }
239
240 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
241
242 mutex_unlock(&ksym_tracer_mutex);
243
244 kfree(s);
245
246 return cnt;
247}
248
249static void __ksym_trace_reset(void)
250{
251 struct trace_ksym *entry;
252 struct hlist_node *node, *node1;
253
254 mutex_lock(&ksym_tracer_mutex);
255 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
256 ksym_hlist) {
257 unregister_wide_hw_breakpoint(entry->ksym_hbp);
258 hlist_del_rcu(&(entry->ksym_hlist));
259 synchronize_rcu();
260 kfree(entry);
261 }
262 mutex_unlock(&ksym_tracer_mutex);
263}
264
265static ssize_t ksym_trace_filter_write(struct file *file,
266 const char __user *buffer,
267 size_t count, loff_t *ppos)
268{
269 struct trace_ksym *entry;
270 struct hlist_node *node;
271 char *buf, *input_string, *ksymname = NULL;
272 unsigned long ksym_addr = 0;
273 int ret, op, changed = 0;
274
275 buf = kzalloc(count + 1, GFP_KERNEL);
276 if (!buf)
277 return -ENOMEM;
278
279 ret = -EFAULT;
280 if (copy_from_user(buf, buffer, count))
281 goto out;
282
283 buf[count] = '\0';
284 input_string = strstrip(buf);
285
286 /*
287 * Clear all breakpoints if:
288 * 1: echo > ksym_trace_filter
289 * 2: echo 0 > ksym_trace_filter
290 * 3: echo "*:---" > ksym_trace_filter
291 */
292 if (!input_string[0] || !strcmp(input_string, "0") ||
293 !strcmp(input_string, "*:---")) {
294 __ksym_trace_reset();
295 ret = 0;
296 goto out;
297 }
298
299 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
300 if (ret < 0)
301 goto out;
302
303 mutex_lock(&ksym_tracer_mutex);
304
305 ret = -EINVAL;
306 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
307 if (entry->attr.bp_addr == ksym_addr) {
308 /* Check for malformed request: (6) */
309 if (entry->attr.bp_type != op)
310 changed = 1;
311 else
312 goto out_unlock;
313 break;
314 }
315 }
316 if (changed) {
317 unregister_wide_hw_breakpoint(entry->ksym_hbp);
318 entry->attr.bp_type = op;
319 ret = 0;
320 if (op > 0) {
321 entry->ksym_hbp =
322 register_wide_hw_breakpoint(&entry->attr,
323 ksym_hbp_handler);
324 if (IS_ERR(entry->ksym_hbp))
325 ret = PTR_ERR(entry->ksym_hbp);
326 else
327 goto out_unlock;
328 }
329 /* Error or "symbol:---" case: drop it */
330 hlist_del_rcu(&(entry->ksym_hlist));
331 synchronize_rcu();
332 kfree(entry);
333 goto out_unlock;
334 } else {
335 /* Check for malformed request: (4) */
336 if (op)
337 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
338 }
339out_unlock:
340 mutex_unlock(&ksym_tracer_mutex);
341out:
342 kfree(buf);
343 return !ret ? count : ret;
344}
345
346static const struct file_operations ksym_tracing_fops = {
347 .open = tracing_open_generic,
348 .read = ksym_trace_filter_read,
349 .write = ksym_trace_filter_write,
350};
351
352static void ksym_trace_reset(struct trace_array *tr)
353{
354 ksym_tracing_enabled = 0;
355 __ksym_trace_reset();
356}
357
358static int ksym_trace_init(struct trace_array *tr)
359{
360 int cpu, ret = 0;
361
362 for_each_online_cpu(cpu)
363 tracing_reset(tr, cpu);
364 ksym_tracing_enabled = 1;
365 ksym_trace_array = tr;
366
367 return ret;
368}
369
370static void ksym_trace_print_header(struct seq_file *m)
371{
372 seq_puts(m,
373 "# TASK-PID CPU# Symbol "
374 "Type Function\n");
375 seq_puts(m,
376 "# | | | "
377 " | |\n");
378}
379
380static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
381{
382 struct trace_entry *entry = iter->ent;
383 struct trace_seq *s = &iter->seq;
384 struct ksym_trace_entry *field;
385 char str[KSYM_SYMBOL_LEN];
386 int ret;
387
388 if (entry->type != TRACE_KSYM)
389 return TRACE_TYPE_UNHANDLED;
390
391 trace_assign_type(field, entry);
392
393 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
394 entry->pid, iter->cpu, (char *)field->addr);
395 if (!ret)
396 return TRACE_TYPE_PARTIAL_LINE;
397
398 switch (field->type) {
399 case HW_BREAKPOINT_R:
400 ret = trace_seq_printf(s, " R ");
401 break;
402 case HW_BREAKPOINT_W:
403 ret = trace_seq_printf(s, " W ");
404 break;
405 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
406 ret = trace_seq_printf(s, " RW ");
407 break;
408 default:
409 return TRACE_TYPE_PARTIAL_LINE;
410 }
411
412 if (!ret)
413 return TRACE_TYPE_PARTIAL_LINE;
414
415 sprint_symbol(str, field->ip);
416 ret = trace_seq_printf(s, "%s\n", str);
417 if (!ret)
418 return TRACE_TYPE_PARTIAL_LINE;
419
420 return TRACE_TYPE_HANDLED;
421}
422
423struct tracer ksym_tracer __read_mostly =
424{
425 .name = "ksym_tracer",
426 .init = ksym_trace_init,
427 .reset = ksym_trace_reset,
428#ifdef CONFIG_FTRACE_SELFTEST
429 .selftest = trace_selftest_startup_ksym,
430#endif
431 .print_header = ksym_trace_print_header,
432 .print_line = ksym_trace_output
433};
434
435#ifdef CONFIG_PROFILE_KSYM_TRACER
436static int ksym_profile_show(struct seq_file *m, void *v)
437{
438 struct hlist_node *node;
439 struct trace_ksym *entry;
440 int access_type = 0;
441 char fn_name[KSYM_NAME_LEN];
442
443 seq_puts(m, " Access Type ");
444 seq_puts(m, " Symbol Counter\n");
445 seq_puts(m, " ----------- ");
446 seq_puts(m, " ------ -------\n");
447
448 rcu_read_lock();
449 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
450
451 access_type = entry->attr.bp_type;
452
453 switch (access_type) {
454 case HW_BREAKPOINT_R:
455 seq_puts(m, " R ");
456 break;
457 case HW_BREAKPOINT_W:
458 seq_puts(m, " W ");
459 break;
460 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
461 seq_puts(m, " RW ");
462 break;
463 default:
464 seq_puts(m, " NA ");
465 }
466
467 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
468 seq_printf(m, " %-36s", fn_name);
469 else
470 seq_printf(m, " %-36s", "<NA>");
471 seq_printf(m, " %15llu\n",
472 (unsigned long long)atomic64_read(&entry->counter));
473 }
474 rcu_read_unlock();
475
476 return 0;
477}
478
479static int ksym_profile_open(struct inode *node, struct file *file)
480{
481 return single_open(file, ksym_profile_show, NULL);
482}
483
484static const struct file_operations ksym_profile_fops = {
485 .open = ksym_profile_open,
486 .read = seq_read,
487 .llseek = seq_lseek,
488 .release = single_release,
489};
490#endif /* CONFIG_PROFILE_KSYM_TRACER */
491
492__init static int init_ksym_trace(void)
493{
494 struct dentry *d_tracer;
495
496 d_tracer = tracing_init_dentry();
497
498 trace_create_file("ksym_trace_filter", 0644, d_tracer,
499 NULL, &ksym_tracing_fops);
500
501#ifdef CONFIG_PROFILE_KSYM_TRACER
502 trace_create_file("ksym_profile", 0444, d_tracer,
503 NULL, &ksym_profile_fops);
504#endif
505
506 return register_tracer(&ksym_tracer);
507}
508device_initcall(init_ksym_trace);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 57c1b4596470..02272baa2206 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,9 +16,6 @@
16 16
17DECLARE_RWSEM(trace_event_mutex); 17DECLARE_RWSEM(trace_event_mutex);
18 18
19DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
20EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
21
22static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; 19static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 20
24static int next_event_type = __TRACE_LAST_TYPE + 1; 21static int next_event_type = __TRACE_LAST_TYPE + 1;
@@ -1069,65 +1066,6 @@ static struct trace_event trace_wake_event = {
1069 .funcs = &trace_wake_funcs, 1066 .funcs = &trace_wake_funcs,
1070}; 1067};
1071 1068
1072/* TRACE_SPECIAL */
1073static enum print_line_t trace_special_print(struct trace_iterator *iter,
1074 int flags, struct trace_event *event)
1075{
1076 struct special_entry *field;
1077
1078 trace_assign_type(field, iter->ent);
1079
1080 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
1081 field->arg1,
1082 field->arg2,
1083 field->arg3))
1084 return TRACE_TYPE_PARTIAL_LINE;
1085
1086 return TRACE_TYPE_HANDLED;
1087}
1088
1089static enum print_line_t trace_special_hex(struct trace_iterator *iter,
1090 int flags, struct trace_event *event)
1091{
1092 struct special_entry *field;
1093 struct trace_seq *s = &iter->seq;
1094
1095 trace_assign_type(field, iter->ent);
1096
1097 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1098 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
1099 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1100
1101 return TRACE_TYPE_HANDLED;
1102}
1103
1104static enum print_line_t trace_special_bin(struct trace_iterator *iter,
1105 int flags, struct trace_event *event)
1106{
1107 struct special_entry *field;
1108 struct trace_seq *s = &iter->seq;
1109
1110 trace_assign_type(field, iter->ent);
1111
1112 SEQ_PUT_FIELD_RET(s, field->arg1);
1113 SEQ_PUT_FIELD_RET(s, field->arg2);
1114 SEQ_PUT_FIELD_RET(s, field->arg3);
1115
1116 return TRACE_TYPE_HANDLED;
1117}
1118
1119static struct trace_event_functions trace_special_funcs = {
1120 .trace = trace_special_print,
1121 .raw = trace_special_print,
1122 .hex = trace_special_hex,
1123 .binary = trace_special_bin,
1124};
1125
1126static struct trace_event trace_special_event = {
1127 .type = TRACE_SPECIAL,
1128 .funcs = &trace_special_funcs,
1129};
1130
1131/* TRACE_STACK */ 1069/* TRACE_STACK */
1132 1070
1133static enum print_line_t trace_stack_print(struct trace_iterator *iter, 1071static enum print_line_t trace_stack_print(struct trace_iterator *iter,
@@ -1161,9 +1099,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1161 1099
1162static struct trace_event_functions trace_stack_funcs = { 1100static struct trace_event_functions trace_stack_funcs = {
1163 .trace = trace_stack_print, 1101 .trace = trace_stack_print,
1164 .raw = trace_special_print,
1165 .hex = trace_special_hex,
1166 .binary = trace_special_bin,
1167}; 1102};
1168 1103
1169static struct trace_event trace_stack_event = { 1104static struct trace_event trace_stack_event = {
@@ -1194,9 +1129,6 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1194 1129
1195static struct trace_event_functions trace_user_stack_funcs = { 1130static struct trace_event_functions trace_user_stack_funcs = {
1196 .trace = trace_user_stack_print, 1131 .trace = trace_user_stack_print,
1197 .raw = trace_special_print,
1198 .hex = trace_special_hex,
1199 .binary = trace_special_bin,
1200}; 1132};
1201 1133
1202static struct trace_event trace_user_stack_event = { 1134static struct trace_event trace_user_stack_event = {
@@ -1314,7 +1246,6 @@ static struct trace_event *events[] __initdata = {
1314 &trace_fn_event, 1246 &trace_fn_event,
1315 &trace_ctx_event, 1247 &trace_ctx_event,
1316 &trace_wake_event, 1248 &trace_wake_event,
1317 &trace_special_event,
1318 &trace_stack_event, 1249 &trace_stack_event,
1319 &trace_user_stack_event, 1250 &trace_user_stack_event,
1320 &trace_bprint_event, 1251 &trace_bprint_event,
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0e73bc2ef8c5..4086eae6e81b 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -46,7 +46,6 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
46 struct trace_array_cpu *data; 46 struct trace_array_cpu *data;
47 unsigned long flags; 47 unsigned long flags;
48 long disabled; 48 long disabled;
49 int resched;
50 int cpu; 49 int cpu;
51 int pc; 50 int pc;
52 51
@@ -54,7 +53,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
54 return; 53 return;
55 54
56 pc = preempt_count(); 55 pc = preempt_count();
57 resched = ftrace_preempt_disable(); 56 preempt_disable_notrace();
58 57
59 cpu = raw_smp_processor_id(); 58 cpu = raw_smp_processor_id();
60 if (cpu != wakeup_current_cpu) 59 if (cpu != wakeup_current_cpu)
@@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
74 out: 73 out:
75 atomic_dec(&data->disabled); 74 atomic_dec(&data->disabled);
76 out_enable: 75 out_enable:
77 ftrace_preempt_enable(resched); 76 preempt_enable_notrace();
78} 77}
79 78
80static struct ftrace_ops trace_ops __read_mostly = 79static struct ftrace_ops trace_ops __read_mostly =
@@ -383,6 +382,7 @@ static struct tracer wakeup_tracer __read_mostly =
383#ifdef CONFIG_FTRACE_SELFTEST 382#ifdef CONFIG_FTRACE_SELFTEST
384 .selftest = trace_selftest_startup_wakeup, 383 .selftest = trace_selftest_startup_wakeup,
385#endif 384#endif
385 .use_max_tr = 1,
386}; 386};
387 387
388static struct tracer wakeup_rt_tracer __read_mostly = 388static struct tracer wakeup_rt_tracer __read_mostly =
@@ -397,6 +397,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
397#ifdef CONFIG_FTRACE_SELFTEST 397#ifdef CONFIG_FTRACE_SELFTEST
398 .selftest = trace_selftest_startup_wakeup, 398 .selftest = trace_selftest_startup_wakeup,
399#endif 399#endif
400 .use_max_tr = 1,
400}; 401};
401 402
402__init static int init_wakeup_tracer(void) 403__init static int init_wakeup_tracer(void)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 250e7f9bd2f0..155a415b3209 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,11 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)
13 case TRACE_WAKE: 13 case TRACE_WAKE:
14 case TRACE_STACK: 14 case TRACE_STACK:
15 case TRACE_PRINT: 15 case TRACE_PRINT:
16 case TRACE_SPECIAL:
17 case TRACE_BRANCH: 16 case TRACE_BRANCH:
18 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
19 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
20 case TRACE_KSYM:
21 return 1; 19 return 1;
22 } 20 }
23 return 0; 21 return 0;
@@ -691,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
691} 689}
692#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 690#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
693 691
694#ifdef CONFIG_SYSPROF_TRACER
695int
696trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
697{
698 unsigned long count;
699 int ret;
700
701 /* start the tracing */
702 ret = tracer_init(trace, tr);
703 if (ret) {
704 warn_failed_init_tracer(trace, ret);
705 return ret;
706 }
707
708 /* Sleep for a 1/10 of a second */
709 msleep(100);
710 /* stop the tracing. */
711 tracing_stop();
712 /* check the trace buffer */
713 ret = trace_test_buffer(tr, &count);
714 trace->reset(tr);
715 tracing_start();
716
717 if (!ret && !count) {
718 printk(KERN_CONT ".. no entries found ..");
719 ret = -1;
720 }
721
722 return ret;
723}
724#endif /* CONFIG_SYSPROF_TRACER */
725
726#ifdef CONFIG_BRANCH_TRACER 692#ifdef CONFIG_BRANCH_TRACER
727int 693int
728trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) 694trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
@@ -755,56 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
755} 721}
756#endif /* CONFIG_BRANCH_TRACER */ 722#endif /* CONFIG_BRANCH_TRACER */
757 723
758#ifdef CONFIG_KSYM_TRACER
759static int ksym_selftest_dummy;
760
761int
762trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
763{
764 unsigned long count;
765 int ret;
766
767 /* start the tracing */
768 ret = tracer_init(trace, tr);
769 if (ret) {
770 warn_failed_init_tracer(trace, ret);
771 return ret;
772 }
773
774 ksym_selftest_dummy = 0;
775 /* Register the read-write tracing request */
776
777 ret = process_new_ksym_entry("ksym_selftest_dummy",
778 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
779 (unsigned long)(&ksym_selftest_dummy));
780
781 if (ret < 0) {
782 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
783 goto ret_path;
784 }
785 /* Perform a read and a write operation over the dummy variable to
786 * trigger the tracer
787 */
788 if (ksym_selftest_dummy == 0)
789 ksym_selftest_dummy++;
790
791 /* stop the tracing. */
792 tracing_stop();
793 /* check the trace buffer */
794 ret = trace_test_buffer(tr, &count);
795 trace->reset(tr);
796 tracing_start();
797
798 /* read & write operations - one each is performed on the dummy variable
799 * triggering two entries in the trace buffer
800 */
801 if (!ret && count != 2) {
802 printk(KERN_CONT "Ksym tracer startup test failed");
803 ret = -1;
804 }
805
806ret_path:
807 return ret;
808}
809#endif /* CONFIG_KSYM_TRACER */
810
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index f4bc9b27de5f..056468eae7cf 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -110,12 +110,12 @@ static inline void check_stack(void)
110static void 110static void
111stack_trace_call(unsigned long ip, unsigned long parent_ip) 111stack_trace_call(unsigned long ip, unsigned long parent_ip)
112{ 112{
113 int cpu, resched; 113 int cpu;
114 114
115 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
116 return; 116 return;
117 117
118 resched = ftrace_preempt_disable(); 118 preempt_disable_notrace();
119 119
120 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
121 /* no atomic needed, we only modify this variable by this cpu */ 121 /* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
127 out: 127 out:
128 per_cpu(trace_active, cpu)--; 128 per_cpu(trace_active, cpu)--;
129 /* prevent recursion in schedule */ 129 /* prevent recursion in schedule */
130 ftrace_preempt_enable(resched); 130 preempt_enable_notrace();
131} 131}
132 132
133static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 34e35804304b..bac752f0cfb5 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,6 +23,9 @@ static int syscall_exit_register(struct ftrace_event_call *event,
23static int syscall_enter_define_fields(struct ftrace_event_call *call); 23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call); 24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25 25
26/* All syscall exit events have the same fields */
27static LIST_HEAD(syscall_exit_fields);
28
26static struct list_head * 29static struct list_head *
27syscall_get_enter_fields(struct ftrace_event_call *call) 30syscall_get_enter_fields(struct ftrace_event_call *call)
28{ 31{
@@ -34,9 +37,7 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
34static struct list_head * 37static struct list_head *
35syscall_get_exit_fields(struct ftrace_event_call *call) 38syscall_get_exit_fields(struct ftrace_event_call *call)
36{ 39{
37 struct syscall_metadata *entry = call->data; 40 return &syscall_exit_fields;
38
39 return &entry->exit_fields;
40} 41}
41 42
42struct trace_event_functions enter_syscall_print_funcs = { 43struct trace_event_functions enter_syscall_print_funcs = {
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
deleted file mode 100644
index a7974a552ca9..000000000000
--- a/kernel/trace/trace_sysprof.c
+++ /dev/null
@@ -1,329 +0,0 @@
1/*
2 * trace stack traces
3 *
4 * Copyright (C) 2004-2008, Soeren Sandmann
5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/kallsyms.h>
9#include <linux/debugfs.h>
10#include <linux/hrtimer.h>
11#include <linux/uaccess.h>
12#include <linux/ftrace.h>
13#include <linux/module.h>
14#include <linux/irq.h>
15#include <linux/fs.h>
16
17#include <asm/stacktrace.h>
18
19#include "trace.h"
20
21static struct trace_array *sysprof_trace;
22static int __read_mostly tracer_enabled;
23
24/*
25 * 1 msec sample interval by default:
26 */
27static unsigned long sample_period = 1000000;
28static const unsigned int sample_max_depth = 512;
29
30static DEFINE_MUTEX(sample_timer_lock);
31/*
32 * Per CPU hrtimers that do the profiling:
33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35
36struct stack_frame {
37 const void __user *next_fp;
38 unsigned long return_address;
39};
40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
42{
43 int ret;
44
45 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
46 return 0;
47
48 ret = 1;
49 pagefault_disable();
50 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
51 ret = 0;
52 pagefault_enable();
53
54 return ret;
55}
56
57struct backtrace_info {
58 struct trace_array_cpu *data;
59 struct trace_array *tr;
60 int pos;
61};
62
63static void
64backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
65{
66 /* Ignore warnings */
67}
68
69static void backtrace_warning(void *data, char *msg)
70{
71 /* Ignore warnings */
72}
73
74static int backtrace_stack(void *data, char *name)
75{
76 /* Don't bother with IRQ stacks for now */
77 return -1;
78}
79
80static void backtrace_address(void *data, unsigned long addr, int reliable)
81{
82 struct backtrace_info *info = data;
83
84 if (info->pos < sample_max_depth && reliable) {
85 __trace_special(info->tr, info->data, 1, addr, 0);
86
87 info->pos++;
88 }
89}
90
91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack,
95 .address = backtrace_address,
96 .walk_stack = print_context_stack,
97};
98
99static int
100trace_kernel(struct pt_regs *regs, struct trace_array *tr,
101 struct trace_array_cpu *data)
102{
103 struct backtrace_info info;
104 unsigned long bp;
105 char *stack;
106
107 info.tr = tr;
108 info.data = data;
109 info.pos = 1;
110
111 __trace_special(info.tr, info.data, 1, regs->ip, 0);
112
113 stack = ((char *)regs + sizeof(struct pt_regs));
114#ifdef CONFIG_FRAME_POINTER
115 bp = regs->bp;
116#else
117 bp = 0;
118#endif
119
120 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
121
122 return info.pos;
123}
124
125static void timer_notify(struct pt_regs *regs, int cpu)
126{
127 struct trace_array_cpu *data;
128 struct stack_frame frame;
129 struct trace_array *tr;
130 const void __user *fp;
131 int is_user;
132 int i;
133
134 if (!regs)
135 return;
136
137 tr = sysprof_trace;
138 data = tr->data[cpu];
139 is_user = user_mode(regs);
140
141 if (!current || current->pid == 0)
142 return;
143
144 if (is_user && current->state != TASK_RUNNING)
145 return;
146
147 __trace_special(tr, data, 0, 0, current->pid);
148
149 if (!is_user)
150 i = trace_kernel(regs, tr, data);
151 else
152 i = 0;
153
154 /*
155 * Trace user stack if we are not a kernel thread
156 */
157 if (current->mm && i < sample_max_depth) {
158 regs = (struct pt_regs *)current->thread.sp0 - 1;
159
160 fp = (void __user *)regs->bp;
161
162 __trace_special(tr, data, 2, regs->ip, 0);
163
164 while (i < sample_max_depth) {
165 frame.next_fp = NULL;
166 frame.return_address = 0;
167 if (!copy_stack_frame(fp, &frame))
168 break;
169 if ((unsigned long)fp < regs->sp)
170 break;
171
172 __trace_special(tr, data, 2, frame.return_address,
173 (unsigned long)fp);
174 fp = frame.next_fp;
175
176 i++;
177 }
178
179 }
180
181 /*
182 * Special trace entry if we overflow the max depth:
183 */
184 if (i == sample_max_depth)
185 __trace_special(tr, data, -1, -1, -1);
186
187 __trace_special(tr, data, 3, current->pid, i);
188}
189
190static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
191{
192 /* trace here */
193 timer_notify(get_irq_regs(), smp_processor_id());
194
195 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
196
197 return HRTIMER_RESTART;
198}
199
200static void start_stack_timer(void *unused)
201{
202 struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
203
204 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
205 hrtimer->function = stack_trace_timer_fn;
206
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
208 HRTIMER_MODE_REL_PINNED);
209}
210
211static void start_stack_timers(void)
212{
213 on_each_cpu(start_stack_timer, NULL, 1);
214}
215
216static void stop_stack_timer(int cpu)
217{
218 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
219
220 hrtimer_cancel(hrtimer);
221}
222
223static void stop_stack_timers(void)
224{
225 int cpu;
226
227 for_each_online_cpu(cpu)
228 stop_stack_timer(cpu);
229}
230
231static void stop_stack_trace(struct trace_array *tr)
232{
233 mutex_lock(&sample_timer_lock);
234 stop_stack_timers();
235 tracer_enabled = 0;
236 mutex_unlock(&sample_timer_lock);
237}
238
239static int stack_trace_init(struct trace_array *tr)
240{
241 sysprof_trace = tr;
242
243 tracing_start_cmdline_record();
244
245 mutex_lock(&sample_timer_lock);
246 start_stack_timers();
247 tracer_enabled = 1;
248 mutex_unlock(&sample_timer_lock);
249 return 0;
250}
251
252static void stack_trace_reset(struct trace_array *tr)
253{
254 tracing_stop_cmdline_record();
255 stop_stack_trace(tr);
256}
257
258static struct tracer stack_trace __read_mostly =
259{
260 .name = "sysprof",
261 .init = stack_trace_init,
262 .reset = stack_trace_reset,
263#ifdef CONFIG_FTRACE_SELFTEST
264 .selftest = trace_selftest_startup_sysprof,
265#endif
266};
267
268__init static int init_stack_trace(void)
269{
270 return register_tracer(&stack_trace);
271}
272device_initcall(init_stack_trace);
273
274#define MAX_LONG_DIGITS 22
275
276static ssize_t
277sysprof_sample_read(struct file *filp, char __user *ubuf,
278 size_t cnt, loff_t *ppos)
279{
280 char buf[MAX_LONG_DIGITS];
281 int r;
282
283 r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
284
285 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
286}
287
288static ssize_t
289sysprof_sample_write(struct file *filp, const char __user *ubuf,
290 size_t cnt, loff_t *ppos)
291{
292 char buf[MAX_LONG_DIGITS];
293 unsigned long val;
294
295 if (cnt > MAX_LONG_DIGITS-1)
296 cnt = MAX_LONG_DIGITS-1;
297
298 if (copy_from_user(&buf, ubuf, cnt))
299 return -EFAULT;
300
301 buf[cnt] = 0;
302
303 val = simple_strtoul(buf, NULL, 10);
304 /*
305 * Enforce a minimum sample period of 100 usecs:
306 */
307 if (val < 100)
308 val = 100;
309
310 mutex_lock(&sample_timer_lock);
311 stop_stack_timers();
312 sample_period = val * 1000;
313 start_stack_timers();
314 mutex_unlock(&sample_timer_lock);
315
316 return cnt;
317}
318
319static const struct file_operations sysprof_sample_fops = {
320 .read = sysprof_sample_read,
321 .write = sysprof_sample_write,
322};
323
324void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
325{
326
327 trace_create_file("sysprof_sample_period", 0644,
328 d_tracer, NULL, &sysprof_sample_fops);
329}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
new file mode 100644
index 000000000000..613bc1f04610
--- /dev/null
+++ b/kernel/watchdog.c
@@ -0,0 +1,567 @@
1/*
2 * Detect hard and soft lockups on a system
3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 *
 6 * this code detects hard lockups: incidents where on a CPU
7 * the kernel does not respond to anything except NMI.
8 *
9 * Note: Most of this code is borrowed heavily from softlockup.c,
10 * so thanks to Ingo for the initial implementation.
11 * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
12 * to those contributors as well.
13 */
14
15#include <linux/mm.h>
16#include <linux/cpu.h>
17#include <linux/nmi.h>
18#include <linux/init.h>
19#include <linux/delay.h>
20#include <linux/freezer.h>
21#include <linux/kthread.h>
22#include <linux/lockdep.h>
23#include <linux/notifier.h>
24#include <linux/module.h>
25#include <linux/sysctl.h>
26
27#include <asm/irq_regs.h>
28#include <linux/perf_event.h>
29
30int watchdog_enabled;
31int __read_mostly softlockup_thresh = 60;
32
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
34static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
35static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
36static DEFINE_PER_CPU(bool, softlockup_touch_sync);
37static DEFINE_PER_CPU(bool, soft_watchdog_warn);
38#ifdef CONFIG_HARDLOCKUP_DETECTOR
39static DEFINE_PER_CPU(bool, hard_watchdog_warn);
40static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
41static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
42static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif
45
46static int __read_mostly did_panic;
47static int __initdata no_watchdog;
48
49
50/* boot commands */
51/*
52 * Should we panic when a soft-lockup or hard-lockup occurs:
53 */
54#ifdef CONFIG_HARDLOCKUP_DETECTOR
55static int hardlockup_panic;
56
57static int __init hardlockup_panic_setup(char *str)
58{
59 if (!strncmp(str, "panic", 5))
60 hardlockup_panic = 1;
61 return 1;
62}
63__setup("nmi_watchdog=", hardlockup_panic_setup);
64#endif
65
66unsigned int __read_mostly softlockup_panic =
67 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
68
69static int __init softlockup_panic_setup(char *str)
70{
71 softlockup_panic = simple_strtoul(str, NULL, 0);
72
73 return 1;
74}
75__setup("softlockup_panic=", softlockup_panic_setup);
76
77static int __init nowatchdog_setup(char *str)
78{
79 no_watchdog = 1;
80 return 1;
81}
82__setup("nowatchdog", nowatchdog_setup);
83
84/* deprecated */
85static int __init nosoftlockup_setup(char *str)
86{
87 no_watchdog = 1;
88 return 1;
89}
90__setup("nosoftlockup", nosoftlockup_setup);
91/* */
92
93
94/*
95 * Returns seconds, approximately. We don't need nanosecond
96 * resolution, and we don't need to waste time with a big divide when
97 * 2^30ns == 1.074s.
98 */
99static unsigned long get_timestamp(int this_cpu)
100{
101 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
102}
103
104static unsigned long get_sample_period(void)
105{
106 /*
107 * convert softlockup_thresh from seconds to ns
108 * the divide by 5 is to give hrtimer 5 chances to
109 * increment before the hardlockup detector generates
110 * a warning
111 */
112 return softlockup_thresh / 5 * NSEC_PER_SEC;
113}
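
Two conversions above are worth seeing with numbers: the >>30 turns nanoseconds into approximate seconds (2^30 ns is about 1.074 s), and the /5 splits the softlockup threshold into five hrtimer periods. A standalone sketch with made-up inputs:

#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	unsigned long long now_ns = 90ULL * NSEC_PER_SEC;	/* pretend cpu_clock() value */
	unsigned long thresh = 60;				/* softlockup_thresh, seconds */

	printf("approx seconds : %llu\n", now_ns >> 30);	/* ~83, close enough to 90 */
	printf("sample period  : %llu ns (%lu s)\n",
	       (unsigned long long)(thresh / 5) * NSEC_PER_SEC, thresh / 5);
	return 0;
}
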
114
115/* Commands for resetting the watchdog */
116static void __touch_watchdog(void)
117{
118 int this_cpu = smp_processor_id();
119
120 __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
121}
122
123void touch_softlockup_watchdog(void)
124{
125 __get_cpu_var(watchdog_touch_ts) = 0;
126}
127EXPORT_SYMBOL(touch_softlockup_watchdog);
128
129void touch_all_softlockup_watchdogs(void)
130{
131 int cpu;
132
133 /*
134 * this is done lockless
135 * do we care if a 0 races with a timestamp?
 136 * all it means is the softlockup check starts one cycle later
137 */
138 for_each_online_cpu(cpu)
139 per_cpu(watchdog_touch_ts, cpu) = 0;
140}
141
142#ifdef CONFIG_HARDLOCKUP_DETECTOR
143void touch_nmi_watchdog(void)
144{
145 __get_cpu_var(watchdog_nmi_touch) = true;
146 touch_softlockup_watchdog();
147}
148EXPORT_SYMBOL(touch_nmi_watchdog);
149
150#endif
151
152void touch_softlockup_watchdog_sync(void)
153{
154 __raw_get_cpu_var(softlockup_touch_sync) = true;
155 __raw_get_cpu_var(watchdog_touch_ts) = 0;
156}
157
158#ifdef CONFIG_HARDLOCKUP_DETECTOR
159/* watchdog detector functions */
160static int is_hardlockup(void)
161{
162 unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
163
164 if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
165 return 1;
166
167 __get_cpu_var(hrtimer_interrupts_saved) = hrint;
168 return 0;
169}
170#endif
171
172static int is_softlockup(unsigned long touch_ts)
173{
174 unsigned long now = get_timestamp(smp_processor_id());
175
176 /* Warn about unreasonable delays: */
177 if (time_after(now, touch_ts + softlockup_thresh))
178 return now - touch_ts;
179
180 return 0;
181}
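
A self-contained model of the check above, with time_after() approximated by a signed, wrap-safe comparison; timestamps are in the same coarse seconds that get_timestamp() produces:

#include <stdio.h>

static unsigned long softlockup_thresh = 60;

static unsigned long is_softlockup(unsigned long now, unsigned long touch_ts)
{
	/* roughly time_after(now, touch_ts + softlockup_thresh) */
	if ((long)(now - (touch_ts + softlockup_thresh)) > 0)
		return now - touch_ts;
	return 0;
}

int main(void)
{
	printf("%lu\n", is_softlockup(100, 70));	/* 30 s gap, under 60 s -> 0   */
	printf("%lu\n", is_softlockup(200, 70));	/* 130 s gap             -> 130 */
	return 0;
}
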
182
183static int
184watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
185{
186 did_panic = 1;
187
188 return NOTIFY_DONE;
189}
190
191static struct notifier_block panic_block = {
192 .notifier_call = watchdog_panic,
193};
194
195#ifdef CONFIG_HARDLOCKUP_DETECTOR
196static struct perf_event_attr wd_hw_attr = {
197 .type = PERF_TYPE_HARDWARE,
198 .config = PERF_COUNT_HW_CPU_CYCLES,
199 .size = sizeof(struct perf_event_attr),
200 .pinned = 1,
201 .disabled = 1,
202};
203
204/* Callback function for perf event subsystem */
205void watchdog_overflow_callback(struct perf_event *event, int nmi,
206 struct perf_sample_data *data,
207 struct pt_regs *regs)
208{
209 if (__get_cpu_var(watchdog_nmi_touch) == true) {
210 __get_cpu_var(watchdog_nmi_touch) = false;
211 return;
212 }
213
214 /* check for a hardlockup
215 * This is done by making sure our timer interrupt
216 * is incrementing. The timer interrupt should have
 217 * fired multiple times before we overflowed. If it hasn't
218 * then this is a good indication the cpu is stuck
219 */
220 if (is_hardlockup()) {
221 int this_cpu = smp_processor_id();
222
223 /* only print hardlockups once */
224 if (__get_cpu_var(hard_watchdog_warn) == true)
225 return;
226
227 if (hardlockup_panic)
228 panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
229 else
230 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
231
232 __get_cpu_var(hard_watchdog_warn) = true;
233 return;
234 }
235
236 __get_cpu_var(hard_watchdog_warn) = false;
237 return;
238}
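
The hardlockup test referenced in the comment above boils down to "did the hrtimer interrupt count move since the last NMI-driven check". A minimal single-CPU model, with the per-CPU state reduced to plain globals:

#include <stdio.h>

static unsigned long hrtimer_interrupts;
static unsigned long hrtimer_interrupts_saved;

static int is_hardlockup(void)
{
	unsigned long hrint = hrtimer_interrupts;

	if (hrtimer_interrupts_saved == hrint)
		return 1;	/* no timer interrupt since last check: stuck */
	hrtimer_interrupts_saved = hrint;
	return 0;
}

int main(void)
{
	hrtimer_interrupts++;		 /* the hrtimer fired since the last check */
	printf("%d\n", is_hardlockup()); /* 0 */
	printf("%d\n", is_hardlockup()); /* 1: count did not move between checks */
	return 0;
}
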
239static void watchdog_interrupt_count(void)
240{
241 __get_cpu_var(hrtimer_interrupts)++;
242}
243#else
244static inline void watchdog_interrupt_count(void) { return; }
245#endif /* CONFIG_HARDLOCKUP_DETECTOR */
246
247/* watchdog kicker functions */
248static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
249{
250 unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
251 struct pt_regs *regs = get_irq_regs();
252 int duration;
253
254 /* kick the hardlockup detector */
255 watchdog_interrupt_count();
256
257 /* kick the softlockup detector */
258 wake_up_process(__get_cpu_var(softlockup_watchdog));
259
260 /* .. and repeat */
261 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
262
263 if (touch_ts == 0) {
264 if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
265 /*
266 * If the time stamp was touched atomically
267 * make sure the scheduler tick is up to date.
268 */
269 __get_cpu_var(softlockup_touch_sync) = false;
270 sched_clock_tick();
271 }
272 __touch_watchdog();
273 return HRTIMER_RESTART;
274 }
275
276 /* check for a softlockup
277 * This is done by making sure a high priority task is
278 * being scheduled. The task touches the watchdog to
279 * indicate it is getting cpu time. If it hasn't then
280 * this is a good indication some task is hogging the cpu
281 */
282 duration = is_softlockup(touch_ts);
283 if (unlikely(duration)) {
284 /* only warn once */
285 if (__get_cpu_var(soft_watchdog_warn) == true)
286 return HRTIMER_RESTART;
287
288 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
289 smp_processor_id(), duration,
290 current->comm, task_pid_nr(current));
291 print_modules();
292 print_irqtrace_events(current);
293 if (regs)
294 show_regs(regs);
295 else
296 dump_stack();
297
298 if (softlockup_panic)
299 panic("softlockup: hung tasks");
300 __get_cpu_var(soft_watchdog_warn) = true;
301 } else
302 __get_cpu_var(soft_watchdog_warn) = false;
303
304 return HRTIMER_RESTART;
305}
306
307
308/*
309 * The watchdog thread - touches the timestamp.
310 */
311static int watchdog(void *unused)
312{
313 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
314 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
315
316 sched_setscheduler(current, SCHED_FIFO, &param);
317
318 /* initialize timestamp */
319 __touch_watchdog();
320
321 /* kick off the timer for the hardlockup detector */
322 /* done here because hrtimer_start can only pin to smp_processor_id() */
323 hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
324 HRTIMER_MODE_REL_PINNED);
325
326 set_current_state(TASK_INTERRUPTIBLE);
327 /*
328 * Run briefly once per second to reset the softlockup timestamp.
329 * If this gets delayed for more than 60 seconds then the
330 * debug-printout triggers in watchdog_timer_fn().
331 */
332 while (!kthread_should_stop()) {
333 __touch_watchdog();
334 schedule();
335
336 if (kthread_should_stop())
337 break;
338
339 set_current_state(TASK_INTERRUPTIBLE);
340 }
341 __set_current_state(TASK_RUNNING);
342
343 return 0;
344}
345
346
347#ifdef CONFIG_HARDLOCKUP_DETECTOR
348static int watchdog_nmi_enable(int cpu)
349{
350 struct perf_event_attr *wd_attr;
351 struct perf_event *event = per_cpu(watchdog_ev, cpu);
352
353 /* is it already setup and enabled? */
354 if (event && event->state > PERF_EVENT_STATE_OFF)
355 goto out;
356
357 /* it is setup but not enabled */
358 if (event != NULL)
359 goto out_enable;
360
361 /* Try to register using hardware perf events */
362 wd_attr = &wd_hw_attr;
363 wd_attr->sample_period = hw_nmi_get_sample_period();
364 event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
365 if (!IS_ERR(event)) {
366 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
367 goto out_save;
368 }
369
370 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
371 return -1;
372
373 /* success path */
374out_save:
375 per_cpu(watchdog_ev, cpu) = event;
376out_enable:
377 perf_event_enable(per_cpu(watchdog_ev, cpu));
378out:
379 return 0;
380}
381
382static void watchdog_nmi_disable(int cpu)
383{
384 struct perf_event *event = per_cpu(watchdog_ev, cpu);
385
386 if (event) {
387 perf_event_disable(event);
388 per_cpu(watchdog_ev, cpu) = NULL;
389
390 /* should be in cleanup, but blocks oprofile */
391 perf_event_release_kernel(event);
392 }
393 return;
394}
395#else
396static int watchdog_nmi_enable(int cpu) { return 0; }
397static void watchdog_nmi_disable(int cpu) { return; }
398#endif /* CONFIG_HARDLOCKUP_DETECTOR */
399
400/* prepare/enable/disable routines */
401static int watchdog_prepare_cpu(int cpu)
402{
403 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
404
405 WARN_ON(per_cpu(softlockup_watchdog, cpu));
406 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
407 hrtimer->function = watchdog_timer_fn;
408
409 return 0;
410}
411
412static int watchdog_enable(int cpu)
413{
414 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
415
416 /* enable the perf event */
417 if (watchdog_nmi_enable(cpu) != 0)
418 return -1;
419
420 /* create the watchdog thread */
421 if (!p) {
422 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
423 if (IS_ERR(p)) {
424 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
425 return -1;
426 }
427 kthread_bind(p, cpu);
428 per_cpu(watchdog_touch_ts, cpu) = 0;
429 per_cpu(softlockup_watchdog, cpu) = p;
430 wake_up_process(p);
431 }
432
433 return 0;
434}
435
436static void watchdog_disable(int cpu)
437{
438 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
439 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
440
441 /*
442 * cancel the timer first to stop incrementing the stats
443 * and waking up the kthread
444 */
445 hrtimer_cancel(hrtimer);
446
447 /* disable the perf event */
448 watchdog_nmi_disable(cpu);
449
450 /* stop the watchdog thread */
451 if (p) {
452 per_cpu(softlockup_watchdog, cpu) = NULL;
453 kthread_stop(p);
454 }
455
456 /* if any cpu succeeds, watchdog is considered enabled for the system */
457 watchdog_enabled = 1;
458}
459
460static void watchdog_enable_all_cpus(void)
461{
462 int cpu;
463 int result = 0;
464
465 for_each_online_cpu(cpu)
466 result += watchdog_enable(cpu);
467
468 if (result)
469 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
470
471}
472
473static void watchdog_disable_all_cpus(void)
474{
475 int cpu;
476
477 for_each_online_cpu(cpu)
478 watchdog_disable(cpu);
479
480 /* if all watchdogs are disabled, then they are disabled for the system */
481 watchdog_enabled = 0;
482}
483
484
485/* sysctl functions */
486#ifdef CONFIG_SYSCTL
487/*
488 * proc handler for /proc/sys/kernel/nmi_watchdog
489 */
490
491int proc_dowatchdog_enabled(struct ctl_table *table, int write,
492 void __user *buffer, size_t *length, loff_t *ppos)
493{
494 proc_dointvec(table, write, buffer, length, ppos);
495
496 if (watchdog_enabled)
497 watchdog_enable_all_cpus();
498 else
499 watchdog_disable_all_cpus();
500 return 0;
501}
502
503int proc_dowatchdog_thresh(struct ctl_table *table, int write,
504 void __user *buffer,
505 size_t *lenp, loff_t *ppos)
506{
507 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
508}
509#endif /* CONFIG_SYSCTL */
510
511
512/*
513 * Create/destroy watchdog threads as CPUs come and go:
514 */
515static int __cpuinit
516cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
517{
518 int hotcpu = (unsigned long)hcpu;
519
520 switch (action) {
521 case CPU_UP_PREPARE:
522 case CPU_UP_PREPARE_FROZEN:
523 if (watchdog_prepare_cpu(hotcpu))
524 return NOTIFY_BAD;
525 break;
526 case CPU_ONLINE:
527 case CPU_ONLINE_FROZEN:
528 if (watchdog_enable(hotcpu))
529 return NOTIFY_BAD;
530 break;
531#ifdef CONFIG_HOTPLUG_CPU
532 case CPU_UP_CANCELED:
533 case CPU_UP_CANCELED_FROZEN:
534 watchdog_disable(hotcpu);
535 break;
536 case CPU_DEAD:
537 case CPU_DEAD_FROZEN:
538 watchdog_disable(hotcpu);
539 break;
540#endif /* CONFIG_HOTPLUG_CPU */
541 }
542 return NOTIFY_OK;
543}
544
545static struct notifier_block __cpuinitdata cpu_nfb = {
546 .notifier_call = cpu_callback
547};
548
549static int __init spawn_watchdog_task(void)
550{
551 void *cpu = (void *)(long)smp_processor_id();
552 int err;
553
554 if (no_watchdog)
555 return 0;
556
557 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
558 WARN_ON(err == NOTIFY_BAD);
559
560 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
561 register_cpu_notifier(&cpu_nfb);
562
563 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
564
565 return 0;
566}
567early_initcall(spawn_watchdog_task);
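The sysctl handler above ties /proc/sys/kernel/nmi_watchdog to watchdog_enable_all_cpus()/watchdog_disable_all_cpus(), so the combined detector can be toggled at run time. A minimal usage sketch, assuming the detector is built in and run as root:

    # turn the hard/soft lockup detector off on all online CPUs
    echo 0 > /proc/sys/kernel/nmi_watchdog
    # turn it back on; each online CPU re-enables its NMI perf event and watchdog thread
    echo 1 > /proc/sys/kernel/nmi_watchdog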
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e80d6bf1c43d..ff87ddc4cbd5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -152,28 +152,33 @@ config DEBUG_SHIRQ
152 Drivers ought to be able to handle interrupts coming in at those 152 Drivers ought to be able to handle interrupts coming in at those
153 points; some don't and need to be caught. 153 points; some don't and need to be caught.
154 154
155config DETECT_SOFTLOCKUP 155config LOCKUP_DETECTOR
156 bool "Detect Soft Lockups" 156 bool "Detect Hard and Soft Lockups"
157 depends on DEBUG_KERNEL && !S390 157 depends on DEBUG_KERNEL && !S390
158 default y
159 help 158 help
160 Say Y here to enable the kernel to detect "soft lockups", 159 Say Y here to enable the kernel to act as a watchdog to detect
161 which are bugs that cause the kernel to loop in kernel 160 hard and soft lockups.
161
162 Softlockups are bugs that cause the kernel to loop in kernel
162 mode for more than 60 seconds, without giving other tasks a 163 mode for more than 60 seconds, without giving other tasks a
163 chance to run. 164 chance to run. The current stack trace is displayed upon
165 detection and the system will stay locked up.
164 166
165 When a soft-lockup is detected, the kernel will print the 167 Hardlockups are bugs that cause the CPU to loop in kernel mode
166 current stack trace (which you should report), but the 168 for more than 60 seconds, without letting other interrupts have a
167 system will stay locked up. This feature has negligible 169 chance to run. The current stack trace is displayed upon detection
168 overhead. 170 and the system will stay locked up.
171
172 The overhead should be minimal. A periodic hrtimer runs to
173 generate interrupts and kick the watchdog task every 10-12 seconds.
174 An NMI is generated every 60 seconds or so to check for hardlockups.
169 175
170 (Note that "hard lockups" are separate type of bugs that 176config HARDLOCKUP_DETECTOR
171 can be detected via the NMI-watchdog, on platforms that 177 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
172 support it.)
173 178
174config BOOTPARAM_SOFTLOCKUP_PANIC 179config BOOTPARAM_SOFTLOCKUP_PANIC
175 bool "Panic (Reboot) On Soft Lockups" 180 bool "Panic (Reboot) On Soft Lockups"
176 depends on DETECT_SOFTLOCKUP 181 depends on LOCKUP_DETECTOR
177 help 182 help
178 Say Y here to enable the kernel to panic on "soft lockups", 183 Say Y here to enable the kernel to panic on "soft lockups",
179 which are bugs that cause the kernel to loop in kernel 184 which are bugs that cause the kernel to loop in kernel
@@ -190,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
190 195
191config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE 196config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
192 int 197 int
193 depends on DETECT_SOFTLOCKUP 198 depends on LOCKUP_DETECTOR
194 range 0 1 199 range 0 1
195 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 200 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
196 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 201 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
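Per the HARDLOCKUP_DETECTOR def_bool above, the NMI-based hard lockup side only comes in when PERF_EVENTS and HAVE_PERF_EVENTS_NMI are available. A quick sanity check on a built kernel (a sketch; the config file path varies by distribution):

    grep -E 'CONFIG_(LOCKUP_DETECTOR|HARDLOCKUP_DETECTOR|BOOTPARAM_SOFTLOCKUP_PANIC)' /boot/config-$(uname -r)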
diff --git a/mm/mmap.c b/mm/mmap.c
index 456ec6f27889..e38e910cb756 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1734,8 +1734,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1734 grow = (address - vma->vm_end) >> PAGE_SHIFT; 1734 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1735 1735
1736 error = acct_stack_growth(vma, size, grow); 1736 error = acct_stack_growth(vma, size, grow);
1737 if (!error) 1737 if (!error) {
1738 vma->vm_end = address; 1738 vma->vm_end = address;
1739 perf_event_mmap(vma);
1740 }
1739 } 1741 }
1740 anon_vma_unlock(vma); 1742 anon_vma_unlock(vma);
1741 return error; 1743 return error;
@@ -1781,6 +1783,7 @@ static int expand_downwards(struct vm_area_struct *vma,
1781 if (!error) { 1783 if (!error) {
1782 vma->vm_start = address; 1784 vma->vm_start = address;
1783 vma->vm_pgoff -= grow; 1785 vma->vm_pgoff -= grow;
1786 perf_event_mmap(vma);
1784 } 1787 }
1785 } 1788 }
1786 anon_vma_unlock(vma); 1789 anon_vma_unlock(vma);
@@ -2208,6 +2211,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2208 vma->vm_page_prot = vm_get_page_prot(flags); 2211 vma->vm_page_prot = vm_get_page_prot(flags);
2209 vma_link(mm, vma, prev, rb_link, rb_parent); 2212 vma_link(mm, vma, prev, rb_link, rb_parent);
2210out: 2213out:
2214 perf_event_mmap(vma);
2211 mm->total_vm += len >> PAGE_SHIFT; 2215 mm->total_vm += len >> PAGE_SHIFT;
2212 if (flags & VM_LOCKED) { 2216 if (flags & VM_LOCKED) {
2213 if (!mlock_vma_pages_range(vma, addr, addr + len)) 2217 if (!mlock_vma_pages_range(vma, addr, addr + len))
diff --git a/mm/slab.c b/mm/slab.c
index e49f8f46f46d..47360c3e5abd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,6 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <linux/kmemtrace.h>
106#include <linux/rcupdate.h> 105#include <linux/rcupdate.h>
107#include <linux/string.h> 106#include <linux/string.h>
108#include <linux/uaccess.h> 107#include <linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index 19d2e5d46724..3f19a347dabf 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,8 +66,10 @@
66#include <linux/module.h> 66#include <linux/module.h>
67#include <linux/rcupdate.h> 67#include <linux/rcupdate.h>
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/kmemtrace.h>
70#include <linux/kmemleak.h> 69#include <linux/kmemleak.h>
70
71#include <trace/events/kmem.h>
72
71#include <asm/atomic.h> 73#include <asm/atomic.h>
72 74
73/* 75/*
diff --git a/mm/slub.c b/mm/slub.c
index 578f68f3c51f..7bb7940f4eee 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/kmemtrace.h>
21#include <linux/kmemcheck.h> 20#include <linux/kmemcheck.h>
22#include <linux/cpu.h> 21#include <linux/cpu.h>
23#include <linux/cpuset.h> 22#include <linux/cpuset.h>
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index d2c29b63adda..d0b931b994fc 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -111,13 +111,38 @@ tar%pkg: FORCE
111clean-dirs += $(objtree)/tar-install/ 111clean-dirs += $(objtree)/tar-install/
112 112
113 113
114# perf-pkg - generate a source tarball with perf source
115# ---------------------------------------------------------------------------
116
117perf-tar=perf-$(KERNELVERSION)
118
119quiet_cmd_perf_tar = TAR
120 cmd_perf_tar = \
121git archive --prefix=$(perf-tar)/ HEAD^{tree} \
122 $$(cat $(srctree)/tools/perf/MANIFEST) -o $(perf-tar).tar; \
123mkdir -p $(perf-tar); \
124git rev-parse HEAD > $(perf-tar)/HEAD; \
125tar rf $(perf-tar).tar $(perf-tar)/HEAD; \
126rm -r $(perf-tar); \
127$(if $(findstring tar-src,$@),, \
128$(if $(findstring bz2,$@),bzip2, \
129$(if $(findstring gz,$@),gzip, \
130$(error unknown target $@))) \
131 -f -9 $(perf-tar).tar)
132
133perf-%pkg: FORCE
134 $(call cmd,perf_tar)
135
114# Help text displayed when executing 'make help' 136# Help text displayed when executing 'make help'
115# --------------------------------------------------------------------------- 137# ---------------------------------------------------------------------------
116help: FORCE 138help: FORCE
117 @echo ' rpm-pkg - Build both source and binary RPM kernel packages' 139 @echo ' rpm-pkg - Build both source and binary RPM kernel packages'
118 @echo ' binrpm-pkg - Build only the binary kernel package' 140 @echo ' binrpm-pkg - Build only the binary kernel package'
119 @echo ' deb-pkg - Build the kernel as a deb package' 141 @echo ' deb-pkg - Build the kernel as a deb package'
120 @echo ' tar-pkg - Build the kernel as an uncompressed tarball' 142 @echo ' tar-pkg - Build the kernel as an uncompressed tarball'
121 @echo ' targz-pkg - Build the kernel as a gzip compressed tarball' 143 @echo ' targz-pkg - Build the kernel as a gzip compressed tarball'
122 @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball' 144 @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball'
145 @echo ' perf-tar-src-pkg - Build $(perf-tar).tar source tarball'
146 @echo ' perf-targz-src-pkg - Build $(perf-tar).tar.gz source tarball'
147 @echo ' perf-tarbz2-src-pkg - Build $(perf-tar).tar.bz2 source tarball'
123 148
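Usage sketch for the new packaging targets, run from the top of a git checkout of the kernel tree (the rule relies on git archive, so a plain source tarball will not work):

    make perf-tar-src-pkg       # perf-<KERNELVERSION>.tar
    make perf-targz-src-pkg     # perf-<KERNELVERSION>.tar.gz
    make perf-tarbz2-src-pkg    # perf-<KERNELVERSION>.tar.bz2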
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index f3c9c0a90b98..0171060b5fd6 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -326,7 +326,7 @@ if ($arch eq "x86_64") {
326 # 14: R_MIPS_NONE *ABS* 326 # 14: R_MIPS_NONE *ABS*
327 # 18: 00020021 nop 327 # 18: 00020021 nop
328 if ($is_module eq "0") { 328 if ($is_module eq "0") {
329 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; 329 $mcount_regex = "^\\s*([0-9a-fA-F]+): R_MIPS_26\\s+_mcount\$";
330 } else { 330 } else {
331 $mcount_regex = "^\\s*([0-9a-fA-F]+): R_MIPS_HI16\\s+_mcount\$"; 331 $mcount_regex = "^\\s*([0-9a-fA-F]+): R_MIPS_HI16\\s+_mcount\$";
332 } 332 }
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index e1d60d780784..cb43289e447f 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -18,3 +18,5 @@ perf-archive
18tags 18tags
19TAGS 19TAGS
20cscope* 20cscope*
21config.mak
22config.mak.autogen
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index 5d1a9500277f..c1057701a7dc 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -12,9 +12,9 @@ SYNOPSIS
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command manages the build-id cache. It can add and remove files to the 15This command manages the build-id cache. It can add and remove files to/from
16cache. In the future it should as well purge older entries, set upper limits 16the cache. In the future it should as well purge older entries, set upper
17for the space used by the cache, etc. 17limits for the space used by the cache, etc.
18 18
19OPTIONS 19OPTIONS
20------- 20-------
@@ -23,7 +23,7 @@ OPTIONS
23 Add specified file to the cache. 23 Add specified file to the cache.
24-r:: 24-r::
25--remove=:: 25--remove=::
26 Remove specified file to the cache. 26 Remove specified file from the cache.
27-v:: 27-v::
28--verbose:: 28--verbose::
29 Be more verbose. 29 Be more verbose.
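A usage sketch for the add/remove options documented above (the binary path is illustrative):

    perf buildid-cache -v -a /usr/bin/someprog    # add the file's build-id to the cache
    perf buildid-cache -v -r /usr/bin/someprog    # remove it from the cache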
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 94a258c96a44..27d52dae5a43 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -31,6 +31,10 @@ OPTIONS
31--vmlinux=PATH:: 31--vmlinux=PATH::
32 Specify vmlinux path which has debuginfo (Dwarf binary). 32 Specify vmlinux path which has debuginfo (Dwarf binary).
33 33
34-s::
35--source=PATH::
36 Specify path to kernel source.
37
34-v:: 38-v::
35--verbose:: 39--verbose::
36 Be more verbose (show parsed arguments, etc). 40 Be more verbose (show parsed arguments, etc).
@@ -90,8 +94,8 @@ Each probe argument follows below syntax.
90 94
91 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 95 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
92 96
93'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) 97'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
94'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. 98'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
95 99
96LINE SYNTAX 100LINE SYNTAX
97----------- 101-----------
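A probe-definition sketch combining the new -s/--source option with the argument syntax above; the probed function and variable names are illustrative, not part of this patch:

    perf probe -s /usr/src/linux \
        --add 'do_sys_open filename:string flags'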
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 34e255fc3e2f..3ee27dccfde9 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -103,6 +103,19 @@ OPTIONS
103--raw-samples:: 103--raw-samples::
104Collect raw sample records from all opened counters (default for tracepoint counters). 104Collect raw sample records from all opened counters (default for tracepoint counters).
105 105
106-C::
107--cpu::
108Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
109comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
110In per-thread mode with inheritance mode on (default), samples are captured only when
111the thread executes on the designated CPUs. Default is to monitor all CPUs.
112
113-N::
114--no-buildid-cache::
115Do not update the buildid cache. This saves some overhead in situations
116where the information in the perf.data file (which includes buildids)
117is sufficient.
118
106SEE ALSO 119SEE ALSO
107-------- 120--------
108linkperf:perf-stat[1], linkperf:perf-list[1] 121linkperf:perf-stat[1], linkperf:perf-list[1]
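A usage sketch for the two new record options; the workload and CPU numbers are illustrative:

    # sample only while the workload runs on CPUs 0 or 1, and skip the
    # buildid cache update when writing perf.data
    perf record -C 0,1 -N -- sleep 10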
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 909fa766fa1c..4b3a2d46b437 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -46,6 +46,13 @@ OPTIONS
46-B:: 46-B::
47 print large numbers with thousands' separators according to locale 47 print large numbers with thousands' separators according to locale
48 48
49-C::
50--cpu=::
51Count only on the list of cpus provided. Multiple CPUs can be provided as a
52comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
53In per-thread mode, this option is ignored. The -a option is still necessary
54to activate system-wide monitoring. Default is to count on all CPUs.
55
49EXAMPLES 56EXAMPLES
50-------- 57--------
51 58
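A matching sketch for the new stat option; as the text above notes, -a is still needed to get system-wide counting:

    perf stat -a -C 0,1 -- sleep 5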
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 785b9fc32a46..1f9687663f2a 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -25,9 +25,11 @@ OPTIONS
25--count=<count>:: 25--count=<count>::
26 Event period to sample. 26 Event period to sample.
27 27
28-C <cpu>:: 28-C <cpu-list>::
29--CPU=<cpu>:: 29--cpu=<cpu>::
30 CPU to profile. 30Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
31comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
32Default is to monitor all CPUs.
31 33
32-d <seconds>:: 34-d <seconds>::
33--delay=<seconds>:: 35--delay=<seconds>::
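And for perf top, with the same list syntax (the CPU range is illustrative):

    perf top -C 0-2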
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
new file mode 100644
index 000000000000..8c7fc0c8f0b8
--- /dev/null
+++ b/tools/perf/MANIFEST
@@ -0,0 +1,12 @@
1tools/perf
2include/linux/perf_event.h
3include/linux/rbtree.h
4include/linux/list.h
5include/linux/hash.h
6include/linux/stringify.h
7lib/rbtree.c
8include/linux/swab.h
9arch/*/include/asm/unistd*.h
10include/linux/poison.h
11include/linux/magic.h
12include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d75c28a825f5..26f626d45a9e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -285,14 +285,10 @@ else
285 QUIET_STDERR = ">/dev/null 2>&1" 285 QUIET_STDERR = ">/dev/null 2>&1"
286endif 286endif
287 287
288BITBUCKET = "/dev/null" 288-include feature-tests.mak
289 289
290ifneq ($(shell sh -c "(echo '\#include <stdio.h>'; echo 'int main(void) { return puts(\"hi\"); }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) "$(QUIET_STDERR)" && echo y"), y) 290ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
291 BITBUCKET = .perf.dev.null 291 CFLAGS := $(CFLAGS) -fstack-protector-all
292endif
293
294ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o $(BITBUCKET) "$(QUIET_STDERR)" && echo y"), y)
295 CFLAGS := $(CFLAGS) -fstack-protector-all
296endif 292endif
297 293
298 294
@@ -508,7 +504,8 @@ PERFLIBS = $(LIB_FILE)
508-include config.mak 504-include config.mak
509 505
510ifndef NO_DWARF 506ifndef NO_DWARF
511ifneq ($(shell sh -c "(echo '\#include <dwarf.h>'; echo '\#include <libdw.h>'; echo '\#include <version.h>'; echo '\#ifndef _ELFUTILS_PREREQ'; echo '\#error'; echo '\#endif'; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 507FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
508ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
512 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); 509 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
513 NO_DWARF := 1 510 NO_DWARF := 1
514endif # Dwarf support 511endif # Dwarf support
@@ -536,16 +533,18 @@ ifneq ($(OUTPUT),)
536 BASIC_CFLAGS += -I$(OUTPUT) 533 BASIC_CFLAGS += -I$(OUTPUT)
537endif 534endif
538 535
539ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 536FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
540ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 537ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y)
541 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); 538 FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS)
539 ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y)
540 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
541 else
542 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel);
543 endif
542endif 544endif
543 545
544 ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 546ifneq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y)
545 BASIC_CFLAGS += -DLIBELF_NO_MMAP 547 BASIC_CFLAGS += -DLIBELF_NO_MMAP
546 endif
547else
548 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
549endif 548endif
550 549
551ifndef NO_DWARF 550ifndef NO_DWARF
@@ -561,64 +560,73 @@ endif # NO_DWARF
561ifdef NO_NEWT 560ifdef NO_NEWT
562 BASIC_CFLAGS += -DNO_NEWT_SUPPORT 561 BASIC_CFLAGS += -DNO_NEWT_SUPPORT
563else 562else
564ifneq ($(shell sh -c "(echo '\#include <newt.h>'; echo 'int main(void) { newtInit(); newtCls(); return newtFinished(); }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -lnewt -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 563 FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt
565 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev); 564 ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT)),y)
566 BASIC_CFLAGS += -DNO_NEWT_SUPPORT 565 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev);
567else 566 BASIC_CFLAGS += -DNO_NEWT_SUPPORT
568 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h 567 else
569 BASIC_CFLAGS += -I/usr/include/slang 568 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
570 EXTLIBS += -lnewt -lslang 569 BASIC_CFLAGS += -I/usr/include/slang
571 LIB_OBJS += $(OUTPUT)util/newt.o 570 EXTLIBS += -lnewt -lslang
572endif 571 LIB_OBJS += $(OUTPUT)util/newt.o
573endif # NO_NEWT 572 endif
574
575ifndef NO_LIBPERL
576PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
577PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
578endif 573endif
579 574
580ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o $(BITBUCKET) $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) 575ifdef NO_LIBPERL
581 BASIC_CFLAGS += -DNO_LIBPERL 576 BASIC_CFLAGS += -DNO_LIBPERL
582else 577else
583 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 578 PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
584 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o 579 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
585 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o 580 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
586endif
587 581
588ifndef NO_LIBPYTHON 582 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
589PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` 583 BASIC_CFLAGS += -DNO_LIBPERL
590PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 584 else
585 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
586 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
587 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
588 endif
591endif 589endif
592 590
593ifneq ($(shell sh -c "(echo '\#include <Python.h>'; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o $(BITBUCKET) $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) 591ifdef NO_LIBPYTHON
594 BASIC_CFLAGS += -DNO_LIBPYTHON 592 BASIC_CFLAGS += -DNO_LIBPYTHON
595else 593else
596 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) 594 PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null`
597 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 595 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
598 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 596 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
597 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
598 BASIC_CFLAGS += -DNO_LIBPYTHON
599 else
600 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS)
601 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
602 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
603 endif
599endif 604endif
600 605
601ifdef NO_DEMANGLE 606ifdef NO_DEMANGLE
602 BASIC_CFLAGS += -DNO_DEMANGLE 607 BASIC_CFLAGS += -DNO_DEMANGLE
603else 608else
604 ifdef HAVE_CPLUS_DEMANGLE 609 ifdef HAVE_CPLUS_DEMANGLE
605 EXTLIBS += -liberty 610 EXTLIBS += -liberty
606 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE 611 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
607 else 612 else
608 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y") 613 FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd
609 614 has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
610 ifeq ($(has_bfd),y) 615 ifeq ($(has_bfd),y)
611 EXTLIBS += -lbfd 616 EXTLIBS += -lbfd
612 else 617 else
613 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y") 618 FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
619 has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY))
614 ifeq ($(has_bfd_iberty),y) 620 ifeq ($(has_bfd_iberty),y)
615 EXTLIBS += -lbfd -liberty 621 EXTLIBS += -lbfd -liberty
616 else 622 else
617 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y") 623 FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
624 has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z))
618 ifeq ($(has_bfd_iberty_z),y) 625 ifeq ($(has_bfd_iberty_z),y)
619 EXTLIBS += -lbfd -liberty -lz 626 EXTLIBS += -lbfd -liberty -lz
620 else 627 else
621 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y") 628 FLAGS_CPLUS_DEMANGLE=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -liberty
629 has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE))
622 ifeq ($(has_cplus_demangle),y) 630 ifeq ($(has_cplus_demangle),y)
623 EXTLIBS += -liberty 631 EXTLIBS += -liberty
624 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE 632 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
@@ -867,7 +875,7 @@ export TAR INSTALL DESTDIR SHELL_PATH
867 875
868SHELL = $(SHELL_PATH) 876SHELL = $(SHELL_PATH)
869 877
870all:: .perf.dev.null shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS 878all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS
871ifneq (,$X) 879ifneq (,$X)
872 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) 880 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
873endif 881endif
@@ -1197,11 +1205,6 @@ clean:
1197.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS 1205.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
1198.PHONY: .FORCE-PERF-BUILD-OPTIONS 1206.PHONY: .FORCE-PERF-BUILD-OPTIONS
1199 1207
1200.perf.dev.null:
1201 touch .perf.dev.null
1202
1203.INTERMEDIATE: .perf.dev.null
1204
1205### Make sure built-ins do not have dups and listed in perf.c 1208### Make sure built-ins do not have dups and listed in perf.c
1206# 1209#
1207check-builtins:: 1210check-builtins::
diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile
new file mode 100644
index 000000000000..15130b50dfe3
--- /dev/null
+++ b/tools/perf/arch/sh/Makefile
@@ -0,0 +1,4 @@
1ifndef NO_DWARF
2PERF_HAVE_DWARF_REGS := 1
3LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
4endif
diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c
new file mode 100644
index 000000000000..a11edb007a6c
--- /dev/null
+++ b/tools/perf/arch/sh/util/dwarf-regs.c
@@ -0,0 +1,55 @@
1/*
2 * Mapping of DWARF debug register numbers into register names.
3 *
4 * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21
22#include <libio.h>
23#include <dwarf-regs.h>
24
25/*
26 * Generic dwarf analysis helpers
27 */
28
29#define SH_MAX_REGS 18
30const char *sh_regs_table[SH_MAX_REGS] = {
31 "r0",
32 "r1",
33 "r2",
34 "r3",
35 "r4",
36 "r5",
37 "r6",
38 "r7",
39 "r8",
40 "r9",
41 "r10",
42 "r11",
43 "r12",
44 "r13",
45 "r14",
46 "r15",
47 "pc",
48 "pr",
49};
50
51/* Return architecture dependent register string (for kprobe-tracer) */
52const char *get_arch_regstr(unsigned int n)
53{
54 return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL;
55}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 96db5248e995..fd20670ce986 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -61,11 +61,9 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
61static int process_sample_event(event_t *event, struct perf_session *session) 61static int process_sample_event(event_t *event, struct perf_session *session)
62{ 62{
63 struct addr_location al; 63 struct addr_location al;
64 struct sample_data data;
64 65
65 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, 66 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
66 event->ip.pid, event->ip.ip);
67
68 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
69 pr_warning("problem processing %d event, skipping it.\n", 67 pr_warning("problem processing %d event, skipping it.\n",
70 event->header.type); 68 event->header.type);
71 return -1; 69 return -1;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index f8e3d1852029..29ad20e67919 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -78,8 +78,7 @@ static int __cmd_buildid_cache(void)
78 struct str_node *pos; 78 struct str_node *pos;
79 char debugdir[PATH_MAX]; 79 char debugdir[PATH_MAX];
80 80
81 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), 81 snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
82 DEBUG_CACHE_DIR);
83 82
84 if (add_name_list_str) { 83 if (add_name_list_str) {
85 list = strlist__new(true, add_name_list_str); 84 list = strlist__new(true, add_name_list_str);
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 99890728409e..44a47e13bd67 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -43,10 +43,8 @@ static int __cmd_buildid_list(void)
43 if (session == NULL) 43 if (session == NULL)
44 return -1; 44 return -1;
45 45
46 if (with_hits) { 46 if (with_hits)
47 symbol_conf.full_paths = true;
48 perf_session__process_events(session, &build_id__mark_dso_hit_ops); 47 perf_session__process_events(session, &build_id__mark_dso_hit_ops);
49 }
50 48
51 perf_session__fprintf_dsos_buildid(session, stdout, with_hits); 49 perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
52 50
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index a6e2fdc7a04e..fca1d4402910 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -35,10 +35,7 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
35 struct addr_location al; 35 struct addr_location al;
36 struct sample_data data = { .period = 1, }; 36 struct sample_data data = { .period = 1, };
37 37
38 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, 38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
39 event->ip.pid, event->ip.ip);
40
41 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
42 pr_warning("problem processing %d event, skipping it.\n", 39 pr_warning("problem processing %d event, skipping it.\n",
43 event->header.type); 40 event->header.type);
44 return -1; 41 return -1;
@@ -47,8 +44,6 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
47 if (al.filtered || al.sym == NULL) 44 if (al.filtered || al.sym == NULL)
48 return 0; 45 return 0;
49 46
50 event__parse_sample(event, session->sample_type, &data);
51
52 if (hists__add_entry(&session->hists, &al, data.period)) { 47 if (hists__add_entry(&session->hists, &al, data.period)) {
53 pr_warning("problem incrementing symbol period, skipping event\n"); 48 pr_warning("problem incrementing symbol period, skipping event\n");
54 return -1; 49 return -1;
@@ -185,8 +180,6 @@ static const struct option options[] = {
185 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 180 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
186 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 181 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
187 "load module symbols - WARNING: use only with -k and LIVE kernel"), 182 "load module symbols - WARNING: use only with -k and LIVE kernel"),
188 OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
189 "Don't shorten the pathnames taking into account the cwd"),
190 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 183 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
191 "only consider symbols in these dsos"), 184 "only consider symbols in these dsos"),
192 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 185 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index e4a4da32a568..199d5e19554f 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -182,6 +182,8 @@ static const struct option options[] = {
182 "Show source code lines.", opt_show_lines), 182 "Show source code lines.", opt_show_lines),
183 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 183 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
184 "file", "vmlinux pathname"), 184 "file", "vmlinux pathname"),
185 OPT_STRING('s', "source", &symbol_conf.source_prefix,
186 "directory", "path to kernel source"),
185#endif 187#endif
186 OPT__DRY_RUN(&probe_event_dry_run), 188 OPT__DRY_RUN(&probe_event_dry_run),
187 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 189 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
@@ -265,4 +267,3 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
265 } 267 }
266 return 0; 268 return 0;
267} 269}
268
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 711745f56bba..ff77b805de71 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,7 +49,6 @@ static int group = 0;
49static int realtime_prio = 0; 49static int realtime_prio = 0;
50static bool raw_samples = false; 50static bool raw_samples = false;
51static bool system_wide = false; 51static bool system_wide = false;
52static int profile_cpu = -1;
53static pid_t target_pid = -1; 52static pid_t target_pid = -1;
54static pid_t target_tid = -1; 53static pid_t target_tid = -1;
55static pid_t *all_tids = NULL; 54static pid_t *all_tids = NULL;
@@ -61,6 +60,7 @@ static bool call_graph = false;
61static bool inherit_stat = false; 60static bool inherit_stat = false;
62static bool no_samples = false; 61static bool no_samples = false;
63static bool sample_address = false; 62static bool sample_address = false;
63static bool no_buildid = false;
64 64
65static long samples = 0; 65static long samples = 0;
66static u64 bytes_written = 0; 66static u64 bytes_written = 0;
@@ -74,6 +74,7 @@ static int file_new = 1;
74static off_t post_processing_offset; 74static off_t post_processing_offset;
75 75
76static struct perf_session *session; 76static struct perf_session *session;
77static const char *cpu_list;
77 78
78struct mmap_data { 79struct mmap_data {
79 int counter; 80 int counter;
@@ -268,12 +269,17 @@ static void create_counter(int counter, int cpu)
268 if (inherit_stat) 269 if (inherit_stat)
269 attr->inherit_stat = 1; 270 attr->inherit_stat = 1;
270 271
271 if (sample_address) 272 if (sample_address) {
272 attr->sample_type |= PERF_SAMPLE_ADDR; 273 attr->sample_type |= PERF_SAMPLE_ADDR;
274 attr->mmap_data = track;
275 }
273 276
274 if (call_graph) 277 if (call_graph)
275 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 278 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
276 279
280 if (system_wide)
281 attr->sample_type |= PERF_SAMPLE_CPU;
282
277 if (raw_samples) { 283 if (raw_samples) {
278 attr->sample_type |= PERF_SAMPLE_TIME; 284 attr->sample_type |= PERF_SAMPLE_TIME;
279 attr->sample_type |= PERF_SAMPLE_RAW; 285 attr->sample_type |= PERF_SAMPLE_RAW;
@@ -300,7 +306,7 @@ try_again:
300 die("Permission error - are you root?\n" 306 die("Permission error - are you root?\n"
301 "\t Consider tweaking" 307 "\t Consider tweaking"
302 " /proc/sys/kernel/perf_event_paranoid.\n"); 308 " /proc/sys/kernel/perf_event_paranoid.\n");
303 else if (err == ENODEV && profile_cpu != -1) { 309 else if (err == ENODEV && cpu_list) {
304 die("No such device - did you specify" 310 die("No such device - did you specify"
305 " an out-of-range profile CPU?\n"); 311 " an out-of-range profile CPU?\n");
306 } 312 }
@@ -433,14 +439,14 @@ static void atexit_header(void)
433 439
434 process_buildids(); 440 process_buildids();
435 perf_header__write(&session->header, output, true); 441 perf_header__write(&session->header, output, true);
442 perf_session__delete(session);
443 symbol__exit();
436 } 444 }
437} 445}
438 446
439static void event__synthesize_guest_os(struct machine *machine, void *data) 447static void event__synthesize_guest_os(struct machine *machine, void *data)
440{ 448{
441 int err; 449 int err;
442 char *guest_kallsyms;
443 char path[PATH_MAX];
444 struct perf_session *psession = data; 450 struct perf_session *psession = data;
445 451
446 if (machine__is_host(machine)) 452 if (machine__is_host(machine))
@@ -460,13 +466,6 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
460 pr_err("Couldn't record guest kernel [%d]'s reference" 466 pr_err("Couldn't record guest kernel [%d]'s reference"
461 " relocation symbol.\n", machine->pid); 467 " relocation symbol.\n", machine->pid);
462 468
463 if (machine__is_default_guest(machine))
464 guest_kallsyms = (char *) symbol_conf.default_guest_kallsyms;
465 else {
466 sprintf(path, "%s/proc/kallsyms", machine->root_dir);
467 guest_kallsyms = path;
468 }
469
470 /* 469 /*
471 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 470 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
472 * have no _text sometimes. 471 * have no _text sometimes.
@@ -561,12 +560,15 @@ static int __cmd_record(int argc, const char **argv)
561 if (!file_new) { 560 if (!file_new) {
562 err = perf_header__read(session, output); 561 err = perf_header__read(session, output);
563 if (err < 0) 562 if (err < 0)
564 return err; 563 goto out_delete_session;
565 } 564 }
566 565
567 if (have_tracepoints(attrs, nr_counters)) 566 if (have_tracepoints(attrs, nr_counters))
568 perf_header__set_feat(&session->header, HEADER_TRACE_INFO); 567 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
569 568
569 /*
570 * perf_session__delete(session) will be called at atexit_header()
571 */
570 atexit(atexit_header); 572 atexit(atexit_header);
571 573
572 if (forks) { 574 if (forks) {
@@ -622,10 +624,15 @@ static int __cmd_record(int argc, const char **argv)
622 close(child_ready_pipe[0]); 624 close(child_ready_pipe[0]);
623 } 625 }
624 626
625 if ((!system_wide && no_inherit) || profile_cpu != -1) { 627 nr_cpus = read_cpu_map(cpu_list);
626 open_counters(profile_cpu); 628 if (nr_cpus < 1) {
629 perror("failed to collect number of CPUs\n");
630 return -1;
631 }
632
633 if (!system_wide && no_inherit && !cpu_list) {
634 open_counters(-1);
627 } else { 635 } else {
628 nr_cpus = read_cpu_map();
629 for (i = 0; i < nr_cpus; i++) 636 for (i = 0; i < nr_cpus; i++)
630 open_counters(cpumap[i]); 637 open_counters(cpumap[i]);
631 } 638 }
@@ -704,7 +711,7 @@ static int __cmd_record(int argc, const char **argv)
704 if (perf_guest) 711 if (perf_guest)
705 perf_session__process_machines(session, event__synthesize_guest_os); 712 perf_session__process_machines(session, event__synthesize_guest_os);
706 713
707 if (!system_wide && profile_cpu == -1) 714 if (!system_wide)
708 event__synthesize_thread(target_tid, process_synthesized_event, 715 event__synthesize_thread(target_tid, process_synthesized_event,
709 session); 716 session);
710 else 717 else
@@ -766,6 +773,10 @@ static int __cmd_record(int argc, const char **argv)
766 bytes_written / 24); 773 bytes_written / 24);
767 774
768 return 0; 775 return 0;
776
777out_delete_session:
778 perf_session__delete(session);
779 return err;
769} 780}
770 781
771static const char * const record_usage[] = { 782static const char * const record_usage[] = {
@@ -794,8 +805,8 @@ static const struct option options[] = {
794 "system-wide collection from all CPUs"), 805 "system-wide collection from all CPUs"),
795 OPT_BOOLEAN('A', "append", &append_file, 806 OPT_BOOLEAN('A', "append", &append_file,
796 "append to the output file to do incremental profiling"), 807 "append to the output file to do incremental profiling"),
797 OPT_INTEGER('C', "profile_cpu", &profile_cpu, 808 OPT_STRING('C', "cpu", &cpu_list, "cpu",
798 "CPU to profile on"), 809 "list of cpus to monitor"),
799 OPT_BOOLEAN('f', "force", &force, 810 OPT_BOOLEAN('f', "force", &force,
800 "overwrite existing data file (deprecated)"), 811 "overwrite existing data file (deprecated)"),
801 OPT_U64('c', "count", &user_interval, "event period to sample"), 812 OPT_U64('c', "count", &user_interval, "event period to sample"),
@@ -815,17 +826,19 @@ static const struct option options[] = {
815 "Sample addresses"), 826 "Sample addresses"),
816 OPT_BOOLEAN('n', "no-samples", &no_samples, 827 OPT_BOOLEAN('n', "no-samples", &no_samples,
817 "don't sample"), 828 "don't sample"),
829 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
830 "do not update the buildid cache"),
818 OPT_END() 831 OPT_END()
819}; 832};
820 833
821int cmd_record(int argc, const char **argv, const char *prefix __used) 834int cmd_record(int argc, const char **argv, const char *prefix __used)
822{ 835{
823 int i,j; 836 int i, j, err = -ENOMEM;
824 837
825 argc = parse_options(argc, argv, options, record_usage, 838 argc = parse_options(argc, argv, options, record_usage,
826 PARSE_OPT_STOP_AT_NON_OPTION); 839 PARSE_OPT_STOP_AT_NON_OPTION);
827 if (!argc && target_pid == -1 && target_tid == -1 && 840 if (!argc && target_pid == -1 && target_tid == -1 &&
828 !system_wide && profile_cpu == -1) 841 !system_wide && !cpu_list)
829 usage_with_options(record_usage, options); 842 usage_with_options(record_usage, options);
830 843
831 if (force && append_file) { 844 if (force && append_file) {
@@ -839,6 +852,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
839 } 852 }
840 853
841 symbol__init(); 854 symbol__init();
855 if (no_buildid)
856 disable_buildid_cache();
842 857
843 if (!nr_counters) { 858 if (!nr_counters) {
844 nr_counters = 1; 859 nr_counters = 1;
@@ -857,7 +872,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
857 } else { 872 } else {
858 all_tids=malloc(sizeof(pid_t)); 873 all_tids=malloc(sizeof(pid_t));
859 if (!all_tids) 874 if (!all_tids)
860 return -ENOMEM; 875 goto out_symbol_exit;
861 876
862 all_tids[0] = target_tid; 877 all_tids[0] = target_tid;
863 thread_num = 1; 878 thread_num = 1;
@@ -867,13 +882,13 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
867 for (j = 0; j < MAX_COUNTERS; j++) { 882 for (j = 0; j < MAX_COUNTERS; j++) {
868 fd[i][j] = malloc(sizeof(int)*thread_num); 883 fd[i][j] = malloc(sizeof(int)*thread_num);
869 if (!fd[i][j]) 884 if (!fd[i][j])
870 return -ENOMEM; 885 goto out_free_fd;
871 } 886 }
872 } 887 }
873 event_array = malloc( 888 event_array = malloc(
874 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); 889 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
875 if (!event_array) 890 if (!event_array)
876 return -ENOMEM; 891 goto out_free_fd;
877 892
878 if (user_interval != ULLONG_MAX) 893 if (user_interval != ULLONG_MAX)
879 default_interval = user_interval; 894 default_interval = user_interval;
@@ -889,8 +904,22 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
889 default_interval = freq; 904 default_interval = freq;
890 } else { 905 } else {
891 fprintf(stderr, "frequency and count are zero, aborting\n"); 906 fprintf(stderr, "frequency and count are zero, aborting\n");
892 exit(EXIT_FAILURE); 907 err = -EINVAL;
908 goto out_free_event_array;
893 } 909 }
894 910
895 return __cmd_record(argc, argv); 911 err = __cmd_record(argc, argv);
912
913out_free_event_array:
914 free(event_array);
915out_free_fd:
916 for (i = 0; i < MAX_NR_CPUS; i++) {
917 for (j = 0; j < MAX_COUNTERS; j++)
918 free(fd[i][j]);
919 }
920 free(all_tids);
921 all_tids = NULL;
922out_symbol_exit:
923 symbol__exit();
924 return err;
896} 925}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index fd7407c7205c..2f4b92925b26 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -155,30 +155,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
155 struct addr_location al; 155 struct addr_location al;
156 struct perf_event_attr *attr; 156 struct perf_event_attr *attr;
157 157
158 event__parse_sample(event, session->sample_type, &data); 158 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
159
160 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
161 data.pid, data.tid, data.ip, data.period);
162
163 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
164 unsigned int i;
165
166 dump_printf("... chain: nr:%Lu\n", data.callchain->nr);
167
168 if (!ip_callchain__valid(data.callchain, event)) {
169 pr_debug("call-chain problem with event, "
170 "skipping it.\n");
171 return 0;
172 }
173
174 if (dump_trace) {
175 for (i = 0; i < data.callchain->nr; i++)
176 dump_printf("..... %2d: %016Lx\n",
177 i, data.callchain->ips[i]);
178 }
179 }
180
181 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
182 fprintf(stderr, "problem processing %d event, skipping it.\n", 159 fprintf(stderr, "problem processing %d event, skipping it.\n",
183 event->header.type); 160 event->header.type);
184 return -1; 161 return -1;
@@ -464,8 +441,6 @@ static const struct option options[] = {
464 "pretty printing style key: normal raw"), 441 "pretty printing style key: normal raw"),
465 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 442 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
466 "sort by key(s): pid, comm, dso, symbol, parent"), 443 "sort by key(s): pid, comm, dso, symbol, parent"),
467 OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
468 "Don't shorten the pathnames taking into account the cwd"),
469 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 444 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
470 "Show sample percentage for different cpu modes"), 445 "Show sample percentage for different cpu modes"),
471 OPT_STRING('p', "parent", &parent_pattern, "regex", 446 OPT_STRING('p', "parent", &parent_pattern, "regex",
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9a39ca3c3ac4..a6b4d44f9502 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -69,7 +69,7 @@ static struct perf_event_attr default_attrs[] = {
69}; 69};
70 70
71static bool system_wide = false; 71static bool system_wide = false;
72static unsigned int nr_cpus = 0; 72static int nr_cpus = 0;
73static int run_idx = 0; 73static int run_idx = 0;
74 74
75static int run_count = 1; 75static int run_count = 1;
@@ -82,6 +82,7 @@ static int thread_num = 0;
82static pid_t child_pid = -1; 82static pid_t child_pid = -1;
83static bool null_run = false; 83static bool null_run = false;
84static bool big_num = false; 84static bool big_num = false;
85static const char *cpu_list;
85 86
86 87
87static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 88static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
@@ -158,7 +159,7 @@ static int create_perf_stat_counter(int counter)
158 PERF_FORMAT_TOTAL_TIME_RUNNING; 159 PERF_FORMAT_TOTAL_TIME_RUNNING;
159 160
160 if (system_wide) { 161 if (system_wide) {
161 unsigned int cpu; 162 int cpu;
162 163
163 for (cpu = 0; cpu < nr_cpus; cpu++) { 164 for (cpu = 0; cpu < nr_cpus; cpu++) {
164 fd[cpu][counter][0] = sys_perf_event_open(attr, 165 fd[cpu][counter][0] = sys_perf_event_open(attr,
@@ -208,7 +209,7 @@ static inline int nsec_counter(int counter)
208static void read_counter(int counter) 209static void read_counter(int counter)
209{ 210{
210 u64 count[3], single_count[3]; 211 u64 count[3], single_count[3];
211 unsigned int cpu; 212 int cpu;
212 size_t res, nv; 213 size_t res, nv;
213 int scaled; 214 int scaled;
214 int i, thread; 215 int i, thread;
@@ -542,6 +543,8 @@ static const struct option options[] = {
542 "null run - dont start any counters"), 543 "null run - dont start any counters"),
543 OPT_BOOLEAN('B', "big-num", &big_num, 544 OPT_BOOLEAN('B', "big-num", &big_num,
544 "print large numbers with thousands\' separators"), 545 "print large numbers with thousands\' separators"),
546 OPT_STRING('C', "cpu", &cpu_list, "cpu",
547 "list of cpus to monitor in system-wide"),
545 OPT_END() 548 OPT_END()
546}; 549};
547 550
@@ -566,10 +569,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
566 } 569 }
567 570
568 if (system_wide) 571 if (system_wide)
569 nr_cpus = read_cpu_map(); 572 nr_cpus = read_cpu_map(cpu_list);
570 else 573 else
571 nr_cpus = 1; 574 nr_cpus = 1;
572 575
576 if (nr_cpus < 1)
577 usage_with_options(stat_usage, options);
578
573 if (target_pid != -1) { 579 if (target_pid != -1) {
574 target_tid = target_pid; 580 target_tid = target_pid;
575 thread_num = find_all_tid(target_pid, &all_tids); 581 thread_num = find_all_tid(target_pid, &all_tids);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a66f4272b994..b513e40974f4 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -102,6 +102,7 @@ struct sym_entry *sym_filter_entry_sched = NULL;
102static int sym_pcnt_filter = 5; 102static int sym_pcnt_filter = 5;
103static int sym_counter = 0; 103static int sym_counter = 0;
104static int display_weighted = -1; 104static int display_weighted = -1;
105static const char *cpu_list;
105 106
106/* 107/*
107 * Symbols 108 * Symbols
@@ -982,6 +983,7 @@ static void event__process_sample(const event_t *self,
982 u64 ip = self->ip.ip; 983 u64 ip = self->ip.ip;
983 struct sym_entry *syme; 984 struct sym_entry *syme;
984 struct addr_location al; 985 struct addr_location al;
986 struct sample_data data;
985 struct machine *machine; 987 struct machine *machine;
986 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 988 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
987 989
@@ -1024,7 +1026,8 @@ static void event__process_sample(const event_t *self,
1024 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1026 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
1025 exact_samples++; 1027 exact_samples++;
1026 1028
1027 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || 1029 if (event__preprocess_sample(self, session, &al, &data,
1030 symbol_filter) < 0 ||
1028 al.filtered) 1031 al.filtered)
1029 return; 1032 return;
1030 1033
@@ -1079,26 +1082,6 @@ static void event__process_sample(const event_t *self,
1079 } 1082 }
1080} 1083}
1081 1084
1082static int event__process(event_t *event, struct perf_session *session)
1083{
1084 switch (event->header.type) {
1085 case PERF_RECORD_COMM:
1086 event__process_comm(event, session);
1087 break;
1088 case PERF_RECORD_MMAP:
1089 event__process_mmap(event, session);
1090 break;
1091 case PERF_RECORD_FORK:
1092 case PERF_RECORD_EXIT:
1093 event__process_task(event, session);
1094 break;
1095 default:
1096 break;
1097 }
1098
1099 return 0;
1100}
1101
1102struct mmap_data { 1085struct mmap_data {
1103 int counter; 1086 int counter;
1104 void *base; 1087 void *base;
@@ -1351,8 +1334,8 @@ static const struct option options[] = {
1351 "profile events on existing thread id"), 1334 "profile events on existing thread id"),
1352 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1335 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1353 "system-wide collection from all CPUs"), 1336 "system-wide collection from all CPUs"),
1354 OPT_INTEGER('C', "CPU", &profile_cpu, 1337 OPT_STRING('C', "cpu", &cpu_list, "cpu",
1355 "CPU to profile on"), 1338 "list of cpus to monitor"),
1356 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 1339 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1357 "file", "vmlinux pathname"), 1340 "file", "vmlinux pathname"),
1358 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 1341 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
@@ -1428,10 +1411,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1428 return -ENOMEM; 1411 return -ENOMEM;
1429 1412
1430 /* CPU and PID are mutually exclusive */ 1413 /* CPU and PID are mutually exclusive */
1431 if (target_tid > 0 && profile_cpu != -1) { 1414 if (target_tid > 0 && cpu_list) {
1432 printf("WARNING: PID switch overriding CPU\n"); 1415 printf("WARNING: PID switch overriding CPU\n");
1433 sleep(1); 1416 sleep(1);
1434 profile_cpu = -1; 1417 cpu_list = NULL;
1435 } 1418 }
1436 1419
1437 if (!nr_counters) 1420 if (!nr_counters)
@@ -1469,10 +1452,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1469 attrs[counter].sample_period = default_interval; 1452 attrs[counter].sample_period = default_interval;
1470 } 1453 }
1471 1454
1472 if (target_tid != -1 || profile_cpu != -1) 1455 if (target_tid != -1)
1473 nr_cpus = 1; 1456 nr_cpus = 1;
1474 else 1457 else
1475 nr_cpus = read_cpu_map(); 1458 nr_cpus = read_cpu_map(cpu_list);
1459
1460 if (nr_cpus < 1)
1461 usage_with_options(top_usage, options);
1476 1462
1477 get_term_dimensions(&winsize); 1463 get_term_dimensions(&winsize);
1478 if (print_entries == 0) { 1464 if (print_entries == 0) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index dddf3f01b5ab..294da725a57d 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -11,8 +11,9 @@
11 11
12static char const *script_name; 12static char const *script_name;
13static char const *generate_script_lang; 13static char const *generate_script_lang;
14static bool debug_ordering; 14static bool debug_mode;
15static u64 last_timestamp; 15static u64 last_timestamp;
16static u64 nr_unordered;
16 17
17static int default_start_script(const char *script __unused, 18static int default_start_script(const char *script __unused,
18 int argc __unused, 19 int argc __unused,
@@ -91,13 +92,15 @@ static int process_sample_event(event_t *event, struct perf_session *session)
91 } 92 }
92 93
93 if (session->sample_type & PERF_SAMPLE_RAW) { 94 if (session->sample_type & PERF_SAMPLE_RAW) {
94 if (debug_ordering) { 95 if (debug_mode) {
95 if (data.time < last_timestamp) { 96 if (data.time < last_timestamp) {
96 pr_err("Samples misordered, previous: %llu " 97 pr_err("Samples misordered, previous: %llu "
97 "this: %llu\n", last_timestamp, 98 "this: %llu\n", last_timestamp,
98 data.time); 99 data.time);
100 nr_unordered++;
99 } 101 }
100 last_timestamp = data.time; 102 last_timestamp = data.time;
103 return 0;
101 } 104 }
102 /* 105 /*
103 * FIXME: better resolve from pid from the struct trace_entry 106 * FIXME: better resolve from pid from the struct trace_entry
@@ -113,6 +116,15 @@ static int process_sample_event(event_t *event, struct perf_session *session)
113 return 0; 116 return 0;
114} 117}
115 118
119static u64 nr_lost;
120
121static int process_lost_event(event_t *event, struct perf_session *session __used)
122{
123 nr_lost += event->lost.lost;
124
125 return 0;
126}
127
116static struct perf_event_ops event_ops = { 128static struct perf_event_ops event_ops = {
117 .sample = process_sample_event, 129 .sample = process_sample_event,
118 .comm = event__process_comm, 130 .comm = event__process_comm,
@@ -120,6 +132,7 @@ static struct perf_event_ops event_ops = {
120 .event_type = event__process_event_type, 132 .event_type = event__process_event_type,
121 .tracing_data = event__process_tracing_data, 133 .tracing_data = event__process_tracing_data,
122 .build_id = event__process_build_id, 134 .build_id = event__process_build_id,
135 .lost = process_lost_event,
123 .ordered_samples = true, 136 .ordered_samples = true,
124}; 137};
125 138
@@ -132,9 +145,18 @@ static void sig_handler(int sig __unused)
132 145
133static int __cmd_trace(struct perf_session *session) 146static int __cmd_trace(struct perf_session *session)
134{ 147{
148 int ret;
149
135 signal(SIGINT, sig_handler); 150 signal(SIGINT, sig_handler);
136 151
137 return perf_session__process_events(session, &event_ops); 152 ret = perf_session__process_events(session, &event_ops);
153
154 if (debug_mode) {
155 pr_err("Misordered timestamps: %llu\n", nr_unordered);
156 pr_err("Lost events: %llu\n", nr_lost);
157 }
158
159 return ret;
138} 160}
139 161
140struct script_spec { 162struct script_spec {
@@ -544,8 +566,8 @@ static const struct option options[] = {
544 "generate perf-trace.xx script in specified language"), 566 "generate perf-trace.xx script in specified language"),
545 OPT_STRING('i', "input", &input_name, "file", 567 OPT_STRING('i', "input", &input_name, "file",
546 "input file name"), 568 "input file name"),
547 OPT_BOOLEAN('d', "debug-ordering", &debug_ordering, 569 OPT_BOOLEAN('d', "debug-mode", &debug_mode,
548 "check that samples time ordering is monotonic"), 570 "do various checks like samples ordering and lost events"),
549 571
550 OPT_END() 572 OPT_END()
551}; 573};
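
With the renamed --debug-mode option above, perf trace keeps two running counters while it replays the stream: a timestamp that goes backwards bumps nr_unordered, and each PERF_RECORD_LOST event adds its lost count to nr_lost; both totals are printed once processing finishes. A minimal Python sketch of the same bookkeeping, using an invented sample stream:

    last_timestamp = 0
    nr_unordered = 0
    nr_lost = 0

    # (timestamp, lost_count) pairs standing in for the decoded events
    for timestamp, lost in ((100, 0), (250, 0), (240, 0), (400, 3)):
        if timestamp < last_timestamp:
            nr_unordered += 1          # sample arrived out of order
        last_timestamp = timestamp
        nr_lost += lost                # PERF_RECORD_LOST carries a count

    print("Misordered timestamps: %d" % nr_unordered)   # -> 1
    print("Lost events: %d" % nr_lost)                   # -> 3
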
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
new file mode 100644
index 000000000000..ddb68e601f0e
--- /dev/null
+++ b/tools/perf/feature-tests.mak
@@ -0,0 +1,119 @@
1define SOURCE_HELLO
2#include <stdio.h>
3int main(void)
4{
5 return puts(\"hi\");
6}
7endef
8
9ifndef NO_DWARF
10define SOURCE_DWARF
11#include <dwarf.h>
12#include <libdw.h>
13#include <version.h>
14#ifndef _ELFUTILS_PREREQ
15#error
16#endif
17
18int main(void)
19{
20 Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
21 return (long)dbg;
22}
23endef
24endif
25
26define SOURCE_LIBELF
27#include <libelf.h>
28
29int main(void)
30{
31 Elf *elf = elf_begin(0, ELF_C_READ, 0);
32 return (long)elf;
33}
34endef
35
36define SOURCE_GLIBC
37#include <gnu/libc-version.h>
38
39int main(void)
40{
41 const char *version = gnu_get_libc_version();
42 return (long)version;
43}
44endef
45
46define SOURCE_ELF_MMAP
47#include <libelf.h>
48int main(void)
49{
50 Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
51 return (long)elf;
52}
53endef
54
55ifndef NO_NEWT
56define SOURCE_NEWT
57#include <newt.h>
58
59int main(void)
60{
61 newtInit();
62 newtCls();
63 return newtFinished();
64}
65endef
66endif
67
68ifndef NO_LIBPERL
69define SOURCE_PERL_EMBED
70#include <EXTERN.h>
71#include <perl.h>
72
73int main(void)
74{
75perl_alloc();
76return 0;
77}
78endef
79endif
80
81ifndef NO_LIBPYTHON
82define SOURCE_PYTHON_EMBED
83#include <Python.h>
84
85int main(void)
86{
87 Py_Initialize();
88 return 0;
89}
90endef
91endif
92
93define SOURCE_BFD
94#include <bfd.h>
95
96int main(void)
97{
98 bfd_demangle(0, 0, 0);
99 return 0;
100}
101endef
102
103define SOURCE_CPLUS_DEMANGLE
104extern char *cplus_demangle(const char *, int);
105
106int main(void)
107{
108 cplus_demangle(0, 0);
109 return 0;
110}
111endef
112
113# try-cc
114# Usage: option = $(call try-cc, source-to-build, cc-options)
115try-cc = $(shell sh -c \
116 'TMP="$(TMPOUT).$$$$"; \
117 echo "$(1)" | \
118 $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
119 rm -f "$$TMP"')
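
Each SOURCE_* block above is a complete C program; try-cc pipes it into $(CC), keeps only the exit status, and echoes "y" when the snippet builds, so the Makefile can turn library availability into a plain variable. A rough Python equivalent of that probe, for illustration only (the function name and the example invocation are not part of the patch):

    import os
    import subprocess
    import tempfile

    def try_cc(source, extra_flags=(), cc="cc"):
        # Feed the snippet to the compiler on stdin, as try-cc does, and
        # report "y" only when it compiles and links cleanly.
        fd, out = tempfile.mkstemp()
        os.close(fd)
        try:
            with open(os.devnull, "w") as devnull:
                proc = subprocess.Popen([cc, "-x", "c", "-"] + list(extra_flags) + ["-o", out],
                                        stdin=subprocess.PIPE,
                                        stdout=devnull, stderr=devnull)
                proc.communicate(source.encode())
            return "y" if proc.returncode == 0 else ""
        finally:
            os.unlink(out)

    # e.g. the libelf test would print "y" on a system with elfutils installed:
    print(try_cc("#include <libelf.h>\nint main(void) { return 0; }", ["-lelf"]))
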
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 2e7a4f417e20..677e59d62a8d 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -7,7 +7,17 @@ if [ $# -ne 0 ] ; then
7 PERF_DATA=$1 7 PERF_DATA=$1
8fi 8fi
9 9
10DEBUGDIR=~/.debug/ 10#
11# PERF_BUILDID_DIR environment variable set by perf
12# path to buildid directory, default to $HOME/.debug
13#
14if [ -z $PERF_BUILDID_DIR ]; then
15 PERF_BUILDID_DIR=~/.debug/
16else
17 # append / to make substitutions work
18 PERF_BUILDID_DIR=$PERF_BUILDID_DIR/
19fi
20
11BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) 21BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
12NOBUILDID=0000000000000000000000000000000000000000 22NOBUILDID=0000000000000000000000000000000000000000
13 23
@@ -22,13 +32,13 @@ MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
22 32
23cut -d ' ' -f 1 $BUILDIDS | \ 33cut -d ' ' -f 1 $BUILDIDS | \
24while read build_id ; do 34while read build_id ; do
25 linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2} 35 linkname=$PERF_BUILDID_DIR.build-id/${build_id:0:2}/${build_id:2}
26 filename=$(readlink -f $linkname) 36 filename=$(readlink -f $linkname)
27 echo ${linkname#$DEBUGDIR} >> $MANIFEST 37 echo ${linkname#$PERF_BUILDID_DIR} >> $MANIFEST
28 echo ${filename#$DEBUGDIR} >> $MANIFEST 38 echo ${filename#$PERF_BUILDID_DIR} >> $MANIFEST
29done 39done
30 40
31tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST 41tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST
32rm -f $MANIFEST $BUILDIDS 42rm -f $MANIFEST $BUILDIDS
33echo -e "Now please run:\n" 43echo -e "Now please run:\n"
34echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" 44echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
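
The manifest walk above relies on the layout of the build-id cache: under $PERF_BUILDID_DIR, each object is reachable through .build-id/<first two hex digits>/<remaining digits>, and the tarball stores paths relative to that root. A small Python sketch of the link name the loop computes (the build-id value below is made up):

    def buildid_linkname(buildid_dir, build_id):
        # mirrors: linkname=$PERF_BUILDID_DIR.build-id/${build_id:0:2}/${build_id:2}
        return "%s.build-id/%s/%s" % (buildid_dir, build_id[:2], build_id[2:])

    print(buildid_linkname("/home/user/.debug/",
                           "183134b0f282953018c4ffa1ef21b6f10adf2901"))
    # -> /home/user/.debug/.build-id/18/3134b0f282953018c4ffa1ef21b6f10adf2901
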
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 6e4871191138..cdd6c03f1e14 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -458,6 +458,8 @@ int main(int argc, const char **argv)
458 handle_options(&argv, &argc, NULL); 458 handle_options(&argv, &argc, NULL);
459 commit_pager_choice(); 459 commit_pager_choice();
460 set_debugfs_path(); 460 set_debugfs_path();
461 set_buildid_dir();
462
461 if (argc > 0) { 463 if (argc > 0) {
462 if (!prefixcmp(argv[0], "--")) 464 if (!prefixcmp(argv[0], "--"))
463 argv[0] += 2; 465 argv[0] += 2;
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
index 1dc464ee2ca8..aad7525bca1d 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -89,3 +89,33 @@ def trace_flag_str(value):
89 value &= ~idx 89 value &= ~idx
90 90
91 return string 91 return string
92
93
94def taskState(state):
95 states = {
96 0 : "R",
97 1 : "S",
98 2 : "D",
99 64: "DEAD"
100 }
101
102 if state not in states:
103 return "Unknown"
104
105 return states[state]
106
107
108class EventHeaders:
109 def __init__(self, common_cpu, common_secs, common_nsecs,
110 common_pid, common_comm):
111 self.cpu = common_cpu
112 self.secs = common_secs
113 self.nsecs = common_nsecs
114 self.pid = common_pid
115 self.comm = common_comm
116
117 def ts(self):
118 return (self.secs * (10 ** 9)) + self.nsecs
119
120 def ts_format(self):
121 return "%d.%d" % (self.secs, int(self.nsecs / 1000))
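
The EventHeaders helper added above folds the split common_secs/common_nsecs fields back into a single nanosecond timestamp, which the scheduler scripts use for ordering events, while taskState maps the raw prev_state value onto the usual R/S/D letters. A quick usage check, assuming the Core.py definitions above are in scope (the field values are invented):

    h = EventHeaders(common_cpu=0, common_secs=12, common_nsecs=345678901,
                     common_pid=4242, common_comm="perf")
    assert h.ts() == 12 * 10**9 + 345678901
    print(h.ts_format())     # -> 12.345678
    print(taskState(1))      # -> S (interruptible sleep)
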
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
new file mode 100644
index 000000000000..ae9a56e43e05
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -0,0 +1,184 @@
1# SchedGui.py - Python extension for perf trace, basic GUI code for
2# traces drawing and overview.
3#
4# Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
5#
6# This software is distributed under the terms of the GNU General
7# Public License ("GPL") version 2 as published by the Free Software
8# Foundation.
9
10
11try:
12 import wx
13except ImportError:
14 raise ImportError, "You need to install the wxpython lib for this script"
15
16
17class RootFrame(wx.Frame):
18 Y_OFFSET = 100
19 RECT_HEIGHT = 100
20 RECT_SPACE = 50
21 EVENT_MARKING_WIDTH = 5
22
23 def __init__(self, sched_tracer, title, parent = None, id = -1):
24 wx.Frame.__init__(self, parent, id, title)
25
26 (self.screen_width, self.screen_height) = wx.GetDisplaySize()
27 self.screen_width -= 10
28 self.screen_height -= 10
29 self.zoom = 0.5
30 self.scroll_scale = 20
31 self.sched_tracer = sched_tracer
32 self.sched_tracer.set_root_win(self)
33 (self.ts_start, self.ts_end) = sched_tracer.interval()
34 self.update_width_virtual()
35 self.nr_rects = sched_tracer.nr_rectangles() + 1
36 self.height_virtual = RootFrame.Y_OFFSET + (self.nr_rects * (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE))
37
38 # whole window panel
39 self.panel = wx.Panel(self, size=(self.screen_width, self.screen_height))
40
41 # scrollable container
42 self.scroll = wx.ScrolledWindow(self.panel)
43 self.scroll.SetScrollbars(self.scroll_scale, self.scroll_scale, self.width_virtual / self.scroll_scale, self.height_virtual / self.scroll_scale)
44 self.scroll.EnableScrolling(True, True)
45 self.scroll.SetFocus()
46
47 # scrollable drawing area
48 self.scroll_panel = wx.Panel(self.scroll, size=(self.screen_width - 15, self.screen_height / 2))
49 self.scroll_panel.Bind(wx.EVT_PAINT, self.on_paint)
50 self.scroll_panel.Bind(wx.EVT_KEY_DOWN, self.on_key_press)
51 self.scroll_panel.Bind(wx.EVT_LEFT_DOWN, self.on_mouse_down)
52 self.scroll.Bind(wx.EVT_PAINT, self.on_paint)
53 self.scroll.Bind(wx.EVT_KEY_DOWN, self.on_key_press)
54 self.scroll.Bind(wx.EVT_LEFT_DOWN, self.on_mouse_down)
55
56 self.scroll.Fit()
57 self.Fit()
58
59 self.scroll_panel.SetDimensions(-1, -1, self.width_virtual, self.height_virtual, wx.SIZE_USE_EXISTING)
60
61 self.txt = None
62
63 self.Show(True)
64
65 def us_to_px(self, val):
66 return val / (10 ** 3) * self.zoom
67
68 def px_to_us(self, val):
69 return (val / self.zoom) * (10 ** 3)
70
71 def scroll_start(self):
72 (x, y) = self.scroll.GetViewStart()
73 return (x * self.scroll_scale, y * self.scroll_scale)
74
75 def scroll_start_us(self):
76 (x, y) = self.scroll_start()
77 return self.px_to_us(x)
78
79 def paint_rectangle_zone(self, nr, color, top_color, start, end):
80 offset_px = self.us_to_px(start - self.ts_start)
81 width_px = self.us_to_px(end - self.ts_start)
82
83 offset_py = RootFrame.Y_OFFSET + (nr * (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE))
84 width_py = RootFrame.RECT_HEIGHT
85
86 dc = self.dc
87
88 if top_color is not None:
89 (r, g, b) = top_color
90 top_color = wx.Colour(r, g, b)
91 brush = wx.Brush(top_color, wx.SOLID)
92 dc.SetBrush(brush)
93 dc.DrawRectangle(offset_px, offset_py, width_px, RootFrame.EVENT_MARKING_WIDTH)
94 width_py -= RootFrame.EVENT_MARKING_WIDTH
95 offset_py += RootFrame.EVENT_MARKING_WIDTH
96
97 (r ,g, b) = color
98 color = wx.Colour(r, g, b)
99 brush = wx.Brush(color, wx.SOLID)
100 dc.SetBrush(brush)
101 dc.DrawRectangle(offset_px, offset_py, width_px, width_py)
102
103 def update_rectangles(self, dc, start, end):
104 start += self.ts_start
105 end += self.ts_start
106 self.sched_tracer.fill_zone(start, end)
107
108 def on_paint(self, event):
109 dc = wx.PaintDC(self.scroll_panel)
110 self.dc = dc
111
112 width = min(self.width_virtual, self.screen_width)
113 (x, y) = self.scroll_start()
114 start = self.px_to_us(x)
115 end = self.px_to_us(x + width)
116 self.update_rectangles(dc, start, end)
117
118 def rect_from_ypixel(self, y):
119 y -= RootFrame.Y_OFFSET
120 rect = y / (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE)
121 height = y % (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE)
122
123 if rect < 0 or rect > self.nr_rects - 1 or height > RootFrame.RECT_HEIGHT:
124 return -1
125
126 return rect
127
128 def update_summary(self, txt):
129 if self.txt:
130 self.txt.Destroy()
131 self.txt = wx.StaticText(self.panel, -1, txt, (0, (self.screen_height / 2) + 50))
132
133
134 def on_mouse_down(self, event):
135 (x, y) = event.GetPositionTuple()
136 rect = self.rect_from_ypixel(y)
137 if rect == -1:
138 return
139
140 t = self.px_to_us(x) + self.ts_start
141
142 self.sched_tracer.mouse_down(rect, t)
143
144
145 def update_width_virtual(self):
146 self.width_virtual = self.us_to_px(self.ts_end - self.ts_start)
147
148 def __zoom(self, x):
149 self.update_width_virtual()
150 (xpos, ypos) = self.scroll.GetViewStart()
151 xpos = self.us_to_px(x) / self.scroll_scale
152 self.scroll.SetScrollbars(self.scroll_scale, self.scroll_scale, self.width_virtual / self.scroll_scale, self.height_virtual / self.scroll_scale, xpos, ypos)
153 self.Refresh()
154
155 def zoom_in(self):
156 x = self.scroll_start_us()
157 self.zoom *= 2
158 self.__zoom(x)
159
160 def zoom_out(self):
161 x = self.scroll_start_us()
162 self.zoom /= 2
163 self.__zoom(x)
164
165
166 def on_key_press(self, event):
167 key = event.GetRawKeyCode()
168 if key == ord("+"):
169 self.zoom_in()
170 return
171 if key == ord("-"):
172 self.zoom_out()
173 return
174
175 key = event.GetKeyCode()
176 (x, y) = self.scroll.GetViewStart()
177 if key == wx.WXK_RIGHT:
178 self.scroll.Scroll(x + 1, y)
179 elif key == wx.WXK_LEFT:
180 self.scroll.Scroll(x - 1, y)
181 elif key == wx.WXK_DOWN:
182 self.scroll.Scroll(x, y + 1)
183 elif key == wx.WXK_UP:
184 self.scroll.Scroll(x, y - 1)
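
RootFrame maps time onto screen space through us_to_px()/px_to_us(): a timestamp delta is divided by 1000 and scaled by the current zoom factor, so zoom_in() and zoom_out() only have to double or halve self.zoom and recompute the virtual width. The round trip in isolation, with arbitrary numbers:

    zoom = 0.5

    def us_to_px(val):
        return val / (10 ** 3) * zoom

    def px_to_us(val):
        return (val / zoom) * (10 ** 3)

    delta = 250000.0                  # timestamp delta handed to the drawing code
    px = us_to_px(delta)              # -> 125.0 pixels at zoom 0.5
    print(px, px_to_us(px))           # -> 125.0 250000.0
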
diff --git a/tools/perf/scripts/python/bin/sched-migration-record b/tools/perf/scripts/python/bin/sched-migration-record
new file mode 100644
index 000000000000..17a3e9bd9e8f
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sched-migration-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -m 16384 -a -e sched:sched_wakeup -e sched:sched_wakeup_new -e sched:sched_switch -e sched:sched_migrate_task $@
diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report
new file mode 100644
index 000000000000..61d05f72e443
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sched-migration-report
@@ -0,0 +1,3 @@
1#!/bin/bash
2# description: sched migration overview
3perf trace $@ -s ~/libexec/perf-core/scripts/python/sched-migration.py
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
new file mode 100644
index 000000000000..b934383c3364
--- /dev/null
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -0,0 +1,461 @@
1#!/usr/bin/python
2#
3# Cpu task migration overview toy
4#
5# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
6#
7# perf trace event handlers have been generated by perf trace -g python
8#
9# This software is distributed under the terms of the GNU General
10# Public License ("GPL") version 2 as published by the Free Software
11# Foundation.
12
13
14import os
15import sys
16
17from collections import defaultdict
18from UserList import UserList
19
20sys.path.append(os.environ['PERF_EXEC_PATH'] + \
21 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
22sys.path.append('scripts/python/Perf-Trace-Util/lib/Perf/Trace')
23
24from perf_trace_context import *
25from Core import *
26from SchedGui import *
27
28
29threads = { 0 : "idle"}
30
31def thread_name(pid):
32 return "%s:%d" % (threads[pid], pid)
33
34class RunqueueEventUnknown:
35 @staticmethod
36 def color():
37 return None
38
39 def __repr__(self):
40 return "unknown"
41
42class RunqueueEventSleep:
43 @staticmethod
44 def color():
45 return (0, 0, 0xff)
46
47 def __init__(self, sleeper):
48 self.sleeper = sleeper
49
50 def __repr__(self):
51 return "%s gone to sleep" % thread_name(self.sleeper)
52
53class RunqueueEventWakeup:
54 @staticmethod
55 def color():
56 return (0xff, 0xff, 0)
57
58 def __init__(self, wakee):
59 self.wakee = wakee
60
61 def __repr__(self):
62 return "%s woke up" % thread_name(self.wakee)
63
64class RunqueueEventFork:
65 @staticmethod
66 def color():
67 return (0, 0xff, 0)
68
69 def __init__(self, child):
70 self.child = child
71
72 def __repr__(self):
73 return "new forked task %s" % thread_name(self.child)
74
75class RunqueueMigrateIn:
76 @staticmethod
77 def color():
78 return (0, 0xf0, 0xff)
79
80 def __init__(self, new):
81 self.new = new
82
83 def __repr__(self):
84 return "task migrated in %s" % thread_name(self.new)
85
86class RunqueueMigrateOut:
87 @staticmethod
88 def color():
89 return (0xff, 0, 0xff)
90
91 def __init__(self, old):
92 self.old = old
93
94 def __repr__(self):
95 return "task migrated out %s" % thread_name(self.old)
96
97class RunqueueSnapshot:
98 def __init__(self, tasks = [0], event = RunqueueEventUnknown()):
99 self.tasks = tuple(tasks)
100 self.event = event
101
102 def sched_switch(self, prev, prev_state, next):
103 event = RunqueueEventUnknown()
104
105 if taskState(prev_state) == "R" and next in self.tasks \
106 and prev in self.tasks:
107 return self
108
109 if taskState(prev_state) != "R":
110 event = RunqueueEventSleep(prev)
111
112 next_tasks = list(self.tasks[:])
113 if prev in self.tasks:
114 if taskState(prev_state) != "R":
115 next_tasks.remove(prev)
116 elif taskState(prev_state) == "R":
117 next_tasks.append(prev)
118
119 if next not in next_tasks:
120 next_tasks.append(next)
121
122 return RunqueueSnapshot(next_tasks, event)
123
124 def migrate_out(self, old):
125 if old not in self.tasks:
126 return self
127 next_tasks = [task for task in self.tasks if task != old]
128
129 return RunqueueSnapshot(next_tasks, RunqueueMigrateOut(old))
130
131 def __migrate_in(self, new, event):
132 if new in self.tasks:
133 self.event = event
134 return self
135 next_tasks = self.tasks[:] + tuple([new])
136
137 return RunqueueSnapshot(next_tasks, event)
138
139 def migrate_in(self, new):
140 return self.__migrate_in(new, RunqueueMigrateIn(new))
141
142 def wake_up(self, new):
143 return self.__migrate_in(new, RunqueueEventWakeup(new))
144
145 def wake_up_new(self, new):
146 return self.__migrate_in(new, RunqueueEventFork(new))
147
148 def load(self):
149 """ Provide the number of tasks on the runqueue.
150 Don't count idle"""
151 return len(self.tasks) - 1
152
153 def __repr__(self):
154 ret = self.tasks.__repr__()
155 ret += self.origin_tostring()
156
157 return ret
158
159class TimeSlice:
160 def __init__(self, start, prev):
161 self.start = start
162 self.prev = prev
163 self.end = start
164 # cpus that triggered the event
165 self.event_cpus = []
166 if prev is not None:
167 self.total_load = prev.total_load
168 self.rqs = prev.rqs.copy()
169 else:
170 self.rqs = defaultdict(RunqueueSnapshot)
171 self.total_load = 0
172
173 def __update_total_load(self, old_rq, new_rq):
174 diff = new_rq.load() - old_rq.load()
175 self.total_load += diff
176
177 def sched_switch(self, ts_list, prev, prev_state, next, cpu):
178 old_rq = self.prev.rqs[cpu]
179 new_rq = old_rq.sched_switch(prev, prev_state, next)
180
181 if old_rq is new_rq:
182 return
183
184 self.rqs[cpu] = new_rq
185 self.__update_total_load(old_rq, new_rq)
186 ts_list.append(self)
187 self.event_cpus = [cpu]
188
189 def migrate(self, ts_list, new, old_cpu, new_cpu):
190 if old_cpu == new_cpu:
191 return
192 old_rq = self.prev.rqs[old_cpu]
193 out_rq = old_rq.migrate_out(new)
194 self.rqs[old_cpu] = out_rq
195 self.__update_total_load(old_rq, out_rq)
196
197 new_rq = self.prev.rqs[new_cpu]
198 in_rq = new_rq.migrate_in(new)
199 self.rqs[new_cpu] = in_rq
200 self.__update_total_load(new_rq, in_rq)
201
202 ts_list.append(self)
203
204 if old_rq is not out_rq:
205 self.event_cpus.append(old_cpu)
206 self.event_cpus.append(new_cpu)
207
208 def wake_up(self, ts_list, pid, cpu, fork):
209 old_rq = self.prev.rqs[cpu]
210 if fork:
211 new_rq = old_rq.wake_up_new(pid)
212 else:
213 new_rq = old_rq.wake_up(pid)
214
215 if new_rq is old_rq:
216 return
217 self.rqs[cpu] = new_rq
218 self.__update_total_load(old_rq, new_rq)
219 ts_list.append(self)
220 self.event_cpus = [cpu]
221
222 def next(self, t):
223 self.end = t
224 return TimeSlice(t, self)
225
226class TimeSliceList(UserList):
227 def __init__(self, arg = []):
228 self.data = arg
229
230 def get_time_slice(self, ts):
231 if len(self.data) == 0:
232 slice = TimeSlice(ts, TimeSlice(-1, None))
233 else:
234 slice = self.data[-1].next(ts)
235 return slice
236
237 def find_time_slice(self, ts):
238 start = 0
239 end = len(self.data)
240 found = -1
241 searching = True
242 while searching:
243 if start == end or start == end - 1:
244 searching = False
245
246 i = (end + start) / 2
247 if self.data[i].start <= ts and self.data[i].end >= ts:
248 found = i
249 end = i
250 continue
251
252 if self.data[i].end < ts:
253 start = i
254
255 elif self.data[i].start > ts:
256 end = i
257
258 return found
259
260 def set_root_win(self, win):
261 self.root_win = win
262
263 def mouse_down(self, cpu, t):
264 idx = self.find_time_slice(t)
265 if idx == -1:
266 return
267
268 ts = self[idx]
269 rq = ts.rqs[cpu]
270 raw = "CPU: %d\n" % cpu
271 raw += "Last event : %s\n" % rq.event.__repr__()
272 raw += "Timestamp : %d.%06d\n" % (ts.start / (10 ** 9), (ts.start % (10 ** 9)) / 1000)
273 raw += "Duration : %6d us\n" % ((ts.end - ts.start) / (10 ** 6))
274 raw += "Load = %d\n" % rq.load()
275 for t in rq.tasks:
276 raw += "%s \n" % thread_name(t)
277
278 self.root_win.update_summary(raw)
279
280 def update_rectangle_cpu(self, slice, cpu):
281 rq = slice.rqs[cpu]
282
283 if slice.total_load != 0:
284 load_rate = rq.load() / float(slice.total_load)
285 else:
286 load_rate = 0
287
288 red_power = int(0xff - (0xff * load_rate))
289 color = (0xff, red_power, red_power)
290
291 top_color = None
292
293 if cpu in slice.event_cpus:
294 top_color = rq.event.color()
295
296 self.root_win.paint_rectangle_zone(cpu, color, top_color, slice.start, slice.end)
297
298 def fill_zone(self, start, end):
299 i = self.find_time_slice(start)
300 if i == -1:
301 return
302
303 for i in xrange(i, len(self.data)):
304 timeslice = self.data[i]
305 if timeslice.start > end:
306 return
307
308 for cpu in timeslice.rqs:
309 self.update_rectangle_cpu(timeslice, cpu)
310
311 def interval(self):
312 if len(self.data) == 0:
313 return (0, 0)
314
315 return (self.data[0].start, self.data[-1].end)
316
317 def nr_rectangles(self):
318 last_ts = self.data[-1]
319 max_cpu = 0
320 for cpu in last_ts.rqs:
321 if cpu > max_cpu:
322 max_cpu = cpu
323 return max_cpu
324
325
326class SchedEventProxy:
327 def __init__(self):
328 self.current_tsk = defaultdict(lambda : -1)
329 self.timeslices = TimeSliceList()
330
331 def sched_switch(self, headers, prev_comm, prev_pid, prev_prio, prev_state,
332 next_comm, next_pid, next_prio):
333 """ Ensure the task we sched out this cpu is really the one
334 we logged. Otherwise we may have missed traces """
335
336 on_cpu_task = self.current_tsk[headers.cpu]
337
338 if on_cpu_task != -1 and on_cpu_task != prev_pid:
339 print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
340 (headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
341
342 threads[prev_pid] = prev_comm
343 threads[next_pid] = next_comm
344 self.current_tsk[headers.cpu] = next_pid
345
346 ts = self.timeslices.get_time_slice(headers.ts())
347 ts.sched_switch(self.timeslices, prev_pid, prev_state, next_pid, headers.cpu)
348
349 def migrate(self, headers, pid, prio, orig_cpu, dest_cpu):
350 ts = self.timeslices.get_time_slice(headers.ts())
351 ts.migrate(self.timeslices, pid, orig_cpu, dest_cpu)
352
353 def wake_up(self, headers, comm, pid, success, target_cpu, fork):
354 if success == 0:
355 return
356 ts = self.timeslices.get_time_slice(headers.ts())
357 ts.wake_up(self.timeslices, pid, target_cpu, fork)
358
359
360def trace_begin():
361 global parser
362 parser = SchedEventProxy()
363
364def trace_end():
365 app = wx.App(False)
366 timeslices = parser.timeslices
367 frame = RootFrame(timeslices, "Migration")
368 app.MainLoop()
369
370def sched__sched_stat_runtime(event_name, context, common_cpu,
371 common_secs, common_nsecs, common_pid, common_comm,
372 comm, pid, runtime, vruntime):
373 pass
374
375def sched__sched_stat_iowait(event_name, context, common_cpu,
376 common_secs, common_nsecs, common_pid, common_comm,
377 comm, pid, delay):
378 pass
379
380def sched__sched_stat_sleep(event_name, context, common_cpu,
381 common_secs, common_nsecs, common_pid, common_comm,
382 comm, pid, delay):
383 pass
384
385def sched__sched_stat_wait(event_name, context, common_cpu,
386 common_secs, common_nsecs, common_pid, common_comm,
387 comm, pid, delay):
388 pass
389
390def sched__sched_process_fork(event_name, context, common_cpu,
391 common_secs, common_nsecs, common_pid, common_comm,
392 parent_comm, parent_pid, child_comm, child_pid):
393 pass
394
395def sched__sched_process_wait(event_name, context, common_cpu,
396 common_secs, common_nsecs, common_pid, common_comm,
397 comm, pid, prio):
398 pass
399
400def sched__sched_process_exit(event_name, context, common_cpu,
401 common_secs, common_nsecs, common_pid, common_comm,
402 comm, pid, prio):
403 pass
404
405def sched__sched_process_free(event_name, context, common_cpu,
406 common_secs, common_nsecs, common_pid, common_comm,
407 comm, pid, prio):
408 pass
409
410def sched__sched_migrate_task(event_name, context, common_cpu,
411 common_secs, common_nsecs, common_pid, common_comm,
412 comm, pid, prio, orig_cpu,
413 dest_cpu):
414 headers = EventHeaders(common_cpu, common_secs, common_nsecs,
415 common_pid, common_comm)
416 parser.migrate(headers, pid, prio, orig_cpu, dest_cpu)
417
418def sched__sched_switch(event_name, context, common_cpu,
419 common_secs, common_nsecs, common_pid, common_comm,
420 prev_comm, prev_pid, prev_prio, prev_state,
421 next_comm, next_pid, next_prio):
422
423 headers = EventHeaders(common_cpu, common_secs, common_nsecs,
424 common_pid, common_comm)
425 parser.sched_switch(headers, prev_comm, prev_pid, prev_prio, prev_state,
426 next_comm, next_pid, next_prio)
427
428def sched__sched_wakeup_new(event_name, context, common_cpu,
429 common_secs, common_nsecs, common_pid, common_comm,
430 comm, pid, prio, success,
431 target_cpu):
432 headers = EventHeaders(common_cpu, common_secs, common_nsecs,
433 common_pid, common_comm)
434 parser.wake_up(headers, comm, pid, success, target_cpu, 1)
435
436def sched__sched_wakeup(event_name, context, common_cpu,
437 common_secs, common_nsecs, common_pid, common_comm,
438 comm, pid, prio, success,
439 target_cpu):
440 headers = EventHeaders(common_cpu, common_secs, common_nsecs,
441 common_pid, common_comm)
442 parser.wake_up(headers, comm, pid, success, target_cpu, 0)
443
444def sched__sched_wait_task(event_name, context, common_cpu,
445 common_secs, common_nsecs, common_pid, common_comm,
446 comm, pid, prio):
447 pass
448
449def sched__sched_kthread_stop_ret(event_name, context, common_cpu,
450 common_secs, common_nsecs, common_pid, common_comm,
451 ret):
452 pass
453
454def sched__sched_kthread_stop(event_name, context, common_cpu,
455 common_secs, common_nsecs, common_pid, common_comm,
456 comm, pid):
457 pass
458
459def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
460 common_pid, common_comm):
461 pass
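
The script keeps a snapshot of each CPU's runqueue and opens a new TimeSlice for every event, appending the slice to the list only when some runqueue actually changed, so the whole trace becomes a sequence of (start, end, per-cpu snapshot) records that find_time_slice() can binary-search. A minimal dry run of that model, assuming RunqueueSnapshot, TimeSlice and TimeSliceList from the script above are in scope (the wx GUI is not needed for this; pids and timestamps are invented):

    ts_list = TimeSliceList()

    s0 = ts_list.get_time_slice(1000)           # first slice opens at t=1000 ns
    s0.wake_up(ts_list, pid=17, cpu=0, fork=0)  # task 17 becomes runnable on cpu 0

    s1 = ts_list.get_time_slice(2000)           # closes s0, opens a slice at t=2000
    s1.migrate(ts_list, new=17, old_cpu=0, new_cpu=1)

    print(ts_list.interval())                   # -> (1000, 2000)
    print(ts_list[0].rqs[0].load())             # -> 1: task 17 was runnable on cpu 0
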
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 70c5cf87d020..e437edb72417 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -12,6 +12,7 @@
12#include "event.h" 12#include "event.h"
13#include "symbol.h" 13#include "symbol.h"
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include "debug.h"
15 16
16static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) 17static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
17{ 18{
@@ -34,28 +35,43 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
34 return 0; 35 return 0;
35} 36}
36 37
38static int event__exit_del_thread(event_t *self, struct perf_session *session)
39{
40 struct thread *thread = perf_session__findnew(session, self->fork.tid);
41
42 dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
43 self->fork.ppid, self->fork.ptid);
44
45 if (thread) {
46 rb_erase(&thread->rb_node, &session->threads);
47 session->last_match = NULL;
48 thread__delete(thread);
49 }
50
51 return 0;
52}
53
37struct perf_event_ops build_id__mark_dso_hit_ops = { 54struct perf_event_ops build_id__mark_dso_hit_ops = {
38 .sample = build_id__mark_dso_hit, 55 .sample = build_id__mark_dso_hit,
39 .mmap = event__process_mmap, 56 .mmap = event__process_mmap,
40 .fork = event__process_task, 57 .fork = event__process_task,
58 .exit = event__exit_del_thread,
41}; 59};
42 60
43char *dso__build_id_filename(struct dso *self, char *bf, size_t size) 61char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
44{ 62{
45 char build_id_hex[BUILD_ID_SIZE * 2 + 1]; 63 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
46 const char *home;
47 64
48 if (!self->has_build_id) 65 if (!self->has_build_id)
49 return NULL; 66 return NULL;
50 67
51 build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex); 68 build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex);
52 home = getenv("HOME");
53 if (bf == NULL) { 69 if (bf == NULL) {
54 if (asprintf(&bf, "%s/%s/.build-id/%.2s/%s", home, 70 if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir,
55 DEBUG_CACHE_DIR, build_id_hex, build_id_hex + 2) < 0) 71 build_id_hex, build_id_hex + 2) < 0)
56 return NULL; 72 return NULL;
57 } else 73 } else
58 snprintf(bf, size, "%s/%s/.build-id/%.2s/%s", home, 74 snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
59 DEBUG_CACHE_DIR, build_id_hex, build_id_hex + 2); 75 build_id_hex, build_id_hex + 2);
60 return bf; 76 return bf;
61} 77}
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 65fe664fddf6..27e9ebe4076e 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -23,6 +23,7 @@ extern int perf_config(config_fn_t fn, void *);
23extern int perf_config_int(const char *, const char *); 23extern int perf_config_int(const char *, const char *);
24extern int perf_config_bool(const char *, const char *); 24extern int perf_config_bool(const char *, const char *);
25extern int config_error_nonbool(const char *); 25extern int config_error_nonbool(const char *);
26extern const char *perf_config_dirname(const char *, const char *);
26 27
27/* pager.c */ 28/* pager.c */
28extern void setup_pager(void); 29extern void setup_pager(void);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 52c777e451ed..f231f43424d2 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -18,7 +18,7 @@
18#include "util.h" 18#include "util.h"
19#include "callchain.h" 19#include "callchain.h"
20 20
21bool ip_callchain__valid(struct ip_callchain *chain, event_t *event) 21bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
22{ 22{
23 unsigned int chain_size = event->header.size; 23 unsigned int chain_size = event->header.size;
24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; 24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index f2e9ee164bd8..624a96c636fd 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -63,5 +63,5 @@ int register_callchain_param(struct callchain_param *param);
63int append_chain(struct callchain_node *root, struct ip_callchain *chain, 63int append_chain(struct callchain_node *root, struct ip_callchain *chain,
64 struct map_symbol *syms, u64 period); 64 struct map_symbol *syms, u64 period);
65 65
66bool ip_callchain__valid(struct ip_callchain *chain, event_t *event); 66bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
67#endif /* __PERF_CALLCHAIN_H */ 67#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index dabe892d0e53..e02d78cae70f 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -11,6 +11,11 @@
11 11
12#define MAXNAME (256) 12#define MAXNAME (256)
13 13
14#define DEBUG_CACHE_DIR ".debug"
15
16
17char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
18
14static FILE *config_file; 19static FILE *config_file;
15static const char *config_file_name; 20static const char *config_file_name;
16static int config_linenr; 21static int config_linenr;
@@ -127,7 +132,7 @@ static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
127 break; 132 break;
128 if (!iskeychar(c)) 133 if (!iskeychar(c))
129 break; 134 break;
130 name[len++] = tolower(c); 135 name[len++] = c;
131 if (len >= MAXNAME) 136 if (len >= MAXNAME)
132 return -1; 137 return -1;
133 } 138 }
@@ -327,6 +332,13 @@ int perf_config_bool(const char *name, const char *value)
327 return !!perf_config_bool_or_int(name, value, &discard); 332 return !!perf_config_bool_or_int(name, value, &discard);
328} 333}
329 334
335const char *perf_config_dirname(const char *name, const char *value)
336{
337 if (!name)
338 return NULL;
339 return value;
340}
341
330static int perf_default_core_config(const char *var __used, const char *value __used) 342static int perf_default_core_config(const char *var __used, const char *value __used)
331{ 343{
332 /* Add other config variables here and to Documentation/config.txt. */ 344 /* Add other config variables here and to Documentation/config.txt. */
@@ -428,3 +440,53 @@ int config_error_nonbool(const char *var)
428{ 440{
429 return error("Missing value for '%s'", var); 441 return error("Missing value for '%s'", var);
430} 442}
443
444struct buildid_dir_config {
445 char *dir;
446};
447
448static int buildid_dir_command_config(const char *var, const char *value,
449 void *data)
450{
451 struct buildid_dir_config *c = data;
452 const char *v;
453
454 /* same dir for all commands */
455 if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) {
456 v = perf_config_dirname(var, value);
457 if (!v)
458 return -1;
459 strncpy(c->dir, v, MAXPATHLEN-1);
460 c->dir[MAXPATHLEN-1] = '\0';
461 }
462 return 0;
463}
464
465static void check_buildid_dir_config(void)
466{
467 struct buildid_dir_config c;
468 c.dir = buildid_dir;
469 perf_config(buildid_dir_command_config, &c);
470}
471
472void set_buildid_dir(void)
473{
474 buildid_dir[0] = '\0';
475
476 /* try config file */
477 check_buildid_dir_config();
478
479 /* default to $HOME/.debug */
480 if (buildid_dir[0] == '\0') {
481 char *v = getenv("HOME");
482 if (v) {
483 snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
484 v, DEBUG_CACHE_DIR);
485 } else {
486 strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
487 }
488 buildid_dir[MAXPATHLEN-1] = '\0';
489 }
490 /* for communicating with external commands */
491 setenv("PERF_BUILDID_DIR", buildid_dir, 1);
492}
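
set_buildid_dir() above settles the cache location once per invocation: a buildid.dir entry in the config file wins, otherwise the directory falls back to $HOME/.debug (or a bare .debug when HOME is unset), and the result is exported as PERF_BUILDID_DIR so helpers such as perf-archive see the same path. The precedence, restated as a Python sketch (the function and argument names are illustrative):

    import os

    def resolve_buildid_dir(config_value=None):
        if config_value:                            # "buildid.dir" from the config file
            path = config_value
        elif os.environ.get("HOME"):
            path = os.path.join(os.environ["HOME"], ".debug")
        else:
            path = ".debug"
        os.environ["PERF_BUILDID_DIR"] = path       # visible to external commands
        return path

    print(resolve_buildid_dir())                    # typically /home/<user>/.debug
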
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 4e01490e51e5..0f9b8d7a7d7e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -20,7 +20,7 @@ static int default_cpu_map(void)
20 return nr_cpus; 20 return nr_cpus;
21} 21}
22 22
23int read_cpu_map(void) 23static int read_all_cpu_map(void)
24{ 24{
25 FILE *onlnf; 25 FILE *onlnf;
26 int nr_cpus = 0; 26 int nr_cpus = 0;
@@ -57,3 +57,58 @@ int read_cpu_map(void)
57 57
58 return default_cpu_map(); 58 return default_cpu_map();
59} 59}
60
61int read_cpu_map(const char *cpu_list)
62{
63 unsigned long start_cpu, end_cpu = 0;
64 char *p = NULL;
65 int i, nr_cpus = 0;
66
67 if (!cpu_list)
68 return read_all_cpu_map();
69
70 if (!isdigit(*cpu_list))
71 goto invalid;
72
73 while (isdigit(*cpu_list)) {
74 p = NULL;
75 start_cpu = strtoul(cpu_list, &p, 0);
76 if (start_cpu >= INT_MAX
77 || (*p != '\0' && *p != ',' && *p != '-'))
78 goto invalid;
79
80 if (*p == '-') {
81 cpu_list = ++p;
82 p = NULL;
83 end_cpu = strtoul(cpu_list, &p, 0);
84
85 if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
86 goto invalid;
87
88 if (end_cpu < start_cpu)
89 goto invalid;
90 } else {
91 end_cpu = start_cpu;
92 }
93
94 for (; start_cpu <= end_cpu; start_cpu++) {
95 /* check for duplicates */
96 for (i = 0; i < nr_cpus; i++)
97 if (cpumap[i] == (int)start_cpu)
98 goto invalid;
99
100 assert(nr_cpus < MAX_NR_CPUS);
101 cpumap[nr_cpus++] = (int)start_cpu;
102 }
103 if (*p)
104 ++p;
105
106 cpu_list = p;
107 }
108 if (nr_cpus > 0)
109 return nr_cpus;
110
111 return default_cpu_map();
112invalid:
113 return -1;
114}
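
read_cpu_map() now understands the cpu list syntax taken by the reworked -C options: comma-separated entries, each either a single cpu number or an inclusive a-b range, with duplicates and reversed ranges rejected; a NULL list still falls back to the full online map. The accepted grammar, modelled in Python for illustration (this is not the C implementation):

    def parse_cpu_list(cpu_list):
        cpus = []
        for entry in cpu_list.split(","):
            if "-" in entry:
                lo, hi = (int(n) for n in entry.split("-"))
                if hi < lo:
                    raise ValueError("range end below start: %s" % entry)
            else:
                lo = hi = int(entry)
            for cpu in range(lo, hi + 1):
                if cpu in cpus:
                    raise ValueError("duplicate cpu %d" % cpu)
                cpus.append(cpu)
        return cpus

    print(parse_cpu_list("0,2-4"))    # -> [0, 2, 3, 4]
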
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 86c78bb33098..3e60f56e490e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,7 +1,7 @@
1#ifndef __PERF_CPUMAP_H 1#ifndef __PERF_CPUMAP_H
2#define __PERF_CPUMAP_H 2#define __PERF_CPUMAP_H
3 3
4extern int read_cpu_map(void); 4extern int read_cpu_map(const char *cpu_list);
5extern int cpumap[]; 5extern int cpumap[];
6 6
7#endif /* __PERF_CPUMAP_H */ 7#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 6cddff2bc970..318dab15d177 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -86,12 +86,10 @@ void trace_event(event_t *event)
86 dump_printf_color(" ", color); 86 dump_printf_color(" ", color);
87 for (j = 0; j < 15-(i & 15); j++) 87 for (j = 0; j < 15-(i & 15); j++)
88 dump_printf_color(" ", color); 88 dump_printf_color(" ", color);
89 for (j = 0; j < (i & 15); j++) { 89 for (j = i & ~15; j <= i; j++) {
90 if (isprint(raw_event[i-15+j])) 90 dump_printf_color("%c", color,
91 dump_printf_color("%c", color, 91 isprint(raw_event[j]) ?
92 raw_event[i-15+j]); 92 raw_event[j] : '.');
93 else
94 dump_printf_color(".", color);
95 } 93 }
96 dump_printf_color("\n", color); 94 dump_printf_color("\n", color);
97 } 95 }
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 2fbf6a463c81..dab9e754a281 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -151,7 +151,6 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
151 continue; 151 continue;
152 pbf += n + 3; 152 pbf += n + 3;
153 if (*pbf == 'x') { /* vm_exec */ 153 if (*pbf == 'x') { /* vm_exec */
154 u64 vm_pgoff;
155 char *execname = strchr(bf, '/'); 154 char *execname = strchr(bf, '/');
156 155
157 /* Catch VDSO */ 156 /* Catch VDSO */
@@ -162,12 +161,7 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
162 continue; 161 continue;
163 162
164 pbf += 3; 163 pbf += 3;
165 n = hex2u64(pbf, &vm_pgoff); 164 n = hex2u64(pbf, &ev.mmap.pgoff);
166 /* pgoff is in bytes, not pages */
167 if (n >= 0)
168 ev.mmap.pgoff = vm_pgoff << getpagesize();
169 else
170 ev.mmap.pgoff = 0;
171 165
172 size = strlen(execname); 166 size = strlen(execname);
173 execname[size - 1] = '\0'; /* Remove \n */ 167 execname[size - 1] = '\0'; /* Remove \n */
@@ -340,30 +334,29 @@ int event__synthesize_kernel_mmap(event__handler_t process,
340 return process(&ev, session); 334 return process(&ev, session);
341} 335}
342 336
343static void thread__comm_adjust(struct thread *self) 337static void thread__comm_adjust(struct thread *self, struct hists *hists)
344{ 338{
345 char *comm = self->comm; 339 char *comm = self->comm;
346 340
347 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep && 341 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
348 (!symbol_conf.comm_list || 342 (!symbol_conf.comm_list ||
349 strlist__has_entry(symbol_conf.comm_list, comm))) { 343 strlist__has_entry(symbol_conf.comm_list, comm))) {
350 unsigned int slen = strlen(comm); 344 u16 slen = strlen(comm);
351 345
352 if (slen > comms__col_width) { 346 if (hists__new_col_len(hists, HISTC_COMM, slen))
353 comms__col_width = slen; 347 hists__set_col_len(hists, HISTC_THREAD, slen + 6);
354 threads__col_width = slen + 6;
355 }
356 } 348 }
357} 349}
358 350
359static int thread__set_comm_adjust(struct thread *self, const char *comm) 351static int thread__set_comm_adjust(struct thread *self, const char *comm,
352 struct hists *hists)
360{ 353{
361 int ret = thread__set_comm(self, comm); 354 int ret = thread__set_comm(self, comm);
362 355
363 if (ret) 356 if (ret)
364 return ret; 357 return ret;
365 358
366 thread__comm_adjust(self); 359 thread__comm_adjust(self, hists);
367 360
368 return 0; 361 return 0;
369} 362}
@@ -374,7 +367,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
374 367
375 dump_printf(": %s:%d\n", self->comm.comm, self->comm.tid); 368 dump_printf(": %s:%d\n", self->comm.comm, self->comm.tid);
376 369
377 if (thread == NULL || thread__set_comm_adjust(thread, self->comm.comm)) { 370 if (thread == NULL || thread__set_comm_adjust(thread, self->comm.comm,
371 &session->hists)) {
378 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 372 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
379 return -1; 373 return -1;
380 } 374 }
@@ -456,6 +450,7 @@ static int event__process_kernel_mmap(event_t *self,
456 goto out_problem; 450 goto out_problem;
457 451
458 map->dso->short_name = name; 452 map->dso->short_name = name;
453 map->dso->sname_alloc = 1;
459 map->end = map->start + self->mmap.len; 454 map->end = map->start + self->mmap.len;
460 } else if (is_kernel_mmap) { 455 } else if (is_kernel_mmap) {
461 const char *symbol_name = (self->mmap.filename + 456 const char *symbol_name = (self->mmap.filename +
@@ -514,12 +509,13 @@ int event__process_mmap(event_t *self, struct perf_session *session)
514 if (machine == NULL) 509 if (machine == NULL)
515 goto out_problem; 510 goto out_problem;
516 thread = perf_session__findnew(session, self->mmap.pid); 511 thread = perf_session__findnew(session, self->mmap.pid);
512 if (thread == NULL)
513 goto out_problem;
517 map = map__new(&machine->user_dsos, self->mmap.start, 514 map = map__new(&machine->user_dsos, self->mmap.start,
518 self->mmap.len, self->mmap.pgoff, 515 self->mmap.len, self->mmap.pgoff,
519 self->mmap.pid, self->mmap.filename, 516 self->mmap.pid, self->mmap.filename,
520 MAP__FUNCTION, session->cwd, session->cwdlen); 517 MAP__FUNCTION);
521 518 if (map == NULL)
522 if (thread == NULL || map == NULL)
523 goto out_problem; 519 goto out_problem;
524 520
525 thread__insert_map(thread, map); 521 thread__insert_map(thread, map);
@@ -552,6 +548,26 @@ int event__process_task(event_t *self, struct perf_session *session)
552 return 0; 548 return 0;
553} 549}
554 550
551int event__process(event_t *event, struct perf_session *session)
552{
553 switch (event->header.type) {
554 case PERF_RECORD_COMM:
555 event__process_comm(event, session);
556 break;
557 case PERF_RECORD_MMAP:
558 event__process_mmap(event, session);
559 break;
560 case PERF_RECORD_FORK:
561 case PERF_RECORD_EXIT:
562 event__process_task(event, session);
563 break;
564 default:
565 break;
566 }
567
568 return 0;
569}
570
555void thread__find_addr_map(struct thread *self, 571void thread__find_addr_map(struct thread *self,
556 struct perf_session *session, u8 cpumode, 572 struct perf_session *session, u8 cpumode,
557 enum map_type type, pid_t pid, u64 addr, 573 enum map_type type, pid_t pid, u64 addr,
@@ -641,27 +657,49 @@ void thread__find_addr_location(struct thread *self,
641 al->sym = NULL; 657 al->sym = NULL;
642} 658}
643 659
644static void dso__calc_col_width(struct dso *self) 660static void dso__calc_col_width(struct dso *self, struct hists *hists)
645{ 661{
646 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep && 662 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
647 (!symbol_conf.dso_list || 663 (!symbol_conf.dso_list ||
648 strlist__has_entry(symbol_conf.dso_list, self->name))) { 664 strlist__has_entry(symbol_conf.dso_list, self->name))) {
649 u16 slen = self->short_name_len; 665 u16 slen = dso__name_len(self);
650 if (verbose) 666 hists__new_col_len(hists, HISTC_DSO, slen);
651 slen = self->long_name_len;
652 if (dsos__col_width < slen)
653 dsos__col_width = slen;
654 } 667 }
655 668
656 self->slen_calculated = 1; 669 self->slen_calculated = 1;
657} 670}
658 671
659int event__preprocess_sample(const event_t *self, struct perf_session *session, 672int event__preprocess_sample(const event_t *self, struct perf_session *session,
660 struct addr_location *al, symbol_filter_t filter) 673 struct addr_location *al, struct sample_data *data,
674 symbol_filter_t filter)
661{ 675{
662 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 676 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
663 struct thread *thread = perf_session__findnew(session, self->ip.pid); 677 struct thread *thread;
664 678
679 event__parse_sample(self, session->sample_type, data);
680
681 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
682 self->header.misc, data->pid, data->tid, data->ip,
683 data->period, data->cpu);
684
685 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
686 unsigned int i;
687
688 dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
689
690 if (!ip_callchain__valid(data->callchain, self)) {
691 pr_debug("call-chain problem with event, "
692 "skipping it.\n");
693 goto out_filtered;
694 }
695
696 if (dump_trace) {
697 for (i = 0; i < data->callchain->nr; i++)
698 dump_printf("..... %2d: %016Lx\n",
699 i, data->callchain->ips[i]);
700 }
701 }
702 thread = perf_session__findnew(session, self->ip.pid);
665 if (thread == NULL) 703 if (thread == NULL)
666 return -1; 704 return -1;
667 705
@@ -687,6 +725,7 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
687 al->map ? al->map->dso->long_name : 725 al->map ? al->map->dso->long_name :
688 al->level == 'H' ? "[hypervisor]" : "<not found>"); 726 al->level == 'H' ? "[hypervisor]" : "<not found>");
689 al->sym = NULL; 727 al->sym = NULL;
728 al->cpu = data->cpu;
690 729
691 if (al->map) { 730 if (al->map) {
692 if (symbol_conf.dso_list && 731 if (symbol_conf.dso_list &&
@@ -703,16 +742,17 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
703 * sampled. 742 * sampled.
704 */ 743 */
705 if (!sort_dso.elide && !al->map->dso->slen_calculated) 744 if (!sort_dso.elide && !al->map->dso->slen_calculated)
706 dso__calc_col_width(al->map->dso); 745 dso__calc_col_width(al->map->dso, &session->hists);
707 746
708 al->sym = map__find_symbol(al->map, al->addr, filter); 747 al->sym = map__find_symbol(al->map, al->addr, filter);
709 } else { 748 } else {
710 const unsigned int unresolved_col_width = BITS_PER_LONG / 4; 749 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
711 750
712 if (dsos__col_width < unresolved_col_width && 751 if (hists__col_len(&session->hists, HISTC_DSO) < unresolved_col_width &&
713 !symbol_conf.col_width_list_str && !symbol_conf.field_sep && 752 !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
714 !symbol_conf.dso_list) 753 !symbol_conf.dso_list)
715 dsos__col_width = unresolved_col_width; 754 hists__set_col_len(&session->hists, HISTC_DSO,
755 unresolved_col_width);
716 } 756 }
717 757
718 if (symbol_conf.sym_list && al->sym && 758 if (symbol_conf.sym_list && al->sym &&
@@ -726,9 +766,9 @@ out_filtered:
726 return 0; 766 return 0;
727} 767}
728 768
729int event__parse_sample(event_t *event, u64 type, struct sample_data *data) 769int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
730{ 770{
731 u64 *array = event->sample.array; 771 const u64 *array = event->sample.array;
732 772
733 if (type & PERF_SAMPLE_IP) { 773 if (type & PERF_SAMPLE_IP) {
734 data->ip = event->ip.ip; 774 data->ip = event->ip.ip;
@@ -767,7 +807,8 @@ int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
767 u32 *p = (u32 *)array; 807 u32 *p = (u32 *)array;
768 data->cpu = *p; 808 data->cpu = *p;
769 array++; 809 array++;
770 } 810 } else
811 data->cpu = -1;
771 812
772 if (type & PERF_SAMPLE_PERIOD) { 813 if (type & PERF_SAMPLE_PERIOD) {
773 data->period = *array; 814 data->period = *array;
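
event__parse_sample() walks the variable-length sample array in the fixed PERF_SAMPLE_* bit order, consuming entries only for the fields that were recorded, and with this change data->cpu is explicitly set to -1 when PERF_SAMPLE_CPU is absent instead of being left uninitialized. A compact Python model of that walk (the bit values mirror the kernel's PERF_SAMPLE_IP/TID/TIME/CPU flags, but the field list and packing are abridged for illustration):

    SAMPLE_IP, SAMPLE_TID, SAMPLE_TIME, SAMPLE_CPU = 1 << 0, 1 << 1, 1 << 2, 1 << 7

    def parse_sample(sample_type, array):
        data = {"cpu": -1}               # new default when PERF_SAMPLE_CPU is absent
        it = iter(array)
        if sample_type & SAMPLE_IP:
            data["ip"] = next(it)
        if sample_type & SAMPLE_TID:
            data["tid"] = next(it)
        if sample_type & SAMPLE_TIME:
            data["time"] = next(it)
        if sample_type & SAMPLE_CPU:
            data["cpu"] = next(it)
        return data

    print(parse_sample(SAMPLE_IP | SAMPLE_TIME, [0xffffffff81000000, 123456789]))
    # -> ip and time filled in, cpu stays -1
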
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8577085db067..8e790dae7026 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -154,11 +154,13 @@ int event__process_comm(event_t *self, struct perf_session *session);
154int event__process_lost(event_t *self, struct perf_session *session); 154int event__process_lost(event_t *self, struct perf_session *session);
155int event__process_mmap(event_t *self, struct perf_session *session); 155int event__process_mmap(event_t *self, struct perf_session *session);
156int event__process_task(event_t *self, struct perf_session *session); 156int event__process_task(event_t *self, struct perf_session *session);
157int event__process(event_t *event, struct perf_session *session);
157 158
158struct addr_location; 159struct addr_location;
159int event__preprocess_sample(const event_t *self, struct perf_session *session, 160int event__preprocess_sample(const event_t *self, struct perf_session *session,
160 struct addr_location *al, symbol_filter_t filter); 161 struct addr_location *al, struct sample_data *data,
161int event__parse_sample(event_t *event, u64 type, struct sample_data *data); 162 symbol_filter_t filter);
163int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
162 164
163extern const char *event__name[]; 165extern const char *event__name[];
164 166
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1f62435f96c2..d7e67b167ea3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -16,6 +16,8 @@
16#include "symbol.h" 16#include "symbol.h"
17#include "debug.h" 17#include "debug.h"
18 18
19static bool no_buildid_cache = false;
20
19/* 21/*
20 * Create new perf.data header attribute: 22 * Create new perf.data header attribute:
21 */ 23 */
@@ -385,8 +387,7 @@ static int perf_session__cache_build_ids(struct perf_session *self)
385 int ret; 387 int ret;
386 char debugdir[PATH_MAX]; 388 char debugdir[PATH_MAX];
387 389
388 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), 390 snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
389 DEBUG_CACHE_DIR);
390 391
391 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 392 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
392 return -1; 393 return -1;
@@ -471,7 +472,8 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
471 } 472 }
472 buildid_sec->size = lseek(fd, 0, SEEK_CUR) - 473 buildid_sec->size = lseek(fd, 0, SEEK_CUR) -
473 buildid_sec->offset; 474 buildid_sec->offset;
474 perf_session__cache_build_ids(session); 475 if (!no_buildid_cache)
476 perf_session__cache_build_ids(session);
475 } 477 }
476 478
477 lseek(fd, sec_start, SEEK_SET); 479 lseek(fd, sec_start, SEEK_SET);
@@ -1190,3 +1192,8 @@ int event__process_build_id(event_t *self,
1190 session); 1192 session);
1191 return 0; 1193 return 0;
1192} 1194}
1195
1196void disable_buildid_cache(void)
1197{
1198 no_buildid_cache = true;
1199}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 784ee0bdda77..e7263d49bcf0 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -5,11 +5,61 @@
5#include "sort.h" 5#include "sort.h"
6#include <math.h> 6#include <math.h>
7 7
8enum hist_filter {
9 HIST_FILTER__DSO,
10 HIST_FILTER__THREAD,
11 HIST_FILTER__PARENT,
12};
13
8struct callchain_param callchain_param = { 14struct callchain_param callchain_param = {
9 .mode = CHAIN_GRAPH_REL, 15 .mode = CHAIN_GRAPH_REL,
10 .min_percent = 0.5 16 .min_percent = 0.5
11}; 17};
12 18
19u16 hists__col_len(struct hists *self, enum hist_column col)
20{
21 return self->col_len[col];
22}
23
24void hists__set_col_len(struct hists *self, enum hist_column col, u16 len)
25{
26 self->col_len[col] = len;
27}
28
29bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len)
30{
31 if (len > hists__col_len(self, col)) {
32 hists__set_col_len(self, col, len);
33 return true;
34 }
35 return false;
36}
37
38static void hists__reset_col_len(struct hists *self)
39{
40 enum hist_column col;
41
42 for (col = 0; col < HISTC_NR_COLS; ++col)
43 hists__set_col_len(self, col, 0);
44}
45
46static void hists__calc_col_len(struct hists *self, struct hist_entry *h)
47{
48 u16 len;
49
50 if (h->ms.sym)
51 hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen);
52
53 len = thread__comm_len(h->thread);
54 if (hists__new_col_len(self, HISTC_COMM, len))
55 hists__set_col_len(self, HISTC_THREAD, len + 6);
56
57 if (h->ms.map) {
58 len = dso__name_len(h->ms.map->dso);
59 hists__new_col_len(self, HISTC_DSO, len);
60 }
61}
62
13static void hist_entry__add_cpumode_period(struct hist_entry *self, 63static void hist_entry__add_cpumode_period(struct hist_entry *self,
14 unsigned int cpumode, u64 period) 64 unsigned int cpumode, u64 period)
15{ 65{
@@ -43,6 +93,8 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
43 if (self != NULL) { 93 if (self != NULL) {
44 *self = *template; 94 *self = *template;
45 self->nr_events = 1; 95 self->nr_events = 1;
96 if (self->ms.map)
97 self->ms.map->referenced = true;
46 if (symbol_conf.use_callchain) 98 if (symbol_conf.use_callchain)
47 callchain_init(self->callchain); 99 callchain_init(self->callchain);
48 } 100 }
@@ -50,11 +102,19 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
50 return self; 102 return self;
51} 103}
52 104
53static void hists__inc_nr_entries(struct hists *self, struct hist_entry *entry) 105static void hists__inc_nr_entries(struct hists *self, struct hist_entry *h)
54{ 106{
55 if (entry->ms.sym && self->max_sym_namelen < entry->ms.sym->namelen) 107 if (!h->filtered) {
56 self->max_sym_namelen = entry->ms.sym->namelen; 108 hists__calc_col_len(self, h);
57 ++self->nr_entries; 109 ++self->nr_entries;
110 }
111}
112
113static u8 symbol__parent_filter(const struct symbol *parent)
114{
115 if (symbol_conf.exclude_other && parent == NULL)
116 return 1 << HIST_FILTER__PARENT;
117 return 0;
58} 118}
59 119
60struct hist_entry *__hists__add_entry(struct hists *self, 120struct hist_entry *__hists__add_entry(struct hists *self,
@@ -70,10 +130,12 @@ struct hist_entry *__hists__add_entry(struct hists *self,
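
The col_len helpers above replace the old per-sort-entry width globals: each hists instance now records, per output column, the widest string it has seen, hists__new_col_len() only ever widens a column (returning true when it did), and hists__calc_col_len() runs once for every unfiltered entry. The idea in miniature, as a purely illustrative Python sketch:

    col_len = {}                             # one entry per output column

    def new_col_len(col, length):
        # widen only; report whether the column actually grew
        if length > col_len.get(col, 0):
            col_len[col] = length
            return True
        return False

    new_col_len("comm", len("firefox"))      # True, comm column is now 7 wide
    new_col_len("comm", len("cc1"))          # False, 7 already fits "cc1"
    print(col_len)                           # -> {'comm': 7}
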
70 .map = al->map, 130 .map = al->map,
71 .sym = al->sym, 131 .sym = al->sym,
72 }, 132 },
133 .cpu = al->cpu,
73 .ip = al->addr, 134 .ip = al->addr,
74 .level = al->level, 135 .level = al->level,
75 .period = period, 136 .period = period,
76 .parent = sym_parent, 137 .parent = sym_parent,
138 .filtered = symbol__parent_filter(sym_parent),
77 }; 139 };
78 int cmp; 140 int cmp;
79 141
@@ -191,7 +253,7 @@ void hists__collapse_resort(struct hists *self)
191 tmp = RB_ROOT; 253 tmp = RB_ROOT;
192 next = rb_first(&self->entries); 254 next = rb_first(&self->entries);
193 self->nr_entries = 0; 255 self->nr_entries = 0;
194 self->max_sym_namelen = 0; 256 hists__reset_col_len(self);
195 257
196 while (next) { 258 while (next) {
197 n = rb_entry(next, struct hist_entry, rb_node); 259 n = rb_entry(next, struct hist_entry, rb_node);
@@ -248,7 +310,7 @@ void hists__output_resort(struct hists *self)
248 next = rb_first(&self->entries); 310 next = rb_first(&self->entries);
249 311
250 self->nr_entries = 0; 312 self->nr_entries = 0;
251 self->max_sym_namelen = 0; 313 hists__reset_col_len(self);
252 314
253 while (next) { 315 while (next) {
254 n = rb_entry(next, struct hist_entry, rb_node); 316 n = rb_entry(next, struct hist_entry, rb_node);
@@ -515,8 +577,9 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
515} 577}
516 578
517int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, 579int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
518 struct hists *pair_hists, bool show_displacement, 580 struct hists *hists, struct hists *pair_hists,
519 long displacement, bool color, u64 session_total) 581 bool show_displacement, long displacement,
582 bool color, u64 session_total)
520{ 583{
521 struct sort_entry *se; 584 struct sort_entry *se;
522 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; 585 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
@@ -620,29 +683,25 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
620 683
621 ret += snprintf(s + ret, size - ret, "%s", sep ?: " "); 684 ret += snprintf(s + ret, size - ret, "%s", sep ?: " ");
622 ret += se->se_snprintf(self, s + ret, size - ret, 685 ret += se->se_snprintf(self, s + ret, size - ret,
623 se->se_width ? *se->se_width : 0); 686 hists__col_len(hists, se->se_width_idx));
624 } 687 }
625 688
626 return ret; 689 return ret;
627} 690}
628 691
629int hist_entry__fprintf(struct hist_entry *self, struct hists *pair_hists, 692int hist_entry__fprintf(struct hist_entry *self, struct hists *hists,
630 bool show_displacement, long displacement, FILE *fp, 693 struct hists *pair_hists, bool show_displacement,
631 u64 session_total) 694 long displacement, FILE *fp, u64 session_total)
632{ 695{
633 char bf[512]; 696 char bf[512];
634 int ret; 697 hist_entry__snprintf(self, bf, sizeof(bf), hists, pair_hists,
635 698 show_displacement, displacement,
636 ret = hist_entry__snprintf(self, bf, sizeof(bf), pair_hists, 699 true, session_total);
637 show_displacement, displacement,
638 true, session_total);
639 if (!ret)
640 return 0;
641
642 return fprintf(fp, "%s\n", bf); 700 return fprintf(fp, "%s\n", bf);
643} 701}
644 702
645static size_t hist_entry__fprintf_callchain(struct hist_entry *self, FILE *fp, 703static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
704 struct hists *hists, FILE *fp,
646 u64 session_total) 705 u64 session_total)
647{ 706{
648 int left_margin = 0; 707 int left_margin = 0;
@@ -650,7 +709,7 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, FILE *fp,
650 if (sort__first_dimension == SORT_COMM) { 709 if (sort__first_dimension == SORT_COMM) {
651 struct sort_entry *se = list_first_entry(&hist_entry__sort_list, 710 struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
652 typeof(*se), list); 711 typeof(*se), list);
653 left_margin = se->se_width ? *se->se_width : 0; 712 left_margin = hists__col_len(hists, se->se_width_idx);
654 left_margin -= thread__comm_len(self->thread); 713 left_margin -= thread__comm_len(self->thread);
655 } 714 }
656 715
@@ -721,17 +780,17 @@ size_t hists__fprintf(struct hists *self, struct hists *pair,
721 continue; 780 continue;
722 } 781 }
723 width = strlen(se->se_header); 782 width = strlen(se->se_header);
724 if (se->se_width) { 783 if (symbol_conf.col_width_list_str) {
725 if (symbol_conf.col_width_list_str) { 784 if (col_width) {
726 if (col_width) { 785 hists__set_col_len(self, se->se_width_idx,
727 *se->se_width = atoi(col_width); 786 atoi(col_width));
728 col_width = strchr(col_width, ','); 787 col_width = strchr(col_width, ',');
729 if (col_width) 788 if (col_width)
730 ++col_width; 789 ++col_width;
731 }
732 } 790 }
733 width = *se->se_width = max(*se->se_width, width);
734 } 791 }
792 if (!hists__new_col_len(self, se->se_width_idx, width))
793 width = hists__col_len(self, se->se_width_idx);
735 fprintf(fp, " %*s", width, se->se_header); 794 fprintf(fp, " %*s", width, se->se_header);
736 } 795 }
737 fprintf(fp, "\n"); 796 fprintf(fp, "\n");
@@ -754,9 +813,8 @@ size_t hists__fprintf(struct hists *self, struct hists *pair,
754 continue; 813 continue;
755 814
756 fprintf(fp, " "); 815 fprintf(fp, " ");
757 if (se->se_width) 816 width = hists__col_len(self, se->se_width_idx);
758 width = *se->se_width; 817 if (width == 0)
759 else
760 width = strlen(se->se_header); 818 width = strlen(se->se_header);
761 for (i = 0; i < width; i++) 819 for (i = 0; i < width; i++)
762 fprintf(fp, "."); 820 fprintf(fp, ".");
@@ -767,7 +825,6 @@ size_t hists__fprintf(struct hists *self, struct hists *pair,
767print_entries: 825print_entries:
768 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { 826 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
769 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 827 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
770 int cnt;
771 828
772 if (show_displacement) { 829 if (show_displacement) {
773 if (h->pair != NULL) 830 if (h->pair != NULL)
@@ -777,17 +834,12 @@ print_entries:
777 displacement = 0; 834 displacement = 0;
778 ++position; 835 ++position;
779 } 836 }
780 cnt = hist_entry__fprintf(h, pair, show_displacement, 837 ret += hist_entry__fprintf(h, self, pair, show_displacement,
781 displacement, fp, self->stats.total_period); 838 displacement, fp, self->stats.total_period);
782 /* Ignore those that didn't match the parent filter */
783 if (!cnt)
784 continue;
785
786 ret += cnt;
787 839
788 if (symbol_conf.use_callchain) 840 if (symbol_conf.use_callchain)
789 ret += hist_entry__fprintf_callchain(h, fp, self->stats.total_period); 841 ret += hist_entry__fprintf_callchain(h, self, fp,
790 842 self->stats.total_period);
791 if (h->ms.map == NULL && verbose > 1) { 843 if (h->ms.map == NULL && verbose > 1) {
792 __map_groups__fprintf_maps(&h->thread->mg, 844 __map_groups__fprintf_maps(&h->thread->mg,
793 MAP__FUNCTION, verbose, fp); 845 MAP__FUNCTION, verbose, fp);
@@ -800,10 +852,49 @@ print_entries:
800 return ret; 852 return ret;
801} 853}
802 854
803enum hist_filter { 855/*
804 HIST_FILTER__DSO, 856 * See hists__fprintf to match the column widths
805 HIST_FILTER__THREAD, 857 */
806}; 858unsigned int hists__sort_list_width(struct hists *self)
859{
860 struct sort_entry *se;
861 int ret = 9; /* total % */
862
863 if (symbol_conf.show_cpu_utilization) {
864 ret += 7; /* count_sys % */
865 ret += 6; /* count_us % */
866 if (perf_guest) {
867 ret += 13; /* count_guest_sys % */
868 ret += 12; /* count_guest_us % */
869 }
870 }
871
872 if (symbol_conf.show_nr_samples)
873 ret += 11;
874
875 list_for_each_entry(se, &hist_entry__sort_list, list)
876 if (!se->elide)
877 ret += 2 + hists__col_len(self, se->se_width_idx);
878
879 return ret;
880}
881
882static void hists__remove_entry_filter(struct hists *self, struct hist_entry *h,
883 enum hist_filter filter)
884{
885 h->filtered &= ~(1 << filter);
886 if (h->filtered)
887 return;
888
889 ++self->nr_entries;
890 if (h->ms.unfolded)
891 self->nr_entries += h->nr_rows;
892 h->row_offset = 0;
893 self->stats.total_period += h->period;
894 self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
895
896 hists__calc_col_len(self, h);
897}
807 898
808void hists__filter_by_dso(struct hists *self, const struct dso *dso) 899void hists__filter_by_dso(struct hists *self, const struct dso *dso)
809{ 900{
@@ -811,7 +902,7 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso)
811 902
812 self->nr_entries = self->stats.total_period = 0; 903 self->nr_entries = self->stats.total_period = 0;
813 self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; 904 self->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
814 self->max_sym_namelen = 0; 905 hists__reset_col_len(self);
815 906
816 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { 907 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
817 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 908 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
@@ -824,15 +915,7 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso)
824 continue; 915 continue;
825 } 916 }
826 917
827 h->filtered &= ~(1 << HIST_FILTER__DSO); 918 hists__remove_entry_filter(self, h, HIST_FILTER__DSO);
828 if (!h->filtered) {
829 ++self->nr_entries;
830 self->stats.total_period += h->period;
831 self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
832 if (h->ms.sym &&
833 self->max_sym_namelen < h->ms.sym->namelen)
834 self->max_sym_namelen = h->ms.sym->namelen;
835 }
836 } 919 }
837} 920}
838 921
@@ -842,7 +925,7 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
842 925
843 self->nr_entries = self->stats.total_period = 0; 926 self->nr_entries = self->stats.total_period = 0;
844 self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; 927 self->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
845 self->max_sym_namelen = 0; 928 hists__reset_col_len(self);
846 929
847 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { 930 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
848 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 931 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
@@ -851,15 +934,8 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
851 h->filtered |= (1 << HIST_FILTER__THREAD); 934 h->filtered |= (1 << HIST_FILTER__THREAD);
852 continue; 935 continue;
853 } 936 }
854 h->filtered &= ~(1 << HIST_FILTER__THREAD); 937
855 if (!h->filtered) { 938 hists__remove_entry_filter(self, h, HIST_FILTER__THREAD);
856 ++self->nr_entries;
857 self->stats.total_period += h->period;
858 self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
859 if (h->ms.sym &&
860 self->max_sym_namelen < h->ms.sym->namelen)
861 self->max_sym_namelen = h->ms.sym->namelen;
862 }
863 } 939 }
864} 940}
865 941
@@ -1052,7 +1128,7 @@ fallback:
1052 dso, dso->long_name, sym, sym->name); 1128 dso, dso->long_name, sym, sym->name);
1053 1129
1054 snprintf(command, sizeof(command), 1130 snprintf(command, sizeof(command),
1055 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s|expand", 1131 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
1056 map__rip_2objdump(map, sym->start), 1132 map__rip_2objdump(map, sym->start),
1057 map__rip_2objdump(map, sym->end), 1133 map__rip_2objdump(map, sym->end),
1058 filename, filename); 1134 filename, filename);
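
Editor's sketch (not part of the patch): the hist.c hunks above replace the single max_sym_namelen field with per-column width bookkeeping, but the bodies of the new accessors are not among the quoted hunks. The following self-contained toy, written against the col_len[HISTC_NR_COLS] array added to struct hists further below, only illustrates the contract implied by the callers (widths grow monotonically until reset; hists__new_col_len() reports whether the proposed width became the new maximum, which is how hists__fprintf decides whether to fall back to the stored width). Function bodies here are assumptions, not the perf implementation.

	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>

	typedef uint16_t u16;

	enum hist_column { HISTC_SYMBOL, HISTC_DSO, HISTC_THREAD, HISTC_COMM,
			   HISTC_PARENT, HISTC_CPU, HISTC_NR_COLS };

	struct hists { u16 col_len[HISTC_NR_COLS]; };

	static u16 hists__col_len(struct hists *self, enum hist_column col)
	{
		return self->col_len[col];
	}

	static void hists__set_col_len(struct hists *self, enum hist_column col, u16 len)
	{
		self->col_len[col] = len;
	}

	/* true only when 'len' widened the column */
	static bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len)
	{
		if (len > self->col_len[col]) {
			self->col_len[col] = len;
			return true;
		}
		return false;
	}

	static void hists__reset_col_len(struct hists *self)
	{
		memset(self->col_len, 0, sizeof(self->col_len));
	}

	int main(void)
	{
		struct hists h;

		hists__reset_col_len(&h);
		hists__new_col_len(&h, HISTC_COMM, 16);	/* a 16-char comm seen    */
		hists__new_col_len(&h, HISTC_COMM, 12);	/* narrower one: ignored  */
		hists__set_col_len(&h, HISTC_DSO, 8);	/* --col-width override   */
		return hists__col_len(&h, HISTC_COMM) == 16 ? 0 : 1;
	}
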
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 83fa33a7b38b..65a48db46a29 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -56,6 +56,16 @@ struct events_stats {
56 u32 nr_unknown_events; 56 u32 nr_unknown_events;
57}; 57};
58 58
59enum hist_column {
60 HISTC_SYMBOL,
61 HISTC_DSO,
62 HISTC_THREAD,
63 HISTC_COMM,
64 HISTC_PARENT,
65 HISTC_CPU,
66 HISTC_NR_COLS, /* Last entry */
67};
68
59struct hists { 69struct hists {
60 struct rb_node rb_node; 70 struct rb_node rb_node;
61 struct rb_root entries; 71 struct rb_root entries;
@@ -64,7 +74,7 @@ struct hists {
64 u64 config; 74 u64 config;
65 u64 event_stream; 75 u64 event_stream;
66 u32 type; 76 u32 type;
67 u32 max_sym_namelen; 77 u16 col_len[HISTC_NR_COLS];
68}; 78};
69 79
70struct hist_entry *__hists__add_entry(struct hists *self, 80struct hist_entry *__hists__add_entry(struct hists *self,
@@ -72,12 +82,13 @@ struct hist_entry *__hists__add_entry(struct hists *self,
72 struct symbol *parent, u64 period); 82 struct symbol *parent, u64 period);
73extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); 83extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
74extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); 84extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
75int hist_entry__fprintf(struct hist_entry *self, struct hists *pair_hists, 85int hist_entry__fprintf(struct hist_entry *self, struct hists *hists,
76 bool show_displacement, long displacement, FILE *fp, 86 struct hists *pair_hists, bool show_displacement,
77 u64 total); 87 long displacement, FILE *fp, u64 total);
78int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, 88int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
79 struct hists *pair_hists, bool show_displacement, 89 struct hists *hists, struct hists *pair_hists,
80 long displacement, bool color, u64 total); 90 bool show_displacement, long displacement,
91 bool color, u64 total);
81void hist_entry__free(struct hist_entry *); 92void hist_entry__free(struct hist_entry *);
82 93
83void hists__output_resort(struct hists *self); 94void hists__output_resort(struct hists *self);
@@ -95,6 +106,10 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head);
95void hists__filter_by_dso(struct hists *self, const struct dso *dso); 106void hists__filter_by_dso(struct hists *self, const struct dso *dso);
96void hists__filter_by_thread(struct hists *self, const struct thread *thread); 107void hists__filter_by_thread(struct hists *self, const struct thread *thread);
97 108
109u16 hists__col_len(struct hists *self, enum hist_column col);
110void hists__set_col_len(struct hists *self, enum hist_column col, u16 len);
111bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len);
112
98#ifdef NO_NEWT_SUPPORT 113#ifdef NO_NEWT_SUPPORT
99static inline int hists__browse(struct hists *self __used, 114static inline int hists__browse(struct hists *self __used,
100 const char *helpline __used, 115 const char *helpline __used,
@@ -126,4 +141,7 @@ int hist_entry__tui_annotate(struct hist_entry *self);
126 141
127int hists__tui_browse_tree(struct rb_root *self, const char *help); 142int hists__tui_browse_tree(struct rb_root *self, const char *help);
128#endif 143#endif
144
145unsigned int hists__sort_list_width(struct hists *self);
146
129#endif /* __PERF_HIST_H */ 147#endif /* __PERF_HIST_H */
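
Editor's sketch (not part of the patch): the filter hunks above fold the per-filter accounting into hists__remove_entry_filter(), which clears one bit of h->filtered and only re-accounts the entry (nr_entries, total_period, nr_events, column widths) once no bit remains set. The patch also adds a HIST_FILTER__PARENT bit set at hist_entry creation time; the enum's new home and exact values are not visible in the quoted hunks, so the ones below are assumptions used purely to demonstrate the bitmask semantics.

	#include <stdio.h>

	enum hist_filter {
		HIST_FILTER__DSO,
		HIST_FILTER__THREAD,
		HIST_FILTER__PARENT,	/* assumed ordering, for illustration */
	};

	int main(void)
	{
		unsigned char filtered = 0;

		filtered |= 1 << HIST_FILTER__DSO;	/* hidden by the dso zoom     */
		filtered |= 1 << HIST_FILTER__THREAD;	/* ...and by the thread zoom  */

		filtered &= ~(1 << HIST_FILTER__DSO);	/* zoom back out of the dso   */

		/* hists__remove_entry_filter() only re-accounts the entry once
		 * every bit is clear, so it is still invisible here: */
		printf("visible again: %s\n", filtered ? "no" : "yes");
		return 0;
	}
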
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e672f2fef65b..3a7eb6ec0eec 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -17,16 +17,6 @@ static inline int is_anon_memory(const char *filename)
17 return strcmp(filename, "//anon") == 0; 17 return strcmp(filename, "//anon") == 0;
18} 18}
19 19
20static int strcommon(const char *pathname, char *cwd, int cwdlen)
21{
22 int n = 0;
23
24 while (n < cwdlen && pathname[n] == cwd[n])
25 ++n;
26
27 return n;
28}
29
30void map__init(struct map *self, enum map_type type, 20void map__init(struct map *self, enum map_type type,
31 u64 start, u64 end, u64 pgoff, struct dso *dso) 21 u64 start, u64 end, u64 pgoff, struct dso *dso)
32{ 22{
@@ -39,11 +29,12 @@ void map__init(struct map *self, enum map_type type,
39 self->unmap_ip = map__unmap_ip; 29 self->unmap_ip = map__unmap_ip;
40 RB_CLEAR_NODE(&self->rb_node); 30 RB_CLEAR_NODE(&self->rb_node);
41 self->groups = NULL; 31 self->groups = NULL;
32 self->referenced = false;
42} 33}
43 34
44struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, 35struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
45 u64 pgoff, u32 pid, char *filename, 36 u64 pgoff, u32 pid, char *filename,
46 enum map_type type, char *cwd, int cwdlen) 37 enum map_type type)
47{ 38{
48 struct map *self = malloc(sizeof(*self)); 39 struct map *self = malloc(sizeof(*self));
49 40
@@ -52,16 +43,6 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
52 struct dso *dso; 43 struct dso *dso;
53 int anon; 44 int anon;
54 45
55 if (cwd) {
56 int n = strcommon(filename, cwd, cwdlen);
57
58 if (n == cwdlen) {
59 snprintf(newfilename, sizeof(newfilename),
60 ".%s", filename + n);
61 filename = newfilename;
62 }
63 }
64
65 anon = is_anon_memory(filename); 46 anon = is_anon_memory(filename);
66 47
67 if (anon) { 48 if (anon) {
@@ -248,6 +229,39 @@ void map_groups__init(struct map_groups *self)
248 self->machine = NULL; 229 self->machine = NULL;
249} 230}
250 231
232static void maps__delete(struct rb_root *self)
233{
234 struct rb_node *next = rb_first(self);
235
236 while (next) {
237 struct map *pos = rb_entry(next, struct map, rb_node);
238
239 next = rb_next(&pos->rb_node);
240 rb_erase(&pos->rb_node, self);
241 map__delete(pos);
242 }
243}
244
245static void maps__delete_removed(struct list_head *self)
246{
247 struct map *pos, *n;
248
249 list_for_each_entry_safe(pos, n, self, node) {
250 list_del(&pos->node);
251 map__delete(pos);
252 }
253}
254
255void map_groups__exit(struct map_groups *self)
256{
257 int i;
258
259 for (i = 0; i < MAP__NR_TYPES; ++i) {
260 maps__delete(&self->maps[i]);
261 maps__delete_removed(&self->removed_maps[i]);
262 }
263}
264
251void map_groups__flush(struct map_groups *self) 265void map_groups__flush(struct map_groups *self)
252{ 266{
253 int type; 267 int type;
@@ -374,6 +388,7 @@ int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
374{ 388{
375 struct rb_root *root = &self->maps[map->type]; 389 struct rb_root *root = &self->maps[map->type];
376 struct rb_node *next = rb_first(root); 390 struct rb_node *next = rb_first(root);
391 int err = 0;
377 392
378 while (next) { 393 while (next) {
379 struct map *pos = rb_entry(next, struct map, rb_node); 394 struct map *pos = rb_entry(next, struct map, rb_node);
@@ -390,20 +405,16 @@ int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
390 405
391 rb_erase(&pos->rb_node, root); 406 rb_erase(&pos->rb_node, root);
392 /* 407 /*
393 * We may have references to this map, for instance in some
394 * hist_entry instances, so just move them to a separate
395 * list.
396 */
397 list_add_tail(&pos->node, &self->removed_maps[map->type]);
398 /*
399 * Now check if we need to create new maps for areas not 408 * Now check if we need to create new maps for areas not
400 * overlapped by the new map: 409 * overlapped by the new map:
401 */ 410 */
402 if (map->start > pos->start) { 411 if (map->start > pos->start) {
403 struct map *before = map__clone(pos); 412 struct map *before = map__clone(pos);
404 413
405 if (before == NULL) 414 if (before == NULL) {
406 return -ENOMEM; 415 err = -ENOMEM;
416 goto move_map;
417 }
407 418
408 before->end = map->start - 1; 419 before->end = map->start - 1;
409 map_groups__insert(self, before); 420 map_groups__insert(self, before);
@@ -414,14 +425,27 @@ int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
414 if (map->end < pos->end) { 425 if (map->end < pos->end) {
415 struct map *after = map__clone(pos); 426 struct map *after = map__clone(pos);
416 427
417 if (after == NULL) 428 if (after == NULL) {
418 return -ENOMEM; 429 err = -ENOMEM;
430 goto move_map;
431 }
419 432
420 after->start = map->end + 1; 433 after->start = map->end + 1;
421 map_groups__insert(self, after); 434 map_groups__insert(self, after);
422 if (verbose >= 2) 435 if (verbose >= 2)
423 map__fprintf(after, fp); 436 map__fprintf(after, fp);
424 } 437 }
438move_map:
439 /*
440 * If we have references, just move them to a separate list.
441 */
442 if (pos->referenced)
443 list_add_tail(&pos->node, &self->removed_maps[map->type]);
444 else
445 map__delete(pos);
446
447 if (err)
448 return err;
425 } 449 }
426 450
427 return 0; 451 return 0;
@@ -493,6 +517,11 @@ void maps__insert(struct rb_root *maps, struct map *map)
493 rb_insert_color(&map->rb_node, maps); 517 rb_insert_color(&map->rb_node, maps);
494} 518}
495 519
520void maps__remove(struct rb_root *self, struct map *map)
521{
522 rb_erase(&map->rb_node, self);
523}
524
496struct map *maps__find(struct rb_root *maps, u64 ip) 525struct map *maps__find(struct rb_root *maps, u64 ip)
497{ 526{
498 struct rb_node **p = &maps->rb_node; 527 struct rb_node **p = &maps->rb_node;
@@ -526,6 +555,31 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid)
526 return self->root_dir == NULL ? -ENOMEM : 0; 555 return self->root_dir == NULL ? -ENOMEM : 0;
527} 556}
528 557
558static void dsos__delete(struct list_head *self)
559{
560 struct dso *pos, *n;
561
562 list_for_each_entry_safe(pos, n, self, node) {
563 list_del(&pos->node);
564 dso__delete(pos);
565 }
566}
567
568void machine__exit(struct machine *self)
569{
570 map_groups__exit(&self->kmaps);
571 dsos__delete(&self->user_dsos);
572 dsos__delete(&self->kernel_dsos);
573 free(self->root_dir);
574 self->root_dir = NULL;
575}
576
577void machine__delete(struct machine *self)
578{
579 machine__exit(self);
580 free(self);
581}
582
529struct machine *machines__add(struct rb_root *self, pid_t pid, 583struct machine *machines__add(struct rb_root *self, pid_t pid,
530 const char *root_dir) 584 const char *root_dir)
531{ 585{
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index f39134512829..78575796d5f3 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -29,7 +29,8 @@ struct map {
29 }; 29 };
30 u64 start; 30 u64 start;
31 u64 end; 31 u64 end;
32 enum map_type type; 32 u8 /* enum map_type */ type;
33 bool referenced;
33 u32 priv; 34 u32 priv;
34 u64 pgoff; 35 u64 pgoff;
35 36
@@ -106,7 +107,7 @@ void map__init(struct map *self, enum map_type type,
106 u64 start, u64 end, u64 pgoff, struct dso *dso); 107 u64 start, u64 end, u64 pgoff, struct dso *dso);
107struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, 108struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
108 u64 pgoff, u32 pid, char *filename, 109 u64 pgoff, u32 pid, char *filename,
109 enum map_type type, char *cwd, int cwdlen); 110 enum map_type type);
110void map__delete(struct map *self); 111void map__delete(struct map *self);
111struct map *map__clone(struct map *self); 112struct map *map__clone(struct map *self);
112int map__overlap(struct map *l, struct map *r); 113int map__overlap(struct map *l, struct map *r);
@@ -125,8 +126,10 @@ void map__reloc_vmlinux(struct map *self);
125size_t __map_groups__fprintf_maps(struct map_groups *self, 126size_t __map_groups__fprintf_maps(struct map_groups *self,
126 enum map_type type, int verbose, FILE *fp); 127 enum map_type type, int verbose, FILE *fp);
127void maps__insert(struct rb_root *maps, struct map *map); 128void maps__insert(struct rb_root *maps, struct map *map);
129void maps__remove(struct rb_root *self, struct map *map);
128struct map *maps__find(struct rb_root *maps, u64 addr); 130struct map *maps__find(struct rb_root *maps, u64 addr);
129void map_groups__init(struct map_groups *self); 131void map_groups__init(struct map_groups *self);
132void map_groups__exit(struct map_groups *self);
130int map_groups__clone(struct map_groups *self, 133int map_groups__clone(struct map_groups *self,
131 struct map_groups *parent, enum map_type type); 134 struct map_groups *parent, enum map_type type);
132size_t map_groups__fprintf(struct map_groups *self, int verbose, FILE *fp); 135size_t map_groups__fprintf(struct map_groups *self, int verbose, FILE *fp);
@@ -142,6 +145,8 @@ struct machine *machines__find(struct rb_root *self, pid_t pid);
142struct machine *machines__findnew(struct rb_root *self, pid_t pid); 145struct machine *machines__findnew(struct rb_root *self, pid_t pid);
143char *machine__mmap_name(struct machine *self, char *bf, size_t size); 146char *machine__mmap_name(struct machine *self, char *bf, size_t size);
144int machine__init(struct machine *self, const char *root_dir, pid_t pid); 147int machine__init(struct machine *self, const char *root_dir, pid_t pid);
148void machine__exit(struct machine *self);
149void machine__delete(struct machine *self);
145 150
146/* 151/*
147 * Default guest kernel is defined by parameter --guestkallsyms 152 * Default guest kernel is defined by parameter --guestkallsyms
@@ -163,6 +168,11 @@ static inline void map_groups__insert(struct map_groups *self, struct map *map)
163 map->groups = self; 168 map->groups = self;
164} 169}
165 170
171static inline void map_groups__remove(struct map_groups *self, struct map *map)
172{
173 maps__remove(&self->maps[map->type], map);
174}
175
166static inline struct map *map_groups__find(struct map_groups *self, 176static inline struct map *map_groups__find(struct map_groups *self,
167 enum map_type type, u64 addr) 177 enum map_type type, u64 addr)
168{ 178{
diff --git a/tools/perf/util/newt.c b/tools/perf/util/newt.c
index 7537ca15900b..91de99b58445 100644
--- a/tools/perf/util/newt.c
+++ b/tools/perf/util/newt.c
@@ -11,6 +11,7 @@
11#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG 11#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
12#endif 12#endif
13#include <slang.h> 13#include <slang.h>
14#include <signal.h>
14#include <stdlib.h> 15#include <stdlib.h>
15#include <newt.h> 16#include <newt.h>
16#include <sys/ttydefaults.h> 17#include <sys/ttydefaults.h>
@@ -278,9 +279,48 @@ struct ui_browser {
278 void *first_visible_entry, *entries; 279 void *first_visible_entry, *entries;
279 u16 top, left, width, height; 280 u16 top, left, width, height;
280 void *priv; 281 void *priv;
282 unsigned int (*refresh_entries)(struct ui_browser *self);
283 void (*seek)(struct ui_browser *self,
284 off_t offset, int whence);
281 u32 nr_entries; 285 u32 nr_entries;
282}; 286};
283 287
288static void ui_browser__list_head_seek(struct ui_browser *self,
289 off_t offset, int whence)
290{
291 struct list_head *head = self->entries;
292 struct list_head *pos;
293
294 switch (whence) {
295 case SEEK_SET:
296 pos = head->next;
297 break;
298 case SEEK_CUR:
299 pos = self->first_visible_entry;
300 break;
301 case SEEK_END:
302 pos = head->prev;
303 break;
304 default:
305 return;
306 }
307
308 if (offset > 0) {
309 while (offset-- != 0)
310 pos = pos->next;
311 } else {
312 while (offset++ != 0)
313 pos = pos->prev;
314 }
315
316 self->first_visible_entry = pos;
317}
318
319static bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row)
320{
321 return (self->first_visible_entry_idx + row) == self->index;
322}
323
284static void ui_browser__refresh_dimensions(struct ui_browser *self) 324static void ui_browser__refresh_dimensions(struct ui_browser *self)
285{ 325{
286 int cols, rows; 326 int cols, rows;
@@ -297,8 +337,36 @@ static void ui_browser__refresh_dimensions(struct ui_browser *self)
297 337
298static void ui_browser__reset_index(struct ui_browser *self) 338static void ui_browser__reset_index(struct ui_browser *self)
299{ 339{
300 self->index = self->first_visible_entry_idx = 0; 340 self->index = self->first_visible_entry_idx = 0;
301 self->first_visible_entry = NULL; 341 self->seek(self, 0, SEEK_SET);
342}
343
344static int ui_browser__show(struct ui_browser *self, const char *title)
345{
346 if (self->form != NULL) {
347 newtFormDestroy(self->form);
348 newtPopWindow();
349 }
350 ui_browser__refresh_dimensions(self);
351 newtCenteredWindow(self->width, self->height, title);
352 self->form = newt_form__new();
353 if (self->form == NULL)
354 return -1;
355
356 self->sb = newtVerticalScrollbar(self->width, 0, self->height,
357 HE_COLORSET_NORMAL,
358 HE_COLORSET_SELECTED);
359 if (self->sb == NULL)
360 return -1;
361
362 newtFormAddHotKey(self->form, NEWT_KEY_UP);
363 newtFormAddHotKey(self->form, NEWT_KEY_DOWN);
364 newtFormAddHotKey(self->form, NEWT_KEY_PGUP);
365 newtFormAddHotKey(self->form, NEWT_KEY_PGDN);
366 newtFormAddHotKey(self->form, NEWT_KEY_HOME);
367 newtFormAddHotKey(self->form, NEWT_KEY_END);
368 newtFormAddComponent(self->form, self->sb);
369 return 0;
302} 370}
303 371
304static int objdump_line__show(struct objdump_line *self, struct list_head *head, 372static int objdump_line__show(struct objdump_line *self, struct list_head *head,
@@ -352,26 +420,10 @@ static int objdump_line__show(struct objdump_line *self, struct list_head *head,
352 420
353static int ui_browser__refresh_entries(struct ui_browser *self) 421static int ui_browser__refresh_entries(struct ui_browser *self)
354{ 422{
355 struct objdump_line *pos; 423 int row;
356 struct list_head *head = self->entries;
357 struct hist_entry *he = self->priv;
358 int row = 0;
359 int len = he->ms.sym->end - he->ms.sym->start;
360
361 if (self->first_visible_entry == NULL || self->first_visible_entry == self->entries)
362 self->first_visible_entry = head->next;
363
364 pos = list_entry(self->first_visible_entry, struct objdump_line, node);
365
366 list_for_each_entry_from(pos, head, node) {
367 bool current_entry = (self->first_visible_entry_idx + row) == self->index;
368 SLsmg_gotorc(self->top + row, self->left);
369 objdump_line__show(pos, head, self->width,
370 he, len, current_entry);
371 if (++row == self->height)
372 break;
373 }
374 424
425 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
426 row = self->refresh_entries(self);
375 SLsmg_set_color(HE_COLORSET_NORMAL); 427 SLsmg_set_color(HE_COLORSET_NORMAL);
376 SLsmg_fill_region(self->top + row, self->left, 428 SLsmg_fill_region(self->top + row, self->left,
377 self->height - row, self->width, ' '); 429 self->height - row, self->width, ' ');
@@ -379,42 +431,13 @@ static int ui_browser__refresh_entries(struct ui_browser *self)
379 return 0; 431 return 0;
380} 432}
381 433
382static int ui_browser__run(struct ui_browser *self, const char *title, 434static int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es)
383 struct newtExitStruct *es)
384{ 435{
385 if (self->form) {
386 newtFormDestroy(self->form);
387 newtPopWindow();
388 }
389
390 ui_browser__refresh_dimensions(self);
391 newtCenteredWindow(self->width + 2, self->height, title);
392 self->form = newt_form__new();
393 if (self->form == NULL)
394 return -1;
395
396 self->sb = newtVerticalScrollbar(self->width + 1, 0, self->height,
397 HE_COLORSET_NORMAL,
398 HE_COLORSET_SELECTED);
399 if (self->sb == NULL)
400 return -1;
401
402 newtFormAddHotKey(self->form, NEWT_KEY_UP);
403 newtFormAddHotKey(self->form, NEWT_KEY_DOWN);
404 newtFormAddHotKey(self->form, NEWT_KEY_PGUP);
405 newtFormAddHotKey(self->form, NEWT_KEY_PGDN);
406 newtFormAddHotKey(self->form, ' ');
407 newtFormAddHotKey(self->form, NEWT_KEY_HOME);
408 newtFormAddHotKey(self->form, NEWT_KEY_END);
409 newtFormAddHotKey(self->form, NEWT_KEY_TAB);
410 newtFormAddHotKey(self->form, NEWT_KEY_RIGHT);
411
412 if (ui_browser__refresh_entries(self) < 0) 436 if (ui_browser__refresh_entries(self) < 0)
413 return -1; 437 return -1;
414 newtFormAddComponent(self->form, self->sb);
415 438
416 while (1) { 439 while (1) {
417 unsigned int offset; 440 off_t offset;
418 441
419 newtFormRun(self->form, es); 442 newtFormRun(self->form, es);
420 443
@@ -428,9 +451,8 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
428 break; 451 break;
429 ++self->index; 452 ++self->index;
430 if (self->index == self->first_visible_entry_idx + self->height) { 453 if (self->index == self->first_visible_entry_idx + self->height) {
431 struct list_head *pos = self->first_visible_entry;
432 ++self->first_visible_entry_idx; 454 ++self->first_visible_entry_idx;
433 self->first_visible_entry = pos->next; 455 self->seek(self, +1, SEEK_CUR);
434 } 456 }
435 break; 457 break;
436 case NEWT_KEY_UP: 458 case NEWT_KEY_UP:
@@ -438,9 +460,8 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
438 break; 460 break;
439 --self->index; 461 --self->index;
440 if (self->index < self->first_visible_entry_idx) { 462 if (self->index < self->first_visible_entry_idx) {
441 struct list_head *pos = self->first_visible_entry;
442 --self->first_visible_entry_idx; 463 --self->first_visible_entry_idx;
443 self->first_visible_entry = pos->prev; 464 self->seek(self, -1, SEEK_CUR);
444 } 465 }
445 break; 466 break;
446 case NEWT_KEY_PGDN: 467 case NEWT_KEY_PGDN:
@@ -453,12 +474,7 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
453 offset = self->nr_entries - 1 - self->index; 474 offset = self->nr_entries - 1 - self->index;
454 self->index += offset; 475 self->index += offset;
455 self->first_visible_entry_idx += offset; 476 self->first_visible_entry_idx += offset;
456 477 self->seek(self, +offset, SEEK_CUR);
457 while (offset--) {
458 struct list_head *pos = self->first_visible_entry;
459 self->first_visible_entry = pos->next;
460 }
461
462 break; 478 break;
463 case NEWT_KEY_PGUP: 479 case NEWT_KEY_PGUP:
464 if (self->first_visible_entry_idx == 0) 480 if (self->first_visible_entry_idx == 0)
@@ -471,36 +487,22 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
471 487
472 self->index -= offset; 488 self->index -= offset;
473 self->first_visible_entry_idx -= offset; 489 self->first_visible_entry_idx -= offset;
474 490 self->seek(self, -offset, SEEK_CUR);
475 while (offset--) {
476 struct list_head *pos = self->first_visible_entry;
477 self->first_visible_entry = pos->prev;
478 }
479 break; 491 break;
480 case NEWT_KEY_HOME: 492 case NEWT_KEY_HOME:
481 ui_browser__reset_index(self); 493 ui_browser__reset_index(self);
482 break; 494 break;
483 case NEWT_KEY_END: { 495 case NEWT_KEY_END:
484 struct list_head *head = self->entries;
485 offset = self->height - 1; 496 offset = self->height - 1;
497 if (offset >= self->nr_entries)
498 offset = self->nr_entries - 1;
486 499
487 if (offset > self->nr_entries) 500 self->index = self->nr_entries - 1;
488 offset = self->nr_entries; 501 self->first_visible_entry_idx = self->index - offset;
489 502 self->seek(self, -offset, SEEK_END);
490 self->index = self->first_visible_entry_idx = self->nr_entries - 1 - offset;
491 self->first_visible_entry = head->prev;
492 while (offset-- != 0) {
493 struct list_head *pos = self->first_visible_entry;
494 self->first_visible_entry = pos->prev;
495 }
496 }
497 break; 503 break;
498 case NEWT_KEY_RIGHT:
499 case NEWT_KEY_LEFT:
500 case NEWT_KEY_TAB:
501 return es->u.key;
502 default: 504 default:
503 continue; 505 return es->u.key;
504 } 506 }
505 if (ui_browser__refresh_entries(self) < 0) 507 if (ui_browser__refresh_entries(self) < 0)
506 return -1; 508 return -1;
@@ -508,38 +510,6 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
508 return 0; 510 return 0;
509} 511}
510 512
511/*
512 * When debugging newt problems it was useful to be able to "unroll"
513 * the calls to newtCheckBoxTreeAdd{Array,Item}, so that we can generate
514 * a source file with the sequence of calls to these methods, to then
515 * tweak the arrays to get the intended results, so I'm keeping this code
516 * here, may be useful again in the future.
517 */
518#undef NEWT_DEBUG
519
520static void newt_checkbox_tree__add(newtComponent tree, const char *str,
521 void *priv, int *indexes)
522{
523#ifdef NEWT_DEBUG
524 /* Print the newtCheckboxTreeAddArray to tinker with its index arrays */
525 int i = 0, len = 40 - strlen(str);
526
527 fprintf(stderr,
528 "\tnewtCheckboxTreeAddItem(tree, %*.*s\"%s\", (void *)%p, 0, ",
529 len, len, " ", str, priv);
530 while (indexes[i] != NEWT_ARG_LAST) {
531 if (indexes[i] != NEWT_ARG_APPEND)
532 fprintf(stderr, " %d,", indexes[i]);
533 else
534 fprintf(stderr, " %s,", "NEWT_ARG_APPEND");
535 ++i;
536 }
537 fprintf(stderr, " %s", " NEWT_ARG_LAST);\n");
538 fflush(stderr);
539#endif
540 newtCheckboxTreeAddArray(tree, str, priv, 0, indexes);
541}
542
543static char *callchain_list__sym_name(struct callchain_list *self, 513static char *callchain_list__sym_name(struct callchain_list *self,
544 char *bf, size_t bfsize) 514 char *bf, size_t bfsize)
545{ 515{
@@ -550,144 +520,29 @@ static char *callchain_list__sym_name(struct callchain_list *self,
550 return bf; 520 return bf;
551} 521}
552 522
553static void __callchain__append_graph_browser(struct callchain_node *self, 523static unsigned int hist_entry__annotate_browser_refresh(struct ui_browser *self)
554 newtComponent tree, u64 total,
555 int *indexes, int depth)
556{ 524{
557 struct rb_node *node; 525 struct objdump_line *pos;
558 u64 new_total, remaining; 526 struct list_head *head = self->entries;
559 int idx = 0; 527 struct hist_entry *he = self->priv;
560 528 int row = 0;
561 if (callchain_param.mode == CHAIN_GRAPH_REL) 529 int len = he->ms.sym->end - he->ms.sym->start;
562 new_total = self->children_hit;
563 else
564 new_total = total;
565
566 remaining = new_total;
567 node = rb_first(&self->rb_root);
568 while (node) {
569 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
570 struct rb_node *next = rb_next(node);
571 u64 cumul = cumul_hits(child);
572 struct callchain_list *chain;
573 int first = true, printed = 0;
574 int chain_idx = -1;
575 remaining -= cumul;
576
577 indexes[depth] = NEWT_ARG_APPEND;
578 indexes[depth + 1] = NEWT_ARG_LAST;
579
580 list_for_each_entry(chain, &child->val, list) {
581 char ipstr[BITS_PER_LONG / 4 + 1],
582 *alloc_str = NULL;
583 const char *str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
584
585 if (first) {
586 double percent = cumul * 100.0 / new_total;
587
588 first = false;
589 if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
590 str = "Not enough memory!";
591 else
592 str = alloc_str;
593 } else {
594 indexes[depth] = idx;
595 indexes[depth + 1] = NEWT_ARG_APPEND;
596 indexes[depth + 2] = NEWT_ARG_LAST;
597 ++chain_idx;
598 }
599 newt_checkbox_tree__add(tree, str, &chain->ms, indexes);
600 free(alloc_str);
601 ++printed;
602 }
603
604 indexes[depth] = idx;
605 if (chain_idx != -1)
606 indexes[depth + 1] = chain_idx;
607 if (printed != 0)
608 ++idx;
609 __callchain__append_graph_browser(child, tree, new_total, indexes,
610 depth + (chain_idx != -1 ? 2 : 1));
611 node = next;
612 }
613}
614
615static void callchain__append_graph_browser(struct callchain_node *self,
616 newtComponent tree, u64 total,
617 int *indexes, int parent_idx)
618{
619 struct callchain_list *chain;
620 int i = 0;
621
622 indexes[1] = NEWT_ARG_APPEND;
623 indexes[2] = NEWT_ARG_LAST;
624
625 list_for_each_entry(chain, &self->val, list) {
626 char ipstr[BITS_PER_LONG / 4 + 1], *str;
627
628 if (chain->ip >= PERF_CONTEXT_MAX)
629 continue;
630
631 if (!i++ && sort__first_dimension == SORT_SYM)
632 continue;
633 530
634 str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); 531 if (self->first_visible_entry == NULL || self->first_visible_entry == self->entries)
635 newt_checkbox_tree__add(tree, str, &chain->ms, indexes); 532 self->first_visible_entry = head->next;
636 }
637 533
638 indexes[1] = parent_idx; 534 pos = list_entry(self->first_visible_entry, struct objdump_line, node);
639 indexes[2] = NEWT_ARG_APPEND;
640 indexes[3] = NEWT_ARG_LAST;
641 __callchain__append_graph_browser(self, tree, total, indexes, 2);
642}
643 535
644static void hist_entry__append_callchain_browser(struct hist_entry *self, 536 list_for_each_entry_from(pos, head, node) {
645 newtComponent tree, u64 total, int parent_idx) 537 bool current_entry = ui_browser__is_current_entry(self, row);
646{ 538 SLsmg_gotorc(self->top + row, self->left);
647 struct rb_node *rb_node; 539 objdump_line__show(pos, head, self->width,
648 int indexes[1024] = { [0] = parent_idx, }; 540 he, len, current_entry);
649 int idx = 0; 541 if (++row == self->height)
650 struct callchain_node *chain;
651
652 rb_node = rb_first(&self->sorted_chain);
653 while (rb_node) {
654 chain = rb_entry(rb_node, struct callchain_node, rb_node);
655 switch (callchain_param.mode) {
656 case CHAIN_FLAT:
657 break;
658 case CHAIN_GRAPH_ABS: /* falldown */
659 case CHAIN_GRAPH_REL:
660 callchain__append_graph_browser(chain, tree, total, indexes, idx++);
661 break;
662 case CHAIN_NONE:
663 default:
664 break; 542 break;
665 }
666 rb_node = rb_next(rb_node);
667 } 543 }
668}
669
670static size_t hist_entry__append_browser(struct hist_entry *self,
671 newtComponent tree, u64 total)
672{
673 char s[256];
674 size_t ret;
675
676 if (symbol_conf.exclude_other && !self->parent)
677 return 0;
678 544
679 ret = hist_entry__snprintf(self, s, sizeof(s), NULL, 545 return row;
680 false, 0, false, total);
681 if (symbol_conf.use_callchain) {
682 int indexes[2];
683
684 indexes[0] = NEWT_ARG_APPEND;
685 indexes[1] = NEWT_ARG_LAST;
686 newt_checkbox_tree__add(tree, s, &self->ms, indexes);
687 } else
688 newtListboxAppendEntry(tree, s, &self->ms);
689
690 return ret;
691} 546}
692 547
693int hist_entry__tui_annotate(struct hist_entry *self) 548int hist_entry__tui_annotate(struct hist_entry *self)
@@ -712,7 +567,9 @@ int hist_entry__tui_annotate(struct hist_entry *self)
712 ui_helpline__push("Press <- or ESC to exit"); 567 ui_helpline__push("Press <- or ESC to exit");
713 568
714 memset(&browser, 0, sizeof(browser)); 569 memset(&browser, 0, sizeof(browser));
715 browser.entries = &head; 570 browser.entries = &head;
571 browser.refresh_entries = hist_entry__annotate_browser_refresh;
572 browser.seek = ui_browser__list_head_seek;
716 browser.priv = self; 573 browser.priv = self;
717 list_for_each_entry(pos, &head, node) { 574 list_for_each_entry(pos, &head, node) {
718 size_t line_len = strlen(pos->line); 575 size_t line_len = strlen(pos->line);
@@ -722,7 +579,9 @@ int hist_entry__tui_annotate(struct hist_entry *self)
722 } 579 }
723 580
724 browser.width += 18; /* Percentage */ 581 browser.width += 18; /* Percentage */
725 ret = ui_browser__run(&browser, self->ms.sym->name, &es); 582 ui_browser__show(&browser, self->ms.sym->name);
583 newtFormAddHotKey(browser.form, ' ');
584 ret = ui_browser__run(&browser, &es);
726 newtFormDestroy(browser.form); 585 newtFormDestroy(browser.form);
727 newtPopWindow(); 586 newtPopWindow();
728 list_for_each_entry_safe(pos, n, &head, node) { 587 list_for_each_entry_safe(pos, n, &head, node) {
@@ -733,157 +592,48 @@ int hist_entry__tui_annotate(struct hist_entry *self)
733 return ret; 592 return ret;
734} 593}
735 594
736static const void *newt__symbol_tree_get_current(newtComponent self)
737{
738 if (symbol_conf.use_callchain)
739 return newtCheckboxTreeGetCurrent(self);
740 return newtListboxGetCurrent(self);
741}
742
743static void hist_browser__selection(newtComponent self, void *data)
744{
745 const struct map_symbol **symbol_ptr = data;
746 *symbol_ptr = newt__symbol_tree_get_current(self);
747}
748
749struct hist_browser { 595struct hist_browser {
750 newtComponent form, tree; 596 struct ui_browser b;
751 const struct map_symbol *selection; 597 struct hists *hists;
598 struct hist_entry *he_selection;
599 struct map_symbol *selection;
752}; 600};
753 601
754static struct hist_browser *hist_browser__new(void) 602static void hist_browser__reset(struct hist_browser *self);
603static int hist_browser__run(struct hist_browser *self, const char *title,
604 struct newtExitStruct *es);
605static unsigned int hist_browser__refresh_entries(struct ui_browser *self);
606static void ui_browser__hists_seek(struct ui_browser *self,
607 off_t offset, int whence);
608
609static struct hist_browser *hist_browser__new(struct hists *hists)
755{ 610{
756 struct hist_browser *self = malloc(sizeof(*self)); 611 struct hist_browser *self = zalloc(sizeof(*self));
757 612
758 if (self != NULL) 613 if (self) {
759 self->form = NULL; 614 self->hists = hists;
615 self->b.refresh_entries = hist_browser__refresh_entries;
616 self->b.seek = ui_browser__hists_seek;
617 }
760 618
761 return self; 619 return self;
762} 620}
763 621
764static void hist_browser__delete(struct hist_browser *self) 622static void hist_browser__delete(struct hist_browser *self)
765{ 623{
766 newtFormDestroy(self->form); 624 newtFormDestroy(self->b.form);
767 newtPopWindow(); 625 newtPopWindow();
768 free(self); 626 free(self);
769} 627}
770 628
771static int hist_browser__populate(struct hist_browser *self, struct hists *hists,
772 const char *title)
773{
774 int max_len = 0, idx, cols, rows;
775 struct ui_progress *progress;
776 struct rb_node *nd;
777 u64 curr_hist = 0;
778 char seq[] = ".", unit;
779 char str[256];
780 unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE];
781
782 if (self->form) {
783 newtFormDestroy(self->form);
784 newtPopWindow();
785 }
786
787 nr_events = convert_unit(nr_events, &unit);
788 snprintf(str, sizeof(str), "Events: %lu%c ",
789 nr_events, unit);
790 newtDrawRootText(0, 0, str);
791
792 newtGetScreenSize(NULL, &rows);
793
794 if (symbol_conf.use_callchain)
795 self->tree = newtCheckboxTreeMulti(0, 0, rows - 5, seq,
796 NEWT_FLAG_SCROLL);
797 else
798 self->tree = newtListbox(0, 0, rows - 5,
799 (NEWT_FLAG_SCROLL |
800 NEWT_FLAG_RETURNEXIT));
801
802 newtComponentAddCallback(self->tree, hist_browser__selection,
803 &self->selection);
804
805 progress = ui_progress__new("Adding entries to the browser...",
806 hists->nr_entries);
807 if (progress == NULL)
808 return -1;
809
810 idx = 0;
811 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
812 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
813 int len;
814
815 if (h->filtered)
816 continue;
817
818 len = hist_entry__append_browser(h, self->tree, hists->stats.total_period);
819 if (len > max_len)
820 max_len = len;
821 if (symbol_conf.use_callchain)
822 hist_entry__append_callchain_browser(h, self->tree,
823 hists->stats.total_period, idx++);
824 ++curr_hist;
825 if (curr_hist % 5)
826 ui_progress__update(progress, curr_hist);
827 }
828
829 ui_progress__delete(progress);
830
831 newtGetScreenSize(&cols, &rows);
832
833 if (max_len > cols)
834 max_len = cols - 3;
835
836 if (!symbol_conf.use_callchain)
837 newtListboxSetWidth(self->tree, max_len);
838
839 newtCenteredWindow(max_len + (symbol_conf.use_callchain ? 5 : 0),
840 rows - 5, title);
841 self->form = newt_form__new();
842 if (self->form == NULL)
843 return -1;
844
845 newtFormAddHotKey(self->form, 'A');
846 newtFormAddHotKey(self->form, 'a');
847 newtFormAddHotKey(self->form, 'D');
848 newtFormAddHotKey(self->form, 'd');
849 newtFormAddHotKey(self->form, 'T');
850 newtFormAddHotKey(self->form, 't');
851 newtFormAddHotKey(self->form, '?');
852 newtFormAddHotKey(self->form, 'H');
853 newtFormAddHotKey(self->form, 'h');
854 newtFormAddHotKey(self->form, NEWT_KEY_F1);
855 newtFormAddHotKey(self->form, NEWT_KEY_RIGHT);
856 newtFormAddHotKey(self->form, NEWT_KEY_TAB);
857 newtFormAddHotKey(self->form, NEWT_KEY_UNTAB);
858 newtFormAddComponents(self->form, self->tree, NULL);
859 self->selection = newt__symbol_tree_get_current(self->tree);
860
861 return 0;
862}
863
864static struct hist_entry *hist_browser__selected_entry(struct hist_browser *self) 629static struct hist_entry *hist_browser__selected_entry(struct hist_browser *self)
865{ 630{
866 int *indexes; 631 return self->he_selection;
867
868 if (!symbol_conf.use_callchain)
869 goto out;
870
871 indexes = newtCheckboxTreeFindItem(self->tree, (void *)self->selection);
872 if (indexes) {
873 bool is_hist_entry = indexes[1] == NEWT_ARG_LAST;
874 free(indexes);
875 if (is_hist_entry)
876 goto out;
877 }
878 return NULL;
879out:
880 return container_of(self->selection, struct hist_entry, ms);
881} 632}
882 633
883static struct thread *hist_browser__selected_thread(struct hist_browser *self) 634static struct thread *hist_browser__selected_thread(struct hist_browser *self)
884{ 635{
885 struct hist_entry *he = hist_browser__selected_entry(self); 636 return self->he_selection->thread;
886 return he ? he->thread : NULL;
887} 637}
888 638
889static int hist_browser__title(char *bf, size_t size, const char *ev_name, 639static int hist_browser__title(char *bf, size_t size, const char *ev_name,
@@ -905,7 +655,7 @@ static int hist_browser__title(char *bf, size_t size, const char *ev_name,
905 655
906int hists__browse(struct hists *self, const char *helpline, const char *ev_name) 656int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
907{ 657{
908 struct hist_browser *browser = hist_browser__new(); 658 struct hist_browser *browser = hist_browser__new(self);
909 struct pstack *fstack; 659 struct pstack *fstack;
910 const struct thread *thread_filter = NULL; 660 const struct thread *thread_filter = NULL;
911 const struct dso *dso_filter = NULL; 661 const struct dso *dso_filter = NULL;
@@ -924,8 +674,6 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
924 674
925 hist_browser__title(msg, sizeof(msg), ev_name, 675 hist_browser__title(msg, sizeof(msg), ev_name,
926 dso_filter, thread_filter); 676 dso_filter, thread_filter);
927 if (hist_browser__populate(browser, self, msg) < 0)
928 goto out_free_stack;
929 677
930 while (1) { 678 while (1) {
931 const struct thread *thread; 679 const struct thread *thread;
@@ -934,7 +682,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
934 int nr_options = 0, choice = 0, i, 682 int nr_options = 0, choice = 0, i,
935 annotate = -2, zoom_dso = -2, zoom_thread = -2; 683 annotate = -2, zoom_dso = -2, zoom_thread = -2;
936 684
937 newtFormRun(browser->form, &es); 685 if (hist_browser__run(browser, msg, &es))
686 break;
938 687
939 thread = hist_browser__selected_thread(browser); 688 thread = hist_browser__selected_thread(browser);
940 dso = browser->selection->map ? browser->selection->map->dso : NULL; 689 dso = browser->selection->map ? browser->selection->map->dso : NULL;
@@ -1069,8 +818,7 @@ zoom_out_dso:
1069 hists__filter_by_dso(self, dso_filter); 818 hists__filter_by_dso(self, dso_filter);
1070 hist_browser__title(msg, sizeof(msg), ev_name, 819 hist_browser__title(msg, sizeof(msg), ev_name,
1071 dso_filter, thread_filter); 820 dso_filter, thread_filter);
1072 if (hist_browser__populate(browser, self, msg) < 0) 821 hist_browser__reset(browser);
1073 goto out;
1074 } else if (choice == zoom_thread) { 822 } else if (choice == zoom_thread) {
1075zoom_thread: 823zoom_thread:
1076 if (thread_filter) { 824 if (thread_filter) {
@@ -1088,8 +836,7 @@ zoom_out_thread:
1088 hists__filter_by_thread(self, thread_filter); 836 hists__filter_by_thread(self, thread_filter);
1089 hist_browser__title(msg, sizeof(msg), ev_name, 837 hist_browser__title(msg, sizeof(msg), ev_name,
1090 dso_filter, thread_filter); 838 dso_filter, thread_filter);
1091 if (hist_browser__populate(browser, self, msg) < 0) 839 hist_browser__reset(browser);
1092 goto out;
1093 } 840 }
1094 } 841 }
1095out_free_stack: 842out_free_stack:
@@ -1145,6 +892,13 @@ static struct newtPercentTreeColors {
1145 "blue", "lightgray", 892 "blue", "lightgray",
1146}; 893};
1147 894
895static void newt_suspend(void *d __used)
896{
897 newtSuspend();
898 raise(SIGTSTP);
899 newtResume();
900}
901
1148void setup_browser(void) 902void setup_browser(void)
1149{ 903{
1150 struct newtPercentTreeColors *c = &defaultPercentTreeColors; 904 struct newtPercentTreeColors *c = &defaultPercentTreeColors;
@@ -1158,6 +912,7 @@ void setup_browser(void)
1158 use_browser = 1; 912 use_browser = 1;
1159 newtInit(); 913 newtInit();
1160 newtCls(); 914 newtCls();
915 newtSetSuspendCallback(newt_suspend, NULL);
1161 ui_helpline__puts(" "); 916 ui_helpline__puts(" ");
1162 sltt_set_color(HE_COLORSET_TOP, NULL, c->topColorFg, c->topColorBg); 917 sltt_set_color(HE_COLORSET_TOP, NULL, c->topColorFg, c->topColorBg);
1163 sltt_set_color(HE_COLORSET_MEDIUM, NULL, c->mediumColorFg, c->mediumColorBg); 918 sltt_set_color(HE_COLORSET_MEDIUM, NULL, c->mediumColorFg, c->mediumColorBg);
@@ -1176,3 +931,638 @@ void exit_browser(bool wait_for_ok)
1176 newtFinished(); 931 newtFinished();
1177 } 932 }
1178} 933}
934
935static void hist_browser__refresh_dimensions(struct hist_browser *self)
936{
937 /* 3 == +/- toggle symbol before actual hist_entry rendering */
938 self->b.width = 3 + (hists__sort_list_width(self->hists) +
939 sizeof("[k]"));
940}
941
942static void hist_browser__reset(struct hist_browser *self)
943{
944 self->b.nr_entries = self->hists->nr_entries;
945 hist_browser__refresh_dimensions(self);
946 ui_browser__reset_index(&self->b);
947}
948
949static char tree__folded_sign(bool unfolded)
950{
951 return unfolded ? '-' : '+';
952}
953
954static char map_symbol__folded(const struct map_symbol *self)
955{
956 return self->has_children ? tree__folded_sign(self->unfolded) : ' ';
957}
958
959static char hist_entry__folded(const struct hist_entry *self)
960{
961 return map_symbol__folded(&self->ms);
962}
963
964static char callchain_list__folded(const struct callchain_list *self)
965{
966 return map_symbol__folded(&self->ms);
967}
968
969static bool map_symbol__toggle_fold(struct map_symbol *self)
970{
971 if (!self->has_children)
972 return false;
973
974 self->unfolded = !self->unfolded;
975 return true;
976}
977
978#define LEVEL_OFFSET_STEP 3
979
980static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
981 struct callchain_node *chain_node,
982 u64 total, int level,
983 unsigned short row,
984 off_t *row_offset,
985 bool *is_current_entry)
986{
987 struct rb_node *node;
988 int first_row = row, width, offset = level * LEVEL_OFFSET_STEP;
989 u64 new_total, remaining;
990
991 if (callchain_param.mode == CHAIN_GRAPH_REL)
992 new_total = chain_node->children_hit;
993 else
994 new_total = total;
995
996 remaining = new_total;
997 node = rb_first(&chain_node->rb_root);
998 while (node) {
999 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
1000 struct rb_node *next = rb_next(node);
1001 u64 cumul = cumul_hits(child);
1002 struct callchain_list *chain;
1003 char folded_sign = ' ';
1004 int first = true;
1005 int extra_offset = 0;
1006
1007 remaining -= cumul;
1008
1009 list_for_each_entry(chain, &child->val, list) {
1010 char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str;
1011 const char *str;
1012 int color;
1013 bool was_first = first;
1014
1015 if (first) {
1016 first = false;
1017 chain->ms.has_children = chain->list.next != &child->val ||
1018 rb_first(&child->rb_root) != NULL;
1019 } else {
1020 extra_offset = LEVEL_OFFSET_STEP;
1021 chain->ms.has_children = chain->list.next == &child->val &&
1022 rb_first(&child->rb_root) != NULL;
1023 }
1024
1025 folded_sign = callchain_list__folded(chain);
1026 if (*row_offset != 0) {
1027 --*row_offset;
1028 goto do_next;
1029 }
1030
1031 alloc_str = NULL;
1032 str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
1033 if (was_first) {
1034 double percent = cumul * 100.0 / new_total;
1035
1036 if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
1037 str = "Not enough memory!";
1038 else
1039 str = alloc_str;
1040 }
1041
1042 color = HE_COLORSET_NORMAL;
1043 width = self->b.width - (offset + extra_offset + 2);
1044 if (ui_browser__is_current_entry(&self->b, row)) {
1045 self->selection = &chain->ms;
1046 color = HE_COLORSET_SELECTED;
1047 *is_current_entry = true;
1048 }
1049
1050 SLsmg_set_color(color);
1051 SLsmg_gotorc(self->b.top + row, self->b.left);
1052 slsmg_write_nstring(" ", offset + extra_offset);
1053 slsmg_printf("%c ", folded_sign);
1054 slsmg_write_nstring(str, width);
1055 free(alloc_str);
1056
1057 if (++row == self->b.height)
1058 goto out;
1059do_next:
1060 if (folded_sign == '+')
1061 break;
1062 }
1063
1064 if (folded_sign == '-') {
1065 const int new_level = level + (extra_offset ? 2 : 1);
1066 row += hist_browser__show_callchain_node_rb_tree(self, child, new_total,
1067 new_level, row, row_offset,
1068 is_current_entry);
1069 }
1070 if (row == self->b.height)
1071 goto out;
1072 node = next;
1073 }
1074out:
1075 return row - first_row;
1076}
1077
1078static int hist_browser__show_callchain_node(struct hist_browser *self,
1079 struct callchain_node *node,
1080 int level, unsigned short row,
1081 off_t *row_offset,
1082 bool *is_current_entry)
1083{
1084 struct callchain_list *chain;
1085 int first_row = row,
1086 offset = level * LEVEL_OFFSET_STEP,
1087 width = self->b.width - offset;
1088 char folded_sign = ' ';
1089
1090 list_for_each_entry(chain, &node->val, list) {
1091 char ipstr[BITS_PER_LONG / 4 + 1], *s;
1092 int color;
1093 /*
1094 * FIXME: This should be moved to somewhere else,
1095 * probably when the callchain is created, so as not to
1096 * traverse it all over again
1097 */
1098 chain->ms.has_children = rb_first(&node->rb_root) != NULL;
1099 folded_sign = callchain_list__folded(chain);
1100
1101 if (*row_offset != 0) {
1102 --*row_offset;
1103 continue;
1104 }
1105
1106 color = HE_COLORSET_NORMAL;
1107 if (ui_browser__is_current_entry(&self->b, row)) {
1108 self->selection = &chain->ms;
1109 color = HE_COLORSET_SELECTED;
1110 *is_current_entry = true;
1111 }
1112
1113 s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
1114 SLsmg_gotorc(self->b.top + row, self->b.left);
1115 SLsmg_set_color(color);
1116 slsmg_write_nstring(" ", offset);
1117 slsmg_printf("%c ", folded_sign);
1118 slsmg_write_nstring(s, width - 2);
1119
1120 if (++row == self->b.height)
1121 goto out;
1122 }
1123
1124 if (folded_sign == '-')
1125 row += hist_browser__show_callchain_node_rb_tree(self, node,
1126 self->hists->stats.total_period,
1127 level + 1, row,
1128 row_offset,
1129 is_current_entry);
1130out:
1131 return row - first_row;
1132}
1133
1134static int hist_browser__show_callchain(struct hist_browser *self,
1135 struct rb_root *chain,
1136 int level, unsigned short row,
1137 off_t *row_offset,
1138 bool *is_current_entry)
1139{
1140 struct rb_node *nd;
1141 int first_row = row;
1142
1143 for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
1144 struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
1145
1146 row += hist_browser__show_callchain_node(self, node, level,
1147 row, row_offset,
1148 is_current_entry);
1149 if (row == self->b.height)
1150 break;
1151 }
1152
1153 return row - first_row;
1154}
1155
1156static int hist_browser__show_entry(struct hist_browser *self,
1157 struct hist_entry *entry,
1158 unsigned short row)
1159{
1160 char s[256];
1161 double percent;
1162 int printed = 0;
1163 int color, width = self->b.width;
1164 char folded_sign = ' ';
1165 bool current_entry = ui_browser__is_current_entry(&self->b, row);
1166 off_t row_offset = entry->row_offset;
1167
1168 if (current_entry) {
1169 self->he_selection = entry;
1170 self->selection = &entry->ms;
1171 }
1172
1173 if (symbol_conf.use_callchain) {
1174 entry->ms.has_children = !RB_EMPTY_ROOT(&entry->sorted_chain);
1175 folded_sign = hist_entry__folded(entry);
1176 }
1177
1178 if (row_offset == 0) {
1179 hist_entry__snprintf(entry, s, sizeof(s), self->hists, NULL, false,
1180 0, false, self->hists->stats.total_period);
1181 percent = (entry->period * 100.0) / self->hists->stats.total_period;
1182
1183 color = HE_COLORSET_SELECTED;
1184 if (!current_entry) {
1185 if (percent >= MIN_RED)
1186 color = HE_COLORSET_TOP;
1187 else if (percent >= MIN_GREEN)
1188 color = HE_COLORSET_MEDIUM;
1189 else
1190 color = HE_COLORSET_NORMAL;
1191 }
1192
1193 SLsmg_set_color(color);
1194 SLsmg_gotorc(self->b.top + row, self->b.left);
1195 if (symbol_conf.use_callchain) {
1196 slsmg_printf("%c ", folded_sign);
1197 width -= 2;
1198 }
1199 slsmg_write_nstring(s, width);
1200 ++row;
1201 ++printed;
1202 } else
1203 --row_offset;
1204
1205 if (folded_sign == '-' && row != self->b.height) {
1206 printed += hist_browser__show_callchain(self, &entry->sorted_chain,
1207 1, row, &row_offset,
1208 &current_entry);
1209 if (current_entry)
1210 self->he_selection = entry;
1211 }
1212
1213 return printed;
1214}
1215
1216static unsigned int hist_browser__refresh_entries(struct ui_browser *self)
1217{
1218 unsigned row = 0;
1219 struct rb_node *nd;
1220 struct hist_browser *hb = container_of(self, struct hist_browser, b);
1221
1222 if (self->first_visible_entry == NULL)
1223 self->first_visible_entry = rb_first(&hb->hists->entries);
1224
1225 for (nd = self->first_visible_entry; nd; nd = rb_next(nd)) {
1226 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
1227
1228 if (h->filtered)
1229 continue;
1230
1231 row += hist_browser__show_entry(hb, h, row);
1232 if (row == self->height)
1233 break;
1234 }
1235
1236 return row;
1237}
1238
1239static void callchain_node__init_have_children_rb_tree(struct callchain_node *self)
1240{
1241 struct rb_node *nd = rb_first(&self->rb_root);
1242
1243 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
1244 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
1245 struct callchain_list *chain;
1246 int first = true;
1247
1248 list_for_each_entry(chain, &child->val, list) {
1249 if (first) {
1250 first = false;
1251 chain->ms.has_children = chain->list.next != &child->val ||
1252 rb_first(&child->rb_root) != NULL;
1253 } else
1254 chain->ms.has_children = chain->list.next == &child->val &&
1255 rb_first(&child->rb_root) != NULL;
1256 }
1257
1258 callchain_node__init_have_children_rb_tree(child);
1259 }
1260}
1261
1262static void callchain_node__init_have_children(struct callchain_node *self)
1263{
1264 struct callchain_list *chain;
1265
1266 list_for_each_entry(chain, &self->val, list)
1267 chain->ms.has_children = rb_first(&self->rb_root) != NULL;
1268
1269 callchain_node__init_have_children_rb_tree(self);
1270}
1271
1272static void callchain__init_have_children(struct rb_root *self)
1273{
1274 struct rb_node *nd;
1275
1276 for (nd = rb_first(self); nd; nd = rb_next(nd)) {
1277 struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
1278 callchain_node__init_have_children(node);
1279 }
1280}
1281
1282static void hist_entry__init_have_children(struct hist_entry *self)
1283{
1284 if (!self->init_have_children) {
1285 callchain__init_have_children(&self->sorted_chain);
1286 self->init_have_children = true;
1287 }
1288}
1289
1290static struct rb_node *hists__filter_entries(struct rb_node *nd)
1291{
1292 while (nd != NULL) {
1293 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
1294 if (!h->filtered)
1295 return nd;
1296
1297 nd = rb_next(nd);
1298 }
1299
1300 return NULL;
1301}
1302
1303static struct rb_node *hists__filter_prev_entries(struct rb_node *nd)
1304{
1305 while (nd != NULL) {
1306 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
1307 if (!h->filtered)
1308 return nd;
1309
1310 nd = rb_prev(nd);
1311 }
1312
1313 return NULL;
1314}
1315
1316static void ui_browser__hists_seek(struct ui_browser *self,
1317 off_t offset, int whence)
1318{
1319 struct hist_entry *h;
1320 struct rb_node *nd;
1321 bool first = true;
1322
1323 switch (whence) {
1324 case SEEK_SET:
1325 nd = hists__filter_entries(rb_first(self->entries));
1326 break;
1327 case SEEK_CUR:
1328 nd = self->first_visible_entry;
1329 goto do_offset;
1330 case SEEK_END:
1331 nd = hists__filter_prev_entries(rb_last(self->entries));
1332 first = false;
1333 break;
1334 default:
1335 return;
1336 }
1337
1338 /*
 1339	 * Moves that are not relative to the first visible entry invalidate
 1340	 * its row_offset:
1341 */
1342 h = rb_entry(self->first_visible_entry, struct hist_entry, rb_node);
1343 h->row_offset = 0;
1344
1345 /*
 1346	 * Here we have to check if nd is expanded (+); if it is, we can't go
 1347	 * to the next top level hist_entry, instead we must compute an offset
 1348	 * of what _not_ to show and leave the first visible entry unchanged.
 1349	 *
 1350	 * This offset increases when we are going from top to bottom and
 1351	 * decreases when we're going from bottom to top.
 1352	 *
 1353	 * As we don't have backpointers to the top level in the callchains
 1354	 * structure, we need to always print the whole hist_entry callchain,
 1355	 * skipping the first ones that are before the first visible entry
 1356	 * and stop once we have printed enough lines to fill the screen.
1357 */
1358do_offset:
1359 if (offset > 0) {
1360 do {
1361 h = rb_entry(nd, struct hist_entry, rb_node);
1362 if (h->ms.unfolded) {
1363 u16 remaining = h->nr_rows - h->row_offset;
1364 if (offset > remaining) {
1365 offset -= remaining;
1366 h->row_offset = 0;
1367 } else {
1368 h->row_offset += offset;
1369 offset = 0;
1370 self->first_visible_entry = nd;
1371 break;
1372 }
1373 }
1374 nd = hists__filter_entries(rb_next(nd));
1375 if (nd == NULL)
1376 break;
1377 --offset;
1378 self->first_visible_entry = nd;
1379 } while (offset != 0);
1380 } else if (offset < 0) {
1381 while (1) {
1382 h = rb_entry(nd, struct hist_entry, rb_node);
1383 if (h->ms.unfolded) {
1384 if (first) {
1385 if (-offset > h->row_offset) {
1386 offset += h->row_offset;
1387 h->row_offset = 0;
1388 } else {
1389 h->row_offset += offset;
1390 offset = 0;
1391 self->first_visible_entry = nd;
1392 break;
1393 }
1394 } else {
1395 if (-offset > h->nr_rows) {
1396 offset += h->nr_rows;
1397 h->row_offset = 0;
1398 } else {
1399 h->row_offset = h->nr_rows + offset;
1400 offset = 0;
1401 self->first_visible_entry = nd;
1402 break;
1403 }
1404 }
1405 }
1406
1407 nd = hists__filter_prev_entries(rb_prev(nd));
1408 if (nd == NULL)
1409 break;
1410 ++offset;
1411 self->first_visible_entry = nd;
1412 if (offset == 0) {
1413 /*
 1414			 * Last unfiltered hist_entry: check if it is
 1415			 * unfolded; if it is, row_offset should point
 1416			 * at its last entry.
1417 */
1418 h = rb_entry(nd, struct hist_entry, rb_node);
1419 if (h->ms.unfolded)
1420 h->row_offset = h->nr_rows;
1421 break;
1422 }
1423 first = false;
1424 }
1425 } else {
1426 self->first_visible_entry = nd;
1427 h = rb_entry(nd, struct hist_entry, rb_node);
1428 h->row_offset = 0;
1429 }
1430}
1431
1432static int callchain_node__count_rows_rb_tree(struct callchain_node *self)
1433{
1434 int n = 0;
1435 struct rb_node *nd;
1436
1437 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
1438 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
1439 struct callchain_list *chain;
1440 char folded_sign = ' '; /* No children */
1441
1442 list_for_each_entry(chain, &child->val, list) {
1443 ++n;
1444 /* We need this because we may not have children */
1445 folded_sign = callchain_list__folded(chain);
1446 if (folded_sign == '+')
1447 break;
1448 }
1449
1450 if (folded_sign == '-') /* Have children and they're unfolded */
1451 n += callchain_node__count_rows_rb_tree(child);
1452 }
1453
1454 return n;
1455}
1456
1457static int callchain_node__count_rows(struct callchain_node *node)
1458{
1459 struct callchain_list *chain;
1460 bool unfolded = false;
1461 int n = 0;
1462
1463 list_for_each_entry(chain, &node->val, list) {
1464 ++n;
1465 unfolded = chain->ms.unfolded;
1466 }
1467
1468 if (unfolded)
1469 n += callchain_node__count_rows_rb_tree(node);
1470
1471 return n;
1472}
1473
1474static int callchain__count_rows(struct rb_root *chain)
1475{
1476 struct rb_node *nd;
1477 int n = 0;
1478
1479 for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
1480 struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
1481 n += callchain_node__count_rows(node);
1482 }
1483
1484 return n;
1485}
1486
1487static bool hist_browser__toggle_fold(struct hist_browser *self)
1488{
1489 if (map_symbol__toggle_fold(self->selection)) {
1490 struct hist_entry *he = self->he_selection;
1491
1492 hist_entry__init_have_children(he);
1493 self->hists->nr_entries -= he->nr_rows;
1494
1495 if (he->ms.unfolded)
1496 he->nr_rows = callchain__count_rows(&he->sorted_chain);
1497 else
1498 he->nr_rows = 0;
1499 self->hists->nr_entries += he->nr_rows;
1500 self->b.nr_entries = self->hists->nr_entries;
1501
1502 return true;
1503 }
1504
1505 /* If it doesn't have children, no toggling performed */
1506 return false;
1507}
1508
1509static int hist_browser__run(struct hist_browser *self, const char *title,
1510 struct newtExitStruct *es)
1511{
1512 char str[256], unit;
1513 unsigned long nr_events = self->hists->stats.nr_events[PERF_RECORD_SAMPLE];
1514
1515 self->b.entries = &self->hists->entries;
1516 self->b.nr_entries = self->hists->nr_entries;
1517
1518 hist_browser__refresh_dimensions(self);
1519
1520 nr_events = convert_unit(nr_events, &unit);
1521 snprintf(str, sizeof(str), "Events: %lu%c ",
1522 nr_events, unit);
1523 newtDrawRootText(0, 0, str);
1524
1525 if (ui_browser__show(&self->b, title) < 0)
1526 return -1;
1527
1528 newtFormAddHotKey(self->b.form, 'A');
1529 newtFormAddHotKey(self->b.form, 'a');
1530 newtFormAddHotKey(self->b.form, '?');
1531 newtFormAddHotKey(self->b.form, 'h');
1532 newtFormAddHotKey(self->b.form, 'H');
1533 newtFormAddHotKey(self->b.form, 'd');
1534
1535 newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
1536 newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT);
1537 newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER);
1538
1539 while (1) {
1540 ui_browser__run(&self->b, es);
1541
1542 if (es->reason != NEWT_EXIT_HOTKEY)
1543 break;
1544 switch (es->u.key) {
1545 case 'd': { /* Debug */
1546 static int seq;
1547 struct hist_entry *h = rb_entry(self->b.first_visible_entry,
1548 struct hist_entry, rb_node);
1549 ui_helpline__pop();
1550 ui_helpline__fpush("%d: nr_ent=(%d,%d), height=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
1551 seq++, self->b.nr_entries,
1552 self->hists->nr_entries,
1553 self->b.height,
1554 self->b.index,
1555 self->b.first_visible_entry_idx,
1556 h->row_offset, h->nr_rows);
1557 }
1558 continue;
1559 case NEWT_KEY_ENTER:
1560 if (hist_browser__toggle_fold(self))
1561 break;
1562 /* fall thru */
1563 default:
1564 return 0;
1565 }
1566 }
1567 return 0;
1568}
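
The folding logic above keys all of its row accounting off a one-character sign per callchain entry. A minimal standalone sketch of that convention (an assumption inferred from how the browser uses it; the real helpers are callchain_list__folded()/hist_entry__folded(), which are not shown in this listing):

#include <stdbool.h>

/* ' ' = no children, '+' = children folded (hidden), '-' = children unfolded */
static char folded_sign_of(bool has_children, bool unfolded)
{
	if (!has_children)
		return ' ';
	return unfolded ? '-' : '+';
}

This is why callchain_node__count_rows() only recurses on '-' and stops early on '+': folded children contribute no rows to the browser.
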
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9bf0f402ca73..4af5bd59cfd1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -602,8 +602,15 @@ parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
602 return EVT_FAILED; 602 return EVT_FAILED;
603 } 603 }
604 604
605 /* We should find a nice way to override the access type */ 605 /*
606 attr->bp_len = HW_BREAKPOINT_LEN_4; 606 * We should find a nice way to override the access length
607 * Provide some defaults for now
608 */
609 if (attr->bp_type == HW_BREAKPOINT_X)
610 attr->bp_len = sizeof(long);
611 else
612 attr->bp_len = HW_BREAKPOINT_LEN_4;
613
607 attr->type = PERF_TYPE_BREAKPOINT; 614 attr->type = PERF_TYPE_BREAKPOINT;
608 615
609 return EVT_HANDLED; 616 return EVT_HANDLED;
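
The hunk above makes execution breakpoints default to the machine word size while data breakpoints keep HW_BREAKPOINT_LEN_4. A hedged sketch of the resulting perf_event_attr setup (the helper name and the address are placeholders, not part of this patch):

#include <string.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static void setup_exec_breakpoint(struct perf_event_attr *attr, unsigned long addr)
{
	memset(attr, 0, sizeof(*attr));
	attr->type = PERF_TYPE_BREAKPOINT;
	attr->bp_type = HW_BREAKPOINT_X;	/* execution breakpoint */
	attr->bp_addr = addr;
	attr->bp_len = sizeof(long);		/* word-sized, matching the default above */
}
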
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 914c67095d96..2e665cb84055 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * probe-event.c : perf-probe definition to kprobe_events format converter 2 * probe-event.c : perf-probe definition to probe_events format converter
3 * 3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com> 4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 * 5 *
@@ -120,8 +120,11 @@ static int open_vmlinux(void)
120 return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY); 120 return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
121} 121}
122 122
123/* Convert trace point to probe point with debuginfo */ 123/*
124static int convert_to_perf_probe_point(struct kprobe_trace_point *tp, 124 * Convert trace point to probe point with debuginfo
125 * Currently only handles kprobes.
126 */
127static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
125 struct perf_probe_point *pp) 128 struct perf_probe_point *pp)
126{ 129{
127 struct symbol *sym; 130 struct symbol *sym;
@@ -151,8 +154,8 @@ static int convert_to_perf_probe_point(struct kprobe_trace_point *tp,
151} 154}
152 155
153/* Try to find perf_probe_event with debuginfo */ 156/* Try to find perf_probe_event with debuginfo */
154static int try_to_find_kprobe_trace_events(struct perf_probe_event *pev, 157static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
155 struct kprobe_trace_event **tevs, 158 struct probe_trace_event **tevs,
156 int max_tevs) 159 int max_tevs)
157{ 160{
158 bool need_dwarf = perf_probe_event_need_dwarf(pev); 161 bool need_dwarf = perf_probe_event_need_dwarf(pev);
@@ -169,11 +172,11 @@ static int try_to_find_kprobe_trace_events(struct perf_probe_event *pev,
169 } 172 }
170 173
171 /* Searching trace events corresponding to probe event */ 174 /* Searching trace events corresponding to probe event */
172 ntevs = find_kprobe_trace_events(fd, pev, tevs, max_tevs); 175 ntevs = find_probe_trace_events(fd, pev, tevs, max_tevs);
173 close(fd); 176 close(fd);
174 177
175 if (ntevs > 0) { /* Succeeded to find trace events */ 178 if (ntevs > 0) { /* Succeeded to find trace events */
176 pr_debug("find %d kprobe_trace_events.\n", ntevs); 179 pr_debug("find %d probe_trace_events.\n", ntevs);
177 return ntevs; 180 return ntevs;
178 } 181 }
179 182
@@ -195,6 +198,65 @@ static int try_to_find_kprobe_trace_events(struct perf_probe_event *pev,
195 return ntevs; 198 return ntevs;
196} 199}
197 200
201/*
202 * Find a src file from a DWARF tag path. Prepend optional source path prefix
203 * and chop off leading directories that do not exist. Result is passed back as
204 * a newly allocated path on success.
205 * Return 0 if file was found and readable, -errno otherwise.
206 */
207static int get_real_path(const char *raw_path, const char *comp_dir,
208 char **new_path)
209{
210 const char *prefix = symbol_conf.source_prefix;
211
212 if (!prefix) {
213 if (raw_path[0] != '/' && comp_dir)
214 /* If not an absolute path, try to use comp_dir */
215 prefix = comp_dir;
216 else {
217 if (access(raw_path, R_OK) == 0) {
218 *new_path = strdup(raw_path);
219 return 0;
220 } else
221 return -errno;
222 }
223 }
224
225 *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
226 if (!*new_path)
227 return -ENOMEM;
228
229 for (;;) {
230 sprintf(*new_path, "%s/%s", prefix, raw_path);
231
232 if (access(*new_path, R_OK) == 0)
233 return 0;
234
235 if (!symbol_conf.source_prefix)
236 /* In case of searching comp_dir, don't retry */
237 return -errno;
238
239 switch (errno) {
240 case ENAMETOOLONG:
241 case ENOENT:
242 case EROFS:
243 case EFAULT:
244 raw_path = strchr(++raw_path, '/');
245 if (!raw_path) {
246 free(*new_path);
247 *new_path = NULL;
248 return -ENOENT;
249 }
250 continue;
251
252 default:
253 free(*new_path);
254 *new_path = NULL;
255 return -errno;
256 }
257 }
258}
258}
259
198#define LINEBUF_SIZE 256 260#define LINEBUF_SIZE 256
199#define NR_ADDITIONAL_LINES 2 261#define NR_ADDITIONAL_LINES 2
200 262
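
get_real_path() above resolves a DWARF-reported source path by prepending either the user-supplied source prefix or the compile directory, then chopping leading directories off the raw path until a readable file is found. A standalone sketch of that strategy (illustrative only, not the perf implementation; all paths are placeholders):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static char *resolve_source(const char *prefix, const char *raw_path)
{
	char *buf = malloc(strlen(prefix) + strlen(raw_path) + 2);

	if (!buf)
		return NULL;
	while (raw_path) {
		sprintf(buf, "%s/%s", prefix, raw_path);
		if (access(buf, R_OK) == 0)
			return buf;			/* readable file found */
		raw_path = strchr(raw_path + 1, '/');	/* drop one leading directory */
		if (raw_path)
			raw_path++;
	}
	free(buf);
	return NULL;
}

int main(void)
{
	char *p = resolve_source("/usr/src/linux", "build/tmp/kernel/sched.c");

	printf("%s\n", p ? p : "not found");
	free(p);
	return 0;
}
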
@@ -244,6 +306,7 @@ int show_line_range(struct line_range *lr)
244 struct line_node *ln; 306 struct line_node *ln;
245 FILE *fp; 307 FILE *fp;
246 int fd, ret; 308 int fd, ret;
309 char *tmp;
247 310
248 /* Search a line range */ 311 /* Search a line range */
249 ret = init_vmlinux(); 312 ret = init_vmlinux();
@@ -266,6 +329,15 @@ int show_line_range(struct line_range *lr)
266 return ret; 329 return ret;
267 } 330 }
268 331
332 /* Convert source file path */
333 tmp = lr->path;
334 ret = get_real_path(tmp, lr->comp_dir, &lr->path);
335 free(tmp); /* Free old path */
336 if (ret < 0) {
337 pr_warning("Failed to find source file. (%d)\n", ret);
338 return ret;
339 }
340
269 setup_pager(); 341 setup_pager();
270 342
271 if (lr->function) 343 if (lr->function)
@@ -308,8 +380,8 @@ end:
308 380
309#else /* !DWARF_SUPPORT */ 381#else /* !DWARF_SUPPORT */
310 382
311static int convert_to_perf_probe_point(struct kprobe_trace_point *tp, 383static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
312 struct perf_probe_point *pp) 384 struct perf_probe_point *pp)
313{ 385{
314 pp->function = strdup(tp->symbol); 386 pp->function = strdup(tp->symbol);
315 if (pp->function == NULL) 387 if (pp->function == NULL)
@@ -320,8 +392,8 @@ static int convert_to_perf_probe_point(struct kprobe_trace_point *tp,
320 return 0; 392 return 0;
321} 393}
322 394
323static int try_to_find_kprobe_trace_events(struct perf_probe_event *pev, 395static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
324 struct kprobe_trace_event **tevs __unused, 396 struct probe_trace_event **tevs __unused,
325 int max_tevs __unused) 397 int max_tevs __unused)
326{ 398{
327 if (perf_probe_event_need_dwarf(pev)) { 399 if (perf_probe_event_need_dwarf(pev)) {
@@ -557,7 +629,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
557/* Parse perf-probe event argument */ 629/* Parse perf-probe event argument */
558static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg) 630static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
559{ 631{
560 char *tmp; 632 char *tmp, *goodname;
561 struct perf_probe_arg_field **fieldp; 633 struct perf_probe_arg_field **fieldp;
562 634
563 pr_debug("parsing arg: %s into ", str); 635 pr_debug("parsing arg: %s into ", str);
@@ -580,7 +652,7 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
580 pr_debug("type:%s ", arg->type); 652 pr_debug("type:%s ", arg->type);
581 } 653 }
582 654
583 tmp = strpbrk(str, "-."); 655 tmp = strpbrk(str, "-.[");
584 if (!is_c_varname(str) || !tmp) { 656 if (!is_c_varname(str) || !tmp) {
585 /* A variable, register, symbol or special value */ 657 /* A variable, register, symbol or special value */
586 arg->var = strdup(str); 658 arg->var = strdup(str);
@@ -590,10 +662,11 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
590 return 0; 662 return 0;
591 } 663 }
592 664
593 /* Structure fields */ 665 /* Structure fields or array element */
594 arg->var = strndup(str, tmp - str); 666 arg->var = strndup(str, tmp - str);
595 if (arg->var == NULL) 667 if (arg->var == NULL)
596 return -ENOMEM; 668 return -ENOMEM;
669 goodname = arg->var;
597 pr_debug("%s, ", arg->var); 670 pr_debug("%s, ", arg->var);
598 fieldp = &arg->field; 671 fieldp = &arg->field;
599 672
@@ -601,22 +674,38 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
601 *fieldp = zalloc(sizeof(struct perf_probe_arg_field)); 674 *fieldp = zalloc(sizeof(struct perf_probe_arg_field));
602 if (*fieldp == NULL) 675 if (*fieldp == NULL)
603 return -ENOMEM; 676 return -ENOMEM;
604 if (*tmp == '.') { 677 if (*tmp == '[') { /* Array */
605 str = tmp + 1; 678 str = tmp;
606 (*fieldp)->ref = false; 679 (*fieldp)->index = strtol(str + 1, &tmp, 0);
607 } else if (tmp[1] == '>') {
608 str = tmp + 2;
609 (*fieldp)->ref = true; 680 (*fieldp)->ref = true;
610 } else { 681 if (*tmp != ']' || tmp == str + 1) {
611 semantic_error("Argument parse error: %s\n", str); 682 semantic_error("Array index must be a"
612 return -EINVAL; 683 " number.\n");
684 return -EINVAL;
685 }
686 tmp++;
687 if (*tmp == '\0')
688 tmp = NULL;
689 } else { /* Structure */
690 if (*tmp == '.') {
691 str = tmp + 1;
692 (*fieldp)->ref = false;
693 } else if (tmp[1] == '>') {
694 str = tmp + 2;
695 (*fieldp)->ref = true;
696 } else {
697 semantic_error("Argument parse error: %s\n",
698 str);
699 return -EINVAL;
700 }
701 tmp = strpbrk(str, "-.[");
613 } 702 }
614
615 tmp = strpbrk(str, "-.");
616 if (tmp) { 703 if (tmp) {
617 (*fieldp)->name = strndup(str, tmp - str); 704 (*fieldp)->name = strndup(str, tmp - str);
618 if ((*fieldp)->name == NULL) 705 if ((*fieldp)->name == NULL)
619 return -ENOMEM; 706 return -ENOMEM;
707 if (*str != '[')
708 goodname = (*fieldp)->name;
620 pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref); 709 pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref);
621 fieldp = &(*fieldp)->next; 710 fieldp = &(*fieldp)->next;
622 } 711 }
@@ -624,11 +713,13 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
624 (*fieldp)->name = strdup(str); 713 (*fieldp)->name = strdup(str);
625 if ((*fieldp)->name == NULL) 714 if ((*fieldp)->name == NULL)
626 return -ENOMEM; 715 return -ENOMEM;
716 if (*str != '[')
717 goodname = (*fieldp)->name;
627 pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref); 718 pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref);
628 719
629 /* If no name is specified, set the last field name */ 720 /* If no name is specified, set the last field name (not array index) */
630 if (!arg->name) { 721 if (!arg->name) {
631 arg->name = strdup((*fieldp)->name); 722 arg->name = strdup(goodname);
632 if (arg->name == NULL) 723 if (arg->name == NULL)
633 return -ENOMEM; 724 return -ENOMEM;
634 } 725 }
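
With this change a probe argument can mix structure accessors and an array index, e.g. a string such as "dentry->d_name.name[3]" (hypothetical example), and the fallback argument name is taken from the last non-index component. A standalone sketch of how such a string splits into a base variable plus an accessor chain, mimicking the strpbrk()-based walk above (not the perf parser itself):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char buf[] = "dentry->d_name.name[3]";	/* hypothetical probe argument */
	char *str = buf, *sep;

	sep = strpbrk(str, "-.[");
	if (!sep) {
		printf("plain var: %s\n", str);
		return 0;
	}
	printf("base var: %.*s\n", (int)(sep - str), str);
	str = sep;
	while (str && *str) {
		if (*str == '[') {			/* array element */
			long idx = strtol(str + 1, &sep, 0);

			printf("  index: [%ld]\n", idx);
			str = (*sep == ']') ? sep + 1 : NULL;
		} else {				/* struct member */
			int ref = (*str == '-');	/* "->" dereferences */

			str += ref ? 2 : 1;
			sep = strpbrk(str, "-.[");
			printf("  field: %.*s (ref=%d)\n",
			       sep ? (int)(sep - str) : (int)strlen(str), str, ref);
			str = sep;
		}
	}
	return 0;
}
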
@@ -693,16 +784,17 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
693 return false; 784 return false;
694} 785}
695 786
696/* Parse kprobe_events event into struct probe_point */ 787/* Parse probe_events event into struct probe_point */
697int parse_kprobe_trace_command(const char *cmd, struct kprobe_trace_event *tev) 788static int parse_probe_trace_command(const char *cmd,
789 struct probe_trace_event *tev)
698{ 790{
699 struct kprobe_trace_point *tp = &tev->point; 791 struct probe_trace_point *tp = &tev->point;
700 char pr; 792 char pr;
701 char *p; 793 char *p;
702 int ret, i, argc; 794 int ret, i, argc;
703 char **argv; 795 char **argv;
704 796
705 pr_debug("Parsing kprobe_events: %s\n", cmd); 797 pr_debug("Parsing probe_events: %s\n", cmd);
706 argv = argv_split(cmd, &argc); 798 argv = argv_split(cmd, &argc);
707 if (!argv) { 799 if (!argv) {
708 pr_debug("Failed to split arguments.\n"); 800 pr_debug("Failed to split arguments.\n");
@@ -734,7 +826,7 @@ int parse_kprobe_trace_command(const char *cmd, struct kprobe_trace_event *tev)
734 tp->offset = 0; 826 tp->offset = 0;
735 827
736 tev->nargs = argc - 2; 828 tev->nargs = argc - 2;
737 tev->args = zalloc(sizeof(struct kprobe_trace_arg) * tev->nargs); 829 tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
738 if (tev->args == NULL) { 830 if (tev->args == NULL) {
739 ret = -ENOMEM; 831 ret = -ENOMEM;
740 goto out; 832 goto out;
@@ -776,8 +868,11 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
776 len -= ret; 868 len -= ret;
777 869
778 while (field) { 870 while (field) {
779 ret = e_snprintf(tmp, len, "%s%s", field->ref ? "->" : ".", 871 if (field->name[0] == '[')
780 field->name); 872 ret = e_snprintf(tmp, len, "%s", field->name);
873 else
874 ret = e_snprintf(tmp, len, "%s%s",
875 field->ref ? "->" : ".", field->name);
781 if (ret <= 0) 876 if (ret <= 0)
782 goto error; 877 goto error;
783 tmp += ret; 878 tmp += ret;
@@ -877,13 +972,13 @@ char *synthesize_perf_probe_command(struct perf_probe_event *pev)
877} 972}
878#endif 973#endif
879 974
880static int __synthesize_kprobe_trace_arg_ref(struct kprobe_trace_arg_ref *ref, 975static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
881 char **buf, size_t *buflen, 976 char **buf, size_t *buflen,
882 int depth) 977 int depth)
883{ 978{
884 int ret; 979 int ret;
885 if (ref->next) { 980 if (ref->next) {
886 depth = __synthesize_kprobe_trace_arg_ref(ref->next, buf, 981 depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
887 buflen, depth + 1); 982 buflen, depth + 1);
888 if (depth < 0) 983 if (depth < 0)
889 goto out; 984 goto out;
@@ -901,9 +996,10 @@ out:
901 996
902} 997}
903 998
904static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg, 999static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
905 char *buf, size_t buflen) 1000 char *buf, size_t buflen)
906{ 1001{
1002 struct probe_trace_arg_ref *ref = arg->ref;
907 int ret, depth = 0; 1003 int ret, depth = 0;
908 char *tmp = buf; 1004 char *tmp = buf;
909 1005
@@ -917,16 +1013,24 @@ static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg,
917 buf += ret; 1013 buf += ret;
918 buflen -= ret; 1014 buflen -= ret;
919 1015
1016 /* Special case: @XXX */
1017 if (arg->value[0] == '@' && arg->ref)
1018 ref = ref->next;
1019
920 /* Dereferencing arguments */ 1020 /* Dereferencing arguments */
921 if (arg->ref) { 1021 if (ref) {
922 depth = __synthesize_kprobe_trace_arg_ref(arg->ref, &buf, 1022 depth = __synthesize_probe_trace_arg_ref(ref, &buf,
923 &buflen, 1); 1023 &buflen, 1);
924 if (depth < 0) 1024 if (depth < 0)
925 return depth; 1025 return depth;
926 } 1026 }
927 1027
928 /* Print argument value */ 1028 /* Print argument value */
929 ret = e_snprintf(buf, buflen, "%s", arg->value); 1029 if (arg->value[0] == '@' && arg->ref)
1030 ret = e_snprintf(buf, buflen, "%s%+ld", arg->value,
1031 arg->ref->offset);
1032 else
1033 ret = e_snprintf(buf, buflen, "%s", arg->value);
930 if (ret < 0) 1034 if (ret < 0)
931 return ret; 1035 return ret;
932 buf += ret; 1036 buf += ret;
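
For the "@XXX" special case above, the offset carried by the first reference is folded directly into the value text ("%s%+ld") instead of being emitted as one more dereference level. A tiny standalone sketch of what that produces (the values are placeholders):

#include <stdio.h>

int main(void)
{
	const char *value = "@jiffies";	/* hypothetical static-variable value */
	long first_ref_offset = 8;	/* offset carried by the first reference */
	char buf[64];

	snprintf(buf, sizeof(buf), "%s%+ld", value, first_ref_offset);
	printf("%s\n", buf);		/* prints "@jiffies+8" */
	return 0;
}
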
@@ -951,9 +1055,9 @@ static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg,
951 return buf - tmp; 1055 return buf - tmp;
952} 1056}
953 1057
954char *synthesize_kprobe_trace_command(struct kprobe_trace_event *tev) 1058char *synthesize_probe_trace_command(struct probe_trace_event *tev)
955{ 1059{
956 struct kprobe_trace_point *tp = &tev->point; 1060 struct probe_trace_point *tp = &tev->point;
957 char *buf; 1061 char *buf;
958 int i, len, ret; 1062 int i, len, ret;
959 1063
@@ -969,7 +1073,7 @@ char *synthesize_kprobe_trace_command(struct kprobe_trace_event *tev)
969 goto error; 1073 goto error;
970 1074
971 for (i = 0; i < tev->nargs; i++) { 1075 for (i = 0; i < tev->nargs; i++) {
972 ret = synthesize_kprobe_trace_arg(&tev->args[i], buf + len, 1076 ret = synthesize_probe_trace_arg(&tev->args[i], buf + len,
973 MAX_CMDLEN - len); 1077 MAX_CMDLEN - len);
974 if (ret <= 0) 1078 if (ret <= 0)
975 goto error; 1079 goto error;
@@ -982,7 +1086,7 @@ error:
982 return NULL; 1086 return NULL;
983} 1087}
984 1088
985int convert_to_perf_probe_event(struct kprobe_trace_event *tev, 1089static int convert_to_perf_probe_event(struct probe_trace_event *tev,
986 struct perf_probe_event *pev) 1090 struct perf_probe_event *pev)
987{ 1091{
988 char buf[64] = ""; 1092 char buf[64] = "";
@@ -995,7 +1099,7 @@ int convert_to_perf_probe_event(struct kprobe_trace_event *tev,
995 return -ENOMEM; 1099 return -ENOMEM;
996 1100
997 /* Convert trace_point to probe_point */ 1101 /* Convert trace_point to probe_point */
998 ret = convert_to_perf_probe_point(&tev->point, &pev->point); 1102 ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point);
999 if (ret < 0) 1103 if (ret < 0)
1000 return ret; 1104 return ret;
1001 1105
@@ -1008,7 +1112,7 @@ int convert_to_perf_probe_event(struct kprobe_trace_event *tev,
1008 if (tev->args[i].name) 1112 if (tev->args[i].name)
1009 pev->args[i].name = strdup(tev->args[i].name); 1113 pev->args[i].name = strdup(tev->args[i].name);
1010 else { 1114 else {
1011 ret = synthesize_kprobe_trace_arg(&tev->args[i], 1115 ret = synthesize_probe_trace_arg(&tev->args[i],
1012 buf, 64); 1116 buf, 64);
1013 pev->args[i].name = strdup(buf); 1117 pev->args[i].name = strdup(buf);
1014 } 1118 }
@@ -1059,9 +1163,9 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
1059 memset(pev, 0, sizeof(*pev)); 1163 memset(pev, 0, sizeof(*pev));
1060} 1164}
1061 1165
1062void clear_kprobe_trace_event(struct kprobe_trace_event *tev) 1166static void clear_probe_trace_event(struct probe_trace_event *tev)
1063{ 1167{
1064 struct kprobe_trace_arg_ref *ref, *next; 1168 struct probe_trace_arg_ref *ref, *next;
1065 int i; 1169 int i;
1066 1170
1067 if (tev->event) 1171 if (tev->event)
@@ -1122,7 +1226,7 @@ static int open_kprobe_events(bool readwrite)
1122} 1226}
1123 1227
1124/* Get raw string list of current kprobe_events */ 1228/* Get raw string list of current kprobe_events */
1125static struct strlist *get_kprobe_trace_command_rawlist(int fd) 1229static struct strlist *get_probe_trace_command_rawlist(int fd)
1126{ 1230{
1127 int ret, idx; 1231 int ret, idx;
1128 FILE *fp; 1232 FILE *fp;
@@ -1190,7 +1294,7 @@ static int show_perf_probe_event(struct perf_probe_event *pev)
1190int show_perf_probe_events(void) 1294int show_perf_probe_events(void)
1191{ 1295{
1192 int fd, ret; 1296 int fd, ret;
1193 struct kprobe_trace_event tev; 1297 struct probe_trace_event tev;
1194 struct perf_probe_event pev; 1298 struct perf_probe_event pev;
1195 struct strlist *rawlist; 1299 struct strlist *rawlist;
1196 struct str_node *ent; 1300 struct str_node *ent;
@@ -1207,20 +1311,20 @@ int show_perf_probe_events(void)
1207 if (fd < 0) 1311 if (fd < 0)
1208 return fd; 1312 return fd;
1209 1313
1210 rawlist = get_kprobe_trace_command_rawlist(fd); 1314 rawlist = get_probe_trace_command_rawlist(fd);
1211 close(fd); 1315 close(fd);
1212 if (!rawlist) 1316 if (!rawlist)
1213 return -ENOENT; 1317 return -ENOENT;
1214 1318
1215 strlist__for_each(ent, rawlist) { 1319 strlist__for_each(ent, rawlist) {
1216 ret = parse_kprobe_trace_command(ent->s, &tev); 1320 ret = parse_probe_trace_command(ent->s, &tev);
1217 if (ret >= 0) { 1321 if (ret >= 0) {
1218 ret = convert_to_perf_probe_event(&tev, &pev); 1322 ret = convert_to_perf_probe_event(&tev, &pev);
1219 if (ret >= 0) 1323 if (ret >= 0)
1220 ret = show_perf_probe_event(&pev); 1324 ret = show_perf_probe_event(&pev);
1221 } 1325 }
1222 clear_perf_probe_event(&pev); 1326 clear_perf_probe_event(&pev);
1223 clear_kprobe_trace_event(&tev); 1327 clear_probe_trace_event(&tev);
1224 if (ret < 0) 1328 if (ret < 0)
1225 break; 1329 break;
1226 } 1330 }
@@ -1230,20 +1334,19 @@ int show_perf_probe_events(void)
1230} 1334}
1231 1335
1232/* Get current perf-probe event names */ 1336/* Get current perf-probe event names */
1233static struct strlist *get_kprobe_trace_event_names(int fd, bool include_group) 1337static struct strlist *get_probe_trace_event_names(int fd, bool include_group)
1234{ 1338{
1235 char buf[128]; 1339 char buf[128];
1236 struct strlist *sl, *rawlist; 1340 struct strlist *sl, *rawlist;
1237 struct str_node *ent; 1341 struct str_node *ent;
1238 struct kprobe_trace_event tev; 1342 struct probe_trace_event tev;
1239 int ret = 0; 1343 int ret = 0;
1240 1344
1241 memset(&tev, 0, sizeof(tev)); 1345 memset(&tev, 0, sizeof(tev));
1242 1346 rawlist = get_probe_trace_command_rawlist(fd);
1243 rawlist = get_kprobe_trace_command_rawlist(fd);
1244 sl = strlist__new(true, NULL); 1347 sl = strlist__new(true, NULL);
1245 strlist__for_each(ent, rawlist) { 1348 strlist__for_each(ent, rawlist) {
1246 ret = parse_kprobe_trace_command(ent->s, &tev); 1349 ret = parse_probe_trace_command(ent->s, &tev);
1247 if (ret < 0) 1350 if (ret < 0)
1248 break; 1351 break;
1249 if (include_group) { 1352 if (include_group) {
@@ -1253,7 +1356,7 @@ static struct strlist *get_kprobe_trace_event_names(int fd, bool include_group)
1253 ret = strlist__add(sl, buf); 1356 ret = strlist__add(sl, buf);
1254 } else 1357 } else
1255 ret = strlist__add(sl, tev.event); 1358 ret = strlist__add(sl, tev.event);
1256 clear_kprobe_trace_event(&tev); 1359 clear_probe_trace_event(&tev);
1257 if (ret < 0) 1360 if (ret < 0)
1258 break; 1361 break;
1259 } 1362 }
@@ -1266,13 +1369,13 @@ static struct strlist *get_kprobe_trace_event_names(int fd, bool include_group)
1266 return sl; 1369 return sl;
1267} 1370}
1268 1371
1269static int write_kprobe_trace_event(int fd, struct kprobe_trace_event *tev) 1372static int write_probe_trace_event(int fd, struct probe_trace_event *tev)
1270{ 1373{
1271 int ret = 0; 1374 int ret = 0;
1272 char *buf = synthesize_kprobe_trace_command(tev); 1375 char *buf = synthesize_probe_trace_command(tev);
1273 1376
1274 if (!buf) { 1377 if (!buf) {
1275 pr_debug("Failed to synthesize kprobe trace event.\n"); 1378 pr_debug("Failed to synthesize probe trace event.\n");
1276 return -EINVAL; 1379 return -EINVAL;
1277 } 1380 }
1278 1381
@@ -1325,12 +1428,12 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
1325 return ret; 1428 return ret;
1326} 1429}
1327 1430
1328static int __add_kprobe_trace_events(struct perf_probe_event *pev, 1431static int __add_probe_trace_events(struct perf_probe_event *pev,
1329 struct kprobe_trace_event *tevs, 1432 struct probe_trace_event *tevs,
1330 int ntevs, bool allow_suffix) 1433 int ntevs, bool allow_suffix)
1331{ 1434{
1332 int i, fd, ret; 1435 int i, fd, ret;
1333 struct kprobe_trace_event *tev = NULL; 1436 struct probe_trace_event *tev = NULL;
1334 char buf[64]; 1437 char buf[64];
1335 const char *event, *group; 1438 const char *event, *group;
1336 struct strlist *namelist; 1439 struct strlist *namelist;
@@ -1339,7 +1442,7 @@ static int __add_kprobe_trace_events(struct perf_probe_event *pev,
1339 if (fd < 0) 1442 if (fd < 0)
1340 return fd; 1443 return fd;
1341 /* Get current event names */ 1444 /* Get current event names */
1342 namelist = get_kprobe_trace_event_names(fd, false); 1445 namelist = get_probe_trace_event_names(fd, false);
1343 if (!namelist) { 1446 if (!namelist) {
1344 pr_debug("Failed to get current event list.\n"); 1447 pr_debug("Failed to get current event list.\n");
1345 return -EIO; 1448 return -EIO;
@@ -1374,7 +1477,7 @@ static int __add_kprobe_trace_events(struct perf_probe_event *pev,
1374 ret = -ENOMEM; 1477 ret = -ENOMEM;
1375 break; 1478 break;
1376 } 1479 }
1377 ret = write_kprobe_trace_event(fd, tev); 1480 ret = write_probe_trace_event(fd, tev);
1378 if (ret < 0) 1481 if (ret < 0)
1379 break; 1482 break;
1380 /* Add added event name to namelist */ 1483 /* Add added event name to namelist */
@@ -1411,21 +1514,21 @@ static int __add_kprobe_trace_events(struct perf_probe_event *pev,
1411 return ret; 1514 return ret;
1412} 1515}
1413 1516
1414static int convert_to_kprobe_trace_events(struct perf_probe_event *pev, 1517static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1415 struct kprobe_trace_event **tevs, 1518 struct probe_trace_event **tevs,
1416 int max_tevs) 1519 int max_tevs)
1417{ 1520{
1418 struct symbol *sym; 1521 struct symbol *sym;
1419 int ret = 0, i; 1522 int ret = 0, i;
1420 struct kprobe_trace_event *tev; 1523 struct probe_trace_event *tev;
1421 1524
1422 /* Convert perf_probe_event with debuginfo */ 1525 /* Convert perf_probe_event with debuginfo */
1423 ret = try_to_find_kprobe_trace_events(pev, tevs, max_tevs); 1526 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs);
1424 if (ret != 0) 1527 if (ret != 0)
1425 return ret; 1528 return ret;
1426 1529
1427 /* Allocate trace event buffer */ 1530 /* Allocate trace event buffer */
1428 tev = *tevs = zalloc(sizeof(struct kprobe_trace_event)); 1531 tev = *tevs = zalloc(sizeof(struct probe_trace_event));
1429 if (tev == NULL) 1532 if (tev == NULL)
1430 return -ENOMEM; 1533 return -ENOMEM;
1431 1534
@@ -1438,7 +1541,7 @@ static int convert_to_kprobe_trace_events(struct perf_probe_event *pev,
1438 tev->point.offset = pev->point.offset; 1541 tev->point.offset = pev->point.offset;
1439 tev->nargs = pev->nargs; 1542 tev->nargs = pev->nargs;
1440 if (tev->nargs) { 1543 if (tev->nargs) {
1441 tev->args = zalloc(sizeof(struct kprobe_trace_arg) 1544 tev->args = zalloc(sizeof(struct probe_trace_arg)
1442 * tev->nargs); 1545 * tev->nargs);
1443 if (tev->args == NULL) { 1546 if (tev->args == NULL) {
1444 ret = -ENOMEM; 1547 ret = -ENOMEM;
@@ -1479,7 +1582,7 @@ static int convert_to_kprobe_trace_events(struct perf_probe_event *pev,
1479 1582
1480 return 1; 1583 return 1;
1481error: 1584error:
1482 clear_kprobe_trace_event(tev); 1585 clear_probe_trace_event(tev);
1483 free(tev); 1586 free(tev);
1484 *tevs = NULL; 1587 *tevs = NULL;
1485 return ret; 1588 return ret;
@@ -1487,7 +1590,7 @@ error:
1487 1590
1488struct __event_package { 1591struct __event_package {
1489 struct perf_probe_event *pev; 1592 struct perf_probe_event *pev;
1490 struct kprobe_trace_event *tevs; 1593 struct probe_trace_event *tevs;
1491 int ntevs; 1594 int ntevs;
1492}; 1595};
1493 1596
@@ -1510,7 +1613,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1510 for (i = 0; i < npevs; i++) { 1613 for (i = 0; i < npevs; i++) {
1511 pkgs[i].pev = &pevs[i]; 1614 pkgs[i].pev = &pevs[i];
1512 /* Convert with or without debuginfo */ 1615 /* Convert with or without debuginfo */
1513 ret = convert_to_kprobe_trace_events(pkgs[i].pev, 1616 ret = convert_to_probe_trace_events(pkgs[i].pev,
1514 &pkgs[i].tevs, max_tevs); 1617 &pkgs[i].tevs, max_tevs);
1515 if (ret < 0) 1618 if (ret < 0)
1516 goto end; 1619 goto end;
@@ -1519,24 +1622,24 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1519 1622
1520 /* Loop 2: add all events */ 1623 /* Loop 2: add all events */
1521 for (i = 0; i < npevs && ret >= 0; i++) 1624 for (i = 0; i < npevs && ret >= 0; i++)
1522 ret = __add_kprobe_trace_events(pkgs[i].pev, pkgs[i].tevs, 1625 ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
1523 pkgs[i].ntevs, force_add); 1626 pkgs[i].ntevs, force_add);
1524end: 1627end:
1525 /* Loop 3: cleanup trace events */ 1628 /* Loop 3: cleanup trace events */
1526 for (i = 0; i < npevs; i++) 1629 for (i = 0; i < npevs; i++)
1527 for (j = 0; j < pkgs[i].ntevs; j++) 1630 for (j = 0; j < pkgs[i].ntevs; j++)
1528 clear_kprobe_trace_event(&pkgs[i].tevs[j]); 1631 clear_probe_trace_event(&pkgs[i].tevs[j]);
1529 1632
1530 return ret; 1633 return ret;
1531} 1634}
1532 1635
1533static int __del_trace_kprobe_event(int fd, struct str_node *ent) 1636static int __del_trace_probe_event(int fd, struct str_node *ent)
1534{ 1637{
1535 char *p; 1638 char *p;
1536 char buf[128]; 1639 char buf[128];
1537 int ret; 1640 int ret;
1538 1641
1539 /* Convert from perf-probe event to trace-kprobe event */ 1642 /* Convert from perf-probe event to trace-probe event */
1540 ret = e_snprintf(buf, 128, "-:%s", ent->s); 1643 ret = e_snprintf(buf, 128, "-:%s", ent->s);
1541 if (ret < 0) 1644 if (ret < 0)
1542 goto error; 1645 goto error;
@@ -1562,7 +1665,7 @@ error:
1562 return ret; 1665 return ret;
1563} 1666}
1564 1667
1565static int del_trace_kprobe_event(int fd, const char *group, 1668static int del_trace_probe_event(int fd, const char *group,
1566 const char *event, struct strlist *namelist) 1669 const char *event, struct strlist *namelist)
1567{ 1670{
1568 char buf[128]; 1671 char buf[128];
@@ -1579,7 +1682,7 @@ static int del_trace_kprobe_event(int fd, const char *group,
1579 strlist__for_each_safe(ent, n, namelist) 1682 strlist__for_each_safe(ent, n, namelist)
1580 if (strglobmatch(ent->s, buf)) { 1683 if (strglobmatch(ent->s, buf)) {
1581 found++; 1684 found++;
1582 ret = __del_trace_kprobe_event(fd, ent); 1685 ret = __del_trace_probe_event(fd, ent);
1583 if (ret < 0) 1686 if (ret < 0)
1584 break; 1687 break;
1585 strlist__remove(namelist, ent); 1688 strlist__remove(namelist, ent);
@@ -1588,7 +1691,7 @@ static int del_trace_kprobe_event(int fd, const char *group,
1588 ent = strlist__find(namelist, buf); 1691 ent = strlist__find(namelist, buf);
1589 if (ent) { 1692 if (ent) {
1590 found++; 1693 found++;
1591 ret = __del_trace_kprobe_event(fd, ent); 1694 ret = __del_trace_probe_event(fd, ent);
1592 if (ret >= 0) 1695 if (ret >= 0)
1593 strlist__remove(namelist, ent); 1696 strlist__remove(namelist, ent);
1594 } 1697 }
@@ -1612,7 +1715,7 @@ int del_perf_probe_events(struct strlist *dellist)
1612 return fd; 1715 return fd;
1613 1716
1614 /* Get current event names */ 1717 /* Get current event names */
1615 namelist = get_kprobe_trace_event_names(fd, true); 1718 namelist = get_probe_trace_event_names(fd, true);
1616 if (namelist == NULL) 1719 if (namelist == NULL)
1617 return -EINVAL; 1720 return -EINVAL;
1618 1721
@@ -1633,7 +1736,7 @@ int del_perf_probe_events(struct strlist *dellist)
1633 event = str; 1736 event = str;
1634 } 1737 }
1635 pr_debug("Group: %s, Event: %s\n", group, event); 1738 pr_debug("Group: %s, Event: %s\n", group, event);
1636 ret = del_trace_kprobe_event(fd, group, event, namelist); 1739 ret = del_trace_probe_event(fd, group, event, namelist);
1637 free(str); 1740 free(str);
1638 if (ret < 0) 1741 if (ret < 0)
1639 break; 1742 break;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e9db1a214ca4..5af39243a25b 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -7,33 +7,33 @@
7extern bool probe_event_dry_run; 7extern bool probe_event_dry_run;
8 8
9/* kprobe-tracer tracing point */ 9/* kprobe-tracer tracing point */
10struct kprobe_trace_point { 10struct probe_trace_point {
11 char *symbol; /* Base symbol */ 11 char *symbol; /* Base symbol */
12 unsigned long offset; /* Offset from symbol */ 12 unsigned long offset; /* Offset from symbol */
13 bool retprobe; /* Return probe flag */ 13 bool retprobe; /* Return probe flag */
14}; 14};
15 15
16/* kprobe-tracer tracing argument referencing offset */ 16/* probe-tracer tracing argument referencing offset */
17struct kprobe_trace_arg_ref { 17struct probe_trace_arg_ref {
18 struct kprobe_trace_arg_ref *next; /* Next reference */ 18 struct probe_trace_arg_ref *next; /* Next reference */
19 long offset; /* Offset value */ 19 long offset; /* Offset value */
20}; 20};
21 21
22/* kprobe-tracer tracing argument */ 22/* kprobe-tracer tracing argument */
23struct kprobe_trace_arg { 23struct probe_trace_arg {
24 char *name; /* Argument name */ 24 char *name; /* Argument name */
25 char *value; /* Base value */ 25 char *value; /* Base value */
26 char *type; /* Type name */ 26 char *type; /* Type name */
27 struct kprobe_trace_arg_ref *ref; /* Referencing offset */ 27 struct probe_trace_arg_ref *ref; /* Referencing offset */
28}; 28};
29 29
30/* kprobe-tracer tracing event (point + arg) */ 30/* kprobe-tracer tracing event (point + arg) */
31struct kprobe_trace_event { 31struct probe_trace_event {
32 char *event; /* Event name */ 32 char *event; /* Event name */
33 char *group; /* Group name */ 33 char *group; /* Group name */
34 struct kprobe_trace_point point; /* Trace point */ 34 struct probe_trace_point point; /* Trace point */
35 int nargs; /* Number of args */ 35 int nargs; /* Number of args */
36 struct kprobe_trace_arg *args; /* Arguments */ 36 struct probe_trace_arg *args; /* Arguments */
37}; 37};
38 38
39/* Perf probe probing point */ 39/* Perf probe probing point */
@@ -50,6 +50,7 @@ struct perf_probe_point {
50struct perf_probe_arg_field { 50struct perf_probe_arg_field {
51 struct perf_probe_arg_field *next; /* Next field */ 51 struct perf_probe_arg_field *next; /* Next field */
52 char *name; /* Name of the field */ 52 char *name; /* Name of the field */
53 long index; /* Array index number */
53 bool ref; /* Referencing flag */ 54 bool ref; /* Referencing flag */
54}; 55};
55 56
@@ -85,31 +86,25 @@ struct line_range {
85 int end; /* End line number */ 86 int end; /* End line number */
86 int offset; /* Start line offset */ 87 int offset; /* Start line offset */
87 char *path; /* Real path name */ 88 char *path; /* Real path name */
89 char *comp_dir; /* Compile directory */
88 struct list_head line_list; /* Visible lines */ 90 struct list_head line_list; /* Visible lines */
89}; 91};
90 92
91/* Command string to events */ 93/* Command string to events */
92extern int parse_perf_probe_command(const char *cmd, 94extern int parse_perf_probe_command(const char *cmd,
93 struct perf_probe_event *pev); 95 struct perf_probe_event *pev);
94extern int parse_kprobe_trace_command(const char *cmd,
95 struct kprobe_trace_event *tev);
96 96
97/* Events to command string */ 97/* Events to command string */
98extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); 98extern char *synthesize_perf_probe_command(struct perf_probe_event *pev);
99extern char *synthesize_kprobe_trace_command(struct kprobe_trace_event *tev); 99extern char *synthesize_probe_trace_command(struct probe_trace_event *tev);
100extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, 100extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf,
101 size_t len); 101 size_t len);
102 102
103/* Check the perf_probe_event needs debuginfo */ 103/* Check the perf_probe_event needs debuginfo */
104extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); 104extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
105 105
106/* Convert from kprobe_trace_event to perf_probe_event */
107extern int convert_to_perf_probe_event(struct kprobe_trace_event *tev,
108 struct perf_probe_event *pev);
109
110/* Release event contents */ 106/* Release event contents */
111extern void clear_perf_probe_event(struct perf_probe_event *pev); 107extern void clear_perf_probe_event(struct perf_probe_event *pev);
112extern void clear_kprobe_trace_event(struct kprobe_trace_event *tev);
113 108
114/* Command string to line-range */ 109/* Command string to line-range */
115extern int parse_line_range_desc(const char *cmd, struct line_range *lr); 110extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index d964cb199c67..840f1aabbb74 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -37,6 +37,7 @@
37#include "event.h" 37#include "event.h"
38#include "debug.h" 38#include "debug.h"
39#include "util.h" 39#include "util.h"
40#include "symbol.h"
40#include "probe-finder.h" 41#include "probe-finder.h"
41 42
42/* Kprobe tracer basic type is up to u64 */ 43/* Kprobe tracer basic type is up to u64 */
@@ -143,12 +144,21 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
143 return src; 144 return src;
144} 145}
145 146
147/* Get DW_AT_comp_dir (should be NULL with older gcc) */
148static const char *cu_get_comp_dir(Dwarf_Die *cu_die)
149{
150 Dwarf_Attribute attr;
151 if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
152 return NULL;
153 return dwarf_formstring(&attr);
154}
155
146/* Compare diename and tname */ 156/* Compare diename and tname */
147static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) 157static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
148{ 158{
149 const char *name; 159 const char *name;
150 name = dwarf_diename(dw_die); 160 name = dwarf_diename(dw_die);
151 return name ? strcmp(tname, name) : -1; 161 return name ? (strcmp(tname, name) == 0) : false;
152} 162}
153 163
154/* Get type die, but skip qualifiers and typedef */ 164/* Get type die, but skip qualifiers and typedef */
@@ -319,7 +329,7 @@ static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
319 tag = dwarf_tag(die_mem); 329 tag = dwarf_tag(die_mem);
320 if ((tag == DW_TAG_formal_parameter || 330 if ((tag == DW_TAG_formal_parameter ||
321 tag == DW_TAG_variable) && 331 tag == DW_TAG_variable) &&
322 (die_compare_name(die_mem, name) == 0)) 332 die_compare_name(die_mem, name))
323 return DIE_FIND_CB_FOUND; 333 return DIE_FIND_CB_FOUND;
324 334
325 return DIE_FIND_CB_CONTINUE; 335 return DIE_FIND_CB_CONTINUE;
@@ -338,7 +348,7 @@ static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
338 const char *name = data; 348 const char *name = data;
339 349
340 if ((dwarf_tag(die_mem) == DW_TAG_member) && 350 if ((dwarf_tag(die_mem) == DW_TAG_member) &&
341 (die_compare_name(die_mem, name) == 0)) 351 die_compare_name(die_mem, name))
342 return DIE_FIND_CB_FOUND; 352 return DIE_FIND_CB_FOUND;
343 353
344 return DIE_FIND_CB_SIBLING; 354 return DIE_FIND_CB_SIBLING;
@@ -356,14 +366,50 @@ static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
356 * Probe finder related functions 366 * Probe finder related functions
357 */ 367 */
358 368
369static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
370{
371 struct probe_trace_arg_ref *ref;
372 ref = zalloc(sizeof(struct probe_trace_arg_ref));
373 if (ref != NULL)
374 ref->offset = offs;
375 return ref;
376}
377
359/* Show a location */ 378/* Show a location */
360static int convert_location(Dwarf_Op *op, struct probe_finder *pf) 379static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
361{ 380{
381 Dwarf_Attribute attr;
382 Dwarf_Op *op;
383 size_t nops;
362 unsigned int regn; 384 unsigned int regn;
363 Dwarf_Word offs = 0; 385 Dwarf_Word offs = 0;
364 bool ref = false; 386 bool ref = false;
365 const char *regs; 387 const char *regs;
366 struct kprobe_trace_arg *tvar = pf->tvar; 388 struct probe_trace_arg *tvar = pf->tvar;
389 int ret;
390
391 /* TODO: handle more than 1 exprs */
392 if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
393 dwarf_getlocation_addr(&attr, pf->addr, &op, &nops, 1) <= 0 ||
394 nops == 0) {
395 /* TODO: Support const_value */
396 pr_err("Failed to find the location of %s at this address.\n"
397 " Perhaps, it has been optimized out.\n", pf->pvar->var);
398 return -ENOENT;
399 }
400
401 if (op->atom == DW_OP_addr) {
402 /* Static variables on memory (not stack), make @varname */
403 ret = strlen(dwarf_diename(vr_die));
404 tvar->value = zalloc(ret + 2);
405 if (tvar->value == NULL)
406 return -ENOMEM;
407 snprintf(tvar->value, ret + 2, "@%s", dwarf_diename(vr_die));
408 tvar->ref = alloc_trace_arg_ref((long)offs);
409 if (tvar->ref == NULL)
410 return -ENOMEM;
411 return 0;
412 }
367 413
368 /* If this is based on frame buffer, set the offset */ 414 /* If this is based on frame buffer, set the offset */
369 if (op->atom == DW_OP_fbreg) { 415 if (op->atom == DW_OP_fbreg) {
@@ -405,27 +451,72 @@ static int convert_location(Dwarf_Op *op, struct probe_finder *pf)
405 return -ENOMEM; 451 return -ENOMEM;
406 452
407 if (ref) { 453 if (ref) {
408 tvar->ref = zalloc(sizeof(struct kprobe_trace_arg_ref)); 454 tvar->ref = alloc_trace_arg_ref((long)offs);
409 if (tvar->ref == NULL) 455 if (tvar->ref == NULL)
410 return -ENOMEM; 456 return -ENOMEM;
411 tvar->ref->offset = (long)offs;
412 } 457 }
413 return 0; 458 return 0;
414} 459}
415 460
416static int convert_variable_type(Dwarf_Die *vr_die, 461static int convert_variable_type(Dwarf_Die *vr_die,
417 struct kprobe_trace_arg *targ) 462 struct probe_trace_arg *tvar,
463 const char *cast)
418{ 464{
465 struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
419 Dwarf_Die type; 466 Dwarf_Die type;
420 char buf[16]; 467 char buf[16];
421 int ret; 468 int ret;
422 469
470 /* TODO: check all types */
471 if (cast && strcmp(cast, "string") != 0) {
472 /* Non string type is OK */
473 tvar->type = strdup(cast);
474 return (tvar->type == NULL) ? -ENOMEM : 0;
475 }
476
423 if (die_get_real_type(vr_die, &type) == NULL) { 477 if (die_get_real_type(vr_die, &type) == NULL) {
424 pr_warning("Failed to get a type information of %s.\n", 478 pr_warning("Failed to get a type information of %s.\n",
425 dwarf_diename(vr_die)); 479 dwarf_diename(vr_die));
426 return -ENOENT; 480 return -ENOENT;
427 } 481 }
428 482
483 pr_debug("%s type is %s.\n",
484 dwarf_diename(vr_die), dwarf_diename(&type));
485
486 if (cast && strcmp(cast, "string") == 0) { /* String type */
487 ret = dwarf_tag(&type);
488 if (ret != DW_TAG_pointer_type &&
489 ret != DW_TAG_array_type) {
490 pr_warning("Failed to cast into string: "
491 "%s(%s) is not a pointer nor array.",
492 dwarf_diename(vr_die), dwarf_diename(&type));
493 return -EINVAL;
494 }
495 if (ret == DW_TAG_pointer_type) {
496 if (die_get_real_type(&type, &type) == NULL) {
497 pr_warning("Failed to get a type information.");
498 return -ENOENT;
499 }
500 while (*ref_ptr)
501 ref_ptr = &(*ref_ptr)->next;
502 /* Add new reference with offset +0 */
503 *ref_ptr = zalloc(sizeof(struct probe_trace_arg_ref));
504 if (*ref_ptr == NULL) {
505 pr_warning("Out of memory error\n");
506 return -ENOMEM;
507 }
508 }
509 if (!die_compare_name(&type, "char") &&
510 !die_compare_name(&type, "unsigned char")) {
511 pr_warning("Failed to cast into string: "
512 "%s is not (unsigned) char *.",
513 dwarf_diename(vr_die));
514 return -EINVAL;
515 }
516 tvar->type = strdup(cast);
517 return (tvar->type == NULL) ? -ENOMEM : 0;
518 }
519
429 ret = die_get_byte_size(&type) * 8; 520 ret = die_get_byte_size(&type) * 8;
430 if (ret) { 521 if (ret) {
431 /* Check the bitwidth */ 522 /* Check the bitwidth */
@@ -445,8 +536,8 @@ static int convert_variable_type(Dwarf_Die *vr_die,
445 strerror(-ret)); 536 strerror(-ret));
446 return ret; 537 return ret;
447 } 538 }
448 targ->type = strdup(buf); 539 tvar->type = strdup(buf);
449 if (targ->type == NULL) 540 if (tvar->type == NULL)
450 return -ENOMEM; 541 return -ENOMEM;
451 } 542 }
452 return 0; 543 return 0;
@@ -454,22 +545,50 @@ static int convert_variable_type(Dwarf_Die *vr_die,
454 545
455static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, 546static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
456 struct perf_probe_arg_field *field, 547 struct perf_probe_arg_field *field,
457 struct kprobe_trace_arg_ref **ref_ptr, 548 struct probe_trace_arg_ref **ref_ptr,
458 Dwarf_Die *die_mem) 549 Dwarf_Die *die_mem)
459{ 550{
460 struct kprobe_trace_arg_ref *ref = *ref_ptr; 551 struct probe_trace_arg_ref *ref = *ref_ptr;
461 Dwarf_Die type; 552 Dwarf_Die type;
462 Dwarf_Word offs; 553 Dwarf_Word offs;
463 int ret; 554 int ret, tag;
464 555
465 pr_debug("converting %s in %s\n", field->name, varname); 556 pr_debug("converting %s in %s\n", field->name, varname);
466 if (die_get_real_type(vr_die, &type) == NULL) { 557 if (die_get_real_type(vr_die, &type) == NULL) {
467 pr_warning("Failed to get the type of %s.\n", varname); 558 pr_warning("Failed to get the type of %s.\n", varname);
468 return -ENOENT; 559 return -ENOENT;
469 } 560 }
470 561 pr_debug2("Var real type: (%x)\n", (unsigned)dwarf_dieoffset(&type));
471 /* Check the pointer and dereference */ 562 tag = dwarf_tag(&type);
472 if (dwarf_tag(&type) == DW_TAG_pointer_type) { 563
564 if (field->name[0] == '[' &&
565 (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) {
566 if (field->next)
567 /* Save original type for next field */
568 memcpy(die_mem, &type, sizeof(*die_mem));
569 /* Get the type of this array */
570 if (die_get_real_type(&type, &type) == NULL) {
571 pr_warning("Failed to get the type of %s.\n", varname);
572 return -ENOENT;
573 }
574 pr_debug2("Array real type: (%x)\n",
575 (unsigned)dwarf_dieoffset(&type));
576 if (tag == DW_TAG_pointer_type) {
577 ref = zalloc(sizeof(struct probe_trace_arg_ref));
578 if (ref == NULL)
579 return -ENOMEM;
580 if (*ref_ptr)
581 (*ref_ptr)->next = ref;
582 else
583 *ref_ptr = ref;
584 }
585 ref->offset += die_get_byte_size(&type) * field->index;
586 if (!field->next)
587 /* Save vr_die for converting types */
588 memcpy(die_mem, vr_die, sizeof(*die_mem));
589 goto next;
590 } else if (tag == DW_TAG_pointer_type) {
591 /* Check the pointer and dereference */
473 if (!field->ref) { 592 if (!field->ref) {
474 pr_err("Semantic error: %s must be referred by '->'\n", 593 pr_err("Semantic error: %s must be referred by '->'\n",
475 field->name); 594 field->name);
@@ -486,7 +605,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
486 return -EINVAL; 605 return -EINVAL;
487 } 606 }
488 607
489 ref = zalloc(sizeof(struct kprobe_trace_arg_ref)); 608 ref = zalloc(sizeof(struct probe_trace_arg_ref));
490 if (ref == NULL) 609 if (ref == NULL)
491 return -ENOMEM; 610 return -ENOMEM;
492 if (*ref_ptr) 611 if (*ref_ptr)
@@ -495,10 +614,15 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
495 *ref_ptr = ref; 614 *ref_ptr = ref;
496 } else { 615 } else {
497 /* Verify it is a data structure */ 616 /* Verify it is a data structure */
498 if (dwarf_tag(&type) != DW_TAG_structure_type) { 617 if (tag != DW_TAG_structure_type) {
499 pr_warning("%s is not a data structure.\n", varname); 618 pr_warning("%s is not a data structure.\n", varname);
500 return -EINVAL; 619 return -EINVAL;
501 } 620 }
621 if (field->name[0] == '[') {
622 pr_err("Semantic error: %s is neither a pointer nor an array.",
623 varname);
624 return -EINVAL;
625 }
502 if (field->ref) { 626 if (field->ref) {
503 pr_err("Semantic error: %s must be referred by '.'\n", 627 pr_err("Semantic error: %s must be referred by '.'\n",
504 field->name); 628 field->name);
@@ -525,6 +649,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
525 } 649 }
526 ref->offset += (long)offs; 650 ref->offset += (long)offs;
527 651
652next:
528 /* Converting next field */ 653 /* Converting next field */
529 if (field->next) 654 if (field->next)
530 return convert_variable_fields(die_mem, field->name, 655 return convert_variable_fields(die_mem, field->name,
@@ -536,51 +661,32 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
536/* Show a variable in kprobe event format */ 661/* Show a variable in kprobe event format */
537static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) 662static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
538{ 663{
539 Dwarf_Attribute attr;
540 Dwarf_Die die_mem; 664 Dwarf_Die die_mem;
541 Dwarf_Op *expr;
542 size_t nexpr;
543 int ret; 665 int ret;
544 666
545 if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) 667 pr_debug("Converting variable %s into trace event.\n",
546 goto error; 668 dwarf_diename(vr_die));
547 /* TODO: handle more than 1 exprs */
548 ret = dwarf_getlocation_addr(&attr, pf->addr, &expr, &nexpr, 1);
549 if (ret <= 0 || nexpr == 0)
550 goto error;
551 669
552 ret = convert_location(expr, pf); 670 ret = convert_variable_location(vr_die, pf);
553 if (ret == 0 && pf->pvar->field) { 671 if (ret == 0 && pf->pvar->field) {
554 ret = convert_variable_fields(vr_die, pf->pvar->var, 672 ret = convert_variable_fields(vr_die, pf->pvar->var,
555 pf->pvar->field, &pf->tvar->ref, 673 pf->pvar->field, &pf->tvar->ref,
556 &die_mem); 674 &die_mem);
557 vr_die = &die_mem; 675 vr_die = &die_mem;
558 } 676 }
559 if (ret == 0) { 677 if (ret == 0)
560 if (pf->pvar->type) { 678 ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type);
561 pf->tvar->type = strdup(pf->pvar->type);
562 if (pf->tvar->type == NULL)
563 ret = -ENOMEM;
564 } else
565 ret = convert_variable_type(vr_die, pf->tvar);
566 }
567 /* *expr will be cached in libdw. Don't free it. */ 679 /* *expr will be cached in libdw. Don't free it. */
568 return ret; 680 return ret;
569error:
570 /* TODO: Support const_value */
571 pr_err("Failed to find the location of %s at this address.\n"
572 " Perhaps, it has been optimized out.\n", pf->pvar->var);
573 return -ENOENT;
574} 681}
575 682
576/* Find a variable in a subprogram die */ 683/* Find a variable in a subprogram die */
577static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) 684static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
578{ 685{
579 Dwarf_Die vr_die; 686 Dwarf_Die vr_die, *scopes;
580 char buf[32], *ptr; 687 char buf[32], *ptr;
581 int ret; 688 int ret, nscopes;
582 689
583 /* TODO: Support arrays */
584 if (pf->pvar->name) 690 if (pf->pvar->name)
585 pf->tvar->name = strdup(pf->pvar->name); 691 pf->tvar->name = strdup(pf->pvar->name);
586 else { 692 else {
@@ -607,18 +713,32 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
607 pr_debug("Searching '%s' variable in context.\n", 713 pr_debug("Searching '%s' variable in context.\n",
608 pf->pvar->var); 714 pf->pvar->var);
609 /* Search child die for local variables and parameters. */ 715 /* Search child die for local variables and parameters. */
610 if (!die_find_variable(sp_die, pf->pvar->var, &vr_die)) { 716 if (die_find_variable(sp_die, pf->pvar->var, &vr_die))
717 ret = convert_variable(&vr_die, pf);
718 else {
719 /* Search upper class */
720 nscopes = dwarf_getscopes_die(sp_die, &scopes);
721 if (nscopes > 0) {
722 ret = dwarf_getscopevar(scopes, nscopes, pf->pvar->var,
723 0, NULL, 0, 0, &vr_die);
724 if (ret >= 0)
725 ret = convert_variable(&vr_die, pf);
726 else
727 ret = -ENOENT;
728 free(scopes);
729 } else
730 ret = -ENOENT;
731 }
732 if (ret < 0)
611 pr_warning("Failed to find '%s' in this function.\n", 733 pr_warning("Failed to find '%s' in this function.\n",
612 pf->pvar->var); 734 pf->pvar->var);
613 return -ENOENT; 735 return ret;
614 }
615 return convert_variable(&vr_die, pf);
616} 736}
617 737
618/* Show a probe point to output buffer */ 738/* Show a probe point to output buffer */
619static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) 739static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
620{ 740{
621 struct kprobe_trace_event *tev; 741 struct probe_trace_event *tev;
622 Dwarf_Addr eaddr; 742 Dwarf_Addr eaddr;
623 Dwarf_Die die_mem; 743 Dwarf_Die die_mem;
624 const char *name; 744 const char *name;
@@ -683,7 +803,7 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
683 803
684 /* Find each argument */ 804 /* Find each argument */
685 tev->nargs = pf->pev->nargs; 805 tev->nargs = pf->pev->nargs;
686 tev->args = zalloc(sizeof(struct kprobe_trace_arg) * tev->nargs); 806 tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
687 if (tev->args == NULL) 807 if (tev->args == NULL)
688 return -ENOMEM; 808 return -ENOMEM;
689 for (i = 0; i < pf->pev->nargs; i++) { 809 for (i = 0; i < pf->pev->nargs; i++) {
@@ -897,7 +1017,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
897 1017
898 /* Check tag and diename */ 1018 /* Check tag and diename */
899 if (dwarf_tag(sp_die) != DW_TAG_subprogram || 1019 if (dwarf_tag(sp_die) != DW_TAG_subprogram ||
900 die_compare_name(sp_die, pp->function) != 0) 1020 !die_compare_name(sp_die, pp->function))
901 return DWARF_CB_OK; 1021 return DWARF_CB_OK;
902 1022
903 pf->fname = dwarf_decl_file(sp_die); 1023 pf->fname = dwarf_decl_file(sp_die);
@@ -940,9 +1060,9 @@ static int find_probe_point_by_func(struct probe_finder *pf)
940 return _param.retval; 1060 return _param.retval;
941} 1061}
942 1062
943/* Find kprobe_trace_events specified by perf_probe_event from debuginfo */ 1063/* Find probe_trace_events specified by perf_probe_event from debuginfo */
944int find_kprobe_trace_events(int fd, struct perf_probe_event *pev, 1064int find_probe_trace_events(int fd, struct perf_probe_event *pev,
945 struct kprobe_trace_event **tevs, int max_tevs) 1065 struct probe_trace_event **tevs, int max_tevs)
946{ 1066{
947 struct probe_finder pf = {.pev = pev, .max_tevs = max_tevs}; 1067 struct probe_finder pf = {.pev = pev, .max_tevs = max_tevs};
948 struct perf_probe_point *pp = &pev->point; 1068 struct perf_probe_point *pp = &pev->point;
@@ -952,7 +1072,7 @@ int find_kprobe_trace_events(int fd, struct perf_probe_event *pev,
952 Dwarf *dbg; 1072 Dwarf *dbg;
953 int ret = 0; 1073 int ret = 0;
954 1074
955 pf.tevs = zalloc(sizeof(struct kprobe_trace_event) * max_tevs); 1075 pf.tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
956 if (pf.tevs == NULL) 1076 if (pf.tevs == NULL)
957 return -ENOMEM; 1077 return -ENOMEM;
958 *tevs = pf.tevs; 1078 *tevs = pf.tevs;
@@ -1096,7 +1216,7 @@ end:
1096static int line_range_add_line(const char *src, unsigned int lineno, 1216static int line_range_add_line(const char *src, unsigned int lineno,
1097 struct line_range *lr) 1217 struct line_range *lr)
1098{ 1218{
1099 /* Copy real path */ 1219 /* Copy source path */
1100 if (!lr->path) { 1220 if (!lr->path) {
1101 lr->path = strdup(src); 1221 lr->path = strdup(src);
1102 if (lr->path == NULL) 1222 if (lr->path == NULL)
@@ -1220,7 +1340,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1220 struct line_range *lr = lf->lr; 1340 struct line_range *lr = lf->lr;
1221 1341
1222 if (dwarf_tag(sp_die) == DW_TAG_subprogram && 1342 if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
1223 die_compare_name(sp_die, lr->function) == 0) { 1343 die_compare_name(sp_die, lr->function)) {
1224 lf->fname = dwarf_decl_file(sp_die); 1344 lf->fname = dwarf_decl_file(sp_die);
1225 dwarf_decl_line(sp_die, &lr->offset); 1345 dwarf_decl_line(sp_die, &lr->offset);
1226 pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); 1346 pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
@@ -1263,6 +1383,7 @@ int find_line_range(int fd, struct line_range *lr)
1263 size_t cuhl; 1383 size_t cuhl;
1264 Dwarf_Die *diep; 1384 Dwarf_Die *diep;
1265 Dwarf *dbg; 1385 Dwarf *dbg;
1386 const char *comp_dir;
1266 1387
1267 dbg = dwarf_begin(fd, DWARF_C_READ); 1388 dbg = dwarf_begin(fd, DWARF_C_READ);
1268 if (!dbg) { 1389 if (!dbg) {
@@ -1298,7 +1419,18 @@ int find_line_range(int fd, struct line_range *lr)
1298 } 1419 }
1299 off = noff; 1420 off = noff;
1300 } 1421 }
1301 pr_debug("path: %lx\n", (unsigned long)lr->path); 1422
1423 /* Store comp_dir */
1424 if (lf.found) {
1425 comp_dir = cu_get_comp_dir(&lf.cu_die);
1426 if (comp_dir) {
1427 lr->comp_dir = strdup(comp_dir);
1428 if (!lr->comp_dir)
1429 ret = -ENOMEM;
1430 }
1431 }
1432
1433 pr_debug("path: %s\n", lr->path);
1302 dwarf_end(dbg); 1434 dwarf_end(dbg);
1303 1435
1304 return (ret < 0) ? ret : lf.found; 1436 return (ret < 0) ? ret : lf.found;
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index e1f61dcd18ff..4507d519f183 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -16,9 +16,9 @@ static inline int is_c_varname(const char *name)
16} 16}
17 17
18#ifdef DWARF_SUPPORT 18#ifdef DWARF_SUPPORT
19/* Find kprobe_trace_events specified by perf_probe_event from debuginfo */ 19/* Find probe_trace_events specified by perf_probe_event from debuginfo */
20extern int find_kprobe_trace_events(int fd, struct perf_probe_event *pev, 20extern int find_probe_trace_events(int fd, struct perf_probe_event *pev,
21 struct kprobe_trace_event **tevs, 21 struct probe_trace_event **tevs,
22 int max_tevs); 22 int max_tevs);
23 23
24/* Find a perf_probe_point from debuginfo */ 24/* Find a perf_probe_point from debuginfo */
@@ -33,7 +33,7 @@ extern int find_line_range(int fd, struct line_range *lr);
33 33
34struct probe_finder { 34struct probe_finder {
35 struct perf_probe_event *pev; /* Target probe event */ 35 struct perf_probe_event *pev; /* Target probe event */
36 struct kprobe_trace_event *tevs; /* Result trace events */ 36 struct probe_trace_event *tevs; /* Result trace events */
37 int ntevs; /* Number of trace events */ 37 int ntevs; /* Number of trace events */
38 int max_tevs; /* Max number of trace events */ 38 int max_tevs; /* Max number of trace events */
39 39
@@ -50,7 +50,7 @@ struct probe_finder {
50#endif 50#endif
51 Dwarf_Op *fb_ops; /* Frame base attribute */ 51 Dwarf_Op *fb_ops; /* Frame base attribute */
52 struct perf_probe_arg *pvar; /* Current target variable */ 52 struct perf_probe_arg *pvar; /* Current target variable */
53 struct kprobe_trace_arg *tvar; /* Current result variable */ 53 struct probe_trace_arg *tvar; /* Current result variable */
54}; 54};
55 55
56struct line_finder { 56struct line_finder {
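
As an illustrative sketch only (not part of this patch): how a caller might drive the renamed finder. The list_trace_events() helper, the 128-event cap, and the probe_trace_point field names are assumptions based on the structures visible in the hunks above; the finder is assumed to return the number of events found or a negative errno.

#include <stdio.h>
#include <stdlib.h>
#include "probe-event.h"
#include "probe-finder.h"

static int list_trace_events(int debuginfo_fd, struct perf_probe_event *pev)
{
	struct probe_trace_event *tevs = NULL;
	int ntevs, i;

	/* Assumed: returns the number of trace events found, or a negative errno. */
	ntevs = find_probe_trace_events(debuginfo_fd, pev, &tevs, 128);
	if (ntevs < 0)
		return ntevs;

	for (i = 0; i < ntevs; i++)
		printf("%s+%lu\n", tevs[i].point.symbol, tevs[i].point.offset);

	free(tevs);	/* the finder allocates the result array for the caller */
	return 0;
}
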
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c422cd676313..fa9d652c2dc3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -27,8 +27,10 @@ static int perf_session__open(struct perf_session *self, bool force)
27 27
28 self->fd = open(self->filename, O_RDONLY); 28 self->fd = open(self->filename, O_RDONLY);
29 if (self->fd < 0) { 29 if (self->fd < 0) {
30 pr_err("failed to open file: %s", self->filename); 30 int err = errno;
31 if (!strcmp(self->filename, "perf.data")) 31
32 pr_err("failed to open %s: %s", self->filename, strerror(err));
33 if (err == ENOENT && !strcmp(self->filename, "perf.data"))
32 pr_err(" (try 'perf record' first)"); 34 pr_err(" (try 'perf record' first)");
33 pr_err("\n"); 35 pr_err("\n");
34 return -errno; 36 return -errno;
@@ -77,6 +79,12 @@ int perf_session__create_kernel_maps(struct perf_session *self)
77 return ret; 79 return ret;
78} 80}
79 81
82static void perf_session__destroy_kernel_maps(struct perf_session *self)
83{
84 machine__destroy_kernel_maps(&self->host_machine);
85 machines__destroy_guest_kernel_maps(&self->machines);
86}
87
80struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe) 88struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
81{ 89{
82 size_t len = filename ? strlen(filename) + 1 : 0; 90 size_t len = filename ? strlen(filename) + 1 : 0;
@@ -94,8 +102,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
94 self->hists_tree = RB_ROOT; 102 self->hists_tree = RB_ROOT;
95 self->last_match = NULL; 103 self->last_match = NULL;
96 self->mmap_window = 32; 104 self->mmap_window = 32;
97 self->cwd = NULL;
98 self->cwdlen = 0;
99 self->machines = RB_ROOT; 105 self->machines = RB_ROOT;
100 self->repipe = repipe; 106 self->repipe = repipe;
101 INIT_LIST_HEAD(&self->ordered_samples.samples_head); 107 INIT_LIST_HEAD(&self->ordered_samples.samples_head);
@@ -124,16 +130,43 @@ out_delete:
124 return NULL; 130 return NULL;
125} 131}
126 132
133static void perf_session__delete_dead_threads(struct perf_session *self)
134{
135 struct thread *n, *t;
136
137 list_for_each_entry_safe(t, n, &self->dead_threads, node) {
138 list_del(&t->node);
139 thread__delete(t);
140 }
141}
142
143static void perf_session__delete_threads(struct perf_session *self)
144{
145 struct rb_node *nd = rb_first(&self->threads);
146
147 while (nd) {
148 struct thread *t = rb_entry(nd, struct thread, rb_node);
149
150 rb_erase(&t->rb_node, &self->threads);
151 nd = rb_next(nd);
152 thread__delete(t);
153 }
154}
155
127void perf_session__delete(struct perf_session *self) 156void perf_session__delete(struct perf_session *self)
128{ 157{
129 perf_header__exit(&self->header); 158 perf_header__exit(&self->header);
159 perf_session__destroy_kernel_maps(self);
160 perf_session__delete_dead_threads(self);
161 perf_session__delete_threads(self);
162 machine__exit(&self->host_machine);
130 close(self->fd); 163 close(self->fd);
131 free(self->cwd);
132 free(self); 164 free(self);
133} 165}
134 166
135void perf_session__remove_thread(struct perf_session *self, struct thread *th) 167void perf_session__remove_thread(struct perf_session *self, struct thread *th)
136{ 168{
169 self->last_match = NULL;
137 rb_erase(&th->rb_node, &self->threads); 170 rb_erase(&th->rb_node, &self->threads);
138 /* 171 /*
139 * We may have references to this thread, for instance in some hist_entry 172 * We may have references to this thread, for instance in some hist_entry
@@ -830,23 +863,6 @@ int perf_session__process_events(struct perf_session *self,
830 if (perf_session__register_idle_thread(self) == NULL) 863 if (perf_session__register_idle_thread(self) == NULL)
831 return -ENOMEM; 864 return -ENOMEM;
832 865
833 if (!symbol_conf.full_paths) {
834 char bf[PATH_MAX];
835
836 if (getcwd(bf, sizeof(bf)) == NULL) {
837 err = -errno;
838out_getcwd_err:
839 pr_err("failed to get the current directory\n");
840 goto out_err;
841 }
842 self->cwd = strdup(bf);
843 if (self->cwd == NULL) {
844 err = -ENOMEM;
845 goto out_getcwd_err;
846 }
847 self->cwdlen = strlen(self->cwd);
848 }
849
850 if (!self->fd_pipe) 866 if (!self->fd_pipe)
851 err = __perf_session__process_events(self, 867 err = __perf_session__process_events(self,
852 self->header.data_offset, 868 self->header.data_offset,
@@ -854,7 +870,7 @@ out_getcwd_err:
854 self->size, ops); 870 self->size, ops);
855 else 871 else
856 err = __perf_session__process_pipe_events(self, ops); 872 err = __perf_session__process_pipe_events(self, ops);
857out_err: 873
858 return err; 874 return err;
859} 875}
860 876
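
For orientation, a minimal sketch (not taken from the patch) of the session lifecycle these teardown helpers slot into; the run_report() wrapper and its error handling are assumptions, only perf_session__new()/perf_session__delete() follow the signatures visible above.

#include <fcntl.h>
#include "util/session.h"

static int run_report(void)
{
	struct perf_session *session;

	session = perf_session__new("perf.data", O_RDONLY, false, false);
	if (session == NULL)
		return -1;

	/* ... process events, build histograms ... */

	/* Now also tears down kernel maps and per-thread state. */
	perf_session__delete(session);
	return 0;
}
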
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2316cb5a4116..1c61a4f4aa8a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,4 +1,5 @@
1#include "sort.h" 1#include "sort.h"
2#include "hist.h"
2 3
3regex_t parent_regex; 4regex_t parent_regex;
4const char default_parent_pattern[] = "^sys_|^do_page_fault"; 5const char default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -10,10 +11,6 @@ int sort__has_parent = 0;
10 11
11enum sort_type sort__first_dimension; 12enum sort_type sort__first_dimension;
12 13
13unsigned int dsos__col_width;
14unsigned int comms__col_width;
15unsigned int threads__col_width;
16static unsigned int parent_symbol__col_width;
17char * field_sep; 14char * field_sep;
18 15
19LIST_HEAD(hist_entry__sort_list); 16LIST_HEAD(hist_entry__sort_list);
@@ -28,12 +25,14 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
28 size_t size, unsigned int width); 25 size_t size, unsigned int width);
29static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf, 26static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
30 size_t size, unsigned int width); 27 size_t size, unsigned int width);
28static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
29 size_t size, unsigned int width);
31 30
32struct sort_entry sort_thread = { 31struct sort_entry sort_thread = {
33 .se_header = "Command: Pid", 32 .se_header = "Command: Pid",
34 .se_cmp = sort__thread_cmp, 33 .se_cmp = sort__thread_cmp,
35 .se_snprintf = hist_entry__thread_snprintf, 34 .se_snprintf = hist_entry__thread_snprintf,
36 .se_width = &threads__col_width, 35 .se_width_idx = HISTC_THREAD,
37}; 36};
38 37
39struct sort_entry sort_comm = { 38struct sort_entry sort_comm = {
@@ -41,27 +40,35 @@ struct sort_entry sort_comm = {
41 .se_cmp = sort__comm_cmp, 40 .se_cmp = sort__comm_cmp,
42 .se_collapse = sort__comm_collapse, 41 .se_collapse = sort__comm_collapse,
43 .se_snprintf = hist_entry__comm_snprintf, 42 .se_snprintf = hist_entry__comm_snprintf,
44 .se_width = &comms__col_width, 43 .se_width_idx = HISTC_COMM,
45}; 44};
46 45
47struct sort_entry sort_dso = { 46struct sort_entry sort_dso = {
48 .se_header = "Shared Object", 47 .se_header = "Shared Object",
49 .se_cmp = sort__dso_cmp, 48 .se_cmp = sort__dso_cmp,
50 .se_snprintf = hist_entry__dso_snprintf, 49 .se_snprintf = hist_entry__dso_snprintf,
51 .se_width = &dsos__col_width, 50 .se_width_idx = HISTC_DSO,
52}; 51};
53 52
54struct sort_entry sort_sym = { 53struct sort_entry sort_sym = {
55 .se_header = "Symbol", 54 .se_header = "Symbol",
56 .se_cmp = sort__sym_cmp, 55 .se_cmp = sort__sym_cmp,
57 .se_snprintf = hist_entry__sym_snprintf, 56 .se_snprintf = hist_entry__sym_snprintf,
57 .se_width_idx = HISTC_SYMBOL,
58}; 58};
59 59
60struct sort_entry sort_parent = { 60struct sort_entry sort_parent = {
61 .se_header = "Parent symbol", 61 .se_header = "Parent symbol",
62 .se_cmp = sort__parent_cmp, 62 .se_cmp = sort__parent_cmp,
63 .se_snprintf = hist_entry__parent_snprintf, 63 .se_snprintf = hist_entry__parent_snprintf,
64 .se_width = &parent_symbol__col_width, 64 .se_width_idx = HISTC_PARENT,
65};
66
67struct sort_entry sort_cpu = {
68 .se_header = "CPU",
69 .se_cmp = sort__cpu_cmp,
70 .se_snprintf = hist_entry__cpu_snprintf,
71 .se_width_idx = HISTC_CPU,
65}; 72};
66 73
67struct sort_dimension { 74struct sort_dimension {
@@ -76,6 +83,7 @@ static struct sort_dimension sort_dimensions[] = {
76 { .name = "dso", .entry = &sort_dso, }, 83 { .name = "dso", .entry = &sort_dso, },
77 { .name = "symbol", .entry = &sort_sym, }, 84 { .name = "symbol", .entry = &sort_sym, },
78 { .name = "parent", .entry = &sort_parent, }, 85 { .name = "parent", .entry = &sort_parent, },
86 { .name = "cpu", .entry = &sort_cpu, },
79}; 87};
80 88
81int64_t cmp_null(void *l, void *r) 89int64_t cmp_null(void *l, void *r)
@@ -242,6 +250,20 @@ static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
242 self->parent ? self->parent->name : "[other]"); 250 self->parent ? self->parent->name : "[other]");
243} 251}
244 252
253/* --sort cpu */
254
255int64_t
256sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
257{
258 return right->cpu - left->cpu;
259}
260
261static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
262 size_t size, unsigned int width)
263{
264 return repsep_snprintf(bf, size, "%-*d", width, self->cpu);
265}
266
245int sort_dimension__add(const char *tok) 267int sort_dimension__add(const char *tok)
246{ 268{
247 unsigned int i; 269 unsigned int i;
@@ -281,6 +303,8 @@ int sort_dimension__add(const char *tok)
281 sort__first_dimension = SORT_SYM; 303 sort__first_dimension = SORT_SYM;
282 else if (!strcmp(sd->name, "parent")) 304 else if (!strcmp(sd->name, "parent"))
283 sort__first_dimension = SORT_PARENT; 305 sort__first_dimension = SORT_PARENT;
306 else if (!strcmp(sd->name, "cpu"))
307 sort__first_dimension = SORT_CPU;
284 } 308 }
285 309
286 list_add_tail(&sd->entry->list, &hist_entry__sort_list); 310 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
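
Usage note rather than patch content: with the new dimension registered above, a per-CPU breakdown can be requested through the existing --sort option, assuming the recorded samples carry CPU information, e.g.:

	perf record -a -- sleep 5
	perf report --sort cpu,comm,dso
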
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 0d61c4082f43..46e531d09e8b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -36,11 +36,14 @@ extern struct sort_entry sort_comm;
36extern struct sort_entry sort_dso; 36extern struct sort_entry sort_dso;
37extern struct sort_entry sort_sym; 37extern struct sort_entry sort_sym;
38extern struct sort_entry sort_parent; 38extern struct sort_entry sort_parent;
39extern unsigned int dsos__col_width;
40extern unsigned int comms__col_width;
41extern unsigned int threads__col_width;
42extern enum sort_type sort__first_dimension; 39extern enum sort_type sort__first_dimension;
43 40
41/**
42 * struct hist_entry - histogram entry
43 *
44 * @row_offset - offset from the first callchain expanded to appear on screen
45 * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding
46 */
44struct hist_entry { 47struct hist_entry {
45 struct rb_node rb_node; 48 struct rb_node rb_node;
46 u64 period; 49 u64 period;
@@ -51,7 +54,14 @@ struct hist_entry {
51 struct map_symbol ms; 54 struct map_symbol ms;
52 struct thread *thread; 55 struct thread *thread;
53 u64 ip; 56 u64 ip;
57 s32 cpu;
54 u32 nr_events; 58 u32 nr_events;
59
60 /* XXX These two should move to some tree widget lib */
61 u16 row_offset;
62 u16 nr_rows;
63
64 bool init_have_children;
55 char level; 65 char level;
56 u8 filtered; 66 u8 filtered;
57 struct symbol *parent; 67 struct symbol *parent;
@@ -68,7 +78,8 @@ enum sort_type {
68 SORT_COMM, 78 SORT_COMM,
69 SORT_DSO, 79 SORT_DSO,
70 SORT_SYM, 80 SORT_SYM,
71 SORT_PARENT 81 SORT_PARENT,
82 SORT_CPU,
72}; 83};
73 84
74/* 85/*
@@ -84,7 +95,7 @@ struct sort_entry {
84 int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *); 95 int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
85 int (*se_snprintf)(struct hist_entry *self, char *bf, size_t size, 96 int (*se_snprintf)(struct hist_entry *self, char *bf, size_t size,
86 unsigned int width); 97 unsigned int width);
87 unsigned int *se_width; 98 u8 se_width_idx;
88 bool elide; 99 bool elide;
89}; 100};
90 101
@@ -104,6 +115,7 @@ extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *);
104extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *); 115extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
105extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *); 116extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
106extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *); 117extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
118int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right);
107extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int); 119extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
108extern int sort_dimension__add(const char *); 120extern int sort_dimension__add(const char *);
109void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, 121void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5b276833e2bf..6f0dd90c36ce 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -12,6 +12,7 @@
12#include <fcntl.h> 12#include <fcntl.h>
13#include <unistd.h> 13#include <unistd.h>
14#include "build-id.h" 14#include "build-id.h"
15#include "debug.h"
15#include "symbol.h" 16#include "symbol.h"
16#include "strlist.h" 17#include "strlist.h"
17 18
@@ -25,6 +26,8 @@
25#define NT_GNU_BUILD_ID 3 26#define NT_GNU_BUILD_ID 3
26#endif 27#endif
27 28
29static bool dso__build_id_equal(const struct dso *self, u8 *build_id);
30static int elf_read_build_id(Elf *elf, void *bf, size_t size);
28static void dsos__add(struct list_head *head, struct dso *dso); 31static void dsos__add(struct list_head *head, struct dso *dso);
29static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); 32static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
30static int dso__load_kernel_sym(struct dso *self, struct map *map, 33static int dso__load_kernel_sym(struct dso *self, struct map *map,
@@ -40,6 +43,14 @@ struct symbol_conf symbol_conf = {
40 .try_vmlinux_path = true, 43 .try_vmlinux_path = true,
41}; 44};
42 45
46int dso__name_len(const struct dso *self)
47{
48 if (verbose)
49 return self->long_name_len;
50
51 return self->short_name_len;
52}
53
43bool dso__loaded(const struct dso *self, enum map_type type) 54bool dso__loaded(const struct dso *self, enum map_type type)
44{ 55{
45 return self->loaded & (1 << type); 56 return self->loaded & (1 << type);
@@ -215,7 +226,9 @@ void dso__delete(struct dso *self)
215 int i; 226 int i;
216 for (i = 0; i < MAP__NR_TYPES; ++i) 227 for (i = 0; i < MAP__NR_TYPES; ++i)
217 symbols__delete(&self->symbols[i]); 228 symbols__delete(&self->symbols[i]);
218 if (self->long_name != self->name) 229 if (self->sname_alloc)
230 free((char *)self->short_name);
231 if (self->lname_alloc)
219 free(self->long_name); 232 free(self->long_name);
220 free(self); 233 free(self);
221} 234}
@@ -933,8 +946,28 @@ static bool elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type
933 } 946 }
934} 947}
935 948
949static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
950{
951 Elf_Scn *sec = NULL;
952 GElf_Shdr shdr;
953 size_t cnt = 1;
954
955 while ((sec = elf_nextscn(elf, sec)) != NULL) {
956 gelf_getshdr(sec, &shdr);
957
958 if ((addr >= shdr.sh_addr) &&
959 (addr < (shdr.sh_addr + shdr.sh_size)))
960 return cnt;
961
962 ++cnt;
963 }
964
965 return -1;
966}
967
936static int dso__load_sym(struct dso *self, struct map *map, const char *name, 968static int dso__load_sym(struct dso *self, struct map *map, const char *name,
937 int fd, symbol_filter_t filter, int kmodule) 969 int fd, symbol_filter_t filter, int kmodule,
970 int want_symtab)
938{ 971{
939 struct kmap *kmap = self->kernel ? map__kmap(map) : NULL; 972 struct kmap *kmap = self->kernel ? map__kmap(map) : NULL;
940 struct map *curr_map = map; 973 struct map *curr_map = map;
@@ -944,31 +977,51 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
944 int err = -1; 977 int err = -1;
945 uint32_t idx; 978 uint32_t idx;
946 GElf_Ehdr ehdr; 979 GElf_Ehdr ehdr;
947 GElf_Shdr shdr; 980 GElf_Shdr shdr, opdshdr;
948 Elf_Data *syms; 981 Elf_Data *syms, *opddata = NULL;
949 GElf_Sym sym; 982 GElf_Sym sym;
950 Elf_Scn *sec, *sec_strndx; 983 Elf_Scn *sec, *sec_strndx, *opdsec;
951 Elf *elf; 984 Elf *elf;
952 int nr = 0; 985 int nr = 0;
986 size_t opdidx = 0;
953 987
954 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); 988 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
955 if (elf == NULL) { 989 if (elf == NULL) {
956 pr_err("%s: cannot read %s ELF file.\n", __func__, name); 990 pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
957 goto out_close; 991 goto out_close;
958 } 992 }
959 993
960 if (gelf_getehdr(elf, &ehdr) == NULL) { 994 if (gelf_getehdr(elf, &ehdr) == NULL) {
961 pr_err("%s: cannot get elf header.\n", __func__); 995 pr_debug("%s: cannot get elf header.\n", __func__);
962 goto out_elf_end; 996 goto out_elf_end;
963 } 997 }
964 998
999 /* Always reject images with a mismatched build-id: */
1000 if (self->has_build_id) {
1001 u8 build_id[BUILD_ID_SIZE];
1002
1003 if (elf_read_build_id(elf, build_id,
1004 BUILD_ID_SIZE) != BUILD_ID_SIZE)
1005 goto out_elf_end;
1006
1007 if (!dso__build_id_equal(self, build_id))
1008 goto out_elf_end;
1009 }
1010
965 sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); 1011 sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
966 if (sec == NULL) { 1012 if (sec == NULL) {
1013 if (want_symtab)
1014 goto out_elf_end;
1015
967 sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); 1016 sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
968 if (sec == NULL) 1017 if (sec == NULL)
969 goto out_elf_end; 1018 goto out_elf_end;
970 } 1019 }
971 1020
1021 opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx);
1022 if (opdsec)
1023 opddata = elf_rawdata(opdsec, NULL);
1024
972 syms = elf_getdata(sec, NULL); 1025 syms = elf_getdata(sec, NULL);
973 if (syms == NULL) 1026 if (syms == NULL)
974 goto out_elf_end; 1027 goto out_elf_end;
@@ -1013,6 +1066,13 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
1013 if (!is_label && !elf_sym__is_a(&sym, map->type)) 1066 if (!is_label && !elf_sym__is_a(&sym, map->type))
1014 continue; 1067 continue;
1015 1068
1069 if (opdsec && sym.st_shndx == opdidx) {
1070 u32 offset = sym.st_value - opdshdr.sh_addr;
1071 u64 *opd = opddata->d_buf + offset;
1072 sym.st_value = *opd;
1073 sym.st_shndx = elf_addr_to_index(elf, sym.st_value);
1074 }
1075
1016 sec = elf_getscn(elf, sym.st_shndx); 1076 sec = elf_getscn(elf, sym.st_shndx);
1017 if (!sec) 1077 if (!sec)
1018 goto out_elf_end; 1078 goto out_elf_end;
@@ -1151,37 +1211,26 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
1151 */ 1211 */
1152#define NOTE_ALIGN(n) (((n) + 3) & -4U) 1212#define NOTE_ALIGN(n) (((n) + 3) & -4U)
1153 1213
1154int filename__read_build_id(const char *filename, void *bf, size_t size) 1214static int elf_read_build_id(Elf *elf, void *bf, size_t size)
1155{ 1215{
1156 int fd, err = -1; 1216 int err = -1;
1157 GElf_Ehdr ehdr; 1217 GElf_Ehdr ehdr;
1158 GElf_Shdr shdr; 1218 GElf_Shdr shdr;
1159 Elf_Data *data; 1219 Elf_Data *data;
1160 Elf_Scn *sec; 1220 Elf_Scn *sec;
1161 Elf_Kind ek; 1221 Elf_Kind ek;
1162 void *ptr; 1222 void *ptr;
1163 Elf *elf;
1164 1223
1165 if (size < BUILD_ID_SIZE) 1224 if (size < BUILD_ID_SIZE)
1166 goto out; 1225 goto out;
1167 1226
1168 fd = open(filename, O_RDONLY);
1169 if (fd < 0)
1170 goto out;
1171
1172 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
1173 if (elf == NULL) {
1174 pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
1175 goto out_close;
1176 }
1177
1178 ek = elf_kind(elf); 1227 ek = elf_kind(elf);
1179 if (ek != ELF_K_ELF) 1228 if (ek != ELF_K_ELF)
1180 goto out_elf_end; 1229 goto out;
1181 1230
1182 if (gelf_getehdr(elf, &ehdr) == NULL) { 1231 if (gelf_getehdr(elf, &ehdr) == NULL) {
1183 pr_err("%s: cannot get elf header.\n", __func__); 1232 pr_err("%s: cannot get elf header.\n", __func__);
1184 goto out_elf_end; 1233 goto out;
1185 } 1234 }
1186 1235
1187 sec = elf_section_by_name(elf, &ehdr, &shdr, 1236 sec = elf_section_by_name(elf, &ehdr, &shdr,
@@ -1190,12 +1239,12 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
1190 sec = elf_section_by_name(elf, &ehdr, &shdr, 1239 sec = elf_section_by_name(elf, &ehdr, &shdr,
1191 ".notes", NULL); 1240 ".notes", NULL);
1192 if (sec == NULL) 1241 if (sec == NULL)
1193 goto out_elf_end; 1242 goto out;
1194 } 1243 }
1195 1244
1196 data = elf_getdata(sec, NULL); 1245 data = elf_getdata(sec, NULL);
1197 if (data == NULL) 1246 if (data == NULL)
1198 goto out_elf_end; 1247 goto out;
1199 1248
1200 ptr = data->d_buf; 1249 ptr = data->d_buf;
1201 while (ptr < (data->d_buf + data->d_size)) { 1250 while (ptr < (data->d_buf + data->d_size)) {
@@ -1217,7 +1266,31 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
1217 } 1266 }
1218 ptr += descsz; 1267 ptr += descsz;
1219 } 1268 }
1220out_elf_end: 1269
1270out:
1271 return err;
1272}
1273
1274int filename__read_build_id(const char *filename, void *bf, size_t size)
1275{
1276 int fd, err = -1;
1277 Elf *elf;
1278
1279 if (size < BUILD_ID_SIZE)
1280 goto out;
1281
1282 fd = open(filename, O_RDONLY);
1283 if (fd < 0)
1284 goto out;
1285
1286 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
1287 if (elf == NULL) {
1288 pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
1289 goto out_close;
1290 }
1291
1292 err = elf_read_build_id(elf, bf, size);
1293
1221 elf_end(elf); 1294 elf_end(elf);
1222out_close: 1295out_close:
1223 close(fd); 1296 close(fd);
@@ -1293,11 +1366,11 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1293{ 1366{
1294 int size = PATH_MAX; 1367 int size = PATH_MAX;
1295 char *name; 1368 char *name;
1296 u8 build_id[BUILD_ID_SIZE];
1297 int ret = -1; 1369 int ret = -1;
1298 int fd; 1370 int fd;
1299 struct machine *machine; 1371 struct machine *machine;
1300 const char *root_dir; 1372 const char *root_dir;
1373 int want_symtab;
1301 1374
1302 dso__set_loaded(self, map->type); 1375 dso__set_loaded(self, map->type);
1303 1376
@@ -1324,13 +1397,18 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1324 return ret; 1397 return ret;
1325 } 1398 }
1326 1399
1327 self->origin = DSO__ORIG_BUILD_ID_CACHE; 1400 /* Iterate over candidate debug images.
1328 if (dso__build_id_filename(self, name, size) != NULL) 1401 * On the first pass, only load images if they have a full symtab.
1329 goto open_file; 1402 * Failing that, do a second pass where we accept .dynsym also
1330more: 1403 */
1331 do { 1404 for (self->origin = DSO__ORIG_BUILD_ID_CACHE, want_symtab = 1;
1332 self->origin++; 1405 self->origin != DSO__ORIG_NOT_FOUND;
1406 self->origin++) {
1333 switch (self->origin) { 1407 switch (self->origin) {
1408 case DSO__ORIG_BUILD_ID_CACHE:
1409 if (dso__build_id_filename(self, name, size) == NULL)
1410 continue;
1411 break;
1334 case DSO__ORIG_FEDORA: 1412 case DSO__ORIG_FEDORA:
1335 snprintf(name, size, "/usr/lib/debug%s.debug", 1413 snprintf(name, size, "/usr/lib/debug%s.debug",
1336 self->long_name); 1414 self->long_name);
@@ -1339,21 +1417,20 @@ more:
1339 snprintf(name, size, "/usr/lib/debug%s", 1417 snprintf(name, size, "/usr/lib/debug%s",
1340 self->long_name); 1418 self->long_name);
1341 break; 1419 break;
1342 case DSO__ORIG_BUILDID: 1420 case DSO__ORIG_BUILDID: {
1343 if (filename__read_build_id(self->long_name, build_id, 1421 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
1344 sizeof(build_id))) { 1422
1345 char build_id_hex[BUILD_ID_SIZE * 2 + 1]; 1423 if (!self->has_build_id)
1346 build_id__sprintf(build_id, sizeof(build_id), 1424 continue;
1347 build_id_hex); 1425
1348 snprintf(name, size, 1426 build_id__sprintf(self->build_id,
1349 "/usr/lib/debug/.build-id/%.2s/%s.debug", 1427 sizeof(self->build_id),
1350 build_id_hex, build_id_hex + 2); 1428 build_id_hex);
1351 if (self->has_build_id) 1429 snprintf(name, size,
1352 goto compare_build_id; 1430 "/usr/lib/debug/.build-id/%.2s/%s.debug",
1353 break; 1431 build_id_hex, build_id_hex + 2);
1354 } 1432 }
1355 self->origin++; 1433 break;
1356 /* Fall thru */
1357 case DSO__ORIG_DSO: 1434 case DSO__ORIG_DSO:
1358 snprintf(name, size, "%s", self->long_name); 1435 snprintf(name, size, "%s", self->long_name);
1359 break; 1436 break;
@@ -1366,36 +1443,41 @@ more:
1366 break; 1443 break;
1367 1444
1368 default: 1445 default:
1369 goto out; 1446 /*
1447 * If we wanted a full symtab but no image had one,
1448 * relax our requirements and repeat the search.
1449 */
1450 if (want_symtab) {
1451 want_symtab = 0;
1452 self->origin = DSO__ORIG_BUILD_ID_CACHE;
1453 } else
1454 continue;
1370 } 1455 }
1371 1456
1372 if (self->has_build_id) { 1457 /* Name is now the name of the next image to try */
1373 if (filename__read_build_id(name, build_id,
1374 sizeof(build_id)) < 0)
1375 goto more;
1376compare_build_id:
1377 if (!dso__build_id_equal(self, build_id))
1378 goto more;
1379 }
1380open_file:
1381 fd = open(name, O_RDONLY); 1458 fd = open(name, O_RDONLY);
1382 } while (fd < 0); 1459 if (fd < 0)
1460 continue;
1383 1461
1384 ret = dso__load_sym(self, map, name, fd, filter, 0); 1462 ret = dso__load_sym(self, map, name, fd, filter, 0,
1385 close(fd); 1463 want_symtab);
1464 close(fd);
1386 1465
1387 /* 1466 /*
1388 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!? 1467 * Some people seem to have debuginfo files _WITHOUT_ debug
1389 */ 1468 * info!?!?
1390 if (!ret) 1469 */
1391 goto more; 1470 if (!ret)
1471 continue;
1392 1472
1393 if (ret > 0) { 1473 if (ret > 0) {
1394 int nr_plt = dso__synthesize_plt_symbols(self, map, filter); 1474 int nr_plt = dso__synthesize_plt_symbols(self, map, filter);
1395 if (nr_plt > 0) 1475 if (nr_plt > 0)
1396 ret += nr_plt; 1476 ret += nr_plt;
1477 break;
1478 }
1397 } 1479 }
1398out: 1480
1399 free(name); 1481 free(name);
1400 if (ret < 0 && strstr(self->name, " (deleted)") != NULL) 1482 if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
1401 return 0; 1483 return 0;
@@ -1494,6 +1576,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *self,
1494 goto out; 1576 goto out;
1495 } 1577 }
1496 dso__set_long_name(map->dso, long_name); 1578 dso__set_long_name(map->dso, long_name);
1579 map->dso->lname_alloc = 1;
1497 dso__kernel_module_get_build_id(map->dso, ""); 1580 dso__kernel_module_get_build_id(map->dso, "");
1498 } 1581 }
1499 } 1582 }
@@ -1656,36 +1739,12 @@ static int dso__load_vmlinux(struct dso *self, struct map *map,
1656{ 1739{
1657 int err = -1, fd; 1740 int err = -1, fd;
1658 1741
1659 if (self->has_build_id) {
1660 u8 build_id[BUILD_ID_SIZE];
1661
1662 if (filename__read_build_id(vmlinux, build_id,
1663 sizeof(build_id)) < 0) {
1664 pr_debug("No build_id in %s, ignoring it\n", vmlinux);
1665 return -1;
1666 }
1667 if (!dso__build_id_equal(self, build_id)) {
1668 char expected_build_id[BUILD_ID_SIZE * 2 + 1],
1669 vmlinux_build_id[BUILD_ID_SIZE * 2 + 1];
1670
1671 build_id__sprintf(self->build_id,
1672 sizeof(self->build_id),
1673 expected_build_id);
1674 build_id__sprintf(build_id, sizeof(build_id),
1675 vmlinux_build_id);
1676 pr_debug("build_id in %s is %s while expected is %s, "
1677 "ignoring it\n", vmlinux, vmlinux_build_id,
1678 expected_build_id);
1679 return -1;
1680 }
1681 }
1682
1683 fd = open(vmlinux, O_RDONLY); 1742 fd = open(vmlinux, O_RDONLY);
1684 if (fd < 0) 1743 if (fd < 0)
1685 return -1; 1744 return -1;
1686 1745
1687 dso__set_loaded(self, map->type); 1746 dso__set_loaded(self, map->type);
1688 err = dso__load_sym(self, map, vmlinux, fd, filter, 0); 1747 err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0);
1689 close(fd); 1748 close(fd);
1690 1749
1691 if (err > 0) 1750 if (err > 0)
@@ -2048,6 +2107,36 @@ int __machine__create_kernel_maps(struct machine *self, struct dso *kernel)
2048 return 0; 2107 return 0;
2049} 2108}
2050 2109
2110void machine__destroy_kernel_maps(struct machine *self)
2111{
2112 enum map_type type;
2113
2114 for (type = 0; type < MAP__NR_TYPES; ++type) {
2115 struct kmap *kmap;
2116
2117 if (self->vmlinux_maps[type] == NULL)
2118 continue;
2119
2120 kmap = map__kmap(self->vmlinux_maps[type]);
2121 map_groups__remove(&self->kmaps, self->vmlinux_maps[type]);
2122 if (kmap->ref_reloc_sym) {
2123 /*
2124 * ref_reloc_sym is shared among all maps, so free just
2125 * on one of them.
2126 */
2127 if (type == MAP__FUNCTION) {
2128 free((char *)kmap->ref_reloc_sym->name);
2129 kmap->ref_reloc_sym->name = NULL;
2130 free(kmap->ref_reloc_sym);
2131 }
2132 kmap->ref_reloc_sym = NULL;
2133 }
2134
2135 map__delete(self->vmlinux_maps[type]);
2136 self->vmlinux_maps[type] = NULL;
2137 }
2138}
2139
2051int machine__create_kernel_maps(struct machine *self) 2140int machine__create_kernel_maps(struct machine *self)
2052{ 2141{
2053 struct dso *kernel = machine__create_kernel(self); 2142 struct dso *kernel = machine__create_kernel(self);
@@ -2189,6 +2278,15 @@ out_free_comm_list:
2189 return -1; 2278 return -1;
2190} 2279}
2191 2280
2281void symbol__exit(void)
2282{
2283 strlist__delete(symbol_conf.sym_list);
2284 strlist__delete(symbol_conf.dso_list);
2285 strlist__delete(symbol_conf.comm_list);
2286 vmlinux_path__exit();
2287 symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
2288}
2289
2192int machines__create_kernel_maps(struct rb_root *self, pid_t pid) 2290int machines__create_kernel_maps(struct rb_root *self, pid_t pid)
2193{ 2291{
2194 struct machine *machine = machines__findnew(self, pid); 2292 struct machine *machine = machines__findnew(self, pid);
@@ -2283,6 +2381,19 @@ failure:
2283 return ret; 2381 return ret;
2284} 2382}
2285 2383
2384void machines__destroy_guest_kernel_maps(struct rb_root *self)
2385{
2386 struct rb_node *next = rb_first(self);
2387
2388 while (next) {
2389 struct machine *pos = rb_entry(next, struct machine, rb_node);
2390
2391 next = rb_next(&pos->rb_node);
2392 rb_erase(&pos->rb_node, self);
2393 machine__delete(pos);
2394 }
2395}
2396
2286int machine__load_kallsyms(struct machine *self, const char *filename, 2397int machine__load_kallsyms(struct machine *self, const char *filename,
2287 enum map_type type, symbol_filter_t filter) 2398 enum map_type type, symbol_filter_t filter)
2288{ 2399{
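
Illustrative sketch only (not part of the patch): printing a file's build-id with the helpers refactored above. The print_build_id() wrapper and the include choices are assumptions; the calling convention follows the uses visible in this hunk.

#include <stdio.h>
#include "build-id.h"
#include "symbol.h"

static int print_build_id(const char *path)
{
	u8 build_id[BUILD_ID_SIZE];
	char sbuild_id[BUILD_ID_SIZE * 2 + 1];

	if (filename__read_build_id(path, build_id, sizeof(build_id)) < 0)
		return -1;	/* no readable build-id note in this file */

	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
	printf("%s %s\n", sbuild_id, path);
	return 0;
}
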
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5e02d2c17154..906be20011d9 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -9,8 +9,6 @@
9#include <linux/rbtree.h> 9#include <linux/rbtree.h>
10#include <stdio.h> 10#include <stdio.h>
11 11
12#define DEBUG_CACHE_DIR ".debug"
13
14#ifdef HAVE_CPLUS_DEMANGLE 12#ifdef HAVE_CPLUS_DEMANGLE
15extern char *cplus_demangle(const char *, int); 13extern char *cplus_demangle(const char *, int);
16 14
@@ -70,9 +68,9 @@ struct symbol_conf {
70 show_nr_samples, 68 show_nr_samples,
71 use_callchain, 69 use_callchain,
72 exclude_other, 70 exclude_other,
73 full_paths,
74 show_cpu_utilization; 71 show_cpu_utilization;
75 const char *vmlinux_name, 72 const char *vmlinux_name,
73 *source_prefix,
76 *field_sep; 74 *field_sep;
77 const char *default_guest_vmlinux_name, 75 const char *default_guest_vmlinux_name,
78 *default_guest_kallsyms, 76 *default_guest_kallsyms,
@@ -103,6 +101,8 @@ struct ref_reloc_sym {
103struct map_symbol { 101struct map_symbol {
104 struct map *map; 102 struct map *map;
105 struct symbol *sym; 103 struct symbol *sym;
104 bool unfolded;
105 bool has_children;
106}; 106};
107 107
108struct addr_location { 108struct addr_location {
@@ -112,7 +112,8 @@ struct addr_location {
112 u64 addr; 112 u64 addr;
113 char level; 113 char level;
114 bool filtered; 114 bool filtered;
115 unsigned int cpumode; 115 u8 cpumode;
116 s32 cpu;
116}; 117};
117 118
118enum dso_kernel_type { 119enum dso_kernel_type {
@@ -125,12 +126,14 @@ struct dso {
125 struct list_head node; 126 struct list_head node;
126 struct rb_root symbols[MAP__NR_TYPES]; 127 struct rb_root symbols[MAP__NR_TYPES];
127 struct rb_root symbol_names[MAP__NR_TYPES]; 128 struct rb_root symbol_names[MAP__NR_TYPES];
129 enum dso_kernel_type kernel;
128 u8 adjust_symbols:1; 130 u8 adjust_symbols:1;
129 u8 slen_calculated:1; 131 u8 slen_calculated:1;
130 u8 has_build_id:1; 132 u8 has_build_id:1;
131 enum dso_kernel_type kernel;
132 u8 hit:1; 133 u8 hit:1;
133 u8 annotate_warned:1; 134 u8 annotate_warned:1;
135 u8 sname_alloc:1;
136 u8 lname_alloc:1;
134 unsigned char origin; 137 unsigned char origin;
135 u8 sorted_by_name; 138 u8 sorted_by_name;
136 u8 loaded; 139 u8 loaded;
@@ -146,6 +149,8 @@ struct dso *dso__new(const char *name);
146struct dso *dso__new_kernel(const char *name); 149struct dso *dso__new_kernel(const char *name);
147void dso__delete(struct dso *self); 150void dso__delete(struct dso *self);
148 151
152int dso__name_len(const struct dso *self);
153
149bool dso__loaded(const struct dso *self, enum map_type type); 154bool dso__loaded(const struct dso *self, enum map_type type);
150bool dso__sorted_by_name(const struct dso *self, enum map_type type); 155bool dso__sorted_by_name(const struct dso *self, enum map_type type);
151 156
@@ -207,13 +212,16 @@ int kallsyms__parse(const char *filename, void *arg,
207 int (*process_symbol)(void *arg, const char *name, 212 int (*process_symbol)(void *arg, const char *name,
208 char type, u64 start)); 213 char type, u64 start));
209 214
215void machine__destroy_kernel_maps(struct machine *self);
210int __machine__create_kernel_maps(struct machine *self, struct dso *kernel); 216int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
211int machine__create_kernel_maps(struct machine *self); 217int machine__create_kernel_maps(struct machine *self);
212 218
213int machines__create_kernel_maps(struct rb_root *self, pid_t pid); 219int machines__create_kernel_maps(struct rb_root *self, pid_t pid);
214int machines__create_guest_kernel_maps(struct rb_root *self); 220int machines__create_guest_kernel_maps(struct rb_root *self);
221void machines__destroy_guest_kernel_maps(struct rb_root *self);
215 222
216int symbol__init(void); 223int symbol__init(void);
224void symbol__exit(void);
217bool symbol_type__is_a(char symbol_type, enum map_type map_type); 225bool symbol_type__is_a(char symbol_type, enum map_type map_type);
218 226
219size_t machine__fprintf_vmlinux_path(struct machine *self, FILE *fp); 227size_t machine__fprintf_vmlinux_path(struct machine *self, FILE *fp);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 9a448b47400c..8c72d888e449 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -62,6 +62,13 @@ static struct thread *thread__new(pid_t pid)
62 return self; 62 return self;
63} 63}
64 64
65void thread__delete(struct thread *self)
66{
67 map_groups__exit(&self->mg);
68 free(self->comm);
69 free(self);
70}
71
65int thread__set_comm(struct thread *self, const char *comm) 72int thread__set_comm(struct thread *self, const char *comm)
66{ 73{
67 int err; 74 int err;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index ee6bbcf277ca..688500ff826f 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -20,6 +20,8 @@ struct thread {
20 20
21struct perf_session; 21struct perf_session;
22 22
23void thread__delete(struct thread *self);
24
23int find_all_tid(int pid, pid_t ** all_tid); 25int find_all_tid(int pid, pid_t ** all_tid);
24int thread__set_comm(struct thread *self, const char *comm); 26int thread__set_comm(struct thread *self, const char *comm);
25int thread__comm_len(struct thread *self); 27int thread__comm_len(struct thread *self);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 4e8b6b0c551c..f380fed74359 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -89,6 +89,7 @@
89 89
90extern const char *graph_line; 90extern const char *graph_line;
91extern const char *graph_dotted_line; 91extern const char *graph_dotted_line;
92extern char buildid_dir[];
92 93
93/* On most systems <limits.h> would have given us this, but 94/* On most systems <limits.h> would have given us this, but
94 * not on some systems (e.g. GNU/Hurd). 95 * not on some systems (e.g. GNU/Hurd).
@@ -152,6 +153,8 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)))
152extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); 153extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
153 154
154extern int prefixcmp(const char *str, const char *prefix); 155extern int prefixcmp(const char *str, const char *prefix);
156extern void set_buildid_dir(void);
157extern void disable_buildid_cache(void);
155 158
156static inline const char *skip_prefix(const char *str, const char *prefix) 159static inline const char *skip_prefix(const char *str, const char *prefix)
157{ 160{