-rw-r--r--  Documentation/ABI/testing/debugfs-kmemtrace | 71
-rw-r--r--  Documentation/trace/kmemtrace.txt | 126
-rw-r--r--  Documentation/trace/kprobetrace.txt | 2
-rw-r--r--  MAINTAINERS | 9
-rw-r--r--  Makefile | 4
-rw-r--r--  arch/alpha/include/asm/local64.h | 1
-rw-r--r--  arch/arm/include/asm/local64.h | 1
-rw-r--r--  arch/arm/kernel/perf_event.c | 18
-rw-r--r--  arch/avr32/include/asm/local64.h | 1
-rw-r--r--  arch/blackfin/include/asm/local64.h | 1
-rw-r--r--  arch/cris/include/asm/local64.h | 1
-rw-r--r--  arch/frv/include/asm/local64.h | 1
-rw-r--r--  arch/frv/kernel/local64.h | 1
-rw-r--r--  arch/h8300/include/asm/local64.h | 1
-rw-r--r--  arch/ia64/include/asm/local64.h | 1
-rw-r--r--  arch/m32r/include/asm/local64.h | 1
-rw-r--r--  arch/m68k/include/asm/local64.h | 1
-rw-r--r--  arch/microblaze/include/asm/local64.h | 1
-rw-r--r--  arch/mips/include/asm/local64.h | 1
-rw-r--r--  arch/mn10300/include/asm/local64.h | 1
-rw-r--r--  arch/parisc/include/asm/local64.h | 1
-rw-r--r--  arch/powerpc/include/asm/local64.h | 1
-rw-r--r--  arch/powerpc/include/asm/perf_event.h | 12
-rw-r--r--  arch/powerpc/kernel/misc.S | 26
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 41
-rw-r--r--  arch/s390/include/asm/local64.h | 1
-rw-r--r--  arch/score/include/asm/local64.h | 1
-rw-r--r--  arch/sh/include/asm/local64.h | 1
-rw-r--r--  arch/sh/kernel/perf_event.c | 6
-rw-r--r--  arch/sparc/include/asm/local64.h | 1
-rw-r--r--  arch/sparc/include/asm/perf_event.h | 8
-rw-r--r--  arch/sparc/kernel/helpers.S | 6
-rw-r--r--  arch/sparc/kernel/perf_event.c | 25
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h | 2
-rw-r--r--  arch/x86/include/asm/local64.h | 1
-rw-r--r--  arch/x86/include/asm/perf_event.h | 18
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 99
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 49
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 62
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 156
-rw-r--r--  arch/x86/kernel/dumpstack.c | 1
-rw-r--r--  arch/x86/kernel/dumpstack.h | 56
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 1
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 51
-rw-r--r--  arch/x86/kernel/kprobes.c | 33
-rw-r--r--  arch/x86/kernel/process_32.c | 4
-rw-r--r--  arch/x86/kernel/process_64.c | 5
-rw-r--r--  arch/x86/kernel/stacktrace.c | 31
-rw-r--r--  arch/xtensa/include/asm/local64.h | 1
-rw-r--r--  fs/exec.c | 1
-rw-r--r--  include/asm-generic/local64.h | 96
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 4
-rw-r--r--  include/linux/ftrace_event.h | 6
-rw-r--r--  include/linux/kmemtrace.h | 25
-rw-r--r--  include/linux/perf_event.h | 95
-rw-r--r--  include/linux/slab_def.h | 3
-rw-r--r--  include/linux/slub_def.h | 3
-rw-r--r--  include/linux/syscalls.h | 2
-rw-r--r--  include/trace/boot.h | 60
-rw-r--r--  include/trace/events/sched.h | 32
-rw-r--r--  include/trace/events/timer.h | 80
-rw-r--r--  include/trace/ftrace.h | 4
-rw-r--r--  include/trace/syscall.h | 1
-rw-r--r--  init/main.c | 29
-rw-r--r--  kernel/hw_breakpoint.c | 78
-rw-r--r--  kernel/perf_event.c | 458
-rw-r--r--  kernel/sched.c | 6
-rw-r--r--  kernel/trace/Kconfig | 59
-rw-r--r--  kernel/trace/Makefile | 3
-rw-r--r--  kernel/trace/ftrace.c | 5
-rw-r--r--  kernel/trace/kmemtrace.c | 529
-rw-r--r--  kernel/trace/ring_buffer.c | 38
-rw-r--r--  kernel/trace/trace.c | 8
-rw-r--r--  kernel/trace/trace.h | 76
-rw-r--r--  kernel/trace/trace_boot.c | 185
-rw-r--r--  kernel/trace/trace_clock.c | 5
-rw-r--r--  kernel/trace/trace_entries.h | 77
-rw-r--r--  kernel/trace/trace_event_perf.c | 21
-rw-r--r--  kernel/trace/trace_events.c | 269
-rw-r--r--  kernel/trace/trace_events_filter.c | 27
-rw-r--r--  kernel/trace/trace_export.c | 8
-rw-r--r--  kernel/trace/trace_functions.c | 6
-rw-r--r--  kernel/trace/trace_functions_graph.c | 3
-rw-r--r--  kernel/trace/trace_kprobe.c | 380
-rw-r--r--  kernel/trace/trace_ksym.c | 508
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 5
-rw-r--r--  kernel/trace/trace_selftest.c | 54
-rw-r--r--  kernel/trace/trace_stack.c | 6
-rw-r--r--  kernel/trace/trace_syscalls.c | 7
-rw-r--r--  kernel/trace/trace_sysprof.c | 7
-rw-r--r--  mm/mmap.c | 6
-rw-r--r--  mm/slab.c | 1
-rw-r--r--  mm/slob.c | 4
-rw-r--r--  mm/slub.c | 1
-rw-r--r--  scripts/package/Makefile | 37
-rw-r--r--  tools/perf/.gitignore | 2
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 8
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 13
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 7
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 8
-rw-r--r--  tools/perf/MANIFEST | 12
-rw-r--r--  tools/perf/Makefile | 141
-rw-r--r--  tools/perf/arch/sh/Makefile | 4
-rw-r--r--  tools/perf/arch/sh/util/dwarf-regs.c | 55
-rw-r--r--  tools/perf/builtin-annotate.c | 6
-rw-r--r--  tools/perf/builtin-buildid-cache.c | 3
-rw-r--r--  tools/perf/builtin-diff.c | 7
-rw-r--r--  tools/perf/builtin-probe.c | 2
-rw-r--r--  tools/perf/builtin-record.c | 44
-rw-r--r--  tools/perf/builtin-report.c | 25
-rw-r--r--  tools/perf/builtin-stat.c | 14
-rw-r--r--  tools/perf/builtin-top.c | 20
-rw-r--r--  tools/perf/builtin-trace.c | 32
-rw-r--r--  tools/perf/feature-tests.mak | 119
-rw-r--r--  tools/perf/perf-archive.sh | 20
-rw-r--r--  tools/perf/perf.c | 2
-rw-r--r--  tools/perf/util/build-id.c | 10
-rw-r--r--  tools/perf/util/cache.h | 1
-rw-r--r--  tools/perf/util/callchain.c | 2
-rw-r--r--  tools/perf/util/callchain.h | 2
-rw-r--r--  tools/perf/util/config.c | 64
-rw-r--r--  tools/perf/util/cpumap.c | 57
-rw-r--r--  tools/perf/util/cpumap.h | 2
-rw-r--r--  tools/perf/util/debug.c | 10
-rw-r--r--  tools/perf/util/event.c | 37
-rw-r--r--  tools/perf/util/event.h | 5
-rw-r--r--  tools/perf/util/header.c | 13
-rw-r--r--  tools/perf/util/hist.c | 39
-rw-r--r--  tools/perf/util/newt.c | 199
-rw-r--r--  tools/perf/util/parse-events.c | 11
-rw-r--r--  tools/perf/util/probe-event.c | 140
-rw-r--r--  tools/perf/util/probe-event.h | 2
-rw-r--r--  tools/perf/util/probe-finder.c | 228
-rw-r--r--  tools/perf/util/session.c | 6
-rw-r--r--  tools/perf/util/sort.c | 27
-rw-r--r--  tools/perf/util/sort.h | 6
-rw-r--r--  tools/perf/util/symbol.c | 37
-rw-r--r--  tools/perf/util/symbol.h | 6
-rw-r--r--  tools/perf/util/util.h | 3
140 files changed, 2528 insertions(+), 3187 deletions(-)
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
deleted file mode 100644
index 5e6a92a02d85..000000000000
--- a/Documentation/ABI/testing/debugfs-kmemtrace
+++ /dev/null
@@ -1,71 +0,0 @@
What:		/sys/kernel/debug/kmemtrace/
Date:		July 2008
Contact:	Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Description:

In kmemtrace-enabled kernels, the following files are created:

/sys/kernel/debug/kmemtrace/
	cpu<n>		(0400)	Per-CPU tracing data, see below. (binary)
	total_overruns	(0400)	Total number of bytes which were dropped from
				cpu<n> files because of full buffer condition,
				non-binary. (text)
	abi_version	(0400)	Kernel's kmemtrace ABI version. (text)

Each per-CPU file should be read according to the relay interface. That is,
the reader should set affinity to that specific CPU and, as currently done by
the userspace application (though there are other methods), use poll() with
an infinite timeout before every read(). Otherwise, erroneous data may be
read. The binary data has the following _core_ format:

	Event ID	(1 byte)	Unsigned integer, one of:
		0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
		1 - represents a freeing of previously allocated memory
		    (KMEMTRACE_EVENT_FREE)
	Type ID		(1 byte)	Unsigned integer, one of:
		0 - this is a kmalloc() / kfree()
		1 - this is a kmem_cache_alloc() / kmem_cache_free()
		2 - this is a __get_free_pages() et al.
	Event size	(2 bytes)	Unsigned integer representing the
					size of this event. Used to extend
					kmemtrace. Discard the bytes you
					don't know about.
	Sequence number	(4 bytes)	Signed integer used to reorder data
					logged on SMP machines. Wraparound
					must be taken into account, although
					it is unlikely.
	Caller address	(8 bytes)	Return address to the caller.
	Pointer to mem	(8 bytes)	Pointer to target memory area. Can be
					NULL, but not all such calls might be
					recorded.

In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:

	Requested bytes	(8 bytes)	Total number of requested bytes,
					unsigned, must not be zero.
	Allocated bytes	(8 bytes)	Total number of actually allocated
					bytes, unsigned, must not be lower
					than requested bytes.
	Requested flags	(4 bytes)	GFP flags supplied by the caller.
	Target CPU	(4 bytes)	Signed integer, valid for event id 1.
					If equal to -1, target CPU is the same
					as origin CPU, but the reverse might
					not be true.

The data is made available in the same endianness the machine has.

Other event ids and type ids may be defined and added. Other fields may be
added by increasing event size, but see below for details.
Every modification to the ABI, including new id definitions, are followed
by bumping the ABI version by one.

Adding new data to the packet (features) is done at the end of the mandatory
data:
	Feature size	(2 byte)
	Feature ID	(1 byte)
	Feature data	(Feature size - 3 bytes)


Users:
	kmemtrace-user - git://repo.or.cz/kmemtrace-user.git

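For reference, the core record and the extra KMEMTRACE_EVENT_ALLOC fields
described above map onto C structures roughly as follows. This is a minimal
sketch assuming native endianness and the field widths listed; the struct and
field names are illustrative and are not defined by the kmemtrace ABI itself.

#include <stdint.h>

/* Core record at the start of every event (sizes per the table above). */
struct kmemtrace_core_record {
	uint8_t  event_id;	/* 0 = KMEMTRACE_EVENT_ALLOC, 1 = KMEMTRACE_EVENT_FREE */
	uint8_t  type_id;	/* 0 = kmalloc/kfree, 1 = kmem_cache_*, 2 = __get_free_pages et al. */
	uint16_t event_size;	/* total size of this event; skip trailing bytes you don't know */
	int32_t  seq;		/* sequence number for reordering, may wrap around */
	uint64_t call_site;	/* return address of the caller */
	uint64_t ptr;		/* pointer to the target memory area, may be 0 */
};

/* Extra fields that follow the core record for allocation events only. */
struct kmemtrace_alloc_record {
	uint64_t bytes_req;	/* requested bytes, never zero */
	uint64_t bytes_alloc;	/* allocated bytes, not lower than bytes_req */
	uint32_t gfp_flags;	/* GFP flags supplied by the caller */
	int32_t  cpu;		/* target CPU; -1 means same as the origin CPU */
};

A reader would consume event_size bytes per event from a cpu<n> file, using
poll() before each read() as described above, and interpret the first bytes
through these layouts.
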
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
deleted file mode 100644
index 6308735e58ca..000000000000
--- a/Documentation/trace/kmemtrace.txt
+++ /dev/null
@@ -1,126 +0,0 @@
			kmemtrace - Kernel Memory Tracer

			  by Eduard - Gabriel Munteanu
			     <eduard.munteanu@linux360.ro>

I. Introduction
===============

kmemtrace helps kernel developers figure out two things:
1) how different allocators (SLAB, SLUB etc.) perform
2) how kernel code allocates memory and how much

To do this, we trace every allocation and export information to the userspace
through the relay interface. We export things such as the number of requested
bytes, the number of bytes actually allocated (i.e. including internal
fragmentation), whether this is a slab allocation or a plain kmalloc() and so
on.

The actual analysis is performed by a userspace tool (see section III for
details on where to get it from). It logs the data exported by the kernel,
processes it and (as of writing this) can provide the following information:
- the total amount of memory allocated and fragmentation per call-site
- the amount of memory allocated and fragmentation per allocation
- total memory allocated and fragmentation in the collected dataset
- number of cross-CPU allocation and frees (makes sense in NUMA environments)

Moreover, it can potentially find inconsistent and erroneous behavior in
kernel code, such as using slab free functions on kmalloc'ed memory or
allocating less memory than requested (but not truly failed allocations).

kmemtrace also makes provisions for tracing on some arch and analysing the
data on another.

II. Design and goals
====================

kmemtrace was designed to handle rather large amounts of data. Thus, it uses
the relay interface to export whatever is logged to userspace, which then
stores it. Analysis and reporting is done asynchronously, that is, after the
data is collected and stored. By design, it allows one to log and analyse
on different machines and different arches.

As of writing this, the ABI is not considered stable, though it might not
change much. However, no guarantees are made about compatibility yet. When
deemed stable, the ABI should still allow easy extension while maintaining
backward compatibility. This is described further in Documentation/ABI.

Summary of design goals:
	- allow logging and analysis to be done across different machines
	- be fast and anticipate usage in high-load environments (*)
	- be reasonably extensible
	- make it possible for GNU/Linux distributions to have kmemtrace
	  included in their repositories

(*) - one of the reasons Pekka Enberg's original userspace data analysis
      tool's code was rewritten from Perl to C (although this is more than a
      simple conversion)


III. Quick usage guide
======================

1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
CONFIG_KMEMTRACE).

2) Get the userspace tool and build it:
$ git clone git://repo.or.cz/kmemtrace-user.git		# current repository
$ cd kmemtrace-user/
$ ./autogen.sh
$ ./configure
$ make

3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
'single' runlevel (so that relay buffers don't fill up easily), and run
kmemtrace:
# '$' does not mean user, but root here.
$ mount -t debugfs none /sys/kernel/debug
$ mount -t proc none /proc
$ cd path/to/kmemtrace-user/
$ ./kmemtraced
Wait a bit, then stop it with CTRL+C.
$ cat /sys/kernel/debug/kmemtrace/total_overruns	# Check if we didn't
							# overrun, should
							# be zero.
$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
		check its correctness]
$ ./kmemtrace-report

Now you should have a nice and short summary of how the allocator performs.

IV. FAQ and known issues
========================

Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
this? Should I worry?
A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
large the number is. You can fix it by supplying a higher
'kmemtrace.subbufs=N' kernel parameter.
---

Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
A: This is a bug and should be reported. It can occur for a variety of
reasons:
	- possible bugs in relay code
	- possible misuse of relay by kmemtrace
	- timestamps being collected unorderly
Or you may fix it yourself and send us a patch.
---

Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
A: This is a known issue and I'm working on it. These might be true errors
in kernel code, which may have inconsistent behavior (e.g. allocating memory
with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
out this behavior may work with SLAB, but may fail with other allocators.

It may also be due to lack of tracing in some unusual allocator functions.

We don't want bug reports regarding this issue yet.
---

V. See also
===========

Documentation/kernel-parameters.txt
Documentation/ABI/testing/debugfs-kmemtrace

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index ec94748ae65b..5f77d94598dd 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,7 +42,7 @@ Synopsis of kprobe_events
42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) 42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG. 43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types 44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
45 (u8/u16/u32/u64/s8/s16/s32/s64) are supported. 45 (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
46 46
47 (*) only for return probe. 47 (*) only for return probe.
48 (**) this is useful for fetching a field of data structures. 48 (**) this is useful for fetching a field of data structures.
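For illustration, the new ':string' fetch type documented above can be
exercised from userspace roughly as in the sketch below. The probe name, the
probed symbol and the register assumed to hold the filename pointer are
illustrative and architecture-dependent; the path assumes debugfs is mounted
at /sys/kernel/debug.

#include <stdio.h>

int main(void)
{
	/* Append so that existing probe definitions are not cleared. */
	FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "a");

	if (!f)
		return 1;
	/* ':string' makes the fetched argument be recorded as a string. */
	fprintf(f, "p:myprobe do_sys_open dfd=%%ax filename=+0(%%si):string\n");
	fclose(f);
	return 0;
}
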
diff --git a/MAINTAINERS b/MAINTAINERS
index db3d0f5061f9..b7f6a88ce129 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3380,13 +3380,6 @@ F: include/linux/kmemleak.h
3380F: mm/kmemleak.c 3380F: mm/kmemleak.c
3381F: mm/kmemleak-test.c 3381F: mm/kmemleak-test.c
3382 3382
3383KMEMTRACE
3384M: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
3385S: Maintained
3386F: Documentation/trace/kmemtrace.txt
3387F: include/linux/kmemtrace.h
3388F: kernel/trace/kmemtrace.c
3389
3390KPROBES 3383KPROBES
3391M: Ananth N Mavinakayanahalli <ananth@in.ibm.com> 3384M: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
3392M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 3385M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
@@ -5654,7 +5647,7 @@ TRACING
5654M: Steven Rostedt <rostedt@goodmis.org> 5647M: Steven Rostedt <rostedt@goodmis.org>
5655M: Frederic Weisbecker <fweisbec@gmail.com> 5648M: Frederic Weisbecker <fweisbec@gmail.com>
5656M: Ingo Molnar <mingo@redhat.com> 5649M: Ingo Molnar <mingo@redhat.com>
5657T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core 5650T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf/core
5658S: Maintained 5651S: Maintained
5659F: Documentation/trace/ftrace.txt 5652F: Documentation/trace/ftrace.txt
5660F: arch/*/*/*/ftrace.h 5653F: arch/*/*/*/ftrace.h
diff --git a/Makefile b/Makefile
index 037ff4e62ca0..f5787e2e1504 100644
--- a/Makefile
+++ b/Makefile
@@ -414,7 +414,7 @@ endif
414no-dot-config-targets := clean mrproper distclean \ 414no-dot-config-targets := clean mrproper distclean \
415 cscope TAGS tags help %docs check% \ 415 cscope TAGS tags help %docs check% \
416 include/linux/version.h headers_% \ 416 include/linux/version.h headers_% \
417 kernelrelease kernelversion 417 kernelrelease kernelversion %src-pkg
418 418
419config-targets := 0 419config-targets := 0
420mixed-targets := 0 420mixed-targets := 0
@@ -1158,6 +1158,8 @@ distclean: mrproper
1158# rpm target kept for backward compatibility 1158# rpm target kept for backward compatibility
1159package-dir := $(srctree)/scripts/package 1159package-dir := $(srctree)/scripts/package
1160 1160
1161%src-pkg: FORCE
1162 $(Q)$(MAKE) $(build)=$(package-dir) $@
1161%pkg: include/config/kernel.release FORCE 1163%pkg: include/config/kernel.release FORCE
1162 $(Q)$(MAKE) $(build)=$(package-dir) $@ 1164 $(Q)$(MAKE) $(build)=$(package-dir) $@
1163rpm: include/config/kernel.release FORCE 1165rpm: include/config/kernel.release FORCE
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/alpha/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/arm/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index de12536d687f..417c392ddf1c 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
164 struct hw_perf_event *hwc, 164 struct hw_perf_event *hwc,
165 int idx) 165 int idx)
166{ 166{
167 s64 left = atomic64_read(&hwc->period_left); 167 s64 left = local64_read(&hwc->period_left);
168 s64 period = hwc->sample_period; 168 s64 period = hwc->sample_period;
169 int ret = 0; 169 int ret = 0;
170 170
171 if (unlikely(left <= -period)) { 171 if (unlikely(left <= -period)) {
172 left = period; 172 left = period;
173 atomic64_set(&hwc->period_left, left); 173 local64_set(&hwc->period_left, left);
174 hwc->last_period = period; 174 hwc->last_period = period;
175 ret = 1; 175 ret = 1;
176 } 176 }
177 177
178 if (unlikely(left <= 0)) { 178 if (unlikely(left <= 0)) {
179 left += period; 179 left += period;
180 atomic64_set(&hwc->period_left, left); 180 local64_set(&hwc->period_left, left);
181 hwc->last_period = period; 181 hwc->last_period = period;
182 ret = 1; 182 ret = 1;
183 } 183 }
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
185 if (left > (s64)armpmu->max_period) 185 if (left > (s64)armpmu->max_period)
186 left = armpmu->max_period; 186 left = armpmu->max_period;
187 187
188 atomic64_set(&hwc->prev_count, (u64)-left); 188 local64_set(&hwc->prev_count, (u64)-left);
189 189
190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); 190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191 191
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
204 u64 delta; 204 u64 delta;
205 205
206again: 206again:
207 prev_raw_count = atomic64_read(&hwc->prev_count); 207 prev_raw_count = local64_read(&hwc->prev_count);
208 new_raw_count = armpmu->read_counter(idx); 208 new_raw_count = armpmu->read_counter(idx);
209 209
210 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 210 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211 new_raw_count) != prev_raw_count) 211 new_raw_count) != prev_raw_count)
212 goto again; 212 goto again;
213 213
214 delta = (new_raw_count << shift) - (prev_raw_count << shift); 214 delta = (new_raw_count << shift) - (prev_raw_count << shift);
215 delta >>= shift; 215 delta >>= shift;
216 216
217 atomic64_add(delta, &event->count); 217 local64_add(delta, &event->count);
218 atomic64_sub(delta, &hwc->period_left); 218 local64_sub(delta, &hwc->period_left);
219 219
220 return new_raw_count; 220 return new_raw_count;
221} 221}
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
478 if (!hwc->sample_period) { 478 if (!hwc->sample_period) {
479 hwc->sample_period = armpmu->max_period; 479 hwc->sample_period = armpmu->max_period;
480 hwc->last_period = hwc->sample_period; 480 hwc->last_period = hwc->sample_period;
481 atomic64_set(&hwc->period_left, hwc->sample_period); 481 local64_set(&hwc->period_left, hwc->sample_period);
482 } 482 }
483 483
484 err = 0; 484 err = 0;
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/avr32/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/blackfin/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/cris/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/kernel/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/h8300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m32r/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m68k/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/microblaze/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mips/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mn10300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/parisc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/powerpc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index e6d4ce69b126..5c16b891d501 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -21,3 +21,15 @@
21#ifdef CONFIG_FSL_EMB_PERF_EVENT 21#ifdef CONFIG_FSL_EMB_PERF_EVENT
22#include <asm/perf_event_fsl_emb.h> 22#include <asm/perf_event_fsl_emb.h>
23#endif 23#endif
24
25#ifdef CONFIG_PERF_EVENTS
26#include <asm/ptrace.h>
27#include <asm/reg.h>
28
29#define perf_arch_fetch_caller_regs(regs, __ip) \
30 do { \
31 (regs)->nip = __ip; \
32 (regs)->gpr[1] = *(unsigned long *)__get_SP(); \
33 asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
34 } while (0)
35#endif
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a556..2d29752cbe16 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
127_GLOBAL(__restore_cpu_power7) 127_GLOBAL(__restore_cpu_power7)
128 /* place holder */ 128 /* place holder */
129 blr 129 blr
130
131/*
132 * Get a minimal set of registers for our caller's nth caller.
133 * r3 = regs pointer, r5 = n.
134 *
135 * We only get R1 (stack pointer), NIP (next instruction pointer)
136 * and LR (link register). These are all we can get in the
137 * general case without doing complicated stack unwinding, but
138 * fortunately they are enough to do a stack backtrace, which
139 * is all we need them for.
140 */
141_GLOBAL(perf_arch_fetch_caller_regs)
142 mr r6,r1
143 cmpwi r5,0
144 mflr r4
145 ble 2f
146 mtctr r5
1471: PPC_LL r6,0(r6)
148 bdnz 1b
149 PPC_LL r4,PPC_LR_STKOFF(r6)
1502: PPC_LL r7,0(r6)
151 PPC_LL r7,PPC_LR_STKOFF(r7)
152 PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
153 PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
154 PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
155 blr
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5c14ffe51258..d301a30445e0 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
410 * Therefore we treat them like NMIs. 410 * Therefore we treat them like NMIs.
411 */ 411 */
412 do { 412 do {
413 prev = atomic64_read(&event->hw.prev_count); 413 prev = local64_read(&event->hw.prev_count);
414 barrier(); 414 barrier();
415 val = read_pmc(event->hw.idx); 415 val = read_pmc(event->hw.idx);
416 } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 416 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
417 417
418 /* The counters are only 32 bits wide */ 418 /* The counters are only 32 bits wide */
419 delta = (val - prev) & 0xfffffffful; 419 delta = (val - prev) & 0xfffffffful;
420 atomic64_add(delta, &event->count); 420 local64_add(delta, &event->count);
421 atomic64_sub(delta, &event->hw.period_left); 421 local64_sub(delta, &event->hw.period_left);
422} 422}
423 423
424/* 424/*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
444 if (!event->hw.idx) 444 if (!event->hw.idx)
445 continue; 445 continue;
446 val = (event->hw.idx == 5) ? pmc5 : pmc6; 446 val = (event->hw.idx == 5) ? pmc5 : pmc6;
447 prev = atomic64_read(&event->hw.prev_count); 447 prev = local64_read(&event->hw.prev_count);
448 event->hw.idx = 0; 448 event->hw.idx = 0;
449 delta = (val - prev) & 0xfffffffful; 449 delta = (val - prev) & 0xfffffffful;
450 atomic64_add(delta, &event->count); 450 local64_add(delta, &event->count);
451 } 451 }
452} 452}
453 453
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
462 event = cpuhw->limited_counter[i]; 462 event = cpuhw->limited_counter[i];
463 event->hw.idx = cpuhw->limited_hwidx[i]; 463 event->hw.idx = cpuhw->limited_hwidx[i];
464 val = (event->hw.idx == 5) ? pmc5 : pmc6; 464 val = (event->hw.idx == 5) ? pmc5 : pmc6;
465 atomic64_set(&event->hw.prev_count, val); 465 local64_set(&event->hw.prev_count, val);
466 perf_event_update_userpage(event); 466 perf_event_update_userpage(event);
467 } 467 }
468} 468}
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
666 } 666 }
667 val = 0; 667 val = 0;
668 if (event->hw.sample_period) { 668 if (event->hw.sample_period) {
669 left = atomic64_read(&event->hw.period_left); 669 left = local64_read(&event->hw.period_left);
670 if (left < 0x80000000L) 670 if (left < 0x80000000L)
671 val = 0x80000000L - left; 671 val = 0x80000000L - left;
672 } 672 }
673 atomic64_set(&event->hw.prev_count, val); 673 local64_set(&event->hw.prev_count, val);
674 event->hw.idx = idx; 674 event->hw.idx = idx;
675 write_pmc(idx, val); 675 write_pmc(idx, val);
676 perf_event_update_userpage(event); 676 perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
754 * skip the schedulability test here, it will be peformed 754 * skip the schedulability test here, it will be peformed
755 * at commit time(->commit_txn) as a whole 755 * at commit time(->commit_txn) as a whole
756 */ 756 */
757 if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) 757 if (cpuhw->group_flag & PERF_EVENT_TXN)
758 goto nocheck; 758 goto nocheck;
759 759
760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) 760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
845 if (left < 0x80000000L) 845 if (left < 0x80000000L)
846 val = 0x80000000L - left; 846 val = 0x80000000L - left;
847 write_pmc(event->hw.idx, val); 847 write_pmc(event->hw.idx, val);
848 atomic64_set(&event->hw.prev_count, val); 848 local64_set(&event->hw.prev_count, val);
849 atomic64_set(&event->hw.period_left, left); 849 local64_set(&event->hw.period_left, left);
850 perf_event_update_userpage(event); 850 perf_event_update_userpage(event);
851 perf_enable(); 851 perf_enable();
852 local_irq_restore(flags); 852 local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
861{ 861{
862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
863 863
864 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 864 cpuhw->group_flag |= PERF_EVENT_TXN;
865 cpuhw->n_txn_start = cpuhw->n_events; 865 cpuhw->n_txn_start = cpuhw->n_events;
866} 866}
867 867
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
874{ 874{
875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
876 876
877 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 877 cpuhw->group_flag &= ~PERF_EVENT_TXN;
878} 878}
879 879
880/* 880/*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
900 for (i = cpuhw->n_txn_start; i < n; ++i) 900 for (i = cpuhw->n_txn_start; i < n; ++i)
901 cpuhw->event[i]->hw.config = cpuhw->events[i]; 901 cpuhw->event[i]->hw.config = cpuhw->events[i];
902 902
903 cpuhw->group_flag &= ~PERF_EVENT_TXN;
903 return 0; 904 return 0;
904} 905}
905 906
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1111 event->hw.config = events[n]; 1112 event->hw.config = events[n];
1112 event->hw.event_base = cflags[n]; 1113 event->hw.event_base = cflags[n];
1113 event->hw.last_period = event->hw.sample_period; 1114 event->hw.last_period = event->hw.sample_period;
1114 atomic64_set(&event->hw.period_left, event->hw.last_period); 1115 local64_set(&event->hw.period_left, event->hw.last_period);
1115 1116
1116 /* 1117 /*
1117 * See if we need to reserve the PMU. 1118 * See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1149 int record = 0; 1150 int record = 0;
1150 1151
1151 /* we don't have to worry about interrupts here */ 1152 /* we don't have to worry about interrupts here */
1152 prev = atomic64_read(&event->hw.prev_count); 1153 prev = local64_read(&event->hw.prev_count);
1153 delta = (val - prev) & 0xfffffffful; 1154 delta = (val - prev) & 0xfffffffful;
1154 atomic64_add(delta, &event->count); 1155 local64_add(delta, &event->count);
1155 1156
1156 /* 1157 /*
1157 * See if the total period for this event has expired, 1158 * See if the total period for this event has expired,
1158 * and update for the next period. 1159 * and update for the next period.
1159 */ 1160 */
1160 val = 0; 1161 val = 0;
1161 left = atomic64_read(&event->hw.period_left) - delta; 1162 left = local64_read(&event->hw.period_left) - delta;
1162 if (period) { 1163 if (period) {
1163 if (left <= 0) { 1164 if (left <= 0) {
1164 left += period; 1165 left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1196 } 1197 }
1197 1198
1198 write_pmc(event->hw.idx, val); 1199 write_pmc(event->hw.idx, val);
1199 atomic64_set(&event->hw.prev_count, val); 1200 local64_set(&event->hw.prev_count, val);
1200 atomic64_set(&event->hw.period_left, left); 1201 local64_set(&event->hw.period_left, left);
1201 perf_event_update_userpage(event); 1202 perf_event_update_userpage(event);
1202} 1203}
1203 1204
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/score/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sh/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 81b6de41ae5d..7a3dc3567258 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
185 * this is the simplest approach for maintaining consistency. 185 * this is the simplest approach for maintaining consistency.
186 */ 186 */
187again: 187again:
188 prev_raw_count = atomic64_read(&hwc->prev_count); 188 prev_raw_count = local64_read(&hwc->prev_count);
189 new_raw_count = sh_pmu->read(idx); 189 new_raw_count = sh_pmu->read(idx);
190 190
191 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 191 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
192 new_raw_count) != prev_raw_count) 192 new_raw_count) != prev_raw_count)
193 goto again; 193 goto again;
194 194
@@ -203,7 +203,7 @@ again:
203 delta = (new_raw_count << shift) - (prev_raw_count << shift); 203 delta = (new_raw_count << shift) - (prev_raw_count << shift);
204 delta >>= shift; 204 delta >>= shift;
205 205
206 atomic64_add(delta, &event->count); 206 local64_add(delta, &event->count);
207} 207}
208 208
209static void sh_pmu_disable(struct perf_event *event) 209static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sparc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 7e2669894ce8..74c4e0cd889c 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
6#define PERF_EVENT_INDEX_OFFSET 0 6#define PERF_EVENT_INDEX_OFFSET 0
7 7
8#ifdef CONFIG_PERF_EVENTS 8#ifdef CONFIG_PERF_EVENTS
9#include <asm/ptrace.h>
10
9extern void init_hw_perf_events(void); 11extern void init_hw_perf_events(void);
12
13extern void
14__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
15
16#define perf_arch_fetch_caller_regs(pt_regs, ip) \
17 __perf_arch_fetch_caller_regs(pt_regs, ip, 1);
10#else 18#else
11static inline void init_hw_perf_events(void) { } 19static inline void init_hw_perf_events(void) { }
12#endif 20#endif
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
index 92090cc9e829..682fee06a16b 100644
--- a/arch/sparc/kernel/helpers.S
+++ b/arch/sparc/kernel/helpers.S
@@ -47,9 +47,9 @@ stack_trace_flush:
47 .size stack_trace_flush,.-stack_trace_flush 47 .size stack_trace_flush,.-stack_trace_flush
48 48
49#ifdef CONFIG_PERF_EVENTS 49#ifdef CONFIG_PERF_EVENTS
50 .globl perf_arch_fetch_caller_regs 50 .globl __perf_arch_fetch_caller_regs
51 .type perf_arch_fetch_caller_regs,#function 51 .type __perf_arch_fetch_caller_regs,#function
52perf_arch_fetch_caller_regs: 52__perf_arch_fetch_caller_regs:
53 /* We always read the %pstate into %o5 since we will use 53 /* We always read the %pstate into %o5 since we will use
54 * that to construct a fake %tstate to store into the regs. 54 * that to construct a fake %tstate to store into the regs.
55 */ 55 */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 44faabc3c02c..357ced3c33ff 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
572 s64 delta; 572 s64 delta;
573 573
574again: 574again:
575 prev_raw_count = atomic64_read(&hwc->prev_count); 575 prev_raw_count = local64_read(&hwc->prev_count);
576 new_raw_count = read_pmc(idx); 576 new_raw_count = read_pmc(idx);
577 577
578 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 578 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
579 new_raw_count) != prev_raw_count) 579 new_raw_count) != prev_raw_count)
580 goto again; 580 goto again;
581 581
582 delta = (new_raw_count << shift) - (prev_raw_count << shift); 582 delta = (new_raw_count << shift) - (prev_raw_count << shift);
583 delta >>= shift; 583 delta >>= shift;
584 584
585 atomic64_add(delta, &event->count); 585 local64_add(delta, &event->count);
586 atomic64_sub(delta, &hwc->period_left); 586 local64_sub(delta, &hwc->period_left);
587 587
588 return new_raw_count; 588 return new_raw_count;
589} 589}
@@ -591,27 +591,27 @@ again:
591static int sparc_perf_event_set_period(struct perf_event *event, 591static int sparc_perf_event_set_period(struct perf_event *event,
592 struct hw_perf_event *hwc, int idx) 592 struct hw_perf_event *hwc, int idx)
593{ 593{
594 s64 left = atomic64_read(&hwc->period_left); 594 s64 left = local64_read(&hwc->period_left);
595 s64 period = hwc->sample_period; 595 s64 period = hwc->sample_period;
596 int ret = 0; 596 int ret = 0;
597 597
598 if (unlikely(left <= -period)) { 598 if (unlikely(left <= -period)) {
599 left = period; 599 left = period;
600 atomic64_set(&hwc->period_left, left); 600 local64_set(&hwc->period_left, left);
601 hwc->last_period = period; 601 hwc->last_period = period;
602 ret = 1; 602 ret = 1;
603 } 603 }
604 604
605 if (unlikely(left <= 0)) { 605 if (unlikely(left <= 0)) {
606 left += period; 606 left += period;
607 atomic64_set(&hwc->period_left, left); 607 local64_set(&hwc->period_left, left);
608 hwc->last_period = period; 608 hwc->last_period = period;
609 ret = 1; 609 ret = 1;
610 } 610 }
611 if (left > MAX_PERIOD) 611 if (left > MAX_PERIOD)
612 left = MAX_PERIOD; 612 left = MAX_PERIOD;
613 613
614 atomic64_set(&hwc->prev_count, (u64)-left); 614 local64_set(&hwc->prev_count, (u64)-left);
615 615
616 write_pmc(idx, (u64)(-left) & 0xffffffff); 616 write_pmc(idx, (u64)(-left) & 0xffffffff);
617 617
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
1006 * skip the schedulability test here, it will be peformed 1006 * skip the schedulability test here, it will be peformed
1007 * at commit time(->commit_txn) as a whole 1007 * at commit time(->commit_txn) as a whole
1008 */ 1008 */
1009 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1009 if (cpuc->group_flag & PERF_EVENT_TXN)
1010 goto nocheck; 1010 goto nocheck;
1011 1011
1012 if (check_excludes(cpuc->event, n0, 1)) 1012 if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1088 if (!hwc->sample_period) { 1088 if (!hwc->sample_period) {
1089 hwc->sample_period = MAX_PERIOD; 1089 hwc->sample_period = MAX_PERIOD;
1090 hwc->last_period = hwc->sample_period; 1090 hwc->last_period = hwc->sample_period;
1091 atomic64_set(&hwc->period_left, hwc->sample_period); 1091 local64_set(&hwc->period_left, hwc->sample_period);
1092 } 1092 }
1093 1093
1094 return 0; 1094 return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1103{ 1103{
1104 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1104 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1105 1105
1106 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 1106 cpuhw->group_flag |= PERF_EVENT_TXN;
1107} 1107}
1108 1108
1109/* 1109/*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1115{ 1115{
1116 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1116 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1117 1117
1118 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 1118 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1119} 1119}
1120 1120
1121/* 1121/*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1138 if (sparc_check_constraints(cpuc->event, cpuc->events, n)) 1138 if (sparc_check_constraints(cpuc->event, cpuc->events, n))
1139 return -EAGAIN; 1139 return -EAGAIN;
1140 1140
1141 cpuc->group_flag &= ~PERF_EVENT_TXN;
1141 return 0; 1142 return 0;
1142} 1143}
1143 1144
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 942255310e6a..528a11e8d3e3 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
20#include <linux/list.h> 20#include <linux/list.h>
21 21
22/* Available HW breakpoint length encodings */ 22/* Available HW breakpoint length encodings */
23#define X86_BREAKPOINT_LEN_X 0x00
23#define X86_BREAKPOINT_LEN_1 0x40 24#define X86_BREAKPOINT_LEN_1 0x40
24#define X86_BREAKPOINT_LEN_2 0x44 25#define X86_BREAKPOINT_LEN_2 0x44
25#define X86_BREAKPOINT_LEN_4 0x4c 26#define X86_BREAKPOINT_LEN_4 0x4c
26#define X86_BREAKPOINT_LEN_EXECUTE 0x40
27 27
28#ifdef CONFIG_X86_64 28#ifdef CONFIG_X86_64
29#define X86_BREAKPOINT_LEN_8 0x48 29#define X86_BREAKPOINT_LEN_8 0x48
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e0..6e742cc4251b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
68 68
69union cpuid10_edx { 69union cpuid10_edx {
70 struct { 70 struct {
71 unsigned int num_counters_fixed:4; 71 unsigned int num_counters_fixed:5;
72 unsigned int reserved:28; 72 unsigned int bit_width_fixed:8;
73 unsigned int reserved:19;
73 } split; 74 } split;
74 unsigned int full; 75 unsigned int full;
75}; 76};
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
140extern unsigned long perf_misc_flags(struct pt_regs *regs); 141extern unsigned long perf_misc_flags(struct pt_regs *regs);
141#define perf_misc_flags(regs) perf_misc_flags(regs) 142#define perf_misc_flags(regs) perf_misc_flags(regs)
142 143
144#include <asm/stacktrace.h>
145
146/*
147 * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
148 * and the comment with PERF_EFLAGS_EXACT.
149 */
150#define perf_arch_fetch_caller_regs(regs, __ip) { \
151 (regs)->ip = (__ip); \
152 (regs)->bp = caller_frame_pointer(); \
153 (regs)->cs = __KERNEL_CS; \
154 regs->flags = 0; \
155}
156
143#else 157#else
144static inline void init_hw_perf_events(void) { } 158static inline void init_hw_perf_events(void) { }
145static inline void perf_events_lapic_init(void) { } 159static inline void perf_events_lapic_init(void) { }
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 64a8ebff06fc..def500776b16 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -19,7 +19,6 @@
19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ 19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */
20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) 20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
21#define ARCH_P4_MAX_CCCR (18) 21#define ARCH_P4_MAX_CCCR (18)
22#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2)
23 22
24#define P4_ESCR_EVENT_MASK 0x7e000000U 23#define P4_ESCR_EVENT_MASK 0x7e000000U
25#define P4_ESCR_EVENT_SHIFT 25 24#define P4_ESCR_EVENT_SHIFT 25
@@ -71,10 +70,6 @@
71#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
72#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
73 72
74/* Custom bits in reerved CCCR area */
75#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU
76
77
78/* Non HT mask */ 73/* Non HT mask */
79#define P4_CCCR_MASK \ 74#define P4_CCCR_MASK \
80 (P4_CCCR_OVF | \ 75 (P4_CCCR_OVF | \
@@ -106,8 +101,7 @@
106 * ESCR and CCCR but rather an only packed value should 101 * ESCR and CCCR but rather an only packed value should
107 * be unpacked and written to a proper addresses 102 * be unpacked and written to a proper addresses
108 * 103 *
109 * the base idea is to pack as much info as 104 * the base idea is to pack as much info as possible
110 * possible
111 */ 105 */
112#define p4_config_pack_escr(v) (((u64)(v)) << 32) 106#define p4_config_pack_escr(v) (((u64)(v)) << 32)
113#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) 107#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
@@ -130,8 +124,6 @@
130 t; \ 124 t; \
131 }) 125 })
132 126
133#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
134
135#define P4_CONFIG_HT_SHIFT 63 127#define P4_CONFIG_HT_SHIFT 63
136#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
137 129
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
214 return escr; 206 return escr;
215} 207}
216 208
209/*
210 * This are the events which should be used in "Event Select"
211 * field of ESCR register, they are like unique keys which allow
212 * the kernel to determinate which CCCR and COUNTER should be
213 * used to track an event
214 */
217enum P4_EVENTS { 215enum P4_EVENTS {
218 P4_EVENT_TC_DELIVER_MODE, 216 P4_EVENT_TC_DELIVER_MODE,
219 P4_EVENT_BPU_FETCH_REQUEST, 217 P4_EVENT_BPU_FETCH_REQUEST,
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
561 * a caller should use P4_ESCR_EMASK_NAME helper to 559 * a caller should use P4_ESCR_EMASK_NAME helper to
562 * pick the EventMask needed, for example 560 * pick the EventMask needed, for example
563 * 561 *
564 * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) 562 * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
565 */ 563 */
566enum P4_ESCR_EMASKS { 564enum P4_ESCR_EMASKS {
567 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), 565 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
753 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), 751 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
754}; 752};
755 753
756/* P4 PEBS: stale for a while */ 754/*
757#define P4_PEBS_METRIC_MASK 0x00001fffU 755 * P4 PEBS specifics (Replay Event only)
758#define P4_PEBS_UOB_TAG 0x01000000U 756 *
759#define P4_PEBS_ENABLE 0x02000000U 757 * Format (bits):
760 758 * 0-6: metric from P4_PEBS_METRIC enum
761/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ 759 * 7 : reserved
762#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 760 * 8 : reserved
763#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 761 * 9-11 : reserved
764#define P4_PEBS__dtlb_load_miss_retired 0x3000004 762 *
765#define P4_PEBS__dtlb_store_miss_retired 0x3000004 763 * Note we have UOP and PEBS bits reserved for now
766#define P4_PEBS__dtlb_all_miss_retired 0x3000004 764 * just in case if we will need them once
767#define P4_PEBS__tagged_mispred_branch 0x3018000 765 */
768#define P4_PEBS__mob_load_replay_retired 0x3000200 766#define P4_PEBS_CONFIG_ENABLE (1 << 7)
769#define P4_PEBS__split_load_retired 0x3000400 767#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
770#define P4_PEBS__split_store_retired 0x3000400 768#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
771 769#define P4_PEBS_CONFIG_MASK 0xff
772#define P4_VERT__1stl_cache_load_miss_retired 0x0000001 770
773#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 771/*
774#define P4_VERT__dtlb_load_miss_retired 0x0000001 772 * mem: Only counters MSR_IQ_COUNTER4 (16) and
775#define P4_VERT__dtlb_store_miss_retired 0x0000002 773 * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
776#define P4_VERT__dtlb_all_miss_retired 0x0000003 774 */
777#define P4_VERT__tagged_mispred_branch 0x0000010 775#define P4_PEBS_ENABLE 0x02000000U
778#define P4_VERT__mob_load_replay_retired 0x0000001 776#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
779#define P4_VERT__split_load_retired 0x0000001 777
780#define P4_VERT__split_store_retired 0x0000002 778#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
781 779#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
782enum P4_CACHE_EVENTS { 780
783 P4_CACHE__NONE, 781#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask))
784 782
785 P4_CACHE__1stl_cache_load_miss_retired, 783enum P4_PEBS_METRIC {
786 P4_CACHE__2ndl_cache_load_miss_retired, 784 P4_PEBS_METRIC__none,
787 P4_CACHE__dtlb_load_miss_retired, 785
788 P4_CACHE__dtlb_store_miss_retired, 786 P4_PEBS_METRIC__1stl_cache_load_miss_retired,
789 P4_CACHE__itlb_reference_hit, 787 P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
790 P4_CACHE__itlb_reference_miss, 788 P4_PEBS_METRIC__dtlb_load_miss_retired,
791 789 P4_PEBS_METRIC__dtlb_store_miss_retired,
792 P4_CACHE__MAX 790 P4_PEBS_METRIC__dtlb_all_miss_retired,
791 P4_PEBS_METRIC__tagged_mispred_branch,
792 P4_PEBS_METRIC__mob_load_replay_retired,
793 P4_PEBS_METRIC__split_load_retired,
794 P4_PEBS_METRIC__split_store_retired,
795
796 P4_PEBS_METRIC__max
793}; 797};
794 798
795#endif /* PERF_EVENT_P4_H */ 799#endif /* PERF_EVENT_P4_H */
800
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 4dab78edbad9..2b16a2ad23dc 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -1,6 +1,13 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
1#ifndef _ASM_X86_STACKTRACE_H 6#ifndef _ASM_X86_STACKTRACE_H
2#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
3 8
9#include <linux/uaccess.h>
10
4extern int kstack_depth_to_print; 11extern int kstack_depth_to_print;
5 12
6struct thread_info; 13struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
42 unsigned long *stack, unsigned long bp, 49 unsigned long *stack, unsigned long bp,
43 const struct stacktrace_ops *ops, void *data); 50 const struct stacktrace_ops *ops, void *data);
44 51
52#ifdef CONFIG_X86_32
53#define STACKSLOTS_PER_LINE 8
54#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
55#else
56#define STACKSLOTS_PER_LINE 4
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif
59
60extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl);
63
64extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl);
67
68extern unsigned int code_bytes;
69
70/* The form of the top of the frame on the stack */
71struct stack_frame {
72 struct stack_frame *next_frame;
73 unsigned long return_address;
74};
75
76struct stack_frame_ia32 {
77 u32 next_frame;
78 u32 return_address;
79};
80
81static inline unsigned long caller_frame_pointer(void)
82{
83 struct stack_frame *frame;
84
85 get_bp(frame);
86
87#ifdef CONFIG_FRAME_POINTER
88 frame = frame->next_frame;
89#endif
90
91 return (unsigned long)frame;
92}
93
45#endif /* _ASM_X86_STACKTRACE_H */ 94#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a18..f2da20fda02d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
220 struct perf_event *event); 220 struct perf_event *event);
221 struct event_constraint *event_constraints; 221 struct event_constraint *event_constraints;
222 void (*quirks)(void); 222 void (*quirks)(void);
223 int perfctr_second_write;
223 224
224 int (*cpu_prepare)(int cpu); 225 int (*cpu_prepare)(int cpu);
225 void (*cpu_starting)(int cpu); 226 void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
295 * count to the generic event atomically: 296 * count to the generic event atomically:
296 */ 297 */
297again: 298again:
298 prev_raw_count = atomic64_read(&hwc->prev_count); 299 prev_raw_count = local64_read(&hwc->prev_count);
299 rdmsrl(hwc->event_base + idx, new_raw_count); 300 rdmsrl(hwc->event_base + idx, new_raw_count);
300 301
301 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 302 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
302 new_raw_count) != prev_raw_count) 303 new_raw_count) != prev_raw_count)
303 goto again; 304 goto again;
304 305
@@ -313,8 +314,8 @@ again:
313 delta = (new_raw_count << shift) - (prev_raw_count << shift); 314 delta = (new_raw_count << shift) - (prev_raw_count << shift);
314 delta >>= shift; 315 delta >>= shift;
315 316
316 atomic64_add(delta, &event->count); 317 local64_add(delta, &event->count);
317 atomic64_sub(delta, &hwc->period_left); 318 local64_sub(delta, &hwc->period_left);
318 319
319 return new_raw_count; 320 return new_raw_count;
320} 321}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
438 if (!hwc->sample_period) { 439 if (!hwc->sample_period) {
439 hwc->sample_period = x86_pmu.max_period; 440 hwc->sample_period = x86_pmu.max_period;
440 hwc->last_period = hwc->sample_period; 441 hwc->last_period = hwc->sample_period;
441 atomic64_set(&hwc->period_left, hwc->sample_period); 442 local64_set(&hwc->period_left, hwc->sample_period);
442 } else { 443 } else {
443 /* 444 /*
444 * If we have a PMU initialized but no APIC 445 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
885x86_perf_event_set_period(struct perf_event *event) 886x86_perf_event_set_period(struct perf_event *event)
886{ 887{
887 struct hw_perf_event *hwc = &event->hw; 888 struct hw_perf_event *hwc = &event->hw;
888 s64 left = atomic64_read(&hwc->period_left); 889 s64 left = local64_read(&hwc->period_left);
889 s64 period = hwc->sample_period; 890 s64 period = hwc->sample_period;
890 int ret = 0, idx = hwc->idx; 891 int ret = 0, idx = hwc->idx;
891 892
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
897 */ 898 */
898 if (unlikely(left <= -period)) { 899 if (unlikely(left <= -period)) {
899 left = period; 900 left = period;
900 atomic64_set(&hwc->period_left, left); 901 local64_set(&hwc->period_left, left);
901 hwc->last_period = period; 902 hwc->last_period = period;
902 ret = 1; 903 ret = 1;
903 } 904 }
904 905
905 if (unlikely(left <= 0)) { 906 if (unlikely(left <= 0)) {
906 left += period; 907 left += period;
907 atomic64_set(&hwc->period_left, left); 908 local64_set(&hwc->period_left, left);
908 hwc->last_period = period; 909 hwc->last_period = period;
909 ret = 1; 910 ret = 1;
910 } 911 }
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
923 * The hw event starts counting from this event offset, 924 * The hw event starts counting from this event offset,
924 * mark it to be able to extra future deltas: 925 * mark it to be able to extra future deltas:
925 */ 926 */
926 atomic64_set(&hwc->prev_count, (u64)-left); 927 local64_set(&hwc->prev_count, (u64)-left);
927 928
928 wrmsrl(hwc->event_base + idx, 929 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
930
931 /*
 932	 * Due to an erratum on certain CPUs we need
933 * a second write to be sure the register
934 * is updated properly
935 */
936 if (x86_pmu.perfctr_second_write) {
937 wrmsrl(hwc->event_base + idx,
929 (u64)(-left) & x86_pmu.cntval_mask); 938 (u64)(-left) & x86_pmu.cntval_mask);
939 }
930 940
931 perf_event_update_userpage(event); 941 perf_event_update_userpage(event);
932 942
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
 969	 * skip the schedulability test here, it will be performed 979	 * skip the schedulability test here, it will be performed
970 * at commit time(->commit_txn) as a whole 980 * at commit time(->commit_txn) as a whole
971 */ 981 */
972 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 982 if (cpuc->group_flag & PERF_EVENT_TXN)
973 goto out; 983 goto out;
974 984
975 ret = x86_pmu.schedule_events(cpuc, n, assign); 985 ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
1096 * The events never got scheduled and ->cancel_txn will truncate 1106 * The events never got scheduled and ->cancel_txn will truncate
1097 * the event_list. 1107 * the event_list.
1098 */ 1108 */
1099 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1109 if (cpuc->group_flag & PERF_EVENT_TXN)
1100 return; 1110 return;
1101 1111
1102 x86_pmu_stop(event); 1112 x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1388{ 1398{
1389 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1399 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1390 1400
1391 cpuc->group_flag |= PERF_EVENT_TXN_STARTED; 1401 cpuc->group_flag |= PERF_EVENT_TXN;
1392 cpuc->n_txn = 0; 1402 cpuc->n_txn = 0;
1393} 1403}
1394 1404
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1401{ 1411{
1402 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1412 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1403 1413
1404 cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; 1414 cpuc->group_flag &= ~PERF_EVENT_TXN;
1405 /* 1415 /*
1406 * Truncate the collected events. 1416 * Truncate the collected events.
1407 */ 1417 */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1435 */ 1445 */
1436 memcpy(cpuc->assign, assign, n*sizeof(int)); 1446 memcpy(cpuc->assign, assign, n*sizeof(int));
1437 1447
1438 /* 1448 cpuc->group_flag &= ~PERF_EVENT_TXN;
1439 * Clear out the txn count so that ->cancel_txn() which gets
1440 * run after ->commit_txn() doesn't undo things.
1441 */
1442 cpuc->n_txn = 0;
1443 1449
1444 return 0; 1450 return 0;
1445} 1451}
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
1607 .walk_stack = print_context_stack_bp, 1613 .walk_stack = print_context_stack_bp,
1608}; 1614};
1609 1615
1610#include "../dumpstack.h"
1611
1612static void 1616static void
1613perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1617perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1614{ 1618{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1730 return entry; 1734 return entry;
1731} 1735}
1732 1736
1733void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1734{
1735 regs->ip = ip;
1736 /*
1737 * perf_arch_fetch_caller_regs adds another call, we need to increment
1738 * the skip level
1739 */
1740 regs->bp = rewind_frame_pointer(skip + 1);
1741 regs->cs = __KERNEL_CS;
1742 /*
1743 * We abuse bit 3 to pass exact information, see perf_misc_flags
1744 * and the comment with PERF_EFLAGS_EXACT.
1745 */
1746 regs->flags = 0;
1747}
1748
1749unsigned long perf_instruction_pointer(struct pt_regs *regs) 1737unsigned long perf_instruction_pointer(struct pt_regs *regs)
1750{ 1738{
1751 unsigned long ip; 1739 unsigned long ip;
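
The atomic64_t to local64_t conversion above keeps the same read/cmpxchg retry loop in x86_perf_event_update(). Below is a minimal user-space sketch of that pattern; the local64_* helpers are single-threaded stand-ins written only for illustration (the counter-width shift done by the real code is omitted), not the kernel implementations.

#include <stdint.h>
#include <stdio.h>

/* stand-ins for the kernel's local64_t API, same argument order */
typedef struct { int64_t v; } local64_t;

static int64_t local64_read(local64_t *l)           { return l->v; }
static void    local64_add(int64_t i, local64_t *l) { l->v += i; }
static int64_t local64_cmpxchg(local64_t *l, int64_t o, int64_t n)
{
	int64_t prev = l->v;

	if (prev == o)
		l->v = n;
	return prev;
}

/* mirrors the retry loop: publish the new raw counter value first,
 * then account the delta exactly once */
static int64_t update_count(local64_t *prev_count, local64_t *count,
			    int64_t new_raw_count)
{
	int64_t prev_raw_count;

again:
	prev_raw_count = local64_read(prev_count);
	if (local64_cmpxchg(prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	local64_add(new_raw_count - prev_raw_count, count);
	return new_raw_count;
}

int main(void)
{
	local64_t prev = { 100 }, count = { 0 };

	update_count(&prev, &count, 150);
	printf("count = %lld\n", (long long)count.v);
	return 0;
}
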
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d1..107711bf0ee8 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -21,22 +21,36 @@ struct p4_event_bind {
 21	char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */ 21	char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */
22}; 22};
23 23
24struct p4_cache_event_bind { 24struct p4_pebs_bind {
25 unsigned int metric_pebs; 25 unsigned int metric_pebs;
26 unsigned int metric_vert; 26 unsigned int metric_vert;
27}; 27};
28 28
29#define P4_GEN_CACHE_EVENT_BIND(name) \ 29/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
30 [P4_CACHE__##name] = { \ 30#define P4_GEN_PEBS_BIND(name, pebs, vert) \
31 .metric_pebs = P4_PEBS__##name, \ 31 [P4_PEBS_METRIC__##name] = { \
32 .metric_vert = P4_VERT__##name, \ 32 .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
33 .metric_vert = vert, \
33 } 34 }
34 35
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = { 36/*
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), 37 * note we have P4_PEBS_ENABLE_UOP_TAG always set here
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), 38 *
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), 39 * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), 40 * event configuration to find out which values are to be
41 * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
 42 * registers
43 */
44static struct p4_pebs_bind p4_pebs_bind_map[] = {
45 P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
46 P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
47 P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
48 P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
49 P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
50 P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
51 P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
52 P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
53 P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
40}; 54};
41 55
42/* 56/*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
281 }, 295 },
282}; 296};
283 297
284#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ 298#define P4_GEN_CACHE_EVENT(event, bit, metric) \
285 p4_config_pack_escr(P4_ESCR_EVENT(event) | \ 299 p4_config_pack_escr(P4_ESCR_EVENT(event) | \
286 P4_ESCR_EMASK_BIT(event, bit)) | \ 300 P4_ESCR_EMASK_BIT(event, bit)) | \
287 p4_config_pack_cccr(cache_event | \ 301 p4_config_pack_cccr(metric | \
288 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) 302 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
289 303
290static __initconst const u64 p4_hw_cache_event_ids 304static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
296 [ C(OP_READ) ] = { 310 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0, 311 [ C(RESULT_ACCESS) ] = 0x0,
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired), 313 P4_PEBS_METRIC__1stl_cache_load_miss_retired),
300 }, 314 },
301 }, 315 },
302 [ C(LL ) ] = { 316 [ C(LL ) ] = {
303 [ C(OP_READ) ] = { 317 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0, 318 [ C(RESULT_ACCESS) ] = 0x0,
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 319 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired), 320 P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
307 }, 321 },
308}, 322},
309 [ C(DTLB) ] = { 323 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = { 324 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0, 325 [ C(RESULT_ACCESS) ] = 0x0,
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 326 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired), 327 P4_PEBS_METRIC__dtlb_load_miss_retired),
314 }, 328 },
315 [ C(OP_WRITE) ] = { 329 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0, 330 [ C(RESULT_ACCESS) ] = 0x0,
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 331 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired), 332 P4_PEBS_METRIC__dtlb_store_miss_retired),
319 }, 333 },
320 }, 334 },
321 [ C(ITLB) ] = { 335 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = { 336 [ C(OP_READ) ] = {
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, 337 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit), 338 P4_PEBS_METRIC__none),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, 339 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss), 340 P4_PEBS_METRIC__none),
327 }, 341 },
328 [ C(OP_WRITE) ] = { 342 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1, 343 [ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
414 return config; 428 return config;
415} 429}
416 430
431static int p4_validate_raw_event(struct perf_event *event)
432{
433 unsigned int v;
434
 435	/* user data may have an out-of-bounds event index */
436 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) {
438 pr_warning("P4 PMU: Unknown event code: %d\n", v);
439 return -EINVAL;
440 }
441
442 /*
443 * it may have some screwed PEBS bits
444 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL;
448 }
449 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL;
453 }
454
455 return 0;
456}
457
417static int p4_hw_config(struct perf_event *event) 458static int p4_hw_config(struct perf_event *event)
418{ 459{
419 int cpu = get_cpu(); 460 int cpu = get_cpu();
420 int rc = 0; 461 int rc = 0;
421 unsigned int evnt;
422 u32 escr, cccr; 462 u32 escr, cccr;
423 463
424 /* 464 /*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
438 478
439 if (event->attr.type == PERF_TYPE_RAW) { 479 if (event->attr.type == PERF_TYPE_RAW) {
440 480
441 /* user data may have out-of-bound event index */ 481 rc = p4_validate_raw_event(event);
442 evnt = p4_config_unpack_event(event->attr.config); 482 if (rc)
443 if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
444 rc = -EINVAL;
445 goto out; 483 goto out;
446 }
447 484
448 /* 485 /*
449 * We don't control raw events so it's up to the caller 486 * We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
451 * on HT machine but allow HT-compatible specifics to be 488 * on HT machine but allow HT-compatible specifics to be
452 * passed on) 489 * passed on)
453 * 490 *
 491	 * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
 492	 * bits since we keep additional info here (for cache events etc.)
493 *
454 * XXX: HT wide things should check perf_paranoid_cpu() && 494 * XXX: HT wide things should check perf_paranoid_cpu() &&
455 * CAP_SYS_ADMIN 495 * CAP_SYS_ADMIN
456 */ 496 */
457 event->hw.config |= event->attr.config & 497 event->hw.config |= event->attr.config &
458 (p4_config_pack_escr(P4_ESCR_MASK_HT) | 498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
459 p4_config_pack_cccr(P4_CCCR_MASK_HT)); 499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
460 } 500 }
461 501
462 rc = x86_setup_perfctr(event); 502 rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
482 return overflow; 522 return overflow;
483} 523}
484 524
525static void p4_pmu_disable_pebs(void)
526{
527 /*
528 * FIXME
529 *
 530	 * It's still allowed that two threads set up the same cache
 531	 * events, so we can't simply clear the metrics until we know
 532	 * no one is depending on us; we would need some kind of counter
 533	 * for "ReplayEvent" users.
 534	 *
 535	 * What is more complex -- RAW events: if a user (for some
 536	 * reason) passes a cache event metric with an improper
 537	 * event opcode, it's fine from a hardware point of view
 538	 * but complete nonsense as far as meaning goes.
 539	 *
 540	 * So for the moment let's leave the metrics turned on forever -- it's
 541	 * OK for now but this needs to be revisited!
542 *
543 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
544 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
545 */
546}
547
485static inline void p4_pmu_disable_event(struct perf_event *event) 548static inline void p4_pmu_disable_event(struct perf_event *event)
486{ 549{
487 struct hw_perf_event *hwc = &event->hw; 550 struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
507 continue; 570 continue;
508 p4_pmu_disable_event(event); 571 p4_pmu_disable_event(event);
509 } 572 }
573
574 p4_pmu_disable_pebs();
575}
576
577/* configuration must be valid */
578static void p4_pmu_enable_pebs(u64 config)
579{
580 struct p4_pebs_bind *bind;
581 unsigned int idx;
582
583 BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
584
585 idx = p4_config_unpack_metric(config);
586 if (idx == P4_PEBS_METRIC__none)
587 return;
588
589 bind = &p4_pebs_bind_map[idx];
590
591 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
592 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
510} 593}
511 594
512static void p4_pmu_enable_event(struct perf_event *event) 595static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
515 int thread = p4_ht_config_thread(hwc->config); 598 int thread = p4_ht_config_thread(hwc->config);
516 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); 599 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
517 unsigned int idx = p4_config_unpack_event(hwc->config); 600 unsigned int idx = p4_config_unpack_event(hwc->config);
518 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
519 struct p4_event_bind *bind; 601 struct p4_event_bind *bind;
520 struct p4_cache_event_bind *bind_cache;
521 u64 escr_addr, cccr; 602 u64 escr_addr, cccr;
522 603
523 bind = &p4_event_bind_map[idx]; 604 bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
537 cccr = p4_config_unpack_cccr(hwc->config); 618 cccr = p4_config_unpack_cccr(hwc->config);
538 619
539 /* 620 /*
540 * it could be Cache event so that we need to 621 * it could be Cache event so we need to write metrics
541 * set metrics into additional MSRs 622 * into additional MSRs
542 */ 623 */
543 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); 624 p4_pmu_enable_pebs(hwc->config);
544 if (idx_cache > P4_CACHE__NONE &&
545 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
546 bind_cache = &p4_cache_event_bind_map[idx_cache];
547 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
548 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
549 }
550 625
551 (void)checking_wrmsrl(escr_addr, escr_conf); 626 (void)checking_wrmsrl(escr_addr, escr_conf);
552 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 627 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
829 .max_period = (1ULL << 39) - 1, 904 .max_period = (1ULL << 39) - 1,
830 .hw_config = p4_hw_config, 905 .hw_config = p4_hw_config,
831 .schedule_events = p4_pmu_schedule_events, 906 .schedule_events = p4_pmu_schedule_events,
907 /*
 908	 * This handles erratum N15 in Intel doc 249199-029:
909 * the counter may not be updated correctly on write
910 * so we need a second write operation to do the trick
911 * (the official workaround didn't work)
912 *
 913	 * the idea is taken from the OProfile code
914 */
915 .perfctr_second_write = 1,
832}; 916};
833 917
834static __init int p4_pmu_init(void) 918static __init int p4_pmu_init(void)
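
p4_validate_raw_event() above boils down to bounds-checking user-controlled indices before they are used to index the bind maps. A small stand-alone sketch of that validate-then-lookup shape follows; the table contents and names are illustrative only, not the real p4_pebs_bind_map.

#include <stdio.h>
#include <errno.h>

#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

/* illustrative miniature of a bind map keyed by a user-supplied index */
struct pebs_bind {
	unsigned int metric_pebs;
	unsigned int metric_vert;
};

static const struct pebs_bind bind_map[] = {
	{ 0x0000001, 0x0000001 },	/* e.g. 1stl_cache_load_miss_retired */
	{ 0x0000002, 0x0000001 },	/* e.g. 2ndl_cache_load_miss_retired */
};

static int lookup_metric(unsigned int idx, const struct pebs_bind **bind)
{
	/* reject out-of-bounds indices before touching the table */
	if (idx >= ARRAY_SIZE(bind_map))
		return -EINVAL;

	*bind = &bind_map[idx];
	return 0;
}

int main(void)
{
	const struct pebs_bind *bind;

	if (!lookup_metric(1, &bind))
		printf("pebs=%#x vert=%#x\n", bind->metric_pebs, bind->metric_vert);
	if (lookup_metric(42, &bind))
		printf("index 42 rejected\n");
	return 0;
}
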
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386930b7..6e8752c1bd52 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
18 18
19#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
20 20
21#include "dumpstack.h"
22 21
23int panic_on_unrecovered_nmi; 22int panic_on_unrecovered_nmi;
24int panic_on_io_nmi; 23int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be4fd44..000000000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17#include <linux/uaccess.h>
18
19extern void
20show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
21 unsigned long *stack, unsigned long bp, char *log_lvl);
22
23extern void
24show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *sp, unsigned long bp, char *log_lvl);
26
27extern unsigned int code_bytes;
28
29/* The form of the top of the frame on the stack */
30struct stack_frame {
31 struct stack_frame *next_frame;
32 unsigned long return_address;
33};
34
35struct stack_frame_ia32 {
36 u32 next_frame;
37 u32 return_address;
38};
39
40static inline unsigned long rewind_frame_pointer(int n)
41{
42 struct stack_frame *frame;
43
44 get_bp(frame);
45
46#ifdef CONFIG_FRAME_POINTER
47 while (n--) {
48 if (probe_kernel_address(&frame->next_frame, frame))
49 break;
50 }
51#endif
52
53 return (unsigned long)frame;
54}
55
56#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a189d93..0f6376ffa2d9 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20
21 19
22void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task, struct pt_regs *regs,
23 unsigned long *stack, unsigned long bp, 21 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1f05f3..57a21f11c791 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20 19
21#define N_EXCEPTION_STACKS_END \ 20#define N_EXCEPTION_STACKS_END \
22 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) 21 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2fd..a474ec37c32f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
208{ 208{
209 /* Len */ 209 /* Len */
210 switch (x86_len) { 210 switch (x86_len) {
211 case X86_BREAKPOINT_LEN_X:
212 *gen_len = sizeof(long);
213 break;
211 case X86_BREAKPOINT_LEN_1: 214 case X86_BREAKPOINT_LEN_1:
212 *gen_len = HW_BREAKPOINT_LEN_1; 215 *gen_len = HW_BREAKPOINT_LEN_1;
213 break; 216 break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
251 254
252 info->address = bp->attr.bp_addr; 255 info->address = bp->attr.bp_addr;
253 256
257 /* Type */
258 switch (bp->attr.bp_type) {
259 case HW_BREAKPOINT_W:
260 info->type = X86_BREAKPOINT_WRITE;
261 break;
262 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
263 info->type = X86_BREAKPOINT_RW;
264 break;
265 case HW_BREAKPOINT_X:
266 info->type = X86_BREAKPOINT_EXECUTE;
267 /*
268 * x86 inst breakpoints need to have a specific undefined len.
 269	 * But we still need to check that userspace is not trying to set up
270 * an unsupported length, to get a range breakpoint for example.
271 */
272 if (bp->attr.bp_len == sizeof(long)) {
273 info->len = X86_BREAKPOINT_LEN_X;
274 return 0;
275 }
276 default:
277 return -EINVAL;
278 }
279
254 /* Len */ 280 /* Len */
255 switch (bp->attr.bp_len) { 281 switch (bp->attr.bp_len) {
256 case HW_BREAKPOINT_LEN_1: 282 case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
271 return -EINVAL; 297 return -EINVAL;
272 } 298 }
273 299
274 /* Type */
275 switch (bp->attr.bp_type) {
276 case HW_BREAKPOINT_W:
277 info->type = X86_BREAKPOINT_WRITE;
278 break;
279 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
280 info->type = X86_BREAKPOINT_RW;
281 break;
282 case HW_BREAKPOINT_X:
283 info->type = X86_BREAKPOINT_EXECUTE;
284 break;
285 default:
286 return -EINVAL;
287 }
288
289 return 0; 300 return 0;
290} 301}
291/* 302/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
305 ret = -EINVAL; 316 ret = -EINVAL;
306 317
307 switch (info->len) { 318 switch (info->len) {
319 case X86_BREAKPOINT_LEN_X:
320 align = sizeof(long) -1;
321 break;
308 case X86_BREAKPOINT_LEN_1: 322 case X86_BREAKPOINT_LEN_1:
309 align = 0; 323 align = 0;
310 break; 324 break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
466 480
467 perf_bp_event(bp, args->regs); 481 perf_bp_event(bp, args->regs);
468 482
483 /*
 484	 * Set up the resume flag to avoid breakpoint recursion when
 485	 * returning to the origin.
486 */
487 if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
488 args->regs->flags |= X86_EFLAGS_RF;
489
469 rcu_read_unlock(); 490 rcu_read_unlock();
470 } 491 }
471 /* 492 /*
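
With the change above, an execute breakpoint must be requested with bp_len equal to sizeof(long); any other length is rejected so it cannot be turned into a range breakpoint. A rough user-space sketch using perf_event_open() follows; error handling is omitted and the attr field names are assumed from the uapi headers of this era.

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

/* request an instruction (execute) breakpoint on 'addr' */
static int exec_breakpoint(unsigned long addr)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type	= PERF_TYPE_BREAKPOINT;
	attr.size	= sizeof(attr);
	attr.bp_type	= HW_BREAKPOINT_X;
	attr.bp_addr	= addr;
	attr.bp_len	= sizeof(long);	/* anything else now returns -EINVAL */

	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
	int fd = exec_breakpoint((unsigned long)&main);

	printf("perf_event_open() returned %d\n", fd);
	if (fd >= 0)
		close(fd);
	return 0;
}
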
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 675879b65ce6..1bfb6cf4dd55 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
126} 126}
127 127
128/* 128/*
129 * Check for the REX prefix which can only exist on X86_64 129 * Skip the prefixes of the instruction.
130 * X86_32 always returns 0
131 */ 130 */
132static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) 131static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
133{ 132{
133 insn_attr_t attr;
134
135 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
136 while (inat_is_legacy_prefix(attr)) {
137 insn++;
138 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
139 }
134#ifdef CONFIG_X86_64 140#ifdef CONFIG_X86_64
135 if ((*insn & 0xf0) == 0x40) 141 if (inat_is_rex_prefix(attr))
136 return 1; 142 insn++;
137#endif 143#endif
138 return 0; 144 return insn;
139} 145}
140 146
141/* 147/*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
272 */ 278 */
273static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) 279static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
274{ 280{
281 /* Skip prefixes */
282 insn = skip_prefixes(insn);
283
275 switch (*insn) { 284 switch (*insn) {
276 case 0xfa: /* cli */ 285 case 0xfa: /* cli */
277 case 0xfb: /* sti */ 286 case 0xfb: /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
280 return 1; 289 return 1;
281 } 290 }
282 291
283 /*
284 * on X86_64, 0x40-0x4f are REX prefixes so we need to look
285 * at the next byte instead.. but of course not recurse infinitely
286 */
287 if (is_REX_prefix(insn))
288 return is_IF_modifier(++insn);
289
290 return 0; 292 return 0;
291} 293}
292 294
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
803 unsigned long orig_ip = (unsigned long)p->addr; 805 unsigned long orig_ip = (unsigned long)p->addr;
804 kprobe_opcode_t *insn = p->ainsn.insn; 806 kprobe_opcode_t *insn = p->ainsn.insn;
805 807
806 /*skip the REX prefix*/ 808 /* Skip prefixes */
807 if (is_REX_prefix(insn)) 809 insn = skip_prefixes(insn);
808 insn++;
809 810
810 regs->flags &= ~X86_EFLAGS_TF; 811 regs->flags &= ~X86_EFLAGS_TF;
811 switch (*insn) { 812 switch (*insn) {
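
skip_prefixes() above replaces the old REX-only check: step over any legacy prefixes, then (on x86-64) an optional REX prefix, and land on the opcode byte. A toy stand-alone version of the same idea, with the prefix tests hard-coded instead of using the in-kernel inat_* decoder tables:

#include <stdio.h>

/* legacy prefix bytes: operand/address size, lock, rep/repne, segments */
static int is_legacy_prefix(unsigned char b)
{
	switch (b) {
	case 0x66: case 0x67: case 0xf0: case 0xf2: case 0xf3:
	case 0x2e: case 0x36: case 0x3e: case 0x26: case 0x64: case 0x65:
		return 1;
	}
	return 0;
}

static const unsigned char *skip_prefixes(const unsigned char *insn)
{
	while (is_legacy_prefix(*insn))
		insn++;
	if ((*insn & 0xf0) == 0x40)	/* REX prefix, x86-64 only */
		insn++;
	return insn;
}

int main(void)
{
	/* f0 48 0f b1 0a = lock cmpxchg %rcx,(%rdx) */
	const unsigned char insn[] = { 0xf0, 0x48, 0x0f, 0xb1, 0x0a };

	printf("opcode byte: %#x\n", *skip_prefixes(insn));
	return 0;
}
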
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af47..96586c3cbbbf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,8 @@
57#include <asm/syscalls.h> 57#include <asm/syscalls.h>
58#include <asm/debugreg.h> 58#include <asm/debugreg.h>
59 59
60#include <trace/events/power.h>
61
60asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
61 63
62/* 64/*
@@ -111,6 +113,8 @@ void cpu_idle(void)
111 stop_critical_timings(); 113 stop_critical_timings();
112 pm_idle(); 114 pm_idle();
113 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id());
114 } 118 }
115 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
116 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a99f1f..3d9ea531ddd1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,8 @@
51#include <asm/syscalls.h> 51#include <asm/syscalls.h>
52#include <asm/debugreg.h> 52#include <asm/debugreg.h>
53 53
54#include <trace/events/power.h>
55
54asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
55 57
56DEFINE_PER_CPU(unsigned long, old_rsp); 58DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
138 stop_critical_timings(); 140 stop_critical_timings();
139 pm_idle(); 141 pm_idle();
140 start_critical_timings(); 142 start_critical_timings();
143
144 trace_power_end(smp_processor_id());
145
141 /* In many cases the interrupt that ended idle 146 /* In many cases the interrupt that ended idle
142 has already called exit_idle. But some idle 147 has already called exit_idle. But some idle
143 loops can be woken up without interrupt. */ 148 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefbb3f6c..b53c525368a7 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
23 return 0; 23 return 0;
24} 24}
25 25
26static void save_stack_address(void *data, unsigned long addr, int reliable) 26static void
27__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
27{ 28{
28 struct stack_trace *trace = data; 29 struct stack_trace *trace = data;
30#ifdef CONFIG_FRAME_POINTER
29 if (!reliable) 31 if (!reliable)
30 return; 32 return;
33#endif
34 if (nosched && in_sched_functions(addr))
35 return;
31 if (trace->skip > 0) { 36 if (trace->skip > 0) {
32 trace->skip--; 37 trace->skip--;
33 return; 38 return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
36 trace->entries[trace->nr_entries++] = addr; 41 trace->entries[trace->nr_entries++] = addr;
37} 42}
38 43
44static void save_stack_address(void *data, unsigned long addr, int reliable)
45{
46 return __save_stack_address(data, addr, reliable, false);
47}
48
39static void 49static void
40save_stack_address_nosched(void *data, unsigned long addr, int reliable) 50save_stack_address_nosched(void *data, unsigned long addr, int reliable)
41{ 51{
42 struct stack_trace *trace = (struct stack_trace *)data; 52 return __save_stack_address(data, addr, reliable, true);
43 if (!reliable)
44 return;
45 if (in_sched_functions(addr))
46 return;
47 if (trace->skip > 0) {
48 trace->skip--;
49 return;
50 }
51 if (trace->nr_entries < trace->max_entries)
52 trace->entries[trace->nr_entries++] = addr;
53} 53}
54 54
55static const struct stacktrace_ops save_stack_ops = { 55static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
96 96
97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ 97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
98 98
99struct stack_frame { 99struct stack_frame_user {
100 const void __user *next_fp; 100 const void __user *next_fp;
101 unsigned long ret_addr; 101 unsigned long ret_addr;
102}; 102};
103 103
104static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 104static int
105copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
105{ 106{
106 int ret; 107 int ret;
107 108
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
126 trace->entries[trace->nr_entries++] = regs->ip; 127 trace->entries[trace->nr_entries++] = regs->ip;
127 128
128 while (trace->nr_entries < trace->max_entries) { 129 while (trace->nr_entries < trace->max_entries) {
129 struct stack_frame frame; 130 struct stack_frame_user frame;
130 131
131 frame.next_fp = NULL; 132 frame.next_fp = NULL;
132 frame.ret_addr = 0; 133 frame.ret_addr = 0;
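
The two save_stack_address callbacks above now share one helper selected by a 'nosched' flag. The shape of that consolidation, reduced to stand-alone C with a fake in_sched_functions(); the names mirror the kernel ones but the bodies are simplified:

#include <stdbool.h>
#include <stdio.h>

struct stack_trace {
	unsigned long	entries[16];
	unsigned int	nr_entries;
	unsigned int	max_entries;
	unsigned int	skip;
};

/* fake: pretend every address above this bound is scheduler code */
static bool in_sched_functions(unsigned long addr)
{
	return addr >= 0xffff0000UL;
}

static void __save_stack_address(struct stack_trace *trace, unsigned long addr,
				 bool reliable, bool nosched)
{
	if (!reliable)
		return;
	if (nosched && in_sched_functions(addr))
		return;
	if (trace->skip > 0) {
		trace->skip--;
		return;
	}
	if (trace->nr_entries < trace->max_entries)
		trace->entries[trace->nr_entries++] = addr;
}

static void save_stack_address(struct stack_trace *t, unsigned long a, bool r)
{
	__save_stack_address(t, a, r, false);
}

static void save_stack_address_nosched(struct stack_trace *t, unsigned long a, bool r)
{
	__save_stack_address(t, a, r, true);
}

int main(void)
{
	struct stack_trace t = { .max_entries = 16 };

	save_stack_address(&t, 0x1000, true);
	save_stack_address_nosched(&t, 0xffff1234UL, true);	/* filtered */
	printf("%u entries\n", t.nr_entries);
	return 0;
}
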
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/xtensa/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/fs/exec.c b/fs/exec.c
index e19de6a80339..97d91a03fb13 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
653 else 653 else
654 stack_base = vma->vm_start - stack_expand; 654 stack_base = vma->vm_start - stack_expand;
655#endif 655#endif
656 current->mm->start_stack = bprm->p;
656 ret = expand_stack(vma, stack_base); 657 ret = expand_stack(vma, stack_base);
657 if (ret) 658 if (ret)
658 ret = -EFAULT; 659 ret = -EFAULT;
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
new file mode 100644
index 000000000000..02ac760c1a8b
--- /dev/null
+++ b/include/asm-generic/local64.h
@@ -0,0 +1,96 @@
1#ifndef _ASM_GENERIC_LOCAL64_H
2#define _ASM_GENERIC_LOCAL64_H
3
4#include <linux/percpu.h>
5#include <asm/types.h>
6
7/*
8 * A signed long type for operations which are atomic for a single CPU.
9 * Usually used in combination with per-cpu variables.
10 *
11 * This is the default implementation, which uses atomic64_t. Which is
12 * rather pointless. The whole point behind local64_t is that some processors
13 * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs
14 * running on this CPU. local64_t allows exploitation of such capabilities.
15 */
16
17/* Implement in terms of atomics. */
18
19#if BITS_PER_LONG == 64
20
21#include <asm/local.h>
22
23typedef struct {
24 local_t a;
25} local64_t;
26
27#define LOCAL64_INIT(i) { LOCAL_INIT(i) }
28
29#define local64_read(l) local_read(&(l)->a)
30#define local64_set(l,i) local_set((&(l)->a),(i))
31#define local64_inc(l) local_inc(&(l)->a)
32#define local64_dec(l) local_dec(&(l)->a)
33#define local64_add(i,l) local_add((i),(&(l)->a))
34#define local64_sub(i,l) local_sub((i),(&(l)->a))
35
36#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
37#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
38#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
39#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
40#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
41#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
42#define local64_inc_return(l) local_inc_return(&(l)->a)
43
44#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
45#define local64_xchg(l, n) local_xchg((&(l)->a), (n))
46#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
47#define local64_inc_not_zero(l) local_inc_not_zero(&(l)->a)
48
49/* Non-atomic variants, ie. preemption disabled and won't be touched
50 * in interrupt, etc. Some archs can optimize this case well. */
51#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
52#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
53#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
54#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
55
56#else /* BITS_PER_LONG != 64 */
57
58#include <asm/atomic.h>
59
60/* Don't use typedef: don't want them to be mixed with atomic_t's. */
61typedef struct {
62 atomic64_t a;
63} local64_t;
64
65#define LOCAL64_INIT(i) { ATOMIC_LONG_INIT(i) }
66
67#define local64_read(l) atomic64_read(&(l)->a)
68#define local64_set(l,i) atomic64_set((&(l)->a),(i))
69#define local64_inc(l) atomic64_inc(&(l)->a)
70#define local64_dec(l) atomic64_dec(&(l)->a)
71#define local64_add(i,l) atomic64_add((i),(&(l)->a))
72#define local64_sub(i,l) atomic64_sub((i),(&(l)->a))
73
74#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
75#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
76#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
77#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
78#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
79#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
80#define local64_inc_return(l) atomic64_inc_return(&(l)->a)
81
82#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
83#define local64_xchg(l, n) atomic64_xchg((&(l)->a), (n))
84#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
85#define local64_inc_not_zero(l) atomic64_inc_not_zero(&(l)->a)
86
87/* Non-atomic variants, ie. preemption disabled and won't be touched
88 * in interrupt, etc. Some archs can optimize this case well. */
89#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
90#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
91#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
92#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
93
94#endif /* BITS_PER_LONG != 64 */
95
96#endif /* _ASM_GENERIC_LOCAL64_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 48c5299cbf26..415b1a9118e7 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -150,10 +150,6 @@
150 CPU_KEEP(exit.data) \ 150 CPU_KEEP(exit.data) \
151 MEM_KEEP(init.data) \ 151 MEM_KEEP(init.data) \
152 MEM_KEEP(exit.data) \ 152 MEM_KEEP(exit.data) \
153 . = ALIGN(8); \
154 VMLINUX_SYMBOL(__start___markers) = .; \
155 *(__markers) \
156 VMLINUX_SYMBOL(__stop___markers) = .; \
157 . = ALIGN(32); \ 153 . = ALIGN(32); \
158 VMLINUX_SYMBOL(__start___tracepoints) = .; \ 154 VMLINUX_SYMBOL(__start___tracepoints) = .; \
159 *(__tracepoints) \ 155 *(__tracepoints) \
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 3167f2df4126..01df7ca4ead7 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -146,6 +146,9 @@ struct ftrace_event_class {
146 int (*raw_init)(struct ftrace_event_call *); 146 int (*raw_init)(struct ftrace_event_call *);
147}; 147};
148 148
149extern int ftrace_event_reg(struct ftrace_event_call *event,
150 enum trace_reg type);
151
149enum { 152enum {
150 TRACE_EVENT_FL_ENABLED_BIT, 153 TRACE_EVENT_FL_ENABLED_BIT,
151 TRACE_EVENT_FL_FILTERED_BIT, 154 TRACE_EVENT_FL_FILTERED_BIT,
@@ -257,8 +260,7 @@ static inline void
257perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, 260perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
258 u64 count, struct pt_regs *regs, void *head) 261 u64 count, struct pt_regs *regs, void *head)
259{ 262{
260 perf_tp_event(addr, count, raw_data, size, regs, head); 263 perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
261 perf_swevent_put_recursion_context(rctx);
262} 264}
263#endif 265#endif
264 266
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
deleted file mode 100644
index b616d3930c3b..000000000000
--- a/include/linux/kmemtrace.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <trace/events/kmem.h>
13
14#ifdef CONFIG_KMEMTRACE
15extern void kmemtrace_init(void);
16#else
17static inline void kmemtrace_init(void)
18{
19}
20#endif
21
22#endif /* __KERNEL__ */
23
24#endif /* _LINUX_KMEMTRACE_H */
25
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d94985..937495c25073 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -214,8 +214,9 @@ struct perf_event_attr {
214 * See also PERF_RECORD_MISC_EXACT_IP 214 * See also PERF_RECORD_MISC_EXACT_IP
215 */ 215 */
216 precise_ip : 2, /* skid constraint */ 216 precise_ip : 2, /* skid constraint */
217 mmap_data : 1, /* non-exec mmap data */
217 218
218 __reserved_1 : 47; 219 __reserved_1 : 46;
219 220
220 union { 221 union {
221 __u32 wakeup_events; /* wakeup every n events */ 222 __u32 wakeup_events; /* wakeup every n events */
@@ -461,6 +462,7 @@ enum perf_callchain_context {
461 462
462#ifdef CONFIG_PERF_EVENTS 463#ifdef CONFIG_PERF_EVENTS
463# include <asm/perf_event.h> 464# include <asm/perf_event.h>
465# include <asm/local64.h>
464#endif 466#endif
465 467
466struct perf_guest_info_callbacks { 468struct perf_guest_info_callbacks {
@@ -531,14 +533,16 @@ struct hw_perf_event {
531 struct hrtimer hrtimer; 533 struct hrtimer hrtimer;
532 }; 534 };
533#ifdef CONFIG_HAVE_HW_BREAKPOINT 535#ifdef CONFIG_HAVE_HW_BREAKPOINT
534 /* breakpoint */ 536 struct { /* breakpoint */
535 struct arch_hw_breakpoint info; 537 struct arch_hw_breakpoint info;
538 struct list_head bp_list;
539 };
536#endif 540#endif
537 }; 541 };
538 atomic64_t prev_count; 542 local64_t prev_count;
539 u64 sample_period; 543 u64 sample_period;
540 u64 last_period; 544 u64 last_period;
541 atomic64_t period_left; 545 local64_t period_left;
542 u64 interrupts; 546 u64 interrupts;
543 547
544 u64 freq_time_stamp; 548 u64 freq_time_stamp;
@@ -548,7 +552,10 @@ struct hw_perf_event {
548 552
549struct perf_event; 553struct perf_event;
550 554
551#define PERF_EVENT_TXN_STARTED 1 555/*
556 * Common implementation detail of pmu::{start,commit,cancel}_txn
557 */
558#define PERF_EVENT_TXN 0x1
552 559
553/** 560/**
554 * struct pmu - generic performance monitoring unit 561 * struct pmu - generic performance monitoring unit
@@ -562,14 +569,28 @@ struct pmu {
562 void (*unthrottle) (struct perf_event *event); 569 void (*unthrottle) (struct perf_event *event);
563 570
564 /* 571 /*
565 * group events scheduling is treated as a transaction, 572 * Group events scheduling is treated as a transaction, add group
566 * add group events as a whole and perform one schedulability test. 573 * events as a whole and perform one schedulability test. If the test
567 * If test fails, roll back the whole group 574 * fails, roll back the whole group
568 */ 575 */
569 576
577 /*
578 * Start the transaction, after this ->enable() doesn't need
579 * to do schedulability tests.
580 */
570 void (*start_txn) (const struct pmu *pmu); 581 void (*start_txn) (const struct pmu *pmu);
571 void (*cancel_txn) (const struct pmu *pmu); 582 /*
583 * If ->start_txn() disabled the ->enable() schedulability test
584 * then ->commit_txn() is required to perform one. On success
585 * the transaction is closed. On error the transaction is kept
586 * open until ->cancel_txn() is called.
587 */
572 int (*commit_txn) (const struct pmu *pmu); 588 int (*commit_txn) (const struct pmu *pmu);
589 /*
590 * Will cancel the transaction, assumes ->disable() is called for
 591	 * each successful ->enable() during the transaction.
592 */
593 void (*cancel_txn) (const struct pmu *pmu);
573}; 594};
574 595
575/** 596/**
@@ -584,7 +605,9 @@ enum perf_event_active_state {
584 605
585struct file; 606struct file;
586 607
587struct perf_mmap_data { 608#define PERF_BUFFER_WRITABLE 0x01
609
610struct perf_buffer {
588 atomic_t refcount; 611 atomic_t refcount;
589 struct rcu_head rcu_head; 612 struct rcu_head rcu_head;
590#ifdef CONFIG_PERF_USE_VMALLOC 613#ifdef CONFIG_PERF_USE_VMALLOC
@@ -650,7 +673,8 @@ struct perf_event {
650 673
651 enum perf_event_active_state state; 674 enum perf_event_active_state state;
652 unsigned int attach_state; 675 unsigned int attach_state;
653 atomic64_t count; 676 local64_t count;
677 atomic64_t child_count;
654 678
655 /* 679 /*
656 * These are the total time in nanoseconds that the event 680 * These are the total time in nanoseconds that the event
@@ -709,7 +733,7 @@ struct perf_event {
709 atomic_t mmap_count; 733 atomic_t mmap_count;
710 int mmap_locked; 734 int mmap_locked;
711 struct user_struct *mmap_user; 735 struct user_struct *mmap_user;
712 struct perf_mmap_data *data; 736 struct perf_buffer *buffer;
713 737
714 /* poll related */ 738 /* poll related */
715 wait_queue_head_t waitq; 739 wait_queue_head_t waitq;
@@ -807,7 +831,7 @@ struct perf_cpu_context {
807 831
808struct perf_output_handle { 832struct perf_output_handle {
809 struct perf_event *event; 833 struct perf_event *event;
810 struct perf_mmap_data *data; 834 struct perf_buffer *buffer;
811 unsigned long wakeup; 835 unsigned long wakeup;
812 unsigned long size; 836 unsigned long size;
813 void *addr; 837 void *addr;
@@ -910,8 +934,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
910 934
911extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); 935extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
912 936
913extern void 937#ifndef perf_arch_fetch_caller_regs
914perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); 938static inline void
939perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
940#endif
915 941
916/* 942/*
917 * Take a snapshot of the regs. Skip ip and frame pointer to 943 * Take a snapshot of the regs. Skip ip and frame pointer to
@@ -921,31 +947,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
921 * - bp for callchains 947 * - bp for callchains
922 * - eflags, for future purposes, just in case 948 * - eflags, for future purposes, just in case
923 */ 949 */
924static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) 950static inline void perf_fetch_caller_regs(struct pt_regs *regs)
925{ 951{
926 unsigned long ip;
927
928 memset(regs, 0, sizeof(*regs)); 952 memset(regs, 0, sizeof(*regs));
929 953
930 switch (skip) { 954 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
931 case 1 :
932 ip = CALLER_ADDR0;
933 break;
934 case 2 :
935 ip = CALLER_ADDR1;
936 break;
937 case 3 :
938 ip = CALLER_ADDR2;
939 break;
940 case 4:
941 ip = CALLER_ADDR3;
942 break;
943 /* No need to support further for now */
944 default:
945 ip = 0;
946 }
947
948 return perf_arch_fetch_caller_regs(regs, ip, skip);
949} 955}
950 956
951static inline void 957static inline void
@@ -955,21 +961,14 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
955 struct pt_regs hot_regs; 961 struct pt_regs hot_regs;
956 962
957 if (!regs) { 963 if (!regs) {
958 perf_fetch_caller_regs(&hot_regs, 1); 964 perf_fetch_caller_regs(&hot_regs);
959 regs = &hot_regs; 965 regs = &hot_regs;
960 } 966 }
961 __perf_sw_event(event_id, nr, nmi, regs, addr); 967 __perf_sw_event(event_id, nr, nmi, regs, addr);
962 } 968 }
963} 969}
964 970
965extern void __perf_event_mmap(struct vm_area_struct *vma); 971extern void perf_event_mmap(struct vm_area_struct *vma);
966
967static inline void perf_event_mmap(struct vm_area_struct *vma)
968{
969 if (vma->vm_flags & VM_EXEC)
970 __perf_event_mmap(vma);
971}
972
973extern struct perf_guest_info_callbacks *perf_guest_cbs; 972extern struct perf_guest_info_callbacks *perf_guest_cbs;
974extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 973extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
975extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 974extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1001,7 +1000,7 @@ static inline bool perf_paranoid_kernel(void)
1001extern void perf_event_init(void); 1000extern void perf_event_init(void);
1002extern void perf_tp_event(u64 addr, u64 count, void *record, 1001extern void perf_tp_event(u64 addr, u64 count, void *record,
1003 int entry_size, struct pt_regs *regs, 1002 int entry_size, struct pt_regs *regs,
1004 struct hlist_head *head); 1003 struct hlist_head *head, int rctx);
1005extern void perf_bp_event(struct perf_event *event, void *data); 1004extern void perf_bp_event(struct perf_event *event, void *data);
1006 1005
1007#ifndef perf_misc_flags 1006#ifndef perf_misc_flags
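
The header now provides an empty perf_arch_fetch_caller_regs() stub unless the architecture supplies its own (note the signature also lost the skip argument). The override pattern itself, pulled out into a compilable sketch with a toy pt_regs:

#include <stdio.h>

struct pt_regs {
	unsigned long ip;
};

/* an arch that implements the hook defines both the macro and the function;
 * drop these two definitions to fall back to the empty stub below */
#define perf_arch_fetch_caller_regs perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs,
					       unsigned long ip)
{
	regs->ip = ip;
}

#ifndef perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs,
					       unsigned long ip) { }
#endif

int main(void)
{
	struct pt_regs regs = { 0 };

	perf_arch_fetch_caller_regs(&regs, 0x1234);
	printf("ip = %#lx\n", regs.ip);
	return 0;
}
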
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 1812dac8c496..1acfa73ce2ac 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,8 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <linux/kmemtrace.h> 17
18#include <trace/events/kmem.h>
18 19
19#ifndef ARCH_KMALLOC_MINALIGN 20#ifndef ARCH_KMALLOC_MINALIGN
20/* 21/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 4ba59cfc1f75..6447a723ecb1 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,9 +10,10 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <linux/kmemtrace.h>
14#include <linux/kmemleak.h> 13#include <linux/kmemleak.h>
15 14
15#include <trace/events/kmem.h>
16
16enum stat_item { 17enum stat_item {
17 ALLOC_FASTPATH, /* Allocation from cpu slab */ 18 ALLOC_FASTPATH, /* Allocation from cpu slab */
18 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ 19 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 13ebb5413a79..a6bfd1367d2a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -167,7 +167,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
167 .enter_event = &event_enter_##sname, \ 167 .enter_event = &event_enter_##sname, \
168 .exit_event = &event_exit_##sname, \ 168 .exit_event = &event_exit_##sname, \
169 .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ 169 .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
170 .exit_fields = LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \
171 }; 170 };
172 171
173#define SYSCALL_DEFINE0(sname) \ 172#define SYSCALL_DEFINE0(sname) \
@@ -182,7 +181,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
182 .enter_event = &event_enter__##sname, \ 181 .enter_event = &event_enter__##sname, \
183 .exit_event = &event_exit__##sname, \ 182 .exit_event = &event_exit__##sname, \
184 .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \ 183 .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
185 .exit_fields = LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \
186 }; \ 184 }; \
187 asmlinkage long sys_##sname(void) 185 asmlinkage long sys_##sname(void)
188#else 186#else
diff --git a/include/trace/boot.h b/include/trace/boot.h
deleted file mode 100644
index 088ea089e31d..000000000000
--- a/include/trace/boot.h
+++ /dev/null
@@ -1,60 +0,0 @@
1#ifndef _LINUX_TRACE_BOOT_H
2#define _LINUX_TRACE_BOOT_H
3
4#include <linux/module.h>
5#include <linux/kallsyms.h>
6#include <linux/init.h>
7
8/*
9 * Structure which defines the trace of an initcall
10 * while it is called.
11 * You don't have to fill the func field since it is
12 * only used internally by the tracer.
13 */
14struct boot_trace_call {
15 pid_t caller;
16 char func[KSYM_SYMBOL_LEN];
17};
18
19/*
20 * Structure which defines the trace of an initcall
21 * while it returns.
22 */
23struct boot_trace_ret {
24 char func[KSYM_SYMBOL_LEN];
25 int result;
26 unsigned long long duration; /* nsecs */
27};
28
29#ifdef CONFIG_BOOT_TRACER
30/* Append the traces on the ring-buffer */
31extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
32extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
33
34/* Tells the tracer that smp_pre_initcall is finished.
35 * So we can start the tracing
36 */
37extern void start_boot_trace(void);
38
39/* Resume the tracing of other necessary events
40 * such as sched switches
41 */
42extern void enable_boot_trace(void);
43
44/* Suspend this tracing. Actually, only sched_switches tracing have
45 * to be suspended. Initcalls doesn't need it.)
46 */
47extern void disable_boot_trace(void);
48#else
49static inline
50void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
51
52static inline
53void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
54
55static inline void start_boot_trace(void) { }
56static inline void enable_boot_trace(void) { }
57static inline void disable_boot_trace(void) { }
58#endif /* CONFIG_BOOT_TRACER */
59
60#endif /* __LINUX_TRACE_BOOT_H */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b9e1dd6c6208..9208c92aeab5 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -50,31 +50,6 @@ TRACE_EVENT(sched_kthread_stop_ret,
50); 50);
51 51
52/* 52/*
53 * Tracepoint for waiting on task to unschedule:
54 */
55TRACE_EVENT(sched_wait_task,
56
57 TP_PROTO(struct task_struct *p),
58
59 TP_ARGS(p),
60
61 TP_STRUCT__entry(
62 __array( char, comm, TASK_COMM_LEN )
63 __field( pid_t, pid )
64 __field( int, prio )
65 ),
66
67 TP_fast_assign(
68 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
69 __entry->pid = p->pid;
70 __entry->prio = p->prio;
71 ),
72
73 TP_printk("comm=%s pid=%d prio=%d",
74 __entry->comm, __entry->pid, __entry->prio)
75);
76
77/*
78 * Tracepoint for waking up a task: 53 * Tracepoint for waking up a task:
79 */ 54 */
80DECLARE_EVENT_CLASS(sched_wakeup_template, 55DECLARE_EVENT_CLASS(sched_wakeup_template,
@@ -240,6 +215,13 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
240 TP_ARGS(p)); 215 TP_ARGS(p));
241 216
242/* 217/*
218 * Tracepoint for waiting on task to unschedule:
219 */
220DEFINE_EVENT(sched_process_template, sched_wait_task,
221 TP_PROTO(struct task_struct *p),
222 TP_ARGS(p));
223
224/*
243 * Tracepoint for a waiting task: 225 * Tracepoint for a waiting task:
244 */ 226 */
245TRACE_EVENT(sched_process_wait, 227TRACE_EVENT(sched_process_wait,
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 9496b965d62a..c624126a9c8a 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -8,11 +8,7 @@
8#include <linux/hrtimer.h> 8#include <linux/hrtimer.h>
9#include <linux/timer.h> 9#include <linux/timer.h>
10 10
11/** 11DECLARE_EVENT_CLASS(timer_class,
12 * timer_init - called when the timer is initialized
13 * @timer: pointer to struct timer_list
14 */
15TRACE_EVENT(timer_init,
16 12
17 TP_PROTO(struct timer_list *timer), 13 TP_PROTO(struct timer_list *timer),
18 14
@@ -30,6 +26,17 @@ TRACE_EVENT(timer_init,
30); 26);
31 27
32/** 28/**
29 * timer_init - called when the timer is initialized
30 * @timer: pointer to struct timer_list
31 */
32DEFINE_EVENT(timer_class, timer_init,
33
34 TP_PROTO(struct timer_list *timer),
35
36 TP_ARGS(timer)
37);
38
39/**
33 * timer_start - called when the timer is started 40 * timer_start - called when the timer is started
34 * @timer: pointer to struct timer_list 41 * @timer: pointer to struct timer_list
35 * @expires: the timers expiry time 42 * @expires: the timers expiry time
@@ -94,42 +101,22 @@ TRACE_EVENT(timer_expire_entry,
 94 * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might 101 * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might
95 * be invalid. We solely track the pointer. 102 * be invalid. We solely track the pointer.
96 */ 103 */
97TRACE_EVENT(timer_expire_exit, 104DEFINE_EVENT(timer_class, timer_expire_exit,
98 105
99 TP_PROTO(struct timer_list *timer), 106 TP_PROTO(struct timer_list *timer),
100 107
101 TP_ARGS(timer), 108 TP_ARGS(timer)
102
103 TP_STRUCT__entry(
104 __field(void *, timer )
105 ),
106
107 TP_fast_assign(
108 __entry->timer = timer;
109 ),
110
111 TP_printk("timer=%p", __entry->timer)
112); 109);
113 110
114/** 111/**
115 * timer_cancel - called when the timer is canceled 112 * timer_cancel - called when the timer is canceled
116 * @timer: pointer to struct timer_list 113 * @timer: pointer to struct timer_list
117 */ 114 */
118TRACE_EVENT(timer_cancel, 115DEFINE_EVENT(timer_class, timer_cancel,
119 116
120 TP_PROTO(struct timer_list *timer), 117 TP_PROTO(struct timer_list *timer),
121 118
122 TP_ARGS(timer), 119 TP_ARGS(timer)
123
124 TP_STRUCT__entry(
125 __field( void *, timer )
126 ),
127
128 TP_fast_assign(
129 __entry->timer = timer;
130 ),
131
132 TP_printk("timer=%p", __entry->timer)
133); 120);
134 121
135/** 122/**
@@ -224,14 +211,7 @@ TRACE_EVENT(hrtimer_expire_entry,
224 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) 211 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
225 ); 212 );
226 213
227/** 214DECLARE_EVENT_CLASS(hrtimer_class,
228 * hrtimer_expire_exit - called immediately after the hrtimer callback returns
229 * @timer: pointer to struct hrtimer
230 *
231 * When used in combination with the hrtimer_expire_entry tracepoint we can
232 * determine the runtime of the callback function.
233 */
234TRACE_EVENT(hrtimer_expire_exit,
235 215
236 TP_PROTO(struct hrtimer *hrtimer), 216 TP_PROTO(struct hrtimer *hrtimer),
237 217
@@ -249,24 +229,28 @@ TRACE_EVENT(hrtimer_expire_exit,
249); 229);
250 230
251/** 231/**
252 * hrtimer_cancel - called when the hrtimer is canceled 232 * hrtimer_expire_exit - called immediately after the hrtimer callback returns
253 * @hrtimer: pointer to struct hrtimer 233 * @timer: pointer to struct hrtimer
234 *
235 * When used in combination with the hrtimer_expire_entry tracepoint we can
236 * determine the runtime of the callback function.
254 */ 237 */
255TRACE_EVENT(hrtimer_cancel, 238DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
256 239
257 TP_PROTO(struct hrtimer *hrtimer), 240 TP_PROTO(struct hrtimer *hrtimer),
258 241
259 TP_ARGS(hrtimer), 242 TP_ARGS(hrtimer)
243);
260 244
261 TP_STRUCT__entry( 245/**
262 __field( void *, hrtimer ) 246 * hrtimer_cancel - called when the hrtimer is canceled
263 ), 247 * @hrtimer: pointer to struct hrtimer
248 */
249DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
264 250
265 TP_fast_assign( 251 TP_PROTO(struct hrtimer *hrtimer),
266 __entry->hrtimer = hrtimer;
267 ),
268 252
269 TP_printk("hrtimer=%p", __entry->hrtimer) 253 TP_ARGS(hrtimer)
270); 254);
271 255
272/** 256/**
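
The timer tracepoints above are folded into shared event classes: one DECLARE_EVENT_CLASS() carries the record layout and print format, and each DEFINE_EVENT() is only a thin instance of it. Outside the tracing macros the same idea looks roughly like the sketch below; the structures and names are illustrative, not the code the macros actually generate.

#include <stdio.h>

/* the "class": shared format and print logic */
struct event_class {
	const char	*fmt;
	void		(*print)(const char *name, const void *timer);
};

static void timer_class_print(const char *name, const void *timer)
{
	printf("%s: timer=%p\n", name, timer);
}

static struct event_class timer_class = {
	.fmt	= "timer=%p",
	.print	= timer_class_print,
};

/* each event is only a name plus a pointer to its class */
struct trace_event {
	const char		*name;
	struct event_class	*class;
};

static struct trace_event timer_init_event   = { "timer_init",   &timer_class };
static struct trace_event timer_cancel_event = { "timer_cancel", &timer_class };

int main(void)
{
	int dummy;

	timer_init_event.class->print(timer_init_event.name, &dummy);
	timer_cancel_event.class->print(timer_cancel_event.name, &dummy);
	return 0;
}
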
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5a64905d7278..55c1fd1bbc3d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -439,6 +439,7 @@ static inline notrace int ftrace_get_offsets_##call( \
439 * .fields = LIST_HEAD_INIT(event_class_##call.fields), 439 * .fields = LIST_HEAD_INIT(event_class_##call.fields),
440 * .raw_init = trace_event_raw_init, 440 * .raw_init = trace_event_raw_init,
441 * .probe = ftrace_raw_event_##call, 441 * .probe = ftrace_raw_event_##call,
442 * .reg = ftrace_event_reg,
442 * }; 443 * };
443 * 444 *
444 * static struct ftrace_event_call __used 445 * static struct ftrace_event_call __used
@@ -567,6 +568,7 @@ static struct ftrace_event_class __used event_class_##call = { \
567 .fields = LIST_HEAD_INIT(event_class_##call.fields),\ 568 .fields = LIST_HEAD_INIT(event_class_##call.fields),\
568 .raw_init = trace_event_raw_init, \ 569 .raw_init = trace_event_raw_init, \
569 .probe = ftrace_raw_event_##call, \ 570 .probe = ftrace_raw_event_##call, \
571 .reg = ftrace_event_reg, \
570 _TRACE_PERF_INIT(call) \ 572 _TRACE_PERF_INIT(call) \
571}; 573};
572 574
@@ -705,7 +707,7 @@ perf_trace_##call(void *__data, proto) \
705 int __data_size; \ 707 int __data_size; \
706 int rctx; \ 708 int rctx; \
707 \ 709 \
708 perf_fetch_caller_regs(&__regs, 1); \ 710 perf_fetch_caller_regs(&__regs); \
709 \ 711 \
710 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ 712 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
711 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ 713 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 257e08960d7b..31966a4fb8cc 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -26,7 +26,6 @@ struct syscall_metadata {
26 const char **types; 26 const char **types;
27 const char **args; 27 const char **args;
28 struct list_head enter_fields; 28 struct list_head enter_fields;
29 struct list_head exit_fields;
30 29
31 struct ftrace_event_call *enter_event; 30 struct ftrace_event_call *enter_event;
32 struct ftrace_event_call *exit_event; 31 struct ftrace_event_call *exit_event;
diff --git a/init/main.c b/init/main.c
index a42fdf4aeba9..4ab5124a2952 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,11 +66,9 @@
66#include <linux/ftrace.h> 66#include <linux/ftrace.h>
67#include <linux/async.h> 67#include <linux/async.h>
68#include <linux/kmemcheck.h> 68#include <linux/kmemcheck.h>
69#include <linux/kmemtrace.h>
70#include <linux/sfi.h> 69#include <linux/sfi.h>
71#include <linux/shmem_fs.h> 70#include <linux/shmem_fs.h>
72#include <linux/slab.h> 71#include <linux/slab.h>
73#include <trace/boot.h>
74 72
75#include <asm/io.h> 73#include <asm/io.h>
76#include <asm/bugs.h> 74#include <asm/bugs.h>
@@ -663,7 +661,6 @@ asmlinkage void __init start_kernel(void)
663#endif 661#endif
664 page_cgroup_init(); 662 page_cgroup_init();
665 enable_debug_pagealloc(); 663 enable_debug_pagealloc();
666 kmemtrace_init();
667 kmemleak_init(); 664 kmemleak_init();
668 debug_objects_mem_init(); 665 debug_objects_mem_init();
669 idr_init_cache(); 666 idr_init_cache();
@@ -725,38 +722,33 @@ int initcall_debug;
725core_param(initcall_debug, initcall_debug, bool, 0644); 722core_param(initcall_debug, initcall_debug, bool, 0644);
726 723
727static char msgbuf[64]; 724static char msgbuf[64];
728static struct boot_trace_call call;
729static struct boot_trace_ret ret;
730 725
731int do_one_initcall(initcall_t fn) 726int do_one_initcall(initcall_t fn)
732{ 727{
733 int count = preempt_count(); 728 int count = preempt_count();
734 ktime_t calltime, delta, rettime; 729 ktime_t calltime, delta, rettime;
730 unsigned long long duration;
731 int ret;
735 732
736 if (initcall_debug) { 733 if (initcall_debug) {
737 call.caller = task_pid_nr(current); 734 printk("calling %pF @ %i\n", fn, task_pid_nr(current));
738 printk("calling %pF @ %i\n", fn, call.caller);
739 calltime = ktime_get(); 735 calltime = ktime_get();
740 trace_boot_call(&call, fn);
741 enable_boot_trace();
742 } 736 }
743 737
744 ret.result = fn(); 738 ret = fn();
745 739
746 if (initcall_debug) { 740 if (initcall_debug) {
747 disable_boot_trace();
748 rettime = ktime_get(); 741 rettime = ktime_get();
749 delta = ktime_sub(rettime, calltime); 742 delta = ktime_sub(rettime, calltime);
750 ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10; 743 duration = (unsigned long long) ktime_to_ns(delta) >> 10;
751 trace_boot_ret(&ret, fn); 744 printk("initcall %pF returned %d after %lld usecs\n", fn,
752 printk("initcall %pF returned %d after %Ld usecs\n", fn, 745 ret, duration);
753 ret.result, ret.duration);
754 } 746 }
755 747
756 msgbuf[0] = 0; 748 msgbuf[0] = 0;
757 749
758 if (ret.result && ret.result != -ENODEV && initcall_debug) 750 if (ret && ret != -ENODEV && initcall_debug)
759 sprintf(msgbuf, "error code %d ", ret.result); 751 sprintf(msgbuf, "error code %d ", ret);
760 752
761 if (preempt_count() != count) { 753 if (preempt_count() != count) {
762 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); 754 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -770,7 +762,7 @@ int do_one_initcall(initcall_t fn)
770 printk("initcall %pF returned with %s\n", fn, msgbuf); 762 printk("initcall %pF returned with %s\n", fn, msgbuf);
771 } 763 }
772 764
773 return ret.result; 765 return ret;
774} 766}
775 767
776 768
@@ -894,7 +886,6 @@ static int __init kernel_init(void * unused)
894 smp_prepare_cpus(setup_max_cpus); 886 smp_prepare_cpus(setup_max_cpus);
895 887
896 do_pre_smp_initcalls(); 888 do_pre_smp_initcalls();
897 start_boot_trace();
898 889
899 smp_init(); 890 smp_init();
900 sched_init_smp(); 891 sched_init_smp();
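With the boot tracer and kmemtrace hooks removed, do_one_initcall() times initcalls with plain ktime arithmetic and printk(). The core idiom, condensed from the hunk above (fn and ret as declared in the function):

	ktime_t calltime, rettime, delta;
	unsigned long long duration;

	calltime = ktime_get();
	ret = fn();
	rettime = ktime_get();
	delta = ktime_sub(rettime, calltime);
	/* ns >> 10 is a cheap approximation of microseconds (divide by 1024) */
	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
	printk("initcall %pF returned %d after %lld usecs\n", fn, ret, duration);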
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 7a56b22e0602..e34d94d50924 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -41,6 +41,7 @@
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/list.h>
44#include <linux/cpu.h> 45#include <linux/cpu.h>
45#include <linux/smp.h> 46#include <linux/smp.h>
46 47
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
62 63
63static int nr_slots[TYPE_MAX]; 64static int nr_slots[TYPE_MAX];
64 65
66/* Keep track of the breakpoints attached to tasks */
67static LIST_HEAD(bp_task_head);
68
65static int constraints_initialized; 69static int constraints_initialized;
66 70
67/* Gather the number of total pinned and un-pinned bp in a cpuset */ 71/* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
103 return 0; 107 return 0;
104} 108}
105 109
106static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type) 110/*
111 * Count the number of breakpoints of the same type and same task.
112 * The given event must be not on the list.
113 */
114static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
107{ 115{
108 struct perf_event_context *ctx = tsk->perf_event_ctxp; 116 struct perf_event_context *ctx = bp->ctx;
109 struct list_head *list; 117 struct perf_event *iter;
110 struct perf_event *bp;
111 unsigned long flags;
112 int count = 0; 118 int count = 0;
113 119
114 if (WARN_ONCE(!ctx, "No perf context for this task")) 120 list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
115 return 0; 121 if (iter->ctx == ctx && find_slot_idx(iter) == type)
116 122 count += hw_breakpoint_weight(iter);
117 list = &ctx->event_list;
118
119 raw_spin_lock_irqsave(&ctx->lock, flags);
120
121 /*
122 * The current breakpoint counter is not included in the list
123 * at the open() callback time
124 */
125 list_for_each_entry(bp, list, event_entry) {
126 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
127 if (find_slot_idx(bp) == type)
128 count += hw_breakpoint_weight(bp);
129 } 123 }
130 124
131 raw_spin_unlock_irqrestore(&ctx->lock, flags);
132
133 return count; 125 return count;
134} 126}
135 127
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
149 if (!tsk) 141 if (!tsk)
150 slots->pinned += max_task_bp_pinned(cpu, type); 142 slots->pinned += max_task_bp_pinned(cpu, type);
151 else 143 else
152 slots->pinned += task_bp_pinned(tsk, type); 144 slots->pinned += task_bp_pinned(bp, type);
153 slots->flexible = per_cpu(nr_bp_flexible[type], cpu); 145 slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
154 146
155 return; 147 return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
162 if (!tsk) 154 if (!tsk)
163 nr += max_task_bp_pinned(cpu, type); 155 nr += max_task_bp_pinned(cpu, type);
164 else 156 else
165 nr += task_bp_pinned(tsk, type); 157 nr += task_bp_pinned(bp, type);
166 158
167 if (nr > slots->pinned) 159 if (nr > slots->pinned)
168 slots->pinned = nr; 160 slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
188/* 180/*
189 * Add a pinned breakpoint for the given task in our constraint table 181 * Add a pinned breakpoint for the given task in our constraint table
190 */ 182 */
191static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, 183static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
192 enum bp_type_idx type, int weight) 184 enum bp_type_idx type, int weight)
193{ 185{
194 unsigned int *tsk_pinned; 186 unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
196 int old_idx = 0; 188 int old_idx = 0;
197 int idx = 0; 189 int idx = 0;
198 190
199 old_count = task_bp_pinned(tsk, type); 191 old_count = task_bp_pinned(bp, type);
200 old_idx = old_count - 1; 192 old_idx = old_count - 1;
201 idx = old_idx + weight; 193 idx = old_idx + weight;
202 194
195 /* tsk_pinned[n] is the number of tasks having n breakpoints */
203 tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); 196 tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
204 if (enable) { 197 if (enable) {
205 tsk_pinned[idx]++; 198 tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
222 int cpu = bp->cpu; 215 int cpu = bp->cpu;
223 struct task_struct *tsk = bp->ctx->task; 216 struct task_struct *tsk = bp->ctx->task;
224 217
218 /* Pinned counter cpu profiling */
219 if (!tsk) {
220
221 if (enable)
222 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
223 else
224 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
225 return;
226 }
227
225 /* Pinned counter task profiling */ 228 /* Pinned counter task profiling */
226 if (tsk) {
227 if (cpu >= 0) {
228 toggle_bp_task_slot(tsk, cpu, enable, type, weight);
229 return;
230 }
231 229
230 if (!enable)
231 list_del(&bp->hw.bp_list);
232
233 if (cpu >= 0) {
234 toggle_bp_task_slot(bp, cpu, enable, type, weight);
235 } else {
232 for_each_online_cpu(cpu) 236 for_each_online_cpu(cpu)
233 toggle_bp_task_slot(tsk, cpu, enable, type, weight); 237 toggle_bp_task_slot(bp, cpu, enable, type, weight);
234 return;
235 } 238 }
236 239
237 /* Pinned counter cpu profiling */
238 if (enable) 240 if (enable)
239 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight; 241 list_add_tail(&bp->hw.bp_list, &bp_task_head);
240 else
241 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
242} 242}
243 243
244/* 244/*
@@ -301,6 +301,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
301 weight = hw_breakpoint_weight(bp); 301 weight = hw_breakpoint_weight(bp);
302 302
303 fetch_bp_busy_slots(&slots, bp, type); 303 fetch_bp_busy_slots(&slots, bp, type);
304 /*
305 * Simulate the addition of this breakpoint to the constraints
306 * and see the result.
307 */
304 fetch_this_slot(&slots, weight); 308 fetch_this_slot(&slots, weight);
305 309
306 /* Flexible counters need to keep at least one slot */ 310 /* Flexible counters need to keep at least one slot */
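The hw_breakpoint rework replaces the walk over the task's perf context (and the ctx->lock it needed) with a global bp_task_head list, so task_bp_pinned() becomes a plain list scan. Correctness relies on the ordering in toggle_bp_slot(): the breakpoint being toggled is never on the list while the recount runs. A condensed sketch of the task-bound path, using the names from the patch:

	if (!enable)
		list_del(&bp->hw.bp_list);		/* unlisted before recounting */

	if (cpu >= 0) {
		toggle_bp_task_slot(bp, cpu, enable, type, weight);
	} else {
		for_each_online_cpu(cpu)
			toggle_bp_task_slot(bp, cpu, enable, type, weight);
	}

	if (enable)
		list_add_tail(&bp->hw.bp_list, &bp_task_head);	/* listed after recounting */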
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff86c558af4c..c772a3d4000d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
675 struct perf_event *event, *partial_group = NULL; 675 struct perf_event *event, *partial_group = NULL;
676 const struct pmu *pmu = group_event->pmu; 676 const struct pmu *pmu = group_event->pmu;
677 bool txn = false; 677 bool txn = false;
678 int ret;
679 678
680 if (group_event->state == PERF_EVENT_STATE_OFF) 679 if (group_event->state == PERF_EVENT_STATE_OFF)
681 return 0; 680 return 0;
@@ -703,14 +702,8 @@ group_sched_in(struct perf_event *group_event,
703 } 702 }
704 } 703 }
705 704
706 if (!txn) 705 if (!txn || !pmu->commit_txn(pmu))
707 return 0;
708
709 ret = pmu->commit_txn(pmu);
710 if (!ret) {
711 pmu->cancel_txn(pmu);
712 return 0; 706 return 0;
713 }
714 707
715group_error: 708group_error:
716 /* 709 /*
@@ -1155,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
1155 * In order to keep per-task stats reliable we need to flip the event 1148 * In order to keep per-task stats reliable we need to flip the event
1156 * values when we flip the contexts. 1149 * values when we flip the contexts.
1157 */ 1150 */
1158 value = atomic64_read(&next_event->count); 1151 value = local64_read(&next_event->count);
1159 value = atomic64_xchg(&event->count, value); 1152 value = local64_xchg(&event->count, value);
1160 atomic64_set(&next_event->count, value); 1153 local64_set(&next_event->count, value);
1161 1154
1162 swap(event->total_time_enabled, next_event->total_time_enabled); 1155 swap(event->total_time_enabled, next_event->total_time_enabled);
1163 swap(event->total_time_running, next_event->total_time_running); 1156 swap(event->total_time_running, next_event->total_time_running);
@@ -1547,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1547 1540
1548 hwc->sample_period = sample_period; 1541 hwc->sample_period = sample_period;
1549 1542
1550 if (atomic64_read(&hwc->period_left) > 8*sample_period) { 1543 if (local64_read(&hwc->period_left) > 8*sample_period) {
1551 perf_disable(); 1544 perf_disable();
1552 perf_event_stop(event); 1545 perf_event_stop(event);
1553 atomic64_set(&hwc->period_left, 0); 1546 local64_set(&hwc->period_left, 0);
1554 perf_event_start(event); 1547 perf_event_start(event);
1555 perf_enable(); 1548 perf_enable();
1556 } 1549 }
@@ -1591,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1591 1584
1592 perf_disable(); 1585 perf_disable();
1593 event->pmu->read(event); 1586 event->pmu->read(event);
1594 now = atomic64_read(&event->count); 1587 now = local64_read(&event->count);
1595 delta = now - hwc->freq_count_stamp; 1588 delta = now - hwc->freq_count_stamp;
1596 hwc->freq_count_stamp = now; 1589 hwc->freq_count_stamp = now;
1597 1590
@@ -1743,6 +1736,11 @@ static void __perf_event_read(void *info)
1743 event->pmu->read(event); 1736 event->pmu->read(event);
1744} 1737}
1745 1738
1739static inline u64 perf_event_count(struct perf_event *event)
1740{
1741 return local64_read(&event->count) + atomic64_read(&event->child_count);
1742}
1743
1746static u64 perf_event_read(struct perf_event *event) 1744static u64 perf_event_read(struct perf_event *event)
1747{ 1745{
1748 /* 1746 /*
@@ -1762,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
1762 raw_spin_unlock_irqrestore(&ctx->lock, flags); 1760 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1763 } 1761 }
1764 1762
1765 return atomic64_read(&event->count); 1763 return perf_event_count(event);
1766} 1764}
1767 1765
1768/* 1766/*
@@ -1883,7 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
1883} 1881}
1884 1882
1885static void perf_pending_sync(struct perf_event *event); 1883static void perf_pending_sync(struct perf_event *event);
1886static void perf_mmap_data_put(struct perf_mmap_data *data); 1884static void perf_buffer_put(struct perf_buffer *buffer);
1887 1885
1888static void free_event(struct perf_event *event) 1886static void free_event(struct perf_event *event)
1889{ 1887{
@@ -1891,7 +1889,7 @@ static void free_event(struct perf_event *event)
1891 1889
1892 if (!event->parent) { 1890 if (!event->parent) {
1893 atomic_dec(&nr_events); 1891 atomic_dec(&nr_events);
1894 if (event->attr.mmap) 1892 if (event->attr.mmap || event->attr.mmap_data)
1895 atomic_dec(&nr_mmap_events); 1893 atomic_dec(&nr_mmap_events);
1896 if (event->attr.comm) 1894 if (event->attr.comm)
1897 atomic_dec(&nr_comm_events); 1895 atomic_dec(&nr_comm_events);
@@ -1899,9 +1897,9 @@ static void free_event(struct perf_event *event)
1899 atomic_dec(&nr_task_events); 1897 atomic_dec(&nr_task_events);
1900 } 1898 }
1901 1899
1902 if (event->data) { 1900 if (event->buffer) {
1903 perf_mmap_data_put(event->data); 1901 perf_buffer_put(event->buffer);
1904 event->data = NULL; 1902 event->buffer = NULL;
1905 } 1903 }
1906 1904
1907 if (event->destroy) 1905 if (event->destroy)
@@ -2126,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
2126static unsigned int perf_poll(struct file *file, poll_table *wait) 2124static unsigned int perf_poll(struct file *file, poll_table *wait)
2127{ 2125{
2128 struct perf_event *event = file->private_data; 2126 struct perf_event *event = file->private_data;
2129 struct perf_mmap_data *data; 2127 struct perf_buffer *buffer;
2130 unsigned int events = POLL_HUP; 2128 unsigned int events = POLL_HUP;
2131 2129
2132 rcu_read_lock(); 2130 rcu_read_lock();
2133 data = rcu_dereference(event->data); 2131 buffer = rcu_dereference(event->buffer);
2134 if (data) 2132 if (buffer)
2135 events = atomic_xchg(&data->poll, 0); 2133 events = atomic_xchg(&buffer->poll, 0);
2136 rcu_read_unlock(); 2134 rcu_read_unlock();
2137 2135
2138 poll_wait(file, &event->waitq, wait); 2136 poll_wait(file, &event->waitq, wait);
@@ -2143,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
2143static void perf_event_reset(struct perf_event *event) 2141static void perf_event_reset(struct perf_event *event)
2144{ 2142{
2145 (void)perf_event_read(event); 2143 (void)perf_event_read(event);
2146 atomic64_set(&event->count, 0); 2144 local64_set(&event->count, 0);
2147 perf_event_update_userpage(event); 2145 perf_event_update_userpage(event);
2148} 2146}
2149 2147
@@ -2342,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
2342void perf_event_update_userpage(struct perf_event *event) 2340void perf_event_update_userpage(struct perf_event *event)
2343{ 2341{
2344 struct perf_event_mmap_page *userpg; 2342 struct perf_event_mmap_page *userpg;
2345 struct perf_mmap_data *data; 2343 struct perf_buffer *buffer;
2346 2344
2347 rcu_read_lock(); 2345 rcu_read_lock();
2348 data = rcu_dereference(event->data); 2346 buffer = rcu_dereference(event->buffer);
2349 if (!data) 2347 if (!buffer)
2350 goto unlock; 2348 goto unlock;
2351 2349
2352 userpg = data->user_page; 2350 userpg = buffer->user_page;
2353 2351
2354 /* 2352 /*
2355 * Disable preemption so as to not let the corresponding user-space 2353 * Disable preemption so as to not let the corresponding user-space
@@ -2359,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
2359 ++userpg->lock; 2357 ++userpg->lock;
2360 barrier(); 2358 barrier();
2361 userpg->index = perf_event_index(event); 2359 userpg->index = perf_event_index(event);
2362 userpg->offset = atomic64_read(&event->count); 2360 userpg->offset = perf_event_count(event);
2363 if (event->state == PERF_EVENT_STATE_ACTIVE) 2361 if (event->state == PERF_EVENT_STATE_ACTIVE)
2364 userpg->offset -= atomic64_read(&event->hw.prev_count); 2362 userpg->offset -= local64_read(&event->hw.prev_count);
2365 2363
2366 userpg->time_enabled = event->total_time_enabled + 2364 userpg->time_enabled = event->total_time_enabled +
2367 atomic64_read(&event->child_total_time_enabled); 2365 atomic64_read(&event->child_total_time_enabled);
@@ -2376,6 +2374,25 @@ unlock:
2376 rcu_read_unlock(); 2374 rcu_read_unlock();
2377} 2375}
2378 2376
2377static unsigned long perf_data_size(struct perf_buffer *buffer);
2378
2379static void
2380perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
2381{
2382 long max_size = perf_data_size(buffer);
2383
2384 if (watermark)
2385 buffer->watermark = min(max_size, watermark);
2386
2387 if (!buffer->watermark)
2388 buffer->watermark = max_size / 2;
2389
2390 if (flags & PERF_BUFFER_WRITABLE)
2391 buffer->writable = 1;
2392
2393 atomic_set(&buffer->refcount, 1);
2394}
2395
2379#ifndef CONFIG_PERF_USE_VMALLOC 2396#ifndef CONFIG_PERF_USE_VMALLOC
2380 2397
2381/* 2398/*
@@ -2383,15 +2400,15 @@ unlock:
2383 */ 2400 */
2384 2401
2385static struct page * 2402static struct page *
2386perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) 2403perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
2387{ 2404{
2388 if (pgoff > data->nr_pages) 2405 if (pgoff > buffer->nr_pages)
2389 return NULL; 2406 return NULL;
2390 2407
2391 if (pgoff == 0) 2408 if (pgoff == 0)
2392 return virt_to_page(data->user_page); 2409 return virt_to_page(buffer->user_page);
2393 2410
2394 return virt_to_page(data->data_pages[pgoff - 1]); 2411 return virt_to_page(buffer->data_pages[pgoff - 1]);
2395} 2412}
2396 2413
2397static void *perf_mmap_alloc_page(int cpu) 2414static void *perf_mmap_alloc_page(int cpu)
@@ -2407,42 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
2407 return page_address(page); 2424 return page_address(page);
2408} 2425}
2409 2426
2410static struct perf_mmap_data * 2427static struct perf_buffer *
2411perf_mmap_data_alloc(struct perf_event *event, int nr_pages) 2428perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
2412{ 2429{
2413 struct perf_mmap_data *data; 2430 struct perf_buffer *buffer;
2414 unsigned long size; 2431 unsigned long size;
2415 int i; 2432 int i;
2416 2433
2417 size = sizeof(struct perf_mmap_data); 2434 size = sizeof(struct perf_buffer);
2418 size += nr_pages * sizeof(void *); 2435 size += nr_pages * sizeof(void *);
2419 2436
2420 data = kzalloc(size, GFP_KERNEL); 2437 buffer = kzalloc(size, GFP_KERNEL);
2421 if (!data) 2438 if (!buffer)
2422 goto fail; 2439 goto fail;
2423 2440
2424 data->user_page = perf_mmap_alloc_page(event->cpu); 2441 buffer->user_page = perf_mmap_alloc_page(cpu);
2425 if (!data->user_page) 2442 if (!buffer->user_page)
2426 goto fail_user_page; 2443 goto fail_user_page;
2427 2444
2428 for (i = 0; i < nr_pages; i++) { 2445 for (i = 0; i < nr_pages; i++) {
2429 data->data_pages[i] = perf_mmap_alloc_page(event->cpu); 2446 buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
2430 if (!data->data_pages[i]) 2447 if (!buffer->data_pages[i])
2431 goto fail_data_pages; 2448 goto fail_data_pages;
2432 } 2449 }
2433 2450
2434 data->nr_pages = nr_pages; 2451 buffer->nr_pages = nr_pages;
2452
2453 perf_buffer_init(buffer, watermark, flags);
2435 2454
2436 return data; 2455 return buffer;
2437 2456
2438fail_data_pages: 2457fail_data_pages:
2439 for (i--; i >= 0; i--) 2458 for (i--; i >= 0; i--)
2440 free_page((unsigned long)data->data_pages[i]); 2459 free_page((unsigned long)buffer->data_pages[i]);
2441 2460
2442 free_page((unsigned long)data->user_page); 2461 free_page((unsigned long)buffer->user_page);
2443 2462
2444fail_user_page: 2463fail_user_page:
2445 kfree(data); 2464 kfree(buffer);
2446 2465
2447fail: 2466fail:
2448 return NULL; 2467 return NULL;
@@ -2456,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
2456 __free_page(page); 2475 __free_page(page);
2457} 2476}
2458 2477
2459static void perf_mmap_data_free(struct perf_mmap_data *data) 2478static void perf_buffer_free(struct perf_buffer *buffer)
2460{ 2479{
2461 int i; 2480 int i;
2462 2481
2463 perf_mmap_free_page((unsigned long)data->user_page); 2482 perf_mmap_free_page((unsigned long)buffer->user_page);
2464 for (i = 0; i < data->nr_pages; i++) 2483 for (i = 0; i < buffer->nr_pages; i++)
2465 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2484 perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
2466 kfree(data); 2485 kfree(buffer);
2467} 2486}
2468 2487
2469static inline int page_order(struct perf_mmap_data *data) 2488static inline int page_order(struct perf_buffer *buffer)
2470{ 2489{
2471 return 0; 2490 return 0;
2472} 2491}
@@ -2479,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
2479 * Required for architectures that have d-cache aliasing issues. 2498 * Required for architectures that have d-cache aliasing issues.
2480 */ 2499 */
2481 2500
2482static inline int page_order(struct perf_mmap_data *data) 2501static inline int page_order(struct perf_buffer *buffer)
2483{ 2502{
2484 return data->page_order; 2503 return buffer->page_order;
2485} 2504}
2486 2505
2487static struct page * 2506static struct page *
2488perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) 2507perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
2489{ 2508{
2490 if (pgoff > (1UL << page_order(data))) 2509 if (pgoff > (1UL << page_order(buffer)))
2491 return NULL; 2510 return NULL;
2492 2511
2493 return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); 2512 return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
2494} 2513}
2495 2514
2496static void perf_mmap_unmark_page(void *addr) 2515static void perf_mmap_unmark_page(void *addr)
@@ -2500,57 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
2500 page->mapping = NULL; 2519 page->mapping = NULL;
2501} 2520}
2502 2521
2503static void perf_mmap_data_free_work(struct work_struct *work) 2522static void perf_buffer_free_work(struct work_struct *work)
2504{ 2523{
2505 struct perf_mmap_data *data; 2524 struct perf_buffer *buffer;
2506 void *base; 2525 void *base;
2507 int i, nr; 2526 int i, nr;
2508 2527
2509 data = container_of(work, struct perf_mmap_data, work); 2528 buffer = container_of(work, struct perf_buffer, work);
2510 nr = 1 << page_order(data); 2529 nr = 1 << page_order(buffer);
2511 2530
2512 base = data->user_page; 2531 base = buffer->user_page;
2513 for (i = 0; i < nr + 1; i++) 2532 for (i = 0; i < nr + 1; i++)
2514 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2533 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2515 2534
2516 vfree(base); 2535 vfree(base);
2517 kfree(data); 2536 kfree(buffer);
2518} 2537}
2519 2538
2520static void perf_mmap_data_free(struct perf_mmap_data *data) 2539static void perf_buffer_free(struct perf_buffer *buffer)
2521{ 2540{
2522 schedule_work(&data->work); 2541 schedule_work(&buffer->work);
2523} 2542}
2524 2543
2525static struct perf_mmap_data * 2544static struct perf_buffer *
2526perf_mmap_data_alloc(struct perf_event *event, int nr_pages) 2545perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
2527{ 2546{
2528 struct perf_mmap_data *data; 2547 struct perf_buffer *buffer;
2529 unsigned long size; 2548 unsigned long size;
2530 void *all_buf; 2549 void *all_buf;
2531 2550
2532 size = sizeof(struct perf_mmap_data); 2551 size = sizeof(struct perf_buffer);
2533 size += sizeof(void *); 2552 size += sizeof(void *);
2534 2553
2535 data = kzalloc(size, GFP_KERNEL); 2554 buffer = kzalloc(size, GFP_KERNEL);
2536 if (!data) 2555 if (!buffer)
2537 goto fail; 2556 goto fail;
2538 2557
2539 INIT_WORK(&data->work, perf_mmap_data_free_work); 2558 INIT_WORK(&buffer->work, perf_buffer_free_work);
2540 2559
2541 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); 2560 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
2542 if (!all_buf) 2561 if (!all_buf)
2543 goto fail_all_buf; 2562 goto fail_all_buf;
2544 2563
2545 data->user_page = all_buf; 2564 buffer->user_page = all_buf;
2546 data->data_pages[0] = all_buf + PAGE_SIZE; 2565 buffer->data_pages[0] = all_buf + PAGE_SIZE;
2547 data->page_order = ilog2(nr_pages); 2566 buffer->page_order = ilog2(nr_pages);
2548 data->nr_pages = 1; 2567 buffer->nr_pages = 1;
2568
2569 perf_buffer_init(buffer, watermark, flags);
2549 2570
2550 return data; 2571 return buffer;
2551 2572
2552fail_all_buf: 2573fail_all_buf:
2553 kfree(data); 2574 kfree(buffer);
2554 2575
2555fail: 2576fail:
2556 return NULL; 2577 return NULL;
@@ -2558,15 +2579,15 @@ fail:
2558 2579
2559#endif 2580#endif
2560 2581
2561static unsigned long perf_data_size(struct perf_mmap_data *data) 2582static unsigned long perf_data_size(struct perf_buffer *buffer)
2562{ 2583{
2563 return data->nr_pages << (PAGE_SHIFT + page_order(data)); 2584 return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
2564} 2585}
2565 2586
2566static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2587static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2567{ 2588{
2568 struct perf_event *event = vma->vm_file->private_data; 2589 struct perf_event *event = vma->vm_file->private_data;
2569 struct perf_mmap_data *data; 2590 struct perf_buffer *buffer;
2570 int ret = VM_FAULT_SIGBUS; 2591 int ret = VM_FAULT_SIGBUS;
2571 2592
2572 if (vmf->flags & FAULT_FLAG_MKWRITE) { 2593 if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2576,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2576 } 2597 }
2577 2598
2578 rcu_read_lock(); 2599 rcu_read_lock();
2579 data = rcu_dereference(event->data); 2600 buffer = rcu_dereference(event->buffer);
2580 if (!data) 2601 if (!buffer)
2581 goto unlock; 2602 goto unlock;
2582 2603
2583 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) 2604 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
2584 goto unlock; 2605 goto unlock;
2585 2606
2586 vmf->page = perf_mmap_to_page(data, vmf->pgoff); 2607 vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
2587 if (!vmf->page) 2608 if (!vmf->page)
2588 goto unlock; 2609 goto unlock;
2589 2610
@@ -2598,52 +2619,35 @@ unlock:
2598 return ret; 2619 return ret;
2599} 2620}
2600 2621
2601static void 2622static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
2602perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2603{
2604 long max_size = perf_data_size(data);
2605
2606 if (event->attr.watermark) {
2607 data->watermark = min_t(long, max_size,
2608 event->attr.wakeup_watermark);
2609 }
2610
2611 if (!data->watermark)
2612 data->watermark = max_size / 2;
2613
2614 atomic_set(&data->refcount, 1);
2615 rcu_assign_pointer(event->data, data);
2616}
2617
2618static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2619{ 2623{
2620 struct perf_mmap_data *data; 2624 struct perf_buffer *buffer;
2621 2625
2622 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2626 buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
2623 perf_mmap_data_free(data); 2627 perf_buffer_free(buffer);
2624} 2628}
2625 2629
2626static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event) 2630static struct perf_buffer *perf_buffer_get(struct perf_event *event)
2627{ 2631{
2628 struct perf_mmap_data *data; 2632 struct perf_buffer *buffer;
2629 2633
2630 rcu_read_lock(); 2634 rcu_read_lock();
2631 data = rcu_dereference(event->data); 2635 buffer = rcu_dereference(event->buffer);
2632 if (data) { 2636 if (buffer) {
2633 if (!atomic_inc_not_zero(&data->refcount)) 2637 if (!atomic_inc_not_zero(&buffer->refcount))
2634 data = NULL; 2638 buffer = NULL;
2635 } 2639 }
2636 rcu_read_unlock(); 2640 rcu_read_unlock();
2637 2641
2638 return data; 2642 return buffer;
2639} 2643}
2640 2644
2641static void perf_mmap_data_put(struct perf_mmap_data *data) 2645static void perf_buffer_put(struct perf_buffer *buffer)
2642{ 2646{
2643 if (!atomic_dec_and_test(&data->refcount)) 2647 if (!atomic_dec_and_test(&buffer->refcount))
2644 return; 2648 return;
2645 2649
2646 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); 2650 call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
2647} 2651}
2648 2652
2649static void perf_mmap_open(struct vm_area_struct *vma) 2653static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2658,16 +2662,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
2658 struct perf_event *event = vma->vm_file->private_data; 2662 struct perf_event *event = vma->vm_file->private_data;
2659 2663
2660 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 2664 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
2661 unsigned long size = perf_data_size(event->data); 2665 unsigned long size = perf_data_size(event->buffer);
2662 struct user_struct *user = event->mmap_user; 2666 struct user_struct *user = event->mmap_user;
2663 struct perf_mmap_data *data = event->data; 2667 struct perf_buffer *buffer = event->buffer;
2664 2668
2665 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 2669 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
2666 vma->vm_mm->locked_vm -= event->mmap_locked; 2670 vma->vm_mm->locked_vm -= event->mmap_locked;
2667 rcu_assign_pointer(event->data, NULL); 2671 rcu_assign_pointer(event->buffer, NULL);
2668 mutex_unlock(&event->mmap_mutex); 2672 mutex_unlock(&event->mmap_mutex);
2669 2673
2670 perf_mmap_data_put(data); 2674 perf_buffer_put(buffer);
2671 free_uid(user); 2675 free_uid(user);
2672 } 2676 }
2673} 2677}
@@ -2685,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2685 unsigned long user_locked, user_lock_limit; 2689 unsigned long user_locked, user_lock_limit;
2686 struct user_struct *user = current_user(); 2690 struct user_struct *user = current_user();
2687 unsigned long locked, lock_limit; 2691 unsigned long locked, lock_limit;
2688 struct perf_mmap_data *data; 2692 struct perf_buffer *buffer;
2689 unsigned long vma_size; 2693 unsigned long vma_size;
2690 unsigned long nr_pages; 2694 unsigned long nr_pages;
2691 long user_extra, extra; 2695 long user_extra, extra;
2692 int ret = 0; 2696 int ret = 0, flags = 0;
2693 2697
2694 /* 2698 /*
2695 * Don't allow mmap() of inherited per-task counters. This would 2699 * Don't allow mmap() of inherited per-task counters. This would
@@ -2706,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2706 nr_pages = (vma_size / PAGE_SIZE) - 1; 2710 nr_pages = (vma_size / PAGE_SIZE) - 1;
2707 2711
2708 /* 2712 /*
2709 * If we have data pages ensure they're a power-of-two number, so we 2713 * If we have buffer pages ensure they're a power-of-two number, so we
2710 * can do bitmasks instead of modulo. 2714 * can do bitmasks instead of modulo.
2711 */ 2715 */
2712 if (nr_pages != 0 && !is_power_of_2(nr_pages)) 2716 if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2720,9 +2724,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2720 2724
2721 WARN_ON_ONCE(event->ctx->parent_ctx); 2725 WARN_ON_ONCE(event->ctx->parent_ctx);
2722 mutex_lock(&event->mmap_mutex); 2726 mutex_lock(&event->mmap_mutex);
2723 if (event->data) { 2727 if (event->buffer) {
2724 if (event->data->nr_pages == nr_pages) 2728 if (event->buffer->nr_pages == nr_pages)
2725 atomic_inc(&event->data->refcount); 2729 atomic_inc(&event->buffer->refcount);
2726 else 2730 else
2727 ret = -EINVAL; 2731 ret = -EINVAL;
2728 goto unlock; 2732 goto unlock;
@@ -2752,17 +2756,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2752 goto unlock; 2756 goto unlock;
2753 } 2757 }
2754 2758
2755 WARN_ON(event->data); 2759 WARN_ON(event->buffer);
2760
2761 if (vma->vm_flags & VM_WRITE)
2762 flags |= PERF_BUFFER_WRITABLE;
2756 2763
2757 data = perf_mmap_data_alloc(event, nr_pages); 2764 buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
2758 if (!data) { 2765 event->cpu, flags);
2766 if (!buffer) {
2759 ret = -ENOMEM; 2767 ret = -ENOMEM;
2760 goto unlock; 2768 goto unlock;
2761 } 2769 }
2762 2770 rcu_assign_pointer(event->buffer, buffer);
2763 perf_mmap_data_init(event, data);
2764 if (vma->vm_flags & VM_WRITE)
2765 event->data->writable = 1;
2766 2771
2767 atomic_long_add(user_extra, &user->locked_vm); 2772 atomic_long_add(user_extra, &user->locked_vm);
2768 event->mmap_locked = extra; 2773 event->mmap_locked = extra;
@@ -2941,11 +2946,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2941 return NULL; 2946 return NULL;
2942} 2947}
2943 2948
2944__weak
2945void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
2946{
2947}
2948
2949 2949
2950/* 2950/*
2951 * We assume there is only KVM supporting the callbacks. 2951 * We assume there is only KVM supporting the callbacks.
@@ -2971,15 +2971,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
2971/* 2971/*
2972 * Output 2972 * Output
2973 */ 2973 */
2974static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, 2974static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
2975 unsigned long offset, unsigned long head) 2975 unsigned long offset, unsigned long head)
2976{ 2976{
2977 unsigned long mask; 2977 unsigned long mask;
2978 2978
2979 if (!data->writable) 2979 if (!buffer->writable)
2980 return true; 2980 return true;
2981 2981
2982 mask = perf_data_size(data) - 1; 2982 mask = perf_data_size(buffer) - 1;
2983 2983
2984 offset = (offset - tail) & mask; 2984 offset = (offset - tail) & mask;
2985 head = (head - tail) & mask; 2985 head = (head - tail) & mask;
@@ -2992,7 +2992,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
2992 2992
2993static void perf_output_wakeup(struct perf_output_handle *handle) 2993static void perf_output_wakeup(struct perf_output_handle *handle)
2994{ 2994{
2995 atomic_set(&handle->data->poll, POLL_IN); 2995 atomic_set(&handle->buffer->poll, POLL_IN);
2996 2996
2997 if (handle->nmi) { 2997 if (handle->nmi) {
2998 handle->event->pending_wakeup = 1; 2998 handle->event->pending_wakeup = 1;
@@ -3012,45 +3012,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
3012 */ 3012 */
3013static void perf_output_get_handle(struct perf_output_handle *handle) 3013static void perf_output_get_handle(struct perf_output_handle *handle)
3014{ 3014{
3015 struct perf_mmap_data *data = handle->data; 3015 struct perf_buffer *buffer = handle->buffer;
3016 3016
3017 preempt_disable(); 3017 preempt_disable();
3018 local_inc(&data->nest); 3018 local_inc(&buffer->nest);
3019 handle->wakeup = local_read(&data->wakeup); 3019 handle->wakeup = local_read(&buffer->wakeup);
3020} 3020}
3021 3021
3022static void perf_output_put_handle(struct perf_output_handle *handle) 3022static void perf_output_put_handle(struct perf_output_handle *handle)
3023{ 3023{
3024 struct perf_mmap_data *data = handle->data; 3024 struct perf_buffer *buffer = handle->buffer;
3025 unsigned long head; 3025 unsigned long head;
3026 3026
3027again: 3027again:
3028 head = local_read(&data->head); 3028 head = local_read(&buffer->head);
3029 3029
3030 /* 3030 /*
3031 * IRQ/NMI can happen here, which means we can miss a head update. 3031 * IRQ/NMI can happen here, which means we can miss a head update.
3032 */ 3032 */
3033 3033
3034 if (!local_dec_and_test(&data->nest)) 3034 if (!local_dec_and_test(&buffer->nest))
3035 goto out; 3035 goto out;
3036 3036
3037 /* 3037 /*
3038 * Publish the known good head. Rely on the full barrier implied 3038 * Publish the known good head. Rely on the full barrier implied
3039 * by atomic_dec_and_test() order the data->head read and this 3039 * by atomic_dec_and_test() order the buffer->head read and this
3040 * write. 3040 * write.
3041 */ 3041 */
3042 data->user_page->data_head = head; 3042 buffer->user_page->data_head = head;
3043 3043
3044 /* 3044 /*
3045 * Now check if we missed an update, rely on the (compiler) 3045 * Now check if we missed an update, rely on the (compiler)
3046 * barrier in atomic_dec_and_test() to re-read data->head. 3046 * barrier in atomic_dec_and_test() to re-read buffer->head.
3047 */ 3047 */
3048 if (unlikely(head != local_read(&data->head))) { 3048 if (unlikely(head != local_read(&buffer->head))) {
3049 local_inc(&data->nest); 3049 local_inc(&buffer->nest);
3050 goto again; 3050 goto again;
3051 } 3051 }
3052 3052
3053 if (handle->wakeup != local_read(&data->wakeup)) 3053 if (handle->wakeup != local_read(&buffer->wakeup))
3054 perf_output_wakeup(handle); 3054 perf_output_wakeup(handle);
3055 3055
3056 out: 3056 out:
@@ -3070,12 +3070,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
3070 buf += size; 3070 buf += size;
3071 handle->size -= size; 3071 handle->size -= size;
3072 if (!handle->size) { 3072 if (!handle->size) {
3073 struct perf_mmap_data *data = handle->data; 3073 struct perf_buffer *buffer = handle->buffer;
3074 3074
3075 handle->page++; 3075 handle->page++;
3076 handle->page &= data->nr_pages - 1; 3076 handle->page &= buffer->nr_pages - 1;
3077 handle->addr = data->data_pages[handle->page]; 3077 handle->addr = buffer->data_pages[handle->page];
3078 handle->size = PAGE_SIZE << page_order(data); 3078 handle->size = PAGE_SIZE << page_order(buffer);
3079 } 3079 }
3080 } while (len); 3080 } while (len);
3081} 3081}
@@ -3084,7 +3084,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3084 struct perf_event *event, unsigned int size, 3084 struct perf_event *event, unsigned int size,
3085 int nmi, int sample) 3085 int nmi, int sample)
3086{ 3086{
3087 struct perf_mmap_data *data; 3087 struct perf_buffer *buffer;
3088 unsigned long tail, offset, head; 3088 unsigned long tail, offset, head;
3089 int have_lost; 3089 int have_lost;
3090 struct { 3090 struct {
@@ -3100,19 +3100,19 @@ int perf_output_begin(struct perf_output_handle *handle,
3100 if (event->parent) 3100 if (event->parent)
3101 event = event->parent; 3101 event = event->parent;
3102 3102
3103 data = rcu_dereference(event->data); 3103 buffer = rcu_dereference(event->buffer);
3104 if (!data) 3104 if (!buffer)
3105 goto out; 3105 goto out;
3106 3106
3107 handle->data = data; 3107 handle->buffer = buffer;
3108 handle->event = event; 3108 handle->event = event;
3109 handle->nmi = nmi; 3109 handle->nmi = nmi;
3110 handle->sample = sample; 3110 handle->sample = sample;
3111 3111
3112 if (!data->nr_pages) 3112 if (!buffer->nr_pages)
3113 goto out; 3113 goto out;
3114 3114
3115 have_lost = local_read(&data->lost); 3115 have_lost = local_read(&buffer->lost);
3116 if (have_lost) 3116 if (have_lost)
3117 size += sizeof(lost_event); 3117 size += sizeof(lost_event);
3118 3118
@@ -3124,30 +3124,30 @@ int perf_output_begin(struct perf_output_handle *handle,
3124 * tail pointer. So that all reads will be completed before the 3124 * tail pointer. So that all reads will be completed before the
3125 * write is issued. 3125 * write is issued.
3126 */ 3126 */
3127 tail = ACCESS_ONCE(data->user_page->data_tail); 3127 tail = ACCESS_ONCE(buffer->user_page->data_tail);
3128 smp_rmb(); 3128 smp_rmb();
3129 offset = head = local_read(&data->head); 3129 offset = head = local_read(&buffer->head);
3130 head += size; 3130 head += size;
3131 if (unlikely(!perf_output_space(data, tail, offset, head))) 3131 if (unlikely(!perf_output_space(buffer, tail, offset, head)))
3132 goto fail; 3132 goto fail;
3133 } while (local_cmpxchg(&data->head, offset, head) != offset); 3133 } while (local_cmpxchg(&buffer->head, offset, head) != offset);
3134 3134
3135 if (head - local_read(&data->wakeup) > data->watermark) 3135 if (head - local_read(&buffer->wakeup) > buffer->watermark)
3136 local_add(data->watermark, &data->wakeup); 3136 local_add(buffer->watermark, &buffer->wakeup);
3137 3137
3138 handle->page = offset >> (PAGE_SHIFT + page_order(data)); 3138 handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
3139 handle->page &= data->nr_pages - 1; 3139 handle->page &= buffer->nr_pages - 1;
3140 handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1); 3140 handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
3141 handle->addr = data->data_pages[handle->page]; 3141 handle->addr = buffer->data_pages[handle->page];
3142 handle->addr += handle->size; 3142 handle->addr += handle->size;
3143 handle->size = (PAGE_SIZE << page_order(data)) - handle->size; 3143 handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
3144 3144
3145 if (have_lost) { 3145 if (have_lost) {
3146 lost_event.header.type = PERF_RECORD_LOST; 3146 lost_event.header.type = PERF_RECORD_LOST;
3147 lost_event.header.misc = 0; 3147 lost_event.header.misc = 0;
3148 lost_event.header.size = sizeof(lost_event); 3148 lost_event.header.size = sizeof(lost_event);
3149 lost_event.id = event->id; 3149 lost_event.id = event->id;
3150 lost_event.lost = local_xchg(&data->lost, 0); 3150 lost_event.lost = local_xchg(&buffer->lost, 0);
3151 3151
3152 perf_output_put(handle, lost_event); 3152 perf_output_put(handle, lost_event);
3153 } 3153 }
@@ -3155,7 +3155,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3155 return 0; 3155 return 0;
3156 3156
3157fail: 3157fail:
3158 local_inc(&data->lost); 3158 local_inc(&buffer->lost);
3159 perf_output_put_handle(handle); 3159 perf_output_put_handle(handle);
3160out: 3160out:
3161 rcu_read_unlock(); 3161 rcu_read_unlock();
@@ -3166,15 +3166,15 @@ out:
3166void perf_output_end(struct perf_output_handle *handle) 3166void perf_output_end(struct perf_output_handle *handle)
3167{ 3167{
3168 struct perf_event *event = handle->event; 3168 struct perf_event *event = handle->event;
3169 struct perf_mmap_data *data = handle->data; 3169 struct perf_buffer *buffer = handle->buffer;
3170 3170
3171 int wakeup_events = event->attr.wakeup_events; 3171 int wakeup_events = event->attr.wakeup_events;
3172 3172
3173 if (handle->sample && wakeup_events) { 3173 if (handle->sample && wakeup_events) {
3174 int events = local_inc_return(&data->events); 3174 int events = local_inc_return(&buffer->events);
3175 if (events >= wakeup_events) { 3175 if (events >= wakeup_events) {
3176 local_sub(wakeup_events, &data->events); 3176 local_sub(wakeup_events, &buffer->events);
3177 local_inc(&data->wakeup); 3177 local_inc(&buffer->wakeup);
3178 } 3178 }
3179 } 3179 }
3180 3180
@@ -3211,7 +3211,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
3211 u64 values[4]; 3211 u64 values[4];
3212 int n = 0; 3212 int n = 0;
3213 3213
3214 values[n++] = atomic64_read(&event->count); 3214 values[n++] = perf_event_count(event);
3215 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 3215 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3216 values[n++] = event->total_time_enabled + 3216 values[n++] = event->total_time_enabled +
3217 atomic64_read(&event->child_total_time_enabled); 3217 atomic64_read(&event->child_total_time_enabled);
@@ -3248,7 +3248,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3248 if (leader != event) 3248 if (leader != event)
3249 leader->pmu->read(leader); 3249 leader->pmu->read(leader);
3250 3250
3251 values[n++] = atomic64_read(&leader->count); 3251 values[n++] = perf_event_count(leader);
3252 if (read_format & PERF_FORMAT_ID) 3252 if (read_format & PERF_FORMAT_ID)
3253 values[n++] = primary_event_id(leader); 3253 values[n++] = primary_event_id(leader);
3254 3254
@@ -3260,7 +3260,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3260 if (sub != event) 3260 if (sub != event)
3261 sub->pmu->read(sub); 3261 sub->pmu->read(sub);
3262 3262
3263 values[n++] = atomic64_read(&sub->count); 3263 values[n++] = perf_event_count(sub);
3264 if (read_format & PERF_FORMAT_ID) 3264 if (read_format & PERF_FORMAT_ID)
3265 values[n++] = primary_event_id(sub); 3265 values[n++] = primary_event_id(sub);
3266 3266
@@ -3491,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
3491/* 3491/*
3492 * task tracking -- fork/exit 3492 * task tracking -- fork/exit
3493 * 3493 *
3494 * enabled by: attr.comm | attr.mmap | attr.task 3494 * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
3495 */ 3495 */
3496 3496
3497struct perf_task_event { 3497struct perf_task_event {
@@ -3541,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
3541 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3541 if (event->cpu != -1 && event->cpu != smp_processor_id())
3542 return 0; 3542 return 0;
3543 3543
3544 if (event->attr.comm || event->attr.mmap || event->attr.task) 3544 if (event->attr.comm || event->attr.mmap ||
3545 event->attr.mmap_data || event->attr.task)
3545 return 1; 3546 return 1;
3546 3547
3547 return 0; 3548 return 0;
@@ -3766,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
3766} 3767}
3767 3768
3768static int perf_event_mmap_match(struct perf_event *event, 3769static int perf_event_mmap_match(struct perf_event *event,
3769 struct perf_mmap_event *mmap_event) 3770 struct perf_mmap_event *mmap_event,
3771 int executable)
3770{ 3772{
3771 if (event->state < PERF_EVENT_STATE_INACTIVE) 3773 if (event->state < PERF_EVENT_STATE_INACTIVE)
3772 return 0; 3774 return 0;
@@ -3774,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
3774 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3776 if (event->cpu != -1 && event->cpu != smp_processor_id())
3775 return 0; 3777 return 0;
3776 3778
3777 if (event->attr.mmap) 3779 if ((!executable && event->attr.mmap_data) ||
3780 (executable && event->attr.mmap))
3778 return 1; 3781 return 1;
3779 3782
3780 return 0; 3783 return 0;
3781} 3784}
3782 3785
3783static void perf_event_mmap_ctx(struct perf_event_context *ctx, 3786static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3784 struct perf_mmap_event *mmap_event) 3787 struct perf_mmap_event *mmap_event,
3788 int executable)
3785{ 3789{
3786 struct perf_event *event; 3790 struct perf_event *event;
3787 3791
3788 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3792 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3789 if (perf_event_mmap_match(event, mmap_event)) 3793 if (perf_event_mmap_match(event, mmap_event, executable))
3790 perf_event_mmap_output(event, mmap_event); 3794 perf_event_mmap_output(event, mmap_event);
3791 } 3795 }
3792} 3796}
@@ -3830,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
3830 if (!vma->vm_mm) { 3834 if (!vma->vm_mm) {
3831 name = strncpy(tmp, "[vdso]", sizeof(tmp)); 3835 name = strncpy(tmp, "[vdso]", sizeof(tmp));
3832 goto got_name; 3836 goto got_name;
3837 } else if (vma->vm_start <= vma->vm_mm->start_brk &&
3838 vma->vm_end >= vma->vm_mm->brk) {
3839 name = strncpy(tmp, "[heap]", sizeof(tmp));
3840 goto got_name;
3841 } else if (vma->vm_start <= vma->vm_mm->start_stack &&
3842 vma->vm_end >= vma->vm_mm->start_stack) {
3843 name = strncpy(tmp, "[stack]", sizeof(tmp));
3844 goto got_name;
3833 } 3845 }
3834 3846
3835 name = strncpy(tmp, "//anon", sizeof(tmp)); 3847 name = strncpy(tmp, "//anon", sizeof(tmp));
@@ -3846,17 +3858,17 @@ got_name:
3846 3858
3847 rcu_read_lock(); 3859 rcu_read_lock();
3848 cpuctx = &get_cpu_var(perf_cpu_context); 3860 cpuctx = &get_cpu_var(perf_cpu_context);
3849 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3861 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
3850 ctx = rcu_dereference(current->perf_event_ctxp); 3862 ctx = rcu_dereference(current->perf_event_ctxp);
3851 if (ctx) 3863 if (ctx)
3852 perf_event_mmap_ctx(ctx, mmap_event); 3864 perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
3853 put_cpu_var(perf_cpu_context); 3865 put_cpu_var(perf_cpu_context);
3854 rcu_read_unlock(); 3866 rcu_read_unlock();
3855 3867
3856 kfree(buf); 3868 kfree(buf);
3857} 3869}
3858 3870
3859void __perf_event_mmap(struct vm_area_struct *vma) 3871void perf_event_mmap(struct vm_area_struct *vma)
3860{ 3872{
3861 struct perf_mmap_event mmap_event; 3873 struct perf_mmap_event mmap_event;
3862 3874
@@ -4018,14 +4030,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
4018 hwc->last_period = hwc->sample_period; 4030 hwc->last_period = hwc->sample_period;
4019 4031
4020again: 4032again:
4021 old = val = atomic64_read(&hwc->period_left); 4033 old = val = local64_read(&hwc->period_left);
4022 if (val < 0) 4034 if (val < 0)
4023 return 0; 4035 return 0;
4024 4036
4025 nr = div64_u64(period + val, period); 4037 nr = div64_u64(period + val, period);
4026 offset = nr * period; 4038 offset = nr * period;
4027 val -= offset; 4039 val -= offset;
4028 if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) 4040 if (local64_cmpxchg(&hwc->period_left, old, val) != old)
4029 goto again; 4041 goto again;
4030 4042
4031 return nr; 4043 return nr;
@@ -4064,7 +4076,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4064{ 4076{
4065 struct hw_perf_event *hwc = &event->hw; 4077 struct hw_perf_event *hwc = &event->hw;
4066 4078
4067 atomic64_add(nr, &event->count); 4079 local64_add(nr, &event->count);
4068 4080
4069 if (!regs) 4081 if (!regs)
4070 return; 4082 return;
@@ -4075,7 +4087,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4075 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4087 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
4076 return perf_swevent_overflow(event, 1, nmi, data, regs); 4088 return perf_swevent_overflow(event, 1, nmi, data, regs);
4077 4089
4078 if (atomic64_add_negative(nr, &hwc->period_left)) 4090 if (local64_add_negative(nr, &hwc->period_left))
4079 return; 4091 return;
4080 4092
4081 perf_swevent_overflow(event, 0, nmi, data, regs); 4093 perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4213,14 +4225,12 @@ int perf_swevent_get_recursion_context(void)
4213} 4225}
4214EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); 4226EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4215 4227
4216void perf_swevent_put_recursion_context(int rctx) 4228void inline perf_swevent_put_recursion_context(int rctx)
4217{ 4229{
4218 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4230 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
4219 barrier(); 4231 barrier();
4220 cpuctx->recursion[rctx]--; 4232 cpuctx->recursion[rctx]--;
4221} 4233}
4222EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
4223
4224 4234
4225void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4235void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4226 struct pt_regs *regs, u64 addr) 4236 struct pt_regs *regs, u64 addr)
@@ -4368,8 +4378,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4368 u64 now; 4378 u64 now;
4369 4379
4370 now = cpu_clock(cpu); 4380 now = cpu_clock(cpu);
4371 prev = atomic64_xchg(&event->hw.prev_count, now); 4381 prev = local64_xchg(&event->hw.prev_count, now);
4372 atomic64_add(now - prev, &event->count); 4382 local64_add(now - prev, &event->count);
4373} 4383}
4374 4384
4375static int cpu_clock_perf_event_enable(struct perf_event *event) 4385static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4377,7 +4387,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
4377 struct hw_perf_event *hwc = &event->hw; 4387 struct hw_perf_event *hwc = &event->hw;
4378 int cpu = raw_smp_processor_id(); 4388 int cpu = raw_smp_processor_id();
4379 4389
4380 atomic64_set(&hwc->prev_count, cpu_clock(cpu)); 4390 local64_set(&hwc->prev_count, cpu_clock(cpu));
4381 perf_swevent_start_hrtimer(event); 4391 perf_swevent_start_hrtimer(event);
4382 4392
4383 return 0; 4393 return 0;
@@ -4409,9 +4419,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
4409 u64 prev; 4419 u64 prev;
4410 s64 delta; 4420 s64 delta;
4411 4421
4412 prev = atomic64_xchg(&event->hw.prev_count, now); 4422 prev = local64_xchg(&event->hw.prev_count, now);
4413 delta = now - prev; 4423 delta = now - prev;
4414 atomic64_add(delta, &event->count); 4424 local64_add(delta, &event->count);
4415} 4425}
4416 4426
4417static int task_clock_perf_event_enable(struct perf_event *event) 4427static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4421,7 +4431,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
4421 4431
4422 now = event->ctx->time; 4432 now = event->ctx->time;
4423 4433
4424 atomic64_set(&hwc->prev_count, now); 4434 local64_set(&hwc->prev_count, now);
4425 4435
4426 perf_swevent_start_hrtimer(event); 4436 perf_swevent_start_hrtimer(event);
4427 4437
@@ -4601,7 +4611,7 @@ static int perf_tp_event_match(struct perf_event *event,
4601} 4611}
4602 4612
4603void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, 4613void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4604 struct pt_regs *regs, struct hlist_head *head) 4614 struct pt_regs *regs, struct hlist_head *head, int rctx)
4605{ 4615{
4606 struct perf_sample_data data; 4616 struct perf_sample_data data;
4607 struct perf_event *event; 4617 struct perf_event *event;
@@ -4615,12 +4625,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4615 perf_sample_data_init(&data, addr); 4625 perf_sample_data_init(&data, addr);
4616 data.raw = &raw; 4626 data.raw = &raw;
4617 4627
4618 rcu_read_lock();
4619 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4628 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4620 if (perf_tp_event_match(event, &data, regs)) 4629 if (perf_tp_event_match(event, &data, regs))
4621 perf_swevent_add(event, count, 1, &data, regs); 4630 perf_swevent_add(event, count, 1, &data, regs);
4622 } 4631 }
4623 rcu_read_unlock(); 4632
4633 perf_swevent_put_recursion_context(rctx);
4624} 4634}
4625EXPORT_SYMBOL_GPL(perf_tp_event); 4635EXPORT_SYMBOL_GPL(perf_tp_event);
4626 4636
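perf_tp_event() now takes the recursion context as an argument and releases it on the way out, and the rcu_read_lock()/unlock() pair moves out of this function along with the rest of the per-call setup. A sketch of how a caller pairs the two, assuming the usual convention that a negative return from perf_swevent_get_recursion_context() signals detected recursion (the real callers are the generated perf probes shown in the ftrace.h template earlier):

	int rctx;

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		return;				/* nested in another swevent, drop it */

	/* ... fill in the raw record and find the per-event hlist head ... */

	perf_tp_event(addr, count, record, entry_size, regs, head, rctx);
	/* perf_tp_event() ends by calling perf_swevent_put_recursion_context(rctx) */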
@@ -4864,7 +4874,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4864 hwc->sample_period = 1; 4874 hwc->sample_period = 1;
4865 hwc->last_period = hwc->sample_period; 4875 hwc->last_period = hwc->sample_period;
4866 4876
4867 atomic64_set(&hwc->period_left, hwc->sample_period); 4877 local64_set(&hwc->period_left, hwc->sample_period);
4868 4878
4869 /* 4879 /*
4870 * we currently do not support PERF_FORMAT_GROUP on inherited events 4880 * we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4913,7 +4923,7 @@ done:
4913 4923
4914 if (!event->parent) { 4924 if (!event->parent) {
4915 atomic_inc(&nr_events); 4925 atomic_inc(&nr_events);
4916 if (event->attr.mmap) 4926 if (event->attr.mmap || event->attr.mmap_data)
4917 atomic_inc(&nr_mmap_events); 4927 atomic_inc(&nr_mmap_events);
4918 if (event->attr.comm) 4928 if (event->attr.comm)
4919 atomic_inc(&nr_comm_events); 4929 atomic_inc(&nr_comm_events);
@@ -5007,7 +5017,7 @@ err_size:
5007static int 5017static int
5008perf_event_set_output(struct perf_event *event, struct perf_event *output_event) 5018perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
5009{ 5019{
5010 struct perf_mmap_data *data = NULL, *old_data = NULL; 5020 struct perf_buffer *buffer = NULL, *old_buffer = NULL;
5011 int ret = -EINVAL; 5021 int ret = -EINVAL;
5012 5022
5013 if (!output_event) 5023 if (!output_event)
@@ -5037,19 +5047,19 @@ set:
5037 5047
5038 if (output_event) { 5048 if (output_event) {
5039 /* get the buffer we want to redirect to */ 5049 /* get the buffer we want to redirect to */
5040 data = perf_mmap_data_get(output_event); 5050 buffer = perf_buffer_get(output_event);
5041 if (!data) 5051 if (!buffer)
5042 goto unlock; 5052 goto unlock;
5043 } 5053 }
5044 5054
5045 old_data = event->data; 5055 old_buffer = event->buffer;
5046 rcu_assign_pointer(event->data, data); 5056 rcu_assign_pointer(event->buffer, buffer);
5047 ret = 0; 5057 ret = 0;
5048unlock: 5058unlock:
5049 mutex_unlock(&event->mmap_mutex); 5059 mutex_unlock(&event->mmap_mutex);
5050 5060
5051 if (old_data) 5061 if (old_buffer)
5052 perf_mmap_data_put(old_data); 5062 perf_buffer_put(old_buffer);
5053out: 5063out:
5054 return ret; 5064 return ret;
5055} 5065}
@@ -5298,7 +5308,7 @@ inherit_event(struct perf_event *parent_event,
5298 hwc->sample_period = sample_period; 5308 hwc->sample_period = sample_period;
5299 hwc->last_period = sample_period; 5309 hwc->last_period = sample_period;
5300 5310
5301 atomic64_set(&hwc->period_left, sample_period); 5311 local64_set(&hwc->period_left, sample_period);
5302 } 5312 }
5303 5313
5304 child_event->overflow_handler = parent_event->overflow_handler; 5314 child_event->overflow_handler = parent_event->overflow_handler;
@@ -5359,12 +5369,12 @@ static void sync_child_event(struct perf_event *child_event,
5359 if (child_event->attr.inherit_stat) 5369 if (child_event->attr.inherit_stat)
5360 perf_event_read_event(child_event, child); 5370 perf_event_read_event(child_event, child);
5361 5371
5362 child_val = atomic64_read(&child_event->count); 5372 child_val = perf_event_count(child_event);
5363 5373
5364 /* 5374 /*
5365 * Add back the child's count to the parent's count: 5375 * Add back the child's count to the parent's count:
5366 */ 5376 */
5367 atomic64_add(child_val, &parent_event->count); 5377 atomic64_add(child_val, &parent_event->child_count);
5368 atomic64_add(child_event->total_time_enabled, 5378 atomic64_add(child_event->total_time_enabled,
5369 &parent_event->child_total_time_enabled); 5379 &parent_event->child_total_time_enabled);
5370 atomic64_add(child_event->total_time_running, 5380 atomic64_add(child_event->total_time_running,
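
The atomic64_t-to-local64_t conversions in this file (hwc->prev_count, hwc->period_left, event->count) rely on these counters being updated only from the context that owns the event, so the cheaper local64 operations from include/asm-generic/local64.h are sufficient; cross-child aggregation moves to the separate child_count, which sync_child_event() above now updates and perf_event_count() reads back. A minimal sketch of the local64 update pattern, with illustrative structure and field names:

/*
 * Sketch only: a counter touched exclusively by its owning context can
 * use local64_t instead of atomic64_t and avoid the full atomic cost
 * on architectures that provide a cheaper local implementation.
 */
#include <asm/local64.h>

struct example_counter {
        local64_t       prev_count;     /* last observed timestamp */
        local64_t       count;          /* accumulated event count  */
};

static void example_counter_update(struct example_counter *c, u64 now)
{
        u64 prev = local64_xchg(&c->prev_count, now);

        local64_add(now - prev, &c->count);
}

static u64 example_counter_read(struct example_counter *c)
{
        return local64_read(&c->count);
}
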
diff --git a/kernel/sched.c b/kernel/sched.c
index f52a8801b7a2..265cf3a2b5d8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3726,7 +3726,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3726 * off of preempt_enable. Kernel preemptions off return from interrupt 3726 * off of preempt_enable. Kernel preemptions off return from interrupt
3727 * occur there and call schedule directly. 3727 * occur there and call schedule directly.
3728 */ 3728 */
3729asmlinkage void __sched preempt_schedule(void) 3729asmlinkage void __sched notrace preempt_schedule(void)
3730{ 3730{
3731 struct thread_info *ti = current_thread_info(); 3731 struct thread_info *ti = current_thread_info();
3732 3732
@@ -3738,9 +3738,9 @@ asmlinkage void __sched preempt_schedule(void)
3738 return; 3738 return;
3739 3739
3740 do { 3740 do {
3741 add_preempt_count(PREEMPT_ACTIVE); 3741 add_preempt_count_notrace(PREEMPT_ACTIVE);
3742 schedule(); 3742 schedule();
3743 sub_preempt_count(PREEMPT_ACTIVE); 3743 sub_preempt_count_notrace(PREEMPT_ACTIVE);
3744 3744
3745 /* 3745 /*
3746 * Check again in case we missed a preemption opportunity 3746 * Check again in case we missed a preemption opportunity
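
Marking preempt_schedule() notrace and switching it to the *_notrace preempt-count helpers closes a recursion window for the function tracer: the tracer now brackets its work with preempt_disable_notrace()/preempt_enable_notrace() (see the conversions below), and the enable side can end up in preempt_schedule(), which must therefore stay invisible to tracing. A rough sketch of the pattern a tracer-side callback is expected to follow, with an illustrative function name:

/*
 * Illustrative only: everything reachable from the function tracer is
 * kept out of the tracer's sight, both the function itself (notrace)
 * and its preemption bookkeeping (the _notrace helpers), otherwise the
 * tracer would keep tracing its own exit path.
 */
#include <linux/ftrace.h>
#include <linux/preempt.h>

static void notrace example_ftrace_callback(unsigned long ip,
                                            unsigned long parent_ip)
{
        preempt_disable_notrace();
        /* ... record the hit without re-entering the tracer ... */
        preempt_enable_notrace();       /* may call preempt_schedule() */
}
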
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8b1797c4545b..f5306cb0afb1 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -229,23 +229,6 @@ config FTRACE_SYSCALLS
229 help 229 help
230 Basic tracer to catch the syscall entry and exit events. 230 Basic tracer to catch the syscall entry and exit events.
231 231
232config BOOT_TRACER
233 bool "Trace boot initcalls"
234 select GENERIC_TRACER
235 select CONTEXT_SWITCH_TRACER
236 help
237 This tracer helps developers to optimize boot times: it records
238 the timings of the initcalls and traces key events and the identity
239 of tasks that can cause boot delays, such as context-switches.
240
241 Its aim is to be parsed by the scripts/bootgraph.pl tool to
242 produce pretty graphics about boot inefficiencies, giving a visual
243 representation of the delays during initcalls - but the raw
244 /debug/tracing/trace text output is readable too.
245
246 You must pass in initcall_debug and ftrace=initcall to the kernel
247 command line to enable this on bootup.
248
249config TRACE_BRANCH_PROFILING 232config TRACE_BRANCH_PROFILING
250 bool 233 bool
251 select GENERIC_TRACER 234 select GENERIC_TRACER
@@ -325,28 +308,6 @@ config BRANCH_TRACER
325 308
326 Say N if unsure. 309 Say N if unsure.
327 310
328config KSYM_TRACER
329 bool "Trace read and write access on kernel memory locations"
330 depends on HAVE_HW_BREAKPOINT
331 select TRACING
332 help
333 This tracer helps find read and write operations on any given kernel
334 symbol i.e. /proc/kallsyms.
335
336config PROFILE_KSYM_TRACER
337 bool "Profile all kernel memory accesses on 'watched' variables"
338 depends on KSYM_TRACER
339 help
340 This tracer profiles kernel accesses on variables watched through the
341 ksym tracer ftrace plugin. Depending upon the hardware, all read
342 and write operations on kernel variables can be monitored for
343 accesses.
344
345 The results will be displayed in:
346 /debugfs/tracing/profile_ksym
347
348 Say N if unsure.
349
350config STACK_TRACER 311config STACK_TRACER
351 bool "Trace max stack" 312 bool "Trace max stack"
352 depends on HAVE_FUNCTION_TRACER 313 depends on HAVE_FUNCTION_TRACER
@@ -371,26 +332,6 @@ config STACK_TRACER
371 332
372 Say N if unsure. 333 Say N if unsure.
373 334
374config KMEMTRACE
375 bool "Trace SLAB allocations"
376 select GENERIC_TRACER
377 help
378 kmemtrace provides tracing for slab allocator functions, such as
379 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
380 data is then fed to the userspace application in order to analyse
381 allocation hotspots, internal fragmentation and so on, making it
382 possible to see how well an allocator performs, as well as debug
383 and profile kernel code.
384
385 This requires an userspace application to use. See
386 Documentation/trace/kmemtrace.txt for more information.
387
388 Saying Y will make the kernel somewhat larger and slower. However,
389 if you disable kmemtrace at run-time or boot-time, the performance
390 impact is minimal (depending on the arch the kernel is built for).
391
392 If unsure, say N.
393
394config WORKQUEUE_TRACER 335config WORKQUEUE_TRACER
395 bool "Trace workqueues" 336 bool "Trace workqueues"
396 select GENERIC_TRACER 337 select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index ffb1a5b0550e..84b2c9908dae 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -38,10 +38,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
38obj-$(CONFIG_NOP_TRACER) += trace_nop.o 38obj-$(CONFIG_NOP_TRACER) += trace_nop.o
39obj-$(CONFIG_STACK_TRACER) += trace_stack.o 39obj-$(CONFIG_STACK_TRACER) += trace_stack.o
40obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 40obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 41obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 42obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
45obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 43obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
46obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 44obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
47ifeq ($(CONFIG_BLOCK),y) 45ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +53,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
55endif 53endif
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 54obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
57obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 55obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
58obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
59obj-$(CONFIG_EVENT_TRACING) += power-traces.o 56obj-$(CONFIG_EVENT_TRACING) += power-traces.o
60 57
61libftrace-y := ftrace.o 58libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6d2cb14f9449..0d88ce9b9fb8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1883 struct hlist_head *hhd; 1883 struct hlist_head *hhd;
1884 struct hlist_node *n; 1884 struct hlist_node *n;
1885 unsigned long key; 1885 unsigned long key;
1886 int resched;
1887 1886
1888 key = hash_long(ip, FTRACE_HASH_BITS); 1887 key = hash_long(ip, FTRACE_HASH_BITS);
1889 1888
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1897 * period. This syncs the hash iteration and freeing of items 1896 * period. This syncs the hash iteration and freeing of items
1898 * on the hash. rcu_read_lock is too dangerous here. 1897 * on the hash. rcu_read_lock is too dangerous here.
1899 */ 1898 */
1900 resched = ftrace_preempt_disable(); 1899 preempt_disable_notrace();
1901 hlist_for_each_entry_rcu(entry, n, hhd, node) { 1900 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1902 if (entry->ip == ip) 1901 if (entry->ip == ip)
1903 entry->ops->func(ip, parent_ip, &entry->data); 1902 entry->ops->func(ip, parent_ip, &entry->data);
1904 } 1903 }
1905 ftrace_preempt_enable(resched); 1904 preempt_enable_notrace();
1906} 1905}
1907 1906
1908static struct ftrace_ops trace_probe_ops __read_mostly = 1907static struct ftrace_ops trace_probe_ops __read_mostly =
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index bbfc1bb1660b..000000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,529 +0,0 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/tracepoint.h>
10#include <linux/seq_file.h>
11#include <linux/debugfs.h>
12#include <linux/dcache.h>
13#include <linux/fs.h>
14
15#include <linux/kmemtrace.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20/* Select an alternative, minimalistic output than the original one */
21#define TRACE_KMEM_OPT_MINIMAL 0x1
22
23static struct tracer_opt kmem_opts[] = {
24 /* Default disable the minimalistic output */
25 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
26 { }
27};
28
29static struct tracer_flags kmem_tracer_flags = {
30 .val = 0,
31 .opts = kmem_opts
32};
33
34static struct trace_array *kmemtrace_array;
35
36/* Trace allocations */
37static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
38 unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node)
44{
45 struct ftrace_event_call *call = &event_kmem_alloc;
46 struct trace_array *tr = kmemtrace_array;
47 struct kmemtrace_alloc_entry *entry;
48 struct ring_buffer_event *event;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
51 if (!event)
52 return;
53
54 entry = ring_buffer_event_data(event);
55 tracing_generic_entry_update(&entry->ent, 0, 0);
56
57 entry->ent.type = TRACE_KMEM_ALLOC;
58 entry->type_id = type_id;
59 entry->call_site = call_site;
60 entry->ptr = ptr;
61 entry->bytes_req = bytes_req;
62 entry->bytes_alloc = bytes_alloc;
63 entry->gfp_flags = gfp_flags;
64 entry->node = node;
65
66 if (!filter_check_discard(call, entry, tr->buffer, event))
67 ring_buffer_unlock_commit(tr->buffer, event);
68
69 trace_wake_up();
70}
71
72static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
73 unsigned long call_site,
74 const void *ptr)
75{
76 struct ftrace_event_call *call = &event_kmem_free;
77 struct trace_array *tr = kmemtrace_array;
78 struct kmemtrace_free_entry *entry;
79 struct ring_buffer_event *event;
80
81 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
82 if (!event)
83 return;
84 entry = ring_buffer_event_data(event);
85 tracing_generic_entry_update(&entry->ent, 0, 0);
86
87 entry->ent.type = TRACE_KMEM_FREE;
88 entry->type_id = type_id;
89 entry->call_site = call_site;
90 entry->ptr = ptr;
91
92 if (!filter_check_discard(call, entry, tr->buffer, event))
93 ring_buffer_unlock_commit(tr->buffer, event);
94
95 trace_wake_up();
96}
97
98static void kmemtrace_kmalloc(void *ignore,
99 unsigned long call_site,
100 const void *ptr,
101 size_t bytes_req,
102 size_t bytes_alloc,
103 gfp_t gfp_flags)
104{
105 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
106 bytes_req, bytes_alloc, gfp_flags, -1);
107}
108
109static void kmemtrace_kmem_cache_alloc(void *ignore,
110 unsigned long call_site,
111 const void *ptr,
112 size_t bytes_req,
113 size_t bytes_alloc,
114 gfp_t gfp_flags)
115{
116 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
117 bytes_req, bytes_alloc, gfp_flags, -1);
118}
119
120static void kmemtrace_kmalloc_node(void *ignore,
121 unsigned long call_site,
122 const void *ptr,
123 size_t bytes_req,
124 size_t bytes_alloc,
125 gfp_t gfp_flags,
126 int node)
127{
128 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
129 bytes_req, bytes_alloc, gfp_flags, node);
130}
131
132static void kmemtrace_kmem_cache_alloc_node(void *ignore,
133 unsigned long call_site,
134 const void *ptr,
135 size_t bytes_req,
136 size_t bytes_alloc,
137 gfp_t gfp_flags,
138 int node)
139{
140 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
141 bytes_req, bytes_alloc, gfp_flags, node);
142}
143
144static void
145kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
146{
147 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
148}
149
150static void kmemtrace_kmem_cache_free(void *ignore,
151 unsigned long call_site, const void *ptr)
152{
153 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
154}
155
156static int kmemtrace_start_probes(void)
157{
158 int err;
159
160 err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
161 if (err)
162 return err;
163 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
164 if (err)
165 return err;
166 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
167 if (err)
168 return err;
169 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
170 if (err)
171 return err;
172 err = register_trace_kfree(kmemtrace_kfree, NULL);
173 if (err)
174 return err;
175 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
176
177 return err;
178}
179
180static void kmemtrace_stop_probes(void)
181{
182 unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
183 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
184 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
185 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
186 unregister_trace_kfree(kmemtrace_kfree, NULL);
187 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
188}
189
190static int kmem_trace_init(struct trace_array *tr)
191{
192 kmemtrace_array = tr;
193
194 tracing_reset_online_cpus(tr);
195
196 kmemtrace_start_probes();
197
198 return 0;
199}
200
201static void kmem_trace_reset(struct trace_array *tr)
202{
203 kmemtrace_stop_probes();
204}
205
206static void kmemtrace_headers(struct seq_file *s)
207{
208 /* Don't need headers for the original kmemtrace output */
209 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
210 return;
211
212 seq_printf(s, "#\n");
213 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
214 " POINTER NODE CALLER\n");
215 seq_printf(s, "# FREE | | | | "
216 " | | | |\n");
217 seq_printf(s, "# |\n\n");
218}
219
220/*
221 * The following functions give the original output from kmemtrace,
222 * plus the origin CPU, since reordering occurs in-kernel now.
223 */
224
225#define KMEMTRACE_USER_ALLOC 0
226#define KMEMTRACE_USER_FREE 1
227
228struct kmemtrace_user_event {
229 u8 event_id;
230 u8 type_id;
231 u16 event_size;
232 u32 cpu;
233 u64 timestamp;
234 unsigned long call_site;
235 unsigned long ptr;
236};
237
238struct kmemtrace_user_event_alloc {
239 size_t bytes_req;
240 size_t bytes_alloc;
241 unsigned gfp_flags;
242 int node;
243};
244
245static enum print_line_t
246kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
247 struct trace_event *event)
248{
249 struct trace_seq *s = &iter->seq;
250 struct kmemtrace_alloc_entry *entry;
251 int ret;
252
253 trace_assign_type(entry, iter->ent);
254
255 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
256 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
257 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
258 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
259 (unsigned long)entry->gfp_flags, entry->node);
260
261 if (!ret)
262 return TRACE_TYPE_PARTIAL_LINE;
263 return TRACE_TYPE_HANDLED;
264}
265
266static enum print_line_t
267kmemtrace_print_free(struct trace_iterator *iter, int flags,
268 struct trace_event *event)
269{
270 struct trace_seq *s = &iter->seq;
271 struct kmemtrace_free_entry *entry;
272 int ret;
273
274 trace_assign_type(entry, iter->ent);
275
276 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
277 entry->type_id, (void *)entry->call_site,
278 (unsigned long)entry->ptr);
279
280 if (!ret)
281 return TRACE_TYPE_PARTIAL_LINE;
282 return TRACE_TYPE_HANDLED;
283}
284
285static enum print_line_t
286kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
287 struct trace_event *event)
288{
289 struct trace_seq *s = &iter->seq;
290 struct kmemtrace_alloc_entry *entry;
291 struct kmemtrace_user_event *ev;
292 struct kmemtrace_user_event_alloc *ev_alloc;
293
294 trace_assign_type(entry, iter->ent);
295
296 ev = trace_seq_reserve(s, sizeof(*ev));
297 if (!ev)
298 return TRACE_TYPE_PARTIAL_LINE;
299
300 ev->event_id = KMEMTRACE_USER_ALLOC;
301 ev->type_id = entry->type_id;
302 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
303 ev->cpu = iter->cpu;
304 ev->timestamp = iter->ts;
305 ev->call_site = entry->call_site;
306 ev->ptr = (unsigned long)entry->ptr;
307
308 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
309 if (!ev_alloc)
310 return TRACE_TYPE_PARTIAL_LINE;
311
312 ev_alloc->bytes_req = entry->bytes_req;
313 ev_alloc->bytes_alloc = entry->bytes_alloc;
314 ev_alloc->gfp_flags = entry->gfp_flags;
315 ev_alloc->node = entry->node;
316
317 return TRACE_TYPE_HANDLED;
318}
319
320static enum print_line_t
321kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
322 struct trace_event *event)
323{
324 struct trace_seq *s = &iter->seq;
325 struct kmemtrace_free_entry *entry;
326 struct kmemtrace_user_event *ev;
327
328 trace_assign_type(entry, iter->ent);
329
330 ev = trace_seq_reserve(s, sizeof(*ev));
331 if (!ev)
332 return TRACE_TYPE_PARTIAL_LINE;
333
334 ev->event_id = KMEMTRACE_USER_FREE;
335 ev->type_id = entry->type_id;
336 ev->event_size = sizeof(*ev);
337 ev->cpu = iter->cpu;
338 ev->timestamp = iter->ts;
339 ev->call_site = entry->call_site;
340 ev->ptr = (unsigned long)entry->ptr;
341
342 return TRACE_TYPE_HANDLED;
343}
344
345/* The two other following provide a more minimalistic output */
346static enum print_line_t
347kmemtrace_print_alloc_compress(struct trace_iterator *iter)
348{
349 struct kmemtrace_alloc_entry *entry;
350 struct trace_seq *s = &iter->seq;
351 int ret;
352
353 trace_assign_type(entry, iter->ent);
354
355 /* Alloc entry */
356 ret = trace_seq_printf(s, " + ");
357 if (!ret)
358 return TRACE_TYPE_PARTIAL_LINE;
359
360 /* Type */
361 switch (entry->type_id) {
362 case KMEMTRACE_TYPE_KMALLOC:
363 ret = trace_seq_printf(s, "K ");
364 break;
365 case KMEMTRACE_TYPE_CACHE:
366 ret = trace_seq_printf(s, "C ");
367 break;
368 case KMEMTRACE_TYPE_PAGES:
369 ret = trace_seq_printf(s, "P ");
370 break;
371 default:
372 ret = trace_seq_printf(s, "? ");
373 }
374
375 if (!ret)
376 return TRACE_TYPE_PARTIAL_LINE;
377
378 /* Requested */
379 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
380 if (!ret)
381 return TRACE_TYPE_PARTIAL_LINE;
382
383 /* Allocated */
384 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
385 if (!ret)
386 return TRACE_TYPE_PARTIAL_LINE;
387
388 /* Flags
389 * TODO: would be better to see the name of the GFP flag names
390 */
391 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
392 if (!ret)
393 return TRACE_TYPE_PARTIAL_LINE;
394
395 /* Pointer to allocated */
396 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
397 if (!ret)
398 return TRACE_TYPE_PARTIAL_LINE;
399
400 /* Node and call site*/
401 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
402 (void *)entry->call_site);
403 if (!ret)
404 return TRACE_TYPE_PARTIAL_LINE;
405
406 return TRACE_TYPE_HANDLED;
407}
408
409static enum print_line_t
410kmemtrace_print_free_compress(struct trace_iterator *iter)
411{
412 struct kmemtrace_free_entry *entry;
413 struct trace_seq *s = &iter->seq;
414 int ret;
415
416 trace_assign_type(entry, iter->ent);
417
418 /* Free entry */
419 ret = trace_seq_printf(s, " - ");
420 if (!ret)
421 return TRACE_TYPE_PARTIAL_LINE;
422
423 /* Type */
424 switch (entry->type_id) {
425 case KMEMTRACE_TYPE_KMALLOC:
426 ret = trace_seq_printf(s, "K ");
427 break;
428 case KMEMTRACE_TYPE_CACHE:
429 ret = trace_seq_printf(s, "C ");
430 break;
431 case KMEMTRACE_TYPE_PAGES:
432 ret = trace_seq_printf(s, "P ");
433 break;
434 default:
435 ret = trace_seq_printf(s, "? ");
436 }
437
438 if (!ret)
439 return TRACE_TYPE_PARTIAL_LINE;
440
441 /* Skip requested/allocated/flags */
442 ret = trace_seq_printf(s, " ");
443 if (!ret)
444 return TRACE_TYPE_PARTIAL_LINE;
445
446 /* Pointer to allocated */
447 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
448 if (!ret)
449 return TRACE_TYPE_PARTIAL_LINE;
450
451 /* Skip node and print call site*/
452 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
453 if (!ret)
454 return TRACE_TYPE_PARTIAL_LINE;
455
456 return TRACE_TYPE_HANDLED;
457}
458
459static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
460{
461 struct trace_entry *entry = iter->ent;
462
463 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
464 return TRACE_TYPE_UNHANDLED;
465
466 switch (entry->type) {
467 case TRACE_KMEM_ALLOC:
468 return kmemtrace_print_alloc_compress(iter);
469 case TRACE_KMEM_FREE:
470 return kmemtrace_print_free_compress(iter);
471 default:
472 return TRACE_TYPE_UNHANDLED;
473 }
474}
475
476static struct trace_event_functions kmem_trace_alloc_funcs = {
477 .trace = kmemtrace_print_alloc,
478 .binary = kmemtrace_print_alloc_user,
479};
480
481static struct trace_event kmem_trace_alloc = {
482 .type = TRACE_KMEM_ALLOC,
483 .funcs = &kmem_trace_alloc_funcs,
484};
485
486static struct trace_event_functions kmem_trace_free_funcs = {
487 .trace = kmemtrace_print_free,
488 .binary = kmemtrace_print_free_user,
489};
490
491static struct trace_event kmem_trace_free = {
492 .type = TRACE_KMEM_FREE,
493 .funcs = &kmem_trace_free_funcs,
494};
495
496static struct tracer kmem_tracer __read_mostly = {
497 .name = "kmemtrace",
498 .init = kmem_trace_init,
499 .reset = kmem_trace_reset,
500 .print_line = kmemtrace_print_line,
501 .print_header = kmemtrace_headers,
502 .flags = &kmem_tracer_flags
503};
504
505void kmemtrace_init(void)
506{
507 /* earliest opportunity to start kmem tracing */
508}
509
510static int __init init_kmem_tracer(void)
511{
512 if (!register_ftrace_event(&kmem_trace_alloc)) {
513 pr_warning("Warning: could not register kmem events\n");
514 return 1;
515 }
516
517 if (!register_ftrace_event(&kmem_trace_free)) {
518 pr_warning("Warning: could not register kmem events\n");
519 return 1;
520 }
521
522 if (register_tracer(&kmem_tracer) != 0) {
523 pr_warning("Warning: could not register the kmem tracer\n");
524 return 1;
525 }
526
527 return 0;
528}
529device_initcall(init_kmem_tracer);
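
With kmemtrace gone, the same slab allocator activity remains observable through the kmem tracepoints that the deleted code was hooking (see the register_trace_kmalloc() calls above). A hedged sketch of a module-level probe on the kmalloc tracepoint, mirroring the signature used by the removed kmemtrace_kmalloc() handler and assuming the tracepoint is visible to the module; the probe body and names are illustrative:

/*
 * Sketch of a kmalloc tracepoint consumer; not part of this patch.
 * The probe signature matches include/trace/events/kmem.h of this era.
 */
#include <linux/module.h>
#include <trace/events/kmem.h>

static void probe_kmalloc(void *ignore, unsigned long call_site,
                          const void *ptr, size_t bytes_req,
                          size_t bytes_alloc, gfp_t gfp_flags)
{
        pr_debug("kmalloc %zu/%zu from %pf -> %p\n",
                 bytes_req, bytes_alloc, (void *)call_site, ptr);
}

static int __init kmalloc_probe_init(void)
{
        return register_trace_kmalloc(probe_kmalloc, NULL);
}

static void __exit kmalloc_probe_exit(void)
{
        unregister_trace_kmalloc(probe_kmalloc, NULL);
        tracepoint_synchronize_unregister();
}

module_init(kmalloc_probe_init);
module_exit(kmalloc_probe_exit);
MODULE_LICENSE("GPL");
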
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1da7b6ea8b85..28d0615a513f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
2242 2242
2243#endif 2243#endif
2244 2244
2245static DEFINE_PER_CPU(int, rb_need_resched);
2246
2247/** 2245/**
2248 * ring_buffer_lock_reserve - reserve a part of the buffer 2246 * ring_buffer_lock_reserve - reserve a part of the buffer
2249 * @buffer: the ring buffer to reserve from 2247 * @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2264{ 2262{
2265 struct ring_buffer_per_cpu *cpu_buffer; 2263 struct ring_buffer_per_cpu *cpu_buffer;
2266 struct ring_buffer_event *event; 2264 struct ring_buffer_event *event;
2267 int cpu, resched; 2265 int cpu;
2268 2266
2269 if (ring_buffer_flags != RB_BUFFERS_ON) 2267 if (ring_buffer_flags != RB_BUFFERS_ON)
2270 return NULL; 2268 return NULL;
2271 2269
2272 /* If we are tracing schedule, we don't want to recurse */ 2270 /* If we are tracing schedule, we don't want to recurse */
2273 resched = ftrace_preempt_disable(); 2271 preempt_disable_notrace();
2274 2272
2275 if (atomic_read(&buffer->record_disabled)) 2273 if (atomic_read(&buffer->record_disabled))
2276 goto out_nocheck; 2274 goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2295 if (!event) 2293 if (!event)
2296 goto out; 2294 goto out;
2297 2295
2298 /*
2299 * Need to store resched state on this cpu.
2300 * Only the first needs to.
2301 */
2302
2303 if (preempt_count() == 1)
2304 per_cpu(rb_need_resched, cpu) = resched;
2305
2306 return event; 2296 return event;
2307 2297
2308 out: 2298 out:
2309 trace_recursive_unlock(); 2299 trace_recursive_unlock();
2310 2300
2311 out_nocheck: 2301 out_nocheck:
2312 ftrace_preempt_enable(resched); 2302 preempt_enable_notrace();
2313 return NULL; 2303 return NULL;
2314} 2304}
2315EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2305EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2355 2345
2356 trace_recursive_unlock(); 2346 trace_recursive_unlock();
2357 2347
2358 /* 2348 preempt_enable_notrace();
2359 * Only the last preempt count needs to restore preemption.
2360 */
2361 if (preempt_count() == 1)
2362 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2363 else
2364 preempt_enable_no_resched_notrace();
2365 2349
2366 return 0; 2350 return 0;
2367} 2351}
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
2469 2453
2470 trace_recursive_unlock(); 2454 trace_recursive_unlock();
2471 2455
2472 /* 2456 preempt_enable_notrace();
2473 * Only the last preempt count needs to restore preemption.
2474 */
2475 if (preempt_count() == 1)
2476 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2477 else
2478 preempt_enable_no_resched_notrace();
2479 2457
2480} 2458}
2481EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); 2459EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
2501 struct ring_buffer_event *event; 2479 struct ring_buffer_event *event;
2502 void *body; 2480 void *body;
2503 int ret = -EBUSY; 2481 int ret = -EBUSY;
2504 int cpu, resched; 2482 int cpu;
2505 2483
2506 if (ring_buffer_flags != RB_BUFFERS_ON) 2484 if (ring_buffer_flags != RB_BUFFERS_ON)
2507 return -EBUSY; 2485 return -EBUSY;
2508 2486
2509 resched = ftrace_preempt_disable(); 2487 preempt_disable_notrace();
2510 2488
2511 if (atomic_read(&buffer->record_disabled)) 2489 if (atomic_read(&buffer->record_disabled))
2512 goto out; 2490 goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
2536 2514
2537 ret = 0; 2515 ret = 0;
2538 out: 2516 out:
2539 ftrace_preempt_enable(resched); 2517 preempt_enable_notrace();
2540 2518
2541 return ret; 2519 return ret;
2542} 2520}
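
The ring buffer hunks drop the per-CPU rb_need_resched state that used to carry the saved need_resched() value from ring_buffer_lock_reserve() to the commit/discard paths; plain preempt_disable_notrace()/preempt_enable_notrace() now bracket the reservation. The writer-side API is unchanged; a minimal sketch of the usual reserve/commit pattern for reference, with an illustrative payload:

/*
 * Illustrative writer only: reserve space, fill the payload, commit.
 * Preemption handling is entirely internal to the ring buffer calls.
 */
#include <linux/ring_buffer.h>

static int example_rb_write(struct ring_buffer *buffer, u32 value)
{
        struct ring_buffer_event *event;
        u32 *body;

        event = ring_buffer_lock_reserve(buffer, sizeof(*body));
        if (!event)
                return -EBUSY;

        body = ring_buffer_event_data(event);
        *body = value;

        return ring_buffer_unlock_commit(buffer, event);
}
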
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 086d36316805..8683dec6946b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1404,7 +1404,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1404 struct bprint_entry *entry; 1404 struct bprint_entry *entry;
1405 unsigned long flags; 1405 unsigned long flags;
1406 int disable; 1406 int disable;
1407 int resched;
1408 int cpu, len = 0, size, pc; 1407 int cpu, len = 0, size, pc;
1409 1408
1410 if (unlikely(tracing_selftest_running || tracing_disabled)) 1409 if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1414,7 +1413,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1414 pause_graph_tracing(); 1413 pause_graph_tracing();
1415 1414
1416 pc = preempt_count(); 1415 pc = preempt_count();
1417 resched = ftrace_preempt_disable(); 1416 preempt_disable_notrace();
1418 cpu = raw_smp_processor_id(); 1417 cpu = raw_smp_processor_id();
1419 data = tr->data[cpu]; 1418 data = tr->data[cpu];
1420 1419
@@ -1452,7 +1451,7 @@ out_unlock:
1452 1451
1453out: 1452out:
1454 atomic_dec_return(&data->disabled); 1453 atomic_dec_return(&data->disabled);
1455 ftrace_preempt_enable(resched); 1454 preempt_enable_notrace();
1456 unpause_graph_tracing(); 1455 unpause_graph_tracing();
1457 1456
1458 return len; 1457 return len;
@@ -4597,9 +4596,6 @@ __init static int tracer_alloc_buffers(void)
4597 4596
4598 register_tracer(&nop_trace); 4597 register_tracer(&nop_trace);
4599 current_trace = &nop_trace; 4598 current_trace = &nop_trace;
4600#ifdef CONFIG_BOOT_TRACER
4601 register_tracer(&boot_tracer);
4602#endif
4603 /* All seems OK, enable tracing */ 4599 /* All seems OK, enable tracing */
4604 tracing_disabled = 0; 4600 tracing_disabled = 0;
4605 4601
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cd96399463f..84d3f123e86f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,10 +9,7 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/tracepoint.h> 10#include <linux/tracepoint.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h>
13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h> 12#include <linux/hw_breakpoint.h>
15
16#include <linux/trace_seq.h> 13#include <linux/trace_seq.h>
17#include <linux/ftrace_event.h> 14#include <linux/ftrace_event.h>
18 15
@@ -29,26 +26,14 @@ enum trace_type {
29 TRACE_MMIO_RW, 26 TRACE_MMIO_RW,
30 TRACE_MMIO_MAP, 27 TRACE_MMIO_MAP,
31 TRACE_BRANCH, 28 TRACE_BRANCH,
32 TRACE_BOOT_CALL,
33 TRACE_BOOT_RET,
34 TRACE_GRAPH_RET, 29 TRACE_GRAPH_RET,
35 TRACE_GRAPH_ENT, 30 TRACE_GRAPH_ENT,
36 TRACE_USER_STACK, 31 TRACE_USER_STACK,
37 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE,
39 TRACE_BLK, 32 TRACE_BLK,
40 TRACE_KSYM,
41 33
42 __TRACE_LAST_TYPE, 34 __TRACE_LAST_TYPE,
43}; 35};
44 36
45enum kmemtrace_type_id {
46 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
47 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
48 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
49};
50
51extern struct tracer boot_tracer;
52 37
53#undef __field 38#undef __field
54#define __field(type, item) type item; 39#define __field(type, item) type item;
@@ -209,18 +194,11 @@ extern void __ftrace_bad_type(void);
209 TRACE_MMIO_RW); \ 194 TRACE_MMIO_RW); \
210 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 195 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
211 TRACE_MMIO_MAP); \ 196 TRACE_MMIO_MAP); \
212 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
213 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
214 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ 197 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
215 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ 198 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
216 TRACE_GRAPH_ENT); \ 199 TRACE_GRAPH_ENT); \
217 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 200 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
218 TRACE_GRAPH_RET); \ 201 TRACE_GRAPH_RET); \
219 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
220 TRACE_KMEM_ALLOC); \
221 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
222 TRACE_KMEM_FREE); \
223 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
224 __ftrace_bad_type(); \ 202 __ftrace_bad_type(); \
225 } while (0) 203 } while (0)
226 204
@@ -381,8 +359,6 @@ int register_tracer(struct tracer *type);
381void unregister_tracer(struct tracer *type); 359void unregister_tracer(struct tracer *type);
382int is_tracing_stopped(void); 360int is_tracing_stopped(void);
383 361
384extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
385
386extern unsigned long nsecs_to_usecs(unsigned long nsecs); 362extern unsigned long nsecs_to_usecs(unsigned long nsecs);
387 363
388extern unsigned long tracing_thresh; 364extern unsigned long tracing_thresh;
@@ -456,8 +432,6 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
456 struct trace_array *tr); 432 struct trace_array *tr);
457extern int trace_selftest_startup_branch(struct tracer *trace, 433extern int trace_selftest_startup_branch(struct tracer *trace,
458 struct trace_array *tr); 434 struct trace_array *tr);
459extern int trace_selftest_startup_ksym(struct tracer *trace,
460 struct trace_array *tr);
461#endif /* CONFIG_FTRACE_STARTUP_TEST */ 435#endif /* CONFIG_FTRACE_STARTUP_TEST */
462 436
463extern void *head_page(struct trace_array_cpu *data); 437extern void *head_page(struct trace_array_cpu *data);
@@ -628,54 +602,6 @@ enum trace_iterator_flags {
628 602
629extern struct tracer nop_trace; 603extern struct tracer nop_trace;
630 604
631/**
632 * ftrace_preempt_disable - disable preemption scheduler safe
633 *
634 * When tracing can happen inside the scheduler, there exists
635 * cases that the tracing might happen before the need_resched
636 * flag is checked. If this happens and the tracer calls
637 * preempt_enable (after a disable), a schedule might take place
638 * causing an infinite recursion.
639 *
640 * To prevent this, we read the need_resched flag before
641 * disabling preemption. When we want to enable preemption we
642 * check the flag, if it is set, then we call preempt_enable_no_resched.
643 * Otherwise, we call preempt_enable.
644 *
645 * The rational for doing the above is that if need_resched is set
646 * and we have yet to reschedule, we are either in an atomic location
647 * (where we do not need to check for scheduling) or we are inside
648 * the scheduler and do not want to resched.
649 */
650static inline int ftrace_preempt_disable(void)
651{
652 int resched;
653
654 resched = need_resched();
655 preempt_disable_notrace();
656
657 return resched;
658}
659
660/**
661 * ftrace_preempt_enable - enable preemption scheduler safe
662 * @resched: the return value from ftrace_preempt_disable
663 *
664 * This is a scheduler safe way to enable preemption and not miss
665 * any preemption checks. The disabled saved the state of preemption.
666 * If resched is set, then we are either inside an atomic or
667 * are inside the scheduler (we would have already scheduled
668 * otherwise). In this case, we do not want to call normal
669 * preempt_enable, but preempt_enable_no_resched instead.
670 */
671static inline void ftrace_preempt_enable(int resched)
672{
673 if (resched)
674 preempt_enable_no_resched_notrace();
675 else
676 preempt_enable_notrace();
677}
678
679#ifdef CONFIG_BRANCH_TRACER 605#ifdef CONFIG_BRANCH_TRACER
680extern int enable_branch_tracing(struct trace_array *tr); 606extern int enable_branch_tracing(struct trace_array *tr);
681extern void disable_branch_tracing(void); 607extern void disable_branch_tracing(void);
@@ -766,6 +692,8 @@ struct filter_pred {
766 int pop_n; 692 int pop_n;
767}; 693};
768 694
695extern struct list_head ftrace_common_fields;
696
769extern enum regex_type 697extern enum regex_type
770filter_parse_regex(char *buff, int len, char **search, int *not); 698filter_parse_regex(char *buff, int len, char **search, int *not);
771extern void print_event_filter(struct ftrace_event_call *call, 699extern void print_event_filter(struct ftrace_event_call *call,
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index c21d5f3956ad..000000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,185 +0,0 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12#include <linux/time.h>
13
14#include "trace.h"
15#include "trace_output.h"
16
17static struct trace_array *boot_trace;
18static bool pre_initcalls_finished;
19
20/* Tells the boot tracer that the pre_smp_initcalls are finished.
21 * So we are ready .
22 * It doesn't enable sched events tracing however.
23 * You have to call enable_boot_trace to do so.
24 */
25void start_boot_trace(void)
26{
27 pre_initcalls_finished = true;
28}
29
30void enable_boot_trace(void)
31{
32 if (boot_trace && pre_initcalls_finished)
33 tracing_start_sched_switch_record();
34}
35
36void disable_boot_trace(void)
37{
38 if (boot_trace && pre_initcalls_finished)
39 tracing_stop_sched_switch_record();
40}
41
42static int boot_trace_init(struct trace_array *tr)
43{
44 boot_trace = tr;
45
46 if (!tr)
47 return 0;
48
49 tracing_reset_online_cpus(tr);
50
51 tracing_sched_switch_assign_trace(tr);
52 return 0;
53}
54
55static enum print_line_t
56initcall_call_print_line(struct trace_iterator *iter)
57{
58 struct trace_entry *entry = iter->ent;
59 struct trace_seq *s = &iter->seq;
60 struct trace_boot_call *field;
61 struct boot_trace_call *call;
62 u64 ts;
63 unsigned long nsec_rem;
64 int ret;
65
66 trace_assign_type(field, entry);
67 call = &field->boot_call;
68 ts = iter->ts;
69 nsec_rem = do_div(ts, NSEC_PER_SEC);
70
71 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
72 (unsigned long)ts, nsec_rem, call->func, call->caller);
73
74 if (!ret)
75 return TRACE_TYPE_PARTIAL_LINE;
76 else
77 return TRACE_TYPE_HANDLED;
78}
79
80static enum print_line_t
81initcall_ret_print_line(struct trace_iterator *iter)
82{
83 struct trace_entry *entry = iter->ent;
84 struct trace_seq *s = &iter->seq;
85 struct trace_boot_ret *field;
86 struct boot_trace_ret *init_ret;
87 u64 ts;
88 unsigned long nsec_rem;
89 int ret;
90
91 trace_assign_type(field, entry);
92 init_ret = &field->boot_ret;
93 ts = iter->ts;
94 nsec_rem = do_div(ts, NSEC_PER_SEC);
95
96 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
97 "returned %d after %llu msecs\n",
98 (unsigned long) ts,
99 nsec_rem,
100 init_ret->func, init_ret->result, init_ret->duration);
101
102 if (!ret)
103 return TRACE_TYPE_PARTIAL_LINE;
104 else
105 return TRACE_TYPE_HANDLED;
106}
107
108static enum print_line_t initcall_print_line(struct trace_iterator *iter)
109{
110 struct trace_entry *entry = iter->ent;
111
112 switch (entry->type) {
113 case TRACE_BOOT_CALL:
114 return initcall_call_print_line(iter);
115 case TRACE_BOOT_RET:
116 return initcall_ret_print_line(iter);
117 default:
118 return TRACE_TYPE_UNHANDLED;
119 }
120}
121
122struct tracer boot_tracer __read_mostly =
123{
124 .name = "initcall",
125 .init = boot_trace_init,
126 .reset = tracing_reset_online_cpus,
127 .print_line = initcall_print_line,
128};
129
130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
131{
132 struct ftrace_event_call *call = &event_boot_call;
133 struct ring_buffer_event *event;
134 struct ring_buffer *buffer;
135 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace;
137
138 if (!tr || !pre_initcalls_finished)
139 return;
140
141 /* Get its name now since this function could
142 * disappear because it is in the .init section.
143 */
144 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable();
146
147 buffer = tr->buffer;
148 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
149 sizeof(*entry), 0, 0);
150 if (!event)
151 goto out;
152 entry = ring_buffer_event_data(event);
153 entry->boot_call = *bt;
154 if (!filter_check_discard(call, entry, buffer, event))
155 trace_buffer_unlock_commit(buffer, event, 0, 0);
156 out:
157 preempt_enable();
158}
159
160void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
161{
162 struct ftrace_event_call *call = &event_boot_ret;
163 struct ring_buffer_event *event;
164 struct ring_buffer *buffer;
165 struct trace_boot_ret *entry;
166 struct trace_array *tr = boot_trace;
167
168 if (!tr || !pre_initcalls_finished)
169 return;
170
171 sprint_symbol(bt->func, (unsigned long)fn);
172 preempt_disable();
173
174 buffer = tr->buffer;
175 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
176 sizeof(*entry), 0, 0);
177 if (!event)
178 goto out;
179 entry = ring_buffer_event_data(event);
180 entry->boot_ret = *bt;
181 if (!filter_check_discard(call, entry, buffer, event))
182 trace_buffer_unlock_commit(buffer, event, 0, 0);
183 out:
184 preempt_enable();
185}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1a..52fda6c04ac3 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -32,16 +32,15 @@
32u64 notrace trace_clock_local(void) 32u64 notrace trace_clock_local(void)
33{ 33{
34 u64 clock; 34 u64 clock;
35 int resched;
36 35
37 /* 36 /*
38 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
39 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
40 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
41 */ 40 */
42 resched = ftrace_preempt_disable(); 41 preempt_disable_notrace();
43 clock = sched_clock(); 42 clock = sched_clock();
44 ftrace_preempt_enable(resched); 43 preempt_enable_notrace();
45 44
46 return clock; 45 return clock;
47} 46}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index dc008c1240da..84128371f254 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -271,33 +271,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
271 __entry->map_id, __entry->opcode) 271 __entry->map_id, __entry->opcode)
272); 272);
273 273
274FTRACE_ENTRY(boot_call, trace_boot_call,
275
276 TRACE_BOOT_CALL,
277
278 F_STRUCT(
279 __field_struct( struct boot_trace_call, boot_call )
280 __field_desc( pid_t, boot_call, caller )
281 __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
282 ),
283
284 F_printk("%d %s", __entry->caller, __entry->func)
285);
286
287FTRACE_ENTRY(boot_ret, trace_boot_ret,
288
289 TRACE_BOOT_RET,
290
291 F_STRUCT(
292 __field_struct( struct boot_trace_ret, boot_ret )
293 __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
294 __field_desc( int, boot_ret, result )
295 __field_desc( unsigned long, boot_ret, duration )
296 ),
297
298 F_printk("%s %d %lx",
299 __entry->func, __entry->result, __entry->duration)
300);
301 274
302#define TRACE_FUNC_SIZE 30 275#define TRACE_FUNC_SIZE 30
303#define TRACE_FILE_SIZE 20 276#define TRACE_FILE_SIZE 20
@@ -318,53 +291,3 @@ FTRACE_ENTRY(branch, trace_branch,
318 __entry->func, __entry->file, __entry->correct) 291 __entry->func, __entry->file, __entry->correct)
319); 292);
320 293
321FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
322
323 TRACE_KMEM_ALLOC,
324
325 F_STRUCT(
326 __field( enum kmemtrace_type_id, type_id )
327 __field( unsigned long, call_site )
328 __field( const void *, ptr )
329 __field( size_t, bytes_req )
330 __field( size_t, bytes_alloc )
331 __field( gfp_t, gfp_flags )
332 __field( int, node )
333 ),
334
335 F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
336 " flags:%x node:%d",
337 __entry->type_id, __entry->call_site, __entry->ptr,
338 __entry->bytes_req, __entry->bytes_alloc,
339 __entry->gfp_flags, __entry->node)
340);
341
342FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
343
344 TRACE_KMEM_FREE,
345
346 F_STRUCT(
347 __field( enum kmemtrace_type_id, type_id )
348 __field( unsigned long, call_site )
349 __field( const void *, ptr )
350 ),
351
352 F_printk("type:%u call_site:%lx ptr:%p",
353 __entry->type_id, __entry->call_site, __entry->ptr)
354);
355
356FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
357
358 TRACE_KSYM,
359
360 F_STRUCT(
361 __field( unsigned long, ip )
362 __field( unsigned char, type )
363 __array( char , cmd, TASK_COMM_LEN )
364 __field( unsigned long, addr )
365 ),
366
367 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
368 (void *)__entry->ip, (unsigned int)__entry->type,
369 (void *)__entry->addr, __entry->cmd)
370);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 8a2b73f7c068..23751659582e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,8 +9,6 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
13
14static char *perf_trace_buf[4]; 12static char *perf_trace_buf[4];
15 13
16/* 14/*
@@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
56 } 54 }
57 } 55 }
58 56
59 if (tp_event->class->reg) 57 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
60 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
61 else
62 ret = tracepoint_probe_register(tp_event->name,
63 tp_event->class->perf_probe,
64 tp_event);
65
66 if (ret) 58 if (ret)
67 goto fail; 59 goto fail;
68 60
@@ -96,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)
96 mutex_lock(&event_mutex); 88 mutex_lock(&event_mutex);
97 list_for_each_entry(tp_event, &ftrace_events, list) { 89 list_for_each_entry(tp_event, &ftrace_events, list) {
98 if (tp_event->event.type == event_id && 90 if (tp_event->event.type == event_id &&
99 tp_event->class && 91 tp_event->class && tp_event->class->reg &&
100 (tp_event->class->perf_probe ||
101 tp_event->class->reg) &&
102 try_module_get(tp_event->mod)) { 92 try_module_get(tp_event->mod)) {
103 ret = perf_trace_event_init(tp_event, p_event); 93 ret = perf_trace_event_init(tp_event, p_event);
104 break; 94 break;
@@ -138,12 +128,7 @@ void perf_trace_destroy(struct perf_event *p_event)
138 if (--tp_event->perf_refcount > 0) 128 if (--tp_event->perf_refcount > 0)
139 goto out; 129 goto out;
140 130
141 if (tp_event->class->reg) 131 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
142 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
143 else
144 tracepoint_probe_unregister(tp_event->name,
145 tp_event->class->perf_probe,
146 tp_event);
147 132
148 /* 133 /*
149 * Ensure our callback won't be called anymore. See 134 * Ensure our callback won't be called anymore. See
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53cffc0b0801..e8e6043f4d29 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -28,6 +28,7 @@
28DEFINE_MUTEX(event_mutex); 28DEFINE_MUTEX(event_mutex);
29 29
30LIST_HEAD(ftrace_events); 30LIST_HEAD(ftrace_events);
31LIST_HEAD(ftrace_common_fields);
31 32
32struct list_head * 33struct list_head *
33trace_get_fields(struct ftrace_event_call *event_call) 34trace_get_fields(struct ftrace_event_call *event_call)
@@ -37,15 +38,11 @@ trace_get_fields(struct ftrace_event_call *event_call)
37 return event_call->class->get_fields(event_call); 38 return event_call->class->get_fields(event_call);
38} 39}
39 40
40int trace_define_field(struct ftrace_event_call *call, const char *type, 41static int __trace_define_field(struct list_head *head, const char *type,
41 const char *name, int offset, int size, int is_signed, 42 const char *name, int offset, int size,
42 int filter_type) 43 int is_signed, int filter_type)
43{ 44{
44 struct ftrace_event_field *field; 45 struct ftrace_event_field *field;
45 struct list_head *head;
46
47 if (WARN_ON(!call->class))
48 return 0;
49 46
50 field = kzalloc(sizeof(*field), GFP_KERNEL); 47 field = kzalloc(sizeof(*field), GFP_KERNEL);
51 if (!field) 48 if (!field)
@@ -68,7 +65,6 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
68 field->size = size; 65 field->size = size;
69 field->is_signed = is_signed; 66 field->is_signed = is_signed;
70 67
71 head = trace_get_fields(call);
72 list_add(&field->link, head); 68 list_add(&field->link, head);
73 69
74 return 0; 70 return 0;
@@ -80,17 +76,32 @@ err:
80 76
81 return -ENOMEM; 77 return -ENOMEM;
82} 78}
79
80int trace_define_field(struct ftrace_event_call *call, const char *type,
81 const char *name, int offset, int size, int is_signed,
82 int filter_type)
83{
84 struct list_head *head;
85
86 if (WARN_ON(!call->class))
87 return 0;
88
89 head = trace_get_fields(call);
90 return __trace_define_field(head, type, name, offset, size,
91 is_signed, filter_type);
92}
83EXPORT_SYMBOL_GPL(trace_define_field); 93EXPORT_SYMBOL_GPL(trace_define_field);
84 94
85#define __common_field(type, item) \ 95#define __common_field(type, item) \
86 ret = trace_define_field(call, #type, "common_" #item, \ 96 ret = __trace_define_field(&ftrace_common_fields, #type, \
87 offsetof(typeof(ent), item), \ 97 "common_" #item, \
88 sizeof(ent.item), \ 98 offsetof(typeof(ent), item), \
89 is_signed_type(type), FILTER_OTHER); \ 99 sizeof(ent.item), \
100 is_signed_type(type), FILTER_OTHER); \
90 if (ret) \ 101 if (ret) \
91 return ret; 102 return ret;
92 103
93static int trace_define_common_fields(struct ftrace_event_call *call) 104static int trace_define_common_fields(void)
94{ 105{
95 int ret; 106 int ret;
96 struct trace_entry ent; 107 struct trace_entry ent;
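
The refactoring above splits field registration so that the five common fields are defined once on the new ftrace_common_fields list instead of being re-added to every event; an event's ->define_fields() callback now only describes its own payload via trace_define_field(). A hedged sketch of such a callback, with an illustrative entry structure:

/*
 * Illustrative ->define_fields() implementation; struct and names are
 * made up for the example.  Common fields are no longer defined here.
 */
#include <linux/ftrace_event.h>

struct example_entry {
        struct trace_entry      ent;
        unsigned long           ip;
        int                     value;
};

static int example_define_fields(struct ftrace_event_call *call)
{
        int ret;

        ret = trace_define_field(call, "unsigned long", "ip",
                                 offsetof(struct example_entry, ip),
                                 sizeof(unsigned long), 0, FILTER_OTHER);
        if (ret)
                return ret;

        return trace_define_field(call, "int", "value",
                                  offsetof(struct example_entry, value),
                                  sizeof(int), 1, FILTER_OTHER);
}
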
@@ -130,6 +141,35 @@ int trace_event_raw_init(struct ftrace_event_call *call)
130} 141}
131EXPORT_SYMBOL_GPL(trace_event_raw_init); 142EXPORT_SYMBOL_GPL(trace_event_raw_init);
132 143
144int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
145{
146 switch (type) {
147 case TRACE_REG_REGISTER:
148 return tracepoint_probe_register(call->name,
149 call->class->probe,
150 call);
151 case TRACE_REG_UNREGISTER:
152 tracepoint_probe_unregister(call->name,
153 call->class->probe,
154 call);
155 return 0;
156
157#ifdef CONFIG_PERF_EVENTS
158 case TRACE_REG_PERF_REGISTER:
159 return tracepoint_probe_register(call->name,
160 call->class->perf_probe,
161 call);
162 case TRACE_REG_PERF_UNREGISTER:
163 tracepoint_probe_unregister(call->name,
164 call->class->perf_probe,
165 call);
166 return 0;
167#endif
168 }
169 return 0;
170}
171EXPORT_SYMBOL_GPL(ftrace_event_reg);
172
133static int ftrace_event_enable_disable(struct ftrace_event_call *call, 173static int ftrace_event_enable_disable(struct ftrace_event_call *call,
134 int enable) 174 int enable)
135{ 175{
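
ftrace_event_reg() gives ordinary tracepoint-backed events a single ->reg() entry point that multiplexes trace and perf (un)registration, which is why the call sites below can drop their class->probe fallbacks and require class->reg unconditionally. A hedged sketch of how an event class can be wired up, reusing the define_fields sketch above; field values are illustrative:

/*
 * Illustrative event class; example_define_fields() is the sketch
 * shown earlier, and ftrace_event_reg() is the generic helper added
 * in this hunk.
 */
static struct ftrace_event_class example_event_class = {
        .system         = "example",
        .reg            = ftrace_event_reg,
        .define_fields  = example_define_fields,
};
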
@@ -140,23 +180,13 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
140 if (call->flags & TRACE_EVENT_FL_ENABLED) { 180 if (call->flags & TRACE_EVENT_FL_ENABLED) {
141 call->flags &= ~TRACE_EVENT_FL_ENABLED; 181 call->flags &= ~TRACE_EVENT_FL_ENABLED;
142 tracing_stop_cmdline_record(); 182 tracing_stop_cmdline_record();
143 if (call->class->reg) 183 call->class->reg(call, TRACE_REG_UNREGISTER);
144 call->class->reg(call, TRACE_REG_UNREGISTER);
145 else
146 tracepoint_probe_unregister(call->name,
147 call->class->probe,
148 call);
149 } 184 }
150 break; 185 break;
151 case 1: 186 case 1:
152 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) { 187 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
153 tracing_start_cmdline_record(); 188 tracing_start_cmdline_record();
154 if (call->class->reg) 189 ret = call->class->reg(call, TRACE_REG_REGISTER);
155 ret = call->class->reg(call, TRACE_REG_REGISTER);
156 else
157 ret = tracepoint_probe_register(call->name,
158 call->class->probe,
159 call);
160 if (ret) { 190 if (ret) {
161 tracing_stop_cmdline_record(); 191 tracing_stop_cmdline_record();
162 pr_info("event trace: Could not enable event " 192 pr_info("event trace: Could not enable event "
@@ -194,8 +224,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
194 mutex_lock(&event_mutex); 224 mutex_lock(&event_mutex);
195 list_for_each_entry(call, &ftrace_events, list) { 225 list_for_each_entry(call, &ftrace_events, list) {
196 226
197 if (!call->name || !call->class || 227 if (!call->name || !call->class || !call->class->reg)
198 (!call->class->probe && !call->class->reg))
199 continue; 228 continue;
200 229
201 if (match && 230 if (match &&
@@ -321,7 +350,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
321 * The ftrace subsystem is for showing formats only. 350 * The ftrace subsystem is for showing formats only.
322 * They can not be enabled or disabled via the event files. 351 * They can not be enabled or disabled via the event files.
323 */ 352 */
324 if (call->class && (call->class->probe || call->class->reg)) 353 if (call->class && call->class->reg)
325 return call; 354 return call;
326 } 355 }
327 356
@@ -474,8 +503,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
474 503
475 mutex_lock(&event_mutex); 504 mutex_lock(&event_mutex);
476 list_for_each_entry(call, &ftrace_events, list) { 505 list_for_each_entry(call, &ftrace_events, list) {
477 if (!call->name || !call->class || 506 if (!call->name || !call->class || !call->class->reg)
478 (!call->class->probe && !call->class->reg))
479 continue; 507 continue;
480 508
481 if (system && strcmp(call->class->system, system) != 0) 509 if (system && strcmp(call->class->system, system) != 0)
@@ -544,32 +572,10 @@ out:
544 return ret; 572 return ret;
545} 573}
546 574
547static ssize_t 575static void print_event_fields(struct trace_seq *s, struct list_head *head)
548event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
549 loff_t *ppos)
550{ 576{
551 struct ftrace_event_call *call = filp->private_data;
552 struct ftrace_event_field *field; 577 struct ftrace_event_field *field;
553 struct list_head *head;
554 struct trace_seq *s;
555 int common_field_count = 5;
556 char *buf;
557 int r = 0;
558
559 if (*ppos)
560 return 0;
561
562 s = kmalloc(sizeof(*s), GFP_KERNEL);
563 if (!s)
564 return -ENOMEM;
565
566 trace_seq_init(s);
567 578
568 trace_seq_printf(s, "name: %s\n", call->name);
569 trace_seq_printf(s, "ID: %d\n", call->event.type);
570 trace_seq_printf(s, "format:\n");
571
572 head = trace_get_fields(call);
573 list_for_each_entry_reverse(field, head, link) { 579 list_for_each_entry_reverse(field, head, link) {
574 /* 580 /*
575 * Smartly shows the array type(except dynamic array). 581 * Smartly shows the array type(except dynamic array).
@@ -584,29 +590,54 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
584 array_descriptor = NULL; 590 array_descriptor = NULL;
585 591
586 if (!array_descriptor) { 592 if (!array_descriptor) {
587 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" 593 trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
588 "\tsize:%u;\tsigned:%d;\n", 594 "\tsize:%u;\tsigned:%d;\n",
589 field->type, field->name, field->offset, 595 field->type, field->name, field->offset,
590 field->size, !!field->is_signed); 596 field->size, !!field->is_signed);
591 } else { 597 } else {
592 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" 598 trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
593 "\tsize:%u;\tsigned:%d;\n", 599 "\tsize:%u;\tsigned:%d;\n",
594 (int)(array_descriptor - field->type), 600 (int)(array_descriptor - field->type),
595 field->type, field->name, 601 field->type, field->name,
596 array_descriptor, field->offset, 602 array_descriptor, field->offset,
597 field->size, !!field->is_signed); 603 field->size, !!field->is_signed);
598 } 604 }
605 }
606}
599 607
600 if (--common_field_count == 0) 608static ssize_t
601 r = trace_seq_printf(s, "\n"); 609event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
610 loff_t *ppos)
611{
612 struct ftrace_event_call *call = filp->private_data;
613 struct list_head *head;
614 struct trace_seq *s;
615 char *buf;
616 int r;
602 617
603 if (!r) 618 if (*ppos)
604 break; 619 return 0;
605 } 620
621 s = kmalloc(sizeof(*s), GFP_KERNEL);
622 if (!s)
623 return -ENOMEM;
624
625 trace_seq_init(s);
626
627 trace_seq_printf(s, "name: %s\n", call->name);
628 trace_seq_printf(s, "ID: %d\n", call->event.type);
629 trace_seq_printf(s, "format:\n");
630
631 /* print common fields */
632 print_event_fields(s, &ftrace_common_fields);
633
634 trace_seq_putc(s, '\n');
606 635
607 if (r) 636 /* print event specific fields */
608 r = trace_seq_printf(s, "\nprint fmt: %s\n", 637 head = trace_get_fields(call);
609 call->print_fmt); 638 print_event_fields(s, head);
639
640 r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);
610 641
611 if (!r) { 642 if (!r) {
612 /* 643 /*
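The format-file printing loop is factored out into print_event_fields(), which is then called once for the shared ftrace_common_fields list and once for the event's own fields, so the hard-coded common_field_count bookkeeping disappears. A minimal user-space sketch of the same pattern (the struct layout and field values below are made up for illustration, not the kernel's):

#include <stdio.h>

struct field {
	const char *type;
	const char *name;
	unsigned int offset;
	unsigned int size;
	int is_signed;
};

/* One helper prints any field table, mirroring print_event_fields(). */
static void print_fields(const struct field *f, int n)
{
	for (int i = 0; i < n; i++)
		printf("\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
		       f[i].type, f[i].name, f[i].offset, f[i].size, f[i].is_signed);
}

int main(void)
{
	const struct field common[] = {
		{ "unsigned short", "common_type", 0, 2, 0 },
		{ "int",            "common_pid",  4, 4, 1 },
	};
	const struct field event[] = {
		{ "unsigned long", "ip", 8, 8, 0 },
	};

	print_fields(common, 2);	/* shared header fields */
	putchar('\n');			/* blank separator, as the new code emits */
	print_fields(event, 1);		/* event-specific fields */
	return 0;
}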
@@ -963,35 +994,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
963 return -1; 994 return -1;
964 } 995 }
965 996
966 if (call->class->probe || call->class->reg) 997 if (call->class->reg)
967 trace_create_file("enable", 0644, call->dir, call, 998 trace_create_file("enable", 0644, call->dir, call,
968 enable); 999 enable);
969 1000
970#ifdef CONFIG_PERF_EVENTS 1001#ifdef CONFIG_PERF_EVENTS
971 if (call->event.type && (call->class->perf_probe || call->class->reg)) 1002 if (call->event.type && call->class->reg)
972 trace_create_file("id", 0444, call->dir, call, 1003 trace_create_file("id", 0444, call->dir, call,
973 id); 1004 id);
974#endif 1005#endif
975 1006
976 if (call->class->define_fields) { 1007 /*
977 /* 1008 * Other events may have the same class. Only update
978 * Other events may have the same class. Only update 1009 * the fields if they are not already defined.
979 * the fields if they are not already defined. 1010 */
980 */ 1011 head = trace_get_fields(call);
981 head = trace_get_fields(call); 1012 if (list_empty(head)) {
982 if (list_empty(head)) { 1013 ret = call->class->define_fields(call);
983 ret = trace_define_common_fields(call); 1014 if (ret < 0) {
984 if (!ret) 1015 pr_warning("Could not initialize trace point"
985 ret = call->class->define_fields(call); 1016 " events/%s\n", call->name);
986 if (ret < 0) { 1017 return ret;
987 pr_warning("Could not initialize trace point"
988 " events/%s\n", call->name);
989 return ret;
990 }
991 } 1018 }
992 trace_create_file("filter", 0644, call->dir, call,
993 filter);
994 } 1019 }
1020 trace_create_file("filter", 0644, call->dir, call,
1021 filter);
995 1022
996 trace_create_file("format", 0444, call->dir, call, 1023 trace_create_file("format", 0444, call->dir, call,
997 format); 1024 format);
@@ -999,11 +1026,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
999 return 0; 1026 return 0;
1000} 1027}
1001 1028
1002static int __trace_add_event_call(struct ftrace_event_call *call) 1029static int
1030__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1031 const struct file_operations *id,
1032 const struct file_operations *enable,
1033 const struct file_operations *filter,
1034 const struct file_operations *format)
1003{ 1035{
1004 struct dentry *d_events; 1036 struct dentry *d_events;
1005 int ret; 1037 int ret;
1006 1038
1039 /* The linker may leave blanks */
1007 if (!call->name) 1040 if (!call->name)
1008 return -EINVAL; 1041 return -EINVAL;
1009 1042
@@ -1011,8 +1044,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
1011 ret = call->class->raw_init(call); 1044 ret = call->class->raw_init(call);
1012 if (ret < 0) { 1045 if (ret < 0) {
1013 if (ret != -ENOSYS) 1046 if (ret != -ENOSYS)
1014 pr_warning("Could not initialize trace " 1047 pr_warning("Could not initialize trace events/%s\n",
1015 "events/%s\n", call->name); 1048 call->name);
1016 return ret; 1049 return ret;
1017 } 1050 }
1018 } 1051 }
@@ -1021,11 +1054,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
1021 if (!d_events) 1054 if (!d_events)
1022 return -ENOENT; 1055 return -ENOENT;
1023 1056
1024 ret = event_create_dir(call, d_events, &ftrace_event_id_fops, 1057 ret = event_create_dir(call, d_events, id, enable, filter, format);
1025 &ftrace_enable_fops, &ftrace_event_filter_fops,
1026 &ftrace_event_format_fops);
1027 if (!ret) 1058 if (!ret)
1028 list_add(&call->list, &ftrace_events); 1059 list_add(&call->list, &ftrace_events);
1060 call->mod = mod;
1029 1061
1030 return ret; 1062 return ret;
1031} 1063}
@@ -1035,7 +1067,10 @@ int trace_add_event_call(struct ftrace_event_call *call)
1035{ 1067{
1036 int ret; 1068 int ret;
1037 mutex_lock(&event_mutex); 1069 mutex_lock(&event_mutex);
1038 ret = __trace_add_event_call(call); 1070 ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1071 &ftrace_enable_fops,
1072 &ftrace_event_filter_fops,
1073 &ftrace_event_format_fops);
1039 mutex_unlock(&event_mutex); 1074 mutex_unlock(&event_mutex);
1040 return ret; 1075 return ret;
1041} 1076}
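trace_add_event_call() becomes a thin wrapper: the file_operations it wants are handed to __trace_add_event_call(), so built-in events and (below) module events register through the same helper instead of duplicating the raw_init/event_create_dir sequence. A rough user-space sketch of that parameterization (the types and names are placeholders):

#include <stdio.h>

/* Hypothetical stand-in for the debugfs file_operations bundles. */
struct file_ops { const char *name; };

/* One shared registration path; callers supply the ops they own. */
static int add_event(const char *event, const struct file_ops *id,
		     const struct file_ops *enable)
{
	printf("register %s (id=%s, enable=%s)\n", event, id->name, enable->name);
	return 0;
}

int main(void)
{
	const struct file_ops core_id     = { "ftrace_event_id_fops" };
	const struct file_ops core_enable = { "ftrace_enable_fops" };
	const struct file_ops mod_id      = { "per-module id fops" };
	const struct file_ops mod_enable  = { "per-module enable fops" };

	add_event("builtin event", &core_id, &core_enable);
	add_event("module event",  &mod_id,  &mod_enable);
	return 0;
}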
@@ -1152,8 +1187,6 @@ static void trace_module_add_events(struct module *mod)
1152{ 1187{
1153 struct ftrace_module_file_ops *file_ops = NULL; 1188 struct ftrace_module_file_ops *file_ops = NULL;
1154 struct ftrace_event_call *call, *start, *end; 1189 struct ftrace_event_call *call, *start, *end;
1155 struct dentry *d_events;
1156 int ret;
1157 1190
1158 start = mod->trace_events; 1191 start = mod->trace_events;
1159 end = mod->trace_events + mod->num_trace_events; 1192 end = mod->trace_events + mod->num_trace_events;
@@ -1161,38 +1194,14 @@ static void trace_module_add_events(struct module *mod)
1161 if (start == end) 1194 if (start == end)
1162 return; 1195 return;
1163 1196
1164 d_events = event_trace_events_dir(); 1197 file_ops = trace_create_file_ops(mod);
1165 if (!d_events) 1198 if (!file_ops)
1166 return; 1199 return;
1167 1200
1168 for_each_event(call, start, end) { 1201 for_each_event(call, start, end) {
1169 /* The linker may leave blanks */ 1202 __trace_add_event_call(call, mod,
1170 if (!call->name)
1171 continue;
1172 if (call->class->raw_init) {
1173 ret = call->class->raw_init(call);
1174 if (ret < 0) {
1175 if (ret != -ENOSYS)
1176 pr_warning("Could not initialize trace "
1177 "point events/%s\n", call->name);
1178 continue;
1179 }
1180 }
1181 /*
1182 * This module has events, create file ops for this module
1183 * if not already done.
1184 */
1185 if (!file_ops) {
1186 file_ops = trace_create_file_ops(mod);
1187 if (!file_ops)
1188 return;
1189 }
1190 call->mod = mod;
1191 ret = event_create_dir(call, d_events,
1192 &file_ops->id, &file_ops->enable, 1203 &file_ops->id, &file_ops->enable,
1193 &file_ops->filter, &file_ops->format); 1204 &file_ops->filter, &file_ops->format);
1194 if (!ret)
1195 list_add(&call->list, &ftrace_events);
1196 } 1205 }
1197} 1206}
1198 1207
@@ -1319,25 +1328,14 @@ static __init int event_trace_init(void)
1319 trace_create_file("enable", 0644, d_events, 1328 trace_create_file("enable", 0644, d_events,
1320 NULL, &ftrace_system_enable_fops); 1329 NULL, &ftrace_system_enable_fops);
1321 1330
1331 if (trace_define_common_fields())
1332 pr_warning("tracing: Failed to allocate common fields");
1333
1322 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1334 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1323 /* The linker may leave blanks */ 1335 __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1324 if (!call->name)
1325 continue;
1326 if (call->class->raw_init) {
1327 ret = call->class->raw_init(call);
1328 if (ret < 0) {
1329 if (ret != -ENOSYS)
1330 pr_warning("Could not initialize trace "
1331 "point events/%s\n", call->name);
1332 continue;
1333 }
1334 }
1335 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1336 &ftrace_enable_fops, 1336 &ftrace_enable_fops,
1337 &ftrace_event_filter_fops, 1337 &ftrace_event_filter_fops,
1338 &ftrace_event_format_fops); 1338 &ftrace_event_format_fops);
1339 if (!ret)
1340 list_add(&call->list, &ftrace_events);
1341 } 1339 }
1342 1340
1343 while (true) { 1341 while (true) {
@@ -1524,12 +1522,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1524 struct ftrace_entry *entry; 1522 struct ftrace_entry *entry;
1525 unsigned long flags; 1523 unsigned long flags;
1526 long disabled; 1524 long disabled;
1527 int resched;
1528 int cpu; 1525 int cpu;
1529 int pc; 1526 int pc;
1530 1527
1531 pc = preempt_count(); 1528 pc = preempt_count();
1532 resched = ftrace_preempt_disable(); 1529 preempt_disable_notrace();
1533 cpu = raw_smp_processor_id(); 1530 cpu = raw_smp_processor_id();
1534 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); 1531 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1535 1532
@@ -1551,7 +1548,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1551 1548
1552 out: 1549 out:
1553 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 1550 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1554 ftrace_preempt_enable(resched); 1551 preempt_enable_notrace();
1555} 1552}
1556 1553
1557static struct ftrace_ops trace_ops __initdata = 1554static struct ftrace_ops trace_ops __initdata =
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 57bb1bb32999..36d40104b17f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -497,12 +497,10 @@ void print_subsystem_event_filter(struct event_subsystem *system,
497} 497}
498 498
499static struct ftrace_event_field * 499static struct ftrace_event_field *
500find_event_field(struct ftrace_event_call *call, char *name) 500__find_event_field(struct list_head *head, char *name)
501{ 501{
502 struct ftrace_event_field *field; 502 struct ftrace_event_field *field;
503 struct list_head *head;
504 503
505 head = trace_get_fields(call);
506 list_for_each_entry(field, head, link) { 504 list_for_each_entry(field, head, link) {
507 if (!strcmp(field->name, name)) 505 if (!strcmp(field->name, name))
508 return field; 506 return field;
@@ -511,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)
511 return NULL; 509 return NULL;
512} 510}
513 511
512static struct ftrace_event_field *
513find_event_field(struct ftrace_event_call *call, char *name)
514{
515 struct ftrace_event_field *field;
516 struct list_head *head;
517
518 field = __find_event_field(&ftrace_common_fields, name);
519 if (field)
520 return field;
521
522 head = trace_get_fields(call);
523 return __find_event_field(head, name);
524}
525
514static void filter_free_pred(struct filter_pred *pred) 526static void filter_free_pred(struct filter_pred *pred)
515{ 527{
516 if (!pred) 528 if (!pred)
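find_event_field() now consults the shared ftrace_common_fields list before the event's own list, matching the common/event split introduced in trace_events.c. A small user-space analogue of that two-stage lookup (the field tables are invented for the example):

#include <stdio.h>
#include <string.h>

struct field { const char *name; const char *type; };

static const struct field *find_in(const struct field *f, int n, const char *name)
{
	for (int i = 0; i < n; i++)
		if (strcmp(f[i].name, name) == 0)
			return &f[i];
	return NULL;
}

int main(void)
{
	const struct field common[] = { { "common_pid", "int" } };
	const struct field event[]  = { { "ip", "unsigned long" } };
	const char *want = "common_pid";

	/* Try the common fields first, then the event-specific ones. */
	const struct field *f = find_in(common, 1, want);
	if (!f)
		f = find_in(event, 1, want);

	printf("%s -> %s\n", want, f ? f->type : "(not found)");
	return 0;
}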
@@ -627,9 +639,6 @@ static int init_subsystem_preds(struct event_subsystem *system)
627 int err; 639 int err;
628 640
629 list_for_each_entry(call, &ftrace_events, list) { 641 list_for_each_entry(call, &ftrace_events, list) {
630 if (!call->class || !call->class->define_fields)
631 continue;
632
633 if (strcmp(call->class->system, system->name) != 0) 642 if (strcmp(call->class->system, system->name) != 0)
634 continue; 643 continue;
635 644
@@ -646,9 +655,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
646 struct ftrace_event_call *call; 655 struct ftrace_event_call *call;
647 656
648 list_for_each_entry(call, &ftrace_events, list) { 657 list_for_each_entry(call, &ftrace_events, list) {
649 if (!call->class || !call->class->define_fields)
650 continue;
651
652 if (strcmp(call->class->system, system->name) != 0) 658 if (strcmp(call->class->system, system->name) != 0)
653 continue; 659 continue;
654 660
@@ -1251,9 +1257,6 @@ static int replace_system_preds(struct event_subsystem *system,
1251 list_for_each_entry(call, &ftrace_events, list) { 1257 list_for_each_entry(call, &ftrace_events, list) {
1252 struct event_filter *filter = call->filter; 1258 struct event_filter *filter = call->filter;
1253 1259
1254 if (!call->class || !call->class->define_fields)
1255 continue;
1256
1257 if (strcmp(call->class->system, system->name) != 0) 1260 if (strcmp(call->class->system, system->name) != 0)
1258 continue; 1261 continue;
1259 1262
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 8536e2a65969..4ba44deaac25 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -125,12 +125,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
125 125
126#include "trace_entries.h" 126#include "trace_entries.h"
127 127
128static int ftrace_raw_init_event(struct ftrace_event_call *call)
129{
130 INIT_LIST_HEAD(&call->class->fields);
131 return 0;
132}
133
134#undef __entry 128#undef __entry
135#define __entry REC 129#define __entry REC
136 130
@@ -158,7 +152,7 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
158struct ftrace_event_class event_class_ftrace_##call = { \ 152struct ftrace_event_class event_class_ftrace_##call = { \
159 .system = __stringify(TRACE_SYSTEM), \ 153 .system = __stringify(TRACE_SYSTEM), \
160 .define_fields = ftrace_define_fields_##call, \ 154 .define_fields = ftrace_define_fields_##call, \
161 .raw_init = ftrace_raw_init_event, \ 155 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
162}; \ 156}; \
163 \ 157 \
164struct ftrace_event_call __used \ 158struct ftrace_event_call __used \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b3f3776b0cd6..16aee4d44e8f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
54 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
55 unsigned long flags; 55 unsigned long flags;
56 long disabled; 56 long disabled;
57 int cpu, resched; 57 int cpu;
58 int pc; 58 int pc;
59 59
60 if (unlikely(!ftrace_function_enabled)) 60 if (unlikely(!ftrace_function_enabled))
61 return; 61 return;
62 62
63 pc = preempt_count(); 63 pc = preempt_count();
64 resched = ftrace_preempt_disable(); 64 preempt_disable_notrace();
65 local_save_flags(flags); 65 local_save_flags(flags);
66 cpu = raw_smp_processor_id(); 66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu]; 67 data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
71 trace_function(tr, ip, parent_ip, flags, pc); 71 trace_function(tr, ip, parent_ip, flags, pc);
72 72
73 atomic_dec(&data->disabled); 73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched); 74 preempt_enable_notrace();
75} 75}
76 76
77static void 77static void
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 79f4bac99a94..6bff23625781 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -641,7 +641,8 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
641 641
642 /* Print nsecs (we don't want to exceed 7 numbers) */ 642 /* Print nsecs (we don't want to exceed 7 numbers) */
643 if (len < 7) { 643 if (len < 7) {
644 snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); 644 snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu",
645 nsecs_rem);
645 ret = trace_seq_printf(s, ".%s", nsecs_str); 646 ret = trace_seq_printf(s, ".%s", nsecs_str);
646 if (!ret) 647 if (!ret)
647 return TRACE_TYPE_PARTIAL_LINE; 648 return TRACE_TYPE_PARTIAL_LINE;
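The nanoseconds fix clamps the size handed to snprintf() so it can never exceed the destination buffer, whatever value 8 - len takes. A stand-alone illustration of the same guard (buffer size and inputs chosen only for the demo):

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	char nsecs_str[5];		/* small destination; size is illustrative */
	unsigned long nsecs_rem = 123;
	unsigned long len = 2;		/* digits already printed elsewhere */

	/*
	 * Without the clamp, "8 - len" (6 here) would be passed as the size
	 * even though the buffer only holds 5 bytes; MIN() keeps snprintf()
	 * from being told it may write past the end.
	 */
	snprintf(nsecs_str, MIN(sizeof(nsecs_str), 8 - len), "%03lu", nsecs_rem);
	printf(".%s\n", nsecs_str);
	return 0;
}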
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f52b5f50299d..1b79d1c15726 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,6 +30,8 @@
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h> 32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <linux/uaccess.h>
33#include <asm/bitsperlong.h> 35#include <asm/bitsperlong.h>
34 36
35#include "trace.h" 37#include "trace.h"
@@ -38,6 +40,7 @@
38#define MAX_TRACE_ARGS 128 40#define MAX_TRACE_ARGS 128
39#define MAX_ARGSTR_LEN 63 41#define MAX_ARGSTR_LEN 63
40#define MAX_EVENT_NAME_LEN 64 42#define MAX_EVENT_NAME_LEN 64
43#define MAX_STRING_SIZE PATH_MAX
41#define KPROBE_EVENT_SYSTEM "kprobes" 44#define KPROBE_EVENT_SYSTEM "kprobes"
42 45
43/* Reserved field names */ 46/* Reserved field names */
@@ -58,14 +61,16 @@ const char *reserved_field_names[] = {
58}; 61};
59 62
60/* Printing function type */ 63/* Printing function type */
61typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *); 64typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
65 void *);
62#define PRINT_TYPE_FUNC_NAME(type) print_type_##type 66#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
63#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type 67#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
64 68
65/* Printing in basic type function template */ 69/* Printing in basic type function template */
66#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ 70#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
67static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 71static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
68 const char *name, void *data)\ 72 const char *name, \
73 void *data, void *ent)\
69{ \ 74{ \
70 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ 75 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
71} \ 76} \
@@ -80,6 +85,49 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
80DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) 85DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) 86DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
82 87
88/* data_rloc: data relative location, compatible with u32 */
89#define make_data_rloc(len, roffs) \
90 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
91#define get_rloc_len(dl) ((u32)(dl) >> 16)
92#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
93
94static inline void *get_rloc_data(u32 *dl)
95{
96 return (u8 *)dl + get_rloc_offs(*dl);
97}
98
99/* For data_loc conversion */
100static inline void *get_loc_data(u32 *dl, void *ent)
101{
102 return (u8 *)ent + get_rloc_offs(*dl);
103}
104
105/*
106 * Convert data_rloc to data_loc:
107 * data_rloc stores the offset from data_rloc itself, but data_loc
108 * stores the offset from event entry.
109 */
110#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
111
112/* For defining macros, define string/string_size types */
113typedef u32 string;
114typedef u32 string_size;
115
116/* Print type function for string type */
117static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
118 const char *name,
119 void *data, void *ent)
120{
121 int len = *(u32 *)data >> 16;
122
123 if (!len)
124 return trace_seq_printf(s, " %s=(fault)", name);
125 else
126 return trace_seq_printf(s, " %s=\"%s\"", name,
127 (const char *)get_loc_data(data, ent));
128}
129static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
130
83/* Data fetch function type */ 131/* Data fetch function type */
84typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); 132typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
85 133
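The data_rloc helpers pack a string's length and its relative offset into one u32. A tiny host-side demo of the exact same packing (it compiles and runs outside the kernel; only the u32 typedef is added here):

#include <assert.h>
#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;

/* Same packing as above: length in the high 16 bits, offset in the low 16. */
#define make_data_rloc(len, roffs)	\
	(((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
#define get_rloc_len(dl)	((u32)(dl) >> 16)
#define get_rloc_offs(dl)	((u32)(dl) & 0xffff)

int main(void)
{
	u32 dl = make_data_rloc(14, 40);	/* 14-byte string stored 40 bytes away */

	assert(get_rloc_len(dl) == 14);
	assert(get_rloc_offs(dl) == 40);
	printf("dl=0x%08x len=%u offs=%u\n", dl,
	       (unsigned)get_rloc_len(dl), (unsigned)get_rloc_offs(dl));
	return 0;
}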
@@ -94,32 +142,38 @@ static __kprobes void call_fetch(struct fetch_param *fprm,
94 return fprm->fn(regs, fprm->data, dest); 142 return fprm->fn(regs, fprm->data, dest);
95} 143}
96 144
97#define FETCH_FUNC_NAME(kind, type) fetch_##kind##_##type 145#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
98/* 146/*
99 * Define macro for basic types - we don't need to define s* types, because 147 * Define macro for basic types - we don't need to define s* types, because
100 * we have to care only about bitwidth at recording time. 148 * we have to care only about bitwidth at recording time.
101 */ 149 */
102#define DEFINE_BASIC_FETCH_FUNCS(kind) \ 150#define DEFINE_BASIC_FETCH_FUNCS(method) \
103DEFINE_FETCH_##kind(u8) \ 151DEFINE_FETCH_##method(u8) \
104DEFINE_FETCH_##kind(u16) \ 152DEFINE_FETCH_##method(u16) \
105DEFINE_FETCH_##kind(u32) \ 153DEFINE_FETCH_##method(u32) \
106DEFINE_FETCH_##kind(u64) 154DEFINE_FETCH_##method(u64)
107 155
108#define CHECK_BASIC_FETCH_FUNCS(kind, fn) \ 156#define CHECK_FETCH_FUNCS(method, fn) \
109 ((FETCH_FUNC_NAME(kind, u8) == fn) || \ 157 (((FETCH_FUNC_NAME(method, u8) == fn) || \
110 (FETCH_FUNC_NAME(kind, u16) == fn) || \ 158 (FETCH_FUNC_NAME(method, u16) == fn) || \
111 (FETCH_FUNC_NAME(kind, u32) == fn) || \ 159 (FETCH_FUNC_NAME(method, u32) == fn) || \
112 (FETCH_FUNC_NAME(kind, u64) == fn)) 160 (FETCH_FUNC_NAME(method, u64) == fn) || \
161 (FETCH_FUNC_NAME(method, string) == fn) || \
162 (FETCH_FUNC_NAME(method, string_size) == fn)) \
163 && (fn != NULL))
113 164
114/* Data fetch function templates */ 165/* Data fetch function templates */
115#define DEFINE_FETCH_reg(type) \ 166#define DEFINE_FETCH_reg(type) \
116static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ 167static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
117 void *offset, void *dest) \ 168 void *offset, void *dest) \
118{ \ 169{ \
119 *(type *)dest = (type)regs_get_register(regs, \ 170 *(type *)dest = (type)regs_get_register(regs, \
120 (unsigned int)((unsigned long)offset)); \ 171 (unsigned int)((unsigned long)offset)); \
121} 172}
122DEFINE_BASIC_FETCH_FUNCS(reg) 173DEFINE_BASIC_FETCH_FUNCS(reg)
174/* No string on the register */
175#define fetch_reg_string NULL
176#define fetch_reg_string_size NULL
123 177
124#define DEFINE_FETCH_stack(type) \ 178#define DEFINE_FETCH_stack(type) \
125static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 179static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
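CHECK_FETCH_FUNCS() replaces CHECK_BASIC_FETCH_FUNCS() and now also matches the string variants, with an explicit fn != NULL guard: fetch methods that cannot produce strings (registers, stack slots, return values) define their string handlers as NULL, and the guard keeps a NULL function pointer from being mis-attributed to such a method. A compact user-space illustration of why the guard matters (the function names are placeholders):

#include <stdio.h>

typedef void (*fetch_func_t)(void *regs, void *data, void *dest);

static void fetch_reg_u64(void *r, void *d, void *dst)       { (void)r; (void)d; (void)dst; }
static void fetch_memory_u64(void *r, void *d, void *dst)    { (void)r; (void)d; (void)dst; }
static void fetch_memory_string(void *r, void *d, void *dst) { (void)r; (void)d; (void)dst; }

/* Per-method tables; the reg method has no string fetcher, so that slot is NULL. */
static fetch_func_t reg_fns[]    = { fetch_reg_u64, NULL };
static fetch_func_t memory_fns[] = { fetch_memory_u64, fetch_memory_string };

static int method_owns(fetch_func_t *fns, int n, fetch_func_t fn)
{
	for (int i = 0; i < n; i++)
		/* without the NULL check, a NULL fn would "match" reg's empty slot */
		if (fns[i] == fn && fn != NULL)
			return 1;
	return 0;
}

int main(void)
{
	printf("reg owns NULL string fetcher?   %d\n", method_owns(reg_fns, 2, NULL));
	printf("memory owns its string fetcher? %d\n",
	       method_owns(memory_fns, 2, fetch_memory_string));
	return 0;
}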
@@ -129,6 +183,9 @@ static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
129 (unsigned int)((unsigned long)offset)); \ 183 (unsigned int)((unsigned long)offset)); \
130} 184}
131DEFINE_BASIC_FETCH_FUNCS(stack) 185DEFINE_BASIC_FETCH_FUNCS(stack)
186/* No string on the stack entry */
187#define fetch_stack_string NULL
188#define fetch_stack_string_size NULL
132 189
133#define DEFINE_FETCH_retval(type) \ 190#define DEFINE_FETCH_retval(type) \
134static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ 191static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
@@ -137,6 +194,9 @@ static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
137 *(type *)dest = (type)regs_return_value(regs); \ 194 *(type *)dest = (type)regs_return_value(regs); \
138} 195}
139DEFINE_BASIC_FETCH_FUNCS(retval) 196DEFINE_BASIC_FETCH_FUNCS(retval)
197/* No string on the retval */
198#define fetch_retval_string NULL
199#define fetch_retval_string_size NULL
140 200
141#define DEFINE_FETCH_memory(type) \ 201#define DEFINE_FETCH_memory(type) \
142static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 202static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
@@ -149,6 +209,62 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
149 *(type *)dest = retval; \ 209 *(type *)dest = retval; \
150} 210}
151DEFINE_BASIC_FETCH_FUNCS(memory) 211DEFINE_BASIC_FETCH_FUNCS(memory)
212/*
213 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
214 * length and relative data location.
215 */
216static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
217 void *addr, void *dest)
218{
219 long ret;
220 int maxlen = get_rloc_len(*(u32 *)dest);
221 u8 *dst = get_rloc_data(dest);
222 u8 *src = addr;
223 mm_segment_t old_fs = get_fs();
224 if (!maxlen)
225 return;
226 /*
227 * Try to get string again, since the string can be changed while
228 * probing.
229 */
230 set_fs(KERNEL_DS);
231 pagefault_disable();
232 do
233 ret = __copy_from_user_inatomic(dst++, src++, 1);
234 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
235 dst[-1] = '\0';
236 pagefault_enable();
237 set_fs(old_fs);
238
239 if (ret < 0) { /* Failed to fetch string */
240 ((u8 *)get_rloc_data(dest))[0] = '\0';
241 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
242 } else
243 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
244 get_rloc_offs(*(u32 *)dest));
245}
246/* Return the length of the string -- including the terminating null byte */

247static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
248 void *addr, void *dest)
249{
250 int ret, len = 0;
251 u8 c;
252 mm_segment_t old_fs = get_fs();
253
254 set_fs(KERNEL_DS);
255 pagefault_disable();
256 do {
257 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
258 len++;
259 } while (c && ret == 0 && len < MAX_STRING_SIZE);
260 pagefault_enable();
261 set_fs(old_fs);
262
263 if (ret < 0) /* Failed to check the length */
264 *(u32 *)dest = 0;
265 else
266 *(u32 *)dest = len;
267}
152 268
153/* Memory fetching by symbol */ 269/* Memory fetching by symbol */
154struct symbol_cache { 270struct symbol_cache {
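The new memory-string fetcher copies the target string into the dynamic area of the record under pagefault_disable(), then rewrites the u32 slot with the actual length and relative offset; after the later conversion the slot is an ordinary ftrace __data_loc (offset from the start of the entry in the low 16 bits, length in the high 16 bits). A user-space sketch of writing and reading such a slot (the buffer layout and offsets are invented for the example):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	unsigned char entry[64] = { 0 };
	const char *msg = "/etc/passwd";
	uint32_t offs = 16;				/* where the dynamic data lands */
	uint32_t len = (uint32_t)strlen(msg) + 1;	/* include the NUL */

	memcpy(entry + offs, msg, len);			/* dynamic string area */
	uint32_t dl = (len << 16) | (offs & 0xffff);	/* the stored data_loc */
	memcpy(entry + 8, &dl, sizeof(dl));		/* the argument's u32 slot */

	uint32_t stored;
	memcpy(&stored, entry + 8, sizeof(stored));
	printf("len=%u string=\"%s\"\n", (unsigned)(stored >> 16),
	       (char *)entry + (stored & 0xffff));
	return 0;
}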
@@ -203,6 +319,8 @@ static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
203 *(type *)dest = 0; \ 319 *(type *)dest = 0; \
204} 320}
205DEFINE_BASIC_FETCH_FUNCS(symbol) 321DEFINE_BASIC_FETCH_FUNCS(symbol)
322DEFINE_FETCH_symbol(string)
323DEFINE_FETCH_symbol(string_size)
206 324
207/* Dereference memory access function */ 325/* Dereference memory access function */
208struct deref_fetch_param { 326struct deref_fetch_param {
@@ -224,12 +342,14 @@ static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
224 *(type *)dest = 0; \ 342 *(type *)dest = 0; \
225} 343}
226DEFINE_BASIC_FETCH_FUNCS(deref) 344DEFINE_BASIC_FETCH_FUNCS(deref)
345DEFINE_FETCH_deref(string)
346DEFINE_FETCH_deref(string_size)
227 347
228static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) 348static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
229{ 349{
230 if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn)) 350 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
231 free_deref_fetch_param(data->orig.data); 351 free_deref_fetch_param(data->orig.data);
232 else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn)) 352 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
233 free_symbol_cache(data->orig.data); 353 free_symbol_cache(data->orig.data);
234 kfree(data); 354 kfree(data);
235} 355}
@@ -240,23 +360,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
240#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) 360#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
241#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) 361#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
242 362
243#define ASSIGN_FETCH_FUNC(kind, type) \ 363/* Fetch types */
244 .kind = FETCH_FUNC_NAME(kind, type) 364enum {
245 365 FETCH_MTD_reg = 0,
246#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ 366 FETCH_MTD_stack,
247 {.name = #ptype, \ 367 FETCH_MTD_retval,
248 .size = sizeof(ftype), \ 368 FETCH_MTD_memory,
249 .is_signed = sign, \ 369 FETCH_MTD_symbol,
250 .print = PRINT_TYPE_FUNC_NAME(ptype), \ 370 FETCH_MTD_deref,
251 .fmt = PRINT_TYPE_FMT_NAME(ptype), \ 371 FETCH_MTD_END,
252ASSIGN_FETCH_FUNC(reg, ftype), \ 372};
253ASSIGN_FETCH_FUNC(stack, ftype), \ 373
254ASSIGN_FETCH_FUNC(retval, ftype), \ 374#define ASSIGN_FETCH_FUNC(method, type) \
255ASSIGN_FETCH_FUNC(memory, ftype), \ 375 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
256ASSIGN_FETCH_FUNC(symbol, ftype), \ 376
257ASSIGN_FETCH_FUNC(deref, ftype), \ 377#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
378 {.name = _name, \
379 .size = _size, \
380 .is_signed = sign, \
381 .print = PRINT_TYPE_FUNC_NAME(ptype), \
382 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
383 .fmttype = _fmttype, \
384 .fetch = { \
385ASSIGN_FETCH_FUNC(reg, ftype), \
386ASSIGN_FETCH_FUNC(stack, ftype), \
387ASSIGN_FETCH_FUNC(retval, ftype), \
388ASSIGN_FETCH_FUNC(memory, ftype), \
389ASSIGN_FETCH_FUNC(symbol, ftype), \
390ASSIGN_FETCH_FUNC(deref, ftype), \
391 } \
258 } 392 }
259 393
394#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
395 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
396
397#define FETCH_TYPE_STRING 0
398#define FETCH_TYPE_STRSIZE 1
399
260/* Fetch type information table */ 400/* Fetch type information table */
261static const struct fetch_type { 401static const struct fetch_type {
262 const char *name; /* Name of type */ 402 const char *name; /* Name of type */
@@ -264,14 +404,16 @@ static const struct fetch_type {
264 int is_signed; /* Signed flag */ 404 int is_signed; /* Signed flag */
265 print_type_func_t print; /* Print functions */ 405 print_type_func_t print; /* Print functions */
266 const char *fmt; /* Fromat string */ 406 const char *fmt; /* Fromat string */
407 const char *fmttype; /* Name in format file */
267 /* Fetch functions */ 408 /* Fetch functions */
268 fetch_func_t reg; 409 fetch_func_t fetch[FETCH_MTD_END];
269 fetch_func_t stack;
270 fetch_func_t retval;
271 fetch_func_t memory;
272 fetch_func_t symbol;
273 fetch_func_t deref;
274} fetch_type_table[] = { 410} fetch_type_table[] = {
411 /* Special types */
412 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
413 sizeof(u32), 1, "__data_loc char[]"),
414 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
415 string_size, sizeof(u32), 0, "u32"),
416 /* Basic types */
275 ASSIGN_FETCH_TYPE(u8, u8, 0), 417 ASSIGN_FETCH_TYPE(u8, u8, 0),
276 ASSIGN_FETCH_TYPE(u16, u16, 0), 418 ASSIGN_FETCH_TYPE(u16, u16, 0),
277 ASSIGN_FETCH_TYPE(u32, u32, 0), 419 ASSIGN_FETCH_TYPE(u32, u32, 0),
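With the fetch functions held in an array indexed by FETCH_MTD_*, the type table gains two special entries for string and string_size while the parsing code can select a method generically (t->fetch[FETCH_MTD_memory] and so on) instead of touching one named member per method. A small stand-alone example of that enum-indexed dispatch (the names and return values are arbitrary):

#include <stdio.h>

enum { MTD_reg, MTD_stack, MTD_memory, MTD_END };

typedef long (*fetch_fn)(long arg);

static long fetch_reg(long r)    { return 100 + r; }
static long fetch_stack(long n)  { return 200 + n; }
static long fetch_memory(long a) { return 300 + a; }

struct fetch_type {
	const char *name;
	fetch_fn fetch[MTD_END];	/* one slot per fetch method */
};

static const struct fetch_type u64_type = {
	.name  = "u64",
	.fetch = {
		[MTD_reg]    = fetch_reg,
		[MTD_stack]  = fetch_stack,
		[MTD_memory] = fetch_memory,
	},
};

int main(void)
{
	int method = MTD_memory;	/* chosen by the argument parser */
	printf("%s via method %d -> %ld\n", u64_type.name, method,
	       u64_type.fetch[method](7));
	return 0;
}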
@@ -302,12 +444,28 @@ static __kprobes void fetch_stack_address(struct pt_regs *regs,
302 *(unsigned long *)dest = kernel_stack_pointer(regs); 444 *(unsigned long *)dest = kernel_stack_pointer(regs);
303} 445}
304 446
447static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
448 fetch_func_t orig_fn)
449{
450 int i;
451
452 if (type != &fetch_type_table[FETCH_TYPE_STRING])
453 return NULL; /* Only string type needs size function */
454 for (i = 0; i < FETCH_MTD_END; i++)
455 if (type->fetch[i] == orig_fn)
456 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
457
458 WARN_ON(1); /* This should not happen */
459 return NULL;
460}
461
305/** 462/**
306 * Kprobe event core functions 463 * Kprobe event core functions
307 */ 464 */
308 465
309struct probe_arg { 466struct probe_arg {
310 struct fetch_param fetch; 467 struct fetch_param fetch;
468 struct fetch_param fetch_size;
311 unsigned int offset; /* Offset from argument entry */ 469 unsigned int offset; /* Offset from argument entry */
312 const char *name; /* Name of this argument */ 470 const char *name; /* Name of this argument */
313 const char *comm; /* Command of this argument */ 471 const char *comm; /* Command of this argument */
@@ -429,9 +587,9 @@ error:
429 587
430static void free_probe_arg(struct probe_arg *arg) 588static void free_probe_arg(struct probe_arg *arg)
431{ 589{
432 if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn)) 590 if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
433 free_deref_fetch_param(arg->fetch.data); 591 free_deref_fetch_param(arg->fetch.data);
434 else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn)) 592 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
435 free_symbol_cache(arg->fetch.data); 593 free_symbol_cache(arg->fetch.data);
436 kfree(arg->name); 594 kfree(arg->name);
437 kfree(arg->comm); 595 kfree(arg->comm);
@@ -548,7 +706,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
548 706
549 if (strcmp(arg, "retval") == 0) { 707 if (strcmp(arg, "retval") == 0) {
550 if (is_return) 708 if (is_return)
551 f->fn = t->retval; 709 f->fn = t->fetch[FETCH_MTD_retval];
552 else 710 else
553 ret = -EINVAL; 711 ret = -EINVAL;
554 } else if (strncmp(arg, "stack", 5) == 0) { 712 } else if (strncmp(arg, "stack", 5) == 0) {
@@ -562,7 +720,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
562 if (ret || param > PARAM_MAX_STACK) 720 if (ret || param > PARAM_MAX_STACK)
563 ret = -EINVAL; 721 ret = -EINVAL;
564 else { 722 else {
565 f->fn = t->stack; 723 f->fn = t->fetch[FETCH_MTD_stack];
566 f->data = (void *)param; 724 f->data = (void *)param;
567 } 725 }
568 } else 726 } else
@@ -588,7 +746,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
588 case '%': /* named register */ 746 case '%': /* named register */
589 ret = regs_query_register_offset(arg + 1); 747 ret = regs_query_register_offset(arg + 1);
590 if (ret >= 0) { 748 if (ret >= 0) {
591 f->fn = t->reg; 749 f->fn = t->fetch[FETCH_MTD_reg];
592 f->data = (void *)(unsigned long)ret; 750 f->data = (void *)(unsigned long)ret;
593 ret = 0; 751 ret = 0;
594 } 752 }
@@ -598,7 +756,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
598 ret = strict_strtoul(arg + 1, 0, &param); 756 ret = strict_strtoul(arg + 1, 0, &param);
599 if (ret) 757 if (ret)
600 break; 758 break;
601 f->fn = t->memory; 759 f->fn = t->fetch[FETCH_MTD_memory];
602 f->data = (void *)param; 760 f->data = (void *)param;
603 } else { 761 } else {
604 ret = split_symbol_offset(arg + 1, &offset); 762 ret = split_symbol_offset(arg + 1, &offset);
@@ -606,7 +764,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
606 break; 764 break;
607 f->data = alloc_symbol_cache(arg + 1, offset); 765 f->data = alloc_symbol_cache(arg + 1, offset);
608 if (f->data) 766 if (f->data)
609 f->fn = t->symbol; 767 f->fn = t->fetch[FETCH_MTD_symbol];
610 } 768 }
611 break; 769 break;
612 case '+': /* deref memory */ 770 case '+': /* deref memory */
@@ -636,14 +794,17 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
636 if (ret) 794 if (ret)
637 kfree(dprm); 795 kfree(dprm);
638 else { 796 else {
639 f->fn = t->deref; 797 f->fn = t->fetch[FETCH_MTD_deref];
640 f->data = (void *)dprm; 798 f->data = (void *)dprm;
641 } 799 }
642 } 800 }
643 break; 801 break;
644 } 802 }
645 if (!ret && !f->fn) 803 if (!ret && !f->fn) { /* Parsed, but no fetch method found */
804 pr_info("%s type has no corresponding fetch method.\n",
805 t->name);
646 ret = -EINVAL; 806 ret = -EINVAL;
807 }
647 return ret; 808 return ret;
648} 809}
649 810
@@ -652,6 +813,7 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
652 struct probe_arg *parg, int is_return) 813 struct probe_arg *parg, int is_return)
653{ 814{
654 const char *t; 815 const char *t;
816 int ret;
655 817
656 if (strlen(arg) > MAX_ARGSTR_LEN) { 818 if (strlen(arg) > MAX_ARGSTR_LEN) {
657 pr_info("Argument is too long.: %s\n", arg); 819 pr_info("Argument is too long.: %s\n", arg);
@@ -674,7 +836,13 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
674 } 836 }
675 parg->offset = tp->size; 837 parg->offset = tp->size;
676 tp->size += parg->type->size; 838 tp->size += parg->type->size;
677 return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 839 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
840 if (ret >= 0) {
841 parg->fetch_size.fn = get_fetch_size_function(parg->type,
842 parg->fetch.fn);
843 parg->fetch_size.data = parg->fetch.data;
844 }
845 return ret;
678} 846}
679 847
680/* Return 1 if name is reserved or already used by another argument */ 848/* Return 1 if name is reserved or already used by another argument */
@@ -1043,6 +1211,54 @@ static const struct file_operations kprobe_profile_ops = {
1043 .release = seq_release, 1211 .release = seq_release,
1044}; 1212};
1045 1213
1214/* Sum up total data length for dynamic arrays (strings) */
1215static __kprobes int __get_data_size(struct trace_probe *tp,
1216 struct pt_regs *regs)
1217{
1218 int i, ret = 0;
1219 u32 len;
1220
1221 for (i = 0; i < tp->nr_args; i++)
1222 if (unlikely(tp->args[i].fetch_size.fn)) {
1223 call_fetch(&tp->args[i].fetch_size, regs, &len);
1224 ret += len;
1225 }
1226
1227 return ret;
1228}
1229
1230/* Store the value of each argument */
1231static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
1232 struct pt_regs *regs,
1233 u8 *data, int maxlen)
1234{
1235 int i;
1236 u32 end = tp->size;
1237 u32 *dl; /* Data (relative) location */
1238
1239 for (i = 0; i < tp->nr_args; i++) {
1240 if (unlikely(tp->args[i].fetch_size.fn)) {
1241 /*
1242 * First, we set the relative location and
1243 * maximum data length to *dl
1244 */
1245 dl = (u32 *)(data + tp->args[i].offset);
1246 *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
1247 /* Then try to fetch string or dynamic array data */
1248 call_fetch(&tp->args[i].fetch, regs, dl);
1249 /* Reduce maximum length */
1250 end += get_rloc_len(*dl);
1251 maxlen -= get_rloc_len(*dl);
1252 /* Trick here, convert data_rloc to data_loc */
1253 *dl = convert_rloc_to_loc(*dl,
1254 ent_size + tp->args[i].offset);
1255 } else
1256 /* Just fetching data normally */
1257 call_fetch(&tp->args[i].fetch, regs,
1258 data + tp->args[i].offset);
1259 }
1260}
1261
1046/* Kprobe handler */ 1262/* Kprobe handler */
1047static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1263static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1048{ 1264{
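__get_data_size() walks the arguments once to size the dynamic area, and store_trace_args() then lays the record out as the fixed per-argument slots (tp->size bytes) followed by the string payloads, priming each string slot with the remaining space before fetching and finally converting it to an entry-relative data_loc. A simplified user-space model of that packing for two string arguments (offsets here stay relative to the start of the buffer rather than being converted, and all sizes are invented):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	unsigned char data[64];
	const char *args[2] = { "foo", "a-longer-value" };
	uint32_t fixed = 2 * sizeof(uint32_t);	/* two u32 slots, one per argument */
	uint32_t end = fixed;			/* next free byte in the dynamic area */
	uint32_t maxlen = sizeof(data) - fixed;	/* space still available */

	for (int i = 0; i < 2; i++) {
		uint32_t len = (uint32_t)strlen(args[i]) + 1;
		memcpy(data + end, args[i], len);		/* append payload */
		uint32_t dl = (len << 16) | (end & 0xffff);	/* record where it went */
		memcpy(data + i * sizeof(uint32_t), &dl, sizeof(dl));
		end += len;
		maxlen -= len;
		printf("arg%d: offs=%u len=%u (space left %u)\n", i,
		       (unsigned)(dl & 0xffff), (unsigned)(dl >> 16),
		       (unsigned)maxlen);
	}
	return 0;
}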
@@ -1050,8 +1266,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1050 struct kprobe_trace_entry_head *entry; 1266 struct kprobe_trace_entry_head *entry;
1051 struct ring_buffer_event *event; 1267 struct ring_buffer_event *event;
1052 struct ring_buffer *buffer; 1268 struct ring_buffer *buffer;
1053 u8 *data; 1269 int size, dsize, pc;
1054 int size, i, pc;
1055 unsigned long irq_flags; 1270 unsigned long irq_flags;
1056 struct ftrace_event_call *call = &tp->call; 1271 struct ftrace_event_call *call = &tp->call;
1057 1272
@@ -1060,7 +1275,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1060 local_save_flags(irq_flags); 1275 local_save_flags(irq_flags);
1061 pc = preempt_count(); 1276 pc = preempt_count();
1062 1277
1063 size = sizeof(*entry) + tp->size; 1278 dsize = __get_data_size(tp, regs);
1279 size = sizeof(*entry) + tp->size + dsize;
1064 1280
1065 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1281 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1066 size, irq_flags, pc); 1282 size, irq_flags, pc);
@@ -1069,9 +1285,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1069 1285
1070 entry = ring_buffer_event_data(event); 1286 entry = ring_buffer_event_data(event);
1071 entry->ip = (unsigned long)kp->addr; 1287 entry->ip = (unsigned long)kp->addr;
1072 data = (u8 *)&entry[1]; 1288 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1073 for (i = 0; i < tp->nr_args; i++)
1074 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1075 1289
1076 if (!filter_current_check_discard(buffer, call, entry, event)) 1290 if (!filter_current_check_discard(buffer, call, entry, event))
1077 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1291 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1085,15 +1299,15 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1085 struct kretprobe_trace_entry_head *entry; 1299 struct kretprobe_trace_entry_head *entry;
1086 struct ring_buffer_event *event; 1300 struct ring_buffer_event *event;
1087 struct ring_buffer *buffer; 1301 struct ring_buffer *buffer;
1088 u8 *data; 1302 int size, pc, dsize;
1089 int size, i, pc;
1090 unsigned long irq_flags; 1303 unsigned long irq_flags;
1091 struct ftrace_event_call *call = &tp->call; 1304 struct ftrace_event_call *call = &tp->call;
1092 1305
1093 local_save_flags(irq_flags); 1306 local_save_flags(irq_flags);
1094 pc = preempt_count(); 1307 pc = preempt_count();
1095 1308
1096 size = sizeof(*entry) + tp->size; 1309 dsize = __get_data_size(tp, regs);
1310 size = sizeof(*entry) + tp->size + dsize;
1097 1311
1098 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1312 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1099 size, irq_flags, pc); 1313 size, irq_flags, pc);
@@ -1103,9 +1317,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1103 entry = ring_buffer_event_data(event); 1317 entry = ring_buffer_event_data(event);
1104 entry->func = (unsigned long)tp->rp.kp.addr; 1318 entry->func = (unsigned long)tp->rp.kp.addr;
1105 entry->ret_ip = (unsigned long)ri->ret_addr; 1319 entry->ret_ip = (unsigned long)ri->ret_addr;
1106 data = (u8 *)&entry[1]; 1320 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1107 for (i = 0; i < tp->nr_args; i++)
1108 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1109 1321
1110 if (!filter_current_check_discard(buffer, call, entry, event)) 1322 if (!filter_current_check_discard(buffer, call, entry, event))
1111 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1323 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1137,7 +1349,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
1137 data = (u8 *)&field[1]; 1349 data = (u8 *)&field[1];
1138 for (i = 0; i < tp->nr_args; i++) 1350 for (i = 0; i < tp->nr_args; i++)
1139 if (!tp->args[i].type->print(s, tp->args[i].name, 1351 if (!tp->args[i].type->print(s, tp->args[i].name,
1140 data + tp->args[i].offset)) 1352 data + tp->args[i].offset, field))
1141 goto partial; 1353 goto partial;
1142 1354
1143 if (!trace_seq_puts(s, "\n")) 1355 if (!trace_seq_puts(s, "\n"))
@@ -1179,7 +1391,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
1179 data = (u8 *)&field[1]; 1391 data = (u8 *)&field[1];
1180 for (i = 0; i < tp->nr_args; i++) 1392 for (i = 0; i < tp->nr_args; i++)
1181 if (!tp->args[i].type->print(s, tp->args[i].name, 1393 if (!tp->args[i].type->print(s, tp->args[i].name,
1182 data + tp->args[i].offset)) 1394 data + tp->args[i].offset, field))
1183 goto partial; 1395 goto partial;
1184 1396
1185 if (!trace_seq_puts(s, "\n")) 1397 if (!trace_seq_puts(s, "\n"))
@@ -1214,11 +1426,6 @@ static void probe_event_disable(struct ftrace_event_call *call)
1214 } 1426 }
1215} 1427}
1216 1428
1217static int probe_event_raw_init(struct ftrace_event_call *event_call)
1218{
1219 return 0;
1220}
1221
1222#undef DEFINE_FIELD 1429#undef DEFINE_FIELD
1223#define DEFINE_FIELD(type, item, name, is_signed) \ 1430#define DEFINE_FIELD(type, item, name, is_signed) \
1224 do { \ 1431 do { \
@@ -1239,7 +1446,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1239 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1446 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1240 /* Set argument names as fields */ 1447 /* Set argument names as fields */
1241 for (i = 0; i < tp->nr_args; i++) { 1448 for (i = 0; i < tp->nr_args; i++) {
1242 ret = trace_define_field(event_call, tp->args[i].type->name, 1449 ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1243 tp->args[i].name, 1450 tp->args[i].name,
1244 sizeof(field) + tp->args[i].offset, 1451 sizeof(field) + tp->args[i].offset,
1245 tp->args[i].type->size, 1452 tp->args[i].type->size,
@@ -1261,7 +1468,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1261 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1468 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1262 /* Set argument names as fields */ 1469 /* Set argument names as fields */
1263 for (i = 0; i < tp->nr_args; i++) { 1470 for (i = 0; i < tp->nr_args; i++) {
1264 ret = trace_define_field(event_call, tp->args[i].type->name, 1471 ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1265 tp->args[i].name, 1472 tp->args[i].name,
1266 sizeof(field) + tp->args[i].offset, 1473 sizeof(field) + tp->args[i].offset,
1267 tp->args[i].type->size, 1474 tp->args[i].type->size,
@@ -1301,8 +1508,13 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1301 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 1508 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1302 1509
1303 for (i = 0; i < tp->nr_args; i++) { 1510 for (i = 0; i < tp->nr_args; i++) {
1304 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", 1511 if (strcmp(tp->args[i].type->name, "string") == 0)
1305 tp->args[i].name); 1512 pos += snprintf(buf + pos, LEN_OR_ZERO,
1513 ", __get_str(%s)",
1514 tp->args[i].name);
1515 else
1516 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1517 tp->args[i].name);
1306 } 1518 }
1307 1519
1308#undef LEN_OR_ZERO 1520#undef LEN_OR_ZERO
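Because a string argument is stored as a data_loc rather than inline, the generated print fmt must reference it with __get_str() instead of REC->, which is what the branch above adds. A host-side sketch of building such a format string (the argument list and leading format are hypothetical, not the tracer's exact output):

#include <stdio.h>
#include <string.h>

struct arg { const char *name; const char *type; };

int main(void)
{
	const struct arg args[] = { { "fd", "u64" }, { "path", "string" } };
	char buf[128];
	int pos = 0;

	pos += snprintf(buf + pos, sizeof(buf) - pos, "\"(%%lx)\"");
	for (int i = 0; i < 2; i++) {
		if (strcmp(args[i].type, "string") == 0)
			pos += snprintf(buf + pos, sizeof(buf) - pos,
					", __get_str(%s)", args[i].name);
		else
			pos += snprintf(buf + pos, sizeof(buf) - pos,
					", REC->%s", args[i].name);
	}
	printf("print fmt: %s\n", buf);
	return 0;
}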
@@ -1339,11 +1551,11 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1339 struct ftrace_event_call *call = &tp->call; 1551 struct ftrace_event_call *call = &tp->call;
1340 struct kprobe_trace_entry_head *entry; 1552 struct kprobe_trace_entry_head *entry;
1341 struct hlist_head *head; 1553 struct hlist_head *head;
1342 u8 *data; 1554 int size, __size, dsize;
1343 int size, __size, i;
1344 int rctx; 1555 int rctx;
1345 1556
1346 __size = sizeof(*entry) + tp->size; 1557 dsize = __get_data_size(tp, regs);
1558 __size = sizeof(*entry) + tp->size + dsize;
1347 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1559 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1348 size -= sizeof(u32); 1560 size -= sizeof(u32);
1349 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1561 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1355,9 +1567,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1355 return; 1567 return;
1356 1568
1357 entry->ip = (unsigned long)kp->addr; 1569 entry->ip = (unsigned long)kp->addr;
1358 data = (u8 *)&entry[1]; 1570 memset(&entry[1], 0, dsize);
1359 for (i = 0; i < tp->nr_args; i++) 1571 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1361 1572
1362 head = this_cpu_ptr(call->perf_events); 1573 head = this_cpu_ptr(call->perf_events);
1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1574 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
@@ -1371,11 +1582,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1371 struct ftrace_event_call *call = &tp->call; 1582 struct ftrace_event_call *call = &tp->call;
1372 struct kretprobe_trace_entry_head *entry; 1583 struct kretprobe_trace_entry_head *entry;
1373 struct hlist_head *head; 1584 struct hlist_head *head;
1374 u8 *data; 1585 int size, __size, dsize;
1375 int size, __size, i;
1376 int rctx; 1586 int rctx;
1377 1587
1378 __size = sizeof(*entry) + tp->size; 1588 dsize = __get_data_size(tp, regs);
1589 __size = sizeof(*entry) + tp->size + dsize;
1379 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1590 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1380 size -= sizeof(u32); 1591 size -= sizeof(u32);
1381 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1592 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1388,9 +1599,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1388 1599
1389 entry->func = (unsigned long)tp->rp.kp.addr; 1600 entry->func = (unsigned long)tp->rp.kp.addr;
1390 entry->ret_ip = (unsigned long)ri->ret_addr; 1601 entry->ret_ip = (unsigned long)ri->ret_addr;
1391 data = (u8 *)&entry[1]; 1602 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1392 for (i = 0; i < tp->nr_args; i++)
1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1394 1603
1395 head = this_cpu_ptr(call->perf_events); 1604 head = this_cpu_ptr(call->perf_events);
1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1605 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
@@ -1486,15 +1695,12 @@ static int register_probe_event(struct trace_probe *tp)
1486 int ret; 1695 int ret;
1487 1696
1488 /* Initialize ftrace_event_call */ 1697 /* Initialize ftrace_event_call */
1698 INIT_LIST_HEAD(&call->class->fields);
1489 if (probe_is_return(tp)) { 1699 if (probe_is_return(tp)) {
1490 INIT_LIST_HEAD(&call->class->fields);
1491 call->event.funcs = &kretprobe_funcs; 1700 call->event.funcs = &kretprobe_funcs;
1492 call->class->raw_init = probe_event_raw_init;
1493 call->class->define_fields = kretprobe_event_define_fields; 1701 call->class->define_fields = kretprobe_event_define_fields;
1494 } else { 1702 } else {
1495 INIT_LIST_HEAD(&call->class->fields);
1496 call->event.funcs = &kprobe_funcs; 1703 call->event.funcs = &kprobe_funcs;
1497 call->class->raw_init = probe_event_raw_init;
1498 call->class->define_fields = kprobe_event_define_fields; 1704 call->class->define_fields = kprobe_event_define_fields;
1499 } 1705 }
1500 if (set_print_fmt(tp) < 0) 1706 if (set_print_fmt(tp) < 0)
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
deleted file mode 100644
index 8eaf00749b65..000000000000
--- a/kernel/trace/trace_ksym.c
+++ /dev/null
@@ -1,508 +0,0 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/slab.h>
27#include <linux/fs.h>
28
29#include "trace_output.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35#include <asm/atomic.h>
36
37#define KSYM_TRACER_OP_LEN 3 /* rw- */
38
39struct trace_ksym {
40 struct perf_event **ksym_hbp;
41 struct perf_event_attr attr;
42#ifdef CONFIG_PROFILE_KSYM_TRACER
43 atomic64_t counter;
44#endif
45 struct hlist_node ksym_hlist;
46};
47
48static struct trace_array *ksym_trace_array;
49
50static unsigned int ksym_tracing_enabled;
51
52static HLIST_HEAD(ksym_filter_head);
53
54static DEFINE_MUTEX(ksym_tracer_mutex);
55
56#ifdef CONFIG_PROFILE_KSYM_TRACER
57
58#define MAX_UL_INT 0xffffffff
59
60void ksym_collect_stats(unsigned long hbp_hit_addr)
61{
62 struct hlist_node *node;
63 struct trace_ksym *entry;
64
65 rcu_read_lock();
66 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
67 if (entry->attr.bp_addr == hbp_hit_addr) {
68 atomic64_inc(&entry->counter);
69 break;
70 }
71 }
72 rcu_read_unlock();
73}
74#endif /* CONFIG_PROFILE_KSYM_TRACER */
75
76void ksym_hbp_handler(struct perf_event *hbp, int nmi,
77 struct perf_sample_data *data,
78 struct pt_regs *regs)
79{
80 struct ring_buffer_event *event;
81 struct ksym_trace_entry *entry;
82 struct ring_buffer *buffer;
83 int pc;
84
85 if (!ksym_tracing_enabled)
86 return;
87
88 buffer = ksym_trace_array->buffer;
89
90 pc = preempt_count();
91
92 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
93 sizeof(*entry), 0, pc);
94 if (!event)
95 return;
96
97 entry = ring_buffer_event_data(event);
98 entry->ip = instruction_pointer(regs);
99 entry->type = hw_breakpoint_type(hbp);
100 entry->addr = hw_breakpoint_addr(hbp);
101 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
102
103#ifdef CONFIG_PROFILE_KSYM_TRACER
104 ksym_collect_stats(hw_breakpoint_addr(hbp));
105#endif /* CONFIG_PROFILE_KSYM_TRACER */
106
107 trace_buffer_unlock_commit(buffer, event, 0, pc);
108}
109
110/* Valid access types are represented as
111 *
112 * rw- : Set Read/Write Access Breakpoint
113 * -w- : Set Write Access Breakpoint
114 * --- : Clear Breakpoints
115 * --x : Set Execution Break points (Not available yet)
116 *
117 */
118static int ksym_trace_get_access_type(char *str)
119{
120 int access = 0;
121
122 if (str[0] == 'r')
123 access |= HW_BREAKPOINT_R;
124
125 if (str[1] == 'w')
126 access |= HW_BREAKPOINT_W;
127
128 if (str[2] == 'x')
129 access |= HW_BREAKPOINT_X;
130
131 switch (access) {
132 case HW_BREAKPOINT_R:
133 case HW_BREAKPOINT_W:
134 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
135 return access;
136 default:
137 return -EINVAL;
138 }
139}
140
141/*
142 * There can be several possible malformed requests and we attempt to capture
143 * all of them. We enumerate some of the rules
144 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
145 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
146 * <module>:<ksym_name>:<op>.
147 * 2. No delimiter symbol ':' in the input string
148 * 3. Spurious operator symbols or symbols not in their respective positions
149 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
150 * 5. Kernel symbol not a part of /proc/kallsyms
151 * 6. Duplicate requests
152 */
153static int parse_ksym_trace_str(char *input_string, char **ksymname,
154 unsigned long *addr)
155{
156 int ret;
157
158 *ksymname = strsep(&input_string, ":");
159 *addr = kallsyms_lookup_name(*ksymname);
160
161 /* Check for malformed request: (2), (1) and (5) */
162 if ((!input_string) ||
163 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
164 (*addr == 0))
165 return -EINVAL;;
166
167 ret = ksym_trace_get_access_type(input_string);
168
169 return ret;
170}
171
172int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
173{
174 struct trace_ksym *entry;
175 int ret = -ENOMEM;
176
177 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
178 if (!entry)
179 return -ENOMEM;
180
181 hw_breakpoint_init(&entry->attr);
182
183 entry->attr.bp_type = op;
184 entry->attr.bp_addr = addr;
185 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
186
187 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
188 ksym_hbp_handler);
189
190 if (IS_ERR(entry->ksym_hbp)) {
191 ret = PTR_ERR(entry->ksym_hbp);
192 if (ret == -ENOSPC) {
193 printk(KERN_ERR "ksym_tracer: Maximum limit reached."
194 " No new requests for tracing can be accepted now.\n");
195 } else {
196 printk(KERN_INFO "ksym_tracer request failed. Try again"
197 " later!!\n");
198 }
199 goto err;
200 }
201
202 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
203
204 return 0;
205
206err:
207 kfree(entry);
208
209 return ret;
210}
211
212static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
213 size_t count, loff_t *ppos)
214{
215 struct trace_ksym *entry;
216 struct hlist_node *node;
217 struct trace_seq *s;
218 ssize_t cnt = 0;
219 int ret;
220
221 s = kmalloc(sizeof(*s), GFP_KERNEL);
222 if (!s)
223 return -ENOMEM;
224 trace_seq_init(s);
225
226 mutex_lock(&ksym_tracer_mutex);
227
228 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
229 ret = trace_seq_printf(s, "%pS:",
230 (void *)(unsigned long)entry->attr.bp_addr);
231 if (entry->attr.bp_type == HW_BREAKPOINT_R)
232 ret = trace_seq_puts(s, "r--\n");
233 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
234 ret = trace_seq_puts(s, "-w-\n");
235 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
236 ret = trace_seq_puts(s, "rw-\n");
237 WARN_ON_ONCE(!ret);
238 }
239
240 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
241
242 mutex_unlock(&ksym_tracer_mutex);
243
244 kfree(s);
245
246 return cnt;
247}
248
249static void __ksym_trace_reset(void)
250{
251 struct trace_ksym *entry;
252 struct hlist_node *node, *node1;
253
254 mutex_lock(&ksym_tracer_mutex);
255 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
256 ksym_hlist) {
257 unregister_wide_hw_breakpoint(entry->ksym_hbp);
258 hlist_del_rcu(&(entry->ksym_hlist));
259 synchronize_rcu();
260 kfree(entry);
261 }
262 mutex_unlock(&ksym_tracer_mutex);
263}
264
265static ssize_t ksym_trace_filter_write(struct file *file,
266 const char __user *buffer,
267 size_t count, loff_t *ppos)
268{
269 struct trace_ksym *entry;
270 struct hlist_node *node;
271 char *buf, *input_string, *ksymname = NULL;
272 unsigned long ksym_addr = 0;
273 int ret, op, changed = 0;
274
275 buf = kzalloc(count + 1, GFP_KERNEL);
276 if (!buf)
277 return -ENOMEM;
278
279 ret = -EFAULT;
280 if (copy_from_user(buf, buffer, count))
281 goto out;
282
283 buf[count] = '\0';
284 input_string = strstrip(buf);
285
286 /*
287 * Clear all breakpoints if:
288 * 1: echo > ksym_trace_filter
289 * 2: echo 0 > ksym_trace_filter
290 * 3: echo "*:---" > ksym_trace_filter
291 */
292 if (!input_string[0] || !strcmp(input_string, "0") ||
293 !strcmp(input_string, "*:---")) {
294 __ksym_trace_reset();
295 ret = 0;
296 goto out;
297 }
298
299 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
300 if (ret < 0)
301 goto out;
302
303 mutex_lock(&ksym_tracer_mutex);
304
305 ret = -EINVAL;
306 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
307 if (entry->attr.bp_addr == ksym_addr) {
308 /* Check for malformed request: (6) */
309 if (entry->attr.bp_type != op)
310 changed = 1;
311 else
312 goto out_unlock;
313 break;
314 }
315 }
316 if (changed) {
317 unregister_wide_hw_breakpoint(entry->ksym_hbp);
318 entry->attr.bp_type = op;
319 ret = 0;
320 if (op > 0) {
321 entry->ksym_hbp =
322 register_wide_hw_breakpoint(&entry->attr,
323 ksym_hbp_handler);
324 if (IS_ERR(entry->ksym_hbp))
325 ret = PTR_ERR(entry->ksym_hbp);
326 else
327 goto out_unlock;
328 }
329 /* Error or "symbol:---" case: drop it */
330 hlist_del_rcu(&(entry->ksym_hlist));
331 synchronize_rcu();
332 kfree(entry);
333 goto out_unlock;
334 } else {
335 /* Check for malformed request: (4) */
336 if (op)
337 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
338 }
339out_unlock:
340 mutex_unlock(&ksym_tracer_mutex);
341out:
342 kfree(buf);
343 return !ret ? count : ret;
344}
345
346static const struct file_operations ksym_tracing_fops = {
347 .open = tracing_open_generic,
348 .read = ksym_trace_filter_read,
349 .write = ksym_trace_filter_write,
350};
351
352static void ksym_trace_reset(struct trace_array *tr)
353{
354 ksym_tracing_enabled = 0;
355 __ksym_trace_reset();
356}
357
358static int ksym_trace_init(struct trace_array *tr)
359{
360 int cpu, ret = 0;
361
362 for_each_online_cpu(cpu)
363 tracing_reset(tr, cpu);
364 ksym_tracing_enabled = 1;
365 ksym_trace_array = tr;
366
367 return ret;
368}
369
370static void ksym_trace_print_header(struct seq_file *m)
371{
372 seq_puts(m,
373 "# TASK-PID CPU# Symbol "
374 "Type Function\n");
375 seq_puts(m,
376 "# | | | "
377 " | |\n");
378}
379
380static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
381{
382 struct trace_entry *entry = iter->ent;
383 struct trace_seq *s = &iter->seq;
384 struct ksym_trace_entry *field;
385 char str[KSYM_SYMBOL_LEN];
386 int ret;
387
388 if (entry->type != TRACE_KSYM)
389 return TRACE_TYPE_UNHANDLED;
390
391 trace_assign_type(field, entry);
392
393 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
394 entry->pid, iter->cpu, (char *)field->addr);
395 if (!ret)
396 return TRACE_TYPE_PARTIAL_LINE;
397
398 switch (field->type) {
399 case HW_BREAKPOINT_R:
400 ret = trace_seq_printf(s, " R ");
401 break;
402 case HW_BREAKPOINT_W:
403 ret = trace_seq_printf(s, " W ");
404 break;
405 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
406 ret = trace_seq_printf(s, " RW ");
407 break;
408 default:
409 return TRACE_TYPE_PARTIAL_LINE;
410 }
411
412 if (!ret)
413 return TRACE_TYPE_PARTIAL_LINE;
414
415 sprint_symbol(str, field->ip);
416 ret = trace_seq_printf(s, "%s\n", str);
417 if (!ret)
418 return TRACE_TYPE_PARTIAL_LINE;
419
420 return TRACE_TYPE_HANDLED;
421}
422
423struct tracer ksym_tracer __read_mostly =
424{
425 .name = "ksym_tracer",
426 .init = ksym_trace_init,
427 .reset = ksym_trace_reset,
428#ifdef CONFIG_FTRACE_SELFTEST
429 .selftest = trace_selftest_startup_ksym,
430#endif
431 .print_header = ksym_trace_print_header,
432 .print_line = ksym_trace_output
433};
434
435#ifdef CONFIG_PROFILE_KSYM_TRACER
436static int ksym_profile_show(struct seq_file *m, void *v)
437{
438 struct hlist_node *node;
439 struct trace_ksym *entry;
440 int access_type = 0;
441 char fn_name[KSYM_NAME_LEN];
442
443 seq_puts(m, " Access Type ");
444 seq_puts(m, " Symbol Counter\n");
445 seq_puts(m, " ----------- ");
446 seq_puts(m, " ------ -------\n");
447
448 rcu_read_lock();
449 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
450
451 access_type = entry->attr.bp_type;
452
453 switch (access_type) {
454 case HW_BREAKPOINT_R:
455 seq_puts(m, " R ");
456 break;
457 case HW_BREAKPOINT_W:
458 seq_puts(m, " W ");
459 break;
460 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
461 seq_puts(m, " RW ");
462 break;
463 default:
464 seq_puts(m, " NA ");
465 }
466
467 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
468 seq_printf(m, " %-36s", fn_name);
469 else
470 seq_printf(m, " %-36s", "<NA>");
471 seq_printf(m, " %15llu\n",
472 (unsigned long long)atomic64_read(&entry->counter));
473 }
474 rcu_read_unlock();
475
476 return 0;
477}
478
479static int ksym_profile_open(struct inode *node, struct file *file)
480{
481 return single_open(file, ksym_profile_show, NULL);
482}
483
484static const struct file_operations ksym_profile_fops = {
485 .open = ksym_profile_open,
486 .read = seq_read,
487 .llseek = seq_lseek,
488 .release = single_release,
489};
490#endif /* CONFIG_PROFILE_KSYM_TRACER */
491
492__init static int init_ksym_trace(void)
493{
494 struct dentry *d_tracer;
495
496 d_tracer = tracing_init_dentry();
497
498 trace_create_file("ksym_trace_filter", 0644, d_tracer,
499 NULL, &ksym_tracing_fops);
500
501#ifdef CONFIG_PROFILE_KSYM_TRACER
502 trace_create_file("ksym_profile", 0444, d_tracer,
503 NULL, &ksym_profile_fops);
504#endif
505
506 return register_tracer(&ksym_tracer);
507}
508device_initcall(init_ksym_trace);
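For reference, the ksym_trace_filter interface implemented above is driven from the shell; a minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and using pid_max purely as an illustrative symbol (the token format mirrors what the read handler prints):

    # request read-write hardware-breakpoint tracing of a kernel symbol
    echo 'pid_max:rw-' > /sys/kernel/debug/tracing/ksym_trace_filter
    # list active requests (symbol plus r--/-w-/rw- access mask)
    cat /sys/kernel/debug/tracing/ksym_trace_filter
    # clear everything, per the comment in ksym_trace_filter_write()
    echo '*:---' > /sys/kernel/debug/tracing/ksym_trace_filter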
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0e73bc2ef8c5..c9fd5bd02036 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -46,7 +46,6 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
46 struct trace_array_cpu *data; 46 struct trace_array_cpu *data;
47 unsigned long flags; 47 unsigned long flags;
48 long disabled; 48 long disabled;
49 int resched;
50 int cpu; 49 int cpu;
51 int pc; 50 int pc;
52 51
@@ -54,7 +53,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
54 return; 53 return;
55 54
56 pc = preempt_count(); 55 pc = preempt_count();
57 resched = ftrace_preempt_disable(); 56 preempt_disable_notrace();
58 57
59 cpu = raw_smp_processor_id(); 58 cpu = raw_smp_processor_id();
60 if (cpu != wakeup_current_cpu) 59 if (cpu != wakeup_current_cpu)
@@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
74 out: 73 out:
75 atomic_dec(&data->disabled); 74 atomic_dec(&data->disabled);
76 out_enable: 75 out_enable:
77 ftrace_preempt_enable(resched); 76 preempt_enable_notrace();
78} 77}
79 78
80static struct ftrace_ops trace_ops __read_mostly = 79static struct ftrace_ops trace_ops __read_mostly =
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 250e7f9bd2f0..39a5ca4cf15b 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,7 +17,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_BRANCH: 17 case TRACE_BRANCH:
18 case TRACE_GRAPH_ENT: 18 case TRACE_GRAPH_ENT:
19 case TRACE_GRAPH_RET: 19 case TRACE_GRAPH_RET:
20 case TRACE_KSYM:
21 return 1; 20 return 1;
22 } 21 }
23 return 0; 22 return 0;
@@ -755,56 +754,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
755} 754}
756#endif /* CONFIG_BRANCH_TRACER */ 755#endif /* CONFIG_BRANCH_TRACER */
757 756
758#ifdef CONFIG_KSYM_TRACER
759static int ksym_selftest_dummy;
760
761int
762trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
763{
764 unsigned long count;
765 int ret;
766
767 /* start the tracing */
768 ret = tracer_init(trace, tr);
769 if (ret) {
770 warn_failed_init_tracer(trace, ret);
771 return ret;
772 }
773
774 ksym_selftest_dummy = 0;
775 /* Register the read-write tracing request */
776
777 ret = process_new_ksym_entry("ksym_selftest_dummy",
778 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
779 (unsigned long)(&ksym_selftest_dummy));
780
781 if (ret < 0) {
782 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
783 goto ret_path;
784 }
785 /* Perform a read and a write operation over the dummy variable to
786 * trigger the tracer
787 */
788 if (ksym_selftest_dummy == 0)
789 ksym_selftest_dummy++;
790
791 /* stop the tracing. */
792 tracing_stop();
793 /* check the trace buffer */
794 ret = trace_test_buffer(tr, &count);
795 trace->reset(tr);
796 tracing_start();
797
798 /* read & write operations - one each is performed on the dummy variable
799 * triggering two entries in the trace buffer
800 */
801 if (!ret && count != 2) {
802 printk(KERN_CONT "Ksym tracer startup test failed");
803 ret = -1;
804 }
805
806ret_path:
807 return ret;
808}
809#endif /* CONFIG_KSYM_TRACER */
810
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index f4bc9b27de5f..056468eae7cf 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -110,12 +110,12 @@ static inline void check_stack(void)
110static void 110static void
111stack_trace_call(unsigned long ip, unsigned long parent_ip) 111stack_trace_call(unsigned long ip, unsigned long parent_ip)
112{ 112{
113 int cpu, resched; 113 int cpu;
114 114
115 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
116 return; 116 return;
117 117
118 resched = ftrace_preempt_disable(); 118 preempt_disable_notrace();
119 119
120 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
121 /* no atomic needed, we only modify this variable by this cpu */ 121 /* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
127 out: 127 out:
128 per_cpu(trace_active, cpu)--; 128 per_cpu(trace_active, cpu)--;
129 /* prevent recursion in schedule */ 129 /* prevent recursion in schedule */
130 ftrace_preempt_enable(resched); 130 preempt_enable_notrace();
131} 131}
132 132
133static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 34e35804304b..bac752f0cfb5 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,6 +23,9 @@ static int syscall_exit_register(struct ftrace_event_call *event,
23static int syscall_enter_define_fields(struct ftrace_event_call *call); 23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call); 24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25 25
26/* All syscall exit events have the same fields */
27static LIST_HEAD(syscall_exit_fields);
28
26static struct list_head * 29static struct list_head *
27syscall_get_enter_fields(struct ftrace_event_call *call) 30syscall_get_enter_fields(struct ftrace_event_call *call)
28{ 31{
@@ -34,9 +37,7 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
34static struct list_head * 37static struct list_head *
35syscall_get_exit_fields(struct ftrace_event_call *call) 38syscall_get_exit_fields(struct ftrace_event_call *call)
36{ 39{
37 struct syscall_metadata *entry = call->data; 40 return &syscall_exit_fields;
38
39 return &entry->exit_fields;
40} 41}
41 42
42struct trace_event_functions enter_syscall_print_funcs = { 43struct trace_event_functions enter_syscall_print_funcs = {
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index a7974a552ca9..c080956f4d8e 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -33,12 +33,13 @@ static DEFINE_MUTEX(sample_timer_lock);
33 */ 33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); 34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35 35
36struct stack_frame { 36struct stack_frame_user {
37 const void __user *next_fp; 37 const void __user *next_fp;
38 unsigned long return_address; 38 unsigned long return_address;
39}; 39};
40 40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 41static int
42copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
42{ 43{
43 int ret; 44 int ret;
44 45
@@ -125,7 +126,7 @@ trace_kernel(struct pt_regs *regs, struct trace_array *tr,
125static void timer_notify(struct pt_regs *regs, int cpu) 126static void timer_notify(struct pt_regs *regs, int cpu)
126{ 127{
127 struct trace_array_cpu *data; 128 struct trace_array_cpu *data;
128 struct stack_frame frame; 129 struct stack_frame_user frame;
129 struct trace_array *tr; 130 struct trace_array *tr;
130 const void __user *fp; 131 const void __user *fp;
131 int is_user; 132 int is_user;
diff --git a/mm/mmap.c b/mm/mmap.c
index 456ec6f27889..e38e910cb756 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1734,8 +1734,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1734 grow = (address - vma->vm_end) >> PAGE_SHIFT; 1734 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1735 1735
1736 error = acct_stack_growth(vma, size, grow); 1736 error = acct_stack_growth(vma, size, grow);
1737 if (!error) 1737 if (!error) {
1738 vma->vm_end = address; 1738 vma->vm_end = address;
1739 perf_event_mmap(vma);
1740 }
1739 } 1741 }
1740 anon_vma_unlock(vma); 1742 anon_vma_unlock(vma);
1741 return error; 1743 return error;
@@ -1781,6 +1783,7 @@ static int expand_downwards(struct vm_area_struct *vma,
1781 if (!error) { 1783 if (!error) {
1782 vma->vm_start = address; 1784 vma->vm_start = address;
1783 vma->vm_pgoff -= grow; 1785 vma->vm_pgoff -= grow;
1786 perf_event_mmap(vma);
1784 } 1787 }
1785 } 1788 }
1786 anon_vma_unlock(vma); 1789 anon_vma_unlock(vma);
@@ -2208,6 +2211,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2208 vma->vm_page_prot = vm_get_page_prot(flags); 2211 vma->vm_page_prot = vm_get_page_prot(flags);
2209 vma_link(mm, vma, prev, rb_link, rb_parent); 2212 vma_link(mm, vma, prev, rb_link, rb_parent);
2210out: 2213out:
2214 perf_event_mmap(vma);
2211 mm->total_vm += len >> PAGE_SHIFT; 2215 mm->total_vm += len >> PAGE_SHIFT;
2212 if (flags & VM_LOCKED) { 2216 if (flags & VM_LOCKED) {
2213 if (!mlock_vma_pages_range(vma, addr, addr + len)) 2217 if (!mlock_vma_pages_range(vma, addr, addr + len))
diff --git a/mm/slab.c b/mm/slab.c
index e49f8f46f46d..47360c3e5abd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,6 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <linux/kmemtrace.h>
106#include <linux/rcupdate.h> 105#include <linux/rcupdate.h>
107#include <linux/string.h> 106#include <linux/string.h>
108#include <linux/uaccess.h> 107#include <linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index 23631e2bb57a..a82ab5811bd9 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,8 +66,10 @@
66#include <linux/module.h> 66#include <linux/module.h>
67#include <linux/rcupdate.h> 67#include <linux/rcupdate.h>
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/kmemtrace.h>
70#include <linux/kmemleak.h> 69#include <linux/kmemleak.h>
70
71#include <trace/events/kmem.h>
72
71#include <asm/atomic.h> 73#include <asm/atomic.h>
72 74
73/* 75/*
diff --git a/mm/slub.c b/mm/slub.c
index 578f68f3c51f..7bb7940f4eee 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/kmemtrace.h>
21#include <linux/kmemcheck.h> 20#include <linux/kmemcheck.h>
22#include <linux/cpu.h> 21#include <linux/cpu.h>
23#include <linux/cpuset.h> 22#include <linux/cpuset.h>
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 3a681ef25306..cea12744a671 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -111,13 +111,38 @@ tar%pkg: FORCE
111clean-dirs += $(objtree)/tar-install/ 111clean-dirs += $(objtree)/tar-install/
112 112
113 113
114# perf-pkg - generate a source tarball with perf source
115# ---------------------------------------------------------------------------
116
117perf-tar=perf-$(KERNELVERSION)
118
119quiet_cmd_perf_tar = TAR
120 cmd_perf_tar = \
121git archive --prefix=$(perf-tar)/ HEAD^{tree} \
122 $$(cat $(srctree)/tools/perf/MANIFEST) -o $(perf-tar).tar; \
123mkdir -p $(perf-tar); \
124git rev-parse HEAD > $(perf-tar)/HEAD; \
125tar rf $(perf-tar).tar $(perf-tar)/HEAD; \
126rm -r $(perf-tar); \
127$(if $(findstring tar-src,$@),, \
128$(if $(findstring bz2,$@),bzip2, \
129$(if $(findstring gz,$@),gzip, \
130$(error unknown target $@))) \
131 -f -9 $(perf-tar).tar)
132
133perf-%pkg: FORCE
134 $(call cmd,perf_tar)
135
114# Help text displayed when executing 'make help' 136# Help text displayed when executing 'make help'
115# --------------------------------------------------------------------------- 137# ---------------------------------------------------------------------------
116help: FORCE 138help: FORCE
117 @echo ' rpm-pkg - Build both source and binary RPM kernel packages' 139 @echo ' rpm-pkg - Build both source and binary RPM kernel packages'
118 @echo ' binrpm-pkg - Build only the binary kernel package' 140 @echo ' binrpm-pkg - Build only the binary kernel package'
119 @echo ' deb-pkg - Build the kernel as a deb package' 141 @echo ' deb-pkg - Build the kernel as a deb package'
120 @echo ' tar-pkg - Build the kernel as an uncompressed tarball' 142 @echo ' tar-pkg - Build the kernel as an uncompressed tarball'
121 @echo ' targz-pkg - Build the kernel as a gzip compressed tarball' 143 @echo ' targz-pkg - Build the kernel as a gzip compressed tarball'
122 @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball' 144 @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball'
145 @echo ' perf-tar-src-pkg - Build $(perf-tar).tar source tarball'
146 @echo ' perf-targz-src-pkg - Build $(perf-tar).tar.gz source tarball'
147 @echo ' perf-tarbz2-src-pkg - Build $(perf-tar).tar.bz2 source tarball'
123 148
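The new perf-*pkg targets plug into the same packaging flow as the existing tar targets; a brief sketch (the exact tarball name depends on KERNELVERSION):

    # build a gzip-compressed perf source tarball from the files listed in tools/perf/MANIFEST
    make perf-targz-src-pkg
    # the result is perf-<KERNELVERSION>.tar.gz in the directory make was run from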
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index e1d60d780784..cb43289e447f 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -18,3 +18,5 @@ perf-archive
18tags 18tags
19TAGS 19TAGS
20cscope* 20cscope*
21config.mak
22config.mak.autogen
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 94a258c96a44..27d52dae5a43 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -31,6 +31,10 @@ OPTIONS
31--vmlinux=PATH:: 31--vmlinux=PATH::
32 Specify vmlinux path which has debuginfo (Dwarf binary). 32 Specify vmlinux path which has debuginfo (Dwarf binary).
33 33
34-s::
35--source=PATH::
36 Specify path to kernel source.
37
34-v:: 38-v::
35--verbose:: 39--verbose::
36 Be more verbose (show parsed arguments, etc). 40 Be more verbose (show parsed arguments, etc).
@@ -90,8 +94,8 @@ Each probe argument follows below syntax.
90 94
91 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 95 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
92 96
93'NAME' specifies the name of this argument (optional). You can use the name of a local variable, local data structure member (e.g. var->field, var.field2), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) 97'NAME' specifies the name of this argument (optional). You can use the name of a local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
94'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. 98'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
95 99
96LINE SYNTAX 100LINE SYNTAX
97----------- 101-----------
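A hedged illustration of the argument syntax above (do_sys_open and its filename parameter are example choices only; what is actually available depends on the kernel's debuginfo):

    # define a probe on do_sys_open and capture its filename argument as a string
    perf probe -a 'do_sys_open filename:string'
    # then record hits of the new probe event
    perf record -e probe:do_sys_open -aR sleep 1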
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 34e255fc3e2f..3ee27dccfde9 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -103,6 +103,19 @@ OPTIONS
103--raw-samples:: 103--raw-samples::
104Collect raw sample records from all opened counters (default for tracepoint counters). 104Collect raw sample records from all opened counters (default for tracepoint counters).
105 105
106-C::
107--cpu::
108Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
109comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
110In per-thread mode with inheritance mode on (default), samples are captured only when
111the thread executes on the designated CPUs. Default is to monitor all CPUs.
112
113-N::
114--no-buildid-cache::
115Do not update the buildid cache. This saves some overhead in situations
116where the information in the perf.data file (which includes buildids)
117is sufficient.
118
106SEE ALSO 119SEE ALSO
107-------- 120--------
108linkperf:perf-stat[1], linkperf:perf-list[1] 121linkperf:perf-stat[1], linkperf:perf-list[1]
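A short sketch of the new record options described above (my_workload is a placeholder command):

    # sample system-wide, but only on CPUs 0, 1 and 3
    perf record -a -C 0,1,3 sleep 5
    # record a workload without updating the buildid cache
    perf record -N -- ./my_workload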
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 909fa766fa1c..4b3a2d46b437 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -46,6 +46,13 @@ OPTIONS
46-B:: 46-B::
47 print large numbers with thousands' separators according to locale 47 print large numbers with thousands' separators according to locale
48 48
49-C::
50--cpu=::
51Count only on the list of cpus provided. Multiple CPUs can be provided as a
52comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
53In per-thread mode, this option is ignored. The -a option is still necessary
54to activate system-wide monitoring. Default is to count on all CPUs.
55
49EXAMPLES 56EXAMPLES
50-------- 57--------
51 58
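Matching the option text above, a minimal perf stat invocation restricted to a CPU list might look like:

    # count events system-wide, but only on CPUs 0-2
    perf stat -a -C 0-2 sleep 3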
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 785b9fc32a46..1f9687663f2a 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -25,9 +25,11 @@ OPTIONS
25--count=<count>:: 25--count=<count>::
26 Event period to sample. 26 Event period to sample.
27 27
28-C <cpu>:: 28-C <cpu-list>::
29--CPU=<cpu>:: 29--cpu=<cpu>::
30 CPU to profile. 30Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
31comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
32Default is to monitor all CPUs.
31 33
32-d <seconds>:: 34-d <seconds>::
33--delay=<seconds>:: 35--delay=<seconds>::
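And the corresponding perf top form, monitoring the same kind of CPU list interactively:

    perf top -C 0-2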
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
new file mode 100644
index 000000000000..8c7fc0c8f0b8
--- /dev/null
+++ b/tools/perf/MANIFEST
@@ -0,0 +1,12 @@
1tools/perf
2include/linux/perf_event.h
3include/linux/rbtree.h
4include/linux/list.h
5include/linux/hash.h
6include/linux/stringify.h
7lib/rbtree.c
8include/linux/swab.h
9arch/*/include/asm/unistd*.h
10include/linux/poison.h
11include/linux/magic.h
12include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 3d8f31ed771d..26f626d45a9e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -285,14 +285,10 @@ else
285 QUIET_STDERR = ">/dev/null 2>&1" 285 QUIET_STDERR = ">/dev/null 2>&1"
286endif 286endif
287 287
288BITBUCKET = "/dev/null" 288-include feature-tests.mak
289 289
290ifneq ($(shell sh -c "(echo '\#include <stdio.h>'; echo 'int main(void) { return puts(\"hi\"); }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) "$(QUIET_STDERR)" && echo y"), y) 290ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
291 BITBUCKET = .perf.dev.null 291 CFLAGS := $(CFLAGS) -fstack-protector-all
292endif
293
294ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o $(BITBUCKET) "$(QUIET_STDERR)" && echo y"), y)
295 CFLAGS := $(CFLAGS) -fstack-protector-all
296endif 292endif
297 293
298 294
@@ -508,7 +504,8 @@ PERFLIBS = $(LIB_FILE)
508-include config.mak 504-include config.mak
509 505
510ifndef NO_DWARF 506ifndef NO_DWARF
511ifneq ($(shell sh -c "(echo '\#include <dwarf.h>'; echo '\#include <libdw.h>'; echo '\#include <version.h>'; echo '\#ifndef _ELFUTILS_PREREQ'; echo '\#error'; echo '\#endif'; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 507FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
508ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
512 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); 509 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
513 NO_DWARF := 1 510 NO_DWARF := 1
514endif # Dwarf support 511endif # Dwarf support
@@ -536,16 +533,18 @@ ifneq ($(OUTPUT),)
536 BASIC_CFLAGS += -I$(OUTPUT) 533 BASIC_CFLAGS += -I$(OUTPUT)
537endif 534endif
538 535
539ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 536FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
540ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 537ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y)
541 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); 538 FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS)
539 ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y)
540 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
541 else
542 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel);
543 endif
542endif 544endif
543 545
544 ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 546ifneq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y)
545 BASIC_CFLAGS += -DLIBELF_NO_MMAP 547 BASIC_CFLAGS += -DLIBELF_NO_MMAP
546 endif
547else
548 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
549endif 548endif
550 549
551ifndef NO_DWARF 550ifndef NO_DWARF
@@ -561,69 +560,80 @@ endif # NO_DWARF
561ifdef NO_NEWT 560ifdef NO_NEWT
562 BASIC_CFLAGS += -DNO_NEWT_SUPPORT 561 BASIC_CFLAGS += -DNO_NEWT_SUPPORT
563else 562else
564ifneq ($(shell sh -c "(echo '\#include <newt.h>'; echo 'int main(void) { newtInit(); newtCls(); return newtFinished(); }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -lnewt -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 563 FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt
565 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev); 564 ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT)),y)
566 BASIC_CFLAGS += -DNO_NEWT_SUPPORT 565 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev);
567else 566 BASIC_CFLAGS += -DNO_NEWT_SUPPORT
568 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h 567 else
569 BASIC_CFLAGS += -I/usr/include/slang 568 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
570 EXTLIBS += -lnewt -lslang 569 BASIC_CFLAGS += -I/usr/include/slang
571 LIB_OBJS += $(OUTPUT)util/newt.o 570 EXTLIBS += -lnewt -lslang
572endif 571 LIB_OBJS += $(OUTPUT)util/newt.o
573endif # NO_NEWT 572 endif
574
575ifndef NO_LIBPERL
576PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
577PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
578endif 573endif
579 574
580ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o $(BITBUCKET) $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) 575ifdef NO_LIBPERL
581 BASIC_CFLAGS += -DNO_LIBPERL 576 BASIC_CFLAGS += -DNO_LIBPERL
582else 577else
583 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 578 PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
584 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o 579 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
585 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o 580 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
586endif
587 581
588ifndef NO_LIBPYTHON 582 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
589PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` 583 BASIC_CFLAGS += -DNO_LIBPERL
590PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 584 else
585 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
586 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
587 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
588 endif
591endif 589endif
592 590
593ifneq ($(shell sh -c "(echo '\#include <Python.h>'; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o $(BITBUCKET) $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) 591ifdef NO_LIBPYTHON
594 BASIC_CFLAGS += -DNO_LIBPYTHON 592 BASIC_CFLAGS += -DNO_LIBPYTHON
595else 593else
596 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) 594 PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null`
597 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 595 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
598 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 596 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
597 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
598 BASIC_CFLAGS += -DNO_LIBPYTHON
599 else
600 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS)
601 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
602 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
603 endif
599endif 604endif
600 605
601ifdef NO_DEMANGLE 606ifdef NO_DEMANGLE
602 BASIC_CFLAGS += -DNO_DEMANGLE 607 BASIC_CFLAGS += -DNO_DEMANGLE
603else ifdef HAVE_CPLUS_DEMANGLE
604 EXTLIBS += -liberty
605 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
606else 608else
607 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y") 609 ifdef HAVE_CPLUS_DEMANGLE
608 610 EXTLIBS += -liberty
609 ifeq ($(has_bfd),y) 611 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
610 EXTLIBS += -lbfd 612 else
611 else 613 FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd
612 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y") 614 has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
613 ifeq ($(has_bfd_iberty),y) 615 ifeq ($(has_bfd),y)
614 EXTLIBS += -lbfd -liberty 616 EXTLIBS += -lbfd
615 else 617 else
616 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y") 618 FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
617 ifeq ($(has_bfd_iberty_z),y) 619 has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY))
618 EXTLIBS += -lbfd -liberty -lz 620 ifeq ($(has_bfd_iberty),y)
621 EXTLIBS += -lbfd -liberty
619 else 622 else
620 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y") 623 FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
621 ifeq ($(has_cplus_demangle),y) 624 has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z))
622 EXTLIBS += -liberty 625 ifeq ($(has_bfd_iberty_z),y)
623 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE 626 EXTLIBS += -lbfd -liberty -lz
624 else 627 else
625 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) 628 FLAGS_CPLUS_DEMANGLE=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -liberty
626 BASIC_CFLAGS += -DNO_DEMANGLE 629 has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE))
630 ifeq ($(has_cplus_demangle),y)
631 EXTLIBS += -liberty
632 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
633 else
634 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
635 BASIC_CFLAGS += -DNO_DEMANGLE
636 endif
627 endif 637 endif
628 endif 638 endif
629 endif 639 endif
@@ -865,7 +875,7 @@ export TAR INSTALL DESTDIR SHELL_PATH
865 875
866SHELL = $(SHELL_PATH) 876SHELL = $(SHELL_PATH)
867 877
868all:: .perf.dev.null shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS 878all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS
869ifneq (,$X) 879ifneq (,$X)
870 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) 880 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
871endif 881endif
@@ -1195,11 +1205,6 @@ clean:
1195.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS 1205.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
1196.PHONY: .FORCE-PERF-BUILD-OPTIONS 1206.PHONY: .FORCE-PERF-BUILD-OPTIONS
1197 1207
1198.perf.dev.null:
1199 touch .perf.dev.null
1200
1201.INTERMEDIATE: .perf.dev.null
1202
1203### Make sure built-ins do not have dups and listed in perf.c 1208### Make sure built-ins do not have dups and listed in perf.c
1204# 1209#
1205check-builtins:: 1210check-builtins::
diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile
new file mode 100644
index 000000000000..15130b50dfe3
--- /dev/null
+++ b/tools/perf/arch/sh/Makefile
@@ -0,0 +1,4 @@
1ifndef NO_DWARF
2PERF_HAVE_DWARF_REGS := 1
3LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
4endif
diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c
new file mode 100644
index 000000000000..a11edb007a6c
--- /dev/null
+++ b/tools/perf/arch/sh/util/dwarf-regs.c
@@ -0,0 +1,55 @@
1/*
2 * Mapping of DWARF debug register numbers into register names.
3 *
4 * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21
22#include <libio.h>
23#include <dwarf-regs.h>
24
25/*
26 * Generic dwarf analysis helpers
27 */
28
29#define SH_MAX_REGS 18
30const char *sh_regs_table[SH_MAX_REGS] = {
31 "r0",
32 "r1",
33 "r2",
34 "r3",
35 "r4",
36 "r5",
37 "r6",
38 "r7",
39 "r8",
40 "r9",
41 "r10",
42 "r11",
43 "r12",
44 "r13",
45 "r14",
46 "r15",
47 "pc",
48 "pr",
49};
50
51/* Return architecture dependent register string (for kprobe-tracer) */
52const char *get_arch_regstr(unsigned int n)
53{
54 return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL;
55}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 96db5248e995..fd20670ce986 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -61,11 +61,9 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
61static int process_sample_event(event_t *event, struct perf_session *session) 61static int process_sample_event(event_t *event, struct perf_session *session)
62{ 62{
63 struct addr_location al; 63 struct addr_location al;
64 struct sample_data data;
64 65
65 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, 66 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
66 event->ip.pid, event->ip.ip);
67
68 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
69 pr_warning("problem processing %d event, skipping it.\n", 67 pr_warning("problem processing %d event, skipping it.\n",
70 event->header.type); 68 event->header.type);
71 return -1; 69 return -1;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index f8e3d1852029..29ad20e67919 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -78,8 +78,7 @@ static int __cmd_buildid_cache(void)
78 struct str_node *pos; 78 struct str_node *pos;
79 char debugdir[PATH_MAX]; 79 char debugdir[PATH_MAX];
80 80
81 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), 81 snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
82 DEBUG_CACHE_DIR);
83 82
84 if (add_name_list_str) { 83 if (add_name_list_str) {
85 list = strlist__new(true, add_name_list_str); 84 list = strlist__new(true, add_name_list_str);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index a6e2fdc7a04e..39e6627ebb96 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -35,10 +35,7 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
35 struct addr_location al; 35 struct addr_location al;
36 struct sample_data data = { .period = 1, }; 36 struct sample_data data = { .period = 1, };
37 37
38 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, 38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
39 event->ip.pid, event->ip.ip);
40
41 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
42 pr_warning("problem processing %d event, skipping it.\n", 39 pr_warning("problem processing %d event, skipping it.\n",
43 event->header.type); 40 event->header.type);
44 return -1; 41 return -1;
@@ -47,8 +44,6 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
47 if (al.filtered || al.sym == NULL) 44 if (al.filtered || al.sym == NULL)
48 return 0; 45 return 0;
49 46
50 event__parse_sample(event, session->sample_type, &data);
51
52 if (hists__add_entry(&session->hists, &al, data.period)) { 47 if (hists__add_entry(&session->hists, &al, data.period)) {
53 pr_warning("problem incrementing symbol period, skipping event\n"); 48 pr_warning("problem incrementing symbol period, skipping event\n");
54 return -1; 49 return -1;
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index e4a4da32a568..54551867e7e0 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -182,6 +182,8 @@ static const struct option options[] = {
182 "Show source code lines.", opt_show_lines), 182 "Show source code lines.", opt_show_lines),
183 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 183 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
184 "file", "vmlinux pathname"), 184 "file", "vmlinux pathname"),
185 OPT_STRING('s', "source", &symbol_conf.source_prefix,
186 "directory", "path to kernel source"),
185#endif 187#endif
186 OPT__DRY_RUN(&probe_event_dry_run), 188 OPT__DRY_RUN(&probe_event_dry_run),
187 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 189 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 711745f56bba..b93879677cca 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,7 +49,6 @@ static int group = 0;
49static int realtime_prio = 0; 49static int realtime_prio = 0;
50static bool raw_samples = false; 50static bool raw_samples = false;
51static bool system_wide = false; 51static bool system_wide = false;
52static int profile_cpu = -1;
53static pid_t target_pid = -1; 52static pid_t target_pid = -1;
54static pid_t target_tid = -1; 53static pid_t target_tid = -1;
55static pid_t *all_tids = NULL; 54static pid_t *all_tids = NULL;
@@ -61,6 +60,7 @@ static bool call_graph = false;
61static bool inherit_stat = false; 60static bool inherit_stat = false;
62static bool no_samples = false; 61static bool no_samples = false;
63static bool sample_address = false; 62static bool sample_address = false;
63static bool no_buildid = false;
64 64
65static long samples = 0; 65static long samples = 0;
66static u64 bytes_written = 0; 66static u64 bytes_written = 0;
@@ -74,6 +74,7 @@ static int file_new = 1;
74static off_t post_processing_offset; 74static off_t post_processing_offset;
75 75
76static struct perf_session *session; 76static struct perf_session *session;
77static const char *cpu_list;
77 78
78struct mmap_data { 79struct mmap_data {
79 int counter; 80 int counter;
@@ -268,12 +269,17 @@ static void create_counter(int counter, int cpu)
268 if (inherit_stat) 269 if (inherit_stat)
269 attr->inherit_stat = 1; 270 attr->inherit_stat = 1;
270 271
271 if (sample_address) 272 if (sample_address) {
272 attr->sample_type |= PERF_SAMPLE_ADDR; 273 attr->sample_type |= PERF_SAMPLE_ADDR;
274 attr->mmap_data = track;
275 }
273 276
274 if (call_graph) 277 if (call_graph)
275 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 278 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
276 279
280 if (system_wide)
281 attr->sample_type |= PERF_SAMPLE_CPU;
282
277 if (raw_samples) { 283 if (raw_samples) {
278 attr->sample_type |= PERF_SAMPLE_TIME; 284 attr->sample_type |= PERF_SAMPLE_TIME;
279 attr->sample_type |= PERF_SAMPLE_RAW; 285 attr->sample_type |= PERF_SAMPLE_RAW;
@@ -300,7 +306,7 @@ try_again:
300 die("Permission error - are you root?\n" 306 die("Permission error - are you root?\n"
301 "\t Consider tweaking" 307 "\t Consider tweaking"
302 " /proc/sys/kernel/perf_event_paranoid.\n"); 308 " /proc/sys/kernel/perf_event_paranoid.\n");
303 else if (err == ENODEV && profile_cpu != -1) { 309 else if (err == ENODEV && cpu_list) {
304 die("No such device - did you specify" 310 die("No such device - did you specify"
305 " an out-of-range profile CPU?\n"); 311 " an out-of-range profile CPU?\n");
306 } 312 }
@@ -439,8 +445,6 @@ static void atexit_header(void)
439static void event__synthesize_guest_os(struct machine *machine, void *data) 445static void event__synthesize_guest_os(struct machine *machine, void *data)
440{ 446{
441 int err; 447 int err;
442 char *guest_kallsyms;
443 char path[PATH_MAX];
444 struct perf_session *psession = data; 448 struct perf_session *psession = data;
445 449
446 if (machine__is_host(machine)) 450 if (machine__is_host(machine))
@@ -460,13 +464,6 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
460 pr_err("Couldn't record guest kernel [%d]'s reference" 464 pr_err("Couldn't record guest kernel [%d]'s reference"
461 " relocation symbol.\n", machine->pid); 465 " relocation symbol.\n", machine->pid);
462 466
463 if (machine__is_default_guest(machine))
464 guest_kallsyms = (char *) symbol_conf.default_guest_kallsyms;
465 else {
466 sprintf(path, "%s/proc/kallsyms", machine->root_dir);
467 guest_kallsyms = path;
468 }
469
470 /* 467 /*
471 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 468 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
472 * have no _text sometimes. 469 * have no _text sometimes.
@@ -622,10 +619,15 @@ static int __cmd_record(int argc, const char **argv)
622 close(child_ready_pipe[0]); 619 close(child_ready_pipe[0]);
623 } 620 }
624 621
625 if ((!system_wide && no_inherit) || profile_cpu != -1) { 622 nr_cpus = read_cpu_map(cpu_list);
626 open_counters(profile_cpu); 623 if (nr_cpus < 1) {
624 perror("failed to collect number of CPUs\n");
625 return -1;
626 }
627
628 if (!system_wide && no_inherit && !cpu_list) {
629 open_counters(-1);
627 } else { 630 } else {
628 nr_cpus = read_cpu_map();
629 for (i = 0; i < nr_cpus; i++) 631 for (i = 0; i < nr_cpus; i++)
630 open_counters(cpumap[i]); 632 open_counters(cpumap[i]);
631 } 633 }
@@ -704,7 +706,7 @@ static int __cmd_record(int argc, const char **argv)
704 if (perf_guest) 706 if (perf_guest)
705 perf_session__process_machines(session, event__synthesize_guest_os); 707 perf_session__process_machines(session, event__synthesize_guest_os);
706 708
707 if (!system_wide && profile_cpu == -1) 709 if (!system_wide)
708 event__synthesize_thread(target_tid, process_synthesized_event, 710 event__synthesize_thread(target_tid, process_synthesized_event,
709 session); 711 session);
710 else 712 else
@@ -794,8 +796,8 @@ static const struct option options[] = {
794 "system-wide collection from all CPUs"), 796 "system-wide collection from all CPUs"),
795 OPT_BOOLEAN('A', "append", &append_file, 797 OPT_BOOLEAN('A', "append", &append_file,
796 "append to the output file to do incremental profiling"), 798 "append to the output file to do incremental profiling"),
797 OPT_INTEGER('C', "profile_cpu", &profile_cpu, 799 OPT_STRING('C', "cpu", &cpu_list, "cpu",
798 "CPU to profile on"), 800 "list of cpus to monitor"),
799 OPT_BOOLEAN('f', "force", &force, 801 OPT_BOOLEAN('f', "force", &force,
800 "overwrite existing data file (deprecated)"), 802 "overwrite existing data file (deprecated)"),
801 OPT_U64('c', "count", &user_interval, "event period to sample"), 803 OPT_U64('c', "count", &user_interval, "event period to sample"),
@@ -815,6 +817,8 @@ static const struct option options[] = {
815 "Sample addresses"), 817 "Sample addresses"),
816 OPT_BOOLEAN('n', "no-samples", &no_samples, 818 OPT_BOOLEAN('n', "no-samples", &no_samples,
817 "don't sample"), 819 "don't sample"),
820 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
821 "do not update the buildid cache"),
818 OPT_END() 822 OPT_END()
819}; 823};
820 824
@@ -825,7 +829,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
825 argc = parse_options(argc, argv, options, record_usage, 829 argc = parse_options(argc, argv, options, record_usage,
826 PARSE_OPT_STOP_AT_NON_OPTION); 830 PARSE_OPT_STOP_AT_NON_OPTION);
827 if (!argc && target_pid == -1 && target_tid == -1 && 831 if (!argc && target_pid == -1 && target_tid == -1 &&
828 !system_wide && profile_cpu == -1) 832 !system_wide && !cpu_list)
829 usage_with_options(record_usage, options); 833 usage_with_options(record_usage, options);
830 834
831 if (force && append_file) { 835 if (force && append_file) {
@@ -839,6 +843,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
839 } 843 }
840 844
841 symbol__init(); 845 symbol__init();
846 if (no_buildid)
847 disable_buildid_cache();
842 848
843 if (!nr_counters) { 849 if (!nr_counters) {
844 nr_counters = 1; 850 nr_counters = 1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index fd7407c7205c..ce42bbaa252d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -155,30 +155,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
155 struct addr_location al; 155 struct addr_location al;
156 struct perf_event_attr *attr; 156 struct perf_event_attr *attr;
157 157
158 event__parse_sample(event, session->sample_type, &data); 158 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
159
160 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
161 data.pid, data.tid, data.ip, data.period);
162
163 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
164 unsigned int i;
165
166 dump_printf("... chain: nr:%Lu\n", data.callchain->nr);
167
168 if (!ip_callchain__valid(data.callchain, event)) {
169 pr_debug("call-chain problem with event, "
170 "skipping it.\n");
171 return 0;
172 }
173
174 if (dump_trace) {
175 for (i = 0; i < data.callchain->nr; i++)
176 dump_printf("..... %2d: %016Lx\n",
177 i, data.callchain->ips[i]);
178 }
179 }
180
181 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
182 fprintf(stderr, "problem processing %d event, skipping it.\n", 159 fprintf(stderr, "problem processing %d event, skipping it.\n",
183 event->header.type); 160 event->header.type);
184 return -1; 161 return -1;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9a39ca3c3ac4..a6b4d44f9502 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -69,7 +69,7 @@ static struct perf_event_attr default_attrs[] = {
69}; 69};
70 70
71static bool system_wide = false; 71static bool system_wide = false;
72static unsigned int nr_cpus = 0; 72static int nr_cpus = 0;
73static int run_idx = 0; 73static int run_idx = 0;
74 74
75static int run_count = 1; 75static int run_count = 1;
@@ -82,6 +82,7 @@ static int thread_num = 0;
82static pid_t child_pid = -1; 82static pid_t child_pid = -1;
83static bool null_run = false; 83static bool null_run = false;
84static bool big_num = false; 84static bool big_num = false;
85static const char *cpu_list;
85 86
86 87
87static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 88static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
@@ -158,7 +159,7 @@ static int create_perf_stat_counter(int counter)
158 PERF_FORMAT_TOTAL_TIME_RUNNING; 159 PERF_FORMAT_TOTAL_TIME_RUNNING;
159 160
160 if (system_wide) { 161 if (system_wide) {
161 unsigned int cpu; 162 int cpu;
162 163
163 for (cpu = 0; cpu < nr_cpus; cpu++) { 164 for (cpu = 0; cpu < nr_cpus; cpu++) {
164 fd[cpu][counter][0] = sys_perf_event_open(attr, 165 fd[cpu][counter][0] = sys_perf_event_open(attr,
@@ -208,7 +209,7 @@ static inline int nsec_counter(int counter)
208static void read_counter(int counter) 209static void read_counter(int counter)
209{ 210{
210 u64 count[3], single_count[3]; 211 u64 count[3], single_count[3];
211 unsigned int cpu; 212 int cpu;
212 size_t res, nv; 213 size_t res, nv;
213 int scaled; 214 int scaled;
214 int i, thread; 215 int i, thread;
@@ -542,6 +543,8 @@ static const struct option options[] = {
542 "null run - dont start any counters"), 543 "null run - dont start any counters"),
543 OPT_BOOLEAN('B', "big-num", &big_num, 544 OPT_BOOLEAN('B', "big-num", &big_num,
544 "print large numbers with thousands\' separators"), 545 "print large numbers with thousands\' separators"),
546 OPT_STRING('C', "cpu", &cpu_list, "cpu",
547 "list of cpus to monitor in system-wide"),
545 OPT_END() 548 OPT_END()
546}; 549};
547 550
@@ -566,10 +569,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
566 } 569 }
567 570
568 if (system_wide) 571 if (system_wide)
569 nr_cpus = read_cpu_map(); 572 nr_cpus = read_cpu_map(cpu_list);
570 else 573 else
571 nr_cpus = 1; 574 nr_cpus = 1;
572 575
576 if (nr_cpus < 1)
577 usage_with_options(stat_usage, options);
578
573 if (target_pid != -1) { 579 if (target_pid != -1) {
574 target_tid = target_pid; 580 target_tid = target_pid;
575 thread_num = find_all_tid(target_pid, &all_tids); 581 thread_num = find_all_tid(target_pid, &all_tids);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a66f4272b994..1e8e92e317b9 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -102,6 +102,7 @@ struct sym_entry *sym_filter_entry_sched = NULL;
102static int sym_pcnt_filter = 5; 102static int sym_pcnt_filter = 5;
103static int sym_counter = 0; 103static int sym_counter = 0;
104static int display_weighted = -1; 104static int display_weighted = -1;
105static const char *cpu_list;
105 106
106/* 107/*
107 * Symbols 108 * Symbols
@@ -982,6 +983,7 @@ static void event__process_sample(const event_t *self,
982 u64 ip = self->ip.ip; 983 u64 ip = self->ip.ip;
983 struct sym_entry *syme; 984 struct sym_entry *syme;
984 struct addr_location al; 985 struct addr_location al;
986 struct sample_data data;
985 struct machine *machine; 987 struct machine *machine;
986 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 988 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
987 989
@@ -1024,7 +1026,8 @@ static void event__process_sample(const event_t *self,
1024 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1026 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
1025 exact_samples++; 1027 exact_samples++;
1026 1028
1027 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || 1029 if (event__preprocess_sample(self, session, &al, &data,
1030 symbol_filter) < 0 ||
1028 al.filtered) 1031 al.filtered)
1029 return; 1032 return;
1030 1033
@@ -1351,8 +1354,8 @@ static const struct option options[] = {
1351 "profile events on existing thread id"), 1354 "profile events on existing thread id"),
1352 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1355 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1353 "system-wide collection from all CPUs"), 1356 "system-wide collection from all CPUs"),
1354 OPT_INTEGER('C', "CPU", &profile_cpu, 1357 OPT_STRING('C', "cpu", &cpu_list, "cpu",
1355 "CPU to profile on"), 1358 "list of cpus to monitor"),
1356 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 1359 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1357 "file", "vmlinux pathname"), 1360 "file", "vmlinux pathname"),
1358 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 1361 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
@@ -1428,10 +1431,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1428 return -ENOMEM; 1431 return -ENOMEM;
1429 1432
1430 /* CPU and PID are mutually exclusive */ 1433 /* CPU and PID are mutually exclusive */
1431 if (target_tid > 0 && profile_cpu != -1) { 1434 if (target_tid > 0 && cpu_list) {
1432 printf("WARNING: PID switch overriding CPU\n"); 1435 printf("WARNING: PID switch overriding CPU\n");
1433 sleep(1); 1436 sleep(1);
1434 profile_cpu = -1; 1437 cpu_list = NULL;
1435 } 1438 }
1436 1439
1437 if (!nr_counters) 1440 if (!nr_counters)
@@ -1469,10 +1472,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1469 attrs[counter].sample_period = default_interval; 1472 attrs[counter].sample_period = default_interval;
1470 } 1473 }
1471 1474
1472 if (target_tid != -1 || profile_cpu != -1) 1475 if (target_tid != -1)
1473 nr_cpus = 1; 1476 nr_cpus = 1;
1474 else 1477 else
1475 nr_cpus = read_cpu_map(); 1478 nr_cpus = read_cpu_map(cpu_list);
1479
1480 if (nr_cpus < 1)
1481 usage_with_options(top_usage, options);
1476 1482
1477 get_term_dimensions(&winsize); 1483 get_term_dimensions(&winsize);
1478 if (print_entries == 0) { 1484 if (print_entries == 0) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index dddf3f01b5ab..294da725a57d 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -11,8 +11,9 @@
11 11
12static char const *script_name; 12static char const *script_name;
13static char const *generate_script_lang; 13static char const *generate_script_lang;
14static bool debug_ordering; 14static bool debug_mode;
15static u64 last_timestamp; 15static u64 last_timestamp;
16static u64 nr_unordered;
16 17
17static int default_start_script(const char *script __unused, 18static int default_start_script(const char *script __unused,
18 int argc __unused, 19 int argc __unused,
@@ -91,13 +92,15 @@ static int process_sample_event(event_t *event, struct perf_session *session)
91 } 92 }
92 93
93 if (session->sample_type & PERF_SAMPLE_RAW) { 94 if (session->sample_type & PERF_SAMPLE_RAW) {
94 if (debug_ordering) { 95 if (debug_mode) {
95 if (data.time < last_timestamp) { 96 if (data.time < last_timestamp) {
96 pr_err("Samples misordered, previous: %llu " 97 pr_err("Samples misordered, previous: %llu "
97 "this: %llu\n", last_timestamp, 98 "this: %llu\n", last_timestamp,
98 data.time); 99 data.time);
100 nr_unordered++;
99 } 101 }
100 last_timestamp = data.time; 102 last_timestamp = data.time;
103 return 0;
101 } 104 }
102 /* 105 /*
103 * FIXME: better resolve from pid from the struct trace_entry 106 * FIXME: better resolve from pid from the struct trace_entry
@@ -113,6 +116,15 @@ static int process_sample_event(event_t *event, struct perf_session *session)
113 return 0; 116 return 0;
114} 117}
115 118
119static u64 nr_lost;
120
121static int process_lost_event(event_t *event, struct perf_session *session __used)
122{
123 nr_lost += event->lost.lost;
124
125 return 0;
126}
127
116static struct perf_event_ops event_ops = { 128static struct perf_event_ops event_ops = {
117 .sample = process_sample_event, 129 .sample = process_sample_event,
118 .comm = event__process_comm, 130 .comm = event__process_comm,
@@ -120,6 +132,7 @@ static struct perf_event_ops event_ops = {
120 .event_type = event__process_event_type, 132 .event_type = event__process_event_type,
121 .tracing_data = event__process_tracing_data, 133 .tracing_data = event__process_tracing_data,
122 .build_id = event__process_build_id, 134 .build_id = event__process_build_id,
135 .lost = process_lost_event,
123 .ordered_samples = true, 136 .ordered_samples = true,
124}; 137};
125 138
@@ -132,9 +145,18 @@ static void sig_handler(int sig __unused)
132 145
133static int __cmd_trace(struct perf_session *session) 146static int __cmd_trace(struct perf_session *session)
134{ 147{
148 int ret;
149
135 signal(SIGINT, sig_handler); 150 signal(SIGINT, sig_handler);
136 151
137 return perf_session__process_events(session, &event_ops); 152 ret = perf_session__process_events(session, &event_ops);
153
154 if (debug_mode) {
155 pr_err("Misordered timestamps: %llu\n", nr_unordered);
156 pr_err("Lost events: %llu\n", nr_lost);
157 }
158
159 return ret;
138} 160}
139 161
140struct script_spec { 162struct script_spec {
@@ -544,8 +566,8 @@ static const struct option options[] = {
544 "generate perf-trace.xx script in specified language"), 566 "generate perf-trace.xx script in specified language"),
545 OPT_STRING('i', "input", &input_name, "file", 567 OPT_STRING('i', "input", &input_name, "file",
546 "input file name"), 568 "input file name"),
547 OPT_BOOLEAN('d', "debug-ordering", &debug_ordering, 569 OPT_BOOLEAN('d', "debug-mode", &debug_mode,
548 "check that samples time ordering is monotonic"), 570 "do various checks like samples ordering and lost events"),
549 571
550 OPT_END() 572 OPT_END()
551}; 573};
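
A condensed sketch of the --debug-mode accounting added above; it keeps only the shape of the checks (monotonic timestamps, lost-event totals) and none of the perf session plumbing.

#include <stdio.h>

static unsigned long long last_timestamp, nr_unordered, nr_lost;

/* Sketch: per-sample timestamp check, mirroring process_sample_event(). */
static void account_sample_time(unsigned long long t)
{
	if (t < last_timestamp) {
		fprintf(stderr, "Samples misordered, previous: %llu this: %llu\n",
			last_timestamp, t);
		nr_unordered++;
	}
	last_timestamp = t;
}

/* Sketch: PERF_RECORD_LOST carries a count of dropped records. */
static void account_lost(unsigned long long lost)
{
	nr_lost += lost;
}

/* Printed once at the end of __cmd_trace() when debug_mode is set. */
static void print_debug_totals(void)
{
	fprintf(stderr, "Misordered timestamps: %llu\nLost events: %llu\n",
		nr_unordered, nr_lost);
}
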
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
new file mode 100644
index 000000000000..ddb68e601f0e
--- /dev/null
+++ b/tools/perf/feature-tests.mak
@@ -0,0 +1,119 @@
1define SOURCE_HELLO
2#include <stdio.h>
3int main(void)
4{
5 return puts(\"hi\");
6}
7endef
8
9ifndef NO_DWARF
10define SOURCE_DWARF
11#include <dwarf.h>
12#include <libdw.h>
13#include <version.h>
14#ifndef _ELFUTILS_PREREQ
15#error
16#endif
17
18int main(void)
19{
20 Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
21 return (long)dbg;
22}
23endef
24endif
25
26define SOURCE_LIBELF
27#include <libelf.h>
28
29int main(void)
30{
31 Elf *elf = elf_begin(0, ELF_C_READ, 0);
32 return (long)elf;
33}
34endef
35
36define SOURCE_GLIBC
37#include <gnu/libc-version.h>
38
39int main(void)
40{
41 const char *version = gnu_get_libc_version();
42 return (long)version;
43}
44endef
45
46define SOURCE_ELF_MMAP
47#include <libelf.h>
48int main(void)
49{
50 Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
51 return (long)elf;
52}
53endef
54
55ifndef NO_NEWT
56define SOURCE_NEWT
57#include <newt.h>
58
59int main(void)
60{
61 newtInit();
62 newtCls();
63 return newtFinished();
64}
65endef
66endif
67
68ifndef NO_LIBPERL
69define SOURCE_PERL_EMBED
70#include <EXTERN.h>
71#include <perl.h>
72
73int main(void)
74{
75perl_alloc();
76return 0;
77}
78endef
79endif
80
81ifndef NO_LIBPYTHON
82define SOURCE_PYTHON_EMBED
83#include <Python.h>
84
85int main(void)
86{
87 Py_Initialize();
88 return 0;
89}
90endef
91endif
92
93define SOURCE_BFD
94#include <bfd.h>
95
96int main(void)
97{
98 bfd_demangle(0, 0, 0);
99 return 0;
100}
101endef
102
103define SOURCE_CPLUS_DEMANGLE
104extern char *cplus_demangle(const char *, int);
105
106int main(void)
107{
108 cplus_demangle(0, 0);
109 return 0;
110}
111endef
112
113# try-cc
114# Usage: option = $(call try-cc, source-to-build, cc-options)
115try-cc = $(shell sh -c \
116 'TMP="$(TMPOUT).$$$$"; \
117 echo "$(1)" | \
118 $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
119 rm -f "$$TMP"')
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 2e7a4f417e20..677e59d62a8d 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -7,7 +7,17 @@ if [ $# -ne 0 ] ; then
7 PERF_DATA=$1 7 PERF_DATA=$1
8fi 8fi
9 9
10DEBUGDIR=~/.debug/ 10#
11# PERF_BUILDID_DIR environment variable set by perf
12# path to buildid directory, default to $HOME/.debug
13#
14if [ -z $PERF_BUILDID_DIR ]; then
15 PERF_BUILDID_DIR=~/.debug/
16else
17 # append / to make substitutions work
18 PERF_BUILDID_DIR=$PERF_BUILDID_DIR/
19fi
20
11BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) 21BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
12NOBUILDID=0000000000000000000000000000000000000000 22NOBUILDID=0000000000000000000000000000000000000000
13 23
@@ -22,13 +32,13 @@ MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
22 32
23cut -d ' ' -f 1 $BUILDIDS | \ 33cut -d ' ' -f 1 $BUILDIDS | \
24while read build_id ; do 34while read build_id ; do
25 linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2} 35 linkname=$PERF_BUILDID_DIR.build-id/${build_id:0:2}/${build_id:2}
26 filename=$(readlink -f $linkname) 36 filename=$(readlink -f $linkname)
27 echo ${linkname#$DEBUGDIR} >> $MANIFEST 37 echo ${linkname#$PERF_BUILDID_DIR} >> $MANIFEST
28 echo ${filename#$DEBUGDIR} >> $MANIFEST 38 echo ${filename#$PERF_BUILDID_DIR} >> $MANIFEST
29done 39done
30 40
31tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST 41tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST
32rm -f $MANIFEST $BUILDIDS 42rm -f $MANIFEST $BUILDIDS
33echo -e "Now please run:\n" 43echo -e "Now please run:\n"
34echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" 44echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 6e4871191138..cdd6c03f1e14 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -458,6 +458,8 @@ int main(int argc, const char **argv)
458 handle_options(&argv, &argc, NULL); 458 handle_options(&argv, &argc, NULL);
459 commit_pager_choice(); 459 commit_pager_choice();
460 set_debugfs_path(); 460 set_debugfs_path();
461 set_buildid_dir();
462
461 if (argc > 0) { 463 if (argc > 0) {
462 if (!prefixcmp(argv[0], "--")) 464 if (!prefixcmp(argv[0], "--"))
463 argv[0] += 2; 465 argv[0] += 2;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 70c5cf87d020..5c26e2d314af 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -43,19 +43,17 @@ struct perf_event_ops build_id__mark_dso_hit_ops = {
43char *dso__build_id_filename(struct dso *self, char *bf, size_t size) 43char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
44{ 44{
45 char build_id_hex[BUILD_ID_SIZE * 2 + 1]; 45 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
46 const char *home;
47 46
48 if (!self->has_build_id) 47 if (!self->has_build_id)
49 return NULL; 48 return NULL;
50 49
51 build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex); 50 build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex);
52 home = getenv("HOME");
53 if (bf == NULL) { 51 if (bf == NULL) {
54 if (asprintf(&bf, "%s/%s/.build-id/%.2s/%s", home, 52 if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir,
55 DEBUG_CACHE_DIR, build_id_hex, build_id_hex + 2) < 0) 53 build_id_hex, build_id_hex + 2) < 0)
56 return NULL; 54 return NULL;
57 } else 55 } else
58 snprintf(bf, size, "%s/%s/.build-id/%.2s/%s", home, 56 snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
59 DEBUG_CACHE_DIR, build_id_hex, build_id_hex + 2); 57 build_id_hex, build_id_hex + 2);
60 return bf; 58 return bf;
61} 59}
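
The layout relied on above is <buildid_dir>/.build-id/<first two hex digits>/<remaining hex digits>, where buildid_dir now comes from util/config.c instead of $HOME plus DEBUG_CACHE_DIR. A standalone sketch of that path construction (the function name is illustrative):

#include <stdio.h>

/* Sketch: build a cache path such as
 * /home/user/.debug/.build-id/ab/cdef0123456789... */
static int buildid_cache_path(char *bf, size_t size,
			      const char *buildid_dir, const char *build_id_hex)
{
	return snprintf(bf, size, "%s/.build-id/%.2s/%s",
			buildid_dir, build_id_hex, build_id_hex + 2);
}
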
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 65fe664fddf6..27e9ebe4076e 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -23,6 +23,7 @@ extern int perf_config(config_fn_t fn, void *);
23extern int perf_config_int(const char *, const char *); 23extern int perf_config_int(const char *, const char *);
24extern int perf_config_bool(const char *, const char *); 24extern int perf_config_bool(const char *, const char *);
25extern int config_error_nonbool(const char *); 25extern int config_error_nonbool(const char *);
26extern const char *perf_config_dirname(const char *, const char *);
26 27
27/* pager.c */ 28/* pager.c */
28extern void setup_pager(void); 29extern void setup_pager(void);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 52c777e451ed..f231f43424d2 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -18,7 +18,7 @@
18#include "util.h" 18#include "util.h"
19#include "callchain.h" 19#include "callchain.h"
20 20
21bool ip_callchain__valid(struct ip_callchain *chain, event_t *event) 21bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
22{ 22{
23 unsigned int chain_size = event->header.size; 23 unsigned int chain_size = event->header.size;
24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; 24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index f2e9ee164bd8..624a96c636fd 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -63,5 +63,5 @@ int register_callchain_param(struct callchain_param *param);
63int append_chain(struct callchain_node *root, struct ip_callchain *chain, 63int append_chain(struct callchain_node *root, struct ip_callchain *chain,
64 struct map_symbol *syms, u64 period); 64 struct map_symbol *syms, u64 period);
65 65
66bool ip_callchain__valid(struct ip_callchain *chain, event_t *event); 66bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
67#endif /* __PERF_CALLCHAIN_H */ 67#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index dabe892d0e53..e02d78cae70f 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -11,6 +11,11 @@
11 11
12#define MAXNAME (256) 12#define MAXNAME (256)
13 13
14#define DEBUG_CACHE_DIR ".debug"
15
16
17char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
18
14static FILE *config_file; 19static FILE *config_file;
15static const char *config_file_name; 20static const char *config_file_name;
16static int config_linenr; 21static int config_linenr;
@@ -127,7 +132,7 @@ static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
127 break; 132 break;
128 if (!iskeychar(c)) 133 if (!iskeychar(c))
129 break; 134 break;
130 name[len++] = tolower(c); 135 name[len++] = c;
131 if (len >= MAXNAME) 136 if (len >= MAXNAME)
132 return -1; 137 return -1;
133 } 138 }
@@ -327,6 +332,13 @@ int perf_config_bool(const char *name, const char *value)
327 return !!perf_config_bool_or_int(name, value, &discard); 332 return !!perf_config_bool_or_int(name, value, &discard);
328} 333}
329 334
335const char *perf_config_dirname(const char *name, const char *value)
336{
337 if (!name)
338 return NULL;
339 return value;
340}
341
330static int perf_default_core_config(const char *var __used, const char *value __used) 342static int perf_default_core_config(const char *var __used, const char *value __used)
331{ 343{
332 /* Add other config variables here and to Documentation/config.txt. */ 344 /* Add other config variables here and to Documentation/config.txt. */
@@ -428,3 +440,53 @@ int config_error_nonbool(const char *var)
428{ 440{
429 return error("Missing value for '%s'", var); 441 return error("Missing value for '%s'", var);
430} 442}
443
444struct buildid_dir_config {
445 char *dir;
446};
447
448static int buildid_dir_command_config(const char *var, const char *value,
449 void *data)
450{
451 struct buildid_dir_config *c = data;
452 const char *v;
453
454 /* same dir for all commands */
455 if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) {
456 v = perf_config_dirname(var, value);
457 if (!v)
458 return -1;
459 strncpy(c->dir, v, MAXPATHLEN-1);
460 c->dir[MAXPATHLEN-1] = '\0';
461 }
462 return 0;
463}
464
465static void check_buildid_dir_config(void)
466{
467 struct buildid_dir_config c;
468 c.dir = buildid_dir;
469 perf_config(buildid_dir_command_config, &c);
470}
471
472void set_buildid_dir(void)
473{
474 buildid_dir[0] = '\0';
475
476 /* try config file */
477 check_buildid_dir_config();
478
479 /* default to $HOME/.debug */
480 if (buildid_dir[0] == '\0') {
481 char *v = getenv("HOME");
482 if (v) {
483 snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
484 v, DEBUG_CACHE_DIR);
485 } else {
486 strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
487 }
488 buildid_dir[MAXPATHLEN-1] = '\0';
489 }
490 /* for communicating with external commands */
491 setenv("PERF_BUILDID_DIR", buildid_dir, 1);
492}
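
A trimmed sketch of the perf_config() callback convention that buildid_dir_command_config() above follows: the callback is handed every section.key/value pair from the config file and copies only the keys it owns. The key name buildid.dir is the one introduced here; the struct and function names below are illustrative. set_buildid_dir() then falls back to $HOME/.debug and exports PERF_BUILDID_DIR so shell helpers such as perf-archive see the same directory.

#include <string.h>
#include <sys/param.h>		/* MAXPATHLEN */

struct dir_config {
	char *dir;		/* points into a MAXPATHLEN-sized buffer */
};

/* Sketch: capture one directory-valued config key. */
static int dir_config_cb(const char *var, const char *value, void *data)
{
	struct dir_config *c = data;

	if (!strcmp(var, "buildid.dir")) {
		if (!value)
			return -1;	/* key present but no value given */
		strncpy(c->dir, value, MAXPATHLEN - 1);
		c->dir[MAXPATHLEN - 1] = '\0';
	}
	return 0;			/* keep scanning the rest of the file */
}
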
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 4e01490e51e5..0f9b8d7a7d7e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -20,7 +20,7 @@ static int default_cpu_map(void)
20 return nr_cpus; 20 return nr_cpus;
21} 21}
22 22
23int read_cpu_map(void) 23static int read_all_cpu_map(void)
24{ 24{
25 FILE *onlnf; 25 FILE *onlnf;
26 int nr_cpus = 0; 26 int nr_cpus = 0;
@@ -57,3 +57,58 @@ int read_cpu_map(void)
57 57
58 return default_cpu_map(); 58 return default_cpu_map();
59} 59}
60
61int read_cpu_map(const char *cpu_list)
62{
63 unsigned long start_cpu, end_cpu = 0;
64 char *p = NULL;
65 int i, nr_cpus = 0;
66
67 if (!cpu_list)
68 return read_all_cpu_map();
69
70 if (!isdigit(*cpu_list))
71 goto invalid;
72
73 while (isdigit(*cpu_list)) {
74 p = NULL;
75 start_cpu = strtoul(cpu_list, &p, 0);
76 if (start_cpu >= INT_MAX
77 || (*p != '\0' && *p != ',' && *p != '-'))
78 goto invalid;
79
80 if (*p == '-') {
81 cpu_list = ++p;
82 p = NULL;
83 end_cpu = strtoul(cpu_list, &p, 0);
84
85 if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
86 goto invalid;
87
88 if (end_cpu < start_cpu)
89 goto invalid;
90 } else {
91 end_cpu = start_cpu;
92 }
93
94 for (; start_cpu <= end_cpu; start_cpu++) {
95 /* check for duplicates */
96 for (i = 0; i < nr_cpus; i++)
97 if (cpumap[i] == (int)start_cpu)
98 goto invalid;
99
100 assert(nr_cpus < MAX_NR_CPUS);
101 cpumap[nr_cpus++] = (int)start_cpu;
102 }
103 if (*p)
104 ++p;
105
106 cpu_list = p;
107 }
108 if (nr_cpus > 0)
109 return nr_cpus;
110
111 return default_cpu_map();
112invalid:
113 return -1;
114}
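
The grammar accepted by the new read_cpu_map() is a comma-separated list of CPUs and inclusive ranges, e.g. "0", "0-3" or "0-2,5". Below is a self-contained sketch of the same grammar, without the perf-specific cpumap[]/MAX_NR_CPUS globals and without the duplicate check the real function performs:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch: expand a list such as "0-2,5" into an array of CPU numbers.
 * Returns the count, or -1 on a malformed list. */
static int parse_cpu_list(const char *list, int *cpus, int max)
{
	int n = 0;

	while (isdigit((unsigned char)*list)) {
		char *end;
		long first = strtol(list, &end, 10);
		long last = first;

		if (*end == '-') {
			last = strtol(end + 1, &end, 10);
			if (last < first)
				return -1;
		}
		if (*end != '\0' && *end != ',')
			return -1;

		for (; first <= last && n < max; first++)
			cpus[n++] = (int)first;

		if (*end == '\0')
			return n;
		list = end + 1;			/* step over the ',' */
	}
	return n > 0 ? n : -1;
}

int main(void)
{
	int cpus[64], i, n = parse_cpu_list("0-2,5", cpus, 64);

	for (i = 0; i < n; i++)
		printf("%d\n", cpus[i]);	/* 0 1 2 5 */
	return 0;
}
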
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 86c78bb33098..3e60f56e490e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,7 +1,7 @@
1#ifndef __PERF_CPUMAP_H 1#ifndef __PERF_CPUMAP_H
2#define __PERF_CPUMAP_H 2#define __PERF_CPUMAP_H
3 3
4extern int read_cpu_map(void); 4extern int read_cpu_map(const char *cpu_list);
5extern int cpumap[]; 5extern int cpumap[];
6 6
7#endif /* __PERF_CPUMAP_H */ 7#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 6cddff2bc970..318dab15d177 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -86,12 +86,10 @@ void trace_event(event_t *event)
86 dump_printf_color(" ", color); 86 dump_printf_color(" ", color);
87 for (j = 0; j < 15-(i & 15); j++) 87 for (j = 0; j < 15-(i & 15); j++)
88 dump_printf_color(" ", color); 88 dump_printf_color(" ", color);
89 for (j = 0; j < (i & 15); j++) { 89 for (j = i & ~15; j <= i; j++) {
90 if (isprint(raw_event[i-15+j])) 90 dump_printf_color("%c", color,
91 dump_printf_color("%c", color, 91 isprint(raw_event[j]) ?
92 raw_event[i-15+j]); 92 raw_event[j] : '.');
93 else
94 dump_printf_color(".", color);
95 } 93 }
96 dump_printf_color("\n", color); 94 dump_printf_color("\n", color);
97 } 95 }
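
The rewrite above is the usual "printable byte or dot" idiom for the ASCII column of a hex dump, indexed directly instead of with the old i-15+j arithmetic. A minimal standalone version:

#include <ctype.h>
#include <stdio.h>

/* Sketch: dump a buffer 16 bytes per line, hex on the left, ASCII on the right. */
static void hexdump(const unsigned char *buf, size_t len)
{
	size_t i, j;

	for (i = 0; i < len; i += 16) {
		for (j = i; j < i + 16; j++)
			if (j < len)
				printf("%02x ", buf[j]);
			else
				printf("   ");
		printf(" ");
		for (j = i; j < i + 16 && j < len; j++)
			putchar(isprint(buf[j]) ? buf[j] : '.');
		putchar('\n');
	}
}
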
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 2fbf6a463c81..d7f21d71eb69 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -657,11 +657,36 @@ static void dso__calc_col_width(struct dso *self)
657} 657}
658 658
659int event__preprocess_sample(const event_t *self, struct perf_session *session, 659int event__preprocess_sample(const event_t *self, struct perf_session *session,
660 struct addr_location *al, symbol_filter_t filter) 660 struct addr_location *al, struct sample_data *data,
661 symbol_filter_t filter)
661{ 662{
662 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 663 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
663 struct thread *thread = perf_session__findnew(session, self->ip.pid); 664 struct thread *thread;
665
666 event__parse_sample(self, session->sample_type, data);
667
668 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
669 self->header.misc, data->pid, data->tid, data->ip,
670 data->period, data->cpu);
671
672 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
673 unsigned int i;
674
675 dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
676
677 if (!ip_callchain__valid(data->callchain, self)) {
678 pr_debug("call-chain problem with event, "
679 "skipping it.\n");
680 goto out_filtered;
681 }
664 682
683 if (dump_trace) {
684 for (i = 0; i < data->callchain->nr; i++)
685 dump_printf("..... %2d: %016Lx\n",
686 i, data->callchain->ips[i]);
687 }
688 }
689 thread = perf_session__findnew(session, self->ip.pid);
665 if (thread == NULL) 690 if (thread == NULL)
666 return -1; 691 return -1;
667 692
@@ -687,6 +712,7 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
687 al->map ? al->map->dso->long_name : 712 al->map ? al->map->dso->long_name :
688 al->level == 'H' ? "[hypervisor]" : "<not found>"); 713 al->level == 'H' ? "[hypervisor]" : "<not found>");
689 al->sym = NULL; 714 al->sym = NULL;
715 al->cpu = data->cpu;
690 716
691 if (al->map) { 717 if (al->map) {
692 if (symbol_conf.dso_list && 718 if (symbol_conf.dso_list &&
@@ -726,9 +752,9 @@ out_filtered:
726 return 0; 752 return 0;
727} 753}
728 754
729int event__parse_sample(event_t *event, u64 type, struct sample_data *data) 755int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
730{ 756{
731 u64 *array = event->sample.array; 757 const u64 *array = event->sample.array;
732 758
733 if (type & PERF_SAMPLE_IP) { 759 if (type & PERF_SAMPLE_IP) {
734 data->ip = event->ip.ip; 760 data->ip = event->ip.ip;
@@ -767,7 +793,8 @@ int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
767 u32 *p = (u32 *)array; 793 u32 *p = (u32 *)array;
768 data->cpu = *p; 794 data->cpu = *p;
769 array++; 795 array++;
770 } 796 } else
797 data->cpu = -1;
771 798
772 if (type & PERF_SAMPLE_PERIOD) { 799 if (type & PERF_SAMPLE_PERIOD) {
773 data->period = *array; 800 data->period = *array;
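
event__parse_sample() walks a flat u64 array whose layout is dictated by the session's sample_type bits; the new else branch gives data->cpu a defined value (-1) when PERF_SAMPLE_CPU was not recorded. A reduced sketch of that walk covering only the IP/TID/TIME/CPU bits, in the field order used by the code above; a real decoder must also skip ADDR/ID/STREAM_ID when those bits are set:

#include <linux/perf_event.h>	/* PERF_SAMPLE_* flag bits */
#include <stdint.h>

struct sample {
	uint64_t ip, time;
	uint32_t pid, tid;
	int cpu;
};

/* Sketch: decode the leading fields of a PERF_RECORD_SAMPLE body. */
static void parse_sample(const uint64_t *array, uint64_t type, struct sample *s)
{
	if (type & PERF_SAMPLE_IP)
		s->ip = *array++;

	if (type & PERF_SAMPLE_TID) {
		const uint32_t *p = (const uint32_t *)array;
		s->pid = p[0];
		s->tid = p[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME)
		s->time = *array++;

	if (type & PERF_SAMPLE_CPU) {
		s->cpu = (int)*(const uint32_t *)array;
		array++;
	} else {
		s->cpu = -1;	/* defined default, as added above */
	}
}
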
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8577085db067..887ee63bbb62 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -157,8 +157,9 @@ int event__process_task(event_t *self, struct perf_session *session);
157 157
158struct addr_location; 158struct addr_location;
159int event__preprocess_sample(const event_t *self, struct perf_session *session, 159int event__preprocess_sample(const event_t *self, struct perf_session *session,
160 struct addr_location *al, symbol_filter_t filter); 160 struct addr_location *al, struct sample_data *data,
161int event__parse_sample(event_t *event, u64 type, struct sample_data *data); 161 symbol_filter_t filter);
162int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
162 163
163extern const char *event__name[]; 164extern const char *event__name[];
164 165
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1f62435f96c2..d7e67b167ea3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -16,6 +16,8 @@
16#include "symbol.h" 16#include "symbol.h"
17#include "debug.h" 17#include "debug.h"
18 18
19static bool no_buildid_cache = false;
20
19/* 21/*
20 * Create new perf.data header attribute: 22 * Create new perf.data header attribute:
21 */ 23 */
@@ -385,8 +387,7 @@ static int perf_session__cache_build_ids(struct perf_session *self)
385 int ret; 387 int ret;
386 char debugdir[PATH_MAX]; 388 char debugdir[PATH_MAX];
387 389
388 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), 390 snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
389 DEBUG_CACHE_DIR);
390 391
391 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 392 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
392 return -1; 393 return -1;
@@ -471,7 +472,8 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
471 } 472 }
472 buildid_sec->size = lseek(fd, 0, SEEK_CUR) - 473 buildid_sec->size = lseek(fd, 0, SEEK_CUR) -
473 buildid_sec->offset; 474 buildid_sec->offset;
474 perf_session__cache_build_ids(session); 475 if (!no_buildid_cache)
476 perf_session__cache_build_ids(session);
475 } 477 }
476 478
477 lseek(fd, sec_start, SEEK_SET); 479 lseek(fd, sec_start, SEEK_SET);
@@ -1190,3 +1192,8 @@ int event__process_build_id(event_t *self,
1190 session); 1192 session);
1191 return 0; 1193 return 0;
1192} 1194}
1195
1196void disable_buildid_cache(void)
1197{
1198 no_buildid_cache = true;
1199}
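
perf_session__cache_build_ids() only needs the cache root to exist, hence the mkdir() call above that tolerates EEXIST. The same "create unless already there" idiom in isolation:

#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Sketch: create a directory, treating "already exists" as success. */
static int ensure_dir(const char *path)
{
	if (mkdir(path, 0755) != 0 && errno != EEXIST)
		return -1;
	return 0;
}
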
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 07f89b66b318..7b5848ce1505 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -70,6 +70,7 @@ struct hist_entry *__hists__add_entry(struct hists *self,
70 .map = al->map, 70 .map = al->map,
71 .sym = al->sym, 71 .sym = al->sym,
72 }, 72 },
73 .cpu = al->cpu,
73 .ip = al->addr, 74 .ip = al->addr,
74 .level = al->level, 75 .level = al->level,
75 .period = period, 76 .period = period,
@@ -794,6 +795,21 @@ enum hist_filter {
794 HIST_FILTER__THREAD, 795 HIST_FILTER__THREAD,
795}; 796};
796 797
798static void hists__remove_entry_filter(struct hists *self, struct hist_entry *h,
799 enum hist_filter filter)
800{
801 h->filtered &= ~(1 << filter);
802 if (h->filtered)
803 return;
804
805 ++self->nr_entries;
806 self->stats.total_period += h->period;
807 self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
808
809 if (h->ms.sym && self->max_sym_namelen < h->ms.sym->namelen)
810 self->max_sym_namelen = h->ms.sym->namelen;
811}
812
797void hists__filter_by_dso(struct hists *self, const struct dso *dso) 813void hists__filter_by_dso(struct hists *self, const struct dso *dso)
798{ 814{
799 struct rb_node *nd; 815 struct rb_node *nd;
@@ -813,15 +829,7 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso)
813 continue; 829 continue;
814 } 830 }
815 831
816 h->filtered &= ~(1 << HIST_FILTER__DSO); 832 hists__remove_entry_filter(self, h, HIST_FILTER__DSO);
817 if (!h->filtered) {
818 ++self->nr_entries;
819 self->stats.total_period += h->period;
820 self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
821 if (h->ms.sym &&
822 self->max_sym_namelen < h->ms.sym->namelen)
823 self->max_sym_namelen = h->ms.sym->namelen;
824 }
825 } 833 }
826} 834}
827 835
@@ -840,15 +848,8 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
840 h->filtered |= (1 << HIST_FILTER__THREAD); 848 h->filtered |= (1 << HIST_FILTER__THREAD);
841 continue; 849 continue;
842 } 850 }
843 h->filtered &= ~(1 << HIST_FILTER__THREAD); 851
844 if (!h->filtered) { 852 hists__remove_entry_filter(self, h, HIST_FILTER__THREAD);
845 ++self->nr_entries;
846 self->stats.total_period += h->period;
847 self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events;
848 if (h->ms.sym &&
849 self->max_sym_namelen < h->ms.sym->namelen)
850 self->max_sym_namelen = h->ms.sym->namelen;
851 }
852 } 853 }
853} 854}
854 855
@@ -1037,7 +1038,7 @@ fallback:
1037 dso, dso->long_name, sym, sym->name); 1038 dso, dso->long_name, sym, sym->name);
1038 1039
1039 snprintf(command, sizeof(command), 1040 snprintf(command, sizeof(command),
1040 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s|expand", 1041 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
1041 map__rip_2objdump(map, sym->start), 1042 map__rip_2objdump(map, sym->start),
1042 map__rip_2objdump(map, sym->end), 1043 map__rip_2objdump(map, sym->end),
1043 filename, filename); 1044 filename, filename);
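
hists__filter_by_dso() and hists__filter_by_thread() above now share hists__remove_entry_filter(): every filter owns one bit in h->filtered, and an entry is re-accounted only when all bits are clear. The bit-flag pattern on its own, with hypothetical names:

#include <stdbool.h>

enum filter_bit { FILTER_DSO, FILTER_THREAD };

struct entry {
	unsigned int filtered;		/* one bit per active filter */
	unsigned long long period;
};

/* Sketch: drop one filter bit; report whether the entry became visible again. */
static bool entry_unfilter(struct entry *e, enum filter_bit f,
			   unsigned long long *total_period)
{
	e->filtered &= ~(1U << f);
	if (e->filtered)
		return false;		/* still hidden by some other filter */

	*total_period += e->period;	/* re-account it, as the new helper does */
	return true;
}
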
diff --git a/tools/perf/util/newt.c b/tools/perf/util/newt.c
index 7537ca15900b..7979003adeaf 100644
--- a/tools/perf/util/newt.c
+++ b/tools/perf/util/newt.c
@@ -278,9 +278,48 @@ struct ui_browser {
278 void *first_visible_entry, *entries; 278 void *first_visible_entry, *entries;
279 u16 top, left, width, height; 279 u16 top, left, width, height;
280 void *priv; 280 void *priv;
281 unsigned int (*refresh_entries)(struct ui_browser *self);
282 void (*seek)(struct ui_browser *self,
283 off_t offset, int whence);
281 u32 nr_entries; 284 u32 nr_entries;
282}; 285};
283 286
287static void ui_browser__list_head_seek(struct ui_browser *self,
288 off_t offset, int whence)
289{
290 struct list_head *head = self->entries;
291 struct list_head *pos;
292
293 switch (whence) {
294 case SEEK_SET:
295 pos = head->next;
296 break;
297 case SEEK_CUR:
298 pos = self->first_visible_entry;
299 break;
300 case SEEK_END:
301 pos = head->prev;
302 break;
303 default:
304 return;
305 }
306
307 if (offset > 0) {
308 while (offset-- != 0)
309 pos = pos->next;
310 } else {
311 while (offset++ != 0)
312 pos = pos->prev;
313 }
314
315 self->first_visible_entry = pos;
316}
317
318static bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row)
319{
320 return (self->first_visible_entry_idx + row) == self->index;
321}
322
284static void ui_browser__refresh_dimensions(struct ui_browser *self) 323static void ui_browser__refresh_dimensions(struct ui_browser *self)
285{ 324{
286 int cols, rows; 325 int cols, rows;
@@ -297,8 +336,34 @@ static void ui_browser__refresh_dimensions(struct ui_browser *self)
297 336
298static void ui_browser__reset_index(struct ui_browser *self) 337static void ui_browser__reset_index(struct ui_browser *self)
299{ 338{
300 self->index = self->first_visible_entry_idx = 0; 339 self->index = self->first_visible_entry_idx = 0;
301 self->first_visible_entry = NULL; 340 self->seek(self, 0, SEEK_SET);
341}
342
343static int ui_browser__show(struct ui_browser *self, const char *title)
344{
345 if (self->form != NULL)
346 return 0;
347 ui_browser__refresh_dimensions(self);
348 newtCenteredWindow(self->width + 2, self->height, title);
349 self->form = newt_form__new();
350 if (self->form == NULL)
351 return -1;
352
353 self->sb = newtVerticalScrollbar(self->width + 1, 0, self->height,
354 HE_COLORSET_NORMAL,
355 HE_COLORSET_SELECTED);
356 if (self->sb == NULL)
357 return -1;
358
359 newtFormAddHotKey(self->form, NEWT_KEY_UP);
360 newtFormAddHotKey(self->form, NEWT_KEY_DOWN);
361 newtFormAddHotKey(self->form, NEWT_KEY_PGUP);
362 newtFormAddHotKey(self->form, NEWT_KEY_PGDN);
363 newtFormAddHotKey(self->form, NEWT_KEY_HOME);
364 newtFormAddHotKey(self->form, NEWT_KEY_END);
365 newtFormAddComponent(self->form, self->sb);
366 return 0;
302} 367}
303 368
304static int objdump_line__show(struct objdump_line *self, struct list_head *head, 369static int objdump_line__show(struct objdump_line *self, struct list_head *head,
@@ -352,26 +417,10 @@ static int objdump_line__show(struct objdump_line *self, struct list_head *head,
352 417
353static int ui_browser__refresh_entries(struct ui_browser *self) 418static int ui_browser__refresh_entries(struct ui_browser *self)
354{ 419{
355 struct objdump_line *pos; 420 int row;
356 struct list_head *head = self->entries;
357 struct hist_entry *he = self->priv;
358 int row = 0;
359 int len = he->ms.sym->end - he->ms.sym->start;
360
361 if (self->first_visible_entry == NULL || self->first_visible_entry == self->entries)
362 self->first_visible_entry = head->next;
363
364 pos = list_entry(self->first_visible_entry, struct objdump_line, node);
365
366 list_for_each_entry_from(pos, head, node) {
367 bool current_entry = (self->first_visible_entry_idx + row) == self->index;
368 SLsmg_gotorc(self->top + row, self->left);
369 objdump_line__show(pos, head, self->width,
370 he, len, current_entry);
371 if (++row == self->height)
372 break;
373 }
374 421
422 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
423 row = self->refresh_entries(self);
375 SLsmg_set_color(HE_COLORSET_NORMAL); 424 SLsmg_set_color(HE_COLORSET_NORMAL);
376 SLsmg_fill_region(self->top + row, self->left, 425 SLsmg_fill_region(self->top + row, self->left,
377 self->height - row, self->width, ' '); 426 self->height - row, self->width, ' ');
@@ -379,42 +428,13 @@ static int ui_browser__refresh_entries(struct ui_browser *self)
379 return 0; 428 return 0;
380} 429}
381 430
382static int ui_browser__run(struct ui_browser *self, const char *title, 431static int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es)
383 struct newtExitStruct *es)
384{ 432{
385 if (self->form) {
386 newtFormDestroy(self->form);
387 newtPopWindow();
388 }
389
390 ui_browser__refresh_dimensions(self);
391 newtCenteredWindow(self->width + 2, self->height, title);
392 self->form = newt_form__new();
393 if (self->form == NULL)
394 return -1;
395
396 self->sb = newtVerticalScrollbar(self->width + 1, 0, self->height,
397 HE_COLORSET_NORMAL,
398 HE_COLORSET_SELECTED);
399 if (self->sb == NULL)
400 return -1;
401
402 newtFormAddHotKey(self->form, NEWT_KEY_UP);
403 newtFormAddHotKey(self->form, NEWT_KEY_DOWN);
404 newtFormAddHotKey(self->form, NEWT_KEY_PGUP);
405 newtFormAddHotKey(self->form, NEWT_KEY_PGDN);
406 newtFormAddHotKey(self->form, ' ');
407 newtFormAddHotKey(self->form, NEWT_KEY_HOME);
408 newtFormAddHotKey(self->form, NEWT_KEY_END);
409 newtFormAddHotKey(self->form, NEWT_KEY_TAB);
410 newtFormAddHotKey(self->form, NEWT_KEY_RIGHT);
411
412 if (ui_browser__refresh_entries(self) < 0) 433 if (ui_browser__refresh_entries(self) < 0)
413 return -1; 434 return -1;
414 newtFormAddComponent(self->form, self->sb);
415 435
416 while (1) { 436 while (1) {
417 unsigned int offset; 437 off_t offset;
418 438
419 newtFormRun(self->form, es); 439 newtFormRun(self->form, es);
420 440
@@ -428,9 +448,8 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
428 break; 448 break;
429 ++self->index; 449 ++self->index;
430 if (self->index == self->first_visible_entry_idx + self->height) { 450 if (self->index == self->first_visible_entry_idx + self->height) {
431 struct list_head *pos = self->first_visible_entry;
432 ++self->first_visible_entry_idx; 451 ++self->first_visible_entry_idx;
433 self->first_visible_entry = pos->next; 452 self->seek(self, +1, SEEK_CUR);
434 } 453 }
435 break; 454 break;
436 case NEWT_KEY_UP: 455 case NEWT_KEY_UP:
@@ -438,9 +457,8 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
438 break; 457 break;
439 --self->index; 458 --self->index;
440 if (self->index < self->first_visible_entry_idx) { 459 if (self->index < self->first_visible_entry_idx) {
441 struct list_head *pos = self->first_visible_entry;
442 --self->first_visible_entry_idx; 460 --self->first_visible_entry_idx;
443 self->first_visible_entry = pos->prev; 461 self->seek(self, -1, SEEK_CUR);
444 } 462 }
445 break; 463 break;
446 case NEWT_KEY_PGDN: 464 case NEWT_KEY_PGDN:
@@ -453,12 +471,7 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
453 offset = self->nr_entries - 1 - self->index; 471 offset = self->nr_entries - 1 - self->index;
454 self->index += offset; 472 self->index += offset;
455 self->first_visible_entry_idx += offset; 473 self->first_visible_entry_idx += offset;
456 474 self->seek(self, +offset, SEEK_CUR);
457 while (offset--) {
458 struct list_head *pos = self->first_visible_entry;
459 self->first_visible_entry = pos->next;
460 }
461
462 break; 475 break;
463 case NEWT_KEY_PGUP: 476 case NEWT_KEY_PGUP:
464 if (self->first_visible_entry_idx == 0) 477 if (self->first_visible_entry_idx == 0)
@@ -471,36 +484,22 @@ static int ui_browser__run(struct ui_browser *self, const char *title,
471 484
472 self->index -= offset; 485 self->index -= offset;
473 self->first_visible_entry_idx -= offset; 486 self->first_visible_entry_idx -= offset;
474 487 self->seek(self, -offset, SEEK_CUR);
475 while (offset--) {
476 struct list_head *pos = self->first_visible_entry;
477 self->first_visible_entry = pos->prev;
478 }
479 break; 488 break;
480 case NEWT_KEY_HOME: 489 case NEWT_KEY_HOME:
481 ui_browser__reset_index(self); 490 ui_browser__reset_index(self);
482 break; 491 break;
483 case NEWT_KEY_END: { 492 case NEWT_KEY_END:
484 struct list_head *head = self->entries;
485 offset = self->height - 1; 493 offset = self->height - 1;
494 if (offset >= self->nr_entries)
495 offset = self->nr_entries - 1;
486 496
487 if (offset > self->nr_entries) 497 self->index = self->nr_entries - 1;
488 offset = self->nr_entries; 498 self->first_visible_entry_idx = self->index - offset;
489 499 self->seek(self, -offset, SEEK_END);
490 self->index = self->first_visible_entry_idx = self->nr_entries - 1 - offset;
491 self->first_visible_entry = head->prev;
492 while (offset-- != 0) {
493 struct list_head *pos = self->first_visible_entry;
494 self->first_visible_entry = pos->prev;
495 }
496 }
497 break; 500 break;
498 case NEWT_KEY_RIGHT:
499 case NEWT_KEY_LEFT:
500 case NEWT_KEY_TAB:
501 return es->u.key;
502 default: 501 default:
503 continue; 502 return es->u.key;
504 } 503 }
505 if (ui_browser__refresh_entries(self) < 0) 504 if (ui_browser__refresh_entries(self) < 0)
506 return -1; 505 return -1;
@@ -550,6 +549,31 @@ static char *callchain_list__sym_name(struct callchain_list *self,
550 return bf; 549 return bf;
551} 550}
552 551
552static unsigned int hist_entry__annotate_browser_refresh(struct ui_browser *self)
553{
554 struct objdump_line *pos;
555 struct list_head *head = self->entries;
556 struct hist_entry *he = self->priv;
557 int row = 0;
558 int len = he->ms.sym->end - he->ms.sym->start;
559
560 if (self->first_visible_entry == NULL || self->first_visible_entry == self->entries)
561 self->first_visible_entry = head->next;
562
563 pos = list_entry(self->first_visible_entry, struct objdump_line, node);
564
565 list_for_each_entry_from(pos, head, node) {
566 bool current_entry = ui_browser__is_current_entry(self, row);
567 SLsmg_gotorc(self->top + row, self->left);
568 objdump_line__show(pos, head, self->width,
569 he, len, current_entry);
570 if (++row == self->height)
571 break;
572 }
573
574 return row;
575}
576
553static void __callchain__append_graph_browser(struct callchain_node *self, 577static void __callchain__append_graph_browser(struct callchain_node *self,
554 newtComponent tree, u64 total, 578 newtComponent tree, u64 total,
555 int *indexes, int depth) 579 int *indexes, int depth)
@@ -712,7 +736,9 @@ int hist_entry__tui_annotate(struct hist_entry *self)
712 ui_helpline__push("Press <- or ESC to exit"); 736 ui_helpline__push("Press <- or ESC to exit");
713 737
714 memset(&browser, 0, sizeof(browser)); 738 memset(&browser, 0, sizeof(browser));
715 browser.entries = &head; 739 browser.entries = &head;
740 browser.refresh_entries = hist_entry__annotate_browser_refresh;
741 browser.seek = ui_browser__list_head_seek;
716 browser.priv = self; 742 browser.priv = self;
717 list_for_each_entry(pos, &head, node) { 743 list_for_each_entry(pos, &head, node) {
718 size_t line_len = strlen(pos->line); 744 size_t line_len = strlen(pos->line);
@@ -722,7 +748,8 @@ int hist_entry__tui_annotate(struct hist_entry *self)
722 } 748 }
723 749
724 browser.width += 18; /* Percentage */ 750 browser.width += 18; /* Percentage */
725 ret = ui_browser__run(&browser, self->ms.sym->name, &es); 751 ui_browser__show(&browser, self->ms.sym->name);
752 ret = ui_browser__run(&browser, &es);
726 newtFormDestroy(browser.form); 753 newtFormDestroy(browser.form);
727 newtPopWindow(); 754 newtPopWindow();
728 list_for_each_entry_safe(pos, n, &head, node) { 755 list_for_each_entry_safe(pos, n, &head, node) {
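
The browser refactor above splits scrolling into two callbacks: refresh_entries() redraws the visible rows and seek() moves first_visible_entry by (offset, whence), so the key handling in ui_browser__run() no longer assumes a list_head container. For contrast with ui_browser__list_head_seek(), a sketch of what an array-backed seek could look like under the same contract (everything except the SEEK_* constants is hypothetical):

#include <stdio.h>	/* SEEK_SET, SEEK_CUR, SEEK_END */

struct array_browser {
	void **entries;
	unsigned int nr_entries;
	unsigned int first_visible;	/* index of the top visible row */
};

/* Sketch: the (offset, whence) seek contract over a flat array of entries. */
static void array_browser_seek(struct array_browser *b, long offset, int whence)
{
	long pos;

	switch (whence) {
	case SEEK_SET:
		pos = 0;
		break;
	case SEEK_CUR:
		pos = b->first_visible;
		break;
	case SEEK_END:
		pos = (long)b->nr_entries - 1;
		break;
	default:
		return;
	}

	pos += offset;
	if (pos < 0)
		pos = 0;
	if (pos >= (long)b->nr_entries)
		pos = (long)b->nr_entries - 1;
	b->first_visible = (unsigned int)pos;
}
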
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9bf0f402ca73..4af5bd59cfd1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -602,8 +602,15 @@ parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
602 return EVT_FAILED; 602 return EVT_FAILED;
603 } 603 }
604 604
605 /* We should find a nice way to override the access type */ 605 /*
606 attr->bp_len = HW_BREAKPOINT_LEN_4; 606 * We should find a nice way to override the access length
607 * Provide some defaults for now
608 */
609 if (attr->bp_type == HW_BREAKPOINT_X)
610 attr->bp_len = sizeof(long);
611 else
612 attr->bp_len = HW_BREAKPOINT_LEN_4;
613
607 attr->type = PERF_TYPE_BREAKPOINT; 614 attr->type = PERF_TYPE_BREAKPOINT;
608 615
609 return EVT_HANDLED; 616 return EVT_HANDLED;
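
The defaults picked above are sizeof(long) for execute breakpoints (one instruction address) and HW_BREAKPOINT_LEN_4 for data breakpoints. A sketch of the resulting perf_event_attr setup, limited to fields and constants visible in the hunk or in <linux/hw_breakpoint.h>:

#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <string.h>

/* Sketch: fill a breakpoint attr with the same length defaults as above. */
static void setup_breakpoint_attr(struct perf_event_attr *attr,
				  unsigned long addr, int bp_type)
{
	memset(attr, 0, sizeof(*attr));
	attr->type = PERF_TYPE_BREAKPOINT;
	attr->bp_type = bp_type;		/* HW_BREAKPOINT_X, _R, _W, _R|_W */
	attr->bp_addr = addr;

	if (bp_type == HW_BREAKPOINT_X)
		attr->bp_len = sizeof(long);	/* instruction breakpoint */
	else
		attr->bp_len = HW_BREAKPOINT_LEN_4;
}
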
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 914c67095d96..4445a1e7052f 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -195,6 +195,65 @@ static int try_to_find_kprobe_trace_events(struct perf_probe_event *pev,
195 return ntevs; 195 return ntevs;
196} 196}
197 197
198/*
199 * Find a src file from a DWARF tag path. Prepend optional source path prefix
200 * and chop off leading directories that do not exist. Result is passed back as
201 * a newly allocated path on success.
202 * Return 0 if file was found and readable, -errno otherwise.
203 */
204static int get_real_path(const char *raw_path, const char *comp_dir,
205 char **new_path)
206{
207 const char *prefix = symbol_conf.source_prefix;
208
209 if (!prefix) {
210 if (raw_path[0] != '/' && comp_dir)
211 /* If not an absolute path, try to use comp_dir */
212 prefix = comp_dir;
213 else {
214 if (access(raw_path, R_OK) == 0) {
215 *new_path = strdup(raw_path);
216 return 0;
217 } else
218 return -errno;
219 }
220 }
221
222 *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
223 if (!*new_path)
224 return -ENOMEM;
225
226 for (;;) {
227 sprintf(*new_path, "%s/%s", prefix, raw_path);
228
229 if (access(*new_path, R_OK) == 0)
230 return 0;
231
232 if (!symbol_conf.source_prefix)
233 /* In case of searching comp_dir, don't retry */
234 return -errno;
235
236 switch (errno) {
237 case ENAMETOOLONG:
238 case ENOENT:
239 case EROFS:
240 case EFAULT:
241 raw_path = strchr(++raw_path, '/');
242 if (!raw_path) {
243 free(*new_path);
244 *new_path = NULL;
245 return -ENOENT;
246 }
247 continue;
248
249 default:
250 free(*new_path);
251 *new_path = NULL;
252 return -errno;
253 }
254 }
255}
256
198#define LINEBUF_SIZE 256 257#define LINEBUF_SIZE 256
199#define NR_ADDITIONAL_LINES 2 258#define NR_ADDITIONAL_LINES 2
200 259
@@ -244,6 +303,7 @@ int show_line_range(struct line_range *lr)
244 struct line_node *ln; 303 struct line_node *ln;
245 FILE *fp; 304 FILE *fp;
246 int fd, ret; 305 int fd, ret;
306 char *tmp;
247 307
248 /* Search a line range */ 308 /* Search a line range */
249 ret = init_vmlinux(); 309 ret = init_vmlinux();
@@ -266,6 +326,15 @@ int show_line_range(struct line_range *lr)
266 return ret; 326 return ret;
267 } 327 }
268 328
329 /* Convert source file path */
330 tmp = lr->path;
331 ret = get_real_path(tmp, lr->comp_dir, &lr->path);
332 free(tmp); /* Free old path */
333 if (ret < 0) {
334 pr_warning("Failed to find source file. (%d)\n", ret);
335 return ret;
336 }
337
269 setup_pager(); 338 setup_pager();
270 339
271 if (lr->function) 340 if (lr->function)
@@ -557,7 +626,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
557/* Parse perf-probe event argument */ 626/* Parse perf-probe event argument */
558static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg) 627static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
559{ 628{
560 char *tmp; 629 char *tmp, *goodname;
561 struct perf_probe_arg_field **fieldp; 630 struct perf_probe_arg_field **fieldp;
562 631
563 pr_debug("parsing arg: %s into ", str); 632 pr_debug("parsing arg: %s into ", str);
@@ -580,7 +649,7 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
580 pr_debug("type:%s ", arg->type); 649 pr_debug("type:%s ", arg->type);
581 } 650 }
582 651
583 tmp = strpbrk(str, "-."); 652 tmp = strpbrk(str, "-.[");
584 if (!is_c_varname(str) || !tmp) { 653 if (!is_c_varname(str) || !tmp) {
585 /* A variable, register, symbol or special value */ 654 /* A variable, register, symbol or special value */
586 arg->var = strdup(str); 655 arg->var = strdup(str);
@@ -590,10 +659,11 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
590 return 0; 659 return 0;
591 } 660 }
592 661
593 /* Structure fields */ 662 /* Structure fields or array element */
594 arg->var = strndup(str, tmp - str); 663 arg->var = strndup(str, tmp - str);
595 if (arg->var == NULL) 664 if (arg->var == NULL)
596 return -ENOMEM; 665 return -ENOMEM;
666 goodname = arg->var;
597 pr_debug("%s, ", arg->var); 667 pr_debug("%s, ", arg->var);
598 fieldp = &arg->field; 668 fieldp = &arg->field;
599 669
@@ -601,22 +671,38 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
601 *fieldp = zalloc(sizeof(struct perf_probe_arg_field)); 671 *fieldp = zalloc(sizeof(struct perf_probe_arg_field));
602 if (*fieldp == NULL) 672 if (*fieldp == NULL)
603 return -ENOMEM; 673 return -ENOMEM;
604 if (*tmp == '.') { 674 if (*tmp == '[') { /* Array */
605 str = tmp + 1; 675 str = tmp;
606 (*fieldp)->ref = false; 676 (*fieldp)->index = strtol(str + 1, &tmp, 0);
607 } else if (tmp[1] == '>') {
608 str = tmp + 2;
609 (*fieldp)->ref = true; 677 (*fieldp)->ref = true;
610 } else { 678 if (*tmp != ']' || tmp == str + 1) {
611 semantic_error("Argument parse error: %s\n", str); 679 semantic_error("Array index must be a"
612 return -EINVAL; 680 " number.\n");
681 return -EINVAL;
682 }
683 tmp++;
684 if (*tmp == '\0')
685 tmp = NULL;
686 } else { /* Structure */
687 if (*tmp == '.') {
688 str = tmp + 1;
689 (*fieldp)->ref = false;
690 } else if (tmp[1] == '>') {
691 str = tmp + 2;
692 (*fieldp)->ref = true;
693 } else {
694 semantic_error("Argument parse error: %s\n",
695 str);
696 return -EINVAL;
697 }
698 tmp = strpbrk(str, "-.[");
613 } 699 }
614
615 tmp = strpbrk(str, "-.");
616 if (tmp) { 700 if (tmp) {
617 (*fieldp)->name = strndup(str, tmp - str); 701 (*fieldp)->name = strndup(str, tmp - str);
618 if ((*fieldp)->name == NULL) 702 if ((*fieldp)->name == NULL)
619 return -ENOMEM; 703 return -ENOMEM;
704 if (*str != '[')
705 goodname = (*fieldp)->name;
620 pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref); 706 pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref);
621 fieldp = &(*fieldp)->next; 707 fieldp = &(*fieldp)->next;
622 } 708 }
@@ -624,11 +710,13 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
624 (*fieldp)->name = strdup(str); 710 (*fieldp)->name = strdup(str);
625 if ((*fieldp)->name == NULL) 711 if ((*fieldp)->name == NULL)
626 return -ENOMEM; 712 return -ENOMEM;
713 if (*str != '[')
714 goodname = (*fieldp)->name;
627 pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref); 715 pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref);
628 716
629 /* If no name is specified, set the last field name */ 717 /* If no name is specified, set the last field name (not array index)*/
630 if (!arg->name) { 718 if (!arg->name) {
631 arg->name = strdup((*fieldp)->name); 719 arg->name = strdup(goodname);
632 if (arg->name == NULL) 720 if (arg->name == NULL)
633 return -ENOMEM; 721 return -ENOMEM;
634 } 722 }
@@ -776,8 +864,11 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
776 len -= ret; 864 len -= ret;
777 865
778 while (field) { 866 while (field) {
779 ret = e_snprintf(tmp, len, "%s%s", field->ref ? "->" : ".", 867 if (field->name[0] == '[')
780 field->name); 868 ret = e_snprintf(tmp, len, "%s", field->name);
869 else
870 ret = e_snprintf(tmp, len, "%s%s",
871 field->ref ? "->" : ".", field->name);
781 if (ret <= 0) 872 if (ret <= 0)
782 goto error; 873 goto error;
783 tmp += ret; 874 tmp += ret;
@@ -904,6 +995,7 @@ out:
904static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg, 995static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg,
905 char *buf, size_t buflen) 996 char *buf, size_t buflen)
906{ 997{
998 struct kprobe_trace_arg_ref *ref = arg->ref;
907 int ret, depth = 0; 999 int ret, depth = 0;
908 char *tmp = buf; 1000 char *tmp = buf;
909 1001
@@ -917,16 +1009,24 @@ static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg,
917 buf += ret; 1009 buf += ret;
918 buflen -= ret; 1010 buflen -= ret;
919 1011
1012 /* Special case: @XXX */
1013 if (arg->value[0] == '@' && arg->ref)
1014 ref = ref->next;
1015
920 /* Dereferencing arguments */ 1016 /* Dereferencing arguments */
921 if (arg->ref) { 1017 if (ref) {
922 depth = __synthesize_kprobe_trace_arg_ref(arg->ref, &buf, 1018 depth = __synthesize_kprobe_trace_arg_ref(ref, &buf,
923 &buflen, 1); 1019 &buflen, 1);
924 if (depth < 0) 1020 if (depth < 0)
925 return depth; 1021 return depth;
926 } 1022 }
927 1023
928 /* Print argument value */ 1024 /* Print argument value */
929 ret = e_snprintf(buf, buflen, "%s", arg->value); 1025 if (arg->value[0] == '@' && arg->ref)
1026 ret = e_snprintf(buf, buflen, "%s%+ld", arg->value,
1027 arg->ref->offset);
1028 else
1029 ret = e_snprintf(buf, buflen, "%s", arg->value);
930 if (ret < 0) 1030 if (ret < 0)
931 return ret; 1031 return ret;
932 buf += ret; 1032 buf += ret;
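
Two user-visible changes above: probe arguments may now index arrays (var[N], also mixed with field chains such as field->arr[2]), and line-range lookup can fall back to DW_AT_comp_dir or a user-supplied source prefix when the recorded path is not readable. A sketch of just the array-index parsing step, matching the '[' / strtol / ']' checks in the hunk; the surrounding field-chain handling is omitted:

#include <stdlib.h>

/* Sketch: parse "[N]" at *str into *index; return a pointer just past ']',
 * or NULL if the text is not a well-formed numeric index. */
static const char *parse_array_index(const char *str, long *index)
{
	char *end;

	if (*str != '[')
		return NULL;

	*index = strtol(str + 1, &end, 0);
	if (end == str + 1 || *end != ']')
		return NULL;		/* "[]" or a non-numeric index */

	return end + 1;
}
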
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e9db1a214ca4..ed362acff4b6 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -50,6 +50,7 @@ struct perf_probe_point {
50struct perf_probe_arg_field { 50struct perf_probe_arg_field {
51 struct perf_probe_arg_field *next; /* Next field */ 51 struct perf_probe_arg_field *next; /* Next field */
52 char *name; /* Name of the field */ 52 char *name; /* Name of the field */
53 long index; /* Array index number */
53 bool ref; /* Referencing flag */ 54 bool ref; /* Referencing flag */
54}; 55};
55 56
@@ -85,6 +86,7 @@ struct line_range {
85 int end; /* End line number */ 86 int end; /* End line number */
86 int offset; /* Start line offset */ 87 int offset; /* Start line offset */
87 char *path; /* Real path name */ 88 char *path; /* Real path name */
89 char *comp_dir; /* Compile directory */
88 struct list_head line_list; /* Visible lines */ 90 struct list_head line_list; /* Visible lines */
89}; 91};
90 92
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index d964cb199c67..f88070ea5b90 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -37,6 +37,7 @@
37#include "event.h" 37#include "event.h"
38#include "debug.h" 38#include "debug.h"
39#include "util.h" 39#include "util.h"
40#include "symbol.h"
40#include "probe-finder.h" 41#include "probe-finder.h"
41 42
42/* Kprobe tracer basic type is up to u64 */ 43/* Kprobe tracer basic type is up to u64 */
@@ -143,12 +144,21 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
143 return src; 144 return src;
144} 145}
145 146
147/* Get DW_AT_comp_dir (should be NULL with older gcc) */
148static const char *cu_get_comp_dir(Dwarf_Die *cu_die)
149{
150 Dwarf_Attribute attr;
151 if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
152 return NULL;
153 return dwarf_formstring(&attr);
154}
155
146/* Compare diename and tname */ 156/* Compare diename and tname */
147static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) 157static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
148{ 158{
149 const char *name; 159 const char *name;
150 name = dwarf_diename(dw_die); 160 name = dwarf_diename(dw_die);
151 return name ? strcmp(tname, name) : -1; 161 return name ? (strcmp(tname, name) == 0) : false;
152} 162}
153 163
154/* Get type die, but skip qualifiers and typedef */ 164/* Get type die, but skip qualifiers and typedef */
@@ -319,7 +329,7 @@ static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
319 tag = dwarf_tag(die_mem); 329 tag = dwarf_tag(die_mem);
320 if ((tag == DW_TAG_formal_parameter || 330 if ((tag == DW_TAG_formal_parameter ||
321 tag == DW_TAG_variable) && 331 tag == DW_TAG_variable) &&
322 (die_compare_name(die_mem, name) == 0)) 332 die_compare_name(die_mem, name))
323 return DIE_FIND_CB_FOUND; 333 return DIE_FIND_CB_FOUND;
324 334
325 return DIE_FIND_CB_CONTINUE; 335 return DIE_FIND_CB_CONTINUE;
@@ -338,7 +348,7 @@ static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
338 const char *name = data; 348 const char *name = data;
339 349
340 if ((dwarf_tag(die_mem) == DW_TAG_member) && 350 if ((dwarf_tag(die_mem) == DW_TAG_member) &&
341 (die_compare_name(die_mem, name) == 0)) 351 die_compare_name(die_mem, name))
342 return DIE_FIND_CB_FOUND; 352 return DIE_FIND_CB_FOUND;
343 353
344 return DIE_FIND_CB_SIBLING; 354 return DIE_FIND_CB_SIBLING;
@@ -356,14 +366,50 @@ static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
356 * Probe finder related functions 366 * Probe finder related functions
357 */ 367 */
358 368
369static struct kprobe_trace_arg_ref *alloc_trace_arg_ref(long offs)
370{
371 struct kprobe_trace_arg_ref *ref;
372 ref = zalloc(sizeof(struct kprobe_trace_arg_ref));
373 if (ref != NULL)
374 ref->offset = offs;
375 return ref;
376}
377
359/* Show a location */ 378/* Show a location */
360static int convert_location(Dwarf_Op *op, struct probe_finder *pf) 379static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
361{ 380{
381 Dwarf_Attribute attr;
382 Dwarf_Op *op;
383 size_t nops;
362 unsigned int regn; 384 unsigned int regn;
363 Dwarf_Word offs = 0; 385 Dwarf_Word offs = 0;
364 bool ref = false; 386 bool ref = false;
365 const char *regs; 387 const char *regs;
366 struct kprobe_trace_arg *tvar = pf->tvar; 388 struct kprobe_trace_arg *tvar = pf->tvar;
389 int ret;
390
391 /* TODO: handle more than 1 exprs */
392 if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
393 dwarf_getlocation_addr(&attr, pf->addr, &op, &nops, 1) <= 0 ||
394 nops == 0) {
395 /* TODO: Support const_value */
396 pr_err("Failed to find the location of %s at this address.\n"
397 " Perhaps, it has been optimized out.\n", pf->pvar->var);
398 return -ENOENT;
399 }
400
401 if (op->atom == DW_OP_addr) {
402 /* Static variables on memory (not stack), make @varname */
403 ret = strlen(dwarf_diename(vr_die));
404 tvar->value = zalloc(ret + 2);
405 if (tvar->value == NULL)
406 return -ENOMEM;
407 snprintf(tvar->value, ret + 2, "@%s", dwarf_diename(vr_die));
408 tvar->ref = alloc_trace_arg_ref((long)offs);
409 if (tvar->ref == NULL)
410 return -ENOMEM;
411 return 0;
412 }
367 413
368 /* If this is based on frame buffer, set the offset */ 414 /* If this is based on frame buffer, set the offset */
369 if (op->atom == DW_OP_fbreg) { 415 if (op->atom == DW_OP_fbreg) {
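
convert_variable_location() above classifies a variable by the first DWARF location operation at the probe address: DW_OP_addr means a static object (synthesized as an "@name" fetch argument with an offset ref), DW_OP_fbreg is frame-relative, and register ops resolve to a register name. A stripped-down classification sketch using the same libdw calls; error handling and the register-name table are left out, and the libdw header may live under elfutils/ depending on the distro:

#include <dwarf.h>
#include <libdw.h>	/* elfutils; on some setups <elfutils/libdw.h> */

enum loc_kind { LOC_STATIC, LOC_FRAME, LOC_REGISTER, LOC_UNKNOWN };

/* Sketch: look at the first location expression of a variable DIE at a
 * given address and say roughly where the value lives. */
static enum loc_kind classify_location(Dwarf_Die *vr_die, Dwarf_Addr addr)
{
	Dwarf_Attribute attr;
	Dwarf_Op *op;
	size_t nops;

	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
	    dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 || nops == 0)
		return LOC_UNKNOWN;	/* e.g. optimized out */

	if (op->atom == DW_OP_addr)
		return LOC_STATIC;	/* becomes "@name" in the trace argument */
	if (op->atom == DW_OP_fbreg)
		return LOC_FRAME;	/* offset from the frame base */
	if ((op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) ||
	    (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31))
		return LOC_REGISTER;

	return LOC_UNKNOWN;
}
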
@@ -405,27 +451,72 @@ static int convert_location(Dwarf_Op *op, struct probe_finder *pf)
405 return -ENOMEM; 451 return -ENOMEM;
406 452
407 if (ref) { 453 if (ref) {
408 tvar->ref = zalloc(sizeof(struct kprobe_trace_arg_ref)); 454 tvar->ref = alloc_trace_arg_ref((long)offs);
409 if (tvar->ref == NULL) 455 if (tvar->ref == NULL)
410 return -ENOMEM; 456 return -ENOMEM;
411 tvar->ref->offset = (long)offs;
412 } 457 }
413 return 0; 458 return 0;
414} 459}
415 460
416static int convert_variable_type(Dwarf_Die *vr_die, 461static int convert_variable_type(Dwarf_Die *vr_die,
417 struct kprobe_trace_arg *targ) 462 struct kprobe_trace_arg *tvar,
463 const char *cast)
418{ 464{
465 struct kprobe_trace_arg_ref **ref_ptr = &tvar->ref;
419 Dwarf_Die type; 466 Dwarf_Die type;
420 char buf[16]; 467 char buf[16];
421 int ret; 468 int ret;
422 469
470 /* TODO: check all types */
471 if (cast && strcmp(cast, "string") != 0) {
472 /* Non string type is OK */
473 tvar->type = strdup(cast);
474 return (tvar->type == NULL) ? -ENOMEM : 0;
475 }
476
423 if (die_get_real_type(vr_die, &type) == NULL) { 477 if (die_get_real_type(vr_die, &type) == NULL) {
424 pr_warning("Failed to get a type information of %s.\n", 478 pr_warning("Failed to get a type information of %s.\n",
425 dwarf_diename(vr_die)); 479 dwarf_diename(vr_die));
426 return -ENOENT; 480 return -ENOENT;
427 } 481 }
428 482
483 pr_debug("%s type is %s.\n",
484 dwarf_diename(vr_die), dwarf_diename(&type));
485
486 if (cast && strcmp(cast, "string") == 0) { /* String type */
487 ret = dwarf_tag(&type);
488 if (ret != DW_TAG_pointer_type &&
489 ret != DW_TAG_array_type) {
490 pr_warning("Failed to cast into string: "
491 "%s(%s) is not a pointer nor array.",
492 dwarf_diename(vr_die), dwarf_diename(&type));
493 return -EINVAL;
494 }
495 if (ret == DW_TAG_pointer_type) {
496 if (die_get_real_type(&type, &type) == NULL) {
497 pr_warning("Failed to get a type information.");
498 return -ENOENT;
499 }
500 while (*ref_ptr)
501 ref_ptr = &(*ref_ptr)->next;
502 /* Add new reference with offset +0 */
503 *ref_ptr = zalloc(sizeof(struct kprobe_trace_arg_ref));
504 if (*ref_ptr == NULL) {
505 pr_warning("Out of memory error\n");
506 return -ENOMEM;
507 }
508 }
509 if (!die_compare_name(&type, "char") &&
510 !die_compare_name(&type, "unsigned char")) {
511 pr_warning("Failed to cast into string: "
512 "%s is not (unsigned) char *.",
513 dwarf_diename(vr_die));
514 return -EINVAL;
515 }
516 tvar->type = strdup(cast);
517 return (tvar->type == NULL) ? -ENOMEM : 0;
518 }
519
429 ret = die_get_byte_size(&type) * 8; 520 ret = die_get_byte_size(&type) * 8;
430 if (ret) { 521 if (ret) {
431 /* Check the bitwidth */ 522 /* Check the bitwidth */
@@ -445,8 +536,8 @@ static int convert_variable_type(Dwarf_Die *vr_die,
445 strerror(-ret)); 536 strerror(-ret));
446 return ret; 537 return ret;
447 } 538 }
448 targ->type = strdup(buf); 539 tvar->type = strdup(buf);
449 if (targ->type == NULL) 540 if (tvar->type == NULL)
450 return -ENOMEM; 541 return -ENOMEM;
451 } 542 }
452 return 0; 543 return 0;
@@ -460,16 +551,44 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
460 struct kprobe_trace_arg_ref *ref = *ref_ptr; 551 struct kprobe_trace_arg_ref *ref = *ref_ptr;
461 Dwarf_Die type; 552 Dwarf_Die type;
462 Dwarf_Word offs; 553 Dwarf_Word offs;
463 int ret; 554 int ret, tag;
464 555
465 pr_debug("converting %s in %s\n", field->name, varname); 556 pr_debug("converting %s in %s\n", field->name, varname);
466 if (die_get_real_type(vr_die, &type) == NULL) { 557 if (die_get_real_type(vr_die, &type) == NULL) {
467 pr_warning("Failed to get the type of %s.\n", varname); 558 pr_warning("Failed to get the type of %s.\n", varname);
468 return -ENOENT; 559 return -ENOENT;
469 } 560 }
470 561 pr_debug2("Var real type: (%x)\n", (unsigned)dwarf_dieoffset(&type));
471 /* Check the pointer and dereference */ 562 tag = dwarf_tag(&type);
472 if (dwarf_tag(&type) == DW_TAG_pointer_type) { 563
564 if (field->name[0] == '[' &&
565 (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) {
566 if (field->next)
567 /* Save original type for next field */
568 memcpy(die_mem, &type, sizeof(*die_mem));
569 /* Get the type of this array */
570 if (die_get_real_type(&type, &type) == NULL) {
571 pr_warning("Failed to get the type of %s.\n", varname);
572 return -ENOENT;
573 }
574 pr_debug2("Array real type: (%x)\n",
575 (unsigned)dwarf_dieoffset(&type));
576 if (tag == DW_TAG_pointer_type) {
577 ref = zalloc(sizeof(struct kprobe_trace_arg_ref));
578 if (ref == NULL)
579 return -ENOMEM;
580 if (*ref_ptr)
581 (*ref_ptr)->next = ref;
582 else
583 *ref_ptr = ref;
584 }
585 ref->offset += die_get_byte_size(&type) * field->index;
586 if (!field->next)
587 /* Save vr_die for converting types */
588 memcpy(die_mem, vr_die, sizeof(*die_mem));
589 goto next;
590 } else if (tag == DW_TAG_pointer_type) {
591 /* Check the pointer and dereference */
473 if (!field->ref) { 592 if (!field->ref) {
474 pr_err("Semantic error: %s must be referred by '->'\n", 593 pr_err("Semantic error: %s must be referred by '->'\n",
475 field->name); 594 field->name);
@@ -495,10 +614,15 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
495 *ref_ptr = ref; 614 *ref_ptr = ref;
496 } else { 615 } else {
497 /* Verify it is a data structure */ 616 /* Verify it is a data structure */
498 if (dwarf_tag(&type) != DW_TAG_structure_type) { 617 if (tag != DW_TAG_structure_type) {
499 pr_warning("%s is not a data structure.\n", varname); 618 pr_warning("%s is not a data structure.\n", varname);
500 return -EINVAL; 619 return -EINVAL;
501 } 620 }
621 if (field->name[0] == '[') {
622 pr_err("Semantic error: %s is not a pointor nor array.",
623 varname);
624 return -EINVAL;
625 }
502 if (field->ref) { 626 if (field->ref) {
503 pr_err("Semantic error: %s must be referred by '.'\n", 627 pr_err("Semantic error: %s must be referred by '.'\n",
504 field->name); 628 field->name);
@@ -525,6 +649,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
525 } 649 }
526 ref->offset += (long)offs; 650 ref->offset += (long)offs;
527 651
652next:
528 /* Converting next field */ 653 /* Converting next field */
529 if (field->next) 654 if (field->next)
530 return convert_variable_fields(die_mem, field->name, 655 return convert_variable_fields(die_mem, field->name,
@@ -536,51 +661,32 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
 /* Show a variables in kprobe event format */
 static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
 {
-	Dwarf_Attribute attr;
 	Dwarf_Die die_mem;
-	Dwarf_Op *expr;
-	size_t nexpr;
 	int ret;
 
-	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
-		goto error;
-	/* TODO: handle more than 1 exprs */
-	ret = dwarf_getlocation_addr(&attr, pf->addr, &expr, &nexpr, 1);
-	if (ret <= 0 || nexpr == 0)
-		goto error;
+	pr_debug("Converting variable %s into trace event.\n",
+		 dwarf_diename(vr_die));
 
-	ret = convert_location(expr, pf);
+	ret = convert_variable_location(vr_die, pf);
 	if (ret == 0 && pf->pvar->field) {
 		ret = convert_variable_fields(vr_die, pf->pvar->var,
 					      pf->pvar->field, &pf->tvar->ref,
 					      &die_mem);
 		vr_die = &die_mem;
 	}
-	if (ret == 0) {
-		if (pf->pvar->type) {
-			pf->tvar->type = strdup(pf->pvar->type);
-			if (pf->tvar->type == NULL)
-				ret = -ENOMEM;
-		} else
-			ret = convert_variable_type(vr_die, pf->tvar);
-	}
+	if (ret == 0)
+		ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type);
 	/* *expr will be cached in libdw. Don't free it. */
 	return ret;
-error:
-	/* TODO: Support const_value */
-	pr_err("Failed to find the location of %s at this address.\n"
-	       " Perhaps, it has been optimized out.\n", pf->pvar->var);
-	return -ENOENT;
 }
 
 /* Find a variable in a subprogram die */
 static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
-	Dwarf_Die vr_die;
+	Dwarf_Die vr_die, *scopes;
 	char buf[32], *ptr;
-	int ret;
+	int ret, nscopes;
 
-	/* TODO: Support arrays */
 	if (pf->pvar->name)
 		pf->tvar->name = strdup(pf->pvar->name);
 	else {
@@ -607,12 +713,26 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
 	pr_debug("Searching '%s' variable in context.\n",
 		 pf->pvar->var);
 	/* Search child die for local variables and parameters. */
-	if (!die_find_variable(sp_die, pf->pvar->var, &vr_die)) {
+	if (die_find_variable(sp_die, pf->pvar->var, &vr_die))
+		ret = convert_variable(&vr_die, pf);
+	else {
+		/* Search upper class */
+		nscopes = dwarf_getscopes_die(sp_die, &scopes);
+		if (nscopes > 0) {
+			ret = dwarf_getscopevar(scopes, nscopes, pf->pvar->var,
+						0, NULL, 0, 0, &vr_die);
+			if (ret >= 0)
+				ret = convert_variable(&vr_die, pf);
+			else
+				ret = -ENOENT;
+			free(scopes);
+		} else
+			ret = -ENOENT;
+	}
+	if (ret < 0)
 		pr_warning("Failed to find '%s' in this function.\n",
 			   pf->pvar->var);
-		return -ENOENT;
-	}
-	return convert_variable(&vr_die, pf);
+	return ret;
 }
 
 /* Show a probe point to output buffer */
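Note: find_variable() now falls back to libdw's scope walk when the requested variable is not a direct child of the probed function's DIE, so variables visible from enclosing scopes can still be converted. A minimal standalone sketch of that fallback, using only the libdw calls visible in the hunk (the wrapping helper name and error handling are illustrative, not part of the patch):

    #include <elfutils/libdw.h>
    #include <stdlib.h>

    /* Look up 'name' in the scopes enclosing 'sp_die' (illustrative helper). */
    static int find_in_outer_scopes(Dwarf_Die *sp_die, const char *name,
                                    Dwarf_Die *vr_die)
    {
            Dwarf_Die *scopes;
            int nscopes, idx;

            /* Collect the scope DIEs containing sp_die, innermost first. */
            nscopes = dwarf_getscopes_die(sp_die, &scopes);
            if (nscopes <= 0)
                    return -1;

            /* Search those scopes for a variable or parameter named 'name'. */
            idx = dwarf_getscopevar(scopes, nscopes, name, 0, NULL, 0, 0, vr_die);
            free(scopes);   /* the caller owns the scope array */

            return (idx >= 0) ? 0 : -1;
    }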
@@ -897,7 +1017,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 
 	/* Check tag and diename */
 	if (dwarf_tag(sp_die) != DW_TAG_subprogram ||
-	    die_compare_name(sp_die, pp->function) != 0)
+	    !die_compare_name(sp_die, pp->function))
 		return DWARF_CB_OK;
 
 	pf->fname = dwarf_decl_file(sp_die);
@@ -1096,7 +1216,7 @@ end:
 static int line_range_add_line(const char *src, unsigned int lineno,
 			       struct line_range *lr)
 {
-	/* Copy real path */
+	/* Copy source path */
 	if (!lr->path) {
 		lr->path = strdup(src);
 		if (lr->path == NULL)
@@ -1220,7 +1340,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
 	struct line_range *lr = lf->lr;
 
 	if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
-	    die_compare_name(sp_die, lr->function) == 0) {
+	    die_compare_name(sp_die, lr->function)) {
 		lf->fname = dwarf_decl_file(sp_die);
 		dwarf_decl_line(sp_die, &lr->offset);
 		pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
@@ -1263,6 +1383,7 @@ int find_line_range(int fd, struct line_range *lr)
 	size_t cuhl;
 	Dwarf_Die *diep;
 	Dwarf *dbg;
+	const char *comp_dir;
 
 	dbg = dwarf_begin(fd, DWARF_C_READ);
 	if (!dbg) {
@@ -1298,7 +1419,18 @@ int find_line_range(int fd, struct line_range *lr)
 		}
 		off = noff;
 	}
-	pr_debug("path: %lx\n", (unsigned long)lr->path);
+
+	/* Store comp_dir */
+	if (lf.found) {
+		comp_dir = cu_get_comp_dir(&lf.cu_die);
+		if (comp_dir) {
+			lr->comp_dir = strdup(comp_dir);
+			if (!lr->comp_dir)
+				ret = -ENOMEM;
+		}
+	}
+
+	pr_debug("path: %s\n", lr->path);
 	dwarf_end(dbg);
 
 	return (ret < 0) ? ret : lf.found;
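Note: find_line_range() now also saves the compilation directory of the matching CU so that a relative source path stored in lr->path can later be resolved against it. cu_get_comp_dir() is a helper added elsewhere in this series; presumably it just reads DW_AT_comp_dir from the CU DIE, roughly like this sketch (not the patch's exact code):

    #include <elfutils/libdw.h>
    #include <dwarf.h>

    /* Return the DW_AT_comp_dir string of a CU DIE, or NULL (sketch only). */
    static const char *cu_comp_dir(Dwarf_Die *cu_die)
    {
            Dwarf_Attribute attr;

            if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
                    return NULL;
            return dwarf_formstring(&attr);
    }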
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c422cd676313..030791870e33 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -27,8 +27,10 @@ static int perf_session__open(struct perf_session *self, bool force)
 
 	self->fd = open(self->filename, O_RDONLY);
 	if (self->fd < 0) {
-		pr_err("failed to open file: %s", self->filename);
-		if (!strcmp(self->filename, "perf.data"))
+		int err = errno;
+
+		pr_err("failed to open %s: %s", self->filename, strerror(err));
+		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
 			pr_err(" (try 'perf record' first)");
 		pr_err("\n");
 		return -errno;
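Note: the error path above snapshots errno right after open() fails, before later calls can clobber it, so the strerror() text describes the original failure; the "try 'perf record' first" hint is also limited to the ENOENT case on the default perf.data file. The general errno-saving pattern, as a trivial self-contained sketch:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative only: preserve errno across calls that may change it. */
    int open_report(const char *path)
    {
            int fd = open(path, O_RDONLY);

            if (fd < 0) {
                    int err = errno;        /* save before fprintf()/strerror() run */

                    fprintf(stderr, "failed to open %s: %s\n", path, strerror(err));
                    return -err;
            }
            return fd;
    }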
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2316cb5a4116..c27b4b03fbc1 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -13,6 +13,7 @@ enum sort_type sort__first_dimension;
 unsigned int dsos__col_width;
 unsigned int comms__col_width;
 unsigned int threads__col_width;
+unsigned int cpus__col_width;
 static unsigned int parent_symbol__col_width;
 char * field_sep;
 
@@ -28,6 +29,8 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
 				    size_t size, unsigned int width);
 static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
 				       size_t size, unsigned int width);
+static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
+				    size_t size, unsigned int width);
 
 struct sort_entry sort_thread = {
 	.se_header = "Command: Pid",
@@ -63,6 +66,13 @@ struct sort_entry sort_parent = {
 	.se_snprintf = hist_entry__parent_snprintf,
 	.se_width = &parent_symbol__col_width,
 };
+
+struct sort_entry sort_cpu = {
+	.se_header = "CPU",
+	.se_cmp = sort__cpu_cmp,
+	.se_snprintf = hist_entry__cpu_snprintf,
+	.se_width = &cpus__col_width,
+};
 
 struct sort_dimension {
 	const char *name;
@@ -76,6 +86,7 @@ static struct sort_dimension sort_dimensions[] = {
 	{ .name = "dso", .entry = &sort_dso, },
 	{ .name = "symbol", .entry = &sort_sym, },
 	{ .name = "parent", .entry = &sort_parent, },
+	{ .name = "cpu", .entry = &sort_cpu, },
 };
 
 int64_t cmp_null(void *l, void *r)
@@ -242,6 +253,20 @@ static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
 			       self->parent ? self->parent->name : "[other]");
 }
 
+/* --sort cpu */
+
+int64_t
+sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->cpu - left->cpu;
+}
+
+static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
+				    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*d", width, self->cpu);
+}
+
 int sort_dimension__add(const char *tok)
 {
 	unsigned int i;
@@ -281,6 +306,8 @@ int sort_dimension__add(const char *tok)
 			sort__first_dimension = SORT_SYM;
 		else if (!strcmp(sd->name, "parent"))
 			sort__first_dimension = SORT_PARENT;
+		else if (!strcmp(sd->name, "cpu"))
+			sort__first_dimension = SORT_CPU;
 	}
 
 	list_add_tail(&sd->entry->list, &hist_entry__sort_list);
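Note: with sort_cpu registered in sort_dimensions[], the CPU a sample was taken on becomes an ordinary sort key; sort__cpu_cmp() just compares the hist_entry cpu fields. An illustrative command line would be:

    perf report --sort=cpu,comm,dso

which groups report entries by CPU before breaking them down by command and DSO.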
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 0d61c4082f43..560c855417e4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -39,6 +39,7 @@ extern struct sort_entry sort_parent;
 extern unsigned int dsos__col_width;
 extern unsigned int comms__col_width;
 extern unsigned int threads__col_width;
+extern unsigned int cpus__col_width;
 extern enum sort_type sort__first_dimension;
 
 struct hist_entry {
@@ -51,6 +52,7 @@ struct hist_entry {
 	struct map_symbol ms;
 	struct thread *thread;
 	u64 ip;
+	s32 cpu;
 	u32 nr_events;
 	char level;
 	u8 filtered;
@@ -68,7 +70,8 @@ enum sort_type {
 	SORT_COMM,
 	SORT_DSO,
 	SORT_SYM,
-	SORT_PARENT
+	SORT_PARENT,
+	SORT_CPU,
 };
 
 /*
@@ -104,6 +107,7 @@ extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
+int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right);
 extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
 extern int sort_dimension__add(const char *);
 void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b63e5713849f..971d0a05d6b4 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -933,6 +933,25 @@ static bool elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type
 	}
 }
 
+static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
+{
+	Elf_Scn *sec = NULL;
+	GElf_Shdr shdr;
+	size_t cnt = 1;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		gelf_getshdr(sec, &shdr);
+
+		if ((addr >= shdr.sh_addr) &&
+		    (addr < (shdr.sh_addr + shdr.sh_size)))
+			return cnt;
+
+		++cnt;
+	}
+
+	return -1;
+}
+
 static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 			 int fd, symbol_filter_t filter, int kmodule)
 {
@@ -944,12 +963,13 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 	int err = -1;
 	uint32_t idx;
 	GElf_Ehdr ehdr;
-	GElf_Shdr shdr;
-	Elf_Data *syms;
+	GElf_Shdr shdr, opdshdr;
+	Elf_Data *syms, *opddata = NULL;
 	GElf_Sym sym;
-	Elf_Scn *sec, *sec_strndx;
+	Elf_Scn *sec, *sec_strndx, *opdsec;
 	Elf *elf;
 	int nr = 0;
+	size_t opdidx = 0;
 
 	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
@@ -969,6 +989,10 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 		goto out_elf_end;
 	}
 
+	opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx);
+	if (opdsec)
+		opddata = elf_rawdata(opdsec, NULL);
+
 	syms = elf_getdata(sec, NULL);
 	if (syms == NULL)
 		goto out_elf_end;
@@ -1013,6 +1037,13 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 		if (!is_label && !elf_sym__is_a(&sym, map->type))
 			continue;
 
+		if (opdsec && sym.st_shndx == opdidx) {
+			u32 offset = sym.st_value - opdshdr.sh_addr;
+			u64 *opd = opddata->d_buf + offset;
+			sym.st_value = *opd;
+			sym.st_shndx = elf_addr_to_index(elf, sym.st_value);
+		}
+
 		sec = elf_getscn(elf, sym.st_shndx);
 		if (!sec)
 			goto out_elf_end;
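Note: the .opd handling above is for 64-bit PowerPC (ELFv1 ABI) binaries, where a function symbol's value points at a function descriptor in the .opd section rather than at the code itself. The first 8-byte word of the descriptor is the real entry address, so the symbol is rewritten to that address and re-homed into whichever section actually contains it via elf_addr_to_index(). A compressed sketch of the dereference (types and names simplified, illustrative only):

    #include <stdint.h>

    /*
     * Given the raw contents of .opd and a symbol value that points into it,
     * return the function's real entry address (first word of the descriptor).
     * 'opd_vaddr' is the load address of .opd.
     */
    static uint64_t opd_entry_address(const void *opd_buf, uint64_t opd_vaddr,
                                      uint64_t sym_value)
    {
            const uint64_t *desc = (const uint64_t *)((const char *)opd_buf +
                                                      (sym_value - opd_vaddr));
            return desc[0]; /* the following words hold the TOC pointer and env */
    }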
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5e02d2c17154..80e569bbdecc 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -9,8 +9,6 @@
 #include <linux/rbtree.h>
 #include <stdio.h>
 
-#define DEBUG_CACHE_DIR ".debug"
-
 #ifdef HAVE_CPLUS_DEMANGLE
 extern char *cplus_demangle(const char *, int);
 
@@ -73,6 +71,7 @@ struct symbol_conf {
 			full_paths,
 			show_cpu_utilization;
 	const char *vmlinux_name,
+		   *source_prefix,
 		   *field_sep;
 	const char *default_guest_vmlinux_name,
 		   *default_guest_kallsyms,
@@ -112,7 +111,8 @@ struct addr_location {
 	u64 addr;
 	char level;
 	bool filtered;
-	unsigned int cpumode;
+	u8 cpumode;
+	s32 cpu;
 };
 
 enum dso_kernel_type {
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 4e8b6b0c551c..f380fed74359 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -89,6 +89,7 @@
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
+extern char buildid_dir[];
 
 /* On most systems <limits.h> would have given us this, but
  * not on some systems (e.g. GNU/Hurd).
@@ -152,6 +153,8 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)))
 extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
 
 extern int prefixcmp(const char *str, const char *prefix);
+extern void set_buildid_dir(void);
+extern void disable_buildid_cache(void);
 
 static inline const char *skip_prefix(const char *str, const char *prefix)
 {